diff --git a/data/preprocess_scripts/agilex.py b/data/preprocess_scripts/agilex.py new file mode 100644 index 0000000000000000000000000000000000000000..d6a4965e4884a9b43cade76c9007e464cbd97903 --- /dev/null +++ b/data/preprocess_scripts/agilex.py @@ -0,0 +1,186 @@ +import json +import random +import os +import fnmatch +import random + +import tensorflow as tf + + +def _parse_function(proto, precomputed_instr_embed_path): + keys_to_features = { + 'action': tf.io.FixedLenFeature([], tf.string), + 'base_action': tf.io.FixedLenFeature([], tf.string), + 'qpos': tf.io.FixedLenFeature([], tf.string), + 'qvel': tf.io.FixedLenFeature([], tf.string), + 'cam_high': tf.io.FixedLenFeature([], tf.string), + 'cam_left_wrist': tf.io.FixedLenFeature([], tf.string), + 'cam_right_wrist': tf.io.FixedLenFeature([], tf.string), + 'instruction': tf.io.FixedLenFeature([], tf.string), + 'terminate_episode': tf.io.FixedLenFeature([], tf.int64) + } + + parsed_features = tf.io.parse_single_example(proto, keys_to_features) + + action = tf.io.parse_tensor(parsed_features['action'], out_type=tf.float32) + base_action = tf.io.parse_tensor(parsed_features['base_action'], out_type=tf.float32) + qpos = tf.io.parse_tensor(parsed_features['qpos'], out_type=tf.float32) + qvel = tf.io.parse_tensor(parsed_features['qvel'], out_type=tf.float32) + cam_high = tf.io.parse_tensor(parsed_features['cam_high'], out_type=tf.string) + cam_left_wrist = tf.io.parse_tensor(parsed_features['cam_left_wrist'], out_type=tf.string) + cam_right_wrist = tf.io.parse_tensor(parsed_features['cam_right_wrist'], out_type=tf.string) + # instruction = parsed_features['instruction'] + terminate_episode = tf.cast(parsed_features['terminate_episode'], tf.int64) + + cam_high = tf.image.decode_jpeg(cam_high, channels=3, dct_method='INTEGER_ACCURATE') + cam_left_wrist = tf.image.decode_jpeg(cam_left_wrist, channels=3, dct_method='INTEGER_ACCURATE') + cam_right_wrist = tf.image.decode_jpeg(cam_right_wrist, channels=3, dct_method='INTEGER_ACCURATE') + # BGR to RGB + cam_high = tf.reverse(cam_high, axis=[-1]) + cam_left_wrist = tf.reverse(cam_left_wrist, axis=[-1]) + cam_right_wrist = tf.reverse(cam_right_wrist, axis=[-1]) + + action = tf.reshape(action, [14]) + base_action = tf.reshape(base_action, [2]) + qpos = tf.reshape(qpos, [14]) + qvel = tf.reshape(qvel, [14]) + cam_high = tf.reshape(cam_high, [480, 640, 3]) + cam_left_wrist = tf.reshape(cam_left_wrist, [480, 640, 3]) + cam_right_wrist = tf.reshape(cam_right_wrist, [480, 640, 3]) + + return { + "action": action, + "base_action": base_action, + "qpos": qpos, + "qvel": qvel, + 'observation':{ + "cam_high": cam_high, + "cam_left_wrist": cam_left_wrist, + "cam_right_wrist": cam_right_wrist + }, + "instruction": precomputed_instr_embed_path, + "terminate_episode": terminate_episode, + } + + +def dataset_generator_from_tfrecords(seed): + tfrecord_path = './data/datasets/agilex/tfrecords/' + filepaths = [] + for root, dirs, files in os.walk(tfrecord_path): + for filename in fnmatch.filter(files, '*.tfrecord'): + filepath = os.path.join(root, filename) + filepaths.append(filepath) + + random.seed(seed) + random.shuffle(filepaths) + for filepath in filepaths: + raw_dataset = tf.data.TFRecordDataset(filepath) + dataset = raw_dataset.map(lambda x: _parse_function(x, os.path.dirname(filepath))) + yield { + 'steps': dataset + } + + +def load_dataset(seed): + dataset = tf.data.Dataset.from_generator( + lambda: dataset_generator_from_tfrecords(seed), + output_signature={ + 'steps': tf.data.DatasetSpec( + element_spec={ + 'action': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'base_action': tf.TensorSpec(shape=(2), dtype=tf.float32), + 'qpos': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'qvel': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'observation': { + 'cam_high': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'cam_left_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'cam_right_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + }, + 'instruction': tf.TensorSpec(shape=(), dtype=tf.string), + 'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64), + } + ) + } + ) + + return dataset + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + old_action = step['action'] + step['action'] = {} + action = step['action'] + step['action']['terminate'] = step['terminate_episode'] + # act-plus-plus/utils.py at main · MarkFzp/act-plus-plus + left_arm_pos = old_action[:6] + left_gripper_open = old_action[6:7] / 11.8997 + right_arm_pos = old_action[7:13] + right_gripper_open = old_action[13:14] / 13.9231 + + # Base action is dummy (all zeros) + arm_action = tf.concat([left_arm_pos,left_gripper_open,right_arm_pos,right_gripper_open], axis=0) + action['arm_concat'] = arm_action + # Write the action format + action['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open") + + state = step['observation'] + left_qpos = step['qpos'][:6] + left_gripper_open = step['qpos'][6:7] / 4.7908 # rescale to [0, 1] + right_qpos = step['qpos'][7:13] + right_gripper_open = step['qpos'][13:14] / 4.7888 # rescale to [0, 1] + state['arm_concat'] = tf.concat([left_qpos, left_gripper_open,right_qpos, right_gripper_open], axis=0) + # # Write the state format + state['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open") + + # We randomly sample [original,expanded,simplified] instructions. The ratio is 1:1:1 + instr_type = tf.random.uniform(shape=(), minval=0, maxval=3, dtype=tf.int32) + # # NOTE bg : tf.random and tf.constant is buggy as it always return 0 (?) + # instr_type = tf.constant(instr_type) + # print(instr_type) + @tf.function + def f0(): + return tf.strings.join([step['instruction'], tf.constant('/lang_embed_0.pt')]) + @tf.function + def f1(): + return tf.strings.join([step['instruction'], tf.constant('/lang_embed_1.pt')]) + @tf.function + def f2(): + index = tf.random.uniform(shape=(), minval=0, maxval=100, dtype=tf.int32) + return tf.strings.join([ + step['instruction'], tf.constant('/lang_embed_'), tf.strings.as_string(index+2), tf.constant('.pt')]) + + instr = tf.case([ + (tf.equal(instr_type, 0), f0), + (tf.equal(instr_type, 1), f1), + (tf.equal(instr_type, 2), f2) + ], exclusive=True) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + dataset = load_dataset(42) + + for episode in dataset: + for step in episode['steps']: + step = process_step(step) + # save the images + print(step['observation']['natural_language_instruction']) diff --git a/data/preprocess_scripts/aloha_box_into_pot.py b/data/preprocess_scripts/aloha_box_into_pot.py new file mode 100644 index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e --- /dev/null +++ b/data/preprocess_scripts/aloha_box_into_pot.py @@ -0,0 +1,55 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = action_dict['ee_6d_pos'] + + # Write the action format + action['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + + # Convert raw state to our state + # Robot state + state_dict = step['observation']['state'] + state = {} + state['arm_concat'] = state_dict + + # Write the state format + state['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/aloha_dish_drainer.py b/data/preprocess_scripts/aloha_dish_drainer.py new file mode 100644 index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e --- /dev/null +++ b/data/preprocess_scripts/aloha_dish_drainer.py @@ -0,0 +1,55 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = action_dict['ee_6d_pos'] + + # Write the action format + action['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + + # Convert raw state to our state + # Robot state + state_dict = step['observation']['state'] + state = {} + state['arm_concat'] = state_dict + + # Write the state format + state['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/aloha_handover_box.py b/data/preprocess_scripts/aloha_handover_box.py new file mode 100644 index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e --- /dev/null +++ b/data/preprocess_scripts/aloha_handover_box.py @@ -0,0 +1,55 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = action_dict['ee_6d_pos'] + + # Write the action format + action['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + + # Convert raw state to our state + # Robot state + state_dict = step['observation']['state'] + state = {} + state['arm_concat'] = state_dict + + # Write the state format + state['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/aloha_lift_box.py b/data/preprocess_scripts/aloha_lift_box.py new file mode 100644 index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e --- /dev/null +++ b/data/preprocess_scripts/aloha_lift_box.py @@ -0,0 +1,55 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = action_dict['ee_6d_pos'] + + # Write the action format + action['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + + # Convert raw state to our state + # Robot state + state_dict = step['observation']['state'] + state = {} + state['arm_concat'] = state_dict + + # Write the state format + state['format'] = tf.constant( + "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open" + ) + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/aloha_mobile.py b/data/preprocess_scripts/aloha_mobile.py new file mode 100644 index 0000000000000000000000000000000000000000..013cfc3c117a401b9ebf20656bcfb172db70a73c --- /dev/null +++ b/data/preprocess_scripts/aloha_mobile.py @@ -0,0 +1,183 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_euler +import tensorflow as tf +import h5py +import numpy as np +from tqdm import tqdm +import os +import imageio +import concurrent.futures +import fnmatch +import cv2 +import random + +def _parse_function(proto): + keys_to_features = { + 'action': tf.io.FixedLenFeature([], tf.string), + 'base_action': tf.io.FixedLenFeature([], tf.string), + 'qpos': tf.io.FixedLenFeature([], tf.string), + 'qvel': tf.io.FixedLenFeature([], tf.string), + 'cam_high': tf.io.FixedLenFeature([], tf.string), + 'cam_left_wrist': tf.io.FixedLenFeature([], tf.string), + 'cam_right_wrist': tf.io.FixedLenFeature([], tf.string), + 'instruction': tf.io.FixedLenFeature([], tf.string), + 'terminate_episode': tf.io.FixedLenFeature([], tf.int64) + } + + parsed_features = tf.io.parse_single_example(proto, keys_to_features) + + action = tf.io.parse_tensor(parsed_features['action'], out_type=tf.float32) + base_action = tf.io.parse_tensor(parsed_features['base_action'], out_type=tf.float32) + qpos = tf.io.parse_tensor(parsed_features['qpos'], out_type=tf.float32) + qvel = tf.io.parse_tensor(parsed_features['qvel'], out_type=tf.float32) + cam_high = tf.io.parse_tensor(parsed_features['cam_high'], out_type=tf.uint8) + cam_left_wrist = tf.io.parse_tensor(parsed_features['cam_left_wrist'], out_type=tf.uint8) + cam_right_wrist = tf.io.parse_tensor(parsed_features['cam_right_wrist'], out_type=tf.uint8) + instruction = parsed_features['instruction'] + terminate_episode = tf.cast(parsed_features['terminate_episode'], tf.int64) + action = tf.reshape(action, [14]) + base_action = tf.reshape(base_action, [2]) + qpos = tf.reshape(qpos, [14]) + qvel = tf.reshape(qvel, [14]) + cam_high = tf.reshape(cam_high, [480, 640, 3]) + cam_left_wrist = tf.reshape(cam_left_wrist, [480, 640, 3]) + cam_right_wrist = tf.reshape(cam_right_wrist, [480, 640, 3]) + + return { + "action": action, + "base_action": base_action, + "qpos": qpos, + "qvel": qvel, + 'observation':{ + "cam_high": cam_high, + "cam_left_wrist": cam_left_wrist, + "cam_right_wrist": cam_right_wrist + }, + "instruction": instruction, + "terminate_episode": terminate_episode + } + +def dataset_generator_from_tfrecords(seed): + tfrecord_path = './data/datasets/aloha/tfrecords/aloha_mobile/' + datasets = [] + filepaths = [] + for root, dirs, files in os.walk(tfrecord_path): + for filename in fnmatch.filter(files, '*.tfrecord'): + filepath = os.path.join(root, filename) + filepaths.append(filepath) + + random.seed(seed) + random.shuffle(filepaths) + for filepath in filepaths: + raw_dataset = tf.data.TFRecordDataset(filepath) + dataset = raw_dataset.map(_parse_function) + yield { + 'steps': dataset + } + +def load_dataset(seed): + dataset = tf.data.Dataset.from_generator( + lambda: dataset_generator_from_tfrecords(seed), + output_signature={ + 'steps': tf.data.DatasetSpec( + element_spec={ + 'action': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'base_action': tf.TensorSpec(shape=(2), dtype=tf.float32), + 'qpos': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'qvel': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'observation': { + 'cam_high': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'cam_left_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'cam_right_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + }, + 'instruction': tf.TensorSpec(shape=(), dtype=tf.string), + 'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64) + } + ) + } + ) + + return dataset + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + old_action = step['action'] + step['action'] = {} + action = step['action'] + step['action']['terminate'] = step['terminate_episode'] + # act-plus-plus/utils.py at main · MarkFzp/act-plus-plus + left_arm_pos = old_action[:6] + left_gripper_open = old_action[6:7] + right_arm_pos = old_action[7:13] + right_gripper_open = old_action[13:14] + + base_vel_y = step['base_action'][:1] + base_delta_ang = step['base_action'][1:] + base_action = tf.concat([base_vel_y, base_delta_ang], axis=0) + # # No base found + arm_action = tf.concat([left_arm_pos,left_gripper_open,right_arm_pos,right_gripper_open], axis=0) + action['arm_concat'] = arm_action + action['base_concat'] = base_action + # # Write the action format + action['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open,base_vel_y,base_angular_vel") + + state = step['observation'] + left_qpos = step['qpos'][:6] + left_gripper_open = step['qpos'][6:7] + right_qpos = step['qpos'][7:13] + right_gripper_open = step['qpos'][13:14] + left_qvel = step['qvel'][:6] + # left_gripper_joint_vel = step['qvel'][6:7] + right_qvel = step['qvel'][7:13] + # right_gripper_joint_vel = step['qvel'][13:14] + + state['arm_concat'] = tf.concat([left_qpos, left_qvel, left_gripper_open, right_qpos, right_qvel, right_gripper_open], axis=0) + # # Write the state format + state['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_arm_joint_0_vel,left_arm_joint_1_vel,left_arm_joint_2_vel,left_arm_joint_3_vel,left_arm_joint_4_vel,left_arm_joint_5_vel,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_arm_joint_0_vel,right_arm_joint_1_vel,right_arm_joint_2_vel,right_arm_joint_3_vel,right_arm_joint_4_vel,right_arm_joint_5_vel,right_gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = '/mnt/d/aloha/' + DATASET_NAME = 'dataset' + # Load the dataset + dataset = load_dataset() + for data in dataset.take(1): + for step in data['steps'].take(1): + from IPython import embed; embed() + print(step) \ No newline at end of file diff --git a/data/preprocess_scripts/aloha_static.py b/data/preprocess_scripts/aloha_static.py new file mode 100644 index 0000000000000000000000000000000000000000..23480f021cb015511815ee48e6a8ae9c11d1efce --- /dev/null +++ b/data/preprocess_scripts/aloha_static.py @@ -0,0 +1,193 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_euler +import tensorflow as tf +import h5py +import numpy as np +from tqdm import tqdm +import os +import imageio +import concurrent.futures +import fnmatch +import cv2 +import random + +def get_all_hdf5s(root_dir): + num_files = 0 + for root, dirs, files in os.walk(root_dir): + for filename in fnmatch.filter(files, '*.hdf5'): + num_files += 1 + return num_files + +def stash_image_into_observation(step): + step['observation'] = {'cam_high': [], 'cam_left_wrist': [], 'cam_right_wrist':[], 'cam_low':[] } + step['observation']['cam_high'] = step['cam_high'] + step['observation']['cam_left_wrist'] = step['cam_left_wrist'] + step['observation']['cam_right_wrist'] = step['cam_right_wrist'] + step['observation']['cam_low'] = step['cam_low'] + return step + +def _parse_function(proto): + keys_to_features = { + 'action': tf.io.FixedLenFeature([], tf.string), + 'qpos': tf.io.FixedLenFeature([], tf.string), + 'qvel': tf.io.FixedLenFeature([], tf.string), + 'cam_high': tf.io.FixedLenFeature([], tf.string), + 'cam_left_wrist': tf.io.FixedLenFeature([], tf.string), + 'cam_right_wrist': tf.io.FixedLenFeature([], tf.string), + 'cam_low': tf.io.FixedLenFeature([], tf.string), + 'instruction': tf.io.FixedLenFeature([], tf.string), + 'terminate_episode': tf.io.FixedLenFeature([], tf.int64) + } + + parsed_features = tf.io.parse_single_example(proto, keys_to_features) + + action = tf.io.parse_tensor(parsed_features['action'], out_type=tf.float32) + qpos = tf.io.parse_tensor(parsed_features['qpos'], out_type=tf.float32) + qvel = tf.io.parse_tensor(parsed_features['qvel'], out_type=tf.float32) + cam_high = tf.io.parse_tensor(parsed_features['cam_high'], out_type=tf.uint8) + cam_left_wrist = tf.io.parse_tensor(parsed_features['cam_left_wrist'], out_type=tf.uint8) + cam_right_wrist = tf.io.parse_tensor(parsed_features['cam_right_wrist'], out_type=tf.uint8) + cam_low = tf.io.parse_tensor(parsed_features['cam_low'], out_type=tf.uint8) + instruction = parsed_features['instruction'] + terminate_episode = tf.cast(parsed_features['terminate_episode'], tf.int64) + action = tf.reshape(action, [14]) + qpos = tf.reshape(qpos, [14]) + qvel = tf.reshape(qvel, [14]) + cam_high = tf.reshape(cam_high, [480, 640, 3]) + cam_left_wrist = tf.reshape(cam_left_wrist, [480, 640, 3]) + cam_right_wrist = tf.reshape(cam_right_wrist, [480, 640, 3]) + cam_low = tf.reshape(cam_low, [480, 640, 3]) + return { + "action": action, + "qpos": qpos, + "qvel": qvel, + 'observation':{ + "cam_high": cam_high, + "cam_left_wrist": cam_left_wrist, + "cam_right_wrist": cam_right_wrist, + "cam_low":cam_low + }, + "instruction": instruction, + "terminate_episode": terminate_episode + } + +def dataset_generator_from_tfrecords(seed): + tfrecord_path = './data/datasets/aloha/tfrecords/aloha_static_cotraining_datasets/' + datasets = [] + filepaths = [] + for root, dirs, files in os.walk(tfrecord_path): + for filename in fnmatch.filter(files, '*.tfrecord'): + filepath = os.path.join(root, filename) + filepaths.append(filepath) + + random.seed(seed) + random.shuffle(filepaths) + for filepath in filepaths: + raw_dataset = tf.data.TFRecordDataset(filepath) + dataset = raw_dataset.map(_parse_function) + yield { + 'steps': dataset + } + +def load_dataset(seed): + dataset = tf.data.Dataset.from_generator( + lambda: dataset_generator_from_tfrecords(seed), + output_signature={ + 'steps': tf.data.DatasetSpec( + element_spec={ + 'action': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'qpos': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'qvel': tf.TensorSpec(shape=(14), dtype=tf.float32), + 'observation': { + 'cam_high': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'cam_left_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'cam_right_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'cam_low': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + }, + 'instruction': tf.TensorSpec(shape=(), dtype=tf.string), + 'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64) + } + ) + } + ) + + return dataset + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + old_action = step['action'] + step['action'] = {} + action = step['action'] + step['action']['terminate'] = step['terminate_episode'] + # act-plus-plus/utils.py at main · MarkFzp/act-plus-plus + left_arm_pos = old_action[:6] + left_gripper_open = old_action[6:7] + right_arm_pos = old_action[7:13] + right_gripper_open = old_action[13:14] + + arm_action = tf.concat([left_arm_pos,left_gripper_open,right_arm_pos,right_gripper_open], axis=0) + + action['arm_concat'] = arm_action + # # Write the action format + action['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open") + + state = step['observation'] + left_qpos = step['qpos'][:6] + left_gripper_open = step['qpos'][6:7] + right_qpos = step['qpos'][7:13] + right_gripper_open = step['qpos'][13:14] + left_qvel = step['qvel'][:6] + # left_gripper_joint_vel = step['qvel'][6:7] + right_qvel = step['qvel'][7:13] + # right_gripper_joint_vel = step['qvel'][13:14] + + state['arm_concat'] = tf.concat([left_qpos, left_qvel, left_gripper_open, right_qpos, right_qvel, right_gripper_open], axis=0) + # # Write the state format + state['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_arm_joint_0_vel,left_arm_joint_1_vel,left_arm_joint_2_vel,left_arm_joint_3_vel,left_arm_joint_4_vel,left_arm_joint_5_vel,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_arm_joint_0_vel,right_arm_joint_1_vel,right_arm_joint_2_vel,right_arm_joint_3_vel,right_arm_joint_4_vel,right_arm_joint_5_vel,right_gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = '/mnt/d/aloha/' + DATASET_NAME = 'dataset' + # Load the dataset + dataset = load_dataset() + for data in dataset.take(1): + for step in data['steps'].take(1): + from IPython import embed; embed() + print(step) \ No newline at end of file diff --git a/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py b/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..7af2596d0abcbc148b1f88a8c18868af664e9715 --- /dev/null +++ b/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py @@ -0,0 +1,91 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_euler + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode]. + # NOTE the dimension of action is actually 7, so only 7x joint velocities exists + joint_vel = action[:7] + gripper_vel = action[7:9] + # there are extra action_delta information + # Robot delta action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode]. + # action_delta = step['action_delta'] + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([joint_vel, gripper_vel], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_vel,gripper_joint_1_vel") + + # Convert raw state to our state + state = step['observation'] + state_vec = state['state'] + # Robot state, consists of [6x robot joint angles, 1x gripper position]. + arm_joint_ang = state_vec[:6] + grip_pos = state_vec[6:7] + # Robot joint velocity, consists of [6x robot joint angles, 1x gripper position]. + state_vel = state['state_vel'] + arm_joint_vel = state_vel[:6] + grip_vel = state_vel[6:7] + + # Concatenate the state + state['arm_concat'] = tf.concat([arm_joint_ang,arm_joint_vel,grip_pos,grip_vel], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,gripper_joint_0_pos,gripper_joint_0_vel") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..ca2c88d41041fa3a0a92974f40ebdb25ce9b0bd9 --- /dev/null +++ b/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py @@ -0,0 +1,91 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, \ + quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action]. + action = step['action'] + eef_delta_pos = action[:3] + eef_ang = action[3:6] + eef_ang = euler_to_quaternion(eef_ang) + grip_open = tf.expand_dims(1 - action[6], axis=0) + + # Concatenate the action + # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation']['state'] + joint_pos = step['observation']['state_joint'] + # Default robot state, consists of [3x robot ee pos, 3x ee quat, 1x gripper state]. + eef_pos = state[:3] + eef_quat = state[3:7] + eef_ang = quaternion_to_rotation_matrix(eef_quat) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + gripper_joint_pos = state[7:8] * 12.85 # rescale to [0, 1] + + # Concatenate the state + state = step['observation'] + state['arm_concat'] = tf.concat([joint_pos,gripper_joint_pos,eef_pos,eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..2b6d73fe5c2b8b454aea4eb55c5250e333dd953b --- /dev/null +++ b/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py @@ -0,0 +1,93 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action]. + action = step['action'] + eef_pos = action[:3] + eef_ang = action[3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + grip_open = tf.expand_dims(1 - action[6], axis=0) + + # Concatenate the action + # action['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0) + step['action'] = {} + action = step['action'] + + action['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Convert raw state to our state + joint_pos = step['observation']['state'][:7] + grip_open = step['observation']['state'][7:8] * 12.55 + state_ee = step['observation']['state_ee'] + # Tensor (16,) End-effector state, represented as 4x4 homogeneous transformation matrix of ee pose. + transform_matrix = tf.transpose(tf.reshape(state_ee, (4, 4))) + eef_pos = transform_matrix[:3, 3] + rotation_matrix = transform_matrix[:3, :3] + eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) + # Concatenate the state + + state = step['observation'] + state['arm_concat'] = tf.concat([joint_pos,grip_open,eef_pos,eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/bc_z.py b/data/preprocess_scripts/bc_z.py new file mode 100644 index 0000000000000000000000000000000000000000..748252ddbeda2219dfad4dfcb22c546299850a61 --- /dev/null +++ b/data/preprocess_scripts/bc_z.py @@ -0,0 +1,95 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, \ + euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + action = step['action'] + # The next 10 actions for the positions. Each action is a 3D delta to add to current position. + eef_delta_pos = action['future/xyz_residual'][:3] + # The next 10 actions for the rotation. Each action is a 3D delta to add to the current axis angle. + eef_ang = action['future/axis_angle_residual'][:3] + eef_ang = euler_to_quaternion(eef_ang) + # The next 10 actions for the gripper. Each action is the value the gripper closure should be changed to (notably it is not a delta.) + grip_open = tf.cast(tf.expand_dims(1 - action['future/target_close'][0], axis=0), dtype=tf.float32) + + # Concatenate the action + step['action'] = {} + action = step['action'] + + action['terminate'] = step['is_terminal'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation'] + + gripper_ang = state['present/axis_angle'] + gripper_ang = euler_to_rotation_matrix(gripper_ang) + gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) + gripper_pos = state['present/xyz'] + # How much the gripper is currently closed. Scaled from 0 to 1, but not all values from 0 to 1 are reachable. The range in the data is about 0.2 to 1 + gripper_open = 1- state['present/sensed_close'] + + + # Concatenate the state + state = step['observation'] + state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['natural_language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/berkeley_fanuc_manipulation.py b/data/preprocess_scripts/berkeley_fanuc_manipulation.py new file mode 100644 index 0000000000000000000000000000000000000000..d07003cbaf6bbf0e18aec088eab2764d47ee96ec --- /dev/null +++ b/data/preprocess_scripts/berkeley_fanuc_manipulation.py @@ -0,0 +1,81 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, \ + quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate'] = step['is_terminal'] + # 6x end effector delta pose, 1x gripper position + eef_delta_pos = origin_action[:3] + eef_ang=origin_action[3:6] + eef_ang = euler_to_quaternion(eef_ang) + # No base found + + # Concatenate the action + action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang],axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + # [6x robot joint angles, 1x gripper open status, 6x robot joint velocities]. + arm_joint_ang=state['state'][:6] + grip_open=1-state['state'][6:7] + # arm_joint_vel=state['state'][7:13] # all zeros + eef_pos = state['end_effector_state'][:3] + eef_ang = quaternion_to_rotation_matrix(state['end_effector_state'][3:]) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'berkeley_fanuc_manipulation' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/berkeley_gnm_cory_hall.py b/data/preprocess_scripts/berkeley_gnm_cory_hall.py new file mode 100644 index 0000000000000000000000000000000000000000..6cfa5e5750f0255eff5bfc914e8a385141d26a15 --- /dev/null +++ b/data/preprocess_scripts/berkeley_gnm_cory_hall.py @@ -0,0 +1,78 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ + rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate'] = step['is_terminal'] + + eef_pos=tf.cast(origin_action,dtype=tf.float32) + eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32) + eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0)) + # No base found + + # Concatenate the action + action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + eef_pos=tf.cast(state['position'],dtype=tf.float32) + eef_ang=tf.cast(state['yaw'],dtype=tf.float32) + eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'berkeley_gnm_cory_hall' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step['action'][6:7]) + diff --git a/data/preprocess_scripts/berkeley_gnm_recon.py b/data/preprocess_scripts/berkeley_gnm_recon.py new file mode 100644 index 0000000000000000000000000000000000000000..0cb9b8d6191589159d4e899da8171f4b2e211895 --- /dev/null +++ b/data/preprocess_scripts/berkeley_gnm_recon.py @@ -0,0 +1,78 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ + rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate'] = step['is_terminal'] + + eef_pos=tf.cast(origin_action,dtype=tf.float32) + eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32) + eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0)) + # No base found + + # Concatenate the action + action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + eef_pos=tf.cast(state['position'],dtype=tf.float32) + eef_ang=tf.cast(state['yaw'],dtype=tf.float32) + eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0)) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + state['arm_concat'] = tf.concat([eef_pos/100,eef_ang],axis=0) + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_angle_x,eef_angle_y,eef_angle_z,eef_angle_w") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'berkeley_gnm_recon' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step['action'][6:7]) + diff --git a/data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py b/data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..aef391a432e879bc00b6b43a7f3d0b39f8d98c90 --- /dev/null +++ b/data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py @@ -0,0 +1,90 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [7 delta joint pos,1x gripper binary state]. + delta_joint_pos = action[:7] + grip_open = tf.expand_dims(1 - action[7], axis=0) + + # Concatenate the action + # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([delta_joint_pos, grip_open], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "arm_joint_0_delta_pos,arm_joint_1_delta_pos,arm_joint_2_delta_pos,arm_joint_3_delta_pos,arm_joint_4_delta_pos,arm_joint_5_delta_pos,arm_joint_6_delta_pos,gripper_open") + + # Convert raw state to our state + state = step['observation'] + # xArm joint positions (7 DoF). + arm_joint_pos = state['joint_pos'] + # Binary gripper state (1 - closed, 0 - open) + grip_open = tf.expand_dims(1 - tf.cast(state['gripper'],dtype=tf.float32), axis=0) + # Gripper pose, robot frame, [3 position, 4 rotation] + gripper_pose = state['pose'][:3] + # gripper_ang = quaternion_to_euler(state['pose'][3:7]) + gripper_ang = quaternion_to_rotation_matrix(state['pose'][3:7]) + gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) + + # Concatenate the state + state['arm_concat'] = tf.concat([arm_joint_pos, gripper_pose,gripper_ang, grip_open], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/bridge.py b/data/preprocess_scripts/bridge.py new file mode 100644 index 0000000000000000000000000000000000000000..05d1e46ec8a3b61afdb528987be0add4afba67f7 --- /dev/null +++ b/data/preprocess_scripts/bridge.py @@ -0,0 +1,90 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + action['terminate'] = terminate_act_to_bool(action['terminate_episode']) + + eef_delta_pos = action['world_vector'] + eef_ang=action['rotation_delta'] + eef_ang = euler_to_quaternion(eef_ang) + grip_open=tf.reshape(tf.where(action['open_gripper'],1.0, 0.0),(1,)) + # grip_open:tensor + + # No base found + + # Concatenate the action + arm_action=tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) + action['arm_concat']=arm_action + #base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) + #action['base_concat'] = base_action + + # Write the action format + action['format']=tf.constant("eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state= step['observation'] + eef_pos=state['state'][:3] + eef_ang=state['state'][3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + gripper_action=state['state'][6:] + + # Concatenate the state + state['arm_concat']=tf.concat([eef_pos,eef_ang,gripper_action],axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['natural_language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod/' + DATASET_NAME = 'bridge' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/bridgev2.py b/data/preprocess_scripts/bridgev2.py new file mode 100644 index 0000000000000000000000000000000000000000..b937569b939cf88b4dc63cb1256ffc66aba144bb --- /dev/null +++ b/data/preprocess_scripts/bridgev2.py @@ -0,0 +1,179 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion, euler_to_rotation_matrix, rotation_matrix_to_ortho6d +import tensorflow as tf +import h5py +import numpy as np +from tqdm import tqdm +import os +import imageio +import concurrent.futures +import fnmatch +import random + + +def _parse_function(proto): + keys_to_features = { + 'observations/images0': tf.io.FixedLenFeature([], tf.string), + 'observations/state': tf.io.FixedLenFeature([], tf.string), + 'observations/qpos': tf.io.FixedLenFeature([], tf.string), + 'observations/eef_transform': tf.io.FixedLenFeature([], tf.string), + 'language': tf.io.FixedLenFeature([], tf.string), + 'actions': tf.io.FixedLenFeature([], tf.string), + 'truncates': tf.io.FixedLenFeature([], tf.int64), + } + + parsed_features = tf.io.parse_single_example(proto, keys_to_features) + + observations_images0 = tf.io.parse_tensor(parsed_features['observations/images0'], out_type=tf.uint8) + observations_state = tf.io.parse_tensor(parsed_features['observations/state'], out_type=tf.float32) + observations_qpos = tf.io.parse_tensor(parsed_features['observations/qpos'], out_type=tf.float32) + # observations_eef_transform = tf.io.parse_tensor(parsed_features['observations/eef_transform'], out_type=tf.float32) + language = parsed_features['language'] + actions = tf.io.parse_tensor(parsed_features['actions'], out_type=tf.float32) + truncates = parsed_features['truncates'] + + actions = tf.reshape(actions, [7]) + observations_images0 = tf.reshape(observations_images0, [480, 640, 3]) + # observations_eef_transform = tf.reshape(observations_eef_transform, [4,4]) + # observations_eef_transform = extract_angles_and_translation(observations_eef_transform) + # observations_eef_transform = tf.reshape(observations_eef_transform, [6]) + observations_qpos = tf.reshape(observations_qpos, [6]) + observations_state = tf.reshape(observations_state, [7]) + + return { + 'observation': { + 'images0': observations_images0, + 'state': observations_state, + 'qpos': observations_qpos, + }, + 'language': language, + 'actions': actions, + 'truncates': truncates + } + + +def dataset_generator_from_tfrecords(seed): + tfrecord_path = './data/datasets/bridgev2/tfrecords' + filepaths = [] + for root, dirs, files in os.walk(tfrecord_path): + for filename in fnmatch.filter(files, '*.tfrecord'): + filepath = os.path.join(root, filename) + filepaths.append(filepath) + + random.seed(seed) + random.shuffle(filepaths) + for filepath in filepaths: + raw_dataset = tf.data.TFRecordDataset(filepath) + dataset = raw_dataset.map(_parse_function) + yield { + 'steps': dataset + } + +def load_dataset(seed): + dataset = tf.data.Dataset.from_generator( + lambda: dataset_generator_from_tfrecords(seed), + output_signature={ + 'steps': tf.data.DatasetSpec( + element_spec={ + 'observation': { + 'images0': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8), + 'state': tf.TensorSpec(shape=(7,), dtype=tf.float32), + 'qpos': tf.TensorSpec(shape=(6,), dtype=tf.float32), + }, + 'language': tf.TensorSpec(shape=(), dtype=tf.string), + 'actions': tf.TensorSpec(shape=(7,), dtype=tf.float32), + 'truncates': tf.TensorSpec(shape=(), dtype=tf.int64) + } + ) + } + ) + + return dataset + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + old_action = step['actions'] + step['action'] = {} + action = step['action'] + step['action']['terminate'] = step['truncates'] + # https://github.com/rail-berkeley/bridge_data_robot/blob/main/widowx_envs/widowx_envs/utils/transformation_utils.py line 154 + eef_delta_pos = old_action[:3] + eef_ang = old_action[3:6] + eef_ang = euler_to_quaternion(eef_ang) + gripper_state = old_action[6] + # https://github.com/rail-berkeley/bridge_data_robot/blob/main/widowx_envs/widowx_envs/base/robot_base_env.py line 231 + # gripper_open = tf.constant(0.0,dtype=tf.float32) if gripper_state < 0.5 else tf.constant(1.0,dtype=tf.float32) + gripper_open = tf.cond(tf.less(gripper_state, 0.5), lambda: tf.constant(0.0, dtype=tf.float32), lambda: tf.constant(1.0, dtype=tf.float32)) + gripper_open = tf.expand_dims(gripper_open,axis=0) + + # # No base found + # # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang,gripper_open], axis=0) + action['arm_concat'] = arm_action + # # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z, eef_delta_angle_x, eef_delta_angle_y, eef_delta_angle_z, eef_delta_angle_w, gripper_open") + + old_state = step['observation']['state'] + qpos = step['observation']['qpos'] + state = step['observation'] + + # https://github.com/rail-berkeley/bridge_data_robot/blob/main/widowx_envs/widowx_envs/base/robot_base_env.py line 292 + eef_pos = old_state[:3] + eef_ang = old_state[3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + gripper_open = old_state[6:] + # gripper_open = tf.cond(tf.less(gripper_state, 0.5), lambda: tf.constant(0.0, dtype=tf.float32), lambda: tf.constant(1.0, dtype=tf.float32)) + # gripper_open = tf.expand_dims(gripper_open,axis=0) + + state['arm_concat'] = tf.concat([qpos,gripper_open,eef_pos,eef_ang], axis=0) + # # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + # copied from openxembod + instr = step['language'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + # Load the dataset + dataset = load_dataset(0) + for episode in dataset.take(1): + for step in episode['steps']: + step = process_step(step) + print(step) + break + diff --git a/data/preprocess_scripts/columbia_cairlab_pusht_real.py b/data/preprocess_scripts/columbia_cairlab_pusht_real.py new file mode 100644 index 0000000000000000000000000000000000000000..24dfa078ea1dc4e63078b81ea67bd94aea817bd5 --- /dev/null +++ b/data/preprocess_scripts/columbia_cairlab_pusht_real.py @@ -0,0 +1,84 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + action['terminate'] = terminate_act_to_bool(action['terminate_episode']) + + eef_delta_pos = action['world_vector'] + eef_ang = action['rotation_delta'] + eef_ang = euler_to_quaternion(eef_ang) + + # Ignore action['gripper_open']: 1 if close gripper, -1 if open gripper, 0 if no change. + + # No base found + + # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) + action['arm_concat'] = arm_action + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") + + # Convert raw state to our state + state = step['observation'] + eef_pos = state['robot_state'] + + # Concatenate the state + state['arm_concat'] = eef_pos + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['natural_language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'columbia_cairlab_pusht_real' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py b/data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..e41efb117f8bf41855987fccc6c4be723e03470c --- /dev/null +++ b/data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py @@ -0,0 +1,89 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class]. + eef_delta_pos = action[:3] + eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") + eef_ang = euler_to_quaternion(eef_ang) + # After watching the episode, I found that the last action is the gripper open/close action, where 0 means open and 1 means close. + grip_open = tf.expand_dims(1 - action[6], axis=0) + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation'] + state_vec = state['state'] + # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy") Class, 6x robot EEF wrench]. + eef_pos = state_vec[:3] + eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + grip_open = tf.expand_dims(1 - state_vec[6], axis=0) + # Concatenate the state + state['arm_concat'] = tf.concat([eef_pos, eef_ang,grip_open], axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py b/data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..41fa0c794d5e02e9b8ec076bde8361a4044104e3 --- /dev/null +++ b/data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py @@ -0,0 +1,78 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class]. + eef_delta_pos = action[:3] + eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") + eef_ang = euler_to_quaternion(eef_ang) + # After watching the episode, I found that the last action is the gripper open/close action, where 0 means open and 1 means close. + grip_open = tf.expand_dims(1 - action[6], axis=0) + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation'] + state_vec = state['state'] + # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy") Class, 6x robot EEF wrench]. + eef_pos = state_vec[:3] + eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + # joint_pos = state_vec[6:12] # EEF wrench is not the joint posit + # Concatenate the state + # state['arm_concat'] = tf.concat([joint_pos,eef_pos, eef_ang], axis=0) + state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Define the task instruction + step['observation']['natural_language_instruction'] = tf.constant( + "Place the grid clamp on the table, aligning the protrusion on its bottom with the slots on the table.") + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py b/data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..2163f69fd31e548f61ce3d5b73a196e8e2440fc6 --- /dev/null +++ b/data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py @@ -0,0 +1,88 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class]. + eef_delta_pos = action[:3] + eef_ang = tf.gather(action[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") + eef_ang = euler_to_quaternion(eef_ang) + # After watching the episode, I found that the last action is the gripper open/close action, where 0 means open and 1 means close. + grip_open = tf.expand_dims(1 - action[6], axis=0) + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation'] + state_vec = state['state'] + # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class]. + eef_pos = state_vec[:3] + eef_ang = tf.gather(state_vec[3:6], [1, 2, 0]) # To Rotation.as_euler("xyz") + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + + # Concatenate the state + state['arm_concat'] = tf.concat([ eef_pos, eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/dobbe.py b/data/preprocess_scripts/dobbe.py new file mode 100644 index 0000000000000000000000000000000000000000..27464d936826cae285d3b77e2654623289d8a8c6 --- /dev/null +++ b/data/preprocess_scripts/dobbe.py @@ -0,0 +1,86 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + arm_action = step['action'] + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = arm_action + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") + action['terminate'] = step['is_terminal'] + + # The state has any problem? + state = step['observation'] + eef_pos = state['xyz'] + # Clip eef_pos to be [-10, 10] for stability + eef_pos = tf.clip_by_value(eef_pos, -10, 10) + eef_ang = state['rot'] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + grip_pos = state['gripper'] + + # Concatenate the state + state['arm_concat'] = tf.concat([ + grip_pos,eef_pos,eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + from tqdm import tqdm + import numpy as np + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'dobbe' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + # dataset = dataset.filter( + # lambda x: tf.math.less( + # tf.math.reduce_max(tf.math.abs( + # tf.convert_to_tensor( + # [step['observation']['xyz'] for step in x['steps']]))), 3)) + + # Inspect the dataset + for i, episode in tqdm(enumerate(dataset), total=5208): + res = [] + for step in episode['steps']: + res.append(step['observation']['xyz'].numpy()) + max_val = np.max(np.abs(res)) + if max_val > 2: + print(f"Episode {i} has a max value of {max_val}") diff --git a/data/preprocess_scripts/eth_agent_affordances.py b/data/preprocess_scripts/eth_agent_affordances.py new file mode 100644 index 0000000000000000000000000000000000000000..683509d038901c63b3c83e4d9c4572160389edcf --- /dev/null +++ b/data/preprocess_scripts/eth_agent_affordances.py @@ -0,0 +1,77 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate'] = step['is_terminal'] + + eef_vel = origin_action[:3] + eef_ang_vel=origin_action[3:6] + # No base found + + # Concatenate the action + action['arm_concat'] = tf.concat([eef_vel,eef_ang_vel],axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + eef_pos = state['state'][:3] + eef_ang = tf.gather(state['state'][3:6], [2, 1, 0]) + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + grip_open=state['state'][6:7] + # state['state'][8]:door opening angle + state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'eth_agent_affordances' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/fmb.py b/data/preprocess_scripts/fmb.py new file mode 100644 index 0000000000000000000000000000000000000000..92a057c1b68f002ba061901b1d7b9f5d385b3208 --- /dev/null +++ b/data/preprocess_scripts/fmb.py @@ -0,0 +1,78 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d, \ + quaternion_to_rotation_matrix + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + + # Robot action + eef_pos = action[:3] + eef_ang = action[3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + gripper_open = action[6:7] + + # Concatenate the action + step['action'] = {} + action = step['action'] + + arm_action = tf.concat([eef_pos, eef_ang, gripper_open], axis=0) + action['arm_concat'] = arm_action + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Convert raw state to our state + # Robot state + state = step['observation'] + eef_pos = state['eef_pose'][:3] + eef_ang = state['eef_pose'][3:] + eef_ang = quaternion_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + eef_pos_vel = state['eef_vel'][:3] + eef_ang_vel = state['eef_vel'][3:] + joint_pos = state['joint_pos'] + joint_vel = state['joint_vel'] + grip_pos = 1 - state['state_gripper_pose'] + grip_pos = tf.expand_dims(grip_pos, axis=0) + + # Concatenate the state + state['arm_concat'] = tf.concat([ + joint_pos,joint_vel,grip_pos,eef_pos,eef_ang,eef_pos_vel,eef_ang_vel], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + # Refine language instruction: + 'object': 'brick and insert it into the slot of the matching shape' + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/furniture_bench_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/furniture_bench_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..5c2af1830314ad04d08ec7b886f5ee20f09898f9 --- /dev/null +++ b/data/preprocess_scripts/furniture_bench_dataset_converted_externally_to_rlds.py @@ -0,0 +1,98 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + + # No base found + eef_pos_delta=origin_action[:3] + eef_quat_delta=origin_action[3:7] + eef_ang = eef_quat_delta + # eef_ang=quaternion_to_euler(eef_quat_delta) + grip_open=-origin_action[7:8] + + action["arm_concat"]=tf.concat([eef_pos_delta,eef_ang,grip_open],axis=0) + action['format']=tf.constant("eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + # Ignore action + action['terminate'] = step['is_terminal'] + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + + ''' 'state': + { + 'ee_pos': EEF position (3,) + 'ee_quat': EEF orientation (4,) + 'ee_pos_vel': EEF linear velocity (3,) + 'ee_ori_vel': EEF angular velocity (3,) + 'joint_positions': Joint positions (7,) + 'joint_velocities': Joint velocities (7,) + 'joint_torques': Joint torques (7,) + 'gripper_width': Gripper width (1,) + }''' + eef_pos=state['state'][:3] + # eef_ang=quaternion_to_euler(state['state'][3:7]) + eef_ang = quaternion_to_rotation_matrix(state['state'][3:7]) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + eef_pos_vel=state['state'][7:10] + eef_ang_vel=state['state'][10:13] + arm_joint_pos=state['state'][13:20] + arm_joint_vel=state['state'][20:27] + arm_joint_tor=state['state'][27:34] + # gripper_width? + grip_open=state['state'][34:35] * 12.507 # rescale to [0, 1] + + state['arm_concat'] = tf.concat([arm_joint_pos,grip_open,arm_joint_vel,\ + eef_pos,eef_ang,eef_pos_vel,eef_ang_vel],axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'furniture_bench_dataset_converted_externally_to_rlds' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py b/data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..44c8711fcba56254f21fe89e515612990de050e8 --- /dev/null +++ b/data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py @@ -0,0 +1,77 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action'] = {} + action = step['action'] + + # Multiplied by 10 Hz to get units m/s and rad/s + eef_delta_pos = origin_action[:3] * 10 + # delta ZYX euler angles == roll/pitch/yaw velocities + eef_ang = origin_action[3:6] * 10 + grip_open = 1 - origin_action[6:7] + + # No base found + + # Concatenate the action + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open],axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") + + # Convert raw state to our state + # state = step['observation'] + # # Concatenate the state + # grip_open=state['state'] + # state['arm_concat'] = grip_open + + # Write the state format + # state['format'] = tf.constant( + # "gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'imperialcollege_sawyer_wrist_cam' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/jaco_play.py b/data/preprocess_scripts/jaco_play.py new file mode 100644 index 0000000000000000000000000000000000000000..0b3401b850ecdf4503d766a9d3fd9807eff625df --- /dev/null +++ b/data/preprocess_scripts/jaco_play.py @@ -0,0 +1,91 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + action['terminate'] = terminate_act_to_bool(action['terminate_episode']) + eef_delta_pos = action['world_vector'] + # eef_ang = action['rotation_delta'] + # (NOTE) due to the formality problem, grip_open is not used + # grip_open = 1 - (action['gripper_closedness_action'] ) / 2 + # base_delta_pos = action['base_displacement_vector'] + # base_delta_ang = action['base_displacement_vertical_rotation'] + + # Concatenate the action + arm_action = eef_delta_pos + action['arm_concat'] = arm_action + # base_action = tf.constant([0, 0, 0, 0], dtype=tf.float32) + # action['base_concat'] = None + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") + + # Convert raw state to our state + state = step['observation'] + joint_pos = state['joint_pos'] + eef_pos = state['end_effector_cartesian_pos'][:3] + eef_quat = state['end_effector_cartesian_pos'][3:] + eef_ang = quaternion_to_rotation_matrix(eef_quat) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + eef_vel = state['end_effector_cartesian_velocity'][:3] + # We do not use angular velocity since it is very inaccurate in this environment + # eef_angular_vel = state['end_effector_cartesian_velocity'][3:] + # Concatenate the state + state['arm_concat'] = tf.concat([joint_pos, eef_pos, eef_ang, eef_vel], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['natural_language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'jaco_play' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/kaist_nonprehensile_converted_externally_to_rlds.py b/data/preprocess_scripts/kaist_nonprehensile_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..acfd5fe1a8bf5637c14777ec72341d0cf5e7ee93 --- /dev/null +++ b/data/preprocess_scripts/kaist_nonprehensile_converted_externally_to_rlds.py @@ -0,0 +1,91 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, quaternion_to_rotation_matrix,\ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x end-effector position residual, 3x end-effector axis-angle residual, + # 7x robot joint k_p gain coefficient, 7x robot joint damping ratio coefficient]. + # The action residuals are global, i.e. multiplied on theleft-hand side of the current end-effector state. + eef_delta_pos = action[:3] + eef_ang = action[3:6] + eef_ang = euler_to_quaternion(eef_ang) + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['terminate'] = step['is_terminal'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,], axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") + + # Convert raw state to our state + state = step['observation'] + # Robot state, consists of [joint_states, end_effector_pose].Joint states are 14-dimensional, formatted in the order of [q_0, w_0, q_1, w_0, ...]. + # In other words, joint positions and velocities are interleaved.The end-effector pose is 7-dimensional, formatted in the order of [position, quaternion].The quaternion is formatted in (x,y,z,w) order. The end-effector pose references the tool frame, in the center of the two fingers of the gripper. + joint_states = state['state'][:14] + arm_joint_pos = joint_states[::2] + arm_joint_vel = joint_states[1::2] + eef_pos = state['state'][14:17] + # eef_ang = quaternion_to_euler(state['state'][17:21]) + eef_ang = quaternion_to_rotation_matrix(state['state'][17:21]) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + # Concatenate the state + state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, eef_pos, eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/kuka.py b/data/preprocess_scripts/kuka.py new file mode 100644 index 0000000000000000000000000000000000000000..156e239a4af9b19b24cce8dde115bf972bacf743 --- /dev/null +++ b/data/preprocess_scripts/kuka.py @@ -0,0 +1,107 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, quaternion_to_rotation_matrix,\ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # keys_view = step.keys() + # print("step") + # print(list(keys_view)) + # Convert raw action to our action + action: dict = step['action'] + # print("action") + # print(list(action.keys())) + action['terminate'] = terminate_act_to_bool(action['terminate_episode']) + + eef_delta_pos = action['world_vector'] + eef_ang = action['rotation_delta'] + eef_ang = euler_to_quaternion(eef_ang) + grip_open = 1 - (action['gripper_closedness_action'] + 1) / 2 + base_delta_pos = action['base_displacement_vector'] + base_delta_ang = action['base_displacement_vertical_rotation'] + + # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + action['arm_concat'] = arm_action + base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) + action['base_concat'] = base_action + + # print("action len:", len(action['arm_concat']) + len(action['base_concat'])) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open,base_vel_x,base_vel_y,base_angular_vel") + + # action good for kuka same as example + + # Convert raw state to our state + state = step['observation'] + # print("state") + # print(list(state.keys())) + eef_pos = state['clip_function_input/base_pose_tool_reached'][:3] + eef_ang = quaternion_to_rotation_matrix(state['clip_function_input/base_pose_tool_reached'][3:]) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + grip_open = 1 - state['gripper_closed'] + + # Concatenate the state + state['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0) + # print("state len:", len(state['arm_concat'])) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['natural_language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'kuka' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # with open('example.txt', 'w') as file: + # Inspect the dataset + episode_num = len(dataset) + for episode in dataset: + # print("episode") + # print(list(episode.keys())) + for step in episode['steps']: + process_step(step) + # break + print(f"episode_num: {episode_num}") diff --git a/data/preprocess_scripts/language_table.py b/data/preprocess_scripts/language_table.py new file mode 100644 index 0000000000000000000000000000000000000000..6d369f4178ba919233d876265ceb5e6984186293 --- /dev/null +++ b/data/preprocess_scripts/language_table.py @@ -0,0 +1,75 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + + eef_delta_pos=origin_action + # No base found + + # Concatenate the action + action['arm_concat'] = eef_delta_pos + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + eef_pos=state['effector_translation'] + state['arm_concat'] = eef_pos + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['instruction'] + # Convert bytes to tf.string + instr = tf.strings.unicode_encode(instr, 'UTF-8') + # Remove '\x00' + instr = tf.strings.regex_replace(instr, '\x00', '') + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'language_table' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) + diff --git a/data/preprocess_scripts/libero_10_no_noops.py b/data/preprocess_scripts/libero_10_no_noops.py new file mode 100644 index 0000000000000000000000000000000000000000..1c3fba7145744db1176d899581239b31cb6868d2 --- /dev/null +++ b/data/preprocess_scripts/libero_10_no_noops.py @@ -0,0 +1,82 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + + # Robot action + # eef_pos = action_dict['ee_pos'][:3] + # eef_ang = action_dict['ee_pos'][3:6] + # eef_ang = euler_to_rotation_matrix(eef_ang) + # eef_ang = rotation_matrix_to_ortho6d(eef_ang) + eef_pos_vel = action_dict[:3] + eef_ang_vel = action_dict[3:6] + # joint_pos = action_dict['joint_pos'][:-1] + # joint_vel = action_dict['delta_joint'][:-1] + grip_pos = 1 - tf.clip_by_value(action_dict[-1:], 0, 1) + + # grip_vel = action_dict['gripper_velocity'] + + # Concatenate the action + step['action'] = {} + action = step['action'] + + arm_action = tf.concat([eef_pos_vel, eef_ang_vel, grip_pos], axis=0) + action['arm_concat'] = arm_action + # action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_pos") + + # Convert raw state to our state + # Robot state + state = step['observation'] + # print(state.keys()) + # image = step['observation']['image'] + eef_pos = state['state'][:3] + eef_ang = state['state'][3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + # joint_pos = state['joint_pos'][:-1] + grip_pos = state['state'][-2:] + + # Concatenate the state + state['arm_concat'] = tf.concat([ + grip_pos,eef_pos,eef_ang], axis=0) + + + # Write the state format + state['format'] = tf.constant( + "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/libero_object_no_noops.py b/data/preprocess_scripts/libero_object_no_noops.py new file mode 100644 index 0000000000000000000000000000000000000000..1c3fba7145744db1176d899581239b31cb6868d2 --- /dev/null +++ b/data/preprocess_scripts/libero_object_no_noops.py @@ -0,0 +1,82 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + + # Robot action + # eef_pos = action_dict['ee_pos'][:3] + # eef_ang = action_dict['ee_pos'][3:6] + # eef_ang = euler_to_rotation_matrix(eef_ang) + # eef_ang = rotation_matrix_to_ortho6d(eef_ang) + eef_pos_vel = action_dict[:3] + eef_ang_vel = action_dict[3:6] + # joint_pos = action_dict['joint_pos'][:-1] + # joint_vel = action_dict['delta_joint'][:-1] + grip_pos = 1 - tf.clip_by_value(action_dict[-1:], 0, 1) + + # grip_vel = action_dict['gripper_velocity'] + + # Concatenate the action + step['action'] = {} + action = step['action'] + + arm_action = tf.concat([eef_pos_vel, eef_ang_vel, grip_pos], axis=0) + action['arm_concat'] = arm_action + # action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_pos") + + # Convert raw state to our state + # Robot state + state = step['observation'] + # print(state.keys()) + # image = step['observation']['image'] + eef_pos = state['state'][:3] + eef_ang = state['state'][3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + # joint_pos = state['joint_pos'][:-1] + grip_pos = state['state'][-2:] + + # Concatenate the state + state['arm_concat'] = tf.concat([ + grip_pos,eef_pos,eef_ang], axis=0) + + + # Write the state format + state['format'] = tf.constant( + "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/maniskill_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/maniskill_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..ef89473445e0cc014128fb073e96c6306962704b --- /dev/null +++ b/data/preprocess_scripts/maniskill_dataset_converted_externally_to_rlds.py @@ -0,0 +1,106 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + action = step['action'] + + # Robot action, consists of [3x end effector delta target position, 3x end effector delta target orientation in axis-angle format, 1x gripper target position (mimic for two fingers)]. + # For delta target position, an action of -1 maps to a robot movement of -0.1m, and action of 1 maps to a movement of 0.1m. + # For delta target orientation, its encoded angle is mapped to a range of [-0.1rad, 0.1rad] for robot execution. For example, an action of [1, 0, 0] means rotating along the x-axis by 0.1 rad. + # For gripper target position, an action of -1 means close, and an action of 1 means open. + eef_delta_pos = action[:3] * 0.1 + eef_ang = action[3:6] * 0.1 + eef_ang = euler_to_quaternion(eef_ang) + grip_open = tf.expand_dims((action[6] + 1) / 2, axis=0) + + # Concatenate the action + step['action'] = {} + action = step['action'] + + arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + action['arm_concat'] = arm_action + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + # Robot state, consists of [7x robot joint angles, 2x gripper position, 7x robot joint angle velocity, 2x gripper velocity]. Angle in radians, position in meters. + state = step['observation']['state'] + arm_joint_pos = state[:7] + gripper_pos = state[7:9] * 25 # rescale to [0, 1] + # We do not use velocity since it is very inaccurate in this environment + # arm_joint_vel = state[9:16] + # gripper_vel = state[16:18] + + # Concatenate the state + + state = step['observation'] + state['arm_concat'] = tf.concat([ + arm_joint_pos, gripper_pos], axis=0) + + # Robot base pose in the world frame, consists of [x, y, z, qw, qx, qy, qz]. + # The first three dimensions represent xyz positions in meters. The last four dimensions are the quaternion representation of rotation + # base_pose = step['observation']['base_pose'] + # base_pose_xyz = base_pose[:3] + # base_pose_ang = quaternion_to_euler(base_pose[3:]) + # processed_base_pose = tf.concat([base_pose_xyz, base_pose_ang], axis=0) + + # state['arm_concat'] = tf.concat([state['arm_concat'], processed_base_pose], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,gripper_joint_1_pos") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'maniskill_dataset_converted_externally_to_rlds' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset.take(1): + for step in episode['steps']: + print(step['is_last']) diff --git a/data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py b/data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py new file mode 100644 index 0000000000000000000000000000000000000000..8c2ea0f9338f90e5240ac6748725f3af8f9ca5d3 --- /dev/null +++ b/data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py @@ -0,0 +1,77 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + action['terminate'] = terminate_act_to_bool(action['terminate_episode']) + + # Multiplied by 3 Hz to get units m/s and rad/s + eef_delta_pos = action['world_vector'] * 3 + eef_ang = action['rotation_delta'] * 3 + # Origin: [-0.28, 0.96]: open, close + # 1-Origin: [0.04, 1.28]: close, open + grip_open = 1 - action['gripper_closedness_action'] + # base_delta_pos = action['base_displacement_vector'] + # base_delta_ang = action['base_displacement_vertical_rotation'] + + # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + action['arm_concat'] = arm_action + # base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0) + # action['base_concat'] = base_action + + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open") + + # Convert raw state to our state + # state = step['observation'] + # eef_pos = state['base_pose_tool_reached'][:3] + # eef_ang = quaternion_to_euler(state['base_pose_tool_reached'][3:]) + # grip_open = 1 - state['gripper_closed'] + + # create empty tensor + # state['arm_concat'] = tf.constant([0, 0, 0, 0, 0, 0], dtype=tf.float32) + + # Write the state format + # state['format'] = tf.constant( + # "") + + # Define the task instruction + step['observation']['natural_language_instruction'] = tf.constant( + "Open the cabinet door.") + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'nyu_door_opening_surprising_effectiveness' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset.take(1): + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..e950ae3fc4c7989d50080a8493364f05c29b10e3 --- /dev/null +++ b/data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py @@ -0,0 +1,87 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate']=terminate_act_to_bool(origin_action[14]) + joint_vel=origin_action[:7] + # gripper_open: 1-open, -1-closed + + grip_open=tf.where(tf.equal(origin_action[13:14],tf.constant(1.0)),tf.constant(1.0),tf.constant(0.0)) + eef_pos=origin_action[7:10] + eef_ang=origin_action[10:13] + eef_ang = euler_to_quaternion(eef_ang) + # No base found + + # Concatenate the action + action['arm_concat'] = tf.concat([joint_vel,grip_open,eef_pos,eef_ang],axis=0) + + # Write the action format + action['format'] = tf.constant( + "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + state_arm_joint = state['state'][:7] + eef_pos = state['state'][7:10] + eef_ang = euler_to_rotation_matrix(state['state'][10:13]) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + state['arm_concat'] = tf.concat([state_arm_joint,eef_pos,eef_ang],axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'nyu_franka_play_dataset_converted_externally_to_rlds' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/qut_dexterous_manpulation.py b/data/preprocess_scripts/qut_dexterous_manpulation.py new file mode 100644 index 0000000000000000000000000000000000000000..6ce6835740513c5bf159a11427b07ff7da5bf3c1 --- /dev/null +++ b/data/preprocess_scripts/qut_dexterous_manpulation.py @@ -0,0 +1,68 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + eef_pos = action[:3] + eef_quat = action[3:7] + eef_ang = quaternion_to_rotation_matrix(eef_quat) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + grip_pos = action[7:] + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([ + grip_pos,eef_pos,eef_ang], axis=0) + # Write the action format + action['format'] = tf.constant( + "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + action['terminate'] = step['is_terminal'] + + # Convert raw state to our state + state = step['observation'] + robot_state = state['state'] + eef_pos = robot_state[:3] + eef_quat = robot_state[3:7] + eef_ang = quaternion_to_rotation_matrix(eef_quat) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + joint_pos = robot_state[7:14] + grip_pos = robot_state[14:16] * 24.707 # rescale to [0, 1] + joint_vel = robot_state[16:23] + + # Concatenate the state + state['arm_concat'] = tf.concat([ + grip_pos,eef_pos,eef_ang,joint_pos,joint_vel], axis=0) + + # Write the state format + state['format'] = tf.constant( + "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/rh20t.py b/data/preprocess_scripts/rh20t.py new file mode 100644 index 0000000000000000000000000000000000000000..6ea6c3495df1a5ca1a1568a8981a7f7de5dc3a04 --- /dev/null +++ b/data/preprocess_scripts/rh20t.py @@ -0,0 +1,214 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_rotation_matrix_wo_static_check, \ + rotation_matrix_to_ortho6d_1d +import tensorflow as tf +import h5py +import numpy as np +from tqdm import tqdm +import os +import imageio +import concurrent.futures +import fnmatch +import cv2 +import random + +def decode_image(image_data): + image_data = np.frombuffer(image_data, dtype=np.uint8) + image = cv2.imdecode(image_data, cv2.IMREAD_COLOR) + return image + +def _parse_function(proto): + feature_description = { + 'joint': tf.io.FixedLenFeature([], tf.string), + 'image': tf.io.FixedLenFeature([], tf.string), + 'instruction': tf.io.FixedLenFeature([], tf.string), + 'terminate_episode': tf.io.FixedLenFeature([], tf.int64), + 'gripper': tf.io.FixedLenFeature([], tf.string, default_value=""), + 'tcp': tf.io.FixedLenFeature([], tf.string, default_value=""), + 'tcp_base': tf.io.FixedLenFeature([], tf.string, default_value="") + } + parsed_features = tf.io.parse_single_example(proto, feature_description) + + parsed_features['joint'] = tf.io.parse_tensor(parsed_features['joint'], out_type=tf.float64) + parsed_features['image'] = tf.io.parse_tensor(parsed_features['image'], out_type=tf.string) + parsed_features['instruction'] = tf.io.parse_tensor(parsed_features['instruction'], out_type=tf.string) + parsed_features['gripper'] = tf.cond( + tf.math.equal(parsed_features['gripper'], ""), + lambda: tf.constant([], dtype=tf.float64), + lambda: tf.reshape(tf.io.parse_tensor(parsed_features['gripper'], out_type=tf.float64), [3]) + ) + parsed_features['tcp_base'] = tf.cond( + tf.math.equal(parsed_features['tcp_base'], ""), + lambda: tf.constant([], dtype=tf.float64), + lambda: tf.reshape(tf.io.parse_tensor(parsed_features['tcp_base'], out_type=tf.float64),[7]) + ) + + image = tf.image.decode_jpeg(parsed_features['image'],channels=3) + joint = parsed_features['joint'] + shape = tf.shape(joint)[0] + joint = tf.reshape(joint, [shape]) + + instruction = parsed_features['instruction'] + terminate_episode = tf.cast(parsed_features['terminate_episode'],tf.int64) + gripper = parsed_features['gripper'] + tcp_base = parsed_features['tcp_base'] + return { + 'joint': joint, + 'observation':{ + 'image': image, + }, + 'instruction': instruction, + 'terminate_episode': terminate_episode, + 'gripper': gripper, + 'tcp_base': tcp_base, + } + +def dataset_generator_from_tfrecords(seed): + tfrecord_path = './data/datasets/rh20t/tfrecords/' + datasets = [] + filepaths = [] + for root, dirs, files in os.walk(tfrecord_path): + for filename in fnmatch.filter(files, '*.tfrecord'): + filepath = os.path.join(root, filename) + filepaths.append(filepath) + + random.seed(seed) + random.shuffle(filepaths) + for filepath in filepaths: + raw_dataset = tf.data.TFRecordDataset(filepath) + dataset = raw_dataset.map(_parse_function) + num = 0 + for data in dataset: + num = num + 1 + if data['joint'].shape != tf.TensorShape((6,)) and data['joint'].shape != tf.TensorShape((7,)): + num = 0 + break + if num <= 1: # discard dataset + continue + yield { + 'steps': dataset + } + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.int64)),tf.constant(False),tf.constant(True)) + + +def load_dataset(seed): + dataset = tf.data.Dataset.from_generator( + lambda: dataset_generator_from_tfrecords(seed), + output_signature={ + 'steps': tf.data.DatasetSpec( + element_spec={ + 'joint': tf.TensorSpec(shape=(None), dtype=tf.float64), + 'observation':{ + 'image': tf.TensorSpec(shape=(None), dtype=tf.uint8), + }, + 'instruction': tf.TensorSpec(shape=(), dtype=tf.string), + 'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64), + 'gripper': tf.TensorSpec(shape=(None), dtype=tf.float64), + 'tcp_base': tf.TensorSpec(shape=(None), dtype=tf.float64), + } + ) + } + ) + + return dataset + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0, dtype=tf.int64)),tf.constant(False),tf.constant(True)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + + # Convert raw action to our action + step['action'] = {} + step['action']['terminate'] = terminate_act_to_bool(step['terminate_episode']) + + state = step['observation'] + joint_pos = step['joint'] + state['arm_concat'] = joint_pos + + state_format = "" + state_format = tf.cond( + tf.equal(tf.shape(joint_pos), tf.shape(tf.zeros((7,), dtype=tf.float64))), + lambda: tf.constant("arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos"), + lambda: state_format + ) + state_format = tf.cond( + tf.equal(tf.shape(joint_pos), tf.shape(tf.zeros((6,), dtype=tf.float64))), + lambda: tf.constant("arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos"), + lambda: state_format + ) + + # eef + eef = step['tcp_base'] + state['arm_concat'] = tf.cond( + tf.equal(tf.shape(eef), tf.shape(tf.zeros((7,), dtype=tf.float64))), + lambda: tf.concat([ + state['arm_concat'], + tf.concat([ + eef[:3], rotation_matrix_to_ortho6d_1d( + quaternion_to_rotation_matrix_wo_static_check(eef[3:]))], axis=0)], axis=0), + lambda: state['arm_concat'] + ) + state_format = tf.cond( + tf.equal(tf.shape(eef), tf.shape(tf.zeros((7,), dtype=tf.float64))), + lambda: tf.strings.join([state_format, tf.constant("eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")],separator = ','), + lambda: state_format + ) + + # gripper + state['arm_concat'] = tf.cond( + tf.equal(tf.shape(step['gripper']), tf.shape(tf.zeros((3,), dtype=tf.float64))), + lambda: tf.concat([state['arm_concat'], step['gripper'][0:1] / 110], axis=0), + lambda: state['arm_concat'] + ) + state_format = tf.cond( + tf.equal(tf.shape(step['gripper']), tf.shape(tf.zeros((3,), dtype=tf.float64))), + lambda: tf.strings.join([state_format, tf.constant("gripper_joint_0_pos")],separator = ','), + lambda: state_format + ) + + state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) + state['format'] = state_format + + return step + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + # Load the dataset + dataset = load_dataset(0) + for data in dataset: + for step in data['steps']: + step = process_step(step) + print(step['observation']['format']) diff --git a/data/preprocess_scripts/robomimic_can_ph.py b/data/preprocess_scripts/robomimic_can_ph.py new file mode 100644 index 0000000000000000000000000000000000000000..c43130cdd3ded9201d30e0aade4a179ee7604505 --- /dev/null +++ b/data/preprocess_scripts/robomimic_can_ph.py @@ -0,0 +1,96 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_euler + +def load_dataset(): + builder = tfds.builder('robomimic_ph/can_ph_image') + builder.download_and_prepare() + ds = builder.as_dataset(split='train', shuffle_files=True) + return ds + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg + # Convert raw action to our action + eef = step['action'] + step['action'] = {} + action = step['action'] + action['terminate'] = step['is_terminal'] + + eef_delta_pos = eef[:3] + eef_ang = eef[3:6] + # No base found + + # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) + action['arm_concat'] = arm_action + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw") + + # Convert raw state to our state + state = step['observation'] + + arm_joint_pos = state['robot0_joint_pos'] + arm_joint_vel = state['robot0_joint_vel'] + gripper_pos = state['robot0_gripper_qpos'] + gripper_vel = state['robot0_gripper_qvel'] + eef_pos = state['robot0_eef_pos'] + eef_ang = quaternion_to_euler(state['robot0_eef_quat']) + + state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0) + # convert to tf32 + state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + # manual added by lbg + instr = "lift the can into the the box" + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'roboturk' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all').take(1) + + # Inspect the dataset + ze=tf.constant(0.0) + for episode in dataset: + for step in episode['steps']: + print(step) + break diff --git a/data/preprocess_scripts/robomimic_tool_hang_ph.py b/data/preprocess_scripts/robomimic_tool_hang_ph.py new file mode 100644 index 0000000000000000000000000000000000000000..93b909912dabc04ac260c6051904fe8631892c22 --- /dev/null +++ b/data/preprocess_scripts/robomimic_tool_hang_ph.py @@ -0,0 +1,97 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_euler + + +def load_dataset(): + builder = tfds.builder('robomimic_ph/tool_hang_ph_image') + builder.download_and_prepare() + ds = builder.as_dataset(split='train', shuffle_files=True) + return ds + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg + # Convert raw action to our action + eef = step['action'] + step['action'] = {} + action = step['action'] + action['terminate'] = step['is_terminal'] + + eef_delta_pos = eef[:3] + eef_ang = quaternion_to_euler(eef[3:]) + + # No base found + + # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) + action['arm_concat'] = arm_action + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw") + + # Convert raw state to our state + state = step['observation'] + arm_joint_pos = state['robot0_joint_pos'] + arm_joint_vel = state['robot0_joint_vel'] + gripper_pos = state['robot0_gripper_qpos'] + gripper_vel = state['robot0_gripper_qvel'] + eef_pos = state['robot0_eef_pos'] + eef_ang = quaternion_to_euler(state['robot0_eef_quat']) + + state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0) + # convert to tf32 + state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + # manual added by lbg + instr = "hang the tool and place into the hole" + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'roboturk' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all').take(1) + + # Inspect the dataset + ze=tf.constant(0.0) + for episode in dataset: + for step in episode['steps']: + print(step) + break diff --git a/data/preprocess_scripts/robomimic_transport_ph.py b/data/preprocess_scripts/robomimic_transport_ph.py new file mode 100644 index 0000000000000000000000000000000000000000..452e67a94644dd255e2f315101ff5d6b4c450e4c --- /dev/null +++ b/data/preprocess_scripts/robomimic_transport_ph.py @@ -0,0 +1,114 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_euler + + +def load_dataset(): + builder = tfds.builder('robomimic_ph/transport_ph_image') + builder.download_and_prepare() + ds = builder.as_dataset(split='train', shuffle_files=True) + return ds + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg + # Convert raw action to our action + eef = step['action'] + step['action'] = {} + action = step['action'] + action['terminate'] = step['is_terminal'] + + left_eef_delta_pos = eef[:3] + left_eef_ang = quaternion_to_euler(eef[3:7]) + + right_eef_delta_pos = eef[7:10] + right_eef_ang = quaternion_to_euler(eef[10:]) + + # No base found + + # Concatenate the action + arm_action = tf.concat([left_eef_delta_pos,left_eef_ang,right_eef_delta_pos,right_eef_ang], axis=0) + action['arm_concat'] = arm_action + + # Write the action format + action['format'] = tf.constant( + "left_eef_delta_pos_x,left_eef_delta_pos_y,left_eef_delta_pos_z,left_eef_delta_angle_roll,left_eef_delta_angle_pitch,left_eef_delta_angle_yaw,right_eef_delta_pos_x,right_eef_delta_pos_y,right_eef_delta_pos_z,right_eef_delta_angle_roll,right_eef_delta_angle_pitch,right_eef_delta_angle_yaw") + + # Convert raw state to our state + state = step['observation'] + left_arm_joint_pos = state['robot0_joint_pos'] + left_arm_joint_vel = state['robot0_joint_vel'] + left_gripper_pos = state['robot0_gripper_qpos'] + left_gripper_vel = state['robot0_gripper_qvel'] + left_eef_pos = state['robot0_eef_pos'] + left_eef_ang = quaternion_to_euler(state['robot0_eef_quat']) + + right_arm_joint_pos = state['robot1_joint_pos'] + right_arm_joint_vel = state['robot1_joint_vel'] + right_gripper_pos = state['robot1_gripper_qpos'] + right_gripper_vel = state['robot1_gripper_qvel'] + right_eef_pos = state['robot1_eef_pos'] + right_eef_ang = quaternion_to_euler(state['robot1_eef_quat']) + + arm_joint_pos = tf.concat([left_arm_joint_pos, right_arm_joint_pos], axis=0) + arm_joint_vel = tf.concat([left_arm_joint_vel, right_arm_joint_vel], axis=0) + gripper_pos = tf.concat([left_gripper_pos, right_gripper_pos], axis=0) + gripper_vel = tf.concat([left_gripper_vel, right_gripper_vel], axis=0) + eef_pos = tf.concat([left_eef_pos, right_eef_pos], axis=0) + eef_ang = tf.concat([left_eef_ang, right_eef_ang], axis=0) + + state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0) + # convert to tf32 + state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32) + # Write the state format + state['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_arm_joint_6_pos,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_arm_joint_6_pos,left_gripper_joint_0_pos,left_gripper_joint_1_pos,right_gripper_joint_0_pos,right_gripper_joint_1_pos,left_arm_joint_0_vel,left_arm_joint_1_vel,left_arm_joint_2_vel,left_arm_joint_3_vel,left_arm_joint_4_vel,left_arm_joint_5_vel,left_arm_joint_6_vel,right_arm_joint_0_vel,right_arm_joint_1_vel,right_arm_joint_2_vel,right_arm_joint_3_vel,right_arm_joint_4_vel,right_arm_joint_5_vel,right_arm_joint_6_vel,left_gripper_joint_0_vel,left_gripper_joint_1_vel,right_gripper_joint_0_vel,right_gripper_joint_1_vel,left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_roll,left_eef_angle_pitch,left_eef_angle_yaw,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_roll,right_eef_angle_pitch,right_eef_angle_yaw") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + # manual added by lbg + instr = "transport the object from left hand to right hand" + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'roboturk' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all').take(1) + + # Inspect the dataset + ze=tf.constant(0.0) + for episode in dataset: + for step in episode['steps']: + print(step) + break diff --git a/data/preprocess_scripts/roboturk_real_laundrylayout.py b/data/preprocess_scripts/roboturk_real_laundrylayout.py new file mode 100644 index 0000000000000000000000000000000000000000..ff71ea673badaa019af83cfef2b599d04d00ac19 --- /dev/null +++ b/data/preprocess_scripts/roboturk_real_laundrylayout.py @@ -0,0 +1,223 @@ +import tensorflow as tf +import tensorflow_datasets as tfds +from data.utils import clean_task_instruction, quaternion_to_euler +import tensorflow as tf +import h5py +import numpy as np +from tqdm import tqdm +import os +import imageio +import concurrent.futures + +def get_frames(file_path): + if not os.path.exists(file_path) or not os.path.isfile(file_path) or not file_path.endswith('.mp4'): + return [] + frames = [] + with imageio.get_reader(file_path, 'ffmpeg') as reader: + for frame in reader: + frame = np.array(frame, dtype=np.uint8) + frames.append(frame) + return frames + +def parallel_get_frames(paths): + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_path = {executor.submit(get_frames, path): path for path in paths} + return [future.result() for future in concurrent.futures.as_completed(future_to_path)] + +def count_total_samples(filename): + total_samples = 0 + with h5py.File(filename, 'r') as f: + data = f['data'] + for user_key in data.keys(): + user = data[user_key] + for demo_key in user.keys(): + total_samples += 1 + return total_samples + +def dataset_generator(filename, total_samples): + with h5py.File(filename, 'r') as f: + data = f['data'] + for user_key in data.keys(): + user = data[user_key] + for demo_key in user.keys(): + demo = user[demo_key] + robot_observation = demo['robot_observation'] + user_control = demo['user_control'] + + eef_poses = robot_observation['eef_poses'] + joint_states_arm = robot_observation['joint_states_arm'] + joint_states_gripper = robot_observation['joint_states_gripper'] + user_control_data = user_control['user_control'] + + attrs = dict(demo.attrs) + top_depth_video_file = attrs['top_depth_video_file'] + top_rgb_video_file = attrs['top_rgb_video_file'] + front_rgb_video_file = attrs['front_rgb_video_file'] + + video_root_path = './data/datasets/roboturk' + top_depth_frames = get_frames(os.path.join(video_root_path, top_depth_video_file)) + top_rgb_frames = get_frames(os.path.join(video_root_path, top_rgb_video_file)) + front_rgb_frames = get_frames(os.path.join(video_root_path, front_rgb_video_file)) + + if len(top_depth_frames) == 0 or len(top_rgb_frames) == 0 or len(front_rgb_frames) == 0: + continue + # video_root_path = '/cephfs-thu/gsm_data/robotruck' + # video_paths = [ + # os.path.join(video_root_path, attrs['top_depth_video_file']), + # os.path.join(video_root_path, attrs['top_rgb_video_file']), + # os.path.join(video_root_path, attrs['front_rgb_video_file']) + # ] + # top_depth_frames, top_rgb_frames, front_rgb_frames = parallel_get_frames(video_paths) + + steps = [] + for i in range(len(eef_poses)): + task_demo_id = f"SawyerTowerCreation_{demo_key}_{i}" + step = { + 'task_demo_id': task_demo_id, + 'eef_poses': eef_poses[i], + 'joint_states_arm': joint_states_arm[i], + 'joint_states_gripper': joint_states_gripper[i], + 'user_control': user_control_data[i] if user_control_data.shape[0] > 0 else np.zeros(22), + 'observation':{ + 'top_depth_frame': top_depth_frames[i] if i < len(top_depth_frames) else np.zeros((0,0, 3), dtype=np.uint8), + 'top_rgb_frame': top_rgb_frames[i] if i < len(top_rgb_frames) else np.zeros((0, 0, 3), dtype=np.uint8), + 'front_rgb_frame': front_rgb_frames[i] if i < len(front_rgb_frames) else np.zeros((0, 0, 3), dtype=np.uint8), + }, + 'terminate_episode': i == len(eef_poses) - 1 + } + steps.append(step) + + + steps_dataset = tf.data.Dataset.from_generator( + lambda: iter(steps), + output_signature={ + 'task_demo_id': tf.TensorSpec(shape=(), dtype=tf.string), + 'eef_poses': tf.TensorSpec(shape=(7,), dtype=tf.float32), + 'joint_states_arm': tf.TensorSpec(shape=(27,), dtype=tf.float32), + 'joint_states_gripper': tf.TensorSpec(shape=(3,), dtype=tf.float32), + 'user_control': tf.TensorSpec(shape=(22,), dtype=tf.float32), + 'observation':{ + 'top_depth_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8), + 'top_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8), + 'front_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8), + }, + 'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.bool), + } + ) + + yield {'steps': steps_dataset} + +def load_dataset(): + filename = './data/datasets/roboturk/SawyerLaundryLayout_aligned_dataset.hdf5' + total_samples = count_total_samples(filename) + dataset = tf.data.Dataset.from_generator( + lambda: dataset_generator(filename, total_samples), + output_signature={ + 'steps': tf.data.DatasetSpec( + element_spec={ + 'task_demo_id': tf.TensorSpec(shape=(), dtype=tf.string), + 'eef_poses': tf.TensorSpec(shape=(7,), dtype=tf.float32), + 'joint_states_arm': tf.TensorSpec(shape=(27,), dtype=tf.float32), + 'joint_states_gripper': tf.TensorSpec(shape=(3,), dtype=tf.float32), + 'user_control': tf.TensorSpec(shape=(22,), dtype=tf.float32), + 'observation':{ + 'top_depth_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8), + 'top_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8), + 'front_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8), + }, + 'terminate_episode': tf.TensorSpec(shape=(), dtype = tf.bool), + } + ) + } + ) + return dataset + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + step['action'] = {} + action = step['action'] + action['terminate'] = step['terminate_episode'] + + eef_delta_pos = step['eef_poses'][:3] + eef_ang = step['eef_poses'][3:] + + # No base found + # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0) + action['arm_concat'] = arm_action + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw") + + # No state found + state = step['observation'] + # joint_states_arm: dataset of (num_timestamps, 27) shape where each of the 9 joints is represented by the JointState message + # (the nine joints are in order by their ROSBAG names: ['head_pan', 'right_j0', 'right_j1', 'right_j2', 'right_j3', 'right_j4', 'right_j5', 'right_j6', 'torso_t0']. For the most part, head_pan and torso should be zeros) + # [0] the position of the first joint (rad or m) + # [1] the velocity of the first joint (rad/s or m/s) + # [2] the effort that is applied in the first joint + # [3] the position of the second joint... + joint_states_arm = step['joint_states_arm'] + joint_pos = joint_states_arm[3:24:3] + joint_vel = joint_states_arm[4:25:3] + # joint_states_gripper: dataset of (num_timestamps, 3) shape + # [0] the position of the gripper (rad or m) + # [1] the velocity of the gripper (rad/s or m/s) + # [2] the effort that is applied in the gripper + joint_states_gripper = step['joint_states_gripper'] + gripper_pos = joint_states_gripper[:1] + # remove gripper_vel due to they are all zeros + # gripper_vel = joint_states_gripper[1:2] + # Concatenate the state + # state['arm_concat'] = tf.concat([joint_pos,joint_vel,gripper_pos,gripper_vel], axis=0) + state['arm_concat'] = tf.concat([joint_pos,joint_vel,gripper_pos], axis=0) + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + # copied from openxembod + instr = b'create tower' + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = '/cephfs-thu/gsm_data/openx_embod' + DATASET_NAME = 'roboturk_real_laundrylayout' + # Load the dataset + dataset = load_dataset() + + # save_dir = os.path.join(DATASET_DIR, DATASET_NAME) + # if not os.path.exists(save_dir): + # os.makedirs(save_dir) + # tf.data.experimental.save(dataset, save_dir) diff --git a/data/preprocess_scripts/singlevla_benchmark.py b/data/preprocess_scripts/singlevla_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..099973c31810e706f5da15246cf7fd89a10f88e8 --- /dev/null +++ b/data/preprocess_scripts/singlevla_benchmark.py @@ -0,0 +1,82 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + + # Robot action + # eef_pos = action_dict['ee_pos'][:3] + # eef_ang = action_dict['ee_pos'][3:6] + # eef_ang = euler_to_rotation_matrix(eef_ang) + # eef_ang = rotation_matrix_to_ortho6d(eef_ang) + # eef_pos_vel = action_dict['delta_ee'][:3] + # eef_ang_vel = action_dict['delta_ee'][3:6] + joint_pos = action_dict['joint_pos'] + # joint_vel = action_dict['delta_joint'][:-1] + # grip_pos = action_dict['delta_ee'][-1:] + # grip_vel = action_dict['gripper_velocity'] + + # Concatenate the action + step['action'] = {} + action = step['action'] + + # arm_action = tf.concat([joint_pos, eef_pos, eef_ang, eef_pos_vel, eef_ang_vel, grip_pos], axis=0) + action['arm_concat'] = joint_pos + # action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos, gripper_joint_0_pos") + + # Convert raw state to our state + # Robot state + state = step['observation']['state'] + # image = step['observation']['image'] + # eef_pos = state['ee_pos'][:3] + # eef_ang = state['ee_pos'][3:6] + # eef_ang = euler_to_rotation_matrix(eef_ang) + # eef_ang = rotation_matrix_to_ortho6d(eef_ang) + joint_pos = state['joint_pos'] + # grip_pos = state['joint_pos'][-1:] + + # # Concatenate the state + # state['arm_concat'] = tf.concat([ + # joint_pos, grip_pos,eef_pos,eef_ang], axis=0) + + state['arm_concat'] = joint_pos + + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..ae20d02368d09302823d7fa2d07a10b6b4aee4bc --- /dev/null +++ b/data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py @@ -0,0 +1,84 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate'] = step['is_terminal'] + + eef_delta_pos = origin_action[:3] + + # Ignore grip_open: -0.15~0.15 + + # No base found + + # Concatenate the action + action['arm_concat'] = eef_delta_pos + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") + + # Convert raw state to our state + state = step['observation'] + eef_quat = state['ee_orientation'] + eef_ang = quaternion_to_rotation_matrix(eef_quat) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + eef_ang_vel = state['ee_orientation_vel'] + eef_pos = state['ee_position'] + eef_vel = state['ee_vel'] + joint_pos = state['joint_pos'] + joint_vel = state['joint_vel'] + gripper_open = state['state'][7:8] + + # Concatenate the state + state['arm_concat'] = tf.concat([joint_pos,gripper_open,joint_vel,eef_pos,eef_ang,eef_vel,eef_ang_vel],axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'stanford_kuka_multimodal_dataset_converted_externally_to_rlds' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/stanford_mask_vit_converted_externally_to_rlds.py b/data/preprocess_scripts/stanford_mask_vit_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..06d73d88e06901dacbe2d752733738bb558c9c6b --- /dev/null +++ b/data/preprocess_scripts/stanford_mask_vit_converted_externally_to_rlds.py @@ -0,0 +1,98 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x change in end effector position, 1x gripper yaw, 1x open/close gripper (-1 means to open the gripper, 1 means close)]. + eef_delta_pos = action[:3] + eef_yaw = action[3:4] + eef_ang = tf.stack([0,0,eef_yaw[0]],axis=0) + eef_ang = euler_to_quaternion(eef_ang) + grip_open = tf.expand_dims((1 - action[4]) / 2, axis=0) + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + + action['terminate'] = step['is_terminal'] + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + + # Convert raw state to our state + state = step['observation']['state'] + # Robot state, consists of [7x robot joint angles, 7x robot joint velocities,1x gripper position]. + arm_joint_pos = state[:7] + arm_joint_vel = state[7:14] + gripper_pos = state[14:15] + # Robot end effector pose, consists of [3x Cartesian position, 1x gripper yaw, 1x gripper position]. This is the state used in the MaskViT paper. + eef = step['observation']['end_effector_pose'] + eef_pos = eef[:3] + gripper_yaw = eef[3:4] + eef_ang = tf.stack([0.0,0.0,gripper_yaw[0]],axis=0) + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + # gripper_pos = eef[4:5] + + # Concatenate the state + state = step['observation'] + state['arm_concat'] = tf.concat([arm_joint_pos,arm_joint_vel,gripper_pos,eef_pos,eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py b/data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..d6699bb0ab693e7b604e48d5063a5462993bb91e --- /dev/null +++ b/data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py @@ -0,0 +1,89 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x robot end-effector velocities, 3x robot end-effector angular velocities, 1x gripper velocity]. + eef_delta_pos = action[:3] + eef_ang = action[3:6] + grip_vel = tf.expand_dims(action[6], axis=0) + + # Concatenate the action + # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_vel], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_vel") + + # Convert raw state to our state + state = step['observation'] + state_vec = state['state'] + # Robot state, consists of [3x robot end-effector position, 3x robot end-effector euler angles, 1x gripper position]. + eef_pos = state_vec[:3] + eef_ang = state_vec[3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + grip_joint_pos = state_vec[6:7] * 13.03 # rescale to [0, 1] + + # Concatenate the state + state['arm_concat'] = tf.concat([grip_joint_pos,eef_pos, eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/taco_play.py b/data/preprocess_scripts/taco_play.py new file mode 100644 index 0000000000000000000000000000000000000000..1cb3abd54b7ddfe0806479000aab4255fbdb3f7b --- /dev/null +++ b/data/preprocess_scripts/taco_play.py @@ -0,0 +1,98 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + action['terminate'] = terminate_act_to_bool(action['terminate_episode']) + + # action['actions']: absolute desired values for gripper pose + the_actions=action['actions'] + # First 6 dimensions are x, y, z, yaw, pitch, roll + eef_pos=the_actions[:3] + eef_ang = tf.concat([the_actions[5:6],the_actions[4:5],the_actions[3:4]],axis=0) + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + open_gripper=the_actions[6:] + # Last dimension is open_gripper (-1 is open gripper, 1 is close) + grip_open=tf.reshape(tf.where(open_gripper<0,1.0,0.0),(1,)) + # -1 -> 1.0, 1->0.0 + + # No base found + + # Concatenate the action + arm_action = tf.concat([eef_pos, eef_ang, grip_open], axis=0) + action['arm_concat'] = arm_action + + # Write the action format + action['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Convert raw state to our state + state = step['observation'] + robot_obs=state['robot_obs'] + # robot_obs: EE position (3), EE orientation in euler angles (3), gripper width (1), joint positions (7), gripper action (1) + eef_pos=robot_obs[:3] + eef_ang=robot_obs[3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + gripper_open=robot_obs[6:7] * 12.3843 # rescale to [0, 1] + joint_pos=robot_obs[7:14] + + # Concatenate the state + state['arm_concat'] = tf.concat([joint_pos,gripper_open,eef_pos,eef_ang],axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['natural_language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'taco_play' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/toto.py b/data/preprocess_scripts/toto.py new file mode 100644 index 0000000000000000000000000000000000000000..4117bf97508011ccecf15277e2cfd01bb70e289b --- /dev/null +++ b/data/preprocess_scripts/toto.py @@ -0,0 +1,82 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True)) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + action['terminate'] = terminate_act_to_bool(action['terminate_episode']) + eef_delta_pos = action['world_vector'] + eef_ang = action['rotation_delta'] + eef_ang = euler_to_quaternion(eef_ang) + grip_open = tf.reshape(tf.where(action['open_gripper'],tf.constant(1.0),tf.constant(0.0)),(1,)) + + # No base found + + # Concatenate the action + arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + action['arm_concat'] = arm_action + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation'] + joint_pos=state['state'] + + # Concatenate the state + state['arm_concat'] = joint_pos + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['observation']['natural_language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'toto' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/twinvla_benchmark.py b/data/preprocess_scripts/twinvla_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..bf55d50bc4a89476441939d4a765bdb25c43b722 --- /dev/null +++ b/data/preprocess_scripts/twinvla_benchmark.py @@ -0,0 +1,81 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + + # Robot action + # eef_pos = action_dict['ee_pos'][:3] + # eef_ang = action_dict['ee_pos'][3:6] + # eef_ang = euler_to_rotation_matrix(eef_ang) + # eef_ang = rotation_matrix_to_ortho6d(eef_ang) + # eef_pos_vel = action_dict['delta_ee'][:3] + # eef_ang_vel = action_dict['delta_ee'][3:6] + left_joint_pos = action_dict['joint_pos'][:7] + right_joint_pos = action_dict['joint_pos'][7:] + # joint_vel = action_dict['delta_joint'][:-1] + # grip_pos = action_dict['delta_ee'][-1:] + # grip_vel = action_dict['gripper_velocity'] + + # Concatenate the action + step['action'] = {} + action = step['action'] + + arm_action = tf.concat([left_joint_pos, right_joint_pos], axis=0) + action['arm_concat'] = arm_action + # action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open") + + # Convert raw state to our state + # Robot state + state = step['observation']['state'] + # image = step['observation']['image'] + # eef_pos = state['ee_pos'][:3] + # eef_ang = state['ee_pos'][3:6] + # eef_ang = euler_to_rotation_matrix(eef_ang) + # eef_ang = rotation_matrix_to_ortho6d(eef_ang) + left_joint_pos = state['joint_pos'][:7] + right_joint_pos = state['joint_pos'][7:] + # grip_pos = state['joint_pos'][-1:] + + # Concatenate the state + state['arm_concat'] = tf.concat([left_joint_pos, right_joint_pos], axis=0) + + + # Write the state format + state['format'] = tf.constant( + "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + # instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass diff --git a/data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..9a00173b53a762cdab90f2f5f774b87ed17c8ae5 --- /dev/null +++ b/data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py @@ -0,0 +1,91 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + + # 8-dimensional action, consisting of end-effector position and orientation, gripper open/close and a episode termination action. + action = step['action'] + eef_delta_pos = action[:3] + eef_ang = action[3:6] + eef_ang = euler_to_quaternion(eef_ang) + grip_open = tf.expand_dims(1 - action[6], axis=0) + + # Concatenate the action + # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + step['action'] = {} + action = step['action'] + + action['terminate'] = step['is_terminal'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation']['state'] + # 21-dimensional joint states, consists of robot joint angles, joint velocity and joint torque. + arm_joint_pos = state[:7] + arm_joint_vel = state[7:14] + # arm_joint_torque = state[14:21] + + # Concatenate the state + + state = step['observation'] + state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..feb7fd9c1124d86b0e923f9039620647afb65674 --- /dev/null +++ b/data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py @@ -0,0 +1,90 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x gripper velocities,1x gripper open/close torque]. + eef_vel = action[:3] + grip_open = tf.expand_dims(1 - action[3], axis=0) + + # Concatenate the action + # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + step['action'] = {} + action = step['action'] + + action['terminate'] = step['is_terminal'] + action['arm_concat'] = tf.concat([eef_vel,grip_open], axis=0) + # Write the action format + action['format'] = tf.constant( + "eef_vel_x,eef_vel_y,eef_vel_z,gripper_open") + + # Convert raw state to our state + state = step['observation']['state'] + # Robot state, consists of [3x gripper position,3x gripper orientation, 1x finger distance]. + gripper_pos = state[:3]/10 # dm -> m + gripper_ang = state[3:6] + gripper_ang = euler_to_rotation_matrix(gripper_ang) + gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) + gripper_open = state[6:7]/6.03 # 1 for open + + + # Concatenate the state + state = step['observation'] + state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/uiuc_d3field.py b/data/preprocess_scripts/uiuc_d3field.py new file mode 100644 index 0000000000000000000000000000000000000000..12b060e039098327e13ce127a984263dad2302b4 --- /dev/null +++ b/data/preprocess_scripts/uiuc_d3field.py @@ -0,0 +1,77 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate'] = step['is_terminal'] + # Robot displacement from last frame + eef_delta_pos = origin_action[:3] + + # No base found + + # Concatenate the action + action['arm_concat'] = eef_delta_pos + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + # 4x4 Robot end-effector state + eef_mat=state['state'] + eef_pos=eef_mat[:3,3] + rotation_matrix=eef_mat[:3,:3] + eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) + state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'uiuc_d3field' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py b/data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..2895224de2436de191cb98da4a2f1d398f6b6127 --- /dev/null +++ b/data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py @@ -0,0 +1,73 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + # Robot action, consists of x,y,z goal and picker commandpicker<0.5 = open, picker>0.5 = close. + action = step['action'] + eef_delta_pos = action[:3] + grip_open = tf.cast(tf.expand_dims(action[3] < 0.5, axis=0), dtype=tf.float32) + + # Concatenate the action + step['action'] = {} + action = step['action'] + + action['terminate'] = step['is_terminal'] + action['arm_concat'] = tf.concat([eef_delta_pos, grip_open], axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,gripper_open") + + # State doesnt exist in this dataset + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/utaustin_mutex.py b/data/preprocess_scripts/utaustin_mutex.py new file mode 100644 index 0000000000000000000000000000000000000000..87343b8f8657112685c71466a4fd323625a26396 --- /dev/null +++ b/data/preprocess_scripts/utaustin_mutex.py @@ -0,0 +1,81 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + + origin_action = step['action'] + step['action']={} + action=step['action'] + action['terminate'] = step['is_terminal'] + # 6x end effector delta pose, 1x gripper position + eef_delta_pos = origin_action[:3] + eef_ang=origin_action[3:6] + eef_ang = euler_to_quaternion(eef_ang) + grip_open=origin_action[6:7] + # No base found + + # Concatenate the action + action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0) + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation'] + # Concatenate the state + arm_joint_ang=state['state'][:7] + grip_open=state['state'][7:8] * 13.21 # rescale to [0, 1] + # 4x4 Robot end-effector state + eef_mat = tf.transpose(tf.reshape(state['state'][8:], (4, 4))) + eef_pos = eef_mat[:3, 3] + rotation_matrix = eef_mat[:3, :3] + eef_ang = rotation_matrix_to_ortho6d(rotation_matrix) + state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'utaustin_mutex' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py b/data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..6309864538a255007f5eb208393eaf5edf2feeff --- /dev/null +++ b/data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py @@ -0,0 +1,91 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper open/close command, 1x terminal action]. + eef_delta_pos = action[:3]/1000 # change from mm to m + eef_ang = action[3:6] + eef_ang = euler_to_quaternion(eef_ang) + grip_open = tf.expand_dims(1 - action[6], axis=0) + + # Concatenate the action + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + + action['terminate'] = step['is_terminal'] + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + + # Convert raw state to our state + state = step['observation']['state'] + # Robot state, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper position]. + gripper_pos = state[:3]/1000 # change from mm to m + gripper_ang = state[3:6] + gripper_ang = euler_to_rotation_matrix(gripper_ang) + gripper_ang = rotation_matrix_to_ortho6d(gripper_ang) + gripper_open = state[6:7] / 86.12126922607422 # rescale to [0, 1] + + # Concatenate the state + state = step['observation'] + state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0) + + # Write the state format + state['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_joint_0_pos") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/utokyo_xarm_pick_and_place_converted_externally_to_rlds.py b/data/preprocess_scripts/utokyo_xarm_pick_and_place_converted_externally_to_rlds.py new file mode 100644 index 0000000000000000000000000000000000000000..7d43fb8af5c51d7b0a272c7cfe19349cbc516dcd --- /dev/null +++ b/data/preprocess_scripts/utokyo_xarm_pick_and_place_converted_externally_to_rlds.py @@ -0,0 +1,97 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \ + rotation_matrix_to_ortho6d + + +def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor: + """ + Convert terminate action to a boolean, where True means terminate. + """ + return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32))) + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action = step['action'] + # Robot action, consists of [3x EEF position, 3x EEF orientation yaw/pitch/roll, 1x gripper open/close position]. + eef_delta_pos = action[:3] + eef_ang = tf.gather(action[3:6], [2, 1, 0]) + eef_ang = euler_to_quaternion(eef_ang) + grip_open = tf.expand_dims(1 - action[6], axis=0) + + # Concatenate the action + # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + step['action'] = {} + action = step['action'] + action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0) + action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open") + + # Convert raw state to our state + state = step['observation'] + # Robot joint state, consists of [7x robot joint angles, 7x robot joint velocity] + arm_joint_state = state['joint_state'] + arm_joint_pos = arm_joint_state[:7] + # We do not use the velocity since it is inacurate + # arm_joint_vel = arm_joint_state[7:14] + # Robot joint trajectory, consists of [7x robot joint angles, 7x robot joint velocity, 7x robot joint acceralation]. + # arm_joint_traj = state['joint_trajectory'] + # Robot end effector pose, consists of [3x EEF position, 3x EEF orientation yaw/pitch/roll]. + eef = state["end_effector_pose"] + eef_pos = eef[:3] + eef_ang = tf.gather(eef[3:6], [2, 1, 0]) + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + + # Concatenate the state + # state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, eef_pos, eef_ang], axis=0) + state['arm_concat'] = tf.concat([arm_joint_pos, eef_pos, eef_ang], axis=0) + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + import tensorflow_datasets as tfds + from data.utils import dataset_to_path + + DATASET_DIR = 'data/datasets/openx_embod' + DATASET_NAME = 'fractal20220817_data' + # Load the dataset + dataset = tfds.builder_from_directory( + builder_dir=dataset_to_path( + DATASET_NAME, DATASET_DIR)) + dataset = dataset.as_dataset(split='all') + + # Inspect the dataset + for episode in dataset: + for step in episode['steps']: + print(step) diff --git a/data/preprocess_scripts/vla_benchmark_ee.py b/data/preprocess_scripts/vla_benchmark_ee.py new file mode 100644 index 0000000000000000000000000000000000000000..207731fe8a61c4078f188dd88277b171cf41d15c --- /dev/null +++ b/data/preprocess_scripts/vla_benchmark_ee.py @@ -0,0 +1,80 @@ +import tensorflow as tf + +from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d + + +def process_step(step: dict) -> dict: + """ + Unify the action format and clean the task instruction. + + DO NOT use python list, use tf.TensorArray instead. + """ + # Convert raw action to our action + action_dict = step['action'] + + # Robot action + eef_pos = action_dict['ee_pos'][:3] + eef_ang = action_dict['ee_pos'][3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + eef_pos_vel = action_dict['delta_ee'][:3] + eef_ang_vel = action_dict['delta_ee'][3:6] + joint_pos = action_dict['joint_pos'][:-1] + joint_vel = action_dict['delta_joint'][:-1] + grip_pos = action_dict['delta_ee'][-1:] + # grip_vel = action_dict['gripper_velocity'] + + # Concatenate the action + step['action'] = {} + action = step['action'] + + arm_action = tf.concat([eef_pos, eef_ang, eef_pos_vel, eef_ang_vel, joint_pos, joint_vel, grip_pos], axis=0) + action['arm_concat'] = arm_action + # action['terminate'] = step['is_terminal'] + + # Write the action format + action['format'] = tf.constant( + "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos") + + # Convert raw state to our state + # Robot state + state = step['observation']['state'] + image = step['observation']['image'] + eef_pos = state['ee_pos'][:3] + eef_ang = state['ee_pos'][3:6] + eef_ang = euler_to_rotation_matrix(eef_ang) + eef_ang = rotation_matrix_to_ortho6d(eef_ang) + joint_pos = state['joint_pos'][:-1] + grip_pos = state['joint_pos'][-1:] + + # Concatenate the state + state['arm_concat'] = tf.concat([ + joint_pos,grip_pos,eef_pos,eef_ang], axis=0) + + + # Write the state format + state['format'] = tf.constant( + "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5") + + # Clean the task instruction + # Define the replacements (old, new) as a dictionary + replacements = { + '_': ' ', + '1f': ' ', + '4f': ' ', + '-': ' ', + '50': ' ', + '55': ' ', + '56': ' ', + + } + instr = step['language_instruction'] + instr = clean_task_instruction(instr, replacements) + step['observation'] = state + step['observation']['natural_language_instruction'] = instr + + return step + + +if __name__ == "__main__": + pass