diff --git a/data/preprocess_scripts/agilex.py b/data/preprocess_scripts/agilex.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6a4965e4884a9b43cade76c9007e464cbd97903
--- /dev/null
+++ b/data/preprocess_scripts/agilex.py
@@ -0,0 +1,186 @@
+import json
+import random
+import os
+import fnmatch
+import random
+
+import tensorflow as tf
+
+
+def _parse_function(proto, precomputed_instr_embed_path):
+    keys_to_features = {
+        'action': tf.io.FixedLenFeature([], tf.string),
+        'base_action': tf.io.FixedLenFeature([], tf.string),
+        'qpos': tf.io.FixedLenFeature([], tf.string),
+        'qvel': tf.io.FixedLenFeature([], tf.string),
+        'cam_high': tf.io.FixedLenFeature([], tf.string),
+        'cam_left_wrist': tf.io.FixedLenFeature([], tf.string),
+        'cam_right_wrist': tf.io.FixedLenFeature([], tf.string),
+        'instruction': tf.io.FixedLenFeature([], tf.string),
+        'terminate_episode': tf.io.FixedLenFeature([], tf.int64)
+    }
+
+    parsed_features = tf.io.parse_single_example(proto, keys_to_features)
+
+    action = tf.io.parse_tensor(parsed_features['action'], out_type=tf.float32)
+    base_action = tf.io.parse_tensor(parsed_features['base_action'], out_type=tf.float32)
+    qpos = tf.io.parse_tensor(parsed_features['qpos'], out_type=tf.float32)
+    qvel = tf.io.parse_tensor(parsed_features['qvel'], out_type=tf.float32)
+    cam_high = tf.io.parse_tensor(parsed_features['cam_high'], out_type=tf.string)
+    cam_left_wrist = tf.io.parse_tensor(parsed_features['cam_left_wrist'], out_type=tf.string)
+    cam_right_wrist = tf.io.parse_tensor(parsed_features['cam_right_wrist'], out_type=tf.string)
+    # instruction = parsed_features['instruction']
+    terminate_episode = tf.cast(parsed_features['terminate_episode'], tf.int64)
+    
+    cam_high = tf.image.decode_jpeg(cam_high, channels=3, dct_method='INTEGER_ACCURATE')
+    cam_left_wrist = tf.image.decode_jpeg(cam_left_wrist, channels=3, dct_method='INTEGER_ACCURATE')
+    cam_right_wrist = tf.image.decode_jpeg(cam_right_wrist, channels=3, dct_method='INTEGER_ACCURATE')
+    # BGR to RGB
+    cam_high = tf.reverse(cam_high, axis=[-1])
+    cam_left_wrist = tf.reverse(cam_left_wrist, axis=[-1])
+    cam_right_wrist = tf.reverse(cam_right_wrist, axis=[-1])
+
+    action = tf.reshape(action, [14])
+    base_action = tf.reshape(base_action, [2])
+    qpos = tf.reshape(qpos, [14])
+    qvel = tf.reshape(qvel, [14])
+    cam_high = tf.reshape(cam_high, [480, 640, 3])
+    cam_left_wrist = tf.reshape(cam_left_wrist, [480, 640, 3])
+    cam_right_wrist = tf.reshape(cam_right_wrist, [480, 640, 3])
+    
+    return {
+        "action": action,
+        "base_action": base_action,
+        "qpos": qpos,
+        "qvel": qvel,
+        'observation':{
+        "cam_high": cam_high,
+        "cam_left_wrist": cam_left_wrist,
+        "cam_right_wrist": cam_right_wrist
+        },
+        "instruction": precomputed_instr_embed_path,
+        "terminate_episode": terminate_episode,
+    }
+
+
+def dataset_generator_from_tfrecords(seed):
+    tfrecord_path = './data/datasets/agilex/tfrecords/'
+    filepaths = []
+    for root, dirs, files in os.walk(tfrecord_path):
+        for filename in fnmatch.filter(files, '*.tfrecord'):
+            filepath = os.path.join(root, filename)
+            filepaths.append(filepath)
+    
+    random.seed(seed)
+    random.shuffle(filepaths)
+    for filepath in filepaths:
+        raw_dataset = tf.data.TFRecordDataset(filepath)
+        dataset = raw_dataset.map(lambda x: _parse_function(x, os.path.dirname(filepath)))
+        yield {
+            'steps': dataset
+        }
+
+
+def load_dataset(seed):
+    dataset = tf.data.Dataset.from_generator(
+        lambda: dataset_generator_from_tfrecords(seed),
+        output_signature={
+            'steps': tf.data.DatasetSpec(
+                element_spec={
+                    'action': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'base_action': tf.TensorSpec(shape=(2), dtype=tf.float32),
+                    'qpos': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'qvel': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'observation': {
+                        'cam_high': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'cam_left_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'cam_right_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                    },
+                    'instruction': tf.TensorSpec(shape=(), dtype=tf.string),
+                    'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64),
+                }
+            )
+        }
+    )
+
+    return dataset
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    old_action = step['action']
+    step['action'] = {}
+    action = step['action']
+    step['action']['terminate'] = step['terminate_episode']
+    # act-plus-plus/utils.py at main · MarkFzp/act-plus-plus
+    left_arm_pos = old_action[:6]
+    left_gripper_open = old_action[6:7] / 11.8997
+    right_arm_pos = old_action[7:13]
+    right_gripper_open = old_action[13:14] / 13.9231
+
+    # Base action is dummy (all zeros)
+    arm_action = tf.concat([left_arm_pos,left_gripper_open,right_arm_pos,right_gripper_open], axis=0)
+    action['arm_concat'] = arm_action
+    # Write the action format
+    action['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open")
+    
+    state = step['observation']
+    left_qpos = step['qpos'][:6]
+    left_gripper_open = step['qpos'][6:7] / 4.7908     # rescale to [0, 1]
+    right_qpos = step['qpos'][7:13]
+    right_gripper_open = step['qpos'][13:14] / 4.7888  # rescale to [0, 1]
+    state['arm_concat'] = tf.concat([left_qpos, left_gripper_open,right_qpos, right_gripper_open], axis=0)
+    # # Write the state format
+    state['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open")
+
+    # We randomly sample [original,expanded,simplified] instructions. The ratio is 1:1:1
+    instr_type = tf.random.uniform(shape=(), minval=0, maxval=3, dtype=tf.int32)
+    # # NOTE bg : tf.random and tf.constant is buggy as it always return 0 (?)
+    # instr_type = tf.constant(instr_type)
+    # print(instr_type)
+    @tf.function
+    def f0(): 
+        return tf.strings.join([step['instruction'], tf.constant('/lang_embed_0.pt')])
+    @tf.function
+    def f1(): 
+        return tf.strings.join([step['instruction'], tf.constant('/lang_embed_1.pt')])
+    @tf.function
+    def f2():
+        index = tf.random.uniform(shape=(), minval=0, maxval=100, dtype=tf.int32)
+        return tf.strings.join([
+            step['instruction'], tf.constant('/lang_embed_'), tf.strings.as_string(index+2), tf.constant('.pt')])
+
+    instr = tf.case([
+        (tf.equal(instr_type, 0), f0),
+        (tf.equal(instr_type, 1), f1),
+        (tf.equal(instr_type, 2), f2)
+    ], exclusive=True)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":  
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    dataset = load_dataset(42)
+    
+    for episode in dataset:
+        for step in episode['steps']:
+            step = process_step(step)
+            # save the images
+            print(step['observation']['natural_language_instruction'])
diff --git a/data/preprocess_scripts/aloha_box_into_pot.py b/data/preprocess_scripts/aloha_box_into_pot.py
new file mode 100644
index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e
--- /dev/null
+++ b/data/preprocess_scripts/aloha_box_into_pot.py
@@ -0,0 +1,55 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = action_dict['ee_6d_pos']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+
+    # Convert raw state to our state
+    # Robot state
+    state_dict = step['observation']['state']
+    state = {}
+    state['arm_concat'] = state_dict
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/aloha_dish_drainer.py b/data/preprocess_scripts/aloha_dish_drainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e
--- /dev/null
+++ b/data/preprocess_scripts/aloha_dish_drainer.py
@@ -0,0 +1,55 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = action_dict['ee_6d_pos']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+
+    # Convert raw state to our state
+    # Robot state
+    state_dict = step['observation']['state']
+    state = {}
+    state['arm_concat'] = state_dict
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/aloha_handover_box.py b/data/preprocess_scripts/aloha_handover_box.py
new file mode 100644
index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e
--- /dev/null
+++ b/data/preprocess_scripts/aloha_handover_box.py
@@ -0,0 +1,55 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = action_dict['ee_6d_pos']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+
+    # Convert raw state to our state
+    # Robot state
+    state_dict = step['observation']['state']
+    state = {}
+    state['arm_concat'] = state_dict
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/aloha_lift_box.py b/data/preprocess_scripts/aloha_lift_box.py
new file mode 100644
index 0000000000000000000000000000000000000000..a39783a8ba7a4bce846d6f8fe52ac3060dd2ae3e
--- /dev/null
+++ b/data/preprocess_scripts/aloha_lift_box.py
@@ -0,0 +1,55 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = action_dict['ee_6d_pos']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+
+    # Convert raw state to our state
+    # Robot state
+    state_dict = step['observation']['state']
+    state = {}
+    state['arm_concat'] = state_dict
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_0,left_eef_angle_1,left_eef_angle_2,left_eef_angle_3,left_eef_angle_4,left_eef_angle_5,left_gripper_open,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_0,right_eef_angle_1,right_eef_angle_2,right_eef_angle_3,right_eef_angle_4,right_eef_angle_5,right_gripper_open"
+    )
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/aloha_mobile.py b/data/preprocess_scripts/aloha_mobile.py
new file mode 100644
index 0000000000000000000000000000000000000000..013cfc3c117a401b9ebf20656bcfb172db70a73c
--- /dev/null
+++ b/data/preprocess_scripts/aloha_mobile.py
@@ -0,0 +1,183 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_euler
+import tensorflow as tf
+import h5py
+import numpy as np
+from tqdm import tqdm
+import os
+import imageio
+import concurrent.futures
+import fnmatch
+import cv2
+import random
+
+def _parse_function(proto):
+    keys_to_features = {
+        'action': tf.io.FixedLenFeature([], tf.string),
+        'base_action': tf.io.FixedLenFeature([], tf.string),
+        'qpos': tf.io.FixedLenFeature([], tf.string),
+        'qvel': tf.io.FixedLenFeature([], tf.string),
+        'cam_high': tf.io.FixedLenFeature([], tf.string),
+        'cam_left_wrist': tf.io.FixedLenFeature([], tf.string),
+        'cam_right_wrist': tf.io.FixedLenFeature([], tf.string),
+        'instruction': tf.io.FixedLenFeature([], tf.string),
+        'terminate_episode': tf.io.FixedLenFeature([], tf.int64)
+    }
+
+    parsed_features = tf.io.parse_single_example(proto, keys_to_features)
+
+    action = tf.io.parse_tensor(parsed_features['action'], out_type=tf.float32)
+    base_action = tf.io.parse_tensor(parsed_features['base_action'], out_type=tf.float32)
+    qpos = tf.io.parse_tensor(parsed_features['qpos'], out_type=tf.float32)
+    qvel = tf.io.parse_tensor(parsed_features['qvel'], out_type=tf.float32)
+    cam_high = tf.io.parse_tensor(parsed_features['cam_high'], out_type=tf.uint8)
+    cam_left_wrist = tf.io.parse_tensor(parsed_features['cam_left_wrist'], out_type=tf.uint8)
+    cam_right_wrist = tf.io.parse_tensor(parsed_features['cam_right_wrist'], out_type=tf.uint8)
+    instruction = parsed_features['instruction']
+    terminate_episode = tf.cast(parsed_features['terminate_episode'], tf.int64)
+    action = tf.reshape(action, [14])
+    base_action = tf.reshape(base_action, [2])
+    qpos = tf.reshape(qpos, [14])
+    qvel = tf.reshape(qvel, [14])
+    cam_high = tf.reshape(cam_high, [480, 640, 3])
+    cam_left_wrist = tf.reshape(cam_left_wrist, [480, 640, 3])
+    cam_right_wrist = tf.reshape(cam_right_wrist, [480, 640, 3])
+
+    return {
+        "action": action,
+        "base_action": base_action,
+        "qpos": qpos,
+        "qvel": qvel,
+        'observation':{
+        "cam_high": cam_high,
+        "cam_left_wrist": cam_left_wrist,
+        "cam_right_wrist": cam_right_wrist
+        },
+        "instruction": instruction,
+        "terminate_episode": terminate_episode
+    }
+
+def dataset_generator_from_tfrecords(seed):
+    tfrecord_path = './data/datasets/aloha/tfrecords/aloha_mobile/'
+    datasets = []
+    filepaths = []
+    for root, dirs, files in os.walk(tfrecord_path):
+        for filename in fnmatch.filter(files, '*.tfrecord'):
+            filepath = os.path.join(root, filename)
+            filepaths.append(filepath)
+            
+    random.seed(seed)
+    random.shuffle(filepaths)
+    for filepath in filepaths:
+        raw_dataset = tf.data.TFRecordDataset(filepath)
+        dataset = raw_dataset.map(_parse_function)
+        yield {
+            'steps': dataset
+        }
+
+def load_dataset(seed):
+    dataset = tf.data.Dataset.from_generator(
+        lambda: dataset_generator_from_tfrecords(seed),
+        output_signature={
+            'steps': tf.data.DatasetSpec(
+                element_spec={
+                    'action': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'base_action': tf.TensorSpec(shape=(2), dtype=tf.float32),
+                    'qpos': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'qvel': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'observation': {
+                        'cam_high': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'cam_left_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'cam_right_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                    },
+                    'instruction': tf.TensorSpec(shape=(), dtype=tf.string),
+                    'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64)
+                }
+            )
+        }
+    )
+
+    return dataset
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    old_action = step['action']
+    step['action'] = {}
+    action = step['action']
+    step['action']['terminate'] = step['terminate_episode']
+    # act-plus-plus/utils.py at main · MarkFzp/act-plus-plus
+    left_arm_pos = old_action[:6]
+    left_gripper_open = old_action[6:7]
+    right_arm_pos = old_action[7:13]
+    right_gripper_open = old_action[13:14]
+
+    base_vel_y = step['base_action'][:1]
+    base_delta_ang = step['base_action'][1:]
+    base_action = tf.concat([base_vel_y, base_delta_ang], axis=0)
+    # # No base found
+    arm_action = tf.concat([left_arm_pos,left_gripper_open,right_arm_pos,right_gripper_open], axis=0)
+    action['arm_concat'] = arm_action
+    action['base_concat'] = base_action
+    # # Write the action format
+    action['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open,base_vel_y,base_angular_vel")
+    
+    state = step['observation']
+    left_qpos = step['qpos'][:6]
+    left_gripper_open = step['qpos'][6:7]
+    right_qpos = step['qpos'][7:13]
+    right_gripper_open = step['qpos'][13:14]
+    left_qvel = step['qvel'][:6]
+    # left_gripper_joint_vel = step['qvel'][6:7]
+    right_qvel = step['qvel'][7:13]
+    # right_gripper_joint_vel = step['qvel'][13:14]
+
+    state['arm_concat'] = tf.concat([left_qpos, left_qvel, left_gripper_open, right_qpos, right_qvel, right_gripper_open], axis=0)
+    # # Write the state format
+    state['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_arm_joint_0_vel,left_arm_joint_1_vel,left_arm_joint_2_vel,left_arm_joint_3_vel,left_arm_joint_4_vel,left_arm_joint_5_vel,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_arm_joint_0_vel,right_arm_joint_1_vel,right_arm_joint_2_vel,right_arm_joint_3_vel,right_arm_joint_4_vel,right_arm_joint_5_vel,right_gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = '/mnt/d/aloha/'
+    DATASET_NAME = 'dataset'
+    # Load the dataset
+    dataset = load_dataset()
+    for data in dataset.take(1):
+        for step in data['steps'].take(1):
+            from IPython import embed; embed()
+            print(step)
\ No newline at end of file
diff --git a/data/preprocess_scripts/aloha_static.py b/data/preprocess_scripts/aloha_static.py
new file mode 100644
index 0000000000000000000000000000000000000000..23480f021cb015511815ee48e6a8ae9c11d1efce
--- /dev/null
+++ b/data/preprocess_scripts/aloha_static.py
@@ -0,0 +1,193 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_euler
+import tensorflow as tf
+import h5py
+import numpy as np
+from tqdm import tqdm
+import os
+import imageio
+import concurrent.futures
+import fnmatch
+import cv2
+import random
+
+def get_all_hdf5s(root_dir):
+    num_files = 0
+    for root, dirs, files in os.walk(root_dir):
+        for filename in fnmatch.filter(files, '*.hdf5'):
+            num_files += 1
+    return num_files
+
+def stash_image_into_observation(step):
+    step['observation'] = {'cam_high': [], 'cam_left_wrist': [], 'cam_right_wrist':[], 'cam_low':[] }
+    step['observation']['cam_high'] = step['cam_high']
+    step['observation']['cam_left_wrist'] = step['cam_left_wrist']
+    step['observation']['cam_right_wrist'] = step['cam_right_wrist']
+    step['observation']['cam_low'] = step['cam_low']
+    return step
+
+def _parse_function(proto):
+    keys_to_features = {
+        'action': tf.io.FixedLenFeature([], tf.string),
+        'qpos': tf.io.FixedLenFeature([], tf.string),
+        'qvel': tf.io.FixedLenFeature([], tf.string),
+        'cam_high': tf.io.FixedLenFeature([], tf.string),
+        'cam_left_wrist': tf.io.FixedLenFeature([], tf.string),
+        'cam_right_wrist': tf.io.FixedLenFeature([], tf.string),
+        'cam_low': tf.io.FixedLenFeature([], tf.string),
+        'instruction': tf.io.FixedLenFeature([], tf.string),
+        'terminate_episode': tf.io.FixedLenFeature([], tf.int64)
+    }
+
+    parsed_features = tf.io.parse_single_example(proto, keys_to_features)
+
+    action = tf.io.parse_tensor(parsed_features['action'], out_type=tf.float32)
+    qpos = tf.io.parse_tensor(parsed_features['qpos'], out_type=tf.float32)
+    qvel = tf.io.parse_tensor(parsed_features['qvel'], out_type=tf.float32)
+    cam_high = tf.io.parse_tensor(parsed_features['cam_high'], out_type=tf.uint8)
+    cam_left_wrist = tf.io.parse_tensor(parsed_features['cam_left_wrist'], out_type=tf.uint8)
+    cam_right_wrist = tf.io.parse_tensor(parsed_features['cam_right_wrist'], out_type=tf.uint8)
+    cam_low = tf.io.parse_tensor(parsed_features['cam_low'], out_type=tf.uint8)
+    instruction = parsed_features['instruction']
+    terminate_episode = tf.cast(parsed_features['terminate_episode'], tf.int64)
+    action = tf.reshape(action, [14])
+    qpos = tf.reshape(qpos, [14])
+    qvel = tf.reshape(qvel, [14])
+    cam_high = tf.reshape(cam_high, [480, 640, 3])
+    cam_left_wrist = tf.reshape(cam_left_wrist, [480, 640, 3])
+    cam_right_wrist = tf.reshape(cam_right_wrist, [480, 640, 3])
+    cam_low = tf.reshape(cam_low, [480, 640, 3])
+    return {
+        "action": action,
+        "qpos": qpos,
+        "qvel": qvel,
+        'observation':{
+        "cam_high": cam_high,
+        "cam_left_wrist": cam_left_wrist,
+        "cam_right_wrist": cam_right_wrist,
+        "cam_low":cam_low
+        },
+        "instruction": instruction,
+        "terminate_episode": terminate_episode
+    }
+
+def dataset_generator_from_tfrecords(seed):
+    tfrecord_path = './data/datasets/aloha/tfrecords/aloha_static_cotraining_datasets/'
+    datasets = []
+    filepaths = []
+    for root, dirs, files in os.walk(tfrecord_path):
+        for filename in fnmatch.filter(files, '*.tfrecord'):
+            filepath = os.path.join(root, filename)
+            filepaths.append(filepath)
+            
+    random.seed(seed)
+    random.shuffle(filepaths)
+    for filepath in filepaths:
+        raw_dataset = tf.data.TFRecordDataset(filepath)
+        dataset = raw_dataset.map(_parse_function)
+        yield {
+            'steps': dataset
+        }
+
+def load_dataset(seed):
+    dataset = tf.data.Dataset.from_generator(
+        lambda: dataset_generator_from_tfrecords(seed),
+        output_signature={
+            'steps': tf.data.DatasetSpec(
+                element_spec={
+                    'action': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'qpos': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'qvel': tf.TensorSpec(shape=(14), dtype=tf.float32),
+                    'observation': {
+                        'cam_high': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'cam_left_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'cam_right_wrist': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'cam_low': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                    },
+                    'instruction': tf.TensorSpec(shape=(), dtype=tf.string),
+                    'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64)
+                }
+            )
+        }
+    )
+
+    return dataset
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    old_action = step['action']
+    step['action'] = {}
+    action = step['action']
+    step['action']['terminate'] = step['terminate_episode']
+    # act-plus-plus/utils.py at main · MarkFzp/act-plus-plus
+    left_arm_pos = old_action[:6]
+    left_gripper_open = old_action[6:7]
+    right_arm_pos = old_action[7:13]
+    right_gripper_open = old_action[13:14]
+
+    arm_action = tf.concat([left_arm_pos,left_gripper_open,right_arm_pos,right_gripper_open], axis=0)
+    
+    action['arm_concat'] = arm_action
+    # # Write the action format
+    action['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open")
+
+    state = step['observation']
+    left_qpos = step['qpos'][:6]
+    left_gripper_open = step['qpos'][6:7]
+    right_qpos = step['qpos'][7:13]
+    right_gripper_open = step['qpos'][13:14]
+    left_qvel = step['qvel'][:6]
+    # left_gripper_joint_vel = step['qvel'][6:7]
+    right_qvel = step['qvel'][7:13]
+    # right_gripper_joint_vel = step['qvel'][13:14]
+
+    state['arm_concat'] = tf.concat([left_qpos, left_qvel, left_gripper_open, right_qpos, right_qvel, right_gripper_open], axis=0)
+    # # Write the state format
+    state['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_arm_joint_0_vel,left_arm_joint_1_vel,left_arm_joint_2_vel,left_arm_joint_3_vel,left_arm_joint_4_vel,left_arm_joint_5_vel,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_arm_joint_0_vel,right_arm_joint_1_vel,right_arm_joint_2_vel,right_arm_joint_3_vel,right_arm_joint_4_vel,right_arm_joint_5_vel,right_gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = '/mnt/d/aloha/'
+    DATASET_NAME = 'dataset'
+    # Load the dataset
+    dataset = load_dataset()
+    for data in dataset.take(1):
+        for step in data['steps'].take(1):
+            from IPython import embed; embed()
+            print(step)
\ No newline at end of file
diff --git a/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py b/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..7af2596d0abcbc148b1f88a8c18868af664e9715
--- /dev/null
+++ b/data/preprocess_scripts/asu_table_top_converted_externally_to_rlds.py
@@ -0,0 +1,91 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_euler
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode].
+    # NOTE the dimension of action is actually 7, so only 7x joint velocities exists
+    joint_vel = action[:7]
+    gripper_vel = action[7:9]
+    # there are extra action_delta information
+    # Robot delta action, consists of [7x joint velocities, 2x gripper velocities, 1x terminate episode].
+    # action_delta = step['action_delta']
+    
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([joint_vel, gripper_vel], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_vel,gripper_joint_1_vel")
+    
+    # Convert raw state to our state
+    state = step['observation']
+    state_vec = state['state']
+    # Robot state, consists of [6x robot joint angles, 1x gripper position].
+    arm_joint_ang = state_vec[:6]
+    grip_pos = state_vec[6:7]
+    # Robot joint velocity, consists of [6x robot joint angles, 1x gripper position].
+    state_vel = state['state_vel']
+    arm_joint_vel = state_vel[:6]
+    grip_vel = state_vel[6:7]
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([arm_joint_ang,arm_joint_vel,grip_pos,grip_vel], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,gripper_joint_0_pos,gripper_joint_0_vel")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca2c88d41041fa3a0a92974f40ebdb25ce9b0bd9
--- /dev/null
+++ b/data/preprocess_scripts/austin_sailor_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,91 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, \
+    quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action].
+    action = step['action']
+    eef_delta_pos = action[:3]
+    eef_ang = action[3:6]
+    eef_ang = euler_to_quaternion(eef_ang)  
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+
+    # Concatenate the action
+    # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']['state']
+    joint_pos = step['observation']['state_joint']
+    # Default robot state, consists of [3x robot ee pos, 3x ee quat, 1x gripper state].
+    eef_pos = state[:3]
+    eef_quat = state[3:7]
+    eef_ang = quaternion_to_rotation_matrix(eef_quat)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    gripper_joint_pos = state[7:8] * 12.85  # rescale to [0, 1]
+
+    # Concatenate the state
+    state = step['observation']
+    state['arm_concat'] = tf.concat([joint_pos,gripper_joint_pos,eef_pos,eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+           "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b6d73fe5c2b8b454aea4eb55c5250e333dd953b
--- /dev/null
+++ b/data/preprocess_scripts/austin_sirius_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,93 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    
+    # Convert raw action to our action
+    # Robot action, consists of [3x ee relative pos, 3x ee relative rotation, 1x gripper action].
+    action = step['action']
+    eef_pos = action[:3]
+    eef_ang = action[3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+
+    # Concatenate the action
+    # action['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0)
+    step['action'] = {}
+    action = step['action']
+    
+    action['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Convert raw state to our state
+    joint_pos = step['observation']['state'][:7]
+    grip_open = step['observation']['state'][7:8] * 12.55
+    state_ee = step['observation']['state_ee']
+    # Tensor (16,) End-effector state, represented as 4x4 homogeneous transformation matrix of ee pose.
+    transform_matrix = tf.transpose(tf.reshape(state_ee, (4, 4)))    
+    eef_pos = transform_matrix[:3, 3]
+    rotation_matrix = transform_matrix[:3, :3]
+    eef_ang = rotation_matrix_to_ortho6d(rotation_matrix)
+    # Concatenate the state
+    
+    state = step['observation']
+    state['arm_concat'] = tf.concat([joint_pos,grip_open,eef_pos,eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+           "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/bc_z.py b/data/preprocess_scripts/bc_z.py
new file mode 100644
index 0000000000000000000000000000000000000000..748252ddbeda2219dfad4dfcb22c546299850a61
--- /dev/null
+++ b/data/preprocess_scripts/bc_z.py
@@ -0,0 +1,95 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, \
+    euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    
+    # Convert raw action to our action
+    action = step['action']
+    # The next 10 actions for the positions. Each action is a 3D delta to add to current position.
+    eef_delta_pos = action['future/xyz_residual'][:3]
+    # The next 10 actions for the rotation. Each action is a 3D delta to add to the current axis angle.
+    eef_ang = action['future/axis_angle_residual'][:3]
+    eef_ang = euler_to_quaternion(eef_ang)
+    # The next 10 actions for the gripper. Each action is the value the gripper closure should be changed to (notably it is not a delta.)
+    grip_open = tf.cast(tf.expand_dims(1 - action['future/target_close'][0], axis=0), dtype=tf.float32)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    action['terminate'] = step['is_terminal']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+   
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+
+    gripper_ang = state['present/axis_angle']
+    gripper_ang = euler_to_rotation_matrix(gripper_ang)
+    gripper_ang = rotation_matrix_to_ortho6d(gripper_ang)
+    gripper_pos = state['present/xyz']
+    # How much the gripper is currently closed. Scaled from 0 to 1, but not all values from 0 to 1 are reachable. The range in the data is about 0.2 to 1
+    gripper_open = 1- state['present/sensed_close']
+
+
+    # Concatenate the state
+    state = step['observation']
+    state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['natural_language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/berkeley_fanuc_manipulation.py b/data/preprocess_scripts/berkeley_fanuc_manipulation.py
new file mode 100644
index 0000000000000000000000000000000000000000..d07003cbaf6bbf0e18aec088eab2764d47ee96ec
--- /dev/null
+++ b/data/preprocess_scripts/berkeley_fanuc_manipulation.py
@@ -0,0 +1,81 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, \
+    quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate'] = step['is_terminal']
+    # 6x end effector delta pose, 1x gripper position
+    eef_delta_pos = origin_action[:3]
+    eef_ang=origin_action[3:6]
+    eef_ang = euler_to_quaternion(eef_ang)
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang],axis=0)
+    
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    # [6x robot joint angles, 1x gripper open status, 6x robot joint velocities].
+    arm_joint_ang=state['state'][:6]
+    grip_open=1-state['state'][6:7]
+    # arm_joint_vel=state['state'][7:13] # all zeros
+    eef_pos = state['end_effector_state'][:3]
+    eef_ang = quaternion_to_rotation_matrix(state['end_effector_state'][3:])
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'berkeley_fanuc_manipulation'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/berkeley_gnm_cory_hall.py b/data/preprocess_scripts/berkeley_gnm_cory_hall.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cfa5e5750f0255eff5bfc914e8a385141d26a15
--- /dev/null
+++ b/data/preprocess_scripts/berkeley_gnm_cory_hall.py
@@ -0,0 +1,78 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\
+    rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate'] = step['is_terminal']
+  
+    eef_pos=tf.cast(origin_action,dtype=tf.float32)
+    eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32)
+    eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0))
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0)
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")
+    
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    eef_pos=tf.cast(state['position'],dtype=tf.float32)
+    eef_ang=tf.cast(state['yaw'],dtype=tf.float32)
+    eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0))
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0)
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'berkeley_gnm_cory_hall'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step['action'][6:7])
+
diff --git a/data/preprocess_scripts/berkeley_gnm_recon.py b/data/preprocess_scripts/berkeley_gnm_recon.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cb9b8d6191589159d4e899da8171f4b2e211895
--- /dev/null
+++ b/data/preprocess_scripts/berkeley_gnm_recon.py
@@ -0,0 +1,78 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\
+    rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate'] = step['is_terminal']
+  
+    eef_pos=tf.cast(origin_action,dtype=tf.float32)
+    eef_ang=tf.cast(step['action_angle'][2:3],dtype=tf.float32)
+    eef_ang = euler_to_quaternion(tf.stack([0,0,eef_ang[0]],axis=0))
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0)
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")
+    
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    eef_pos=tf.cast(state['position'],dtype=tf.float32)
+    eef_ang=tf.cast(state['yaw'],dtype=tf.float32)
+    eef_ang = euler_to_rotation_matrix(tf.stack([0,0,eef_ang[0]],axis=0))
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    state['arm_concat'] = tf.concat([eef_pos/100,eef_ang],axis=0)
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_angle_x,eef_angle_y,eef_angle_z,eef_angle_w")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'berkeley_gnm_recon'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step['action'][6:7])
+
diff --git a/data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py b/data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..aef391a432e879bc00b6b43a7f3d0b39f8d98c90
--- /dev/null
+++ b/data/preprocess_scripts/berkeley_mvp_converted_externally_to_rlds.py
@@ -0,0 +1,90 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # 	Robot action, consists of [7 delta joint pos,1x gripper binary state].
+    delta_joint_pos = action[:7]
+    grip_open = tf.expand_dims(1 - action[7], axis=0)
+
+    # Concatenate the action
+    # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([delta_joint_pos, grip_open], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "arm_joint_0_delta_pos,arm_joint_1_delta_pos,arm_joint_2_delta_pos,arm_joint_3_delta_pos,arm_joint_4_delta_pos,arm_joint_5_delta_pos,arm_joint_6_delta_pos,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # xArm joint positions (7 DoF).
+    arm_joint_pos = state['joint_pos']
+    # Binary gripper state (1 - closed, 0 - open)
+    grip_open = tf.expand_dims(1 - tf.cast(state['gripper'],dtype=tf.float32), axis=0)
+    # Gripper pose, robot frame, [3 position, 4 rotation]
+    gripper_pose = state['pose'][:3]
+    # gripper_ang = quaternion_to_euler(state['pose'][3:7])
+    gripper_ang = quaternion_to_rotation_matrix(state['pose'][3:7])
+    gripper_ang = rotation_matrix_to_ortho6d(gripper_ang)
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([arm_joint_pos, gripper_pose,gripper_ang, grip_open], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/bridge.py b/data/preprocess_scripts/bridge.py
new file mode 100644
index 0000000000000000000000000000000000000000..05d1e46ec8a3b61afdb528987be0add4afba67f7
--- /dev/null
+++ b/data/preprocess_scripts/bridge.py
@@ -0,0 +1,90 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
+    
+    eef_delta_pos = action['world_vector']
+    eef_ang=action['rotation_delta']
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open=tf.reshape(tf.where(action['open_gripper'],1.0, 0.0),(1,))
+    # grip_open:tensor
+
+    # No base found
+    
+    # Concatenate the action
+    arm_action=tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0)
+    action['arm_concat']=arm_action
+    #base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0)
+    #action['base_concat'] = base_action
+
+    # Write the action format
+    action['format']=tf.constant("eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+    
+    # Convert raw state to our state
+    state= step['observation']
+    eef_pos=state['state'][:3]
+    eef_ang=state['state'][3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    gripper_action=state['state'][6:]
+    
+    # Concatenate the state
+    state['arm_concat']=tf.concat([eef_pos,eef_ang,gripper_action],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['natural_language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod/'
+    DATASET_NAME = 'bridge'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/bridgev2.py b/data/preprocess_scripts/bridgev2.py
new file mode 100644
index 0000000000000000000000000000000000000000..b937569b939cf88b4dc63cb1256ffc66aba144bb
--- /dev/null
+++ b/data/preprocess_scripts/bridgev2.py
@@ -0,0 +1,179 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+import tensorflow as tf
+import h5py
+import numpy as np
+from tqdm import tqdm
+import os
+import imageio
+import concurrent.futures
+import fnmatch
+import random
+
+
+def _parse_function(proto):
+    keys_to_features = {
+        'observations/images0': tf.io.FixedLenFeature([], tf.string),
+        'observations/state': tf.io.FixedLenFeature([], tf.string),
+        'observations/qpos': tf.io.FixedLenFeature([], tf.string),
+        'observations/eef_transform': tf.io.FixedLenFeature([], tf.string),
+        'language': tf.io.FixedLenFeature([], tf.string),
+        'actions': tf.io.FixedLenFeature([], tf.string),
+        'truncates': tf.io.FixedLenFeature([], tf.int64),
+    }
+
+    parsed_features = tf.io.parse_single_example(proto, keys_to_features)
+
+    observations_images0 = tf.io.parse_tensor(parsed_features['observations/images0'], out_type=tf.uint8)
+    observations_state = tf.io.parse_tensor(parsed_features['observations/state'], out_type=tf.float32)
+    observations_qpos = tf.io.parse_tensor(parsed_features['observations/qpos'], out_type=tf.float32)
+    # observations_eef_transform = tf.io.parse_tensor(parsed_features['observations/eef_transform'], out_type=tf.float32)
+    language = parsed_features['language']
+    actions = tf.io.parse_tensor(parsed_features['actions'], out_type=tf.float32)
+    truncates = parsed_features['truncates']
+    
+    actions = tf.reshape(actions, [7])
+    observations_images0 = tf.reshape(observations_images0, [480, 640, 3])
+    # observations_eef_transform = tf.reshape(observations_eef_transform, [4,4])
+    # observations_eef_transform = extract_angles_and_translation(observations_eef_transform)
+    # observations_eef_transform = tf.reshape(observations_eef_transform, [6])
+    observations_qpos = tf.reshape(observations_qpos, [6])
+    observations_state = tf.reshape(observations_state, [7])
+    
+    return {
+        'observation': {
+            'images0': observations_images0,
+            'state': observations_state,
+            'qpos': observations_qpos,
+        },
+        'language': language,
+        'actions': actions,
+        'truncates': truncates
+    }
+
+
+def dataset_generator_from_tfrecords(seed):
+    tfrecord_path = './data/datasets/bridgev2/tfrecords'
+    filepaths = []
+    for root, dirs, files in os.walk(tfrecord_path):
+        for filename in fnmatch.filter(files, '*.tfrecord'):
+            filepath = os.path.join(root, filename)
+            filepaths.append(filepath)
+            
+    random.seed(seed)
+    random.shuffle(filepaths)
+    for filepath in filepaths:
+        raw_dataset = tf.data.TFRecordDataset(filepath)
+        dataset = raw_dataset.map(_parse_function)
+        yield {
+            'steps': dataset
+        }
+        
+def load_dataset(seed):
+    dataset = tf.data.Dataset.from_generator(
+        lambda: dataset_generator_from_tfrecords(seed),
+        output_signature={
+            'steps': tf.data.DatasetSpec(
+                element_spec={
+                    'observation': {
+                        'images0': tf.TensorSpec(shape=(480, 640, 3), dtype=tf.uint8),
+                        'state': tf.TensorSpec(shape=(7,), dtype=tf.float32),
+                        'qpos': tf.TensorSpec(shape=(6,), dtype=tf.float32),
+                    },
+                    'language': tf.TensorSpec(shape=(), dtype=tf.string),
+                    'actions': tf.TensorSpec(shape=(7,), dtype=tf.float32),
+                    'truncates': tf.TensorSpec(shape=(), dtype=tf.int64)
+                }
+            )
+        }
+    )
+
+    return dataset
+        
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    old_action = step['actions']
+    step['action'] = {}
+    action = step['action']
+    step['action']['terminate'] = step['truncates']
+    # https://github.com/rail-berkeley/bridge_data_robot/blob/main/widowx_envs/widowx_envs/utils/transformation_utils.py line 154
+    eef_delta_pos = old_action[:3]
+    eef_ang = old_action[3:6]
+    eef_ang = euler_to_quaternion(eef_ang)
+    gripper_state = old_action[6]
+    # https://github.com/rail-berkeley/bridge_data_robot/blob/main/widowx_envs/widowx_envs/base/robot_base_env.py line 231
+    # gripper_open = tf.constant(0.0,dtype=tf.float32) if gripper_state < 0.5 else tf.constant(1.0,dtype=tf.float32)
+    gripper_open = tf.cond(tf.less(gripper_state, 0.5), lambda: tf.constant(0.0, dtype=tf.float32), lambda: tf.constant(1.0, dtype=tf.float32))
+    gripper_open = tf.expand_dims(gripper_open,axis=0)
+
+    # # No base found
+    # # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang,gripper_open], axis=0)
+    action['arm_concat'] = arm_action
+    # # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z, eef_delta_angle_x, eef_delta_angle_y, eef_delta_angle_z, eef_delta_angle_w, gripper_open")
+
+    old_state = step['observation']['state']
+    qpos = step['observation']['qpos']
+    state = step['observation']
+
+    # https://github.com/rail-berkeley/bridge_data_robot/blob/main/widowx_envs/widowx_envs/base/robot_base_env.py line 292
+    eef_pos = old_state[:3]
+    eef_ang = old_state[3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    gripper_open = old_state[6:]
+    # gripper_open = tf.cond(tf.less(gripper_state, 0.5), lambda: tf.constant(0.0, dtype=tf.float32), lambda: tf.constant(1.0, dtype=tf.float32))
+    # gripper_open = tf.expand_dims(gripper_open,axis=0)
+
+    state['arm_concat'] = tf.concat([qpos,gripper_open,eef_pos,eef_ang], axis=0)
+    # # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    # copied from openxembod
+    instr = step['language']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    # Load the dataset
+    dataset = load_dataset(0)
+    for episode in dataset.take(1):
+        for step in episode['steps']:
+            step = process_step(step)
+            print(step)
+            break
+
diff --git a/data/preprocess_scripts/columbia_cairlab_pusht_real.py b/data/preprocess_scripts/columbia_cairlab_pusht_real.py
new file mode 100644
index 0000000000000000000000000000000000000000..24dfa078ea1dc4e63078b81ea67bd94aea817bd5
--- /dev/null
+++ b/data/preprocess_scripts/columbia_cairlab_pusht_real.py
@@ -0,0 +1,84 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
+
+    eef_delta_pos = action['world_vector']
+    eef_ang = action['rotation_delta']
+    eef_ang = euler_to_quaternion(eef_ang)
+
+    # Ignore action['gripper_open']: 1 if close gripper, -1 if open gripper, 0 if no change.
+
+    # No base found
+
+    # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
+    action['arm_concat'] = arm_action
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")
+
+    # Convert raw state to our state
+    state = step['observation']
+    eef_pos = state['robot_state']
+    
+    # Concatenate the state
+    state['arm_concat'] = eef_pos
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['natural_language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'columbia_cairlab_pusht_real'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py b/data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..e41efb117f8bf41855987fccc6c4be723e03470c
--- /dev/null
+++ b/data/preprocess_scripts/dlr_edan_shared_control_converted_externally_to_rlds.py
@@ -0,0 +1,89 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class].
+    eef_delta_pos = action[:3]
+    eef_ang = tf.gather(action[3:6], [1, 2, 0])  # To Rotation.as_euler("xyz")
+    eef_ang = euler_to_quaternion(eef_ang)
+    # After watching the episode, I found that the last action is the gripper open/close action, where 0 means open and 1 means close.
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    state_vec = state['state']
+    # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy") Class, 6x robot EEF wrench].
+    eef_pos = state_vec[:3]
+    eef_ang = tf.gather(state_vec[3:6], [1, 2, 0])  # To Rotation.as_euler("xyz")
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    grip_open = tf.expand_dims(1 - state_vec[6], axis=0)
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([eef_pos, eef_ang,grip_open], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py b/data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..41fa0c794d5e02e9b8ec076bde8361a4044104e3
--- /dev/null
+++ b/data/preprocess_scripts/dlr_sara_grid_clamp_converted_externally_to_rlds.py
@@ -0,0 +1,78 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class].
+    eef_delta_pos = action[:3]
+    eef_ang = tf.gather(action[3:6], [1, 2, 0])  # To Rotation.as_euler("xyz")
+    eef_ang = euler_to_quaternion(eef_ang)
+    # After watching the episode, I found that the last action is the gripper open/close action, where 0 means open and 1 means close.
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    state_vec = state['state']
+    # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler("zxy") Class, 6x robot EEF wrench].
+    eef_pos = state_vec[:3]
+    eef_ang = tf.gather(state_vec[3:6], [1, 2, 0])  # To Rotation.as_euler("xyz")
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    # joint_pos = state_vec[6:12]   # EEF wrench is not the joint posit 
+    # Concatenate the state
+    # state['arm_concat'] = tf.concat([joint_pos,eef_pos, eef_ang], axis=0)
+    state['arm_concat'] = tf.concat([eef_pos, eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Define the task instruction
+    step['observation']['natural_language_instruction'] = tf.constant(
+        "Place the grid clamp on the table, aligning the protrusion on its bottom with the slots on the table.")
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py b/data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..2163f69fd31e548f61ce3d5b73a196e8e2440fc6
--- /dev/null
+++ b/data/preprocess_scripts/dlr_sara_pour_converted_externally_to_rlds.py
@@ -0,0 +1,88 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix,\
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class].
+    eef_delta_pos = action[:3]
+    eef_ang = tf.gather(action[3:6], [1, 2, 0])  # To Rotation.as_euler("xyz")
+    eef_ang = euler_to_quaternion(eef_ang)
+    # After watching the episode, I found that the last action is the gripper open/close action, where 0 means open and 1 means close.
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,grip_open], axis=0)
+    action['terminate'] = step['is_terminal']
+    
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    state_vec = state['state']
+    # Robot state, consists of [3x robot EEF position, 3x robot EEF orientation yaw/pitch/roll calculated with scipy Rotation.as_euler(="zxy") Class].
+    eef_pos = state_vec[:3]
+    eef_ang = tf.gather(state_vec[3:6], [1, 2, 0])  # To Rotation.as_euler("xyz")
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([ eef_pos, eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/dobbe.py b/data/preprocess_scripts/dobbe.py
new file mode 100644
index 0000000000000000000000000000000000000000..27464d936826cae285d3b77e2654623289d8a8c6
--- /dev/null
+++ b/data/preprocess_scripts/dobbe.py
@@ -0,0 +1,86 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    arm_action = step['action']
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = arm_action
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open")
+    action['terminate'] = step['is_terminal']
+
+    # The state has any problem?
+    state = step['observation']
+    eef_pos = state['xyz']
+    # Clip eef_pos to be [-10, 10] for stability
+    eef_pos = tf.clip_by_value(eef_pos, -10, 10)
+    eef_ang = state['rot']
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    grip_pos = state['gripper']
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([
+        grip_pos,eef_pos,eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+    from tqdm import tqdm
+    import numpy as np
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'dobbe'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+    # dataset = dataset.filter(
+    #     lambda x: tf.math.less(
+    #         tf.math.reduce_max(tf.math.abs(
+    #             tf.convert_to_tensor(
+    #                 [step['observation']['xyz'] for step in x['steps']]))), 3))
+
+    # Inspect the dataset
+    for i, episode in tqdm(enumerate(dataset), total=5208):
+        res = []
+        for step in episode['steps']:
+            res.append(step['observation']['xyz'].numpy())
+        max_val = np.max(np.abs(res))
+        if max_val > 2:
+            print(f"Episode {i} has a max value of {max_val}")
diff --git a/data/preprocess_scripts/eth_agent_affordances.py b/data/preprocess_scripts/eth_agent_affordances.py
new file mode 100644
index 0000000000000000000000000000000000000000..683509d038901c63b3c83e4d9c4572160389edcf
--- /dev/null
+++ b/data/preprocess_scripts/eth_agent_affordances.py
@@ -0,0 +1,77 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate'] = step['is_terminal']
+
+    eef_vel = origin_action[:3]
+    eef_ang_vel=origin_action[3:6]
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = tf.concat([eef_vel,eef_ang_vel],axis=0)
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    eef_pos = state['state'][:3]
+    eef_ang = tf.gather(state['state'][3:6], [2, 1, 0])
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    grip_open=state['state'][6:7]
+    # state['state'][8]:door opening angle
+    state['arm_concat'] = tf.concat([eef_pos,eef_ang,grip_open],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'eth_agent_affordances'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/fmb.py b/data/preprocess_scripts/fmb.py
new file mode 100644
index 0000000000000000000000000000000000000000..92a057c1b68f002ba061901b1d7b9f5d385b3208
--- /dev/null
+++ b/data/preprocess_scripts/fmb.py
@@ -0,0 +1,78 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d, \
+    quaternion_to_rotation_matrix
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+
+    # Robot action
+    eef_pos = action[:3]
+    eef_ang = action[3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    gripper_open = action[6:7]
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    arm_action = tf.concat([eef_pos, eef_ang, gripper_open], axis=0)
+    action['arm_concat'] = arm_action
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Convert raw state to our state
+    # Robot state
+    state = step['observation']
+    eef_pos = state['eef_pose'][:3]
+    eef_ang = state['eef_pose'][3:]
+    eef_ang = quaternion_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    eef_pos_vel = state['eef_vel'][:3]
+    eef_ang_vel = state['eef_vel'][3:]
+    joint_pos = state['joint_pos']
+    joint_vel = state['joint_vel']
+    grip_pos = 1 - state['state_gripper_pose']
+    grip_pos = tf.expand_dims(grip_pos, axis=0)
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([
+        joint_pos,joint_vel,grip_pos,eef_pos,eef_ang,eef_pos_vel,eef_ang_vel], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        # Refine language instruction:
+        'object': 'brick and insert it into the slot of the matching shape'    
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/furniture_bench_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/furniture_bench_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c2af1830314ad04d08ec7b886f5ee20f09898f9
--- /dev/null
+++ b/data/preprocess_scripts/furniture_bench_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,98 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+
+    # No base found
+    eef_pos_delta=origin_action[:3]
+    eef_quat_delta=origin_action[3:7]
+    eef_ang = eef_quat_delta
+    # eef_ang=quaternion_to_euler(eef_quat_delta)
+    grip_open=-origin_action[7:8]
+
+    action["arm_concat"]=tf.concat([eef_pos_delta,eef_ang,grip_open],axis=0)
+    action['format']=tf.constant("eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+    # Ignore action
+    action['terminate'] = step['is_terminal']
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    
+    ''' 'state':
+    {
+      'ee_pos': EEF position (3,)
+      'ee_quat': EEF orientation (4,)
+      'ee_pos_vel': EEF linear velocity (3,)
+      'ee_ori_vel': EEF angular velocity (3,)
+      'joint_positions': Joint positions (7,)
+      'joint_velocities': Joint velocities (7,)
+      'joint_torques': Joint torques (7,)
+      'gripper_width': Gripper width (1,)
+    }'''
+    eef_pos=state['state'][:3]
+    # eef_ang=quaternion_to_euler(state['state'][3:7])
+    eef_ang = quaternion_to_rotation_matrix(state['state'][3:7])
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    eef_pos_vel=state['state'][7:10]
+    eef_ang_vel=state['state'][10:13]
+    arm_joint_pos=state['state'][13:20]
+    arm_joint_vel=state['state'][20:27]
+    arm_joint_tor=state['state'][27:34]
+    # gripper_width?
+    grip_open=state['state'][34:35] * 12.507    # rescale to [0, 1]
+
+    state['arm_concat'] = tf.concat([arm_joint_pos,grip_open,arm_joint_vel,\
+    eef_pos,eef_ang,eef_pos_vel,eef_ang_vel],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'furniture_bench_dataset_converted_externally_to_rlds'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py b/data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py
new file mode 100644
index 0000000000000000000000000000000000000000..44c8711fcba56254f21fe89e515612990de050e8
--- /dev/null
+++ b/data/preprocess_scripts/imperialcollege_sawyer_wrist_cam.py
@@ -0,0 +1,77 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action'] = {}
+    action = step['action']
+
+    # Multiplied by 10 Hz to get units m/s and rad/s
+    eef_delta_pos = origin_action[:3] * 10
+    # delta ZYX euler angles == roll/pitch/yaw velocities
+    eef_ang = origin_action[3:6] * 10
+    grip_open = 1 - origin_action[6:7]
+
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open],axis=0)
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open")
+
+    # Convert raw state to our state
+    # state = step['observation']
+    # # Concatenate the state
+    # grip_open=state['state']
+    # state['arm_concat'] = grip_open
+
+    # Write the state format
+    # state['format'] = tf.constant(
+    #     "gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'imperialcollege_sawyer_wrist_cam'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/jaco_play.py b/data/preprocess_scripts/jaco_play.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b3401b850ecdf4503d766a9d3fd9807eff625df
--- /dev/null
+++ b/data/preprocess_scripts/jaco_play.py
@@ -0,0 +1,91 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
+    eef_delta_pos = action['world_vector']
+    # eef_ang = action['rotation_delta']
+    # (NOTE) due to the formality problem, grip_open is not used
+    # grip_open = 1 - (action['gripper_closedness_action'] ) / 2
+    # base_delta_pos = action['base_displacement_vector']
+    # base_delta_ang = action['base_displacement_vertical_rotation']
+
+    # Concatenate the action
+    arm_action = eef_delta_pos
+    action['arm_concat'] = arm_action
+    # base_action = tf.constant([0, 0, 0, 0], dtype=tf.float32)
+    # action['base_concat'] = None
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z")
+
+    # Convert raw state to our state
+    state = step['observation']
+    joint_pos = state['joint_pos']
+    eef_pos = state['end_effector_cartesian_pos'][:3]
+    eef_quat = state['end_effector_cartesian_pos'][3:]
+    eef_ang = quaternion_to_rotation_matrix(eef_quat)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    eef_vel = state['end_effector_cartesian_velocity'][:3]
+    # We do not use angular velocity since it is very inaccurate in this environment
+    # eef_angular_vel = state['end_effector_cartesian_velocity'][3:]
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([joint_pos, eef_pos, eef_ang, eef_vel], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['natural_language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'jaco_play'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/kaist_nonprehensile_converted_externally_to_rlds.py b/data/preprocess_scripts/kaist_nonprehensile_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..acfd5fe1a8bf5637c14777ec72341d0cf5e7ee93
--- /dev/null
+++ b/data/preprocess_scripts/kaist_nonprehensile_converted_externally_to_rlds.py
@@ -0,0 +1,91 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, quaternion_to_rotation_matrix,\
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x end-effector position residual, 3x end-effector axis-angle residual, 
+    # 7x robot joint k_p gain coefficient, 7x robot joint damping ratio coefficient].
+    # The action residuals are global, i.e. multiplied on theleft-hand side of the current end-effector state.
+    eef_delta_pos = action[:3]
+    eef_ang = action[3:6] 
+    eef_ang = euler_to_quaternion(eef_ang)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['terminate'] = step['is_terminal']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang,], axis=0)
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Robot state, consists of [joint_states, end_effector_pose].Joint states are 14-dimensional, formatted in the order of [q_0, w_0, q_1, w_0, ...].
+    # In other words, joint positions and velocities are interleaved.The end-effector pose is 7-dimensional, formatted in the order of [position, quaternion].The quaternion is formatted in (x,y,z,w) order. The end-effector pose references the tool frame, in the center of the two fingers of the gripper.
+    joint_states = state['state'][:14]
+    arm_joint_pos = joint_states[::2]
+    arm_joint_vel = joint_states[1::2]
+    eef_pos = state['state'][14:17]
+    # eef_ang = quaternion_to_euler(state['state'][17:21])
+    eef_ang = quaternion_to_rotation_matrix(state['state'][17:21])
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, eef_pos, eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/kuka.py b/data/preprocess_scripts/kuka.py
new file mode 100644
index 0000000000000000000000000000000000000000..156e239a4af9b19b24cce8dde115bf972bacf743
--- /dev/null
+++ b/data/preprocess_scripts/kuka.py
@@ -0,0 +1,107 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, quaternion_to_rotation_matrix,\
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # keys_view = step.keys()
+    # print("step")
+    # print(list(keys_view))
+    # Convert raw action to our action
+    action: dict = step['action']
+    # print("action")
+    # print(list(action.keys()))
+    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
+
+    eef_delta_pos = action['world_vector']
+    eef_ang = action['rotation_delta']
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open = 1 - (action['gripper_closedness_action'] + 1) / 2
+    base_delta_pos = action['base_displacement_vector']
+    base_delta_ang = action['base_displacement_vertical_rotation']
+
+    # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    action['arm_concat'] = arm_action
+    base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0)
+    action['base_concat'] = base_action
+    
+    # print("action len:", len(action['arm_concat']) + len(action['base_concat']))
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open,base_vel_x,base_vel_y,base_angular_vel")
+
+    # action good for kuka same as example
+    
+    # Convert raw state to our state
+    state = step['observation']
+    # print("state")
+    # print(list(state.keys()))
+    eef_pos = state['clip_function_input/base_pose_tool_reached'][:3]
+    eef_ang = quaternion_to_rotation_matrix(state['clip_function_input/base_pose_tool_reached'][3:])
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    grip_open = 1 - state['gripper_closed']
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([eef_pos, eef_ang, grip_open], axis=0)
+    # print("state len:", len(state['arm_concat']))
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['natural_language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'kuka'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # with open('example.txt', 'w') as file:
+    # Inspect the dataset
+    episode_num = len(dataset)
+    for episode in dataset:
+        # print("episode")
+        # print(list(episode.keys()))
+        for step in episode['steps']:
+            process_step(step)
+            # break
+    print(f"episode_num: {episode_num}")
diff --git a/data/preprocess_scripts/language_table.py b/data/preprocess_scripts/language_table.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d369f4178ba919233d876265ceb5e6984186293
--- /dev/null
+++ b/data/preprocess_scripts/language_table.py
@@ -0,0 +1,75 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    
+    eef_delta_pos=origin_action
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = eef_delta_pos
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y")
+    
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    eef_pos=state['effector_translation']
+    state['arm_concat'] = eef_pos
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['instruction']
+    # Convert bytes to tf.string
+    instr = tf.strings.unicode_encode(instr, 'UTF-8')
+    # Remove '\x00'
+    instr = tf.strings.regex_replace(instr, '\x00', '')
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'language_table'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
+
diff --git a/data/preprocess_scripts/libero_10_no_noops.py b/data/preprocess_scripts/libero_10_no_noops.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c3fba7145744db1176d899581239b31cb6868d2
--- /dev/null
+++ b/data/preprocess_scripts/libero_10_no_noops.py
@@ -0,0 +1,82 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+
+    # Robot action
+    # eef_pos = action_dict['ee_pos'][:3]
+    # eef_ang = action_dict['ee_pos'][3:6]
+    # eef_ang = euler_to_rotation_matrix(eef_ang)
+    # eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    eef_pos_vel = action_dict[:3]
+    eef_ang_vel = action_dict[3:6]
+    # joint_pos = action_dict['joint_pos'][:-1]
+    # joint_vel = action_dict['delta_joint'][:-1]
+    grip_pos = 1 - tf.clip_by_value(action_dict[-1:], 0, 1)
+    
+    # grip_vel = action_dict['gripper_velocity']
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    arm_action = tf.concat([eef_pos_vel, eef_ang_vel, grip_pos], axis=0)
+    action['arm_concat'] = arm_action
+    # action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_pos")
+
+    # Convert raw state to our state
+    # Robot state
+    state = step['observation']
+    # print(state.keys())
+    # image = step['observation']['image']
+    eef_pos = state['state'][:3]
+    eef_ang = state['state'][3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    # joint_pos = state['joint_pos'][:-1]
+    grip_pos = state['state'][-2:]
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([
+        grip_pos,eef_pos,eef_ang], axis=0)
+    
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/libero_object_no_noops.py b/data/preprocess_scripts/libero_object_no_noops.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c3fba7145744db1176d899581239b31cb6868d2
--- /dev/null
+++ b/data/preprocess_scripts/libero_object_no_noops.py
@@ -0,0 +1,82 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+
+    # Robot action
+    # eef_pos = action_dict['ee_pos'][:3]
+    # eef_ang = action_dict['ee_pos'][3:6]
+    # eef_ang = euler_to_rotation_matrix(eef_ang)
+    # eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    eef_pos_vel = action_dict[:3]
+    eef_ang_vel = action_dict[3:6]
+    # joint_pos = action_dict['joint_pos'][:-1]
+    # joint_vel = action_dict['delta_joint'][:-1]
+    grip_pos = 1 - tf.clip_by_value(action_dict[-1:], 0, 1)
+    
+    # grip_vel = action_dict['gripper_velocity']
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    arm_action = tf.concat([eef_pos_vel, eef_ang_vel, grip_pos], axis=0)
+    action['arm_concat'] = arm_action
+    # action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_pos")
+
+    # Convert raw state to our state
+    # Robot state
+    state = step['observation']
+    # print(state.keys())
+    # image = step['observation']['image']
+    eef_pos = state['state'][:3]
+    eef_ang = state['state'][3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    # joint_pos = state['joint_pos'][:-1]
+    grip_pos = state['state'][-2:]
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([
+        grip_pos,eef_pos,eef_ang], axis=0)
+    
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/maniskill_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/maniskill_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef89473445e0cc014128fb073e96c6306962704b
--- /dev/null
+++ b/data/preprocess_scripts/maniskill_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,106 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+
+    # Convert raw action to our action
+    action = step['action']
+
+    # Robot action, consists of [3x end effector delta target position, 3x end effector delta target orientation in axis-angle format, 1x gripper target position (mimic for two fingers)]. 
+    # For delta target position, an action of -1 maps to a robot movement of -0.1m, and action of 1 maps to a movement of 0.1m.
+    # For delta target orientation, its encoded angle is mapped to a range of [-0.1rad, 0.1rad] for robot execution. For example, an action of [1, 0, 0] means rotating along the x-axis by 0.1 rad. 
+    # For gripper target position, an action of -1 means close, and an action of 1 means open.
+    eef_delta_pos = action[:3] * 0.1
+    eef_ang = action[3:6] * 0.1
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open = tf.expand_dims((action[6] + 1) / 2, axis=0)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    action['arm_concat'] = arm_action
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    # Robot state, consists of [7x robot joint angles, 2x gripper position, 7x robot joint angle velocity, 2x gripper velocity]. Angle in radians, position in meters.
+    state = step['observation']['state']
+    arm_joint_pos = state[:7]
+    gripper_pos = state[7:9] * 25   # rescale to [0, 1]
+    # We do not use velocity since it is very inaccurate in this environment
+    # arm_joint_vel = state[9:16]
+    # gripper_vel = state[16:18]
+
+    # Concatenate the state
+    
+    state = step['observation']
+    state['arm_concat'] = tf.concat([
+        arm_joint_pos, gripper_pos], axis=0)
+    
+    # Robot base pose in the world frame, consists of [x, y, z, qw, qx, qy, qz]. 
+    # The first three dimensions represent xyz positions in meters. The last four dimensions are the quaternion representation of rotation
+    # base_pose = step['observation']['base_pose']
+    # base_pose_xyz = base_pose[:3]
+    # base_pose_ang = quaternion_to_euler(base_pose[3:])
+    # processed_base_pose = tf.concat([base_pose_xyz, base_pose_ang], axis=0)
+    
+    # state['arm_concat'] = tf.concat([state['arm_concat'], processed_base_pose], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,gripper_joint_1_pos")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'maniskill_dataset_converted_externally_to_rlds'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset.take(1):
+        for step in episode['steps']:
+            print(step['is_last'])
diff --git a/data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py b/data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c2ea0f9338f90e5240ac6748725f3af8f9ca5d3
--- /dev/null
+++ b/data/preprocess_scripts/nyu_door_opening_surprising_effectiveness.py
@@ -0,0 +1,77 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
+
+    # Multiplied by 3 Hz to get units m/s and rad/s
+    eef_delta_pos = action['world_vector'] * 3
+    eef_ang = action['rotation_delta'] * 3
+    # Origin: [-0.28, 0.96]: open, close
+    # 1-Origin: [0.04, 1.28]: close, open
+    grip_open = 1 - action['gripper_closedness_action']
+    # base_delta_pos = action['base_displacement_vector']
+    # base_delta_ang = action['base_displacement_vertical_rotation']
+
+    # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    action['arm_concat'] = arm_action
+    # base_action = tf.concat([base_delta_pos, base_delta_ang], axis=0)
+    # action['base_concat'] = base_action
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_open")
+
+    # Convert raw state to our state
+    # state = step['observation']
+    # eef_pos = state['base_pose_tool_reached'][:3]
+    # eef_ang = quaternion_to_euler(state['base_pose_tool_reached'][3:])
+    # grip_open = 1 - state['gripper_closed']
+
+    # create empty tensor
+    # state['arm_concat'] = tf.constant([0, 0, 0, 0, 0, 0], dtype=tf.float32)
+
+    # Write the state format
+    # state['format'] = tf.constant(
+    #     "")
+
+    # Define the task instruction
+    step['observation']['natural_language_instruction'] = tf.constant(
+        "Open the cabinet door.")
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'nyu_door_opening_surprising_effectiveness'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset.take(1):
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..e950ae3fc4c7989d50080a8493364f05c29b10e3
--- /dev/null
+++ b/data/preprocess_scripts/nyu_franka_play_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,87 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate']=terminate_act_to_bool(origin_action[14])
+    joint_vel=origin_action[:7]
+    # gripper_open: 1-open, -1-closed
+    
+    grip_open=tf.where(tf.equal(origin_action[13:14],tf.constant(1.0)),tf.constant(1.0),tf.constant(0.0))
+    eef_pos=origin_action[7:10]
+    eef_ang=origin_action[10:13]
+    eef_ang = euler_to_quaternion(eef_ang)
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = tf.concat([joint_vel,grip_open,eef_pos,eef_ang],axis=0)
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    state_arm_joint = state['state'][:7]
+    eef_pos = state['state'][7:10]
+    eef_ang = euler_to_rotation_matrix(state['state'][10:13])
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    state['arm_concat'] = tf.concat([state_arm_joint,eef_pos,eef_ang],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'nyu_franka_play_dataset_converted_externally_to_rlds'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/qut_dexterous_manpulation.py b/data/preprocess_scripts/qut_dexterous_manpulation.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ce6835740513c5bf159a11427b07ff7da5bf3c1
--- /dev/null
+++ b/data/preprocess_scripts/qut_dexterous_manpulation.py
@@ -0,0 +1,68 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    eef_pos = action[:3]
+    eef_quat = action[3:7]
+    eef_ang = quaternion_to_rotation_matrix(eef_quat)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    grip_pos = action[7:]
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([
+        grip_pos,eef_pos,eef_ang], axis=0)
+    # Write the action format
+    action['format'] = tf.constant(
+        "gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+    action['terminate'] = step['is_terminal']
+
+    # Convert raw state to our state
+    state = step['observation']
+    robot_state = state['state']
+    eef_pos = robot_state[:3]
+    eef_quat = robot_state[3:7]
+    eef_ang = quaternion_to_rotation_matrix(eef_quat)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    joint_pos = robot_state[7:14]
+    grip_pos = robot_state[14:16] * 24.707  # rescale to [0, 1]
+    joint_vel = robot_state[16:23]
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([
+        grip_pos,eef_pos,eef_ang,joint_pos,joint_vel], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "gripper_joint_0_pos,gripper_joint_1_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/rh20t.py b/data/preprocess_scripts/rh20t.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ea6c3495df1a5ca1a1568a8981a7f7de5dc3a04
--- /dev/null
+++ b/data/preprocess_scripts/rh20t.py
@@ -0,0 +1,214 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_rotation_matrix_wo_static_check, \
+    rotation_matrix_to_ortho6d_1d
+import tensorflow as tf
+import h5py
+import numpy as np
+from tqdm import tqdm
+import os
+import imageio
+import concurrent.futures
+import fnmatch
+import cv2
+import random
+
+def decode_image(image_data):
+    image_data = np.frombuffer(image_data, dtype=np.uint8)
+    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
+    return image
+
+def _parse_function(proto):
+    feature_description = {
+        'joint': tf.io.FixedLenFeature([], tf.string),
+        'image': tf.io.FixedLenFeature([], tf.string),
+        'instruction': tf.io.FixedLenFeature([], tf.string),
+        'terminate_episode': tf.io.FixedLenFeature([], tf.int64),
+        'gripper': tf.io.FixedLenFeature([], tf.string, default_value=""), 
+        'tcp': tf.io.FixedLenFeature([], tf.string, default_value=""),
+        'tcp_base': tf.io.FixedLenFeature([], tf.string, default_value="")
+    }
+    parsed_features = tf.io.parse_single_example(proto, feature_description)
+    
+    parsed_features['joint'] = tf.io.parse_tensor(parsed_features['joint'], out_type=tf.float64)
+    parsed_features['image'] = tf.io.parse_tensor(parsed_features['image'], out_type=tf.string)
+    parsed_features['instruction'] = tf.io.parse_tensor(parsed_features['instruction'], out_type=tf.string)
+    parsed_features['gripper'] = tf.cond(
+        tf.math.equal(parsed_features['gripper'], ""),
+        lambda: tf.constant([], dtype=tf.float64),
+        lambda: tf.reshape(tf.io.parse_tensor(parsed_features['gripper'], out_type=tf.float64), [3])
+    )
+    parsed_features['tcp_base'] = tf.cond(
+        tf.math.equal(parsed_features['tcp_base'], ""),
+        lambda: tf.constant([], dtype=tf.float64),
+        lambda: tf.reshape(tf.io.parse_tensor(parsed_features['tcp_base'], out_type=tf.float64),[7])
+    )
+    
+    image = tf.image.decode_jpeg(parsed_features['image'],channels=3)
+    joint = parsed_features['joint']
+    shape = tf.shape(joint)[0]
+    joint = tf.reshape(joint, [shape])
+
+    instruction = parsed_features['instruction']
+    terminate_episode = tf.cast(parsed_features['terminate_episode'],tf.int64)
+    gripper = parsed_features['gripper']
+    tcp_base = parsed_features['tcp_base']
+    return {
+        'joint': joint,
+        'observation':{
+            'image': image,
+        },
+        'instruction': instruction,
+        'terminate_episode': terminate_episode,
+        'gripper': gripper,
+        'tcp_base': tcp_base,
+    }
+
+def dataset_generator_from_tfrecords(seed):
+    tfrecord_path = './data/datasets/rh20t/tfrecords/'
+    datasets = []
+    filepaths = []
+    for root, dirs, files in os.walk(tfrecord_path):    
+        for filename in fnmatch.filter(files, '*.tfrecord'):
+            filepath = os.path.join(root, filename)
+            filepaths.append(filepath)
+
+    random.seed(seed)
+    random.shuffle(filepaths)
+    for filepath in filepaths:
+        raw_dataset = tf.data.TFRecordDataset(filepath)
+        dataset = raw_dataset.map(_parse_function)
+        num = 0
+        for data in dataset:
+            num = num + 1
+            if data['joint'].shape != tf.TensorShape((6,)) and data['joint'].shape != tf.TensorShape((7,)):
+                num = 0
+                break
+        if num <= 1: # discard dataset
+            continue
+        yield {
+            'steps': dataset
+        }
+        
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.int64)),tf.constant(False),tf.constant(True))
+
+
+def load_dataset(seed):
+    dataset = tf.data.Dataset.from_generator(
+        lambda: dataset_generator_from_tfrecords(seed),
+        output_signature={
+            'steps': tf.data.DatasetSpec(
+                element_spec={
+                    'joint': tf.TensorSpec(shape=(None), dtype=tf.float64),
+                    'observation':{
+                    'image': tf.TensorSpec(shape=(None), dtype=tf.uint8),
+                    },
+                    'instruction': tf.TensorSpec(shape=(), dtype=tf.string),
+                    'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.int64),
+                    'gripper': tf.TensorSpec(shape=(None), dtype=tf.float64),
+                    'tcp_base': tf.TensorSpec(shape=(None), dtype=tf.float64),
+                }
+            )
+        }
+    )
+
+    return dataset
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0, dtype=tf.int64)),tf.constant(False),tf.constant(True))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+    
+    
+    # Convert raw action to our action
+    step['action'] = {}
+    step['action']['terminate'] = terminate_act_to_bool(step['terminate_episode'])
+
+    state = step['observation']
+    joint_pos = step['joint']
+    state['arm_concat'] = joint_pos
+    
+    state_format = ""
+    state_format = tf.cond(
+        tf.equal(tf.shape(joint_pos), tf.shape(tf.zeros((7,), dtype=tf.float64))),
+        lambda: tf.constant("arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos"),
+        lambda: state_format
+    )
+    state_format = tf.cond(
+        tf.equal(tf.shape(joint_pos), tf.shape(tf.zeros((6,), dtype=tf.float64))),
+        lambda: tf.constant("arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos"),
+        lambda: state_format
+    )
+        
+    # eef
+    eef = step['tcp_base']
+    state['arm_concat'] = tf.cond(
+        tf.equal(tf.shape(eef), tf.shape(tf.zeros((7,), dtype=tf.float64))),
+        lambda: tf.concat([
+            state['arm_concat'], 
+            tf.concat([
+                eef[:3], rotation_matrix_to_ortho6d_1d(
+                    quaternion_to_rotation_matrix_wo_static_check(eef[3:]))], axis=0)], axis=0),
+        lambda: state['arm_concat']
+    )
+    state_format = tf.cond(
+        tf.equal(tf.shape(eef), tf.shape(tf.zeros((7,), dtype=tf.float64))),
+        lambda: tf.strings.join([state_format, tf.constant("eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")],separator = ','),
+        lambda: state_format
+    )
+    
+    # gripper
+    state['arm_concat'] = tf.cond(
+        tf.equal(tf.shape(step['gripper']), tf.shape(tf.zeros((3,), dtype=tf.float64))),
+        lambda: tf.concat([state['arm_concat'], step['gripper'][0:1] / 110], axis=0),
+        lambda: state['arm_concat']
+    )
+    state_format = tf.cond(
+        tf.equal(tf.shape(step['gripper']), tf.shape(tf.zeros((3,), dtype=tf.float64))),
+        lambda: tf.strings.join([state_format, tf.constant("gripper_joint_0_pos")],separator = ','),
+        lambda: state_format
+    )
+    
+    state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32)
+    state['format'] = state_format
+    
+    return step
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+    
+    # Load the dataset
+    dataset = load_dataset(0)
+    for data in dataset:
+        for step in data['steps']:
+            step = process_step(step)
+            print(step['observation']['format'])
diff --git a/data/preprocess_scripts/robomimic_can_ph.py b/data/preprocess_scripts/robomimic_can_ph.py
new file mode 100644
index 0000000000000000000000000000000000000000..c43130cdd3ded9201d30e0aade4a179ee7604505
--- /dev/null
+++ b/data/preprocess_scripts/robomimic_can_ph.py
@@ -0,0 +1,96 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_euler
+
+def load_dataset():
+    builder = tfds.builder('robomimic_ph/can_ph_image')
+    builder.download_and_prepare()
+    ds = builder.as_dataset(split='train', shuffle_files=True)
+    return ds
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg
+    # Convert raw action to our action
+    eef = step['action']
+    step['action'] = {}
+    action = step['action']
+    action['terminate'] = step['is_terminal']
+
+    eef_delta_pos = eef[:3]
+    eef_ang = eef[3:6]
+    # No base found
+
+    # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
+    action['arm_concat'] = arm_action
+    
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw")
+
+    # Convert raw state to our state
+    state = step['observation']
+    
+    arm_joint_pos = state['robot0_joint_pos']
+    arm_joint_vel = state['robot0_joint_vel']
+    gripper_pos = state['robot0_gripper_qpos']
+    gripper_vel = state['robot0_gripper_qvel']
+    eef_pos = state['robot0_eef_pos']
+    eef_ang = quaternion_to_euler(state['robot0_eef_quat'])
+    
+    state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0)
+    # convert to tf32
+    state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32)
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    # manual added by lbg
+    instr = "lift the can into the the box"
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'roboturk'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all').take(1)
+
+    # Inspect the dataset
+    ze=tf.constant(0.0)
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
+            break
diff --git a/data/preprocess_scripts/robomimic_tool_hang_ph.py b/data/preprocess_scripts/robomimic_tool_hang_ph.py
new file mode 100644
index 0000000000000000000000000000000000000000..93b909912dabc04ac260c6051904fe8631892c22
--- /dev/null
+++ b/data/preprocess_scripts/robomimic_tool_hang_ph.py
@@ -0,0 +1,97 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_euler
+
+
+def load_dataset():
+    builder = tfds.builder('robomimic_ph/tool_hang_ph_image')
+    builder.download_and_prepare()
+    ds = builder.as_dataset(split='train', shuffle_files=True)
+    return ds
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg
+    # Convert raw action to our action
+    eef = step['action']
+    step['action'] = {}
+    action = step['action']
+    action['terminate'] = step['is_terminal']
+
+    eef_delta_pos = eef[:3]
+    eef_ang = quaternion_to_euler(eef[3:])
+    
+    # No base found
+
+    # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
+    action['arm_concat'] = arm_action
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw")
+
+    # Convert raw state to our state
+    state = step['observation']
+    arm_joint_pos = state['robot0_joint_pos']
+    arm_joint_vel = state['robot0_joint_vel']
+    gripper_pos = state['robot0_gripper_qpos']
+    gripper_vel = state['robot0_gripper_qvel']
+    eef_pos = state['robot0_eef_pos']
+    eef_ang = quaternion_to_euler(state['robot0_eef_quat'])
+    
+    state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0)
+    # convert to tf32
+    state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32)
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,gripper_joint_1_pos,gripper_joint_0_vel,gripper_joint_1_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_roll,eef_angle_pitch,eef_angle_yaw")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    # manual added by lbg
+    instr = "hang the tool and place into the hole"
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'roboturk'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all').take(1)
+
+    # Inspect the dataset
+    ze=tf.constant(0.0)
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
+            break
diff --git a/data/preprocess_scripts/robomimic_transport_ph.py b/data/preprocess_scripts/robomimic_transport_ph.py
new file mode 100644
index 0000000000000000000000000000000000000000..452e67a94644dd255e2f315101ff5d6b4c450e4c
--- /dev/null
+++ b/data/preprocess_scripts/robomimic_transport_ph.py
@@ -0,0 +1,114 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_euler
+
+
+def load_dataset():
+    builder = tfds.builder('robomimic_ph/transport_ph_image')
+    builder.download_and_prepare()
+    ds = builder.as_dataset(split='train', shuffle_files=True)
+    return ds
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # format refers to https://www.tensorflow.org/datasets/catalog/robomimic_mg
+    # Convert raw action to our action
+    eef = step['action']
+    step['action'] = {}
+    action = step['action']
+    action['terminate'] = step['is_terminal']
+
+    left_eef_delta_pos = eef[:3]
+    left_eef_ang = quaternion_to_euler(eef[3:7])
+    
+    right_eef_delta_pos = eef[7:10]
+    right_eef_ang = quaternion_to_euler(eef[10:])
+    
+    # No base found
+
+    # Concatenate the action
+    arm_action = tf.concat([left_eef_delta_pos,left_eef_ang,right_eef_delta_pos,right_eef_ang], axis=0)
+    action['arm_concat'] = arm_action
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "left_eef_delta_pos_x,left_eef_delta_pos_y,left_eef_delta_pos_z,left_eef_delta_angle_roll,left_eef_delta_angle_pitch,left_eef_delta_angle_yaw,right_eef_delta_pos_x,right_eef_delta_pos_y,right_eef_delta_pos_z,right_eef_delta_angle_roll,right_eef_delta_angle_pitch,right_eef_delta_angle_yaw")
+
+    # Convert raw state to our state
+    state = step['observation']
+    left_arm_joint_pos = state['robot0_joint_pos']
+    left_arm_joint_vel = state['robot0_joint_vel']
+    left_gripper_pos = state['robot0_gripper_qpos']
+    left_gripper_vel = state['robot0_gripper_qvel']
+    left_eef_pos = state['robot0_eef_pos']
+    left_eef_ang = quaternion_to_euler(state['robot0_eef_quat'])
+    
+    right_arm_joint_pos = state['robot1_joint_pos']
+    right_arm_joint_vel = state['robot1_joint_vel']
+    right_gripper_pos = state['robot1_gripper_qpos']
+    right_gripper_vel = state['robot1_gripper_qvel']
+    right_eef_pos = state['robot1_eef_pos']
+    right_eef_ang = quaternion_to_euler(state['robot1_eef_quat'])
+    
+    arm_joint_pos = tf.concat([left_arm_joint_pos, right_arm_joint_pos], axis=0)
+    arm_joint_vel = tf.concat([left_arm_joint_vel, right_arm_joint_vel], axis=0)
+    gripper_pos = tf.concat([left_gripper_pos, right_gripper_pos], axis=0)
+    gripper_vel = tf.concat([left_gripper_vel, right_gripper_vel], axis=0)
+    eef_pos = tf.concat([left_eef_pos, right_eef_pos], axis=0)
+    eef_ang = tf.concat([left_eef_ang, right_eef_ang], axis=0)
+    
+    state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, gripper_pos,gripper_vel,eef_pos,eef_ang], axis=0)
+    # convert to tf32
+    state['arm_concat'] = tf.cast(state['arm_concat'], tf.float32)
+    # Write the state format
+    state['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_arm_joint_6_pos,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_arm_joint_6_pos,left_gripper_joint_0_pos,left_gripper_joint_1_pos,right_gripper_joint_0_pos,right_gripper_joint_1_pos,left_arm_joint_0_vel,left_arm_joint_1_vel,left_arm_joint_2_vel,left_arm_joint_3_vel,left_arm_joint_4_vel,left_arm_joint_5_vel,left_arm_joint_6_vel,right_arm_joint_0_vel,right_arm_joint_1_vel,right_arm_joint_2_vel,right_arm_joint_3_vel,right_arm_joint_4_vel,right_arm_joint_5_vel,right_arm_joint_6_vel,left_gripper_joint_0_vel,left_gripper_joint_1_vel,right_gripper_joint_0_vel,right_gripper_joint_1_vel,left_eef_pos_x,left_eef_pos_y,left_eef_pos_z,left_eef_angle_roll,left_eef_angle_pitch,left_eef_angle_yaw,right_eef_pos_x,right_eef_pos_y,right_eef_pos_z,right_eef_angle_roll,right_eef_angle_pitch,right_eef_angle_yaw")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    # manual added by lbg
+    instr = "transport the object from left hand to right hand"
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'roboturk'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all').take(1)
+
+    # Inspect the dataset
+    ze=tf.constant(0.0)
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
+            break
diff --git a/data/preprocess_scripts/roboturk_real_laundrylayout.py b/data/preprocess_scripts/roboturk_real_laundrylayout.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff71ea673badaa019af83cfef2b599d04d00ac19
--- /dev/null
+++ b/data/preprocess_scripts/roboturk_real_laundrylayout.py
@@ -0,0 +1,223 @@
+import tensorflow as tf
+import tensorflow_datasets as tfds
+from data.utils import clean_task_instruction, quaternion_to_euler
+import tensorflow as tf
+import h5py
+import numpy as np
+from tqdm import tqdm
+import os
+import imageio
+import concurrent.futures
+
+def get_frames(file_path):
+    if not os.path.exists(file_path) or not os.path.isfile(file_path) or not file_path.endswith('.mp4'):
+        return []
+    frames = []
+    with imageio.get_reader(file_path, 'ffmpeg') as reader:
+        for frame in reader:
+            frame = np.array(frame, dtype=np.uint8) 
+            frames.append(frame)
+    return frames
+
+def parallel_get_frames(paths):
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        future_to_path = {executor.submit(get_frames, path): path for path in paths}
+        return [future.result() for future in concurrent.futures.as_completed(future_to_path)]
+
+def count_total_samples(filename):
+    total_samples = 0
+    with h5py.File(filename, 'r') as f:
+        data = f['data']
+        for user_key in data.keys():
+            user = data[user_key]
+            for demo_key in user.keys():
+                total_samples += 1
+    return total_samples
+
+def dataset_generator(filename, total_samples):
+    with h5py.File(filename, 'r') as f:
+        data = f['data']
+        for user_key in data.keys():
+            user = data[user_key]
+            for demo_key in user.keys():
+                demo = user[demo_key]
+                robot_observation = demo['robot_observation']
+                user_control = demo['user_control']
+
+                eef_poses = robot_observation['eef_poses']
+                joint_states_arm = robot_observation['joint_states_arm']
+                joint_states_gripper = robot_observation['joint_states_gripper']
+                user_control_data = user_control['user_control']
+
+                attrs = dict(demo.attrs)
+                top_depth_video_file = attrs['top_depth_video_file']
+                top_rgb_video_file = attrs['top_rgb_video_file']
+                front_rgb_video_file = attrs['front_rgb_video_file']
+
+                video_root_path = './data/datasets/roboturk'
+                top_depth_frames = get_frames(os.path.join(video_root_path, top_depth_video_file))
+                top_rgb_frames = get_frames(os.path.join(video_root_path, top_rgb_video_file))
+                front_rgb_frames = get_frames(os.path.join(video_root_path, front_rgb_video_file))
+                
+                if len(top_depth_frames) == 0 or len(top_rgb_frames) == 0 or len(front_rgb_frames) == 0:
+                    continue
+                # video_root_path = '/cephfs-thu/gsm_data/robotruck'
+                # video_paths = [
+                #     os.path.join(video_root_path, attrs['top_depth_video_file']),
+                #     os.path.join(video_root_path, attrs['top_rgb_video_file']),
+                #     os.path.join(video_root_path, attrs['front_rgb_video_file'])
+                # ]
+                # top_depth_frames, top_rgb_frames, front_rgb_frames = parallel_get_frames(video_paths)
+
+                steps = []
+                for i in range(len(eef_poses)):
+                    task_demo_id = f"SawyerTowerCreation_{demo_key}_{i}"
+                    step = {
+                        'task_demo_id': task_demo_id,
+                        'eef_poses': eef_poses[i],
+                        'joint_states_arm': joint_states_arm[i],
+                        'joint_states_gripper': joint_states_gripper[i],
+                        'user_control': user_control_data[i] if user_control_data.shape[0] > 0 else np.zeros(22),
+                        'observation':{
+                            'top_depth_frame': top_depth_frames[i] if i < len(top_depth_frames) else np.zeros((0,0, 3), dtype=np.uint8),
+                            'top_rgb_frame': top_rgb_frames[i] if i < len(top_rgb_frames) else np.zeros((0, 0, 3), dtype=np.uint8),
+                            'front_rgb_frame': front_rgb_frames[i] if i < len(front_rgb_frames) else np.zeros((0, 0, 3), dtype=np.uint8),
+                        },
+                        'terminate_episode': i == len(eef_poses) - 1
+                    }
+                    steps.append(step)
+                    
+
+                steps_dataset = tf.data.Dataset.from_generator(
+                    lambda: iter(steps),
+                    output_signature={
+                        'task_demo_id': tf.TensorSpec(shape=(), dtype=tf.string),
+                        'eef_poses': tf.TensorSpec(shape=(7,), dtype=tf.float32),
+                        'joint_states_arm': tf.TensorSpec(shape=(27,), dtype=tf.float32),
+                        'joint_states_gripper': tf.TensorSpec(shape=(3,), dtype=tf.float32),
+                        'user_control': tf.TensorSpec(shape=(22,), dtype=tf.float32),
+                        'observation':{
+                            'top_depth_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
+                            'top_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
+                            'front_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
+                        },
+                        'terminate_episode': tf.TensorSpec(shape=(), dtype=tf.bool),
+                    }
+                )
+
+                yield {'steps': steps_dataset}                    
+
+def load_dataset():
+    filename = './data/datasets/roboturk/SawyerLaundryLayout_aligned_dataset.hdf5'
+    total_samples = count_total_samples(filename)
+    dataset = tf.data.Dataset.from_generator(
+        lambda: dataset_generator(filename, total_samples),
+        output_signature={
+            'steps': tf.data.DatasetSpec(
+                element_spec={
+                    'task_demo_id': tf.TensorSpec(shape=(), dtype=tf.string), 
+                    'eef_poses': tf.TensorSpec(shape=(7,), dtype=tf.float32),
+                    'joint_states_arm': tf.TensorSpec(shape=(27,), dtype=tf.float32),
+                    'joint_states_gripper': tf.TensorSpec(shape=(3,), dtype=tf.float32),
+                    'user_control': tf.TensorSpec(shape=(22,), dtype=tf.float32),
+                    'observation':{
+                        'top_depth_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
+                        'top_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
+                        'front_rgb_frame': tf.TensorSpec(shape=(None, None, 3), dtype=tf.uint8),
+                    },
+                    'terminate_episode': tf.TensorSpec(shape=(), dtype = tf.bool),
+                }
+            )
+        }
+    )
+    return dataset
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    step['action'] = {}
+    action = step['action']
+    action['terminate'] = step['terminate_episode']
+
+    eef_delta_pos = step['eef_poses'][:3]
+    eef_ang = step['eef_poses'][3:]
+
+    # No base found
+    # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang], axis=0)
+    action['arm_concat'] = arm_action
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_roll,eef_delta_angle_pitch,eef_delta_angle_yaw")
+
+    # No state found
+    state = step['observation']
+    # joint_states_arm: dataset of (num_timestamps, 27) shape where each of the 9 joints is represented by the JointState message
+    # (the nine joints are in order by their ROSBAG names: ['head_pan', 'right_j0', 'right_j1', 'right_j2', 'right_j3', 'right_j4', 'right_j5', 'right_j6', 'torso_t0']. For the most part, head_pan and torso should be zeros)
+    # [0] the position of the first joint (rad or m)
+    # [1] the velocity of the first joint (rad/s or m/s)
+    # [2] the effort that is applied in the first joint
+    # [3] the position of the second joint...
+    joint_states_arm = step['joint_states_arm']
+    joint_pos = joint_states_arm[3:24:3]  
+    joint_vel = joint_states_arm[4:25:3] 
+    # joint_states_gripper: dataset of (num_timestamps, 3) shape
+    # [0] the position of the gripper (rad or m)
+    # [1] the velocity of the gripper (rad/s or m/s)
+    # [2] the effort that is applied in the gripper
+    joint_states_gripper = step['joint_states_gripper']
+    gripper_pos = joint_states_gripper[:1]
+    # remove gripper_vel due to they are all zeros
+    # gripper_vel = joint_states_gripper[1:2]
+    # Concatenate the state
+    # state['arm_concat'] = tf.concat([joint_pos,joint_vel,gripper_pos,gripper_vel], axis=0)
+    state['arm_concat'] = tf.concat([joint_pos,joint_vel,gripper_pos], axis=0)
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    # copied from openxembod
+    instr = b'create tower'
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = '/cephfs-thu/gsm_data/openx_embod'
+    DATASET_NAME = 'roboturk_real_laundrylayout'
+    # Load the dataset
+    dataset = load_dataset()
+    
+    # save_dir = os.path.join(DATASET_DIR, DATASET_NAME)
+    # if not os.path.exists(save_dir):
+    #     os.makedirs(save_dir)
+    # tf.data.experimental.save(dataset, save_dir)
diff --git a/data/preprocess_scripts/singlevla_benchmark.py b/data/preprocess_scripts/singlevla_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..099973c31810e706f5da15246cf7fd89a10f88e8
--- /dev/null
+++ b/data/preprocess_scripts/singlevla_benchmark.py
@@ -0,0 +1,82 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+
+    # Robot action
+    # eef_pos = action_dict['ee_pos'][:3]
+    # eef_ang = action_dict['ee_pos'][3:6]
+    # eef_ang = euler_to_rotation_matrix(eef_ang)
+    # eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    # eef_pos_vel = action_dict['delta_ee'][:3]
+    # eef_ang_vel = action_dict['delta_ee'][3:6]
+    joint_pos = action_dict['joint_pos']
+    # joint_vel = action_dict['delta_joint'][:-1]
+    # grip_pos = action_dict['delta_ee'][-1:]
+    # grip_vel = action_dict['gripper_velocity']
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    # arm_action = tf.concat([joint_pos, eef_pos, eef_ang, eef_pos_vel, eef_ang_vel, grip_pos], axis=0)
+    action['arm_concat'] = joint_pos
+    # action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos, gripper_joint_0_pos")
+
+    # Convert raw state to our state
+    # Robot state
+    state = step['observation']['state']
+    # image = step['observation']['image']
+    # eef_pos = state['ee_pos'][:3]
+    # eef_ang = state['ee_pos'][3:6]
+    # eef_ang = euler_to_rotation_matrix(eef_ang)
+    # eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    joint_pos = state['joint_pos']
+    # grip_pos = state['joint_pos'][-1:]
+
+    # # Concatenate the state
+    # state['arm_concat'] = tf.concat([
+    #     joint_pos, grip_pos,eef_pos,eef_ang], axis=0)
+
+    state['arm_concat'] = joint_pos
+    
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae20d02368d09302823d7fa2d07a10b6b4aee4bc
--- /dev/null
+++ b/data/preprocess_scripts/stanford_kuka_multimodal_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,84 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate'] = step['is_terminal']
+
+    eef_delta_pos = origin_action[:3]
+
+    # Ignore grip_open: -0.15~0.15
+
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = eef_delta_pos
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z")
+
+    # Convert raw state to our state
+    state = step['observation']
+    eef_quat = state['ee_orientation']
+    eef_ang = quaternion_to_rotation_matrix(eef_quat)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    eef_ang_vel = state['ee_orientation_vel']
+    eef_pos = state['ee_position']
+    eef_vel = state['ee_vel']
+    joint_pos = state['joint_pos']
+    joint_vel = state['joint_vel']
+    gripper_open = state['state'][7:8]
+    
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([joint_pos,gripper_open,joint_vel,eef_pos,eef_ang,eef_vel,eef_ang_vel],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'stanford_kuka_multimodal_dataset_converted_externally_to_rlds'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/stanford_mask_vit_converted_externally_to_rlds.py b/data/preprocess_scripts/stanford_mask_vit_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..06d73d88e06901dacbe2d752733738bb558c9c6b
--- /dev/null
+++ b/data/preprocess_scripts/stanford_mask_vit_converted_externally_to_rlds.py
@@ -0,0 +1,98 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x change in end effector position, 1x gripper yaw, 1x open/close gripper (-1 means to open the gripper, 1 means close)].
+    eef_delta_pos = action[:3]
+    eef_yaw = action[3:4]
+    eef_ang = tf.stack([0,0,eef_yaw[0]],axis=0)
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open = tf.expand_dims((1 - action[4]) / 2, axis=0)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    
+    action['terminate'] = step['is_terminal']
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+
+    # Convert raw state to our state
+    state = step['observation']['state']
+    # Robot state, consists of [7x robot joint angles, 7x robot joint velocities,1x gripper position].
+    arm_joint_pos = state[:7]
+    arm_joint_vel = state[7:14]
+    gripper_pos = state[14:15]
+    # Robot end effector pose, consists of [3x Cartesian position, 1x gripper yaw, 1x gripper position]. This is the state used in the MaskViT paper.
+    eef = step['observation']['end_effector_pose']
+    eef_pos = eef[:3]
+    gripper_yaw = eef[3:4]
+    eef_ang = tf.stack([0.0,0.0,gripper_yaw[0]],axis=0)
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    # gripper_pos = eef[4:5]
+
+    # Concatenate the state
+    state = step['observation']
+    state['arm_concat'] = tf.concat([arm_joint_pos,arm_joint_vel,gripper_pos,eef_pos,eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py b/data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6699bb0ab693e7b604e48d5063a5462993bb91e
--- /dev/null
+++ b/data/preprocess_scripts/stanford_robocook_converted_externally_to_rlds.py
@@ -0,0 +1,89 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x robot end-effector velocities, 3x robot end-effector angular velocities, 1x gripper velocity].
+    eef_delta_pos = action[:3]
+    eef_ang = action[3:6]
+    grip_vel = tf.expand_dims(action[6], axis=0)
+
+    # Concatenate the action
+    # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_vel], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,gripper_joint_0_vel")
+
+    # Convert raw state to our state
+    state = step['observation']
+    state_vec = state['state']
+    # Robot state, consists of [3x robot end-effector position, 3x robot end-effector euler angles, 1x gripper position].
+    eef_pos = state_vec[:3]
+    eef_ang = state_vec[3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    grip_joint_pos = state_vec[6:7] * 13.03 # rescale to [0, 1]
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([grip_joint_pos,eef_pos, eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/taco_play.py b/data/preprocess_scripts/taco_play.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cb3abd54b7ddfe0806479000aab4255fbdb3f7b
--- /dev/null
+++ b/data/preprocess_scripts/taco_play.py
@@ -0,0 +1,98 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.equal(terminate_act, tf.constant(1.0, dtype=tf.float32))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
+
+    # action['actions']: absolute desired values for gripper pose
+    the_actions=action['actions']
+    # First 6 dimensions are x, y, z, yaw, pitch, roll
+    eef_pos=the_actions[:3]
+    eef_ang = tf.concat([the_actions[5:6],the_actions[4:5],the_actions[3:4]],axis=0)
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    open_gripper=the_actions[6:]
+    # Last dimension is open_gripper (-1 is open gripper, 1 is close)
+    grip_open=tf.reshape(tf.where(open_gripper<0,1.0,0.0),(1,)) 
+    # -1 -> 1.0, 1->0.0
+
+    # No base found
+
+    # Concatenate the action
+    arm_action = tf.concat([eef_pos, eef_ang, grip_open], axis=0)
+    action['arm_concat'] = arm_action
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    robot_obs=state['robot_obs']
+    # robot_obs: EE position (3), EE orientation in euler angles (3), gripper width (1), joint positions (7), gripper action (1)
+    eef_pos=robot_obs[:3]
+    eef_ang=robot_obs[3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    gripper_open=robot_obs[6:7] * 12.3843 # rescale to [0, 1]
+    joint_pos=robot_obs[7:14]
+    
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([joint_pos,gripper_open,eef_pos,eef_ang],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_open,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['natural_language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'taco_play'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/toto.py b/data/preprocess_scripts/toto.py
new file mode 100644
index 0000000000000000000000000000000000000000..4117bf97508011ccecf15277e2cfd01bb70e289b
--- /dev/null
+++ b/data/preprocess_scripts/toto.py
@@ -0,0 +1,82 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.where(tf.equal(terminate_act, tf.constant(0.0, dtype=tf.float32)),tf.constant(False),tf.constant(True))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    action['terminate'] = terminate_act_to_bool(action['terminate_episode'])
+    eef_delta_pos = action['world_vector']
+    eef_ang = action['rotation_delta']
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open = tf.reshape(tf.where(action['open_gripper'],tf.constant(1.0),tf.constant(0.0)),(1,))
+    
+    # No base found
+
+    # Concatenate the action
+    arm_action = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    action['arm_concat'] = arm_action
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    joint_pos=state['state']
+    
+    # Concatenate the state
+    state['arm_concat'] = joint_pos
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['observation']['natural_language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'toto'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/twinvla_benchmark.py b/data/preprocess_scripts/twinvla_benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf55d50bc4a89476441939d4a765bdb25c43b722
--- /dev/null
+++ b/data/preprocess_scripts/twinvla_benchmark.py
@@ -0,0 +1,81 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+
+    # Robot action
+    # eef_pos = action_dict['ee_pos'][:3]
+    # eef_ang = action_dict['ee_pos'][3:6]
+    # eef_ang = euler_to_rotation_matrix(eef_ang)
+    # eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    # eef_pos_vel = action_dict['delta_ee'][:3]
+    # eef_ang_vel = action_dict['delta_ee'][3:6]
+    left_joint_pos = action_dict['joint_pos'][:7]
+    right_joint_pos = action_dict['joint_pos'][7:]
+    # joint_vel = action_dict['delta_joint'][:-1]
+    # grip_pos = action_dict['delta_ee'][-1:]
+    # grip_vel = action_dict['gripper_velocity']
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    arm_action = tf.concat([left_joint_pos, right_joint_pos], axis=0)
+    action['arm_concat'] = arm_action
+    # action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open")
+
+    # Convert raw state to our state
+    # Robot state
+    state = step['observation']['state']
+    # image = step['observation']['image']
+    # eef_pos = state['ee_pos'][:3]
+    # eef_ang = state['ee_pos'][3:6]
+    # eef_ang = euler_to_rotation_matrix(eef_ang)
+    # eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    left_joint_pos = state['joint_pos'][:7]
+    right_joint_pos = state['joint_pos'][7:]
+    # grip_pos = state['joint_pos'][-1:]
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([left_joint_pos, right_joint_pos], axis=0)
+    
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "left_arm_joint_0_pos,left_arm_joint_1_pos,left_arm_joint_2_pos,left_arm_joint_3_pos,left_arm_joint_4_pos,left_arm_joint_5_pos,left_gripper_open,right_arm_joint_0_pos,right_arm_joint_1_pos,right_arm_joint_2_pos,right_arm_joint_3_pos,right_arm_joint_4_pos,right_arm_joint_5_pos,right_gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    # instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass
diff --git a/data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a00173b53a762cdab90f2f5f774b87ed17c8ae5
--- /dev/null
+++ b/data/preprocess_scripts/ucsd_kitchen_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,91 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    
+    # Convert raw action to our action
+    
+    # 8-dimensional action, consisting of end-effector position and orientation, gripper open/close and a episode termination action.
+    action = step['action']
+    eef_delta_pos = action[:3]
+    eef_ang = action[3:6]
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+
+    # Concatenate the action
+    # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    step['action'] = {}
+    action = step['action']
+    
+    action['terminate'] = step['is_terminal']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+   
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']['state']
+    # 21-dimensional joint states, consists of robot joint angles, joint velocity and joint torque.
+    arm_joint_pos = state[:7]
+    arm_joint_vel = state[7:14]
+    # arm_joint_torque = state[14:21]
+
+    # Concatenate the state
+    
+    state = step['observation']
+    state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py b/data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..feb7fd9c1124d86b0e923f9039620647afb65674
--- /dev/null
+++ b/data/preprocess_scripts/ucsd_pick_and_place_dataset_converted_externally_to_rlds.py
@@ -0,0 +1,90 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x gripper velocities,1x gripper open/close torque].
+    eef_vel = action[:3]
+    grip_open = tf.expand_dims(1 - action[3], axis=0)
+
+    # Concatenate the action
+    # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    step['action'] = {}
+    action = step['action']
+    
+    action['terminate'] = step['is_terminal']
+    action['arm_concat'] = tf.concat([eef_vel,grip_open], axis=0)  
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_vel_x,eef_vel_y,eef_vel_z,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']['state']
+    # 	Robot state, consists of [3x gripper position,3x gripper orientation, 1x finger distance].
+    gripper_pos = state[:3]/10  # dm -> m
+    gripper_ang = state[3:6]
+    gripper_ang = euler_to_rotation_matrix(gripper_ang)
+    gripper_ang = rotation_matrix_to_ortho6d(gripper_ang)
+    gripper_open = state[6:7]/6.03  # 1 for open
+
+
+    # Concatenate the state
+    state = step['observation']
+    state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_open")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/uiuc_d3field.py b/data/preprocess_scripts/uiuc_d3field.py
new file mode 100644
index 0000000000000000000000000000000000000000..12b060e039098327e13ce127a984263dad2302b4
--- /dev/null
+++ b/data/preprocess_scripts/uiuc_d3field.py
@@ -0,0 +1,77 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate'] = step['is_terminal']
+    # Robot displacement from last frame
+    eef_delta_pos = origin_action[:3]
+
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = eef_delta_pos
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    # 4x4 Robot end-effector state
+    eef_mat=state['state']
+    eef_pos=eef_mat[:3,3]
+    rotation_matrix=eef_mat[:3,:3]
+    eef_ang = rotation_matrix_to_ortho6d(rotation_matrix)
+    state['arm_concat'] = tf.concat([eef_pos,eef_ang],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'uiuc_d3field'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py b/data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..2895224de2436de191cb98da4a2f1d398f6b6127
--- /dev/null
+++ b/data/preprocess_scripts/usc_cloth_sim_converted_externally_to_rlds.py
@@ -0,0 +1,73 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, quaternion_to_euler, euler_to_quaternion
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    
+    # Convert raw action to our action
+    # Robot action, consists of x,y,z goal and picker commandpicker<0.5 = open, picker>0.5 = close.
+    action = step['action']
+    eef_delta_pos = action[:3]
+    grip_open = tf.cast(tf.expand_dims(action[3] < 0.5, axis=0), dtype=tf.float32)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    action['terminate'] = step['is_terminal']
+    action['arm_concat'] = tf.concat([eef_delta_pos, grip_open], axis=0)
+   
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,gripper_open")
+
+    # State doesnt exist in this dataset
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/utaustin_mutex.py b/data/preprocess_scripts/utaustin_mutex.py
new file mode 100644
index 0000000000000000000000000000000000000000..87343b8f8657112685c71466a4fd323625a26396
--- /dev/null
+++ b/data/preprocess_scripts/utaustin_mutex.py
@@ -0,0 +1,81 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, rotation_matrix_to_ortho6d
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+
+    origin_action = step['action']
+    step['action']={}
+    action=step['action']
+    action['terminate'] = step['is_terminal']
+    # 6x end effector delta pose, 1x gripper position
+    eef_delta_pos = origin_action[:3]
+    eef_ang=origin_action[3:6]
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open=origin_action[6:7]
+    # No base found
+
+    # Concatenate the action
+    action['arm_concat'] = tf.concat([eef_delta_pos,eef_ang,grip_open],axis=0)
+    
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Concatenate the state
+    arm_joint_ang=state['state'][:7]
+    grip_open=state['state'][7:8] * 13.21   # rescale to [0, 1]
+    # 4x4 Robot end-effector state
+    eef_mat = tf.transpose(tf.reshape(state['state'][8:], (4, 4)))
+    eef_pos = eef_mat[:3, 3]
+    rotation_matrix = eef_mat[:3, :3]
+    eef_ang = rotation_matrix_to_ortho6d(rotation_matrix)
+    state['arm_concat'] = tf.concat([arm_joint_ang,grip_open,eef_pos,eef_ang],axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'utaustin_mutex'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py b/data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..6309864538a255007f5eb208393eaf5edf2feeff
--- /dev/null
+++ b/data/preprocess_scripts/utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds.py
@@ -0,0 +1,91 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.float32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper open/close command, 1x terminal action].
+    eef_delta_pos = action[:3]/1000 # change from mm to m
+    eef_ang = action[3:6]
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    
+    action['terminate'] = step['is_terminal']
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+
+    # Convert raw state to our state
+    state = step['observation']['state']
+    # Robot state, consists of [3x end effector pos, 3x robot rpy angles, 1x gripper position].
+    gripper_pos = state[:3]/1000 # change from mm to m
+    gripper_ang = state[3:6]
+    gripper_ang = euler_to_rotation_matrix(gripper_ang)
+    gripper_ang = rotation_matrix_to_ortho6d(gripper_ang)
+    gripper_open = state[6:7] / 86.12126922607422   # rescale to [0, 1]
+
+    # Concatenate the state
+    state = step['observation']
+    state['arm_concat'] = tf.concat([gripper_pos, gripper_ang, gripper_open], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,gripper_joint_0_pos")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/utokyo_xarm_pick_and_place_converted_externally_to_rlds.py b/data/preprocess_scripts/utokyo_xarm_pick_and_place_converted_externally_to_rlds.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d43fb8af5c51d7b0a272c7cfe19349cbc516dcd
--- /dev/null
+++ b/data/preprocess_scripts/utokyo_xarm_pick_and_place_converted_externally_to_rlds.py
@@ -0,0 +1,97 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_quaternion, euler_to_rotation_matrix, \
+    rotation_matrix_to_ortho6d
+
+
+def terminate_act_to_bool(terminate_act: tf.Tensor) -> tf.Tensor:
+    """
+    Convert terminate action to a boolean, where True means terminate.
+    """
+    return tf.reduce_all(tf.equal(terminate_act, tf.constant([1, 0, 0], dtype=tf.int32)))
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action = step['action']
+    # Robot action, consists of [3x EEF position, 3x EEF orientation yaw/pitch/roll, 1x gripper open/close position].
+    eef_delta_pos = action[:3]
+    eef_ang = tf.gather(action[3:6], [2, 1, 0])
+    eef_ang = euler_to_quaternion(eef_ang)
+    grip_open = tf.expand_dims(1 - action[6], axis=0)
+
+    # Concatenate the action
+    # action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    step['action'] = {}
+    action = step['action']
+    action['arm_concat'] = tf.concat([eef_delta_pos, eef_ang, grip_open], axis=0)
+    action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_delta_pos_x,eef_delta_pos_y,eef_delta_pos_z,eef_delta_angle_x,eef_delta_angle_y,eef_delta_angle_z,eef_delta_angle_w,gripper_open")
+
+    # Convert raw state to our state
+    state = step['observation']
+    # Robot joint state, consists of [7x robot joint angles, 7x robot joint velocity]
+    arm_joint_state = state['joint_state']
+    arm_joint_pos = arm_joint_state[:7]
+    # We do not use the velocity since it is inacurate
+    # arm_joint_vel = arm_joint_state[7:14]
+    # Robot joint trajectory, consists of [7x robot joint angles, 7x robot joint velocity, 7x robot joint acceralation].
+    # arm_joint_traj = state['joint_trajectory']
+    # Robot end effector pose, consists of [3x EEF position, 3x EEF orientation yaw/pitch/roll].
+    eef = state["end_effector_pose"]
+    eef_pos = eef[:3]
+    eef_ang = tf.gather(eef[3:6], [2, 1, 0])
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+
+    # Concatenate the state
+    # state['arm_concat'] = tf.concat([arm_joint_pos, arm_joint_vel, eef_pos, eef_ang], axis=0)
+    state['arm_concat'] = tf.concat([arm_joint_pos, eef_pos, eef_ang], axis=0)
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    import tensorflow_datasets as tfds
+    from data.utils import dataset_to_path
+
+    DATASET_DIR = 'data/datasets/openx_embod'
+    DATASET_NAME = 'fractal20220817_data'
+    # Load the dataset
+    dataset = tfds.builder_from_directory(
+        builder_dir=dataset_to_path(
+            DATASET_NAME, DATASET_DIR))
+    dataset = dataset.as_dataset(split='all')
+
+    # Inspect the dataset
+    for episode in dataset:
+        for step in episode['steps']:
+            print(step)
diff --git a/data/preprocess_scripts/vla_benchmark_ee.py b/data/preprocess_scripts/vla_benchmark_ee.py
new file mode 100644
index 0000000000000000000000000000000000000000..207731fe8a61c4078f188dd88277b171cf41d15c
--- /dev/null
+++ b/data/preprocess_scripts/vla_benchmark_ee.py
@@ -0,0 +1,80 @@
+import tensorflow as tf
+
+from data.utils import clean_task_instruction, euler_to_rotation_matrix, rotation_matrix_to_ortho6d
+
+
+def process_step(step: dict) -> dict:
+    """
+    Unify the action format and clean the task instruction.
+
+    DO NOT use python list, use tf.TensorArray instead.
+    """
+    # Convert raw action to our action
+    action_dict = step['action']
+
+    # Robot action
+    eef_pos = action_dict['ee_pos'][:3]
+    eef_ang = action_dict['ee_pos'][3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    eef_pos_vel = action_dict['delta_ee'][:3]
+    eef_ang_vel = action_dict['delta_ee'][3:6]
+    joint_pos = action_dict['joint_pos'][:-1]
+    joint_vel = action_dict['delta_joint'][:-1]
+    grip_pos = action_dict['delta_ee'][-1:]
+    # grip_vel = action_dict['gripper_velocity']
+
+    # Concatenate the action
+    step['action'] = {}
+    action = step['action']
+    
+    arm_action = tf.concat([eef_pos, eef_ang, eef_pos_vel, eef_ang_vel, joint_pos, joint_vel, grip_pos], axis=0)
+    action['arm_concat'] = arm_action
+    # action['terminate'] = step['is_terminal']
+
+    # Write the action format
+    action['format'] = tf.constant(
+        "eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5,eef_vel_x,eef_vel_y,eef_vel_z,eef_angular_vel_roll,eef_angular_vel_pitch,eef_angular_vel_yaw,arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,arm_joint_6_pos,arm_joint_0_vel,arm_joint_1_vel,arm_joint_2_vel,arm_joint_3_vel,arm_joint_4_vel,arm_joint_5_vel,arm_joint_6_vel,gripper_joint_0_pos")
+
+    # Convert raw state to our state
+    # Robot state
+    state = step['observation']['state']
+    image = step['observation']['image']
+    eef_pos = state['ee_pos'][:3]
+    eef_ang = state['ee_pos'][3:6]
+    eef_ang = euler_to_rotation_matrix(eef_ang)
+    eef_ang = rotation_matrix_to_ortho6d(eef_ang)
+    joint_pos = state['joint_pos'][:-1]
+    grip_pos = state['joint_pos'][-1:]
+
+    # Concatenate the state
+    state['arm_concat'] = tf.concat([
+        joint_pos,grip_pos,eef_pos,eef_ang], axis=0)
+    
+
+    # Write the state format
+    state['format'] = tf.constant(
+        "arm_joint_0_pos,arm_joint_1_pos,arm_joint_2_pos,arm_joint_3_pos,arm_joint_4_pos,arm_joint_5_pos,gripper_joint_0_pos,eef_pos_x,eef_pos_y,eef_pos_z,eef_angle_0,eef_angle_1,eef_angle_2,eef_angle_3,eef_angle_4,eef_angle_5")
+
+    # Clean the task instruction
+    # Define the replacements (old, new) as a dictionary
+    replacements = {
+        '_': ' ',
+        '1f': ' ',
+        '4f': ' ',
+        '-': ' ',
+        '50': ' ',
+        '55': ' ',
+        '56': ' ',
+        
+    }
+    instr = step['language_instruction']
+    instr = clean_task_instruction(instr, replacements)
+    step['observation'] = state
+    step['observation']['natural_language_instruction'] = instr
+
+    return step
+
+
+if __name__ == "__main__":
+    pass