diff --git a/Metaworld/metaworld/policies/__pycache__/action.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/action.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5887f44bc7cc3057876f5b2f143a78a3f2aa5e99
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/action.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a5dd8bf622925f3076211c69d2c0a72ef94e7248
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6710bebb988421a60a989089c0d828e68936d266
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f2c3d7e5602f5258e5566d5e503916c5ee641941
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_basketball_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_basketball_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f80676d2bdaa35882b454021e5fba0ec9bac20e4
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_basketball_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5670cfb7386956908ed2d552b3e1e8628490afd8
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..135367f95094f1a564e0263bbbcf0caffe703f82
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9f3ce9a6b7c4f048cfd74a46feb2e275bacc76c8
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_button_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_button_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a552c56b0a0b26e843a660677802b33cbfcf065c
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_button_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..94b68234466a971b5da6a9d10a3e0131eae0a615
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3c3ebdbfcc8414bf8616fe14fcfefb23e8d3a12
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8e47c1161eee90f9e052008939419d6a2a8a3227
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e032e21c442d56359ac69b846691d8cd0e1b55de
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9c52939bad67663395a0a8828c8da49346440f70
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..650d0784b7597a78d86eb63a981eed20d338117c
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_open_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_open_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..427a02e231b408b011cf451956af31d3649338f8
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_open_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eaf05cecdd4e95fe90a676ce89fe5d0705bd80d9
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a04253f3e0b0720c89fe554c7baf72777d3a89c3
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_side_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b3569c66eedab09805179b35e6d27a4506185b3
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_side_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f0dca5252fe02267d1a140aca4264282df60c76d
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5b13e326a3c74eccc8ab51dbf62762c02f1a21b5
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f57b50a5bd4f8683d92c80c8c58c668d8d14e7bc
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..41b4c71318d1bf496ae44ce0656e9e3010a36205
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..24eb3d102419b3c4102ce15e638b7ac2e8781f40
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a93132ed2ab015f6371c1460ef80f8ffe44f8c29
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_reach_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_reach_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aeb5b626233eb237679e15537b399dce5877a4c7
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_reach_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_soccer_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_soccer_v1_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b8e1cab260c70557dc6883d3c6ee44a6c7494ebf
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_soccer_v1_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_window_open_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_window_open_v2_policy.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e896fb6ffe673c1674f6c81345b51b7415529841
Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_window_open_v2_policy.cpython-38.pyc differ
diff --git a/Metaworld/metaworld/policies/policy.py b/Metaworld/metaworld/policies/policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..bccd39f8b10c73435cf20c907f88595c4dce5ee6
--- /dev/null
+++ b/Metaworld/metaworld/policies/policy.py
@@ -0,0 +1,71 @@
+import abc
+import warnings
+
+import numpy as np
+
+
+def assert_fully_parsed(func):
+    """Decorator asserting that a parser accounts for every observation entry
+
+    Args:
+        func (Callable): Parser mapping an observation to a dict of its parts
+
+    Returns:
+        (Callable): func wrapped (name/docstring kept) to assert full parsing
+    """
+    import functools  # local import keeps this block self-contained
+    def inner(obs):
+        obs_dict = func(obs)
+        n_parsed = sum(len(v) if isinstance(v, np.ndarray) else 1 for v in obs_dict.values())
+        assert len(obs) == n_parsed, 'Observation not fully parsed'
+        return obs_dict
+    return functools.wraps(func)(inner)
+
+
+def move(from_xyz, to_xyz, p):
+    """Proportional response: the displacement between two points times a gain
+
+    Args:
+        from_xyz (np.ndarray): Starting coordinates (usually current position)
+        to_xyz (np.ndarray): Target coordinates
+        p (float): Gain applied to the displacement
+
+    Returns:
+        (np.ndarray): p * (to_xyz - from_xyz); the value is not clipped here
+
+    Warns:
+        UserWarning: if any component of the result exceeds 1 in magnitude
+    """
+    scaled = p * (to_xyz - from_xyz)
+    # Warn only; per the message, clipping to [-1, 1] is done by environments
+    if (np.abs(scaled) > 1.).any():
+        warnings.warn('Constant(s) may be too high. Environments clip response to [-1, 1]')
+    return scaled
+
+
+class Policy(abc.ABC):
+    """Abstract base class for policies mapping observations to actions;
+    subclasses must implement both _parse_obs and get_action."""
+
+    @staticmethod
+    @abc.abstractmethod
+    def _parse_obs(obs):
+        """Splits a flat observation into a dict of named components.
+
+        Args:
+            obs (np.ndarray): Observation which conforms to env.observation_space
+
+        Returns:
+            dict: Named pieces of information taken from the observation
+        """
+
+    @abc.abstractmethod
+    def get_action(self, obs):
+        """Computes the action to take in response to an observation.
+
+        Args:
+            obs (np.ndarray): Observation which conforms to env.observation_space
+
+        Returns:
+            np.ndarray: Array (usually 4 elements) representing the action to take
+        """
diff --git a/Metaworld/metaworld/policies/sawyer_box_close_v1_policy.py b/Metaworld/metaworld/policies/sawyer_box_close_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0e4dc9f32c45ab866c35191adcc4d8db8d64ff9
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_box_close_v1_policy.py
@@ -0,0 +1,60 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerBoxCloseV1Policy(Policy):
+    """Scripted policy that reaches the lid from above, then heads for the box."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            lid_pos=obs[3:6],
+            box_pos=obs[9:11],
+            extra_info=obs[[6, 7, 8, 11]],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=25.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Pick the next waypoint for the end effector."""
+        hand = o_d['hand_pos']
+        lid = o_d['lid_pos'] + np.array([-.04, .0, -.06])
+        box = np.array([*o_d['box_pos'], 0.15]) + np.array([-.04, .0, .0])
+
+        # While XY error to the lid target exceeds 0.01, aim 0.1 above it
+        if np.linalg.norm(hand[:2] - lid[:2]) > 0.01:
+            return lid + np.array([0., 0., 0.1])
+        # Aligned in XY but more than 0.05 off in Z: move onto the lid target
+        if abs(hand[2] - lid[2]) > 0.05:
+            return lid
+        # More than 0.04 from the box target's height: change Z only
+        if abs(hand[2] - box[2]) > 0.04:
+            return np.array([hand[0], hand[1], box[2]])
+        # Otherwise head for the box target
+        return box
+
+    @staticmethod
+    def _grab_effort(o_d):
+        """Return .8 when the hand is close to the lid target, otherwise 0."""
+        hand = o_d['hand_pos']
+        lid = o_d['lid_pos'] + np.array([-.04, .0, -.06])
+
+        # Close means: XY error within 0.01 and Z error within 0.13
+        xy_off = np.linalg.norm(hand[:2] - lid[:2]) > 0.01
+        z_off = abs(hand[2] - lid[2]) > 0.13
+        return 0. if xy_off or z_off else .8
diff --git a/Metaworld/metaworld/policies/sawyer_button_press_topdown_v2_policy.py b/Metaworld/metaworld/policies/sawyer_button_press_topdown_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4ff7060923711fe4ed119f9bf2f1100405aeca9
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_button_press_topdown_v2_policy.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerButtonPressTopdownV2Policy(Policy):
+    """Scripted policy that lines up with the button in XY, then moves onto it."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            hand_closed=obs[3],
+            button_pos=obs[4:7],
+            unused_info=obs[7:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort of 1."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=25.)
+        action['grab_effort'] = 1.
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Return the button position, raised by 0.1 while XY error is above 0.04."""
+        hand = o_d['hand_pos']
+        button = o_d['button_pos']
+
+        xy_error = np.linalg.norm(hand[:2] - button[:2])
+        if xy_error > 0.04:
+            return button + np.array([0., 0., 0.1])
+        return button
diff --git a/Metaworld/metaworld/policies/sawyer_button_press_topdown_wall_v2_policy.py b/Metaworld/metaworld/policies/sawyer_button_press_topdown_wall_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f33a128db41c915a9b5fd893bf2eb3c04ec47ca
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_button_press_topdown_wall_v2_policy.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerButtonPressTopdownWallV2Policy(Policy):
+    """Scripted policy that lines up with a button target in XY, then moves onto it."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            hand_closed=obs[3],
+            button_pos=obs[4:7],
+            unused_info=obs[7:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort of -1."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=25.)
+        action['grab_effort'] = -1.
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Return the button target, raised by 0.1 while XY error is above 0.04."""
+        hand = o_d['hand_pos']
+        button = o_d['button_pos'] + np.array([.0, -.06, .0])  # target is offset -0.06 in Y
+
+        xy_error = np.linalg.norm(hand[:2] - button[:2])
+        if xy_error > 0.04:
+            return button + np.array([0., 0., 0.1])
+        return button
diff --git a/Metaworld/metaworld/policies/sawyer_button_press_v2_policy.py b/Metaworld/metaworld/policies/sawyer_button_press_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e5925e0ff1663a9091094465c182abd762a9aac
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_button_press_v2_policy.py
@@ -0,0 +1,49 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, move
+
+
+class SawyerButtonPressV2Policy(Policy):
+    """Scripted policy that aligns with the button in x and z, then pushes along y."""
+
+    @staticmethod
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            hand_closed=obs[3],
+            button_pos=obs[4:7],
+            unused_info=obs[7:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort of 0."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+
+        target = self.desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=25.)
+        action['grab_effort'] = 0.
+
+        return action.array
+
+    @staticmethod
+    def desired_pos(o_d):
+        """Pick the next waypoint for the end effector.
+
+        Returns:
+            np.ndarray: Target xyz, built from the button position lowered by 0.07
+        """
+        hand_x, hand_y, hand_z = o_d['hand_pos']
+        target = o_d['button_pos'] + np.array([0., 0., -0.07])
+        button_x, _, button_z = target
+
+        aligned = np.allclose([hand_x, hand_z], [button_x, button_z], atol=0.02)
+        if aligned:
+            # Aligned in x and z: aim 0.02 past the target's y to push the button
+            target[1] += 0.02
+        else:
+            # Not aligned yet: fix x and z, with target y set to the hand's y minus 0.1
+            target[1] = hand_y - .1
+        return target
diff --git a/Metaworld/metaworld/policies/sawyer_button_press_wall_v1_policy.py b/Metaworld/metaworld/policies/sawyer_button_press_wall_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..52f55971bf0414ca8cbfd8367786904592a65a95
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_button_press_wall_v1_policy.py
@@ -0,0 +1,54 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, move
+
+
+class SawyerButtonPressWallV1Policy(Policy):
+    """Scripted policy that approaches the button through a series of waypoints."""
+
+    @staticmethod
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            button_pos=obs[3:6],
+            unused_info=obs[6:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=15.)
+        action['grab_effort'] = self._grab_effort(o_d)
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Pick the next waypoint for the end effector."""
+        hand = o_d['hand_pos']
+        button = o_d['button_pos'] + np.array([.0, .0, .04])
+
+        # X error above 0.02: correct x at height .3, keeping the current y
+        if abs(hand[0] - button[0]) > 0.02:
+            return np.array([button[0], hand[1], .3])
+        # Hand y trails the button's by more than 0.09: go to its xy at height .3
+        if button[1] - hand[1] > 0.09:
+            return np.array([button[0], button[1], .3])
+        # Z error above 0.02: aim at the button target shifted -0.05 in y
+        if abs(hand[2] - button[2]) > 0.02:
+            return button + np.array([.0, -.05, .0])
+        # Otherwise aim at the button target shifted -0.02 in y
+        return button + np.array([.0, -.02, .0])
+
+    @staticmethod
+    def _grab_effort(o_d):
+        """Return 1. while any of the three approach checks fails, otherwise -1."""
+        hand = o_d['hand_pos']
+        button = o_d['button_pos'] + np.array([.0, .0, .04])
+        x_off = abs(hand[0] - button[0]) > 0.02
+        y_far = button[1] - hand[1] > 0.09
+        z_off = abs(hand[2] - button[2]) > 0.02
+        return 1. if x_off or y_far or z_off else -1.
diff --git a/Metaworld/metaworld/policies/sawyer_button_press_wall_v2_policy.py b/Metaworld/metaworld/policies/sawyer_button_press_wall_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..38cee93b801904e0b9dc31e00c599db022c48623
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_button_press_wall_v2_policy.py
@@ -0,0 +1,55 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, move
+
+
+class SawyerButtonPressWallV2Policy(Policy):
+    """Scripted policy that approaches the button through a series of waypoints."""
+
+    @staticmethod
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            hand_closed=obs[3],
+            button_pos=obs[4:7],
+            unused_info=obs[7:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=15.)
+        action['grab_effort'] = self._grab_effort(o_d)
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Pick the next waypoint for the end effector."""
+        hand = o_d['hand_pos']
+        button = o_d['button_pos'] + np.array([.0, .0, .04])
+
+        # X error above 0.02: correct x at height .3, keeping the current y
+        if abs(hand[0] - button[0]) > 0.02:
+            return np.array([button[0], hand[1], .3])
+        # Hand y trails the button's by more than 0.09: go to its xy at height .3
+        if button[1] - hand[1] > 0.09:
+            return np.array([button[0], button[1], .3])
+        # Z error above 0.02: aim at the button target shifted -0.05 in y
+        if abs(hand[2] - button[2]) > 0.02:
+            return button + np.array([.0, -.05, .0])
+        # Otherwise aim at the button target shifted -0.02 in y
+        return button + np.array([.0, -.02, .0])
+
+    @staticmethod
+    def _grab_effort(o_d):
+        """Return 1. while any of the three approach checks fails, otherwise -1."""
+        hand = o_d['hand_pos']
+        button = o_d['button_pos'] + np.array([.0, .0, .04])
+        x_off = abs(hand[0] - button[0]) > 0.02
+        y_far = button[1] - hand[1] > 0.09
+        z_off = abs(hand[2] - button[2]) > 0.02
+        return 1. if x_off or y_far or z_off else -1.
diff --git a/Metaworld/metaworld/policies/sawyer_coffee_button_v2_policy.py b/Metaworld/metaworld/policies/sawyer_coffee_button_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..25dc8de0ca747c884cc7ee94227faede0c4abbcc
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_coffee_button_v2_policy.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerCoffeeButtonV2Policy(Policy):
+    """Scripted policy that aligns with the button in x and z, then advances in y."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            gripper=obs[3],
+            button_pos=obs[4:7],
+            unused_info=obs[7:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort of -1."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=10.)
+        action['grab_effort'] = -1.
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Pick the next waypoint for the end effector."""
+        hand = o_d['hand_pos']
+        button = o_d['button_pos'] + np.array([.0, .0, -.07])
+        # XZ error above 0.02: fix x and z first, keeping the current y
+        if np.linalg.norm(hand[[0, 2]] - button[[0, 2]]) > 0.02:
+            return np.array([button[0], hand[1], button[2]])
+        # Aligned: aim at the button target shifted +0.2 in y
+        return button + np.array([.0, .2, .0])
diff --git a/Metaworld/metaworld/policies/sawyer_coffee_push_v2_policy.py b/Metaworld/metaworld/policies/sawyer_coffee_push_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..89b78171bd345047d34763ae54b7fd3b41171fa3
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_coffee_push_v2_policy.py
@@ -0,0 +1,56 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerCoffeePushV2Policy(Policy):
+    """Scripted policy that reaches the mug from above, then heads for the goal."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            gripper=obs[3],
+            mug_pos=obs[4:7],
+            goal_xy=obs[-3:-1],
+            unused_info_1=obs[7:-3],
+            unused_info_2=obs[-1],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab effort."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=10.)
+        action['grab_effort'] = self._grab_effort(o_d)
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Pick the next waypoint for the end effector."""
+        hand = o_d['hand_pos']
+        mug = o_d['mug_pos'] + np.array([.01, .0, .05])
+        goal = o_d['goal_xy']
+
+        # XY error to the mug target above 0.06: aim 0.2 above it
+        if np.linalg.norm(hand[:2] - mug[:2]) > 0.06:
+            return mug + np.array([.0, .0, .2])
+        # Z error above 0.02: move onto the mug target
+        if abs(hand[2] - mug[2]) > 0.02:
+            return mug
+        # Otherwise head for the goal's XY at a fixed height of .1
+        return np.array([goal[0], goal[1], .1])
+
+    @staticmethod
+    def _grab_effort(o_d):
+        """Return .5 when the hand is close to the mug target, otherwise -1."""
+        hand = o_d['hand_pos']
+        mug = o_d['mug_pos'] + np.array([.01, .0, .05])
+
+        xy_off = np.linalg.norm(hand[:2] - mug[:2]) > 0.06
+        z_off = abs(hand[2] - mug[2]) > 0.1
+        return -1. if xy_off or z_off else .5
diff --git a/Metaworld/metaworld/policies/sawyer_dial_turn_v1_policy.py b/Metaworld/metaworld/policies/sawyer_dial_turn_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..e68693d1ba8b36c5709e773e852eb8668281586e
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_dial_turn_v1_policy.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDialTurnV1Policy(Policy):
+    """Scripted policy that steers the hand relative to the dial via waypoints."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            dial_pos=obs[3:6],
+            goal_pos=obs[6:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab power of 0."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_pow': 3})
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_xyz(o_d), p=5.)
+        action['grab_pow'] = 0.
+        return action.array
+
+    @staticmethod
+    def _desired_xyz(o_d):
+        """Pick the next waypoint for the end effector."""
+        hand = o_d['hand_pos']
+        dial = o_d['dial_pos'] + np.array([0.0, -0.028, 0.0])
+        # Z error above 0.02: change height only
+        if abs(hand[2] - dial[2]) > 0.02:
+            return np.array([hand[0], hand[1], dial[2]])
+        # Y error above 0.02: aim at the dial target shifted +0.20 in x
+        if abs(hand[1] - dial[1]) > 0.02:
+            return np.array([dial[0]+0.20, dial[1], dial[2]])
+        return np.array([dial[0]-0.10, dial[1], dial[2]])
diff --git a/Metaworld/metaworld/policies/sawyer_dial_turn_v2_policy.py b/Metaworld/metaworld/policies/sawyer_dial_turn_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..6392ea1bdeef49410ca91b2c204dbd87ce94f521
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_dial_turn_v2_policy.py
@@ -0,0 +1,41 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDialTurnV2Policy(Policy):
+    """Scripted policy that steers the hand relative to the dial via waypoints."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation used by this policy."""
+        return dict(
+            hand_pos=obs[:3],
+            unused_gripper_open=obs[3],
+            dial_pos=obs[4:7],
+            extra_info=obs[7:],
+        )
+
+    def get_action(self, obs):
+        """Return the action array: a position delta plus a grab power of 1."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_pow': 3})
+        target = self._desired_pos(o_d)
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=target, p=10.)
+        action['grab_pow'] = 1.
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Pick the next waypoint; the last one is the target shifted in x and y."""
+        hand = o_d['hand_pos']
+        dial = o_d['dial_pos'] + np.array([0.05, 0.02, 0.09])
+        # XY error above 0.02: go to the dial target's xy at height 0.2
+        if np.linalg.norm(hand[:2] - dial[:2]) > 0.02:
+            return np.array([*dial[:2], 0.2])
+        # Z error above 0.02: move onto the dial target
+        if abs(hand[2] - dial[2]) > 0.02:
+            return dial
+        return dial + np.array([-.05, .005, .0])
diff --git a/Metaworld/metaworld/policies/sawyer_disassemble_v1_policy.py b/Metaworld/metaworld/policies/sawyer_disassemble_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecc1215e848ca8ec24a35a5e76fd865a2c2eb829
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_disassemble_v1_policy.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDisassembleV1Policy(Policy):
+    """Hand-scripted waypoint policy for the disassemble (v1) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'wrench_pos': obs[3:6],
+            'peg_pos': obs[9:],
+            'unused_info': obs[6:9],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_wrench = o_d['wrench_pos'] + np.array([.01, -.01, .01])  # fixed offset from the reported wrench position
+        pos_peg = o_d['peg_pos'] + np.array([.07, .0, .15])  # fixed offset from the reported peg position
+
+        # If XY error is greater than 0.02, place end effector above the wrench
+        if np.linalg.norm(pos_curr[:2] - pos_wrench[:2]) > 0.02:
+            return pos_wrench + np.array([0., 0., 0.12])
+        # Once XY error is low enough, drop end effector down on top of wrench
+        elif abs(pos_curr[2] - pos_wrench[2]) > 0.03:
+            return pos_wrench
+        # If still hooked on peg, move upwards
+        elif pos_wrench[2] < 0.12:
+            return pos_peg + np.array([.0, .0, .1])
+        # Move away from peg
+        else:
+            return pos_curr + np.array([.0, -.1, .0])
+
+    @staticmethod
+    def _grab_effort(o_d):  # 0. while outside tolerance of the wrench point, 0.8 otherwise
+        pos_curr = o_d['hand_pos']
+        pos_wrench = o_d['wrench_pos'] + np.array([.01, .0, .0])  # NOTE(review): offset differs from the one in _desired_pos
+
+        if np.linalg.norm(pos_curr[:2] - pos_wrench[:2]) > 0.02 or abs(pos_curr[2] - pos_wrench[2]) > 0.08:
+            return 0.
+        else:
+            return 0.8
diff --git a/Metaworld/metaworld/policies/sawyer_disassemble_v2_policy.py b/Metaworld/metaworld/policies/sawyer_disassemble_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..9efbc613b4af685e1a51645df856d8b8ead60300
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_disassemble_v2_policy.py
@@ -0,0 +1,57 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDisassembleV2Policy(Policy):
+    """Hand-scripted waypoint policy for the disassemble (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'gripper': obs[3],
+            'wrench_pos': obs[4:7],
+            'peg_pos': obs[-3:],
+            'unused_info': obs[7:-3],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_wrench = o_d['wrench_pos'] + np.array([-.02, .0, .01])  # fixed offset from the reported wrench position
+        pos_peg = o_d['peg_pos'] + np.array([.12, .0, .14])  # NOTE(review): pos_peg is not used below
+
+        # If XY error is greater than 0.02, place end effector above the wrench
+        if np.linalg.norm(pos_curr[:2] - pos_wrench[:2]) > 0.02:
+            return pos_wrench + np.array([0., 0., 0.1])
+        # Once XY error is low enough, drop end effector down on top of wrench
+        elif abs(pos_curr[2] - pos_wrench[2]) > 0.03:
+            return pos_wrench
+        # Move upwards
+        else:
+            return pos_curr + np.array([.0, .0, .1])
+
+    @staticmethod
+    def _grab_effort(o_d):  # 0. while outside tolerance of the wrench point, 0.8 otherwise
+        pos_curr = o_d['hand_pos']
+        pos_wrench = o_d['wrench_pos'] + np.array([-.02, .0, .01])  # same offset as in _desired_pos
+
+        if np.linalg.norm(pos_curr[:2] - pos_wrench[:2]) > 0.02 or abs(pos_curr[2] - pos_wrench[2]) > 0.07:
+            return 0.
+        else:
+            return 0.8
diff --git a/Metaworld/metaworld/policies/sawyer_door_close_v2_policy.py b/Metaworld/metaworld/policies/sawyer_door_close_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..492059652a82b23218100503a0683deea314efbf
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_door_close_v2_policy.py
@@ -0,0 +1,54 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDoorCloseV2Policy(Policy):
+    """Hand-scripted policy for the door-close (v2) task.
+
+    Every step steers the hand straight at the goal position taken from the
+    last three observation entries, with the grab command held at 1.
+    """
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation vector.
+
+        When ``obs`` is a NumPy array the sliced entries are views, not copies.
+        """
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],
+            'door_pos': obs[4:7],
+            'unused_2': obs[7:-3],
+            'goal_pos': obs[-3:],
+        }
+
+    def get_action(self, obs):
+        """Return the action array for one observation."""
+        o_d = self._parse_obs(obs)
+
+        # Field name -> indices; presumably slots of the flat action array.
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        # p is forwarded to move(); presumably a proportional gain.
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Return the target position for the hand: always the goal.
+
+        The parsed entries may be views into the caller's observation, so
+        this method must not modify them in place. ``door_pos`` is therefore
+        left untouched; offsetting it with ``+=`` would silently rewrite
+        obs[4:7] for the caller even though the result is never used here.
+        """
+        return o_d['goal_pos']
diff --git a/Metaworld/metaworld/policies/sawyer_drawer_close_v1_policy.py b/Metaworld/metaworld/policies/sawyer_drawer_close_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..12065195513a8e343ae1cfd98183c8bb935215d0
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_drawer_close_v1_policy.py
@@ -0,0 +1,49 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDrawerCloseV1Policy(Policy):
+    """Hand-scripted waypoint policy for the drawer-close (v1) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'drwr_pos': obs[3:6],
+            'unused_info': obs[6:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_drwr = o_d['drwr_pos']
+
+        # if further forward than the drawer...
+        if pos_curr[1] > pos_drwr[1]:
+            if pos_curr[2] < pos_drwr[2] + 0.4:
+                # rise up quickly (Z direction)
+                return np.array([pos_curr[0], pos_curr[1], pos_drwr[2] + 0.5])
+            else:
+                # move to front edge of drawer handle, but stay high in Z
+                return pos_drwr + np.array([0., -0.075, 0.4])
+        # drop down to touch drawer handle
+        elif abs(pos_curr[2] - pos_drwr[2]) > 0.04:
+            return pos_drwr + np.array([0., -0.075, 0.])
+        # push toward drawer handle's centroid
+        else:
+            return pos_drwr
diff --git a/Metaworld/metaworld/policies/sawyer_drawer_close_v2_policy.py b/Metaworld/metaworld/policies/sawyer_drawer_close_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a26ea0c4402a4c6b4279d75098db3a471af155f
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_drawer_close_v2_policy.py
@@ -0,0 +1,50 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDrawerCloseV2Policy(Policy):
+    """Hand-scripted waypoint policy for the drawer-close (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'unused_grasp_info': obs[3],
+            'drwr_pos': obs[4:7],
+            'unused_info': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_drwr = o_d['drwr_pos'] + np.array([.0, .0, -.02])  # fixed offset from the reported drawer position
+
+        # if further forward than the drawer...
+        if pos_curr[1] > pos_drwr[1]:
+            if pos_curr[2] < pos_drwr[2] + 0.23:
+                # rise up quickly (Z direction)
+                return np.array([pos_curr[0], pos_curr[1], pos_drwr[2] + 0.5])
+            else:
+                # move to front edge of drawer handle, but stay high in Z
+                return pos_drwr + np.array([0., -0.075, 0.23])
+        # drop down to touch drawer handle
+        elif abs(pos_curr[2] - pos_drwr[2]) > 0.04:
+            return pos_drwr + np.array([0., -0.075, 0.])
+        # push toward drawer handle's centroid
+        else:
+            return pos_drwr
diff --git a/Metaworld/metaworld/policies/sawyer_drawer_open_v1_policy.py b/Metaworld/metaworld/policies/sawyer_drawer_open_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..21c1930d8f0c2baa48c618d813be9aba5edc67fa
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_drawer_open_v1_policy.py
@@ -0,0 +1,48 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerDrawerOpenV1Policy(Policy):
+    """Hand-scripted waypoint policy for the drawer-open (v1) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'drwr_pos': obs[3:6],
+            'unused_info': obs[6:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        # NOTE this policy looks different from the others because it must
+        # modify its p constant part-way through the task
+        pos_curr = o_d['hand_pos']
+        pos_drwr = o_d['drwr_pos']
+
+        # align end effector's Z axis with drawer handle's Z axis
+        if np.linalg.norm(pos_curr[:2] - pos_drwr[:2]) > 0.06:
+            to_pos = pos_drwr + np.array([0., 0., 0.3])
+            action['delta_pos'] = move(o_d['hand_pos'], to_pos, p=4.)
+        # drop down to touch drawer handle
+        elif abs(pos_curr[2] - pos_drwr[2]) > 0.04:
+            to_pos = pos_drwr
+            action['delta_pos'] = move(o_d['hand_pos'], to_pos, p=4.)
+        # push toward a point just behind the drawer handle
+        # also increase p value to apply more force
+        else:
+            to_pos = pos_drwr + np.array([0., -0.06, 0.])
+            action['delta_pos'] = move(o_d['hand_pos'], to_pos, p=50.)
+
+        # keep gripper open
+        action['grab_effort'] = -1.
+
+        return action.array
diff --git a/Metaworld/metaworld/policies/sawyer_faucet_close_v1_policy.py b/Metaworld/metaworld/policies/sawyer_faucet_close_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5536d16402cea7404703ca0cb5812a0d312e696
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_faucet_close_v1_policy.py
@@ -0,0 +1,41 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerFaucetCloseV1Policy(Policy):
+    """Hand-scripted waypoint policy for the faucet-close (v1) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'faucet_pos': obs[3:6],
+            'unused_info': obs[6:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_faucet = o_d['faucet_pos'] + np.array([.02, .0, .0])  # fixed offset from the reported faucet position
+        # stage 1: first two coordinates off by more than 0.04 -> target 0.1 above the offset point
+        if np.linalg.norm(pos_curr[:2] - pos_faucet[:2]) > 0.04:
+            return pos_faucet + np.array([.0, .0, .1])
+        elif abs(pos_curr[2] - pos_faucet[2]) > 0.04:  # stage 2: close the gap in the third coordinate
+            return pos_faucet
+        else:  # stage 3: target shifted by (-.1, .05) in the first two coordinates
+            return pos_faucet + np.array([-.1, .05, .0])
diff --git a/Metaworld/metaworld/policies/sawyer_faucet_open_v2_policy.py b/Metaworld/metaworld/policies/sawyer_faucet_open_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5aa39399f67162b5369e1e9fe3cea565afbbd67
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_faucet_open_v2_policy.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerFaucetOpenV2Policy(Policy):
+    """Hand-scripted waypoint policy for the faucet-open (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'unused_gripper': obs[3],
+            'faucet_pos': obs[4:7],
+            'unused_info': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_faucet = o_d['faucet_pos'] + np.array([-.04, .0, .03])  # fixed offset from the reported faucet position
+        # stage 1: first two coordinates off by more than 0.04 -> target 0.1 above the offset point
+        if np.linalg.norm(pos_curr[:2] - pos_faucet[:2]) > 0.04:
+            return pos_faucet + np.array([.0, .0, .1])
+        elif abs(pos_curr[2] - pos_faucet[2]) > 0.04:  # stage 2: close the gap in the third coordinate
+            return pos_faucet
+        else:  # stage 3: target shifted by (.1, .05) in the first two coordinates
+            return pos_faucet + np.array([.1, .05, .0])
diff --git a/Metaworld/metaworld/policies/sawyer_hand_insert_v2_policy.py b/Metaworld/metaworld/policies/sawyer_hand_insert_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cc5836d3bd3aa0a46299763d914b8fa87190b03
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_hand_insert_v2_policy.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerHandInsertV2Policy(Policy):
+    """Hand-scripted waypoint policy for the hand-insert (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'gripper': obs[3],
+            'obj_pos': obs[4:7],
+            'goal_pos': obs[-3:],
+            'unused_info': obs[7:-3],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        hand_pos = o_d['hand_pos']
+        obj_pos = o_d['obj_pos']
+        goal_pos = o_d['goal_pos']
+
+        # If error in the XY plane is greater than 0.02, place end effector above the puck
+        if np.linalg.norm(hand_pos[:2] - obj_pos[:2]) > 0.02:
+            return obj_pos + np.array([0., 0., 0.1])
+        # Once XY error is low enough, drop end effector down on top of puck
+        elif abs(hand_pos[2] - obj_pos[2]) > 0.05:
+            return obj_pos + np.array([0., 0., 0.03])
+        # If not above goal, move to be directly above goal
+        elif np.linalg.norm(hand_pos[:2] - goal_pos[:2]) > 0.04:
+            return np.array([goal_pos[0], goal_pos[1], hand_pos[2]])
+        else:  # XY within 0.04 of the goal: target the goal itself
+            return goal_pos
+
+    @staticmethod
+    def _grab_effort(o_d):  # 0. while outside tolerance of the object, 0.65 otherwise
+        hand_pos = o_d['hand_pos']
+        obj_pos = o_d['obj_pos']
+
+        if np.linalg.norm(hand_pos[:2] - obj_pos[:2]) > 0.02 or abs(hand_pos[2] - obj_pos[2]) > 0.1:
+            return 0.
+        else:
+            return 0.65
diff --git a/Metaworld/metaworld/policies/sawyer_handle_press_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_handle_press_side_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b8a1548c77c63fa52420a6c8154236ac0da463b
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_handle_press_side_v2_policy.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerHandlePressSideV2Policy(Policy):
+    """Hand-scripted waypoint policy for the handle-press-side (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'gripper': obs[3],
+            'handle_pos': obs[4:7],
+            'unused_info': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_button = o_d['handle_pos']  # local is named "button" but holds the handle position
+        # first two coordinates off by more than 0.02 -> target 0.2 above the handle
+        if np.linalg.norm(pos_curr[:2] - pos_button[:2]) > 0.02:
+            return pos_button + np.array([0., 0., 0.2])
+        else:  # otherwise target a point 0.5 below the handle in the third coordinate
+            return pos_button + np.array([.0, .0, -.5])
diff --git a/Metaworld/metaworld/policies/sawyer_handle_pull_side_v1_policy.py b/Metaworld/metaworld/policies/sawyer_handle_pull_side_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..68bf0794421c667ec501f3c68bd6c11e4197a3c0
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_handle_pull_side_v1_policy.py
@@ -0,0 +1,43 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerHandlePullSideV1Policy(Policy):
+    """Hand-scripted waypoint policy for the handle-pull-side (v1) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'handle_pos': obs[3:6],
+            'unused_info': obs[6:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_button = o_d['handle_pos'] + np.array([.02, .0, .0])  # handle position plus a fixed offset
+        # stage 1: second coordinate off by more than 0.04 -> target 0.2 above the offset point
+        if abs(pos_curr[1] - pos_button[1]) > 0.04:
+            return pos_button + np.array([0., 0., 0.2])
+        elif abs(pos_curr[2] - pos_button[2]) > 0.03:  # stage 2: third coordinate not yet within 0.03
+            return pos_button + np.array([.1, .0, -.01])
+        elif abs(pos_curr[0] - pos_button[0]) > .01:  # stage 3: first coordinate not yet within .01
+            return np.array([pos_button[0] - .04, pos_button[1], pos_curr[2]])
+        else:  # stage 4: target shifted -.04 in the first and +.1 in the third coordinate
+            return pos_button + np.array([-.04, .0, .1])
diff --git a/Metaworld/metaworld/policies/sawyer_handle_pull_v2_policy.py b/Metaworld/metaworld/policies/sawyer_handle_pull_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..290e917ae2230dc561f7e041f4f0c510241d3e3e
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_handle_pull_v2_policy.py
@@ -0,0 +1,44 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerHandlePullV2Policy(Policy):
+    """Hand-scripted waypoint policy for the handle-pull (v2) task."""
+
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        """Name the slices of the flat observation; every index appears exactly once."""
+        return {
+            'hand_pos': obs[:3],
+            'unused_gripper': obs[3],
+            'handle_pos': obs[4:7],
+            'unused_info': obs[7:],
+        }
+
+    def get_action(self, obs):
+        """Return the action array for one observation."""
+        o_d = self._parse_obs(obs)
+        action = Action({'delta_pos': np.arange(3), 'grab_effort': 3})
+        # p is forwarded to move(); presumably a proportional gain.
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = self._grab_effort(o_d)
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):
+        """Return the next 3-D target for the hand (always a length-3 array)."""
+        pos_curr = o_d['hand_pos']
+        pos_handle = o_d['handle_pos'] + np.array([0, -0.04, 0])
+        # Approach the offset handle point until both the first-two-coordinate
+        # error and the third-coordinate error are within 0.02, then target +0.1.
+        if (np.linalg.norm(pos_curr[:2] - pos_handle[:2]) > 0.02
+                or abs(pos_curr[2] - pos_handle[2]) > 0.02):
+            return pos_handle
+        return pos_handle + np.array([0., 0., 0.1])
+
+    @staticmethod
+    def _grab_effort(o_d):  # grab command is constant for this task
+        return 1.
diff --git a/Metaworld/metaworld/policies/sawyer_lever_pull_v2_policy.py b/Metaworld/metaworld/policies/sawyer_lever_pull_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca79aa64fa6856e77b147902c151ed64a35d02fe
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_lever_pull_v2_policy.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerLeverPullV2Policy(Policy):
+    """Hand-scripted waypoint policy for the lever-pull (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'gripper': obs[3],
+            'lever_pos': obs[4:7],
+            'unused_info': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_lever = o_d['lever_pos'] + np.array([.0, -.055, .0])  # fixed offset from the reported lever position
+        # stage 1: first two coordinates off by more than 0.02 -> target 0.1 BELOW the offset point
+        if np.linalg.norm(pos_curr[:2] - pos_lever[:2]) > 0.02:
+            return pos_lever + np.array([0., 0., -0.1])
+        elif abs(pos_curr[2] - pos_lever[2]) > 0.02:  # stage 2: close the gap in the third coordinate
+            return pos_lever
+        else:  # stage 3: target shifted +.08 in the second and +.02 in the third coordinate
+            return pos_lever + np.array([.0, .08, .02])
diff --git a/Metaworld/metaworld/policies/sawyer_peg_unplug_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_peg_unplug_side_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..212ae3ccd9115a6b1ddf98840a793aebd653e632
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_peg_unplug_side_v2_policy.py
@@ -0,0 +1,53 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPegUnplugSideV2Policy(Policy):
+    """Hand-scripted waypoint policy for the peg-unplug-side (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'unused_gripper': obs[3],
+            'peg_pos': obs[4:7],
+            'unused_info': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_peg = o_d['peg_pos'] + np.array([-.02, .0, .035])  # fixed offset from the reported peg position
+        # stage 1: first two coordinates off by more than 0.04 -> target 0.2 above the offset point
+        if np.linalg.norm(pos_curr[:2] - pos_peg[:2]) > 0.04:
+            return pos_peg + np.array([0., 0., 0.2])
+        elif abs(pos_curr[2] - .15) > 0.02:  # stage 2: bring the third coordinate to the absolute value .15
+            return np.array([*pos_peg[:2], .15])
+        else:  # stage 3: keep stepping +.01 in the first coordinate from the current position
+            return pos_curr + np.array([.01, .0, .0])
+
+    @staticmethod
+    def _grab_effort(o_d):  # -1. while outside tolerance of the peg point, .1 otherwise
+        pos_curr = o_d['hand_pos']
+        pos_peg = o_d['peg_pos'] + np.array([-.02, .0, .035])  # same offset as in _desired_pos
+
+        if np.linalg.norm(pos_curr[:2] - pos_peg[:2]) > 0.04 \
+            or abs(pos_curr[2] - pos_peg[2]) > 0.15:
+            return -1.
+        else:
+            return .1
diff --git a/Metaworld/metaworld/policies/sawyer_pick_place_wall_v2_policy.py b/Metaworld/metaworld/policies/sawyer_pick_place_wall_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebd8393e826c826ea2cd7cceca978e239d07c5e3
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_pick_place_wall_v2_policy.py
@@ -0,0 +1,71 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, move, assert_fully_parsed
+
+
+class SawyerPickPlaceWallV2Policy(Policy):
+    """Hand-scripted waypoint policy for the pick-place-wall (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],
+            'puck_pos': obs[4:7],
+            'unused_2':  obs[7:-3],
+            'goal_pos': obs[-3:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self.desired_pos(o_d), p=10.)
+        action['grab_effort'] = self.grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos'] + np.array([-0.005, 0, 0])  # fixed offset from the reported puck position
+        pos_goal = o_d['goal_pos']
+
+        # If error in the XY plane is greater than 0.015, place end effector above the puck
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.015:
+            return pos_puck + np.array([0., 0., 0.1])
+        # Once XY error is low enough, drop end effector down on top of puck
+        elif abs(pos_curr[2] - pos_puck[2]) > 0.04 and pos_puck[-1] < 0.03:
+            return pos_puck + np.array([0., 0., 0.03])
+        # Move to the goal
+        else:
+            # if wall is in the way of arm, straight up above the wall
+            if(-0.15 <= pos_curr[0] <= 0.35 and
+                    0.60 <= pos_curr[1] <= 0.80 and
+                    pos_curr[2] < 0.25):
+                    return pos_curr + [0, 0, 1]
+            #move towards the goal while staying above the wall
+            elif(-0.15 <= pos_curr[0] <= 0.35 and
+                    0.60 <= pos_curr[1] <= 0.80 and
+                    pos_curr[2] < 0.35):
+                return np.array([pos_goal[0], pos_goal[1], pos_curr[2]])
+            # If not at the same Z height as the goal, move up to that plane
+            elif abs(pos_curr[2] - pos_goal[2]) > 0.04:
+                return np.array([pos_curr[0], pos_curr[1], pos_goal[2]])
+            return pos_goal
+
+    @staticmethod
+    def grab_effort(o_d):  # 0. while outside tolerance of the puck, 0.9 otherwise
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos']  # NOTE(review): no -0.005 offset here, unlike desired_pos
+
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.015 or abs(pos_curr[2] - pos_puck[2]) > 0.1:
+            return 0.
+        # While end effector is moving down toward the puck, begin closing the grabber
+        else:
+            return 0.9
diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_back_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_back_side_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..f23800dbb592b8cb20a3549b17e0e2c1eebea4d7
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_plate_slide_back_side_v2_policy.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPlateSlideBackSideV2Policy(Policy):
+    """Hand-scripted waypoint policy for the plate-slide-back-side (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],
+            'puck_pos': obs[4:7],
+            'unused_2': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_xyz(o_d), p=10.)
+        action['grab_effort'] = 1.  # constant grab command on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_xyz(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos'] + np.array([.023, .0, .025])  # fixed offset from the reported puck position
+        # stage 1: first two coordinates off by more than 0.01 -> target .07 above the offset point
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.01:
+            return pos_puck + np.array([.0, .0, .07])
+        elif abs(pos_curr[2] - pos_puck[2]) > 0.04:  # stage 2: close the gap in the third coordinate
+            return pos_puck
+        else:  # stage 3: step +.1 in the first coordinate, second coordinate fixed at .6
+            return np.array([pos_curr[0] + .1, .6, pos_curr[2]])
diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_back_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_back_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..313743b6be5944d41f48c2025fda5020605c87cc
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_plate_slide_back_v2_policy.py
@@ -0,0 +1,46 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPlateSlideBackV2Policy(Policy):
+    """Hand-scripted waypoint policy for the plate-slide-back (v2) task."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation vector
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],
+            'puck_pos': obs[4:7],
+            'unused_2': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array for one observation
+        o_d = self._parse_obs(obs)
+        # field name -> indices; presumably slots of the flat action array
+        action = Action({
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+        # p is forwarded to move(); presumably a proportional gain
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
+        action['grab_effort'] = -1.  # constant grab command of -1. on every step
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # pick the next target position for the hand
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos'] + np.array([.0, -.065, .025])  # fixed offset from the reported puck position
+        # stage 1: first two coordinates off by more than 0.01 -> target .1 above the offset point
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.01:
+            return pos_puck + np.array([.0, .0, .1])
+        elif abs(pos_curr[2] - pos_puck[2]) > 0.04:  # stage 2: close the gap in the third coordinate
+            return pos_puck
+        elif pos_curr[1] > .7:  # stage 3: second coordinate above .7 -> step -.1 in it
+            return pos_curr + np.array([.0, -.1, .0])
+        elif pos_curr[1] > .6:  # stage 4: between .6 and .7 -> fixed target (.15, .55) at current height
+            return np.array([.15, .55, pos_curr[2]])
+        else:  # stage 5: step -.1 in the first coordinate, second coordinate fixed at .55
+            return np.array([pos_curr[0] - .1, .55, pos_curr[2]])
diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_side_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce1323744f1f46f17e9c9b56de68ce389f85e5c1
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_plate_slide_side_v2_policy.py
@@ -0,0 +1,49 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPlateSlideSideV2Policy(Policy):
+    """Scripted policy: line up beside the puck, descend, then move the hand toward -x."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):
+        # Slices of the flat observation used here:
+        #   obs[:3]  -> hand position (read)
+        #   obs[3]   -> not read by this policy
+        #   obs[4:7] -> puck position (read)
+        #   obs[7:]  -> not read by this policy
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],
+            'puck_pos': obs[4:7],
+            'unused_2': obs[7:],
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # same gripper command on every call
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos'] + np.array([.07, .0, -.005])  # aim point offset from the puck
+
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04:  # XY error to the aim point
+            return pos_puck + np.array([.0, .0, .1])  # hover 0.1 above the aim point
+        elif abs(pos_curr[2] - pos_puck[2]) > 0.04:  # XY aligned, z still off
+            return pos_puck
+        elif pos_curr[0] > -.2:
+            return np.array([pos_curr[0] - .1, .6, pos_curr[2]])  # x shifted by -0.1, y fixed at .6
+        else:
+            return pos_puck + np.array([-.1, .0, .0])  # 0.1 further in -x than the aim point
diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4a34cd71fd4254a4ef989bc7d111ba74d413945
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_plate_slide_v2_policy.py
@@ -0,0 +1,46 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPlateSlideV2Policy(Policy):
+    """Scripted policy: line up with the puck, descend, then head for (shelf_x, .9)."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],  # not read by this policy
+            'puck_pos': obs[4:7],
+            'unused_2': obs[7:-3],  # not read by this policy
+            'shelf_x': obs[-3],  # third-from-last entry; only this scalar is read from the tail
+            'unused_3': obs[-2:],  # not read by this policy
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
+        action['grab_effort'] = -1.  # same gripper command on every call
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos'] + np.array([.0, -.055, .03])  # aim point offset from the puck
+
+        aligned_with_puck = np.linalg.norm(pos_curr[:2] - pos_puck[:2]) <= 0.03  # XY error only
+
+        if not aligned_with_puck:
+            return pos_puck + np.array([.0, .0, .1])  # hover 0.1 above the aim point
+        elif abs(pos_curr[2] - pos_puck[2]) > 0.04:  # XY aligned, z still off
+            return pos_puck
+        else:
+            return np.array([o_d['shelf_x'], .9, pos_puck[2]])  # x from obs, fixed y, aim-point z
diff --git a/Metaworld/metaworld/policies/sawyer_push_back_v1_policy.py b/Metaworld/metaworld/policies/sawyer_push_back_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..9eb958f9326da982bc87615f02760771ce127070
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_push_back_v1_policy.py
@@ -0,0 +1,56 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPushBackV1Policy(Policy):
+    """Scripted policy: hover over the puck, descend while closing the gripper, go to the goal."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'puck_pos': obs[3:6],
+            'goal_pos': obs[9:],
+            'unused_info': obs[6:9],  # not read by this policy
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos']
+
+        # If error in the XY plane is greater than 0.05, place end effector 0.15 above the puck
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.05:
+            return pos_puck + np.array([0., 0., 0.15])
+        # Once XY error is low enough, drop end effector to 0.03 above the puck position
+        elif abs(pos_curr[2] - pos_puck[2]) > 0.04:
+            return pos_puck + np.array([0., 0., 0.03])
+        # Otherwise move to a point 0.05 above the goal position
+        else:
+            return o_d['goal_pos'] + np.array([.0, .0, .05])
+
+    @staticmethod
+    def _grab_effort(o_d):  # 0. while away from the puck, 0.6 once close in XY and z
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos']
+
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04 or abs(pos_curr[2] - pos_puck[2]) > 0.12:
+            return 0.
+        # Within 0.04 in XY and 0.12 in z of the puck: begin closing the grabber
+        else:
+            return 0.6
diff --git a/Metaworld/metaworld/policies/sawyer_push_back_v2_policy.py b/Metaworld/metaworld/policies/sawyer_push_back_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..71ae54f741e2d05c9221ac47c265081621637a09
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_push_back_v2_policy.py
@@ -0,0 +1,57 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPushBackV2Policy(Policy):
+    """Scripted policy: hover over the puck, descend while closing the gripper, go to the goal."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],  # not read by this policy
+            'puck_pos': obs[4:7],
+            'unused_2':  obs[7:-3],  # not read by this policy
+            'goal_pos': obs[-3:],
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=1.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos']
+
+        # If error in the XY plane is greater than 0.04, place end effector 0.3 above the puck
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04:
+            return pos_puck + np.array([0., 0., 0.3])
+        # Once XY error is low enough, drop end effector down onto the puck position
+        elif abs(pos_curr[2] - pos_puck[2]) > 0.055:
+            return pos_puck
+        # Otherwise move to the goal XY; the target z is goal z plus the hand's current z
+        else:
+            return o_d['goal_pos'] + np.array([0.0, 0.0, pos_curr[2]])
+
+    @staticmethod
+    def _grab_effort(o_d):  # 0. while away from the puck, 0.9 once close in XY and z
+        pos_curr = o_d['hand_pos']
+        pos_puck = o_d['puck_pos']
+
+        if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04 or abs(pos_curr[2] - pos_puck[2]) > 0.085:
+            return 0.
+        # Within 0.04 in XY and 0.085 in z of the puck: begin closing the grabber
+        else:
+            return 0.9
diff --git a/Metaworld/metaworld/policies/sawyer_push_wall_v2_policy.py b/Metaworld/metaworld/policies/sawyer_push_wall_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..d13c2225b45393822148acad03a1b68ed0f512f8
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_push_wall_v2_policy.py
@@ -0,0 +1,64 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerPushWallV2Policy(Policy):
+    """Scripted policy: hover over the object, descend, then go to the goal with two detour regions."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],  # not read by this policy
+            'obj_pos': obs[4:7],
+            'unused_2':  obs[7:-3],  # not read by this policy
+            'goal_pos': obs[-3:],
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self.desired_pos(o_d), p=10.)
+        action['grab_effort'] = self.grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_obj = o_d['obj_pos'] + np.array([-0.005, 0, 0])  # aim point: object shifted -0.005 in x
+
+        # If error in the XY plane is greater than 0.02, place end effector 0.2 above the obj
+        if np.linalg.norm(pos_curr[:2] - pos_obj[:2]) > 0.02:
+            return pos_obj + np.array([0., 0., 0.2])
+        # Once XY error is low enough, drop end effector to 0.03 above the obj aim point
+        elif abs(pos_curr[2] - pos_obj[2]) > 0.04:
+            return pos_obj + np.array([0., 0., 0.03])
+        # Move to the goal
+        else:
+            # If the obj is inside one of the hard-coded XY boxes (the wall region), detour first
+            if(-0.1 <= pos_obj[0] <= 0.3 and 0.65 <= pos_obj[1] <= 0.75):
+                return pos_curr + np.array([-1, 0, 0])  # target far away in -x
+            elif ((-0.15 < pos_obj[0] < 0.05 or 0.15 < pos_obj[0] < 0.35)
+                    and 0.695 <= pos_obj[1] <= 0.755):
+                return pos_curr + np.array([0, 1, 0])  # target far away in +y
+            return o_d['goal_pos']
+
+    @staticmethod
+    def grab_effort(o_d):  # 0.0 while away from the obj, 0.6 once close in XY and z
+        pos_curr = o_d['hand_pos']
+        pos_obj = o_d['obj_pos']  # raw object position: no -0.005 x offset here, unlike desired_pos
+
+        if np.linalg.norm(pos_curr[:2] - pos_obj[:2]) > 0.02 or \
+                          abs(pos_curr[2] - pos_obj[2]) > 0.1:
+            return 0.0
+        # Within 0.02 in XY and 0.1 in z of the obj: begin closing the grabber
+        else:
+            return 0.6
diff --git a/Metaworld/metaworld/policies/sawyer_shelf_place_v1_policy.py b/Metaworld/metaworld/policies/sawyer_shelf_place_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbf91318765dc177f73fe2d8dddb9bd6116d81cc
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_shelf_place_v1_policy.py
@@ -0,0 +1,64 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerShelfPlaceV1Policy(Policy):
+    """Scripted policy: grab the block, align x with shelf_x, rise to z >= 0.25, then advance in +y."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'block_pos': obs[3:6],
+            'shelf_x': obs[-3],  # third-from-last entry
+            'unused_info': obs[[6, 7, 8, 10, 11]],  # index-array selection; not read by this policy
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_block = o_d['block_pos'] + np.array([.005, .0, .015])  # aim point offset from the block
+        pos_shelf_x = o_d['shelf_x']
+        if np.linalg.norm(pos_curr[:2] - pos_block[:2]) > 0.04:
+            # positioning over block (0.3 above the aim point)
+            return pos_block + np.array([0., 0., 0.3])
+        elif abs(pos_curr[2] - pos_block[2]) > 0.02:
+            # grabbing block: descend to the aim point
+            return pos_block
+        elif np.abs(pos_curr[0] - pos_shelf_x) > 0.02:
+            # centering with goal pos: change x only, keep current y and z
+            return np.array([pos_shelf_x, pos_curr[1], pos_curr[2]])
+        elif pos_curr[2] < 0.25:
+            # move up to correct height: target is 0.25 above the current z
+            pos_new = pos_curr + np.array([0., 0., 0.25])
+            return pos_new
+        else:
+            # move forward to goal: target is 0.05 further in +y
+            pos_new = pos_curr + np.array([0., 0.05, 0.])
+            return pos_new
+
+    @staticmethod
+    def _grab_effort(o_d):  # -1. while away from the block, .7 once close in XY and z
+        pos_curr = o_d['hand_pos']
+        pos_block = o_d['block_pos']  # raw block position, without the _desired_pos offset
+
+        if np.linalg.norm(pos_curr[:2] - pos_block[:2]) > 0.04 \
+            or abs(pos_curr[2] - pos_block[2]) > 0.15:
+            return -1.
+        else:
+            return .7
diff --git a/Metaworld/metaworld/policies/sawyer_soccer_v1_policy.py b/Metaworld/metaworld/policies/sawyer_soccer_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c6937b319e48cd3db9524a8a13519b6f50d9c8d
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_soccer_v1_policy.py
@@ -0,0 +1,48 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerSoccerV1Policy(Policy):
+    """Scripted policy: target a point 0.1 from the ball along the ball-to-goal direction."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'ball_pos': obs[3:6],
+            'goal_pos': obs[9:],
+            'unused_info': obs[6:9],  # not read by this policy
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # same gripper command on every call
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # target in front of or behind the ball, depending on hand alignment
+        pos_curr = o_d['hand_pos']
+        pos_ball = o_d['ball_pos'] + np.array([.0, .0, .03])  # aim point 0.03 above the ball position
+        pos_goal = o_d['goal_pos']
+
+        curr_to_ball = pos_ball - pos_curr
+        curr_to_ball /= np.linalg.norm(curr_to_ball)  # unit vector; NOTE(review): zero norm is not guarded
+
+        ball_to_goal = pos_goal - pos_ball
+        ball_to_goal /= np.linalg.norm(ball_to_goal)  # unit vector; NOTE(review): zero norm is not guarded
+
+        scaling = .1
+        if np.dot(curr_to_ball[:2], ball_to_goal[:2]) < .7:  # XY parts of the two directions disagree
+            scaling *= -1  # aim 0.1 on the side of the ball away from the goal instead
+
+        return pos_ball + scaling * ball_to_goal
diff --git a/Metaworld/metaworld/policies/sawyer_stick_push_v1_policy.py b/Metaworld/metaworld/policies/sawyer_stick_push_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3f6233d7f2b867dfe46de5f3e1449419b91b729
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_stick_push_v1_policy.py
@@ -0,0 +1,60 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerStickPushV1Policy(Policy):
+    """Scripted policy: hover over the stick, descend and grip, set hand height, go to goal XY."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'stick_pos': obs[3:6],
+            'obj_pos': obs[6:-3],
+            'goal_pos': obs[-3:],
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_pow': 3
+        })
+        
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_xyz(o_d), p=10.)
+        action['grab_pow'] = self._grab_pow(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_xyz(o_d):  # choose the next hand target; branches are tried in order
+        hand_pos = o_d['hand_pos']
+        stick_pos = o_d['stick_pos'] + np.array([-0.02, 0.0, 0.0])  # aim point: stick shifted -0.02 in x
+        obj_pos = o_d['obj_pos']
+        goal_pos = o_d['goal_pos']
+
+        # If error in the XY plane is greater than 0.02, place end effector 0.1 above the stick
+        if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02:
+            return stick_pos + np.array([0., 0., 0.1])
+        # Once XY error is low enough, drop end effector toward the stick (only while stick z < 0.03)
+        elif abs(hand_pos[2] - stick_pos[2]) > 0.05 and stick_pos[-1] < 0.03:
+            return stick_pos + np.array([0., 0., 0.03])
+        elif abs(obj_pos[2]+0.05 - hand_pos[2]) > 0.01:  # hand z not yet at obj z + 0.05
+            return np.array([hand_pos[0], hand_pos[1], obj_pos[2]+0.05])  # change z only
+        # Move to the goal XY at the current hand height
+        else:
+            return np.array([goal_pos[0], goal_pos[1], hand_pos[2]])
+
+    @staticmethod
+    def _grab_pow(o_d):  # 0. while away from the stick, 0.8 once close in XY and z
+        hand_pos = o_d['hand_pos']
+        stick_pos = o_d['stick_pos'] + np.array([-0.02, 0.0, 0.0])  # same aim point as _desired_xyz
+
+        if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02 or abs(hand_pos[2] - stick_pos[2]) > 0.1:
+            return 0.
+        # Within 0.02 in XY and 0.1 in z of the stick aim point: begin closing the grabber
+        else:
+            return 0.8
diff --git a/Metaworld/metaworld/policies/sawyer_stick_push_v2_policy.py b/Metaworld/metaworld/policies/sawyer_stick_push_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f7e2ec25ad9f495ea1d2ccd22102af81f765b30
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_stick_push_v2_policy.py
@@ -0,0 +1,64 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerStickPushV2Policy(Policy):
+    """Scripted policy: grip the stick, bring it to obj_pos axis by axis, then head for the goal."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],  # not read by this policy
+            'stick_pos': obs[4:7],
+            'unused_2': obs[7:11],  # not read by this policy
+            'obj_pos': obs[11:14],
+            'unused_3': obs[14:-3],  # not read by this policy
+            'goal_pos': obs[-3:],
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_pow': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_xyz(o_d), p=10.)
+        action['grab_pow'] = self._grab_pow(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_xyz(o_d):  # choose the next hand target; branches are tried in order
+        hand_pos = o_d['hand_pos']
+        stick_pos = o_d['stick_pos'] + np.array([.015, .0, .03])  # aim point offset from the stick
+        thermos_pos = o_d['obj_pos']  # local name for the 'obj_pos' entry
+        goal_pos = o_d['goal_pos'] + np.array([.0, .0, .132])  # goal raised by .132 in z
+
+        if abs(stick_pos[0] - thermos_pos[0]) > 0.04:  # stick aim point not yet at the obj in x
+            if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02:
+                return stick_pos + np.array([0., 0., 0.1])  # hover 0.1 above the stick aim point
+            elif abs(hand_pos[2] - stick_pos[2]) > 0.02:
+                return stick_pos  # descend onto the aim point
+            elif abs(stick_pos[1] - thermos_pos[1]) > 0.02:
+                return np.array([stick_pos[0], thermos_pos[1], stick_pos[2]])  # match obj y only
+            elif abs(stick_pos[2] - thermos_pos[2]) > 0.02:
+                return np.array([stick_pos[0], *thermos_pos[1:]])  # match obj y and z, keep x
+            else:
+                return thermos_pos  # finally close the x gap
+        else:
+            return goal_pos
+
+    @staticmethod
+    def _grab_pow(o_d):  # -1.0 while away from the stick, +0.7 once close in XY and z
+        hand_pos = o_d['hand_pos']
+        stick_pos = o_d['stick_pos'] + np.array([.015, .0, .03])  # same aim point as _desired_xyz
+
+        if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02 or abs(hand_pos[2] - stick_pos[2]) > 0.1:
+            return -1.0
+        else:
+            return +0.7
diff --git a/Metaworld/metaworld/policies/sawyer_sweep_into_v1_policy.py b/Metaworld/metaworld/policies/sawyer_sweep_into_v1_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..034d3a7d1962102aa59f4950f341df42a1330917
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_sweep_into_v1_policy.py
@@ -0,0 +1,52 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerSweepIntoV1Policy(Policy):
+    """Scripted policy: hover over the cube, descend while gripping, then go to a fixed point."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'cube_pos': obs[3:6],
+            'unused_info': obs[6:],  # not read by this policy
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_cube = o_d['cube_pos'] + np.array([.0, .0, .015])  # aim point .015 above the cube position
+
+        if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04:  # XY error to the aim point
+            return pos_cube + np.array([0., 0., 0.3])  # hover 0.3 above the aim point
+        elif abs(pos_curr[2] - pos_cube[2]) > 0.02:  # XY aligned, z still off
+            return pos_cube
+        else:
+            return np.array([.0, .8, .015])  # hard-coded destination; no goal is read from obs
+
+    @staticmethod
+    def _grab_effort(o_d):  # -1. while away from the cube, .7 once close in XY and z
+        pos_curr = o_d['hand_pos']
+        pos_cube = o_d['cube_pos']  # raw cube position, without the _desired_pos offset
+
+        if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04 \
+            or abs(pos_curr[2] - pos_cube[2]) > 0.15:
+            return -1.
+        else:
+            return .7
diff --git a/Metaworld/metaworld/policies/sawyer_sweep_v2_policy.py b/Metaworld/metaworld/policies/sawyer_sweep_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcf765baf3d108a1d216ab9deeec984558728b7e
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_sweep_v2_policy.py
@@ -0,0 +1,58 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerSweepV2Policy(Policy):
+    """Scripted policy: hover over the cube, descend while gripping, then go 0.1 above the goal."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'unused_1': obs[3],  # not read by this policy
+            'cube_pos': obs[4:7],
+            'unused_2':  obs[7:-3],  # not read by this policy
+            'goal_pos': obs[-3:],
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = self._grab_effort(o_d)
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target
+        pos_curr = o_d['hand_pos']
+        pos_cube = o_d['cube_pos'] + np.array([.0, .0, .015])  # aim point .015 above the cube position
+        pos_goal = o_d['goal_pos']
+
+        if pos_curr[0] < .2:  # approach stages apply only while hand x < .2
+            if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04:
+                return pos_cube + np.array([0., 0., 0.3])  # hover 0.3 above the aim point
+            elif abs(pos_curr[2] - pos_cube[2]) > 0.04:
+                return pos_cube  # descend onto the aim point
+
+        return pos_goal + np.array([0, 0, 0.1])  # every other case: 0.1 above the goal
+
+    @staticmethod
+    def _grab_effort(o_d):  # .7 only when close to the cube and cube x < .4, otherwise -1.
+        pos_curr = o_d['hand_pos']
+        pos_cube = o_d['cube_pos']  # raw cube position, without the _desired_pos offset
+
+        if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04 \
+            or abs(pos_curr[2] - pos_cube[2]) > 0.15:
+            return -1.
+        elif pos_cube[0] < .4:
+            return .7
+        else:
+            return -1.  # cube x has reached .4 or more
diff --git a/Metaworld/metaworld/policies/sawyer_window_close_v2_policy.py b/Metaworld/metaworld/policies/sawyer_window_close_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..f202539d24494b6008754f61e75f26c5c148e3c3
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_window_close_v2_policy.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerWindowCloseV2Policy(Policy):
+    """Scripted policy: hover near the window point, descend, then target 0.1 in -x from it."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'gripper_unused': obs[3],  # not read by this policy
+            'wndw_pos': obs[4:7],
+            'unused_info': obs[7:],  # not read by this policy
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # same gripper command on every call
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_wndw = o_d['wndw_pos'] + np.array([+0.03, -0.03, -0.08])  # aim point offset from wndw_pos
+
+        if np.linalg.norm(pos_curr[:2] - pos_wndw[:2]) > 0.04:  # XY error to the aim point
+            return pos_wndw + np.array([0., 0., 0.25])  # hover 0.25 above the aim point
+        elif abs(pos_curr[2] - pos_wndw[2]) > 0.02:  # XY aligned, z still off
+            return pos_wndw
+        else:
+            return pos_wndw + np.array([-0.1, 0., 0.])  # 0.1 in -x from the aim point
diff --git a/Metaworld/metaworld/policies/sawyer_window_open_v2_policy.py b/Metaworld/metaworld/policies/sawyer_window_open_v2_policy.py
new file mode 100644
index 0000000000000000000000000000000000000000..22cc282a596660e32e2c8fc9f2f302b7bf5a8c2b
--- /dev/null
+++ b/Metaworld/metaworld/policies/sawyer_window_open_v2_policy.py
@@ -0,0 +1,42 @@
+import numpy as np
+
+from metaworld.policies.action import Action
+from metaworld.policies.policy import Policy, assert_fully_parsed, move
+
+
+class SawyerWindowOpenV2Policy(Policy):
+    """Scripted policy: hover near the window point, descend, then target 0.1 in +x from it."""
+    @staticmethod
+    @assert_fully_parsed
+    def _parse_obs(obs):  # name the slices of the flat observation
+        return {
+            'hand_pos': obs[:3],
+            'unused_gripper_open' : obs[3],  # not read by this policy
+            'wndw_pos': obs[4:7],
+            'unused_info': obs[7:]  # not read by this policy
+        }
+
+    def get_action(self, obs):  # returns action.array computed from one observation
+        o_d = self._parse_obs(obs)
+
+        action = Action({  # presumably field -> action-array indices; confirm in Action
+            'delta_pos': np.arange(3),
+            'grab_effort': 3
+        })
+
+        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
+        action['grab_effort'] = 1.  # same gripper command on every call
+
+        return action.array
+
+    @staticmethod
+    def _desired_pos(o_d):  # choose the next hand target; branches are tried in order
+        pos_curr = o_d['hand_pos']
+        pos_wndw = o_d['wndw_pos'] + np.array([-0.03, -0.03, -0.08])  # aim point offset from wndw_pos
+
+        if np.linalg.norm(pos_curr[:2] - pos_wndw[:2]) > 0.04:  # XY error to the aim point
+            return pos_wndw + np.array([0., 0., 0.3])  # hover 0.3 above the aim point
+        elif abs(pos_curr[2] - pos_wndw[2]) > 0.02:  # XY aligned, z still off
+            return pos_wndw
+        else:
+            return pos_wndw + np.array([0.1, 0., 0.])  # 0.1 in +x from the aim point
diff --git a/gym-0.21.0/gym.egg-info/requires.txt b/gym-0.21.0/gym.egg-info/requires.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9995362a17212576fff17e96aa17e7283ce6ff28
--- /dev/null
+++ b/gym-0.21.0/gym.egg-info/requires.txt
@@ -0,0 +1,58 @@
+numpy>=1.18.0
+cloudpickle>=1.2.0
+
+[:python_version < "3.8"]
+importlib_metadata>=4.8.1
+
+[accept-rom-license]
+autorom[accept-rom-license]~=0.4.2
+
+[all]
+mujoco_py<2.0,>=1.50
+scipy>=1.4.1
+box2d-py==2.3.5
+pyglet>=1.4.0
+scipy>=1.4.1
+lz4>=3.1.0
+opencv-python>=3.0
+pyglet>=1.4.0
+ale-py~=0.7.1
+mujoco_py<2.0,>=1.50
+lz4>=3.1.0
+opencv-python>=3.0
+ale-py~=0.7.1
+pyglet>=1.4.0
+box2d-py==2.3.5
+pyglet>=1.4.0
+
+[atari]
+ale-py~=0.7.1
+
+[box2d]
+box2d-py==2.3.5
+pyglet>=1.4.0
+
+[classic_control]
+pyglet>=1.4.0
+
+[mujoco]
+mujoco_py<2.0,>=1.50
+
+[nomujoco]
+box2d-py==2.3.5
+pyglet>=1.4.0
+scipy>=1.4.1
+lz4>=3.1.0
+opencv-python>=3.0
+pyglet>=1.4.0
+ale-py~=0.7.1
+
+[other]
+lz4>=3.1.0
+opencv-python>=3.0
+
+[robotics]
+mujoco_py<2.0,>=1.50
+
+[toy_text]
+scipy>=1.4.1
diff --git a/gym-0.21.0/gym/logger.py b/gym-0.21.0/gym/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f42a2cf987894cd3579bfa64f92d1481911b86d
--- /dev/null
+++ b/gym-0.21.0/gym/logger.py
@@ -0,0 +1,43 @@
+import warnings
+
+from gym.utils import colorize
+
+DEBUG = 10  # severity levels: a message is emitted when MIN_LEVEL <= its level
+INFO = 20
+WARN = 30
+ERROR = 40
+DISABLED = 50  # higher than ERROR, so using it as the threshold silences all four functions
+
+MIN_LEVEL = 30  # current threshold; initial value equals WARN
+
+
+def set_level(level):
+    """
+    Set the module-wide threshold MIN_LEVEL; messages with a lower level are not emitted.
+    """
+    global MIN_LEVEL  # rebinding the module global, not a local
+    MIN_LEVEL = level
+
+
+def debug(msg, *args):  # NOTE(review): msg % args runs even with no args, so a bare '%' in msg raises
+    if MIN_LEVEL <= DEBUG:
+        print("%s: %s" % ("DEBUG", msg % args))  # plain print, no colour
+
+
+def info(msg, *args):  # NOTE(review): msg % args runs even with no args, so a bare '%' in msg raises
+    if MIN_LEVEL <= INFO:
+        print("%s: %s" % ("INFO", msg % args))  # plain print, no colour
+
+
+def warn(msg, *args):  # NOTE(review): msg % args runs even with no args, so a bare '%' in msg raises
+    if MIN_LEVEL <= WARN:
+        warnings.warn(colorize("%s: %s" % ("WARN", msg % args), "yellow"))  # goes through warnings, not print
+
+
+def error(msg, *args):  # NOTE(review): msg % args runs even with no args, so a bare '%' in msg raises
+    if MIN_LEVEL <= ERROR:
+        print(colorize("%s: %s" % ("ERROR", msg % args), "red"))  # printed rather than raised
+
+
+# DEPRECATED:
+setLevel = set_level
diff --git a/gym-0.21.0/tests/spaces/test_spaces.py b/gym-0.21.0/tests/spaces/test_spaces.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bdc23ffca7afef8da1f6a18fab88f15d7543a9c
--- /dev/null
+++ b/gym-0.21.0/tests/spaces/test_spaces.py
@@ -0,0 +1,437 @@
+import json  # note: ujson fails this test due to float equality
+import copy
+
+import numpy as np
+import pytest
+
+from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict
+
+
+@pytest.mark.parametrize(
+    "space",
+    [
+        Discrete(3),
+        Box(low=0.0, high=np.inf, shape=(2, 2)),
+        Tuple([Discrete(5), Discrete(10)]),
+        Tuple(
+            [
+                Discrete(5),
+                Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
+            ]
+        ),
+        Tuple((Discrete(5), Discrete(2), Discrete(2))),
+        MultiDiscrete([2, 2, 100]),
+        MultiBinary(10),
+        Dict(
+            {
+                "position": Discrete(5),
+                "velocity": Box(
+                    low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
+                ),
+            }
+        ),
+    ],
+)
+def test_roundtripping(space):  # to_jsonable -> JSON text -> from_jsonable
+    sample_1 = space.sample()
+    sample_2 = space.sample()
+    assert space.contains(sample_1)
+    assert space.contains(sample_2)
+    json_rep = space.to_jsonable([sample_1, sample_2])  # batch of two samples
+
+    json_roundtripped = json.loads(json.dumps(json_rep))  # through real JSON text
+
+    samples_after_roundtrip = space.from_jsonable(json_roundtripped)
+    sample_1_prime, sample_2_prime = samples_after_roundtrip
+
+    s1 = space.to_jsonable([sample_1])  # originals and round-tripped samples are
+    s1p = space.to_jsonable([sample_1_prime])  # compared in their jsonable form
+    s2 = space.to_jsonable([sample_2])
+    s2p = space.to_jsonable([sample_2_prime])
+    assert s1 == s1p, "Expected {} to equal {}".format(s1, s1p)
+    assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p)
+
+
+@pytest.mark.parametrize(
+    "space",
+    [
+        Discrete(3),
+        Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
+        Box(low=-np.inf, high=np.inf, shape=(1, 3)),
+        Tuple([Discrete(5), Discrete(10)]),
+        Tuple(
+            [
+                Discrete(5),
+                Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
+            ]
+        ),
+        Tuple((Discrete(5), Discrete(2), Discrete(2))),
+        MultiDiscrete([2, 2, 100]),
+        MultiBinary(6),
+        Dict(
+            {
+                "position": Discrete(5),
+                "velocity": Box(
+                    low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
+                ),
+            }
+        ),
+    ],
+)
+def test_equality(space):
+    space1 = space
+    space2 = copy.copy(space)  # a shallow copy must compare equal to its source
+    assert space1 == space2, "Expected {} to equal {}".format(space1, space2)
+
+
+@pytest.mark.parametrize(
+    "spaces",
+    [
+        (Discrete(3), Discrete(4)),
+        (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
+        (MultiBinary(8), MultiBinary(7)),
+        (
+            Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
+            Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32),
+        ),
+        (
+            Box(low=-np.inf, high=0.0, shape=(2, 1)),
+            Box(low=0.0, high=np.inf, shape=(2, 1)),
+        ),
+        (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
+        (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
+        (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
+    ],
+)
+def test_inequality(spaces):  # each pair: same class, different parameters
+    space1, space2 = spaces
+    assert space1 != space2, "Expected {} != {}".format(space1, space2)
+
+
+@pytest.mark.parametrize(
+    "space",
+    [
+        Discrete(5),
+        Box(low=0, high=255, shape=(2,), dtype="uint8"),
+        Box(low=-np.inf, high=np.inf, shape=(3, 3)),
+        Box(low=1.0, high=np.inf, shape=(3, 3)),
+        Box(low=-np.inf, high=2.0, shape=(3, 3)),
+    ],
+)
+def test_sample(space):  # empirical sample mean must be near the theoretical one
+    space.seed(0)
+    n_trials = 100
+    samples = np.array([space.sample() for _ in range(n_trials)])
+    # every branch below either sets expected_mean or raises
+    if isinstance(space, Box):
+        if space.is_bounded():
+            expected_mean = (space.high + space.low) / 2
+        elif space.is_bounded("below"):
+            expected_mean = 1 + space.low  # presumably low + Exp(1); see Box.sample
+        elif space.is_bounded("above"):
+            expected_mean = -1 + space.high  # presumably high - Exp(1)
+        else:
+            expected_mean = 0.0
+    elif isinstance(space, Discrete):
+        expected_mean = (space.n - 1) / 2  # uniform over {0, ..., n - 1}
+    else:
+        raise NotImplementedError
+    np.testing.assert_allclose(expected_mean, samples.mean(), atol=3.0 * samples.std())
+
+
+@pytest.mark.parametrize(
+    "spaces",
+    [
+        (Discrete(5), MultiBinary(5)),
+        (
+            Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
+            MultiDiscrete([2, 2, 8]),
+        ),
+        (
+            Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8),
+            Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8),
+        ),
+        (Dict({"position": Discrete(5)}), Tuple([Discrete(5)])),
+        (Dict({"position": Discrete(5)}), Discrete(5)),
+        (Tuple((Discrete(5),)), Discrete(5)),
+        (
+            Box(low=np.array([-np.inf, 0.0]), high=np.array([0.0, np.inf])),
+            Box(low=np.array([-np.inf, 1.0]), high=np.array([0.0, np.inf])),
+        ),
+    ],
+)
+def test_class_inequality(spaces):
+    assert spaces[0] == spaces[0]  # each space equals itself ...
+    assert spaces[1] == spaces[1]
+    assert spaces[0] != spaces[1]  # ... and inequality holds in both directions
+    assert spaces[1] != spaces[0]
+
+
+@pytest.mark.parametrize(
+    "space_fn",
+    [
+        lambda: Dict(space1="abc"),
+        lambda: Dict({"space1": "abc"}),
+        lambda: Tuple(["abc"]),
+    ],
+)
+def test_bad_space_calls(space_fn):  # a non-Space member must be rejected
+    with pytest.raises(AssertionError):
+        space_fn()  # lambdas defer construction until inside pytest.raises
+
+
+def test_seed_Dict():
+    nested_space = Dict(
+        {
+            "a": Box(low=0, high=1, shape=(3, 3)),
+            "b": Dict(
+                {
+                    "b_1": Box(low=-100, high=100, shape=(2,)),
+                    "b_2": Box(low=-1, high=1, shape=(2,)),
+                }
+            ),
+            "c": Discrete(5),
+        }
+    )
+
+    per_key_seeds = {
+        "a": 0,
+        "b": {
+            "b_1": 1,
+            "b_2": 2,
+        },
+        "c": 3,
+    }
+
+    nested_space.seed(per_key_seeds)
+
+    # Stand-alone twins of every leaf space, seeded with the same per-key values
+    ref_a = Box(low=0, high=1, shape=(3, 3))
+    ref_a.seed(0)
+    ref_b_1 = Box(low=-100, high=100, shape=(2,))
+    ref_b_1.seed(1)
+    ref_b_2 = Box(low=-1, high=1, shape=(2,))
+    ref_b_2.seed(2)
+    ref_c = Discrete(5)
+    ref_c.seed(3)
+
+    for _ in range(10):
+        drawn = nested_space.sample()
+        want_a = ref_a.sample()
+        want_b_1 = ref_b_1.sample()
+        want_b_2 = ref_b_2.sample()
+        want_c = ref_c.sample()
+        assert (drawn["a"] == want_a).all()
+        assert (drawn["b"]["b_1"] == want_b_1).all()
+        assert (drawn["b"]["b_2"] == want_b_2).all()
+        assert drawn["c"] == want_c
+
+
+def test_box_dtype_check():
+    # Related Issues:
+    # https://github.com/openai/gym/issues/2357
+    # https://github.com/openai/gym/issues/2298
+
+    space = Box(0, 2, tuple(), dtype=np.float32)  # scalar (shape ()) float32 box
+
+    # a plain Python scalar is cast, so it matches the space dtype
+    assert space.contains(0.5)
+
+    # arrays keep their own dtype: a float64 array is not in a float32 space
+    assert not space.contains(np.array(0.5))
+    assert not space.contains(np.array(1))  # an integer array is rejected as well
+
+
+@pytest.mark.parametrize(
+    "space",
+    [
+        Discrete(3),
+        Box(low=0.0, high=np.inf, shape=(2, 2)),
+        Tuple([Discrete(5), Discrete(10)]),
+        Tuple(
+            [
+                Discrete(5),
+                Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
+            ]
+        ),
+        Tuple((Discrete(5), Discrete(2), Discrete(2))),
+        MultiDiscrete([2, 2, 100]),
+        MultiBinary(10),
+        Dict(
+            {
+                "position": Discrete(5),
+                "velocity": Box(
+                    low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
+                ),
+            }
+        ),
+    ],
+)
+def test_seed_returns_list(space):
+    def check_int_seeds(seeds):  # seed() must return a non-empty list of ints
+        assert isinstance(seeds, list)
+        assert len(seeds) >= 1
+        assert all(isinstance(s, int) for s in seeds)
+
+    check_int_seeds(space.seed(None))
+    check_int_seeds(space.seed(0))
+
+
+def convert_sample_hashable(sample):
+    """Recursively convert arrays, lists, tuples and dicts into nested tuples."""
+    if isinstance(sample, np.ndarray):
+        # tolist() gives nested lists for ndim > 1 and a scalar for ndim == 0; recurse
+        return convert_sample_hashable(sample.tolist())
+    if isinstance(sample, (list, tuple)):
+        return tuple(convert_sample_hashable(s) for s in sample)
+    if isinstance(sample, dict):
+        return tuple((k, convert_sample_hashable(v)) for k, v in sample.items())
+
+    return sample
+
+
+def sample_equal(sample1, sample2):  # equality of the tuple-converted samples
+    return convert_sample_hashable(sample1) == convert_sample_hashable(sample2)
+
+
+@pytest.mark.parametrize(
+    "space",
+    [
+        Discrete(3),
+        Box(low=0.0, high=np.inf, shape=(2, 2)),
+        Tuple([Discrete(5), Discrete(10)]),
+        Tuple(
+            [
+                Discrete(5),
+                Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
+            ]
+        ),
+        Tuple((Discrete(5), Discrete(2), Discrete(2))),
+        MultiDiscrete([2, 2, 100]),
+        MultiBinary(10),
+        Dict(
+            {
+                "position": Discrete(5),
+                "velocity": Box(
+                    low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
+                ),
+            }
+        ),
+    ],
+)
+def test_seed_reproducibility(space):
+    space1 = space
+    space2 = copy.deepcopy(space)  # independent copy with its own RNG
+
+    space1.seed(None)  # presumably draws a fresh seed, so the two states diverge
+    space2.seed(None)
+
+    assert space1.seed(0) == space2.seed(0)  # same seed -> same returned seed list
+    assert sample_equal(space1.sample(), space2.sample())  # ... and same sample
+
+
+@pytest.mark.parametrize(
+    "space",
+    [
+        Tuple([Discrete(100), Discrete(100)]),
+        Tuple([Discrete(5), Discrete(10)]),
+        Tuple(
+            [
+                Discrete(5),
+                Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
+            ]
+        ),
+        Tuple((Discrete(5), Discrete(2), Discrete(2))),
+        Dict(
+            {
+                "position": Discrete(5),
+                "velocity": Box(
+                    low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32
+                ),
+            }
+        ),
+    ],
+)
+def test_seed_subspace_incorrelated(space):
+    subspaces = space.spaces if isinstance(space, Tuple) else space.spaces.values()
+
+    space.seed(0)  # one seed for the container must fan out to distinct sub-seeds
+    states = [
+        convert_sample_hashable(subspace.np_random.get_state())
+        for subspace in subspaces
+    ]
+
+    assert len(states) == len(set(states))  # no two subspaces share an RNG state
+
+
+def test_multidiscrete_as_tuple():
+    # 1D multi-discrete
+    space = MultiDiscrete([3, 4, 5])
+
+    assert space.shape == (3,)
+    assert space[0] == Discrete(3)  # integer index -> Discrete
+    assert space[0:1] == MultiDiscrete([3])  # slice -> MultiDiscrete
+    assert space[0:2] == MultiDiscrete([3, 4])
+    assert space[:] == space and space[:] is not space  # equal, but a new object
+    assert len(space) == 3
+
+    # 2D multi-discrete
+    space = MultiDiscrete([[3, 4, 5], [6, 7, 8]])
+
+    assert space.shape == (2, 3)
+    assert space[0, 1] == Discrete(4)  # full index -> Discrete
+    assert space[0] == MultiDiscrete([3, 4, 5])  # partial index -> one row
+    assert space[0:1] == MultiDiscrete([[3, 4, 5]])
+    assert space[0:2, :] == MultiDiscrete([[3, 4, 5], [6, 7, 8]])
+    assert space[:, 0:1] == MultiDiscrete([[3], [6]])
+    assert space[0:2, 0:2] == MultiDiscrete([[3, 4], [6, 7]])
+    assert space[:] == space and space[:] is not space
+    assert space[:, :] == space and space[:, :] is not space
+
+
+def test_multidiscrete_subspace_reproducibility():
+    # 1D multi-discrete
+    space = MultiDiscrete([100, 200, 300])
+    space.seed(None)
+
+    # each indexing expression is evaluated twice; both results must sample alike
+    assert sample_equal(space[0:1].sample(), space[0:1].sample())
+    assert sample_equal(space[0:2].sample(), space[0:2].sample())
+    assert sample_equal(space[:].sample(), space[:].sample())
+    assert sample_equal(space[:].sample(), space.sample())  # full slice vs. parent
+
+    # 2D multi-discrete
+    space = MultiDiscrete([[300, 400, 500], [600, 700, 800]])
+    space.seed(None)
+
+    assert sample_equal(space[0, 1].sample(), space[0, 1].sample())
+    assert sample_equal(space[0].sample(), space[0].sample())
+    assert sample_equal(space[0:1].sample(), space[0:1].sample())
+    assert sample_equal(space[0:2, :].sample(), space[0:2, :].sample())
+    assert sample_equal(space[:, 0:1].sample(), space[:, 0:1].sample())
+    assert sample_equal(space[0:2, 0:2].sample(), space[0:2, 0:2].sample())
+    assert sample_equal(space[:].sample(), space[:].sample())
+    assert sample_equal(space[:, :].sample(), space[:, :].sample())
+    assert sample_equal(space[:, :].sample(), space.sample())  # full slice vs. parent
+
+
+def test_space_legacy_state_pickling():
+    legacy_state = {  # old-style keys: "shape"/"np_random" without underscore
+        "shape": (
+            1,
+            2,
+            3,
+        ),
+        "dtype": np.int64,
+        "np_random": np.random.default_rng(),
+        "n": 3,
+    }
+    space = Discrete(1)
+    space.__setstate__(legacy_state)  # state is applied directly, no pickle file
+
+    assert space.shape == legacy_state["shape"]
+    assert space._shape == legacy_state["shape"]  # must land on the private name
+    assert space.np_random == legacy_state["np_random"]
+    assert space._np_random == legacy_state["np_random"]
+    assert space.n == 3
+    assert space.dtype == legacy_state["dtype"]
diff --git a/gym-0.21.0/tests/utils/test_atexit.py b/gym-0.21.0/tests/utils/test_atexit.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dd60f5173e8d10c7b24ae84635eef1763af5a97
--- /dev/null
+++ b/gym-0.21.0/tests/utils/test_atexit.py
@@ -0,0 +1,24 @@
+from gym.utils.closer import Closer
+
+
+class Closeable(object):  # minimal stand-in that records whether close() ran
+    close_called = False  # class-level default; close() overrides it per instance
+
+    def close(self):
+        self.close_called = True
+
+
+def test_register_unregister():
+    registry = Closer(atexit_register=False)
+    c1 = Closeable()
+    c2 = Closeable()
+
+    assert not c1.close_called
+    assert not c2.close_called
+    registry.register(c1)
+    id2 = registry.register(c2)  # register() returns the id used to unregister
+
+    registry.unregister(id2)
+    registry.close()  # must close only what is still registered
+    assert c1.close_called
+    assert not c2.close_called
diff --git a/gym-0.21.0/tests/vector/test_shared_memory.py b/gym-0.21.0/tests/vector/test_shared_memory.py
new file mode 100644
index 0000000000000000000000000000000000000000..bee2d7df42ec4758fe52768751baf8b8785a2653
--- /dev/null
+++ b/gym-0.21.0/tests/vector/test_shared_memory.py
@@ -0,0 +1,173 @@
+import pytest
+import numpy as np
+
+import multiprocessing as mp
+from multiprocessing.sharedctypes import SynchronizedArray
+from multiprocessing import Array, Process
+from collections import OrderedDict
+
+from gym.spaces import Tuple, Dict
+from gym.error import CustomSpaceError
+from gym.vector.utils.spaces import _BaseGymSpaces
+from tests.vector.utils import spaces, custom_spaces
+
+from gym.vector.utils.shared_memory import (
+    create_shared_memory,
+    read_from_shared_memory,
+    write_to_shared_memory,
+)
+
+
+expected_types = [  # zipped with `spaces` below, so the order must match that list
+    Array("d", 1),
+    Array("f", 1),
+    Array("f", 3),
+    Array("f", 4),
+    Array("B", 1),
+    Array("B", 32 * 32 * 3),
+    Array("i", 1),
+    (Array("i", 1), Array("i", 1)),
+    (Array("i", 1), Array("f", 2)),
+    Array("B", 3),
+    Array("B", 19),
+    OrderedDict([("position", Array("i", 1)), ("velocity", Array("f", 1))]),
+    OrderedDict(
+        [
+            ("position", OrderedDict([("x", Array("i", 1)), ("y", Array("i", 1))])),
+            ("velocity", (Array("i", 1), Array("B", 1))),
+        ]
+    ),
+]
+
+
+@pytest.mark.parametrize("n", [1, 8])
+@pytest.mark.parametrize(
+    "space,expected_type",
+    list(zip(spaces, expected_types)),
+    ids=[space.__class__.__name__ for space in spaces],
+)
+@pytest.mark.parametrize(
+    "ctx", [None, "fork", "spawn"], ids=["default", "fork", "spawn"]
+)
+def test_create_shared_memory(space, expected_type, n, ctx):
+    def assert_nested_type(lhs, rhs, n):  # lhs: created memory, rhs: n=1 template
+        assert type(lhs) == type(rhs)
+        if isinstance(lhs, (list, tuple)):
+            assert len(lhs) == len(rhs)
+            for lhs_, rhs_ in zip(lhs, rhs):
+                assert_nested_type(lhs_, rhs_, n)
+
+        elif isinstance(lhs, (dict, OrderedDict)):
+            assert set(lhs.keys()) ^ set(rhs.keys()) == set()  # identical key sets
+            for key in lhs.keys():
+                assert_nested_type(lhs[key], rhs[key], n)
+
+        elif isinstance(lhs, SynchronizedArray):
+            # Assert the length of the array
+            assert len(lhs[:]) == n * len(rhs[:])
+            # Assert the data type
+            assert type(lhs[0]) == type(rhs[0])  # noqa: E721
+
+        else:
+            raise TypeError("Got unknown type `{0}`.".format(type(lhs)))
+
+    ctx = mp if (ctx is None) else mp.get_context(ctx)  # None -> the mp module itself
+    shared_memory = create_shared_memory(space, n=n, ctx=ctx)
+    assert_nested_type(shared_memory, expected_type, n=n)
+
+
+@pytest.mark.parametrize("n", [1, 8])
+@pytest.mark.parametrize(
+    "ctx", [None, "fork", "spawn"], ids=["default", "fork", "spawn"]
+)
+@pytest.mark.parametrize("space", custom_spaces)
+def test_create_shared_memory_custom_space(n, ctx, space):
+    ctx = mp if (ctx is None) else mp.get_context(ctx)  # None -> the mp module itself
+    with pytest.raises(CustomSpaceError):  # the call must raise; no result to keep
+        create_shared_memory(space, n=n, ctx=ctx)
+
+
+@pytest.mark.parametrize(
+    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
+)
+def test_write_to_shared_memory(space):
+    def assert_nested_equal(lhs, rhs):  # lhs: shared buffers, rhs: list of samples
+        assert isinstance(rhs, list)
+        if isinstance(lhs, (list, tuple)):
+            for i, lhs_item in enumerate(lhs):
+                assert_nested_equal(lhs_item, [rhs_[i] for rhs_ in rhs])
+
+        elif isinstance(lhs, (dict, OrderedDict)):
+            for key, lhs_item in lhs.items():
+                assert_nested_equal(lhs_item, [rhs_[key] for rhs_ in rhs])
+
+        elif isinstance(lhs, SynchronizedArray):
+            assert np.all(np.array(lhs[:]) == np.stack(rhs, axis=0).flatten())
+
+        else:
+            raise TypeError("Got unknown type `{0}`.".format(type(lhs)))
+
+    def write(slot, buffers, sample):
+        write_to_shared_memory(slot, sample, buffers, space)
+
+    shared = create_shared_memory(space, n=8)
+    samples = [space.sample() for _ in range(8)]
+
+    workers = [
+        Process(target=write, args=(i, shared, s)) for i, s in enumerate(samples)
+    ]
+
+    for worker in workers:
+        worker.start()
+    for worker in workers:
+        worker.join()
+
+    assert_nested_equal(shared, samples)
+
+
+@pytest.mark.parametrize(
+    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
+)
+def test_read_from_shared_memory(space):
+    def assert_nested_equal(lhs, rhs, space, n):  # lhs: views, rhs: list of samples
+        assert isinstance(rhs, list)
+        if isinstance(space, Tuple):
+            assert isinstance(lhs, tuple)
+            for i in range(len(lhs)):
+                assert_nested_equal(
+                    lhs[i], [rhs_[i] for rhs_ in rhs], space.spaces[i], n
+                )
+
+        elif isinstance(space, Dict):
+            assert isinstance(lhs, OrderedDict)
+            for key in lhs.keys():
+                assert_nested_equal(
+                    lhs[key], [rhs_[key] for rhs_ in rhs], space.spaces[key], n
+                )
+
+        elif isinstance(space, _BaseGymSpaces):
+            assert isinstance(lhs, np.ndarray)
+            assert lhs.shape == ((n,) + space.shape)  # batch axis comes first
+            assert lhs.dtype == space.dtype
+            assert np.all(lhs == np.stack(rhs, axis=0))
+
+        else:
+            raise TypeError("Got unknown type `{0}`".format(type(space)))
+
+    def write(i, shared_memory, sample):
+        write_to_shared_memory(i, sample, shared_memory, space)
+
+    shared_memory_n8 = create_shared_memory(space, n=8)
+    # read before any write: the result is expected to be a live view of the buffer
+    memory_view_n8 = read_from_shared_memory(shared_memory_n8, space, n=8)
+    samples = [space.sample() for _ in range(8)]
+
+    processes = [
+        Process(target=write, args=(i, shared_memory_n8, samples[i])) for i in range(8)
+    ]
+
+    for process in processes:
+        process.start()
+    for process in processes:
+        process.join()
+    assert_nested_equal(memory_view_n8, samples, space, n=8)
diff --git a/gym-0.21.0/tests/wrappers/test_atari_preprocessing.py b/gym-0.21.0/tests/wrappers/test_atari_preprocessing.py
new file mode 100644
index 0000000000000000000000000000000000000000..744867dbb46cc1471f5ae45e3884b2ba5c30bee0
--- /dev/null
+++ b/gym-0.21.0/tests/wrappers/test_atari_preprocessing.py
@@ -0,0 +1,93 @@
+import numpy as np
+import gym
+from gym.wrappers import AtariPreprocessing
+import pytest
+
+pytest.importorskip("gym.envs.atari")
+
+
+@pytest.fixture(scope="module")
+def env_fn():  # factory fixture: each call of the returned lambda makes a new env
+    return lambda: gym.make("PongNoFrameskip-v4")
+
+
+def test_atari_preprocessing_grayscale(env_fn):
+    import cv2
+
+    env1 = env_fn()  # unwrapped env: source of the reference frame
+    env2 = AtariPreprocessing(
+        env_fn(), screen_size=84, grayscale_obs=True, frame_skip=1, noop_max=0
+    )
+    env3 = AtariPreprocessing(
+        env_fn(), screen_size=84, grayscale_obs=False, frame_skip=1, noop_max=0
+    )
+    env4 = AtariPreprocessing(
+        env_fn(),
+        screen_size=84,
+        grayscale_obs=True,
+        frame_skip=1,
+        noop_max=0,
+        grayscale_newaxis=True,
+    )
+    env1.seed(0)  # same seed everywhere so the first frames are comparable
+    env2.seed(0)
+    env3.seed(0)
+    env4.seed(0)
+    obs1 = env1.reset()
+    obs2 = env2.reset()
+    obs3 = env3.reset()
+    obs4 = env4.reset()
+    assert env1.observation_space.shape == (210, 160, 3)
+    assert env2.observation_space.shape == (84, 84)
+    assert env3.observation_space.shape == (84, 84, 3)
+    assert env4.observation_space.shape == (84, 84, 1)
+    assert obs1.shape == (210, 160, 3)
+    assert obs2.shape == (84, 84)
+    assert obs3.shape == (84, 84, 3)
+    assert obs4.shape == (84, 84, 1)
+    assert np.allclose(obs3, cv2.resize(obs1, (84, 84), interpolation=cv2.INTER_AREA))
+    obs3_gray = cv2.cvtColor(obs3, cv2.COLOR_RGB2GRAY)
+    # the edges of the numbers do not render quite the same in grayscale, so we ignore them
+    assert np.allclose(obs2[10:38], obs3_gray[10:38])
+    # the paddles also do not render quite the same, so rows 38-43 are skipped
+    assert np.allclose(obs2[44:], obs3_gray[44:])
+    # now add a channel axis and re-test
+    obs3_gray = obs3_gray.reshape(84, 84, 1)
+    assert np.allclose(obs4[10:38], obs3_gray[10:38])
+    assert np.allclose(obs4[44:], obs3_gray[44:])
+
+    env1.close()
+    env2.close()
+    env3.close()
+    env4.close()
+
+
+def test_atari_preprocessing_scale(env_fn):
+    # arbitrary cap on env steps per configuration; all observations must stay in range
+    max_test_steps = 10
+
+    for grayscale in [True, False]:
+        for scaled in [True, False]:
+            env = AtariPreprocessing(
+                env_fn(),
+                screen_size=84,
+                grayscale_obs=grayscale,
+                scale_obs=scaled,
+                frame_skip=1,
+                noop_max=0,
+            )
+            obs = env.reset().flatten()
+            done, step_i = False, 0
+            max_obs = 1 if scaled else 255
+            assert (0 <= obs).all() and (
+                obs <= max_obs
+            ).all(), "Obs. must be in range [0,{}]".format(max_obs)
+            while not done and step_i <= max_test_steps:  # stop at whichever comes first
+                obs, _, done, _ = env.step(env.action_space.sample())
+                obs = obs.flatten()
+                assert (0 <= obs).all() and (
+                    obs <= max_obs
+                ).all(), "Obs. must be in range [0,{}]".format(max_obs)
+                step_i += 1
+
+            env.close()