diff --git a/Metaworld/metaworld/policies/__pycache__/action.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/action.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5887f44bc7cc3057876f5b2f143a78a3f2aa5e99 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/action.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5dd8bf622925f3076211c69d2c0a72ef94e7248 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6710bebb988421a60a989089c0d828e68936d266 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2c3d7e5602f5258e5566d5e503916c5ee641941 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_assembly_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_basketball_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_basketball_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f80676d2bdaa35882b454021e5fba0ec9bac20e4 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_basketball_v1_policy.cpython-38.pyc differ diff --git 
a/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5670cfb7386956908ed2d552b3e1e8628490afd8 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..135367f95094f1a564e0263bbbcf0caffe703f82 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f3ce9a6b7c4f048cfd74a46feb2e275bacc76c8 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_wall_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_button_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_button_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a552c56b0a0b26e843a660677802b33cbfcf065c Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_button_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v1_policy.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..94b68234466a971b5da6a9d10a3e0131eae0a615 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3c3ebdbfcc8414bf8616fe14fcfefb23e8d3a12 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e47c1161eee90f9e052008939419d6a2a8a3227 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e032e21c442d56359ac69b846691d8cd0e1b55de Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c52939bad67663395a0a8828c8da49346440f70 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v2_policy.cpython-38.pyc 
b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..650d0784b7597a78d86eb63a981eed20d338117c Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_open_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_open_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..427a02e231b408b011cf451956af31d3649338f8 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_open_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eaf05cecdd4e95fe90a676ce89fe5d0705bd80d9 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a04253f3e0b0720c89fe554c7baf72777d3a89c3 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_hand_insert_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_side_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b3569c66eedab09805179b35e6d27a4506185b3 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_side_v2_policy.cpython-38.pyc 
differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0dca5252fe02267d1a140aca4264282df60c76d Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b13e326a3c74eccc8ab51dbf62762c02f1a21b5 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_side_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f57b50a5bd4f8683d92c80c8c58c668d8d14e7bc Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41b4c71318d1bf496ae44ce0656e9e3010a36205 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v2_policy.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..24eb3d102419b3c4102ce15e638b7ac2e8781f40 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a93132ed2ab015f6371c1460ef80f8ffe44f8c29 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_reach_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_reach_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aeb5b626233eb237679e15537b399dce5877a4c7 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_reach_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_soccer_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_soccer_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8e1cab260c70557dc6883d3c6ee44a6c7494ebf Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_soccer_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_window_open_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_window_open_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e896fb6ffe673c1674f6c81345b51b7415529841 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_window_open_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/policy.py b/Metaworld/metaworld/policies/policy.py new file mode 100644 index 
def assert_fully_parsed(func):
    """Wrap an observation parser so it checks that nothing was left out.

    Args:
        func (Callable): Parser that takes an observation and returns a dict
            of values pulled out of it.

    Returns:
        (Callable): Wrapper around ``func`` that asserts the values of the
            returned dict account for exactly ``len(obs)`` entries. The
            wrapper keeps ``func``'s name and docstring.
    """
    import functools  # function-scope import; module header stays untouched

    @functools.wraps(func)
    def inner(obs):
        obs_dict = func(obs)
        # Arrays count for their length; any other value (e.g. one element
        # picked out by an integer index) counts as a single entry.
        n_parsed = sum(
            len(v) if isinstance(v, np.ndarray) else 1
            for v in obs_dict.values()
        )
        assert len(obs) == n_parsed, 'Observation not fully parsed'
        return obs_dict
    return inner


def move(from_xyz, to_xyz, p):
    """Compute a proportional response that moves one position toward another.

    Args:
        from_xyz (np.ndarray): The coordinates to move from (usually the
            current position)
        to_xyz (np.ndarray): The coordinates to move to
        p (float): Constant that scales the response

    Returns:
        (np.ndarray): ``p * (to_xyz - from_xyz)``. A warning is issued when
            any component exceeds 1 in magnitude; the value itself is
            returned unclipped.
    """
    error = to_xyz - from_xyz
    response = p * error

    if np.any(np.absolute(response) > 1.):
        warnings.warn('Constant(s) may be too high. Environments clip response to [-1, 1]')

    return response


class Policy(abc.ABC):
    """Abstract base for scripted policies: parse an observation, then act."""

    @staticmethod
    @abc.abstractmethod
    def _parse_obs(obs):
        """Pull pertinent information out of an observation into a dict.

        Args:
            obs (np.ndarray): Observation which conforms to
                env.observation_space

        Returns:
            dict: Dictionary which contains information from the observation
        """
        pass

    @abc.abstractmethod
    def get_action(self, obs):
        """Get an action in response to an observation.

        Args:
            obs (np.ndarray): Observation which conforms to
                env.observation_space

        Returns:
            np.ndarray: Array (usually 4 elements) representing the action
                to take
        """
        pass
class SawyerBoxCloseV1Policy(Policy):
    """Scripted policy: reach the lid, close the gripper, move to the box."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation; the full-parse check passes only
                when it has exactly 12 entries.

        Returns:
            dict: ``hand_pos`` (0:3), ``lid_pos`` (3:6), ``box_pos`` (9:11)
                and ``extra_info`` (entries 6, 7, 8 and 11).
        """
        return dict(
            hand_pos=obs[:3],
            lid_pos=obs[3:6],
            box_pos=obs[9:11],
            extra_info=obs[[6, 7, 8, 11]],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        # NOTE(review): the dict presumably maps each field to its index(es)
        # in the action array - confirm against Action.
        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=25.)
        act['grab_effort'] = self._grab_effort(parsed)

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        lid = o_d['lid_pos'] + np.array([-.04, .0, -.06])
        box = np.array([*o_d['box_pos'], 0.15]) + np.array([-.04, .0, .0])

        # XY error to the lid target above 0.01: hover 0.1 above it.
        if np.linalg.norm(hand[:2] - lid[:2]) > 0.01:
            return lid + np.array([0., 0., 0.1])
        # Over the lid but more than 0.05 away in Z: go down onto it.
        if abs(hand[2] - lid[2]) > 0.05:
            return lid
        # Not yet at the box target's Z height: change height only.
        if abs(hand[2] - box[2]) > 0.04:
            return np.array([hand[0], hand[1], box[2]])
        # Otherwise head for the box target.
        return box

    @staticmethod
    def _grab_effort(o_d):
        """Return the gripper effort: 0. while approaching, .8 near the lid.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            float: ``0.`` or ``.8``.
        """
        hand = o_d['hand_pos']
        lid = o_d['lid_pos'] + np.array([-.04, .0, -.06])

        xy_off = np.linalg.norm(hand[:2] - lid[:2]) > 0.01
        z_far = abs(hand[2] - lid[2]) > 0.13
        # The gripper starts closing while the hand is still descending.
        return 0. if (xy_off or z_far) else .8
class SawyerButtonPressTopdownV2Policy(Policy):
    """Scripted policy: hover over the button, then move onto it."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``hand_closed`` (3), ``button_pos``
                (4:7) and ``unused_info`` (7 onward).
        """
        return dict(
            hand_pos=obs[:3],
            hand_closed=obs[3],
            button_pos=obs[4:7],
            unused_info=obs[7:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=25.)
        act['grab_effort'] = 1.

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: 0.1 above the button in Z while the XY distance
                exceeds 0.04, otherwise the button position itself.
        """
        hand = o_d['hand_pos']
        button = o_d['button_pos']

        xy_gap = np.linalg.norm(hand[:2] - button[:2])
        if xy_gap > 0.04:
            return button + np.array([0., 0., 0.1])
        return button
class SawyerButtonPressTopdownWallV2Policy(Policy):
    """Scripted policy: hover over an offset button target, then move to it."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``hand_closed`` (3), ``button_pos``
                (4:7) and ``unused_info`` (7 onward).
        """
        return dict(
            hand_pos=obs[:3],
            hand_closed=obs[3],
            button_pos=obs[4:7],
            unused_info=obs[7:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=25.)
        act['grab_effort'] = -1.

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: The button position shifted by -.06 in Y, raised by
                0.1 in Z while the XY distance to it exceeds 0.04.
        """
        hand = o_d['hand_pos']
        target = o_d['button_pos'] + np.array([.0, -.06, .0])

        xy_gap = np.linalg.norm(hand[:2] - target[:2])
        if xy_gap > 0.04:
            return target + np.array([0., 0., 0.1])
        return target
class SawyerButtonPressV2Policy(Policy):
    """Scripted policy: line up with the button in X/Z, then push along Y."""

    @staticmethod
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``hand_closed`` (3), ``button_pos``
                (4:7) and ``unused_info`` (7 onward).
        """
        return dict(
            hand_pos=obs[:3],
            hand_closed=obs[3],
            button_pos=obs[4:7],
            unused_info=obs[7:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self.desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=25.)
        act['grab_effort'] = 0.

        return act.array

    @staticmethod
    def desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: The button position lowered by 0.07 in Z, with its
                Y component set according to whether the hand is aligned.
        """
        hand = o_d['hand_pos']
        # The sum is a fresh array, so editing its Y entry below does not
        # touch the observation.
        target = o_d['button_pos'] + np.array([0., 0., -0.07])

        hand_x, _, hand_z = hand
        target_x, _, target_z = target
        aligned = np.allclose(
            np.array([hand_x, hand_z]),
            np.array([target_x, target_z]),
            atol=0.02,
        )

        if aligned:
            # Hand is lined up in x and z: push the button in by
            # increasing the target's y position.
            target[1] += 0.02
        else:
            # Not lined up yet: fix x and z while keeping the target 0.1
            # behind the hand's current y.
            target[1] = hand[1] - .1
        return target
class SawyerButtonPressWallV1Policy(Policy):
    """Scripted policy: align in X, travel high in Y, drop, then press."""

    @staticmethod
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``button_pos`` (3:6) and
                ``unused_info`` (6 onward).
        """
        return dict(
            hand_pos=obs[:3],
            button_pos=obs[3:6],
            unused_info=obs[6:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=15.)
        act['grab_effort'] = self._grab_effort(parsed)

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        button = o_d['button_pos'] + np.array([.0, .0, .04])

        # X not aligned: fix X at height .3, keep the current Y.
        if abs(hand[0] - button[0]) > 0.02:
            return np.array([button[0], hand[1], .3])
        # Still more than 0.09 short of the button in Y: travel at height .3.
        if button[1] - hand[1] > 0.09:
            return np.array([button[0], button[1], .3])
        # Stay .05 back in Y until the Z error is small, then close to .02.
        y_back = -.05 if abs(hand[2] - button[2]) > 0.02 else -.02
        return button + np.array([.0, y_back, .0])

    @staticmethod
    def _grab_effort(o_d):
        """Return 1. while any approach stage is unfinished, else -1.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            float: ``1.`` or ``-1.``.
        """
        hand = o_d['hand_pos']
        button = o_d['button_pos'] + np.array([.0, .0, .04])

        x_off = abs(hand[0] - button[0]) > 0.02
        y_short = button[1] - hand[1] > 0.09
        z_off = abs(hand[2] - button[2]) > 0.02
        return 1. if (x_off or y_short or z_off) else -1.
class SawyerButtonPressWallV2Policy(Policy):
    """Scripted policy: align in X, travel high in Y, drop, then press."""

    @staticmethod
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``hand_closed`` (3), ``button_pos``
                (4:7) and ``unused_info`` (7 onward).
        """
        return dict(
            hand_pos=obs[:3],
            hand_closed=obs[3],
            button_pos=obs[4:7],
            unused_info=obs[7:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=15.)
        act['grab_effort'] = self._grab_effort(parsed)

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        button = o_d['button_pos'] + np.array([.0, .0, .04])

        # X not aligned: fix X at height .3, keep the current Y.
        if abs(hand[0] - button[0]) > 0.02:
            return np.array([button[0], hand[1], .3])
        # Still more than 0.09 short of the button in Y: travel at height .3.
        if button[1] - hand[1] > 0.09:
            return np.array([button[0], button[1], .3])
        # Stay .05 back in Y until the Z error is small, then close to .02.
        y_back = -.05 if abs(hand[2] - button[2]) > 0.02 else -.02
        return button + np.array([.0, y_back, .0])

    @staticmethod
    def _grab_effort(o_d):
        """Return 1. while any approach stage is unfinished, else -1.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            float: ``1.`` or ``-1.``.
        """
        hand = o_d['hand_pos']
        button = o_d['button_pos'] + np.array([.0, .0, .04])

        x_off = abs(hand[0] - button[0]) > 0.02
        y_short = button[1] - hand[1] > 0.09
        z_off = abs(hand[2] - button[2]) > 0.02
        return 1. if (x_off or y_short or z_off) else -1.
class SawyerCoffeeButtonV2Policy(Policy):
    """Scripted policy: line up with the button in X/Z, then push along Y."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``gripper`` (3), ``button_pos`` (4:7)
                and ``unused_info`` (7 onward).
        """
        return dict(
            hand_pos=obs[:3],
            gripper=obs[3],
            button_pos=obs[4:7],
            unused_info=obs[7:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=10.)
        act['grab_effort'] = -1.

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        button = o_d['button_pos'] + np.array([.0, .0, -.07])

        xz = [0, 2]
        xz_gap = np.linalg.norm(hand[xz] - button[xz])
        if xz_gap > 0.02:
            # Match the target in X and Z first; leave Y where it is.
            return np.array([button[0], hand[1], button[2]])
        # Aligned: aim .2 past the target along Y.
        return button + np.array([.0, .2, .0])
class SawyerCoffeePushV2Policy(Policy):
    """Scripted policy: reach the mug, grip it, then move to the goal XY."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``gripper`` (3), ``mug_pos`` (4:7),
                ``goal_xy`` (-3:-1), ``unused_info_1`` (7:-3) and
                ``unused_info_2`` (last entry).
        """
        return dict(
            hand_pos=obs[:3],
            gripper=obs[3],
            mug_pos=obs[4:7],
            goal_xy=obs[-3:-1],
            unused_info_1=obs[7:-3],
            unused_info_2=obs[-1],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=10.)
        act['grab_effort'] = self._grab_effort(parsed)

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        mug = o_d['mug_pos'] + np.array([.01, .0, .05])
        goal = o_d['goal_xy']

        # XY distance to the mug target above 0.06: hover .2 above it.
        if np.linalg.norm(hand[:2] - mug[:2]) > 0.06:
            return mug + np.array([.0, .0, .2])
        # Over the mug but Z error above 0.02: descend.
        if abs(hand[2] - mug[2]) > 0.02:
            return mug
        # At the mug: head for the goal XY at height .1.
        return np.array([goal[0], goal[1], .1])

    @staticmethod
    def _grab_effort(o_d):
        """Return -1. while approaching the mug and .5 once close to it.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            float: ``-1.`` or ``.5``.
        """
        hand = o_d['hand_pos']
        mug = o_d['mug_pos'] + np.array([.01, .0, .05])

        xy_far = np.linalg.norm(hand[:2] - mug[:2]) > 0.06
        z_far = abs(hand[2] - mug[2]) > 0.1
        return -1. if (xy_far or z_far) else .5
class SawyerDialTurnV1Policy(Policy):
    """Scripted policy: match the dial's height, line up in Y, sweep in X."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``dial_pos`` (3:6) and ``goal_pos``
                (6 onward).
        """
        return dict(
            hand_pos=obs[:3],
            dial_pos=obs[3:6],
            goal_pos=obs[6:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_pow': 3
        })

        target = self._desired_xyz(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=5.)
        act['grab_pow'] = 0.

        return act.array

    @staticmethod
    def _desired_xyz(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        dial = o_d['dial_pos'] + np.array([0.0, -0.028, 0.0])

        # Z error above 0.02: change height only.
        if abs(hand[2] - dial[2]) > 0.02:
            return np.array([hand[0], hand[1], dial[2]])
        # While Y is off, aim 0.20 to the +X side of the dial target;
        # once lined up, aim 0.10 to the -X side.
        x_shift = 0.20 if abs(hand[1] - dial[1]) > 0.02 else -0.10
        return np.array([dial[0] + x_shift, dial[1], dial[2]])
class SawyerDialTurnV2Policy(Policy):
    """Scripted policy: hover over the dial target, descend, then nudge it."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``unused_gripper_open`` (3),
                ``dial_pos`` (4:7) and ``extra_info`` (7 onward).
        """
        return dict(
            hand_pos=obs[:3],
            unused_gripper_open=obs[3],
            dial_pos=obs[4:7],
            extra_info=obs[7:],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_pow': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=10.)
        act['grab_pow'] = 1.

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        dial = o_d['dial_pos'] + np.array([0.05, 0.02, 0.09])

        xy_gap = np.linalg.norm(hand[:2] - dial[:2])
        z_gap = abs(hand[2] - dial[2])

        if xy_gap > 0.02:
            # Travel to the target's XY at a fixed height of 0.2.
            return np.array([*dial[:2], 0.2])
        if z_gap > 0.02:
            # Over the target: descend onto it.
            return dial
        # In place: aim slightly past the target (-.05 in X, +.005 in Y).
        return dial + np.array([-.05, .005, .0])
class SawyerDisassembleV1Policy(Policy):
    """Scripted policy: grip the wrench, lift it off the peg, move away."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``wrench_pos`` (3:6), ``peg_pos``
                (9 onward) and ``unused_info`` (6:9).
        """
        return dict(
            hand_pos=obs[:3],
            wrench_pos=obs[3:6],
            peg_pos=obs[9:],
            unused_info=obs[6:9],
        )

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        parsed = self._parse_obs(obs)

        act = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        target = self._desired_pos(parsed)
        act['delta_pos'] = move(parsed['hand_pos'], to_xyz=target, p=10.)
        act['grab_effort'] = self._grab_effort(parsed)

        return act.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        hand = o_d['hand_pos']
        wrench = o_d['wrench_pos'] + np.array([.01, -.01, .01])
        peg = o_d['peg_pos'] + np.array([.07, .0, .15])

        # XY error above 0.02: place the hand 0.12 above the wrench target.
        if np.linalg.norm(hand[:2] - wrench[:2]) > 0.02:
            return wrench + np.array([0., 0., 0.12])
        # XY is close enough: drop down onto the wrench target.
        if abs(hand[2] - wrench[2]) > 0.03:
            return wrench
        # Wrench target still below 0.12 (still hooked on the peg): go up.
        if wrench[2] < 0.12:
            return peg + np.array([.0, .0, .1])
        # Clear of the peg: move away along -Y.
        return hand + np.array([.0, -.1, .0])

    @staticmethod
    def _grab_effort(o_d):
        """Return 0. while approaching the wrench and 0.8 once close to it.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            float: ``0.`` or ``0.8``.
        """
        hand = o_d['hand_pos']
        # Uses an X-only offset, unlike the target in _desired_pos.
        wrench = o_d['wrench_pos'] + np.array([.01, .0, .0])

        xy_far = np.linalg.norm(hand[:2] - wrench[:2]) > 0.02
        z_far = abs(hand[2] - wrench[2]) > 0.08
        return 0. if (xy_far or z_far) else 0.8
class SawyerDisassembleV2Policy(Policy):
    """Scripted policy: reach the wrench, grip it, then lift straight up."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``gripper`` (3), ``wrench_pos`` (4:7),
                ``peg_pos`` (last three entries) and ``unused_info`` (7:-3).
        """
        return {
            'hand_pos': obs[:3],
            'gripper': obs[3],
            'wrench_pos': obs[4:7],
            'peg_pos': obs[-3:],
            'unused_info': obs[7:-3],
        }

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        o_d = self._parse_obs(obs)

        action = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.)
        action['grab_effort'] = self._grab_effort(o_d)

        return action.array

    @staticmethod
    def _desired_pos(o_d):
        """Pick the position the hand should be steered to.

        The peg position is not needed by any stage, so it is not read here.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: Target position for the current stage.
        """
        pos_curr = o_d['hand_pos']
        pos_wrench = o_d['wrench_pos'] + np.array([-.02, .0, .01])

        # If XY error is greater than 0.02, place end effector above the wrench
        if np.linalg.norm(pos_curr[:2] - pos_wrench[:2]) > 0.02:
            return pos_wrench + np.array([0., 0., 0.1])
        # Once XY error is low enough, drop end effector down on top of wrench
        if abs(pos_curr[2] - pos_wrench[2]) > 0.03:
            return pos_wrench
        # Move upwards
        return pos_curr + np.array([.0, .0, .1])

    @staticmethod
    def _grab_effort(o_d):
        """Return 0. while approaching the wrench and 0.8 once close to it.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            float: ``0.`` or ``0.8``.
        """
        pos_curr = o_d['hand_pos']
        pos_wrench = o_d['wrench_pos'] + np.array([-.02, .0, .01])

        if np.linalg.norm(pos_curr[:2] - pos_wrench[:2]) > 0.02 or abs(pos_curr[2] - pos_wrench[2]) > 0.07:
            return 0.
        return 0.8
class SawyerDoorCloseV2Policy(Policy):
    """Scripted policy that drives the hand straight to the goal position."""

    @staticmethod
    @assert_fully_parsed
    def _parse_obs(obs):
        """Split a flat observation into named pieces.

        Args:
            obs (np.ndarray): Observation vector.

        Returns:
            dict: ``hand_pos`` (0:3), ``unused_1`` (3), ``door_pos`` (4:7),
                ``unused_2`` (7:-3) and ``goal_pos`` (last three entries).
        """
        return {
            'hand_pos': obs[:3],
            'unused_1': obs[3],
            'door_pos': obs[4:7],
            'unused_2': obs[7:-3],
            'goal_pos': obs[-3:],
        }

    def get_action(self, obs):
        """Return the action array for one observation.

        Args:
            obs (np.ndarray): Observation accepted by ``_parse_obs``. It is
                not modified.

        Returns:
            The ``array`` attribute of the populated ``Action``.
        """
        o_d = self._parse_obs(obs)

        action = Action({
            'delta_pos': np.arange(3),
            'grab_effort': 3
        })

        action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.)
        action['grab_effort'] = 1.

        return action.array

    @staticmethod
    def _desired_pos(o_d):
        """Return the position the hand should be steered to.

        A staged approach via the door handle (rise above it, approach in
        XY, then push) is disabled; the goal position is always the target.

        Args:
            o_d (dict): Output of ``_parse_obs``.

        Returns:
            np.ndarray: ``o_d['goal_pos']``.
        """
        # Do not adjust o_d['door_pos'] in place here: it is a slice of the
        # observation array, so an in-place update would write through to
        # the caller's observation on every call.
        return o_d['goal_pos']
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_door = o_d['door_pos'] + pos_door += np.array([0.05, 0.12, 0.1]) + pos_goal = o_d['goal_pos'] + + # # if to the right of door handle/// + # if pos_curr[0] > pos_door[0]: + # # if below door handle by more than 0.2 + # if pos_curr[2] < pos_door[2] + 0.2: + # # rise above door handle by ~0.2 + # return np.array([pos_curr[0], pos_curr[1], pos_door[2] + 0.25]) + # else: + # # move toward door handle in XY plane + # return np.array([pos_door[0] - 0.02, pos_door[1], pos_curr[2]]) + # # put end effector on the outer edge of door handle (still above it) + # elif abs(pos_curr[2] - pos_door[2]) > 0.04: + # return pos_door + np.array([-0.02, 0., 0.]) + # # push from outer edge toward door handle's centroid + # else: + return pos_goal diff --git a/Metaworld/metaworld/policies/sawyer_drawer_close_v1_policy.py b/Metaworld/metaworld/policies/sawyer_drawer_close_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..12065195513a8e343ae1cfd98183c8bb935215d0 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_drawer_close_v1_policy.py @@ -0,0 +1,49 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerDrawerCloseV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'drwr_pos': obs[3:6], + 'unused_info': obs[6:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.) + action['grab_effort'] = 1. + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_drwr = o_d['drwr_pos'] + + # if further forward than the drawer... 
+ if pos_curr[1] > pos_drwr[1]: + if pos_curr[2] < pos_drwr[2] + 0.4: + # rise up quickly (Z direction) + return np.array([pos_curr[0], pos_curr[1], pos_drwr[2] + 0.5]) + else: + # move to front edge of drawer handle, but stay high in Z + return pos_drwr + np.array([0., -0.075, 0.4]) + # drop down to touch drawer handle + elif abs(pos_curr[2] - pos_drwr[2]) > 0.04: + return pos_drwr + np.array([0., -0.075, 0.]) + # push toward drawer handle's centroid + else: + return pos_drwr diff --git a/Metaworld/metaworld/policies/sawyer_drawer_close_v2_policy.py b/Metaworld/metaworld/policies/sawyer_drawer_close_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..6a26ea0c4402a4c6b4279d75098db3a471af155f --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_drawer_close_v2_policy.py @@ -0,0 +1,50 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerDrawerCloseV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_grasp_info': obs[3], + 'drwr_pos': obs[4:7], + 'unused_info': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_drwr = o_d['drwr_pos'] + np.array([.0, .0, -.02]) + + # if further forward than the drawer... 
+ if pos_curr[1] > pos_drwr[1]: + if pos_curr[2] < pos_drwr[2] + 0.23: + # rise up quickly (Z direction) + return np.array([pos_curr[0], pos_curr[1], pos_drwr[2] + 0.5]) + else: + # move to front edge of drawer handle, but stay high in Z + return pos_drwr + np.array([0., -0.075, 0.23]) + # drop down to touch drawer handle + elif abs(pos_curr[2] - pos_drwr[2]) > 0.04: + return pos_drwr + np.array([0., -0.075, 0.]) + # push toward drawer handle's centroid + else: + return pos_drwr diff --git a/Metaworld/metaworld/policies/sawyer_drawer_open_v1_policy.py b/Metaworld/metaworld/policies/sawyer_drawer_open_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..21c1930d8f0c2baa48c618d813be9aba5edc67fa --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_drawer_open_v1_policy.py @@ -0,0 +1,48 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerDrawerOpenV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'drwr_pos': obs[3:6], + 'unused_info': obs[6:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + # NOTE this policy looks different from the others because it must + # modify its p constant part-way through the task + pos_curr = o_d['hand_pos'] + pos_drwr = o_d['drwr_pos'] + + # align end effector's Z axis with drawer handle's Z axis + if np.linalg.norm(pos_curr[:2] - pos_drwr[:2]) > 0.06: + to_pos = pos_drwr + np.array([0., 0., 0.3]) + action['delta_pos'] = move(o_d['hand_pos'], to_pos, p=4.) + # drop down to touch drawer handle + elif abs(pos_curr[2] - pos_drwr[2]) > 0.04: + to_pos = pos_drwr + action['delta_pos'] = move(o_d['hand_pos'], to_pos, p=4.) 
+ # push toward a point just behind the drawer handle + # also increase p value to apply more force + else: + to_pos = pos_drwr + np.array([0., -0.06, 0.]) + action['delta_pos'] = move(o_d['hand_pos'], to_pos, p=50.) + + # keep gripper open + action['grab_effort'] = -1. + + return action.array diff --git a/Metaworld/metaworld/policies/sawyer_faucet_close_v1_policy.py b/Metaworld/metaworld/policies/sawyer_faucet_close_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..a5536d16402cea7404703ca0cb5812a0d312e696 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_faucet_close_v1_policy.py @@ -0,0 +1,41 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerFaucetCloseV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'faucet_pos': obs[3:6], + 'unused_info': obs[6:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_faucet = o_d['faucet_pos'] + np.array([.02, .0, .0]) + + if np.linalg.norm(pos_curr[:2] - pos_faucet[:2]) > 0.04: + return pos_faucet + np.array([.0, .0, .1]) + elif abs(pos_curr[2] - pos_faucet[2]) > 0.04: + return pos_faucet + else: + return pos_faucet + np.array([-.1, .05, .0]) diff --git a/Metaworld/metaworld/policies/sawyer_faucet_open_v2_policy.py b/Metaworld/metaworld/policies/sawyer_faucet_open_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..b5aa39399f67162b5369e1e9fe3cea565afbbd67 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_faucet_open_v2_policy.py @@ -0,0 +1,42 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerFaucetOpenV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_gripper': obs[3], + 'faucet_pos': obs[4:7], + 'unused_info': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_faucet = o_d['faucet_pos'] + np.array([-.04, .0, .03]) + + if np.linalg.norm(pos_curr[:2] - pos_faucet[:2]) > 0.04: + return pos_faucet + np.array([.0, .0, .1]) + elif abs(pos_curr[2] - pos_faucet[2]) > 0.04: + return pos_faucet + else: + return pos_faucet + np.array([.1, .05, .0]) diff --git a/Metaworld/metaworld/policies/sawyer_hand_insert_v2_policy.py b/Metaworld/metaworld/policies/sawyer_hand_insert_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..2cc5836d3bd3aa0a46299763d914b8fa87190b03 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_hand_insert_v2_policy.py @@ -0,0 +1,59 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerHandInsertV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'gripper': obs[3], + 'obj_pos': obs[4:7], + 'goal_pos': obs[-3:], + 'unused_info': obs[7:-3], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.) 
+ action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + hand_pos = o_d['hand_pos'] + obj_pos = o_d['obj_pos'] + goal_pos = o_d['goal_pos'] + + # If error in the XY plane is greater than 0.02, place end effector above the puck + if np.linalg.norm(hand_pos[:2] - obj_pos[:2]) > 0.02: + return obj_pos + np.array([0., 0., 0.1]) + # Once XY error is low enough, drop end effector down on top of puck + elif abs(hand_pos[2] - obj_pos[2]) > 0.05: + return obj_pos + np.array([0., 0., 0.03]) + # If not above goal, move to be directly above goal + elif np.linalg.norm(hand_pos[:2] - goal_pos[:2]) > 0.04: + return np.array([goal_pos[0], goal_pos[1], hand_pos[2]]) + else: + return goal_pos + + @staticmethod + def _grab_effort(o_d): + hand_pos = o_d['hand_pos'] + obj_pos = o_d['obj_pos'] + + if np.linalg.norm(hand_pos[:2] - obj_pos[:2]) > 0.02 or abs(hand_pos[2] - obj_pos[2]) > 0.1: + return 0. + else: + return 0.65 diff --git a/Metaworld/metaworld/policies/sawyer_handle_press_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_handle_press_side_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..5b8a1548c77c63fa52420a6c8154236ac0da463b --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_handle_press_side_v2_policy.py @@ -0,0 +1,40 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerHandlePressSideV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'gripper': obs[3], + 'handle_pos': obs[4:7], + 'unused_info': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_button = o_d['handle_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_button[:2]) > 0.02: + return pos_button + np.array([0., 0., 0.2]) + else: + return pos_button + np.array([.0, .0, -.5]) diff --git a/Metaworld/metaworld/policies/sawyer_handle_pull_side_v1_policy.py b/Metaworld/metaworld/policies/sawyer_handle_pull_side_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..68bf0794421c667ec501f3c68bd6c11e4197a3c0 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_handle_pull_side_v1_policy.py @@ -0,0 +1,43 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerHandlePullSideV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'handle_pos': obs[3:6], + 'unused_info': obs[6:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_button = o_d['handle_pos'] + np.array([.02, .0, .0]) + + if abs(pos_curr[1] - pos_button[1]) > 0.04: + return pos_button + np.array([0., 0., 0.2]) + elif abs(pos_curr[2] - pos_button[2]) > 0.03: + return pos_button + np.array([.1, .0, -.01]) + elif abs(pos_curr[0] - pos_button[0]) > .01: + return np.array([pos_button[0] - .04, pos_button[1], pos_curr[2]]) + else: + return pos_button + np.array([-.04, .0, .1]) diff --git a/Metaworld/metaworld/policies/sawyer_handle_pull_v2_policy.py b/Metaworld/metaworld/policies/sawyer_handle_pull_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..290e917ae2230dc561f7e041f4f0c510241d3e3e --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_handle_pull_v2_policy.py @@ -0,0 +1,44 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerHandlePullV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'handle_pos': obs[4:7], + 'unused_info': obs[6:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_handle = o_d['handle_pos'] + np.array([0, -0.04, 0]) + + if np.linalg.norm(pos_curr[:2] - pos_handle[:2]) > 0.02: + return pos_handle + if abs(pos_curr[2] - pos_handle[2]) > 0.02: + return pos_handle[2] + return pos_handle + np.array([0., 0., 0.1]) + + @staticmethod + def _grab_effort(o_d): + return 1. 
diff --git a/Metaworld/metaworld/policies/sawyer_lever_pull_v2_policy.py b/Metaworld/metaworld/policies/sawyer_lever_pull_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..ca79aa64fa6856e77b147902c151ed64a35d02fe --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_lever_pull_v2_policy.py @@ -0,0 +1,42 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerLeverPullV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'gripper': obs[3], + 'lever_pos': obs[4:7], + 'unused_info': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_lever = o_d['lever_pos'] + np.array([.0, -.055, .0]) + + if np.linalg.norm(pos_curr[:2] - pos_lever[:2]) > 0.02: + return pos_lever + np.array([0., 0., -0.1]) + elif abs(pos_curr[2] - pos_lever[2]) > 0.02: + return pos_lever + else: + return pos_lever + np.array([.0, .08, .02]) diff --git a/Metaworld/metaworld/policies/sawyer_peg_unplug_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_peg_unplug_side_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..212ae3ccd9115a6b1ddf98840a793aebd653e632 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_peg_unplug_side_v2_policy.py @@ -0,0 +1,53 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPegUnplugSideV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_gripper': 
obs[3], + 'peg_pos': obs[4:7], + 'unused_info': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_peg = o_d['peg_pos'] + np.array([-.02, .0, .035]) + + if np.linalg.norm(pos_curr[:2] - pos_peg[:2]) > 0.04: + return pos_peg + np.array([0., 0., 0.2]) + elif abs(pos_curr[2] - .15) > 0.02: + return np.array([*pos_peg[:2], .15]) + else: + return pos_curr + np.array([.01, .0, .0]) + + @staticmethod + def _grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_peg = o_d['peg_pos'] + np.array([-.02, .0, .035]) + + if np.linalg.norm(pos_curr[:2] - pos_peg[:2]) > 0.04 \ + or abs(pos_curr[2] - pos_peg[2]) > 0.15: + return -1. + else: + return .1 diff --git a/Metaworld/metaworld/policies/sawyer_pick_place_wall_v2_policy.py b/Metaworld/metaworld/policies/sawyer_pick_place_wall_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd8393e826c826ea2cd7cceca978e239d07c5e3 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_pick_place_wall_v2_policy.py @@ -0,0 +1,71 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, move, assert_fully_parsed + + +class SawyerPickPlaceWallV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'puck_pos': obs[4:7], + 'unused_2': obs[7:-3], + 'goal_pos': obs[-3:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self.desired_pos(o_d), p=10.) 
+ action['grab_effort'] = self.grab_effort(o_d) + + return action.array + + @staticmethod + def desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + np.array([-0.005, 0, 0]) + pos_goal = o_d['goal_pos'] + + # If error in the XY plane is greater than 0.02, place end effector above the puck + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.015: + return pos_puck + np.array([0., 0., 0.1]) + # Once XY error is low enough, drop end effector down on top of puck + elif abs(pos_curr[2] - pos_puck[2]) > 0.04 and pos_puck[-1] < 0.03: + return pos_puck + np.array([0., 0., 0.03]) + # Move to the goal + else: + # if wall is in the way of arm, straight up above the wall + if(-0.15 <= pos_curr[0] <= 0.35 and + 0.60 <= pos_curr[1] <= 0.80 and + pos_curr[2] < 0.25): + return pos_curr + [0, 0, 1] + #move towards the goal while staying above the wall + elif(-0.15 <= pos_curr[0] <= 0.35 and + 0.60 <= pos_curr[1] <= 0.80 and + pos_curr[2] < 0.35): + return np.array([pos_goal[0], pos_goal[1], pos_curr[2]]) + # If not at the same Z height as the goal, move up to that plane + elif abs(pos_curr[2] - pos_goal[2]) > 0.04: + return np.array([pos_curr[0], pos_curr[1], pos_goal[2]]) + return pos_goal + + @staticmethod + def grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.015 or abs(pos_curr[2] - pos_puck[2]) > 0.1: + return 0. 
+ # While end effector is moving down toward the puck, begin closing the grabber + else: + return 0.9 diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_back_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_back_side_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..f23800dbb592b8cb20a3549b17e0e2c1eebea4d7 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_plate_slide_back_side_v2_policy.py @@ -0,0 +1,42 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPlateSlideBackSideV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'puck_pos': obs[4:7], + 'unused_2': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_xyz(o_d), p=10.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_xyz(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + np.array([.023, .0, .025]) + + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.01: + return pos_puck + np.array([.0, .0, .07]) + elif abs(pos_curr[2] - pos_puck[2]) > 0.04: + return pos_puck + else: + return np.array([pos_curr[0] + .1, .6, pos_curr[2]]) diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_back_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_back_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..313743b6be5944d41f48c2025fda5020605c87cc --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_plate_slide_back_v2_policy.py @@ -0,0 +1,46 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPlateSlideBackV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'puck_pos': obs[4:7], + 'unused_2': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.) + action['grab_effort'] = -1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + np.array([.0, -.065, .025]) + + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.01: + return pos_puck + np.array([.0, .0, .1]) + elif abs(pos_curr[2] - pos_puck[2]) > 0.04: + return pos_puck + elif pos_curr[1] > .7: + return pos_curr + np.array([.0, -.1, .0]) + elif pos_curr[1] > .6: + return np.array([.15, .55, pos_curr[2]]) + else: + return np.array([pos_curr[0] - .1, .55, pos_curr[2]]) diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_side_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_side_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..ce1323744f1f46f17e9c9b56de68ce389f85e5c1 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_plate_slide_side_v2_policy.py @@ -0,0 +1,49 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPlateSlideSideV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + # return { + # 'hand_pos': obs[:3], + # 'puck_pos': obs[3:6], + # 'unused_info': obs[6:], + # } + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'puck_pos': obs[4:7], + 'unused_2': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + np.array([.07, .0, -.005]) + + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04: + return pos_puck + np.array([.0, .0, .1]) + elif abs(pos_curr[2] - pos_puck[2]) > 0.04: + return pos_puck + elif pos_curr[0] > -.2: + return np.array([pos_curr[0] - .1, .6, pos_curr[2]]) + else: + return pos_puck + np.array([-.1, .0, .0]) diff --git a/Metaworld/metaworld/policies/sawyer_plate_slide_v2_policy.py b/Metaworld/metaworld/policies/sawyer_plate_slide_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..b4a34cd71fd4254a4ef989bc7d111ba74d413945 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_plate_slide_v2_policy.py @@ -0,0 +1,46 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPlateSlideV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'puck_pos': obs[4:7], + 'unused_2': obs[7:-3], + 'shelf_x': obs[-3], + 'unused_3': obs[-2:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.) + action['grab_effort'] = -1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + np.array([.0, -.055, .03]) + + aligned_with_puck = np.linalg.norm(pos_curr[:2] - pos_puck[:2]) <= 0.03 + + if not aligned_with_puck: + return pos_puck + np.array([.0, .0, .1]) + elif abs(pos_curr[2] - pos_puck[2]) > 0.04: + return pos_puck + else: + return np.array([o_d['shelf_x'], .9, pos_puck[2]]) diff --git a/Metaworld/metaworld/policies/sawyer_push_back_v1_policy.py b/Metaworld/metaworld/policies/sawyer_push_back_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..9eb958f9326da982bc87615f02760771ce127070 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_push_back_v1_policy.py @@ -0,0 +1,56 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPushBackV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'puck_pos': obs[3:6], + 'goal_pos': obs[9:], + 'unused_info': obs[6:9], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=10.) 
+ action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + + # If error in the XY plane is greater than 0.02, place end effector above the puck + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.05: + return pos_puck + np.array([0., 0., 0.15]) + # Once XY error is low enough, drop end effector down on top of puck + elif abs(pos_curr[2] - pos_puck[2]) > 0.04: + return pos_puck + np.array([0., 0., 0.03]) + # Move to the goal + else: + return o_d['goal_pos'] + np.array([.0, .0, .05]) + + @staticmethod + def _grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04 or abs(pos_curr[2] - pos_puck[2]) > 0.12: + return 0. + # While end effector is moving down toward the puck, begin closing the grabber + else: + return 0.6 diff --git a/Metaworld/metaworld/policies/sawyer_push_back_v2_policy.py b/Metaworld/metaworld/policies/sawyer_push_back_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..71ae54f741e2d05c9221ac47c265081621637a09 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_push_back_v2_policy.py @@ -0,0 +1,57 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPushBackV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'puck_pos': obs[4:7], + 'unused_2': obs[7:-3], + 'goal_pos': obs[-3:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=1.) 
+ action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + + # If error in the XY plane is greater than 0.02, place end effector above the puck + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04: + return pos_puck + np.array([0., 0., 0.3]) + # Once XY error is low enough, drop end effector down on top of puck + elif abs(pos_curr[2] - pos_puck[2]) > 0.055: + return pos_puck + # Move to the goal + else: + return o_d['goal_pos'] + np.array([0.0, 0.0, pos_curr[2]]) + + @staticmethod + def _grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_puck = o_d['puck_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_puck[:2]) > 0.04 or abs(pos_curr[2] - pos_puck[2]) > 0.085: + return 0. + # While end effector is moving down toward the puck, begin closing the grabber + else: + return 0.9 diff --git a/Metaworld/metaworld/policies/sawyer_push_wall_v2_policy.py b/Metaworld/metaworld/policies/sawyer_push_wall_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..d13c2225b45393822148acad03a1b68ed0f512f8 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_push_wall_v2_policy.py @@ -0,0 +1,64 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerPushWallV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'obj_pos': obs[4:7], + 'unused_2': obs[7:-3], + 'goal_pos': obs[-3:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self.desired_pos(o_d), p=10.) 
+ action['grab_effort'] = self.grab_effort(o_d) + + return action.array + + @staticmethod + def desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_obj = o_d['obj_pos'] + np.array([-0.005, 0, 0]) + + # If error in the XY plane is greater than 0.02, place end effector above the puck + if np.linalg.norm(pos_curr[:2] - pos_obj[:2]) > 0.02: + return pos_obj + np.array([0., 0., 0.2]) + # Once XY error is low enough, drop end effector down on top of obj + elif abs(pos_curr[2] - pos_obj[2]) > 0.04: + return pos_obj + np.array([0., 0., 0.03]) + # Move to the goal + else: + #if the wall is between the puck and the goal, go around the wall + if(-0.1 <= pos_obj[0] <= 0.3 and 0.65 <= pos_obj[1] <= 0.75): + return pos_curr + np.array([-1, 0, 0]) + elif ((-0.15 < pos_obj[0] < 0.05 or 0.15 < pos_obj[0] < 0.35) + and 0.695 <= pos_obj[1] <= 0.755): + return pos_curr + np.array([0, 1, 0]) + return o_d['goal_pos'] + + @staticmethod + def grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_obj = o_d['obj_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_obj[:2]) > 0.02 or \ + abs(pos_curr[2] - pos_obj[2]) > 0.1: + return 0.0 + # While end effector is moving down toward the obj, begin closing the grabber + else: + return 0.6 diff --git a/Metaworld/metaworld/policies/sawyer_shelf_place_v1_policy.py b/Metaworld/metaworld/policies/sawyer_shelf_place_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf91318765dc177f73fe2d8dddb9bd6116d81cc --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_shelf_place_v1_policy.py @@ -0,0 +1,64 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerShelfPlaceV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'block_pos': obs[3:6], + 'shelf_x': obs[-3], + 'unused_info': obs[[6, 7, 8, 10, 11]], + } + + def get_action(self, obs): + o_d = 
self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_block = o_d['block_pos'] + np.array([.005, .0, .015]) + pos_shelf_x = o_d['shelf_x'] + if np.linalg.norm(pos_curr[:2] - pos_block[:2]) > 0.04: + # positioning over block + return pos_block + np.array([0., 0., 0.3]) + elif abs(pos_curr[2] - pos_block[2]) > 0.02: + # grabbing block + return pos_block + elif np.abs(pos_curr[0] - pos_shelf_x) > 0.02: + # centering with goal pos + return np.array([pos_shelf_x, pos_curr[1], pos_curr[2]]) + elif pos_curr[2] < 0.25: + # move up to correct height + pos_new = pos_curr + np.array([0., 0., 0.25]) + return pos_new + else: + # move forward to goal + pos_new = pos_curr + np.array([0., 0.05, 0.]) + return pos_new + + @staticmethod + def _grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_block = o_d['block_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_block[:2]) > 0.04 \ + or abs(pos_curr[2] - pos_block[2]) > 0.15: + return -1. 
+ else: + return .7 diff --git a/Metaworld/metaworld/policies/sawyer_soccer_v1_policy.py b/Metaworld/metaworld/policies/sawyer_soccer_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..6c6937b319e48cd3db9524a8a13519b6f50d9c8d --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_soccer_v1_policy.py @@ -0,0 +1,48 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerSoccerV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'ball_pos': obs[3:6], + 'goal_pos': obs[9:], + 'unused_info': obs[6:9], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_ball = o_d['ball_pos'] + np.array([.0, .0, .03]) + pos_goal = o_d['goal_pos'] + + curr_to_ball = pos_ball - pos_curr + curr_to_ball /= np.linalg.norm(curr_to_ball) + + ball_to_goal = pos_goal - pos_ball + ball_to_goal /= np.linalg.norm(ball_to_goal) + + scaling = .1 + if np.dot(curr_to_ball[:2], ball_to_goal[:2]) < .7: + scaling *= -1 + + return pos_ball + scaling * ball_to_goal diff --git a/Metaworld/metaworld/policies/sawyer_stick_push_v1_policy.py b/Metaworld/metaworld/policies/sawyer_stick_push_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..d3f6233d7f2b867dfe46de5f3e1449419b91b729 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_stick_push_v1_policy.py @@ -0,0 +1,60 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerStickPushV1Policy(Policy): + + @staticmethod + 
@assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'stick_pos': obs[3:6], + 'obj_pos': obs[6:-3], + 'goal_pos': obs[-3:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_pow': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_xyz(o_d), p=10.) + action['grab_pow'] = self._grab_pow(o_d) + + return action.array + + @staticmethod + def _desired_xyz(o_d): + hand_pos = o_d['hand_pos'] + stick_pos = o_d['stick_pos'] + np.array([-0.02, 0.0, 0.0]) + obj_pos = o_d['obj_pos'] + goal_pos = o_d['goal_pos'] + + # If error in the XY plane is greater than 0.02, place end effector above the puck + if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02: + return stick_pos + np.array([0., 0., 0.1]) + # Once XY error is low enough, drop end effector down on top of puck + elif abs(hand_pos[2] - stick_pos[2]) > 0.05 and stick_pos[-1] < 0.03: + return stick_pos + np.array([0., 0., 0.03]) + elif abs(obj_pos[2]+0.05 - hand_pos[2]) > 0.01: + return np.array([hand_pos[0], hand_pos[1], obj_pos[2]+0.05]) + # Move to the goal + else: + return np.array([goal_pos[0], goal_pos[1], hand_pos[2]]) + + @staticmethod + def _grab_pow(o_d): + hand_pos = o_d['hand_pos'] + stick_pos = o_d['stick_pos'] + np.array([-0.02, 0.0, 0.0]) + + if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02 or abs(hand_pos[2] - stick_pos[2]) > 0.1: + return 0. 
+ # While end effector is moving down toward the puck, begin closing the grabber + else: + return 0.8 diff --git a/Metaworld/metaworld/policies/sawyer_stick_push_v2_policy.py b/Metaworld/metaworld/policies/sawyer_stick_push_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..2f7e2ec25ad9f495ea1d2ccd22102af81f765b30 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_stick_push_v2_policy.py @@ -0,0 +1,64 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerStickPushV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'stick_pos': obs[4:7], + 'unused_2': obs[7:11], + 'obj_pos': obs[11:14], + 'unused_3': obs[14:-3], + 'goal_pos': obs[-3:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_pow': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_xyz(o_d), p=10.) 
+ action['grab_pow'] = self._grab_pow(o_d) + + return action.array + + @staticmethod + def _desired_xyz(o_d): + hand_pos = o_d['hand_pos'] + stick_pos = o_d['stick_pos'] + np.array([.015, .0, .03]) + thermos_pos = o_d['obj_pos'] + goal_pos = o_d['goal_pos'] + np.array([.0, .0, .132]) + + if abs(stick_pos[0] - thermos_pos[0]) > 0.04: + if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02: + return stick_pos + np.array([0., 0., 0.1]) + elif abs(hand_pos[2] - stick_pos[2]) > 0.02: + return stick_pos + elif abs(stick_pos[1] - thermos_pos[1]) > 0.02: + return np.array([stick_pos[0], thermos_pos[1], stick_pos[2]]) + elif abs(stick_pos[2] - thermos_pos[2]) > 0.02: + return np.array([stick_pos[0], *thermos_pos[1:]]) + else: + return thermos_pos + else: + return goal_pos + + @staticmethod + def _grab_pow(o_d): + hand_pos = o_d['hand_pos'] + stick_pos = o_d['stick_pos'] + np.array([.015, .0, .03]) + + if np.linalg.norm(hand_pos[:2] - stick_pos[:2]) > 0.02 or abs(hand_pos[2] - stick_pos[2]) > 0.1: + return -1.0 + else: + return +0.7 diff --git a/Metaworld/metaworld/policies/sawyer_sweep_into_v1_policy.py b/Metaworld/metaworld/policies/sawyer_sweep_into_v1_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..034d3a7d1962102aa59f4950f341df42a1330917 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_sweep_into_v1_policy.py @@ -0,0 +1,52 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerSweepIntoV1Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'cube_pos': obs[3:6], + 'unused_info': obs[6:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) 
+ action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_cube = o_d['cube_pos'] + np.array([.0, .0, .015]) + + if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04: + return pos_cube + np.array([0., 0., 0.3]) + elif abs(pos_curr[2] - pos_cube[2]) > 0.02: + return pos_cube + else: + return np.array([.0, .8, .015]) + + @staticmethod + def _grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_cube = o_d['cube_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04 \ + or abs(pos_curr[2] - pos_cube[2]) > 0.15: + return -1. + else: + return .7 diff --git a/Metaworld/metaworld/policies/sawyer_sweep_v2_policy.py b/Metaworld/metaworld/policies/sawyer_sweep_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..fcf765baf3d108a1d216ab9deeec984558728b7e --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_sweep_v2_policy.py @@ -0,0 +1,58 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerSweepV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_1': obs[3], + 'cube_pos': obs[4:7], + 'unused_2': obs[7:-3], + 'goal_pos': obs[-3:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) 
+ action['grab_effort'] = self._grab_effort(o_d) + + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_cube = o_d['cube_pos'] + np.array([.0, .0, .015]) + pos_goal = o_d['goal_pos'] + + if pos_curr[0] < .2: + if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04: + return pos_cube + np.array([0., 0., 0.3]) + elif abs(pos_curr[2] - pos_cube[2]) > 0.04: + return pos_cube + + return pos_goal + np.array([0, 0, 0.1]) + + @staticmethod + def _grab_effort(o_d): + pos_curr = o_d['hand_pos'] + pos_cube = o_d['cube_pos'] + + if np.linalg.norm(pos_curr[:2] - pos_cube[:2]) > 0.04 \ + or abs(pos_curr[2] - pos_cube[2]) > 0.15: + return -1. + elif pos_cube[0] < .4: + return .7 + else: + return -1. diff --git a/Metaworld/metaworld/policies/sawyer_window_close_v2_policy.py b/Metaworld/metaworld/policies/sawyer_window_close_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..f202539d24494b6008754f61e75f26c5c148e3c3 --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_window_close_v2_policy.py @@ -0,0 +1,42 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerWindowCloseV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'gripper_unused': obs[3], + 'wndw_pos': obs[4:7], + 'unused_info': obs[7:], + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_wndw = o_d['wndw_pos'] + np.array([+0.03, -0.03, -0.08]) + + if np.linalg.norm(pos_curr[:2] - pos_wndw[:2]) > 0.04: + return pos_wndw + np.array([0., 0., 0.25]) + elif abs(pos_curr[2] - pos_wndw[2]) > 0.02: + return pos_wndw + else: + return pos_wndw + np.array([-0.1, 0., 0.]) diff --git a/Metaworld/metaworld/policies/sawyer_window_open_v2_policy.py b/Metaworld/metaworld/policies/sawyer_window_open_v2_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..22cc282a596660e32e2c8fc9f2f302b7bf5a8c2b --- /dev/null +++ b/Metaworld/metaworld/policies/sawyer_window_open_v2_policy.py @@ -0,0 +1,42 @@ +import numpy as np + +from metaworld.policies.action import Action +from metaworld.policies.policy import Policy, assert_fully_parsed, move + + +class SawyerWindowOpenV2Policy(Policy): + + @staticmethod + @assert_fully_parsed + def _parse_obs(obs): + return { + 'hand_pos': obs[:3], + 'unused_gripper_open' : obs[3], + 'wndw_pos': obs[4:7], + 'unused_info': obs[7:] + } + + def get_action(self, obs): + o_d = self._parse_obs(obs) + + action = Action({ + 'delta_pos': np.arange(3), + 'grab_effort': 3 + }) + + action['delta_pos'] = move(o_d['hand_pos'], to_xyz=self._desired_pos(o_d), p=25.) + action['grab_effort'] = 1. 
+ + return action.array + + @staticmethod + def _desired_pos(o_d): + pos_curr = o_d['hand_pos'] + pos_wndw = o_d['wndw_pos'] + np.array([-0.03, -0.03, -0.08]) + + if np.linalg.norm(pos_curr[:2] - pos_wndw[:2]) > 0.04: + return pos_wndw + np.array([0., 0., 0.3]) + elif abs(pos_curr[2] - pos_wndw[2]) > 0.02: + return pos_wndw + else: + return pos_wndw + np.array([0.1, 0., 0.]) diff --git a/gym-0.21.0/gym.egg-info/requires.txt b/gym-0.21.0/gym.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..9995362a17212576fff17e96aa17e7283ce6ff28 --- /dev/null +++ b/gym-0.21.0/gym.egg-info/requires.txt @@ -0,0 +1,58 @@ +numpy>=1.18.0 +cloudpickle>=1.2.0 + +[:python_version < "3.8"] +importlib_metadata>=4.8.1 + +[accept-rom-license] +autorom[accept-rom-license]~=0.4.2 + +[all] +mujoco_py<2.0,>=1.50 +scipy>=1.4.1 +box2d-py==2.3.5 +pyglet>=1.4.0 +scipy>=1.4.1 +lz4>=3.1.0 +opencv-python>=3.0 +pyglet>=1.4.0 +ale-py~=0.7.1 +mujoco_py<2.0,>=1.50 +lz4>=3.1.0 +opencv-python>=3.0 +ale-py~=0.7.1 +pyglet>=1.4.0 +box2d-py==2.3.5 +pyglet>=1.4.0 + +[atari] +ale-py~=0.7.1 + +[box2d] +box2d-py==2.3.5 +pyglet>=1.4.0 + +[classic_control] +pyglet>=1.4.0 + +[mujoco] +mujoco_py<2.0,>=1.50 + +[nomujoco] +box2d-py==2.3.5 +pyglet>=1.4.0 +scipy>=1.4.1 +lz4>=3.1.0 +opencv-python>=3.0 +pyglet>=1.4.0 +ale-py~=0.7.1 + +[other] +lz4>=3.1.0 +opencv-python>=3.0 + +[robotics] +mujoco_py<2.0,>=1.50 + +[toy_text] +scipy>=1.4.1 diff --git a/gym-0.21.0/gym/logger.py b/gym-0.21.0/gym/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..6f42a2cf987894cd3579bfa64f92d1481911b86d --- /dev/null +++ b/gym-0.21.0/gym/logger.py @@ -0,0 +1,43 @@ +import warnings + +from gym.utils import colorize + +DEBUG = 10 +INFO = 20 +WARN = 30 +ERROR = 40 +DISABLED = 50 + +MIN_LEVEL = 30 + + +def set_level(level): + """ + Set logging threshold on current logger. 
+ """ + global MIN_LEVEL + MIN_LEVEL = level + + +def debug(msg, *args): + if MIN_LEVEL <= DEBUG: + print("%s: %s" % ("DEBUG", msg % args)) + + +def info(msg, *args): + if MIN_LEVEL <= INFO: + print("%s: %s" % ("INFO", msg % args)) + + +def warn(msg, *args): + if MIN_LEVEL <= WARN: + warnings.warn(colorize("%s: %s" % ("WARN", msg % args), "yellow")) + + +def error(msg, *args): + if MIN_LEVEL <= ERROR: + print(colorize("%s: %s" % ("ERROR", msg % args), "red")) + + +# DEPRECATED: +setLevel = set_level diff --git a/gym-0.21.0/tests/spaces/test_spaces.py b/gym-0.21.0/tests/spaces/test_spaces.py new file mode 100644 index 0000000000000000000000000000000000000000..3bdc23ffca7afef8da1f6a18fab88f15d7543a9c --- /dev/null +++ b/gym-0.21.0/tests/spaces/test_spaces.py @@ -0,0 +1,437 @@ +import json # note: ujson fails this test due to float equality +import copy + +import numpy as np +import pytest + +from gym.spaces import Tuple, Box, Discrete, MultiDiscrete, MultiBinary, Dict + + +@pytest.mark.parametrize( + "space", + [ + Discrete(3), + Box(low=0.0, high=np.inf, shape=(2, 2)), + Tuple([Discrete(5), Discrete(10)]), + Tuple( + [ + Discrete(5), + Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32), + ] + ), + Tuple((Discrete(5), Discrete(2), Discrete(2))), + MultiDiscrete([2, 2, 100]), + MultiBinary(10), + Dict( + { + "position": Discrete(5), + "velocity": Box( + low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32 + ), + } + ), + ], +) +def test_roundtripping(space): + sample_1 = space.sample() + sample_2 = space.sample() + assert space.contains(sample_1) + assert space.contains(sample_2) + json_rep = space.to_jsonable([sample_1, sample_2]) + + json_roundtripped = json.loads(json.dumps(json_rep)) + + samples_after_roundtrip = space.from_jsonable(json_roundtripped) + sample_1_prime, sample_2_prime = samples_after_roundtrip + + s1 = space.to_jsonable([sample_1]) + s1p = space.to_jsonable([sample_1_prime]) + s2 = space.to_jsonable([sample_2]) + s2p = 
space.to_jsonable([sample_2_prime]) + assert s1 == s1p, "Expected {} to equal {}".format(s1, s1p) + assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p) + + +@pytest.mark.parametrize( + "space", + [ + Discrete(3), + Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32), + Box(low=-np.inf, high=np.inf, shape=(1, 3)), + Tuple([Discrete(5), Discrete(10)]), + Tuple( + [ + Discrete(5), + Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32), + ] + ), + Tuple((Discrete(5), Discrete(2), Discrete(2))), + MultiDiscrete([2, 2, 100]), + MultiBinary(6), + Dict( + { + "position": Discrete(5), + "velocity": Box( + low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32 + ), + } + ), + ], +) +def test_equality(space): + space1 = space + space2 = copy.copy(space) + assert space1 == space2, "Expected {} to equal {}".format(space1, space2) + + +@pytest.mark.parametrize( + "spaces", + [ + (Discrete(3), Discrete(4)), + (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])), + (MultiBinary(8), MultiBinary(7)), + ( + Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32), + Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32), + ), + ( + Box(low=-np.inf, high=0.0, shape=(2, 1)), + Box(low=0.0, high=np.inf, shape=(2, 1)), + ), + (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])), + (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})), + (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})), + ], +) +def test_inequality(spaces): + space1, space2 = spaces + assert space1 != space2, "Expected {} != {}".format(space1, space2) + + +@pytest.mark.parametrize( + "space", + [ + Discrete(5), + Box(low=0, high=255, shape=(2,), dtype="uint8"), + Box(low=-np.inf, high=np.inf, shape=(3, 3)), + Box(low=1.0, high=np.inf, shape=(3, 3)), + Box(low=-np.inf, high=2.0, shape=(3, 3)), + ], +) +def test_sample(space): + space.seed(0) + n_trials = 100 + samples = np.array([space.sample() for 
_ in range(n_trials)]) + expected_mean = 0.0 + if isinstance(space, Box): + if space.is_bounded(): + expected_mean = (space.high + space.low) / 2 + elif space.is_bounded("below"): + expected_mean = 1 + space.low + elif space.is_bounded("above"): + expected_mean = -1 + space.high + else: + expected_mean = 0.0 + elif isinstance(space, Discrete): + expected_mean = space.n / 2 + else: + raise NotImplementedError + np.testing.assert_allclose(expected_mean, samples.mean(), atol=3.0 * samples.std()) + + +@pytest.mark.parametrize( + "spaces", + [ + (Discrete(5), MultiBinary(5)), + ( + Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32), + MultiDiscrete([2, 2, 8]), + ), + ( + Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8), + Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8), + ), + (Dict({"position": Discrete(5)}), Tuple([Discrete(5)])), + (Dict({"position": Discrete(5)}), Discrete(5)), + (Tuple((Discrete(5),)), Discrete(5)), + ( + Box(low=np.array([-np.inf, 0.0]), high=np.array([0.0, np.inf])), + Box(low=np.array([-np.inf, 1.0]), high=np.array([0.0, np.inf])), + ), + ], +) +def test_class_inequality(spaces): + assert spaces[0] == spaces[0] + assert spaces[1] == spaces[1] + assert spaces[0] != spaces[1] + assert spaces[1] != spaces[0] + + +@pytest.mark.parametrize( + "space_fn", + [ + lambda: Dict(space1="abc"), + lambda: Dict({"space1": "abc"}), + lambda: Tuple(["abc"]), + ], +) +def test_bad_space_calls(space_fn): + with pytest.raises(AssertionError): + space_fn() + + +def test_seed_Dict(): + test_space = Dict( + { + "a": Box(low=0, high=1, shape=(3, 3)), + "b": Dict( + { + "b_1": Box(low=-100, high=100, shape=(2,)), + "b_2": Box(low=-1, high=1, shape=(2,)), + } + ), + "c": Discrete(5), + } + ) + + seed_dict = { + "a": 0, + "b": { + "b_1": 1, + "b_2": 2, + }, + "c": 3, + } + + test_space.seed(seed_dict) + + # "Unpack" the dict sub-spaces into individual spaces + a = Box(low=0, high=1, shape=(3, 3)) + a.seed(0) + b_1 = Box(low=-100, 
high=100, shape=(2,)) + b_1.seed(1) + b_2 = Box(low=-1, high=1, shape=(2,)) + b_2.seed(2) + c = Discrete(5) + c.seed(3) + + for i in range(10): + test_s = test_space.sample() + a_s = a.sample() + assert (test_s["a"] == a_s).all() + b_1_s = b_1.sample() + assert (test_s["b"]["b_1"] == b_1_s).all() + b_2_s = b_2.sample() + assert (test_s["b"]["b_2"] == b_2_s).all() + c_s = c.sample() + assert test_s["c"] == c_s + + +def test_box_dtype_check(): + # Related Issues: + # https://github.com/openai/gym/issues/2357 + # https://github.com/openai/gym/issues/2298 + + space = Box(0, 2, tuple(), dtype=np.float32) + + # casting will match the correct type + assert space.contains(0.5) + + # float64 is not in float32 space + assert not space.contains(np.array(0.5)) + assert not space.contains(np.array(1)) + + +@pytest.mark.parametrize( + "space", + [ + Discrete(3), + Box(low=0.0, high=np.inf, shape=(2, 2)), + Tuple([Discrete(5), Discrete(10)]), + Tuple( + [ + Discrete(5), + Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32), + ] + ), + Tuple((Discrete(5), Discrete(2), Discrete(2))), + MultiDiscrete([2, 2, 100]), + MultiBinary(10), + Dict( + { + "position": Discrete(5), + "velocity": Box( + low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32 + ), + } + ), + ], +) +def test_seed_returns_list(space): + def assert_integer_list(seed): + assert isinstance(seed, list) + assert len(seed) >= 1 + assert all([isinstance(s, int) for s in seed]) + + assert_integer_list(space.seed(None)) + assert_integer_list(space.seed(0)) + + +def convert_sample_hashable(sample): + if isinstance(sample, np.ndarray): + return tuple(sample.tolist()) + if isinstance(sample, (list, tuple)): + return tuple(convert_sample_hashable(s) for s in sample) + if isinstance(sample, dict): + return tuple( + (key, convert_sample_hashable(value)) for key, value in sample.items() + ) + + return sample + + +def sample_equal(sample1, sample2): + return convert_sample_hashable(sample1) == 
convert_sample_hashable(sample2) + + +@pytest.mark.parametrize( + "space", + [ + Discrete(3), + Box(low=0.0, high=np.inf, shape=(2, 2)), + Tuple([Discrete(5), Discrete(10)]), + Tuple( + [ + Discrete(5), + Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32), + ] + ), + Tuple((Discrete(5), Discrete(2), Discrete(2))), + MultiDiscrete([2, 2, 100]), + MultiBinary(10), + Dict( + { + "position": Discrete(5), + "velocity": Box( + low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32 + ), + } + ), + ], +) +def test_seed_reproducibility(space): + space1 = space + space2 = copy.deepcopy(space) + + space1.seed(None) + space2.seed(None) + + assert space1.seed(0) == space2.seed(0) + assert sample_equal(space1.sample(), space2.sample()) + + +@pytest.mark.parametrize( + "space", + [ + Tuple([Discrete(100), Discrete(100)]), + Tuple([Discrete(5), Discrete(10)]), + Tuple( + [ + Discrete(5), + Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32), + ] + ), + Tuple((Discrete(5), Discrete(2), Discrete(2))), + Dict( + { + "position": Discrete(5), + "velocity": Box( + low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32 + ), + } + ), + ], +) +def test_seed_subspace_incorrelated(space): + subspaces = space.spaces if isinstance(space, Tuple) else space.spaces.values() + + space.seed(0) + states = [ + convert_sample_hashable(subspace.np_random.get_state()) + for subspace in subspaces + ] + + assert len(states) == len(set(states)) + + +def test_multidiscrete_as_tuple(): + # 1D multi-discrete + space = MultiDiscrete([3, 4, 5]) + + assert space.shape == (3,) + assert space[0] == Discrete(3) + assert space[0:1] == MultiDiscrete([3]) + assert space[0:2] == MultiDiscrete([3, 4]) + assert space[:] == space and space[:] is not space + assert len(space) == 3 + + # 2D multi-discrete + space = MultiDiscrete([[3, 4, 5], [6, 7, 8]]) + + assert space.shape == (2, 3) + assert space[0, 1] == Discrete(4) + assert space[0] == MultiDiscrete([3, 4, 5]) + assert 
space[0:1] == MultiDiscrete([[3, 4, 5]]) + assert space[0:2, :] == MultiDiscrete([[3, 4, 5], [6, 7, 8]]) + assert space[:, 0:1] == MultiDiscrete([[3], [6]]) + assert space[0:2, 0:2] == MultiDiscrete([[3, 4], [6, 7]]) + assert space[:] == space and space[:] is not space + assert space[:, :] == space and space[:, :] is not space + + +def test_multidiscrete_subspace_reproducibility(): + # 1D multi-discrete + space = MultiDiscrete([100, 200, 300]) + space.seed(None) + + assert sample_equal(space[0].sample(), space[0].sample()) + assert sample_equal(space[0:1].sample(), space[0:1].sample()) + assert sample_equal(space[0:2].sample(), space[0:2].sample()) + assert sample_equal(space[:].sample(), space[:].sample()) + assert sample_equal(space[:].sample(), space.sample()) + + # 2D multi-discrete + space = MultiDiscrete([[300, 400, 500], [600, 700, 800]]) + space.seed(None) + + assert sample_equal(space[0, 1].sample(), space[0, 1].sample()) + assert sample_equal(space[0].sample(), space[0].sample()) + assert sample_equal(space[0:1].sample(), space[0:1].sample()) + assert sample_equal(space[0:2, :].sample(), space[0:2, :].sample()) + assert sample_equal(space[:, 0:1].sample(), space[:, 0:1].sample()) + assert sample_equal(space[0:2, 0:2].sample(), space[0:2, 0:2].sample()) + assert sample_equal(space[:].sample(), space[:].sample()) + assert sample_equal(space[:, :].sample(), space[:, :].sample()) + assert sample_equal(space[:, :].sample(), space.sample()) + + +def test_space_legacy_state_pickling(): + legacy_state = { + "shape": ( + 1, + 2, + 3, + ), + "dtype": np.int64, + "np_random": np.random.default_rng(), + "n": 3, + } + space = Discrete(1) + space.__setstate__(legacy_state) + + assert space.shape == legacy_state["shape"] + assert space._shape == legacy_state["shape"] + assert space.np_random == legacy_state["np_random"] + assert space._np_random == legacy_state["np_random"] + assert space.n == 3 + assert space.dtype == legacy_state["dtype"] diff --git 
a/gym-0.21.0/tests/utils/test_atexit.py b/gym-0.21.0/tests/utils/test_atexit.py new file mode 100644 index 0000000000000000000000000000000000000000..7dd60f5173e8d10c7b24ae84635eef1763af5a97 --- /dev/null +++ b/gym-0.21.0/tests/utils/test_atexit.py @@ -0,0 +1,24 @@ +from gym.utils.closer import Closer + + +class Closeable(object): + close_called = False + + def close(self): + self.close_called = True + + +def test_register_unregister(): + registry = Closer(atexit_register=False) + c1 = Closeable() + c2 = Closeable() + + assert not c1.close_called + assert not c2.close_called + registry.register(c1) + id2 = registry.register(c2) + + registry.unregister(id2) + registry.close() + assert c1.close_called + assert not c2.close_called diff --git a/gym-0.21.0/tests/vector/test_shared_memory.py b/gym-0.21.0/tests/vector/test_shared_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..bee2d7df42ec4758fe52768751baf8b8785a2653 --- /dev/null +++ b/gym-0.21.0/tests/vector/test_shared_memory.py @@ -0,0 +1,173 @@ +import pytest +import numpy as np + +import multiprocessing as mp +from multiprocessing.sharedctypes import SynchronizedArray +from multiprocessing import Array, Process +from collections import OrderedDict + +from gym.spaces import Tuple, Dict +from gym.error import CustomSpaceError +from gym.vector.utils.spaces import _BaseGymSpaces +from tests.vector.utils import spaces, custom_spaces + +from gym.vector.utils.shared_memory import ( + create_shared_memory, + read_from_shared_memory, + write_to_shared_memory, +) + + +expected_types = [ + Array("d", 1), + Array("f", 1), + Array("f", 3), + Array("f", 4), + Array("B", 1), + Array("B", 32 * 32 * 3), + Array("i", 1), + (Array("i", 1), Array("i", 1)), + (Array("i", 1), Array("f", 2)), + Array("B", 3), + Array("B", 19), + OrderedDict([("position", Array("i", 1)), ("velocity", Array("f", 1))]), + OrderedDict( + [ + ("position", OrderedDict([("x", Array("i", 1)), ("y", Array("i", 1))])), + ("velocity", 
(Array("i", 1), Array("B", 1))), + ] + ), +] + + +@pytest.mark.parametrize("n", [1, 8]) +@pytest.mark.parametrize( + "space,expected_type", + list(zip(spaces, expected_types)), + ids=[space.__class__.__name__ for space in spaces], +) +@pytest.mark.parametrize( + "ctx", [None, "fork", "spawn"], ids=["default", "fork", "spawn"] +) +def test_create_shared_memory(space, expected_type, n, ctx): + def assert_nested_type(lhs, rhs, n): + assert type(lhs) == type(rhs) + if isinstance(lhs, (list, tuple)): + assert len(lhs) == len(rhs) + for lhs_, rhs_ in zip(lhs, rhs): + assert_nested_type(lhs_, rhs_, n) + + elif isinstance(lhs, (dict, OrderedDict)): + assert set(lhs.keys()) ^ set(rhs.keys()) == set() + for key in lhs.keys(): + assert_nested_type(lhs[key], rhs[key], n) + + elif isinstance(lhs, SynchronizedArray): + # Assert the length of the array + assert len(lhs[:]) == n * len(rhs[:]) + # Assert the data type + assert type(lhs[0]) == type(rhs[0]) # noqa: E721 + + else: + raise TypeError("Got unknown type `{0}`.".format(type(lhs))) + + ctx = mp if (ctx is None) else mp.get_context(ctx) + shared_memory = create_shared_memory(space, n=n, ctx=ctx) + assert_nested_type(shared_memory, expected_type, n=n) + + +@pytest.mark.parametrize("n", [1, 8]) +@pytest.mark.parametrize( + "ctx", [None, "fork", "spawn"], ids=["default", "fork", "spawn"] +) +@pytest.mark.parametrize("space", custom_spaces) +def test_create_shared_memory_custom_space(n, ctx, space): + ctx = mp if (ctx is None) else mp.get_context(ctx) + with pytest.raises(CustomSpaceError): + shared_memory = create_shared_memory(space, n=n, ctx=ctx) + + +@pytest.mark.parametrize( + "space", spaces, ids=[space.__class__.__name__ for space in spaces] +) +def test_write_to_shared_memory(space): + def assert_nested_equal(lhs, rhs): + assert isinstance(rhs, list) + if isinstance(lhs, (list, tuple)): + for i in range(len(lhs)): + assert_nested_equal(lhs[i], [rhs_[i] for rhs_ in rhs]) + + elif isinstance(lhs, (dict, OrderedDict)): + 
for key in lhs.keys(): + assert_nested_equal(lhs[key], [rhs_[key] for rhs_ in rhs]) + + elif isinstance(lhs, SynchronizedArray): + assert np.all(np.array(lhs[:]) == np.stack(rhs, axis=0).flatten()) + + else: + raise TypeError("Got unknown type `{0}`.".format(type(lhs))) + + def write(i, shared_memory, sample): + write_to_shared_memory(i, sample, shared_memory, space) + + shared_memory_n8 = create_shared_memory(space, n=8) + samples = [space.sample() for _ in range(8)] + + processes = [ + Process(target=write, args=(i, shared_memory_n8, samples[i])) for i in range(8) + ] + + for process in processes: + process.start() + for process in processes: + process.join() + + assert_nested_equal(shared_memory_n8, samples) + + +@pytest.mark.parametrize( + "space", spaces, ids=[space.__class__.__name__ for space in spaces] +) +def test_read_from_shared_memory(space): + def assert_nested_equal(lhs, rhs, space, n): + assert isinstance(rhs, list) + if isinstance(space, Tuple): + assert isinstance(lhs, tuple) + for i in range(len(lhs)): + assert_nested_equal( + lhs[i], [rhs_[i] for rhs_ in rhs], space.spaces[i], n + ) + + elif isinstance(space, Dict): + assert isinstance(lhs, OrderedDict) + for key in lhs.keys(): + assert_nested_equal( + lhs[key], [rhs_[key] for rhs_ in rhs], space.spaces[key], n + ) + + elif isinstance(space, _BaseGymSpaces): + assert isinstance(lhs, np.ndarray) + assert lhs.shape == ((n,) + space.shape) + assert lhs.dtype == space.dtype + assert np.all(lhs == np.stack(rhs, axis=0)) + + else: + raise TypeError("Got unknown type `{0}`".format(type(space))) + + def write(i, shared_memory, sample): + write_to_shared_memory(i, sample, shared_memory, space) + + shared_memory_n8 = create_shared_memory(space, n=8) + memory_view_n8 = read_from_shared_memory(shared_memory_n8, space, n=8) + samples = [space.sample() for _ in range(8)] + + processes = [ + Process(target=write, args=(i, shared_memory_n8, samples[i])) for i in range(8) + ] + + for process in processes: + 
process.start() + for process in processes: + process.join() + + assert_nested_equal(memory_view_n8, samples, space, n=8) diff --git a/gym-0.21.0/tests/wrappers/test_atari_preprocessing.py b/gym-0.21.0/tests/wrappers/test_atari_preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..744867dbb46cc1471f5ae45e3884b2ba5c30bee0 --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_atari_preprocessing.py @@ -0,0 +1,93 @@ +import numpy as np +import gym +from gym.wrappers import AtariPreprocessing +import pytest + +pytest.importorskip("gym.envs.atari") + + +@pytest.fixture(scope="module") +def env_fn(): + return lambda: gym.make("PongNoFrameskip-v4") + + +def test_atari_preprocessing_grayscale(env_fn): + import cv2 + + env1 = env_fn() + env2 = AtariPreprocessing( + env_fn(), screen_size=84, grayscale_obs=True, frame_skip=1, noop_max=0 + ) + env3 = AtariPreprocessing( + env_fn(), screen_size=84, grayscale_obs=False, frame_skip=1, noop_max=0 + ) + env4 = AtariPreprocessing( + env_fn(), + screen_size=84, + grayscale_obs=True, + frame_skip=1, + noop_max=0, + grayscale_newaxis=True, + ) + env1.seed(0) + env2.seed(0) + env3.seed(0) + env4.seed(0) + obs1 = env1.reset() + obs2 = env2.reset() + obs3 = env3.reset() + obs4 = env4.reset() + assert env1.observation_space.shape == (210, 160, 3) + assert env2.observation_space.shape == (84, 84) + assert env3.observation_space.shape == (84, 84, 3) + assert env4.observation_space.shape == (84, 84, 1) + assert obs1.shape == (210, 160, 3) + assert obs2.shape == (84, 84) + assert obs3.shape == (84, 84, 3) + assert obs4.shape == (84, 84, 1) + assert np.allclose(obs3, cv2.resize(obs1, (84, 84), interpolation=cv2.INTER_AREA)) + obs3_gray = cv2.cvtColor(obs3, cv2.COLOR_RGB2GRAY) + # the edges of the numbers do not render quite the same in the grayscale, so we ignore them + assert np.allclose(obs2[10:38], obs3_gray[10:38]) + # the paddle also do not render quite the same + assert np.allclose(obs2[44:], obs3_gray[44:]) + 
# now add a channel axis and re-test + obs3_gray = obs3_gray.reshape(84, 84, 1) + assert np.allclose(obs4[10:38], obs3_gray[10:38]) + assert np.allclose(obs4[44:], obs3_gray[44:]) + + env1.close() + env2.close() + env3.close() + env4.close() + + +def test_atari_preprocessing_scale(env_fn): + # arbitrarily chosen number for stepping into env. and ensuring all observations are in the required range + max_test_steps = 10 + + for grayscale in [True, False]: + for scaled in [True, False]: + env = AtariPreprocessing( + env_fn(), + screen_size=84, + grayscale_obs=grayscale, + scale_obs=scaled, + frame_skip=1, + noop_max=0, + ) + obs = env.reset().flatten() + done, step_i = False, 0 + max_obs = 1 if scaled else 255 + assert (0 <= obs).all() and ( + obs <= max_obs + ).all(), "Obs. must be in range [0,{}]".format(max_obs) + while not done or step_i <= max_test_steps: + obs, _, done, _ = env.step(env.action_space.sample()) + obs = obs.flatten() + assert (0 <= obs).all() and ( + obs <= max_obs + ).all(), "Obs. must be in range [0,{}]".format(max_obs) + step_i += 1 + + env.close()