diff --git a/.gitattributes b/.gitattributes index 73b3743f3431f336b6f81e535329cfcbe0b0b561..d09e26f108a36778198ba6249b89a4363da61376 100644 --- a/.gitattributes +++ b/.gitattributes @@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text Metaworld/zarr_path:[[:space:]]data/metaworld_door-close_expert.zarr/data/point_cloud/10.0.0 filter=lfs diff=lfs merge=lfs -text Metaworld/zarr_path:[[:space:]]data/metaworld_door-open_expert.zarr/data/point_cloud/7.0.0 filter=lfs diff=lfs merge=lfs -text Metaworld/zarr_path:[[:space:]]data/metaworld_door-lock_expert.zarr/data/point_cloud/5.0.0 filter=lfs diff=lfs merge=lfs -text +Metaworld/zarr_path:[[:space:]]data/metaworld_door-lock_expert.zarr/data/point_cloud/6.0.0 filter=lfs diff=lfs merge=lfs -text diff --git a/Metaworld/metaworld/envs/__init__.py b/Metaworld/metaworld/envs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9cb9590b1299755c2fa42b34b98c947911ebc335 --- /dev/null +++ b/Metaworld/metaworld/envs/__init__.py @@ -0,0 +1,6 @@ +from metaworld.envs.mujoco.env_dict import (ALL_V2_ENVIRONMENTS_GOAL_HIDDEN, + ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE + ) + +__all__ = ['ALL_V2_ENVIRONMENTS_GOAL_HIDDEN', + 'ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE'] \ No newline at end of file diff --git a/Metaworld/metaworld/envs/__pycache__/__init__.cpython-38.pyc b/Metaworld/metaworld/envs/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73142f91f8c6257b2ba56379ca50bb8eeb3cdcb5 Binary files /dev/null and b/Metaworld/metaworld/envs/__pycache__/__init__.cpython-38.pyc differ diff --git a/Metaworld/metaworld/envs/__pycache__/asset_path_utils.cpython-38.pyc b/Metaworld/metaworld/envs/__pycache__/asset_path_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b82d8954c9f577ecfaeccf552ab89111cdf2e324 Binary files /dev/null and b/Metaworld/metaworld/envs/__pycache__/asset_path_utils.cpython-38.pyc differ diff --git 
a/Metaworld/metaworld/envs/__pycache__/reward_utils.cpython-38.pyc b/Metaworld/metaworld/envs/__pycache__/reward_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..016f576268b1cfa901984b2d12d70c303916a607 Binary files /dev/null and b/Metaworld/metaworld/envs/__pycache__/reward_utils.cpython-38.pyc differ diff --git a/Metaworld/metaworld/envs/asset_path_utils.py b/Metaworld/metaworld/envs/asset_path_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4fb15a74e7b16d11fa44cfbd0afa1f8dfbf1367f --- /dev/null +++ b/Metaworld/metaworld/envs/asset_path_utils.py @@ -0,0 +1,12 @@ +import os + +ENV_ASSET_DIR_V1 = os.path.join(os.path.dirname(__file__), 'assets_v1') +ENV_ASSET_DIR_V2 = os.path.join(os.path.dirname(__file__), 'assets_v2') + + +def full_v1_path_for(file_name): + return os.path.join(ENV_ASSET_DIR_V1, file_name) + + +def full_v2_path_for(file_name): + return os.path.join(ENV_ASSET_DIR_V2, file_name) diff --git a/Metaworld/metaworld/envs/mujoco/__init__.py b/Metaworld/metaworld/envs/mujoco/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Metaworld/metaworld/envs/mujoco/__pycache__/__init__.cpython-38.pyc b/Metaworld/metaworld/envs/mujoco/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d3f2cf9cc662002f5ae51429c62865ff644e5c9 Binary files /dev/null and b/Metaworld/metaworld/envs/mujoco/__pycache__/__init__.cpython-38.pyc differ diff --git a/Metaworld/metaworld/envs/mujoco/__pycache__/env_dict.cpython-38.pyc b/Metaworld/metaworld/envs/mujoco/__pycache__/env_dict.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6ae1af360c4dfcd662456f77b961fdc329838dae Binary files /dev/null and b/Metaworld/metaworld/envs/mujoco/__pycache__/env_dict.cpython-38.pyc differ diff --git 
a/Metaworld/metaworld/envs/mujoco/__pycache__/mujoco_env.cpython-38.pyc b/Metaworld/metaworld/envs/mujoco/__pycache__/mujoco_env.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7340fcb8f632db2056d8228d150f495d9abd596f Binary files /dev/null and b/Metaworld/metaworld/envs/mujoco/__pycache__/mujoco_env.cpython-38.pyc differ diff --git a/Metaworld/metaworld/envs/mujoco/env_dict.py b/Metaworld/metaworld/envs/mujoco/env_dict.py new file mode 100644 index 0000000000000000000000000000000000000000..2ca6e84ef28c4f86ac79cd6bc056c542532acf85 --- /dev/null +++ b/Metaworld/metaworld/envs/mujoco/env_dict.py @@ -0,0 +1,643 @@ +from collections import OrderedDict +import re + +import numpy as np + +from metaworld.envs.mujoco.sawyer_xyz.v1 import ( + SawyerNutAssemblyEnv, + SawyerBasketballEnv, + SawyerBinPickingEnv, + SawyerBoxCloseEnv, + SawyerButtonPressEnv, + SawyerButtonPressTopdownEnv, + SawyerButtonPressTopdownWallEnv, + SawyerButtonPressWallEnv, + SawyerCoffeeButtonEnv, + SawyerCoffeePullEnv, + SawyerCoffeePushEnv, + SawyerDialTurnEnv, + SawyerNutDisassembleEnv, + SawyerDoorEnv, + SawyerDoorCloseEnv, + SawyerDoorLockEnv, + SawyerDoorUnlockEnv, + SawyerDrawerCloseEnv, + SawyerDrawerOpenEnv, + SawyerFaucetCloseEnv, + SawyerFaucetOpenEnv, + SawyerHammerEnv, + SawyerHandInsertEnv, + SawyerHandlePressEnv, + SawyerHandlePressSideEnv, + SawyerHandlePullEnv, + SawyerHandlePullSideEnv, + SawyerLeverPullEnv, + SawyerPegInsertionSideEnv, + SawyerPegUnplugSideEnv, + SawyerPickOutOfHoleEnv, + SawyerPlateSlideEnv, + SawyerPlateSlideBackEnv, + SawyerPlateSlideBackSideEnv, + SawyerPlateSlideSideEnv, + SawyerPushBackEnv, + SawyerReachPushPickPlaceEnv, + SawyerReachPushPickPlaceWallEnv, + SawyerShelfPlaceEnv, + SawyerSoccerEnv, + SawyerStickPullEnv, + SawyerStickPushEnv, + SawyerSweepEnv, + SawyerSweepIntoGoalEnv, + SawyerWindowCloseEnv, + SawyerWindowOpenEnv, +) +from metaworld.envs.mujoco.sawyer_xyz.v2 import ( + SawyerNutAssemblyEnvV2, + 
SawyerBasketballEnvV2, + SawyerBinPickingEnvV2, + SawyerBoxCloseEnvV2, + SawyerButtonPressTopdownEnvV2, + SawyerButtonPressTopdownWallEnvV2, + SawyerButtonPressEnvV2, + SawyerButtonPressWallEnvV2, + SawyerCoffeeButtonEnvV2, + SawyerCoffeePullEnvV2, + SawyerCoffeePushEnvV2, + SawyerDialTurnEnvV2, + SawyerNutDisassembleEnvV2, + SawyerDoorCloseEnvV2, + SawyerDoorLockEnvV2, + SawyerDoorUnlockEnvV2, + SawyerDoorEnvV2, + SawyerDrawerCloseEnvV2, + SawyerDrawerOpenEnvV2, + SawyerFaucetCloseEnvV2, + SawyerFaucetOpenEnvV2, + SawyerHammerEnvV2, + SawyerHandInsertEnvV2, + SawyerHandlePressSideEnvV2, + SawyerHandlePressEnvV2, + SawyerHandlePullSideEnvV2, + SawyerHandlePullEnvV2, + SawyerLeverPullEnvV2, + SawyerPegInsertionSideEnvV2, + SawyerPegUnplugSideEnvV2, + SawyerPickOutOfHoleEnvV2, + SawyerPickPlaceEnvV2, + SawyerPickPlaceWallEnvV2, + SawyerPlateSlideBackSideEnvV2, + SawyerPlateSlideBackEnvV2, + SawyerPlateSlideSideEnvV2, + SawyerPlateSlideEnvV2, + SawyerPushBackEnvV2, + SawyerPushEnvV2, + SawyerPushWallEnvV2, + SawyerReachEnvV2, + SawyerReachWallEnvV2, + SawyerShelfPlaceEnvV2, + SawyerSoccerEnvV2, + SawyerStickPullEnvV2, + SawyerStickPushEnvV2, + SawyerSweepEnvV2, + SawyerSweepIntoGoalEnvV2, + SawyerWindowCloseEnvV2, + SawyerWindowOpenEnvV2, +) + + +ALL_V1_ENVIRONMENTS = OrderedDict(( + ('reach-v1', SawyerReachPushPickPlaceEnv), + ('push-v1', SawyerReachPushPickPlaceEnv), + ('pick-place-v1', SawyerReachPushPickPlaceEnv), + ('door-open-v1', SawyerDoorEnv), + ('drawer-open-v1', SawyerDrawerOpenEnv), + ('drawer-close-v1', SawyerDrawerCloseEnv), + ('button-press-topdown-v1', SawyerButtonPressTopdownEnv), + ('peg-insert-side-v1', SawyerPegInsertionSideEnv), + ('window-open-v1', SawyerWindowOpenEnv), + ('window-close-v1', SawyerWindowCloseEnv), + ('door-close-v1', SawyerDoorCloseEnv), + ('reach-wall-v1', SawyerReachPushPickPlaceWallEnv), + ('pick-place-wall-v1', SawyerReachPushPickPlaceWallEnv), + ('push-wall-v1', SawyerReachPushPickPlaceWallEnv), + ('button-press-v1', 
SawyerButtonPressEnv), + ('button-press-topdown-wall-v1', SawyerButtonPressTopdownWallEnv), + ('button-press-wall-v1', SawyerButtonPressWallEnv), + ('peg-unplug-side-v1', SawyerPegUnplugSideEnv), + ('disassemble-v1', SawyerNutDisassembleEnv), + ('hammer-v1', SawyerHammerEnv), + ('plate-slide-v1', SawyerPlateSlideEnv), + ('plate-slide-side-v1', SawyerPlateSlideSideEnv), + ('plate-slide-back-v1', SawyerPlateSlideBackEnv), + ('plate-slide-back-side-v1', SawyerPlateSlideBackSideEnv), + ('handle-press-v1', SawyerHandlePressEnv), + ('handle-pull-v1', SawyerHandlePullEnv), + ('handle-press-side-v1', SawyerHandlePressSideEnv), + ('handle-pull-side-v1', SawyerHandlePullSideEnv), + ('stick-push-v1', SawyerStickPushEnv), + ('stick-pull-v1', SawyerStickPullEnv), + ('basketball-v1', SawyerBasketballEnv), + ('soccer-v1', SawyerSoccerEnv), + ('faucet-open-v1', SawyerFaucetOpenEnv), + ('faucet-close-v1', SawyerFaucetCloseEnv), + ('coffee-push-v1', SawyerCoffeePushEnv), + ('coffee-pull-v1', SawyerCoffeePullEnv), + ('coffee-button-v1', SawyerCoffeeButtonEnv), + ('sweep-v1', SawyerSweepEnv), + ('sweep-into-v1', SawyerSweepIntoGoalEnv), + ('pick-out-of-hole-v1', SawyerPickOutOfHoleEnv), + ('assembly-v1', SawyerNutAssemblyEnv), + ('shelf-place-v1', SawyerShelfPlaceEnv), + ('push-back-v1', SawyerPushBackEnv), + ('lever-pull-v1', SawyerLeverPullEnv), + ('dial-turn-v1', SawyerDialTurnEnv), + ('bin-picking-v1', SawyerBinPickingEnv), + ('box-close-v1', SawyerBoxCloseEnv), + ('hand-insert-v1', SawyerHandInsertEnv), + ('door-lock-v1', SawyerDoorLockEnv), + ('door-unlock-v1', SawyerDoorUnlockEnv), +)) + +ALL_V2_ENVIRONMENTS = OrderedDict(( + ('assembly-v2', SawyerNutAssemblyEnvV2), + ('basketball-v2', SawyerBasketballEnvV2), + ('bin-picking-v2', SawyerBinPickingEnvV2), + ('box-close-v2', SawyerBoxCloseEnvV2), + ('button-press-topdown-v2', SawyerButtonPressTopdownEnvV2), + ('button-press-topdown-wall-v2', SawyerButtonPressTopdownWallEnvV2), + ('button-press-v2', SawyerButtonPressEnvV2), + 
('button-press-wall-v2', SawyerButtonPressWallEnvV2), + ('coffee-button-v2', SawyerCoffeeButtonEnvV2), + ('coffee-pull-v2', SawyerCoffeePullEnvV2), + ('coffee-push-v2', SawyerCoffeePushEnvV2), + ('dial-turn-v2', SawyerDialTurnEnvV2), + ('disassemble-v2', SawyerNutDisassembleEnvV2), + ('door-close-v2', SawyerDoorCloseEnvV2), + ('door-lock-v2', SawyerDoorLockEnvV2), + ('door-open-v2', SawyerDoorEnvV2), + ('door-unlock-v2', SawyerDoorUnlockEnvV2), + ('hand-insert-v2', SawyerHandInsertEnvV2), + ('drawer-close-v2', SawyerDrawerCloseEnvV2), + ('drawer-open-v2', SawyerDrawerOpenEnvV2), + ('faucet-open-v2', SawyerFaucetOpenEnvV2), + ('faucet-close-v2', SawyerFaucetCloseEnvV2), + ('hammer-v2', SawyerHammerEnvV2), + ('handle-press-side-v2', SawyerHandlePressSideEnvV2), + ('handle-press-v2', SawyerHandlePressEnvV2), + ('handle-pull-side-v2', SawyerHandlePullSideEnvV2), + ('handle-pull-v2', SawyerHandlePullEnvV2), + ('lever-pull-v2', SawyerLeverPullEnvV2), + ('peg-insert-side-v2', SawyerPegInsertionSideEnvV2), + ('pick-place-wall-v2', SawyerPickPlaceWallEnvV2), + ('pick-out-of-hole-v2', SawyerPickOutOfHoleEnvV2), + ('reach-v2', SawyerReachEnvV2), + ('push-back-v2', SawyerPushBackEnvV2), + ('push-v2', SawyerPushEnvV2), + ('pick-place-v2', SawyerPickPlaceEnvV2), + ('plate-slide-v2', SawyerPlateSlideEnvV2), + ('plate-slide-side-v2', SawyerPlateSlideSideEnvV2), + ('plate-slide-back-v2', SawyerPlateSlideBackEnvV2), + ('plate-slide-back-side-v2', SawyerPlateSlideBackSideEnvV2), + ('peg-insert-side-v2', SawyerPegInsertionSideEnvV2), + ('peg-unplug-side-v2', SawyerPegUnplugSideEnvV2), + ('soccer-v2', SawyerSoccerEnvV2), + ('stick-push-v2', SawyerStickPushEnvV2), + ('stick-pull-v2', SawyerStickPullEnvV2), + ('push-wall-v2', SawyerPushWallEnvV2), + ('push-v2', SawyerPushEnvV2), + ('reach-wall-v2', SawyerReachWallEnvV2), + ('reach-v2', SawyerReachEnvV2), + ('shelf-place-v2', SawyerShelfPlaceEnvV2), + ('sweep-into-v2', SawyerSweepIntoGoalEnvV2), + ('sweep-v2', SawyerSweepEnvV2), + 
('window-open-v2', SawyerWindowOpenEnvV2), + ('window-close-v2', SawyerWindowCloseEnvV2), +)) + +_NUM_METAWORLD_ENVS = len(ALL_V1_ENVIRONMENTS) + +EASY_MODE_CLS_DICT = OrderedDict( + (('reach-v1', SawyerReachPushPickPlaceEnv), + ('push-v1', SawyerReachPushPickPlaceEnv), + ('pick-place-v1', SawyerReachPushPickPlaceEnv), + ('door-open-v1', SawyerDoorEnv), ('drawer-open-v1', SawyerDrawerOpenEnv), + ('drawer-close-v1', SawyerDrawerCloseEnv), + ('button-press-topdown-v1', SawyerButtonPressTopdownEnv), + ('peg-insert-side-v1', SawyerPegInsertionSideEnv), + ('window-open-v1', SawyerWindowOpenEnv), + ('window-close-v1', SawyerWindowCloseEnv)), ) + +EASY_MODE_ARGS_KWARGS = { + key: dict(args=[], + kwargs={'task_id': list(ALL_V1_ENVIRONMENTS.keys()).index(key)}) + for key, _ in EASY_MODE_CLS_DICT.items() +} + +EASY_MODE_ARGS_KWARGS['reach-v1']['kwargs']['task_type'] = 'reach' +EASY_MODE_ARGS_KWARGS['push-v1']['kwargs']['task_type'] = 'push' +EASY_MODE_ARGS_KWARGS['pick-place-v1']['kwargs']['task_type'] = 'pick_place' + +MEDIUM_MODE_CLS_DICT = OrderedDict( + (('train', + OrderedDict((('reach-v1', SawyerReachPushPickPlaceEnv), + ('push-v1', SawyerReachPushPickPlaceEnv), + ('pick-place-v1', SawyerReachPushPickPlaceEnv), + ('door-open-v1', SawyerDoorEnv), ('drawer-close-v1', + SawyerDrawerCloseEnv), + ('button-press-topdown-v1', SawyerButtonPressTopdownEnv), + ('peg-insert-side-v1', + SawyerPegInsertionSideEnv), ('window-open-v1', + SawyerWindowOpenEnv), + ('sweep-v1', SawyerSweepEnv), ('basketball-v1', + SawyerBasketballEnv)))), + ('test', + OrderedDict( + (('drawer-open-v1', SawyerDrawerOpenEnv), ('door-close-v1', + SawyerDoorCloseEnv), + ('shelf-place-v1', SawyerShelfPlaceEnv), ('sweep-into-v1', + SawyerSweepIntoGoalEnv), ( + 'lever-pull-v1', + SawyerLeverPullEnv, + )))))) +medium_mode_train_args_kwargs = { + key: dict(args=[], + kwargs={ + 'task_id': list(ALL_V1_ENVIRONMENTS.keys()).index(key), + }) + for key, _ in MEDIUM_MODE_CLS_DICT['train'].items() +} + 
+medium_mode_test_args_kwargs = { + key: dict(args=[], + kwargs={'task_id': list(ALL_V1_ENVIRONMENTS.keys()).index(key)}) + for key, _ in MEDIUM_MODE_CLS_DICT['test'].items() +} + +medium_mode_train_args_kwargs['reach-v1']['kwargs']['task_type'] = 'reach' +medium_mode_train_args_kwargs['push-v1']['kwargs']['task_type'] = 'push' +medium_mode_train_args_kwargs['pick-place-v1']['kwargs'][ + 'task_type'] = 'pick_place' + +MEDIUM_MODE_ARGS_KWARGS = dict( + train=medium_mode_train_args_kwargs, + test=medium_mode_test_args_kwargs, +) +''' + ML45 environments and arguments +''' +HARD_MODE_CLS_DICT = OrderedDict( + (('train', + OrderedDict(( + ('reach-v1', SawyerReachPushPickPlaceEnv), + ('push-v1', SawyerReachPushPickPlaceEnv), + ('pick-place-v1', SawyerReachPushPickPlaceEnv), + ('door-open-v1', SawyerDoorEnv), + ('drawer-open-v1', SawyerDrawerOpenEnv), + ('drawer-close-v1', SawyerDrawerCloseEnv), + ('button-press-topdown-v1', SawyerButtonPressTopdownEnv), + ('peg-insert-side-v1', SawyerPegInsertionSideEnv), + ('window-open-v1', SawyerWindowOpenEnv), + ('window-close-v1', SawyerWindowCloseEnv), + ('door-close-v1', SawyerDoorCloseEnv), + ('reach-wall-v1', SawyerReachPushPickPlaceWallEnv), + ('pick-place-wall-v1', SawyerReachPushPickPlaceWallEnv), + ('push-wall-v1', SawyerReachPushPickPlaceWallEnv), + ('button-press-v1', SawyerButtonPressEnv), + ('button-press-topdown-wall-v1', SawyerButtonPressTopdownWallEnv), + ('button-press-wall-v1', SawyerButtonPressWallEnv), + ('peg-unplug-side-v1', SawyerPegUnplugSideEnv), + ('disassemble-v1', SawyerNutDisassembleEnv), + ('hammer-v1', SawyerHammerEnv), + ('plate-slide-v1', SawyerPlateSlideEnv), + ('plate-slide-side-v1', SawyerPlateSlideSideEnv), + ('plate-slide-back-v1', SawyerPlateSlideBackEnv), + ('plate-slide-back-side-v1', SawyerPlateSlideBackSideEnv), + ('handle-press-v1', SawyerHandlePressEnv), + ('handle-pull-v1', SawyerHandlePullEnv), + ('handle-press-side-v1', SawyerHandlePressSideEnv), + ('handle-pull-side-v1', 
SawyerHandlePullSideEnv), + ('stick-push-v1', SawyerStickPushEnv), + ('stick-pull-v1', SawyerStickPullEnv), + ('basketball-v1', SawyerBasketballEnv), + ('soccer-v1', SawyerSoccerEnv), + ('faucet-open-v1', SawyerFaucetOpenEnv), + ('faucet-close-v1', SawyerFaucetCloseEnv), + ('coffee-push-v1', SawyerCoffeePushEnv), + ('coffee-pull-v1', SawyerCoffeePullEnv), + ('coffee-button-v1', SawyerCoffeeButtonEnv), + ('sweep-v1', SawyerSweepEnv), + ('sweep-into-v1', SawyerSweepIntoGoalEnv), + ('pick-out-of-hole-v1', SawyerPickOutOfHoleEnv), + ('assembly-v1', SawyerNutAssemblyEnv), + ('shelf-place-v1', SawyerShelfPlaceEnv), + ('push-back-v1', SawyerPushBackEnv), + ('lever-pull-v1', SawyerLeverPullEnv), + ('dial-turn-v1', SawyerDialTurnEnv), + ))), ('test', + OrderedDict(( + ('bin-picking-v1', SawyerBinPickingEnv), + ('box-close-v1', SawyerBoxCloseEnv), + ('hand-insert-v1', SawyerHandInsertEnv), + ('door-lock-v1', SawyerDoorLockEnv), + ('door-unlock-v1', SawyerDoorUnlockEnv), + ))))) + + +def _hard_mode_args_kwargs(env_cls_, key_): + del env_cls_ + + kwargs = dict(task_id=list(ALL_V1_ENVIRONMENTS.keys()).index(key_)) + if key_ == 'reach-v1' or key_ == 'reach-wall-v1': + kwargs['task_type'] = 'reach' + elif key_ == 'push-v1' or key_ == 'push-wall-v1': + kwargs['task_type'] = 'push' + elif key_ == 'pick-place-v1' or key_ == 'pick-place-wall-v1': + kwargs['task_type'] = 'pick_place' + return dict(args=[], kwargs=kwargs) + + +HARD_MODE_ARGS_KWARGS = dict(train={}, test={}) +for key, env_cls in HARD_MODE_CLS_DICT['train'].items(): + HARD_MODE_ARGS_KWARGS['train'][key] = _hard_mode_args_kwargs(env_cls, key) +for key, env_cls in HARD_MODE_CLS_DICT['test'].items(): + HARD_MODE_ARGS_KWARGS['test'][key] = _hard_mode_args_kwargs(env_cls, key) + +############################## V2 DICTS ############################## + +MT10_V2 = OrderedDict( + (('reach-v2', SawyerReachEnvV2), ('push-v2', SawyerPushEnvV2), + ('pick-place-v2', SawyerPickPlaceEnvV2), + ('door-open-v2', SawyerDoorEnvV2), + 
('drawer-open-v2', SawyerDrawerOpenEnvV2), + ('drawer-close-v2', SawyerDrawerCloseEnvV2), + ('button-press-topdown-v2', SawyerButtonPressTopdownEnvV2), + ('peg-insert-side-v2', SawyerPegInsertionSideEnvV2), + ('window-open-v2', SawyerWindowOpenEnvV2), + ('window-close-v2', SawyerWindowCloseEnvV2)), ) + +MT10_V2_ARGS_KWARGS = { + key: dict(args=[], + kwargs={'task_id': list(ALL_V2_ENVIRONMENTS.keys()).index(key)}) + for key, _ in MT10_V2.items() +} + +ML10_V2 = OrderedDict( + (('train', + OrderedDict( + (('reach-v2', SawyerReachEnvV2), ('push-v2', SawyerPushEnvV2), + ('pick-place-v2', SawyerPickPlaceEnvV2), + ('door-open-v2', SawyerDoorEnvV2), ('drawer-close-v2', + SawyerDrawerCloseEnvV2), + ('button-press-topdown-v2', SawyerButtonPressEnvV2), + ('peg-insert-side-v2', + SawyerPegInsertionSideEnvV2), ('window-open-v2', + SawyerWindowOpenEnvV2), + ('sweep-v2', SawyerSweepEnvV2), ('basketball-v2', + SawyerBasketballEnvV2)))), + ('test', + OrderedDict( + (('drawer-open-v2', SawyerDrawerOpenEnvV2), + ('door-close-v2', SawyerDoorCloseEnvV2), ('shelf-place-v2', + SawyerShelfPlaceEnvV2), + ('sweep-into-v2', SawyerSweepIntoGoalEnvV2), ( + 'lever-pull-v2', + SawyerLeverPullEnvV2, + )))))) + +ml10_train_args_kwargs = { + key: dict(args=[], + kwargs={ + 'task_id': list(ALL_V2_ENVIRONMENTS.keys()).index(key), + }) + for key, _ in ML10_V2['train'].items() +} + +ml10_test_args_kwargs = { + key: dict(args=[], + kwargs={'task_id': list(ALL_V2_ENVIRONMENTS.keys()).index(key)}) + for key, _ in ML10_V2['test'].items() +} + +ML10_ARGS_KWARGS = dict( + train=ml10_train_args_kwargs, + test=ml10_test_args_kwargs, +) + +ML1_V2 = OrderedDict( + (('train', ALL_V2_ENVIRONMENTS), ('test', ALL_V2_ENVIRONMENTS))) + +ML1_args_kwargs = { + key: dict(args=[], + kwargs={ + 'task_id': list(ALL_V2_ENVIRONMENTS.keys()).index(key), + }) + for key, _ in ML1_V2['train'].items() +} + +MT50_V2 = OrderedDict(( + ('assembly-v2', SawyerNutAssemblyEnvV2), + ('basketball-v2', SawyerBasketballEnvV2), + 
('bin-picking-v2', SawyerBinPickingEnvV2), + ('box-close-v2', SawyerBoxCloseEnvV2), + ('button-press-topdown-v2', SawyerButtonPressTopdownEnvV2), + ('button-press-topdown-wall-v2', SawyerButtonPressTopdownWallEnvV2), + ('button-press-v2', SawyerButtonPressEnvV2), + ('button-press-wall-v2', SawyerButtonPressWallEnvV2), + ('coffee-button-v2', SawyerCoffeeButtonEnvV2), + ('coffee-pull-v2', SawyerCoffeePullEnvV2), + ('coffee-push-v2', SawyerCoffeePushEnvV2), + ('dial-turn-v2', SawyerDialTurnEnvV2), + ('disassemble-v2', SawyerNutDisassembleEnvV2), + ('door-close-v2', SawyerDoorCloseEnvV2), + ('door-lock-v2', SawyerDoorLockEnvV2), + ('door-open-v2', SawyerDoorEnvV2), + ('door-unlock-v2', SawyerDoorUnlockEnvV2), + ('hand-insert-v2', SawyerHandInsertEnvV2), + ('drawer-close-v2', SawyerDrawerCloseEnvV2), + ('drawer-open-v2', SawyerDrawerOpenEnvV2), + ('faucet-open-v2', SawyerFaucetOpenEnvV2), + ('faucet-close-v2', SawyerFaucetCloseEnvV2), + ('hammer-v2', SawyerHammerEnvV2), + ('handle-press-side-v2', SawyerHandlePressSideEnvV2), + ('handle-press-v2', SawyerHandlePressEnvV2), + ('handle-pull-side-v2', SawyerHandlePullSideEnvV2), + ('handle-pull-v2', SawyerHandlePullEnvV2), + ('lever-pull-v2', SawyerLeverPullEnvV2), + ('peg-insert-side-v2', SawyerPegInsertionSideEnvV2), + ('pick-place-wall-v2', SawyerPickPlaceWallEnvV2), + ('pick-out-of-hole-v2', SawyerPickOutOfHoleEnvV2), + ('reach-v2', SawyerReachEnvV2), + ('push-back-v2', SawyerPushBackEnvV2), + ('push-v2', SawyerPushEnvV2), + ('pick-place-v2', SawyerPickPlaceEnvV2), + ('plate-slide-v2', SawyerPlateSlideEnvV2), + ('plate-slide-side-v2', SawyerPlateSlideSideEnvV2), + ('plate-slide-back-v2', SawyerPlateSlideBackEnvV2), + ('plate-slide-back-side-v2', SawyerPlateSlideBackSideEnvV2), + ('peg-insert-side-v2', SawyerPegInsertionSideEnvV2), + ('peg-unplug-side-v2', SawyerPegUnplugSideEnvV2), + ('soccer-v2', SawyerSoccerEnvV2), + ('stick-push-v2', SawyerStickPushEnvV2), + ('stick-pull-v2', SawyerStickPullEnvV2), + ('push-wall-v2', 
SawyerPushWallEnvV2), + ('push-v2', SawyerPushEnvV2), + ('reach-wall-v2', SawyerReachWallEnvV2), + ('reach-v2', SawyerReachEnvV2), + ('shelf-place-v2', SawyerShelfPlaceEnvV2), + ('sweep-into-v2', SawyerSweepIntoGoalEnvV2), + ('sweep-v2', SawyerSweepEnvV2), + ('window-open-v2', SawyerWindowOpenEnvV2), + ('window-close-v2', SawyerWindowCloseEnvV2), +)) + +MT50_V2_ARGS_KWARGS = { + key: dict(args=[], + kwargs={'task_id': list(ALL_V2_ENVIRONMENTS.keys()).index(key)}) + for key, _ in MT50_V2.items() +} + +ML45_V2 = OrderedDict( + (('train', + OrderedDict(( + ('assembly-v2', SawyerNutAssemblyEnvV2), + ('basketball-v2', SawyerBasketballEnvV2), + ('button-press-topdown-v2', SawyerButtonPressTopdownEnvV2), + ('button-press-topdown-wall-v2', SawyerButtonPressTopdownWallEnvV2), + ('button-press-v2', SawyerButtonPressEnvV2), + ('button-press-wall-v2', SawyerButtonPressWallEnvV2), + ('coffee-button-v2', SawyerCoffeeButtonEnvV2), + ('coffee-pull-v2', SawyerCoffeePullEnvV2), + ('coffee-push-v2', SawyerCoffeePushEnvV2), + ('dial-turn-v2', SawyerDialTurnEnvV2), + ('disassemble-v2', SawyerNutDisassembleEnvV2), + ('door-close-v2', SawyerDoorCloseEnvV2), + ('door-open-v2', SawyerDoorEnvV2), + ('drawer-close-v2', SawyerDrawerCloseEnvV2), + ('drawer-open-v2', SawyerDrawerOpenEnvV2), + ('faucet-open-v2', SawyerFaucetOpenEnvV2), + ('faucet-close-v2', SawyerFaucetCloseEnvV2), + ('hammer-v2', SawyerHammerEnvV2), + ('handle-press-side-v2', SawyerHandlePressSideEnvV2), + ('handle-press-v2', SawyerHandlePressEnvV2), + ('handle-pull-side-v2', SawyerHandlePullSideEnvV2), + ('handle-pull-v2', SawyerHandlePullEnvV2), + ('lever-pull-v2', SawyerLeverPullEnvV2), + ('peg-insert-side-v2', SawyerPegInsertionSideEnvV2), + ('pick-place-wall-v2', SawyerPickPlaceWallEnvV2), + ('pick-out-of-hole-v2', SawyerPickOutOfHoleEnvV2), + ('reach-v2', SawyerReachEnvV2), + ('push-back-v2', SawyerPushBackEnvV2), + ('push-v2', SawyerPushEnvV2), + ('pick-place-v2', SawyerPickPlaceEnvV2), + ('plate-slide-v2', 
SawyerPlateSlideEnvV2), + ('plate-slide-side-v2', SawyerPlateSlideSideEnvV2), + ('plate-slide-back-v2', SawyerPlateSlideBackEnvV2), + ('plate-slide-back-side-v2', SawyerPlateSlideBackSideEnvV2), + ('peg-insert-side-v2', SawyerPegInsertionSideEnvV2), + ('peg-unplug-side-v2', SawyerPegUnplugSideEnvV2), + ('soccer-v2', SawyerSoccerEnvV2), + ('stick-push-v2', SawyerStickPushEnvV2), + ('stick-pull-v2', SawyerStickPullEnvV2), + ('push-wall-v2', SawyerPushWallEnvV2), + ('push-v2', SawyerPushEnvV2), + ('reach-wall-v2', SawyerReachWallEnvV2), + ('reach-v2', SawyerReachEnvV2), + ('shelf-place-v2', SawyerShelfPlaceEnvV2), + ('sweep-into-v2', SawyerSweepIntoGoalEnvV2), + ('sweep-v2', SawyerSweepEnvV2), + ('window-open-v2', SawyerWindowOpenEnvV2), + ('window-close-v2', SawyerWindowCloseEnvV2), + ))), ('test', + OrderedDict(( + ('bin-picking-v2', SawyerBinPickingEnvV2), + ('box-close-v2', SawyerBoxCloseEnvV2), + ('hand-insert-v2', SawyerHandInsertEnvV2), + ('door-lock-v2', SawyerDoorLockEnvV2), + ('door-unlock-v2', SawyerDoorUnlockEnvV2), + ))))) + +ml45_train_args_kwargs = { + key: dict(args=[], + kwargs={ + 'task_id': list(ALL_V2_ENVIRONMENTS.keys()).index(key), + }) + for key, _ in ML45_V2['train'].items() +} + +ml45_test_args_kwargs = { + key: dict(args=[], + kwargs={'task_id': list(ALL_V2_ENVIRONMENTS.keys()).index(key)}) + for key, _ in ML45_V2['test'].items() +} + +ML45_ARGS_KWARGS = dict( + train=ml45_train_args_kwargs, + test=ml45_test_args_kwargs, +) + + +def create_hidden_goal_envs(): + hidden_goal_envs = {} + for env_name, env_cls in ALL_V2_ENVIRONMENTS.items(): + d = {} + + def initialize(env, seed=None): + if seed is not None: + st0 = np.random.get_state() + np.random.seed(seed) + super(type(env), env).__init__() + env._partially_observable = True + env._freeze_rand_vec = False + env._set_task_called = True + env.reset() + env._freeze_rand_vec = True + if seed is not None: + env.seed(seed) + np.random.set_state(st0) + + d['__init__'] = initialize + hg_env_name = 
re.sub("(^|[-])\s*([a-zA-Z])", + lambda p: p.group(0).upper(), env_name) + hg_env_name = hg_env_name.replace("-", "") + hg_env_key = '{}-goal-hidden'.format(env_name) + hg_env_name = '{}GoalHidden'.format(hg_env_name) + HiddenGoalEnvCls = type(hg_env_name, (env_cls, ), d) + hidden_goal_envs[hg_env_key] = HiddenGoalEnvCls + + return OrderedDict(hidden_goal_envs) + + +def create_observable_goal_envs(): + observable_goal_envs = {} + for env_name, env_cls in ALL_V2_ENVIRONMENTS.items(): + d = {} + + def initialize(env, seed=None): + if seed is not None: + st0 = np.random.get_state() + np.random.seed(seed) + super(type(env), env).__init__() + env._partially_observable = False + env._freeze_rand_vec = False + env._set_task_called = True + env.reset() + env._freeze_rand_vec = True + if seed is not None: + env.seed(seed) + np.random.set_state(st0) + + d['__init__'] = initialize + og_env_name = re.sub("(^|[-])\s*([a-zA-Z])", + lambda p: p.group(0).upper(), env_name) + og_env_name = og_env_name.replace("-", "") + + og_env_key = '{}-goal-observable'.format(env_name) + og_env_name = '{}GoalObservable'.format(og_env_name) + ObservableGoalEnvCls = type(og_env_name, (env_cls, ), d) + observable_goal_envs[og_env_key] = ObservableGoalEnvCls + + return OrderedDict(observable_goal_envs) + + +ALL_V2_ENVIRONMENTS_GOAL_HIDDEN = create_hidden_goal_envs() +ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE = create_observable_goal_envs() diff --git a/Metaworld/metaworld/envs/mujoco/mujoco_env.py b/Metaworld/metaworld/envs/mujoco/mujoco_env.py new file mode 100644 index 0000000000000000000000000000000000000000..94c8f121a4a7342f6e0245ae64de67f85547d77d --- /dev/null +++ b/Metaworld/metaworld/envs/mujoco/mujoco_env.py @@ -0,0 +1,155 @@ +import abc +import warnings + +import glfw +from gym import error +from gym.utils import seeding +import numpy as np +from os import path +import gym + +try: + import mujoco_py +except ImportError as e: + raise error.DependencyNotInstalled("{}. 
(HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e)) + + +def _assert_task_is_set(func): + def inner(*args, **kwargs): + env = args[0] + if not env._set_task_called: + raise RuntimeError( + 'You must call env.set_task before using env.' + + func.__name__ + ) + return func(*args, **kwargs) + return inner + + +DEFAULT_SIZE = 500 + +class MujocoEnv(gym.Env, abc.ABC): + """ + This is a simplified version of the gym MujocoEnv class. + + Some differences are: + - Do not automatically set the observation/action space. + """ + + max_path_length = 500 + + def __init__(self, model_path, frame_skip): + if not path.exists(model_path): + raise IOError("File %s does not exist" % model_path) + + self.frame_skip = frame_skip + self.model = mujoco_py.load_model_from_path(model_path) + self.sim = mujoco_py.MjSim(self.model) + self.data = self.sim.data + self.viewer = None + self._viewers = {} + + self.metadata = { + 'render.modes': ['human'], + 'video.frames_per_second': int(np.round(1.0 / self.dt)) + } + self.init_qpos = self.sim.data.qpos.ravel().copy() + self.init_qvel = self.sim.data.qvel.ravel().copy() + + self._did_see_sim_exception = False + + self.np_random, _ = seeding.np_random(None) + + def seed(self, seed): + assert seed is not None + self.np_random, seed = seeding.np_random(seed) + self.action_space.seed(seed) + self.observation_space.seed(seed) + self.goal_space.seed(seed) + return [seed] + + @abc.abstractmethod + def reset_model(self): + """ + Reset the robot degrees of freedom (qpos and qvel). + Implement this in each subclass. + """ + pass + + def viewer_setup(self): + """ + This method is called when the viewer is initialized and after every reset + Optionally implement this method, if you need to tinker with camera position + and so forth. 
+ """ + pass + + @_assert_task_is_set + def reset(self): + self._did_see_sim_exception = False + self.sim.reset() + ob = self.reset_model() + if self.viewer is not None: + self.viewer_setup() + return ob + + def set_state(self, qpos, qvel): + assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) + old_state = self.sim.get_state() + new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel, + old_state.act, old_state.udd_state) + self.sim.set_state(new_state) + self.sim.forward() + + @property + def dt(self): + return self.model.opt.timestep * self.frame_skip + + def do_simulation(self, ctrl, n_frames=None): + if getattr(self, 'curr_path_length', 0) > self.max_path_length: + raise ValueError('Maximum path length allowed by the benchmark has been exceeded') + if self._did_see_sim_exception: + return + + if n_frames is None: + n_frames = self.frame_skip + self.sim.data.ctrl[:] = ctrl + + for _ in range(n_frames): + try: + self.sim.step() + except mujoco_py.MujocoException as err: + warnings.warn(str(err), category=RuntimeWarning) + self._did_see_sim_exception = True + + def render(self, offscreen=False, camera_name="corner2", resolution=(640, 480)): + assert_string = ("camera_name should be one of ", + "corner3, corner, corner2, topview, gripperPOV, behindGripper") + assert camera_name in {"corner3", "corner", "corner2", + "topview", "gripperPOV", "behindGripper"}, assert_string + if not offscreen: + self._get_viewer('human').render() + else: + return self.sim.render( + *resolution, + mode='offscreen', + camera_name=camera_name + ) + + def close(self): + if self.viewer is not None: + glfw.destroy_window(self.viewer.window) + self.viewer = None + + def _get_viewer(self, mode): + self.viewer = self._viewers.get(mode) + if self.viewer is None: + if mode == 'human': + self.viewer = mujoco_py.MjViewer(self.sim) + self.viewer_setup() + self._viewers[mode] = self.viewer + self.viewer_setup() + return self.viewer + + def get_body_com(self, body_name): + 
class SawyerMocapBase(MujocoEnv, metaclass=abc.ABCMeta):
    """Shared helpers for Sawyer MuJoCo environments that are driven through
    a mocap body for XYZ control.
    """
    mocap_low = np.array([-0.2, 0.5, 0.06])
    mocap_high = np.array([0.2, 0.7, 0.6])

    def __init__(self, model_name, frame_skip=5):
        MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)
        self.reset_mocap_welds()

    def get_endeff_pos(self):
        """Return a copy of the 'hand' body position."""
        hand_xpos = self.data.get_body_xpos('hand')
        return hand_xpos.copy()

    @property
    def tcp_center(self):
        """Midpoint between the two finger end-effector sites

        Returns:
            (np.ndarray): 3-element position
        """
        right = self._get_site_pos('rightEndEffector')
        left = self._get_site_pos('leftEndEffector')
        return (right + left) / 2.0

    def get_env_state(self):
        """Return a deep copy of `(sim state, (mocap_pos, mocap_quat))`."""
        mocap = (self.data.mocap_pos, self.data.mocap_quat)
        return copy.deepcopy((self.sim.get_state(), mocap))

    def set_env_state(self, state):
        """Restore a state produced by `get_env_state()`."""
        joint_state, (mocap_pos, mocap_quat) = state
        self.sim.set_state(joint_state)
        self.data.set_mocap_pos('mocap', mocap_pos)
        self.data.set_mocap_quat('mocap', mocap_quat)
        self.sim.forward()

    def __getstate__(self):
        # The model, sim and data objects are dropped from the attribute
        # dict; the model is carried as its MJB blob instead.
        attrs = self.__dict__.copy()
        for key in ('model', 'sim', 'data'):
            del attrs[key]
        mjb = self.model.get_mjb()
        return {'state': attrs, 'mjb': mjb, 'env_state': self.get_env_state()}

    def __setstate__(self, state):
        # Rebuild model/sim/data from the MJB blob, then restore the
        # simulator and mocap state.
        self.__dict__ = state['state']
        self.model = mujoco_py.load_model_from_mjb(state['mjb'])
        self.sim = mujoco_py.MjSim(self.model)
        self.data = self.sim.data
        self.set_env_state(state['env_state'])

    def reset_mocap_welds(self):
        """Resets the mocap welds that we use for actuation."""
        sim = self.sim
        model = sim.model
        if model.nmocap > 0 and model.eq_data is not None:
            for row in range(model.eq_data.shape[0]):
                if model.eq_type[row] != mujoco_py.const.EQ_WELD:
                    continue
                model.eq_data[row, :] = np.array(
                    [0., 0., 0., 1., 0., 0., 0.])
        sim.forward()
class SawyerXYZEnv(SawyerMocapBase, metaclass=abc.ABCMeta):
    """Base class for Sawyer XYZ-controlled environments.

    Handles task assignment, mocap-based XYZ actions, observation assembly
    (with one-frame stacking for V2 environments), stepping, and the shared
    gripper caging reward.
    """
    _HAND_SPACE = Box(
        np.array([-0.525, .348, -.0525]),
        np.array([+0.525, 1.025, .7])
    )
    max_path_length = 500

    TARGET_RADIUS = 0.05

    def __init__(
            self,
            model_name,
            frame_skip=5,
            hand_low=(-0.2, 0.55, 0.05),
            hand_high=(0.2, 0.75, 0.3),
            mocap_low=None,
            mocap_high=None,
            action_scale=1./100,
            action_rot_scale=1.,
    ):
        super().__init__(model_name, frame_skip=frame_skip)
        self.random_init = True
        self.action_scale = action_scale
        self.action_rot_scale = action_rot_scale
        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)
        # Mocap bounds default to the hand bounds
        if mocap_low is None:
            mocap_low = hand_low
        if mocap_high is None:
            mocap_high = hand_high
        self.mocap_low = np.hstack(mocap_low)
        self.mocap_high = np.hstack(mocap_high)
        self.curr_path_length = 0
        self.seeded_rand_vec = False
        self._freeze_rand_vec = True
        self._last_rand_vec = None

        # We use continuous goal space by default and
        # can discretize the goal space by calling
        # the `discretize_goal_space` method.
        self.discrete_goal_space = None
        self.discrete_goals = []
        self.active_discrete_goal = None

        self.init_left_pad = self.get_body_com('leftpad')
        self.init_right_pad = self.get_body_com('rightpad')

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([+1, +1, +1, +1]),
        )

        self.isV2 = "V2" in type(self).__name__
        # Technically these observation lengths are different between v1 and
        # v2, but we handle that elsewhere and just stick with v2 numbers here
        self._obs_obj_max_len = 14 if self.isV2 else 6
        self._obs_obj_possible_lens = (6, 14)

        self._set_task_called = False
        self._partially_observable = True

        self.hand_init_pos = None  # OVERRIDE ME
        self._target_pos = None  # OVERRIDE ME
        self._random_reset_space = None  # OVERRIDE ME

        self._last_stable_obs = None
        # Note: It is unlikely that the positions and orientations stored
        # in this initiation of _prev_obs are correct. That being said, it
        # doesn't seem to matter (it will only affect frame-stacking for the
        # very first observation)
        self._prev_obs = self._get_curr_obs_combined_no_goal()

    def _set_task_inner(self):
        # Doesn't absorb "extra" kwargs, to ensure nothing's missed.
        pass

    def set_task(self, task):
        """Configure the env from a pickled task and reset it.

        Args:
            task: Object whose `data` attribute is a pickle of a dict with
                keys 'env_cls', 'rand_vec', 'partially_observable' plus any
                kwargs accepted by `_set_task_inner`
        """
        self._set_task_called = True
        # SECURITY: pickle.loads executes arbitrary code from its input;
        # only pass tasks that come from a trusted source.
        data = pickle.loads(task.data)
        assert isinstance(self, data['env_cls'])
        del data['env_cls']
        self._freeze_rand_vec = True
        self._last_rand_vec = data['rand_vec']
        del data['rand_vec']
        self._partially_observable = data['partially_observable']
        del data['partially_observable']
        self._set_task_inner(**data)
        self.reset()

    def set_xyz_action(self, action):
        """Move the mocap by `action * action_scale`, clipped to mocap bounds.

        Args:
            action (np.ndarray): Position delta; clipped to [-1, 1] first
        """
        action = np.clip(action, -1, 1)
        pos_delta = action * self.action_scale
        new_mocap_pos = self.data.mocap_pos + pos_delta[None]

        new_mocap_pos[0, :] = np.clip(
            new_mocap_pos[0, :],
            self.mocap_low,
            self.mocap_high,
        )
        self.data.set_mocap_pos('mocap', new_mocap_pos)
        self.data.set_mocap_quat('mocap', np.array([1, 0, 1, 0]))

    def discretize_goal_space(self, goals):
        """Switch to a discrete goal space (currently always asserts)."""
        # NOTE(review): the leading assert makes the rest unreachable unless
        # Python runs with -O; presumably this feature is deliberately
        # disabled -- confirm before relying on it.
        assert False, "discretize_goal_space is disabled"
        assert len(goals) >= 1
        self.discrete_goals = goals
        # update the goal_space to a Discrete space
        self.discrete_goal_space = Discrete(len(self.discrete_goals))

    def _set_obj_xyz(self, pos):
        """Write `pos` into qpos[9:12] and zero qvel[9:15]."""
        qpos = self.data.qpos.flat.copy()
        qvel = self.data.qvel.flat.copy()
        qpos[9:12] = pos.copy()
        qvel[9:15] = 0
        self.set_state(qpos, qvel)

    def _get_site_pos(self, siteName):
        """Return a copy of the world position of the named site."""
        _id = self.model.site_names.index(siteName)
        return self.data.site_xpos[_id].copy()

    def _set_pos_site(self, name, pos):
        """Sets the position of the site corresponding to `name`

        Args:
            name (str): The site's name
            pos (np.ndarray): Flat, 3 element array indicating site's location
        """
        assert isinstance(pos, np.ndarray)
        assert pos.ndim == 1

        self.data.site_xpos[self.model.site_name2id(name)] = pos[:3]

    @property
    def _target_site_config(self):
        """Retrieves site name(s) and position(s) corresponding to env targets

        :rtype: list of (str, np.ndarray)
        """
        return [('goal', self._target_pos)]

    @property
    def touching_main_object(self):
        """Calls `touching_object` for the ID of the env's main object

        Returns:
            (bool) whether the gripper is touching the object
        """
        return self.touching_object(self._get_id_main_object)

    def touching_object(self, object_geom_id):
        """Determines whether the gripper is touching the object with given id

        Args:
            object_geom_id (int): the ID of the object in question

        Returns:
            (bool): whether both pads have positive summed contact force
                with the object
        """
        env = self.unwrapped

        def pad_contact_force(pad_geom_id):
            # Sum efc_force over contacts involving both the pad and the
            # object
            return sum(
                env.data.efc_force[c.efc_address]
                for c in env.data.contact
                if (pad_geom_id in (c.geom1, c.geom2)
                    and object_geom_id in (c.geom1, c.geom2))
            )

        leftpad_force = pad_contact_force(
            env.model.geom_name2id('leftpad_geom'))
        rightpad_force = pad_contact_force(
            env.model.geom_name2id('rightpad_geom'))

        return 0 < leftpad_force and 0 < rightpad_force

    @property
    def _get_id_main_object(self):
        return self.unwrapped.model.geom_name2id('objGeom')

    def _get_pos_objects(self):
        """Retrieves object position(s) from mujoco properties or instance vars

        Returns:
            np.ndarray: Flat array (usually 3 elements) representing the
                object(s)' position(s)
        """
        # Throw error rather than making this an @abc.abstractmethod so that
        # V1 environments don't have to implement it
        raise NotImplementedError

    def _get_quat_objects(self):
        """Retrieves object quaternion(s) from mujoco properties

        Returns:
            np.ndarray: Flat array (usually 4 elements) representing the
                object(s)' quaternion(s); None for non-V2 environments
        """
        # Throw error rather than making this an @abc.abstractmethod so that
        # V1 environments don't have to implement it
        if self.isV2:
            raise NotImplementedError
        else:
            return None

    def _get_pos_goal(self):
        """Retrieves goal position from mujoco properties or instance vars

        Returns:
            np.ndarray: Flat array (3 elements) representing the goal position
        """
        assert isinstance(self._target_pos, np.ndarray)
        assert self._target_pos.ndim == 1
        return self._target_pos

    def _get_curr_obs_combined_no_goal(self):
        """Combines the end effector's {pos, closed amount} and the object(s)'
        {pos, quat} into a single flat observation. The goal's position is
        *not* included in this.

        Returns:
            np.ndarray: The flat observation array (18 elements for V2)
        """
        pos_hand = self.get_endeff_pos()

        finger_right, finger_left = (
            self._get_site_pos('rightEndEffector'),
            self._get_site_pos('leftEndEffector')
        )

        # the gripper can be at maximum about ~0.1 m apart.
        # dividing by 0.1 normalizes the gripper distance between
        # 0 and 1. Further, we clip because sometimes the grippers
        # are slightly more than 0.1m apart (~0.00045 m)
        # clipping removes the effects of this random extra distance
        # that is produced by mujoco
        gripper_distance_apart = np.linalg.norm(finger_right - finger_left)
        gripper_distance_apart = np.clip(gripper_distance_apart / 0.1, 0., 1.)

        obs_obj_padded = np.zeros(self._obs_obj_max_len)

        obj_pos = self._get_pos_objects()
        assert len(obj_pos) % 3 == 0

        obj_pos_split = np.split(obj_pos, len(obj_pos) // 3)

        if self.isV2:
            obj_quat = self._get_quat_objects()
            assert len(obj_quat) % 4 == 0
            obj_quat_split = np.split(obj_quat, len(obj_quat) // 4)
            # Interleave as (pos, quat) per object, zero-padded at the end
            obs_obj_padded[:len(obj_pos) + len(obj_quat)] = np.hstack([
                np.hstack((pos, quat))
                for pos, quat in zip(obj_pos_split, obj_quat_split)
            ])
            assert(len(obs_obj_padded) in self._obs_obj_possible_lens)
            return np.hstack((pos_hand, gripper_distance_apart, obs_obj_padded))
        else:
            # is a v1 environment
            obs_obj_padded[:len(obj_pos)] = obj_pos
            assert(len(obs_obj_padded) in self._obs_obj_possible_lens)
            return np.hstack((pos_hand, obs_obj_padded))

    def _get_obs(self):
        """Frame stacks `_get_curr_obs_combined_no_goal()` and concatenates the
        goal position to form a single flat observation.

        Returns:
            np.ndarray: The flat observation array (39 elements for V2)
        """
        pos_goal = self._get_pos_goal()
        if self._partially_observable:
            # Hide the goal by zeroing it out
            pos_goal = np.zeros_like(pos_goal)
        curr_obs = self._get_curr_obs_combined_no_goal()
        # do frame stacking
        if self.isV2:
            obs = np.hstack((curr_obs, self._prev_obs, pos_goal))
        else:
            obs = np.hstack((curr_obs, pos_goal))
        self._prev_obs = curr_obs
        return obs

    def _get_obs_dict(self):
        obs = self._get_obs()
        return dict(
            state_observation=obs,
            state_desired_goal=self._get_pos_goal(),
            state_achieved_goal=obs[3:-3],
        )

    @property
    def observation_space(self):
        """Box matching the layout produced by `_get_obs()`."""
        obs_obj_max_len = self._obs_obj_max_len if self.isV2 else 6

        obj_low = np.full(obs_obj_max_len, -np.inf)
        obj_high = np.full(obs_obj_max_len, +np.inf)
        goal_low = np.zeros(3) if self._partially_observable \
            else self.goal_space.low
        goal_high = np.zeros(3) if self._partially_observable \
            else self.goal_space.high
        gripper_low = -1.
        gripper_high = +1.

        return Box(
            np.hstack((self._HAND_SPACE.low, gripper_low, obj_low,
                       self._HAND_SPACE.low, gripper_low, obj_low, goal_low)),
            np.hstack((self._HAND_SPACE.high, gripper_high, obj_high,
                       self._HAND_SPACE.high, gripper_high, obj_high, goal_high))
        ) if self.isV2 else Box(
            np.hstack((self._HAND_SPACE.low, obj_low, goal_low)),
            np.hstack((self._HAND_SPACE.high, obj_high, goal_high))
        )

    @_assert_task_is_set
    def step(self, action):
        """Apply `action`, advance the simulation and evaluate the state.

        Args:
            action (np.ndarray): First three entries are the XYZ delta, the
                last entry is the gripper effort

        Returns:
            For V2 envs: `(obs, reward, False, info)`. For other envs: only
            the most recent observation.
        """
        self.set_xyz_action(action[:3])
        self.do_simulation([action[-1], -action[-1]])
        self.curr_path_length += 1

        # Running the simulator can sometimes mess up site positions, so
        # re-position them here to make sure they're accurate
        for site in self._target_site_config:
            self._set_pos_site(*site)

        if self._did_see_sim_exception:
            return (
                self._last_stable_obs,  # observation just before going unstable
                0.0,  # reward (penalize for causing instability)
                False,  # termination flag always False
                {  # info
                    'success': False,
                    'near_object': 0.0,
                    'grasp_success': False,
                    'grasp_reward': 0.0,
                    'in_place_reward': 0.0,
                    'obj_to_target': 0.0,
                    'unscaled_reward': 0.0,
                }
            )

        self._last_stable_obs = self._get_obs()
        if not self.isV2:
            # v1 environments expect this superclass step() to return only the
            # most recent observation. they override the rest of the
            # functionality and end up returning the same sort of tuple that
            # this does
            return self._last_stable_obs

        reward, info = self.evaluate_state(self._last_stable_obs, action)
        return self._last_stable_obs, reward, False, info

    def evaluate_state(self, obs, action):
        """Does the heavy-lifting for `step()` -- namely, calculating reward
        and populating the `info` dict with training metrics

        Returns:
            float: Reward between 0 and 10
            dict: Dictionary which contains useful metrics (success,
                near_object, grasp_success, grasp_reward, in_place_reward,
                obj_to_target, unscaled_reward)
        """
        # Throw error rather than making this an @abc.abstractmethod so that
        # V1 environments don't have to implement it
        raise NotImplementedError

    def reset(self):
        self.curr_path_length = 0
        return super().reset()

    def _reset_hand(self, steps=50):
        """Drive the mocap to `hand_init_pos` for `steps` sim calls and
        record the resulting `tcp_center` as `init_tcp`.
        """
        for _ in range(steps):
            self.data.set_mocap_pos('mocap', self.hand_init_pos)
            self.data.set_mocap_quat('mocap', np.array([1, 0, 1, 0]))
            self.do_simulation([-1, 1], self.frame_skip)
        self.init_tcp = self.tcp_center

    def _get_state_rand_vec(self):
        """Return the random vector used to place objects/goals on reset.

        When frozen, the last vector is returned unchanged. Otherwise a new
        vector is drawn uniformly from `_random_reset_space` (from
        `self.np_random` when `seeded_rand_vec` is set, else from the global
        numpy RNG) and remembered as `_last_rand_vec`.
        """
        if self._freeze_rand_vec:
            assert self._last_rand_vec is not None
            return self._last_rand_vec

        rng = self.np_random if self.seeded_rand_vec else np.random
        rand_vec = rng.uniform(
            self._random_reset_space.low,
            self._random_reset_space.high,
            size=self._random_reset_space.low.size)
        # Cache in both branches so a later freeze replays the latest sample
        self._last_rand_vec = rand_vec
        return rand_vec

    def _gripper_caging_reward(self,
                               action,
                               obj_pos,
                               obj_radius,
                               pad_success_thresh,
                               object_reach_radius,
                               xz_thresh,
                               desired_gripper_effort=1.0,
                               high_density=False,
                               medium_density=False):
        """Reward for agent grasping obj

        Args:
            action(np.ndarray): (4,) array representing the action
                delta(x), delta(y), delta(z), gripper_effort
            obj_pos(np.ndarray): (3,) array representing the obj x,y,z
            obj_radius(float):radius of object's bounding sphere
            pad_success_thresh(float): successful distance of gripper_pad
                to object
            object_reach_radius(float): successful distance of gripper center
                to the object.
            xz_thresh(float): successful distance of gripper in x_z axis to the
                object. Y axis not included since the caging function handles
                successful grasping in the Y axis.
            desired_gripper_effort(float): effort at which the gripping term
                saturates
            high_density(bool): average the result with the caging term
            medium_density(bool): average the result with a reach term

        Raises:
            ValueError: If both `high_density` and `medium_density` are set
        """
        if high_density and medium_density:
            raise ValueError("Can only be either high_density or medium_density")
        # MARK: Left-right gripper information for caging reward----------------
        left_pad = self.get_body_com('leftpad')
        right_pad = self.get_body_com('rightpad')

        # get current positions of left and right pads (Y axis)
        pad_y_lr = np.hstack((left_pad[1], right_pad[1]))
        # compare *current* pad positions with *current* obj position (Y axis)
        pad_to_obj_lr = np.abs(pad_y_lr - obj_pos[1])
        # compare *current* pad positions with *initial* obj position (Y axis)
        pad_to_objinit_lr = np.abs(pad_y_lr - self.obj_init_pos[1])

        # Left/right caging reward. "x" is the pad-to-object distance and the
        # margin is derived from the pad-to-initial-object distance, so the
        # margin itself moves with the gripper:
        #   * far from the object, x sits just outside the margin and the
        #     reward is very small;
        #   * within pad_success_thresh, x is inside the bounds as long as
        #     x > obj_radius, giving maximum reward;
        #   * after the object has been moved away from its initial position
        #     the margin grows, which penalizes moving back toward
        #     `obj_init_pos` but does not reward leaving it -- that is left
        #     to the individual environments.
        caging_lr_margin = np.abs(pad_to_objinit_lr - pad_success_thresh)
        caging_lr = [reward_utils.tolerance(
            pad_to_obj_lr[i],  # "x" in the description above
            bounds=(obj_radius, pad_success_thresh),
            margin=caging_lr_margin[i],  # "margin" in the description above
            sigmoid='long_tail',
        ) for i in range(2)]
        caging_y = reward_utils.hamacher_product(*caging_lr)

        # MARK: X-Z gripper information for caging reward-----------------------
        tcp = self.tcp_center
        xz = [0, 2]

        # Compared to the caging_y reward, caging_xz is simple. The margin is
        # constant (something in the 0.3 to 0.5 range) and x shrinks as the
        # gripper moves towards the object. After picking up the object, the
        # reward is maximized and changes very little
        caging_xz_margin = np.linalg.norm(self.obj_init_pos[xz] - self.init_tcp[xz])
        caging_xz_margin -= xz_thresh
        caging_xz = reward_utils.tolerance(
            np.linalg.norm(tcp[xz] - obj_pos[xz]),  # "x" in the description above
            bounds=(0, xz_thresh),
            margin=caging_xz_margin,  # "margin" in the description above
            sigmoid='long_tail',
        )

        # MARK: Closed-extent gripper information for caging reward-------------
        gripper_closed = min(max(0, action[-1]), desired_gripper_effort) \
            / desired_gripper_effort

        # MARK: Combine components----------------------------------------------
        caging = reward_utils.hamacher_product(caging_y, caging_xz)
        # Gripping only counts once the object is (almost) fully caged
        gripping = gripper_closed if caging > 0.97 else 0.
        caging_and_gripping = reward_utils.hamacher_product(caging, gripping)

        if high_density:
            caging_and_gripping = (caging_and_gripping + caging) / 2
        if medium_density:
            tcp = self.tcp_center
            tcp_to_obj = np.linalg.norm(obj_pos - tcp)
            tcp_to_obj_init = np.linalg.norm(self.obj_init_pos - self.init_tcp)
            # Compute reach reward
            # - We subtract `object_reach_radius` from the margin so that the
            #   reward always starts with a value of 0.1
            reach_margin = abs(tcp_to_obj_init - object_reach_radius)
            reach = reward_utils.tolerance(
                tcp_to_obj,
                bounds=(0, object_reach_radius),
                margin=reach_margin,
                sigmoid='long_tail',
            )
            caging_and_gripping = (caging_and_gripping + reach) / 2

        return caging_and_gripping
class SawyerCoffeePushEnv(SawyerXYZEnv):
    """V1 Sawyer task: reach the object and push it to the 'coffee_goal'
    site (success when the XY distance is at most 0.07).
    """

    def __init__(self):

        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.1, 0.6, 0.)
        obj_high = (0.1, 0.7, 0.)
        goal_low = (-0.1, 0.8, -.001)
        goal_high = (0.1, 0.9, 0.0)

        super().__init__(
            self.model_name,
            hand_low=hand_low,
            hand_high=hand_high,
        )

        self.init_config = {
            'obj_init_angle': 0.3,
            'obj_init_pos': np.array([0., .6, 0.]),
            'hand_init_pos': np.array([0., .6, .2]),
        }
        self.goal = np.array([0., 0.8, 0])
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.hand_init_pos = self.init_config['hand_init_pos']

        # The reset vector is (object xyz, goal xyz)
        self._random_reset_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))

    @property
    def model_name(self):
        return full_v1_path_for('sawyer_xyz/sawyer_coffee.xml')

    @_assert_task_is_set
    def step(self, action):
        """Step the env and return `(ob, reward, False, info)`."""
        ob = super().step(action)
        reward, reach_dist, push_dist = self.compute_reward(action, ob)
        info = {
            'reachDist': reach_dist,
            'goalDist': push_dist,
            'epRew': reward,
            'pickRew': None,
            'success': float(push_dist <= 0.07)
        }

        return ob, reward, False, info

    @property
    def _target_site_config(self):
        return [('coffee_goal', self._target_pos)]

    def _get_pos_objects(self):
        return self.data.get_geom_xpos('objGeom')

    def adjust_initObjPos(self, orig_init_pos):
        """Shift `orig_init_pos` by the XY offset between body and geom."""
        # This is to account for meshes for the geom and object are not aligned
        # If this is not done, the object could be initialized in an extreme position
        body_com = self.get_body_com('obj')
        offset_xy = body_com[:2] - self.data.get_geom_xpos('objGeom')[:2]
        adjusted_xy = orig_init_pos[:2] + offset_xy

        # The convention we follow is that body_com[2] is always 0, and
        # geom_pos[2] is the object height
        return [adjusted_xy[0], adjusted_xy[1], self.get_body_com('obj')[-1]]

    def reset_model(self):
        """Reset hand, object, goal and machine; return the observation."""
        self._reset_hand()
        self._target_pos = self.goal.copy()
        self.obj_init_pos = self.adjust_initObjPos(self.init_config['obj_init_pos'])
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.objHeight = self.data.get_geom_xpos('objGeom')[2]

        if self.random_init:
            rand_vec = self._get_state_rand_vec()
            self._target_pos = rand_vec[3:]
            # Resample until object and goal are at least 0.15 apart in XY
            while np.linalg.norm(rand_vec[:2] - self._target_pos[:2]) < 0.15:
                rand_vec = self._get_state_rand_vec()
                self._target_pos = rand_vec[3:]
            obj_z = [self.obj_init_pos[-1]]
            self._target_pos = np.concatenate((rand_vec[-3:-1], obj_z))
            self.obj_init_pos = np.concatenate((rand_vec[:2], obj_z))
            machine_pos = self._target_pos - np.array([0, -0.1, -0.27])
            button_pos = machine_pos + np.array([0., -0.12, 0.05])
            body_pos = self.sim.model.body_pos
            body_pos[self.model.body_name2id('coffee_machine')] = machine_pos
            body_pos[self.model.body_name2id('button')] = button_pos

        self._set_obj_xyz(self.obj_init_pos)
        self.maxPushDist = np.linalg.norm(
            self.obj_init_pos[:2] - np.array(self._target_pos)[:2])

        return self._get_obs()

    def _reset_hand(self):
        super()._reset_hand(10)
        right = self._get_site_pos('rightEndEffector')
        left = self._get_site_pos('leftEndEffector')
        self.init_fingerCOM = (right + left)/2
        self.reachCompleted = False

    def compute_reward(self, actions, obs):
        """Return `[reward, reachDist, pushDist]` for the current state.

        The push term is only granted once the fingers are within 0.05 of
        the object.
        """
        del actions

        obj_pos = obs[3:6]

        right = self._get_site_pos('rightEndEffector')
        left = self._get_site_pos('leftEndEffector')
        finger_com = (right + left)/2

        goal = self._target_pos

        c1 = 1000
        c2 = 0.01
        c3 = 0.001
        assert np.all(goal == self._get_site_pos('coffee_goal'))
        reach_dist = np.linalg.norm(finger_com - obj_pos)
        push_dist = np.linalg.norm(obj_pos[:2] - goal[:2])
        reach_rew = -reach_dist

        if reach_dist < 0.05:
            bonus = np.exp(-(push_dist**2)/c2) + np.exp(-(push_dist**2)/c3)
            push_rew = 1000*(self.maxPushDist - push_dist) + c1*bonus
            push_rew = max(push_rew, 0)
        else:
            push_rew = 0

        reward = reach_rew + push_rew

        return [reward, reach_dist, push_dist]
class SawyerDialTurnEnv(SawyerXYZEnv):
    """V1 Sawyer task: reach the dial and move its 'dialStart' site along Y
    to the target (success when the Y distance is at most 0.03).
    """

    def __init__(self):

        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.1, 0.7, 0.05)
        obj_high = (0.1, 0.8, 0.05)

        super().__init__(
            self.model_name,
            hand_low=hand_low,
            hand_high=hand_high,
        )

        self.init_config = {
            'obj_init_pos': np.array([0, 0.7, 0.05]),
            'hand_init_pos': np.array([0, 0.6, 0.2], dtype=np.float32),
        }
        self.goal = np.array([0., 0.73, 0.08])
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.hand_init_pos = self.init_config['hand_init_pos']
        # The goal space spans the hand bounds set by the parent constructor
        goal_low = self.hand_low
        goal_high = self.hand_high

        self._random_reset_space = Box(
            np.array(obj_low),
            np.array(obj_high),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))

    @property
    def model_name(self):
        return full_v1_path_for('sawyer_xyz/sawyer_dial.xml')

    @_assert_task_is_set
    def step(self, action):
        """Step the env and return `(ob, reward, False, info)`."""
        ob = super().step(action)
        reward, reach_dist, pull_dist = self.compute_reward(action, ob)

        info = {
            'reachDist': reach_dist,
            'goalDist': pull_dist,
            'epRew': reward,
            'pickRew': None,
            'success': float(pull_dist <= 0.03)
        }

        return ob, reward, False, info

    def _get_pos_objects(self):
        return self._get_site_pos('dialStart')

    def reset_model(self):
        """Reset hand, dial and target; return the observation."""
        self._reset_hand()
        self._target_pos = self.goal.copy()
        self.obj_init_pos = self.init_config['obj_init_pos']

        if self.random_init:
            rand_vec = self._get_state_rand_vec()
            self.obj_init_pos = rand_vec[:3]
            # The target sits at a fixed offset from the sampled dial position
            self._target_pos = rand_vec.copy() + np.array([0, 0.03, 0.03])

        dial_id = self.model.body_name2id('dial')
        self.sim.model.body_pos[dial_id] = self.obj_init_pos
        self.maxPullDist = np.abs(self._target_pos[1] - self.obj_init_pos[1])

        return self._get_obs()

    def _reset_hand(self):
        super()._reset_hand(10)

        right = self._get_site_pos('rightEndEffector')
        left = self._get_site_pos('leftEndEffector')
        self.init_fingerCOM = (right + left)/2
        self.reachCompleted = False

    def compute_reward(self, actions, obs):
        """Return `[reward, reachDist, pullDist]` for the current state.

        The pull term is only granted while the fingers are within 0.05 of
        the object.
        """
        del actions

        obj_pos = obs[3:6]

        right = self._get_site_pos('rightEndEffector')
        left = self._get_site_pos('leftEndEffector')
        finger_com = (right + left)/2

        pull_goal = self._target_pos

        pull_dist = np.abs(obj_pos[1] - pull_goal[1])
        reach_dist = np.linalg.norm(obj_pos - finger_com)
        reach_rew = -reach_dist

        self.reachCompleted = reach_dist < 0.05

        c1 = 1000
        c2 = 0.001
        c3 = 0.0001

        if self.reachCompleted:
            bonus = np.exp(-(pull_dist**2)/c2) + np.exp(-(pull_dist**2)/c3)
            pull_rew = 1000*(self.maxPullDist - pull_dist) + c1*bonus
            pull_rew = max(pull_rew, 0)
        else:
            pull_rew = 0

        reward = reach_rew + pull_rew

        return [reward, reach_dist, pull_dist]
import full_v1_path_for +from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import SawyerXYZEnv, _assert_task_is_set + + +class SawyerNutDisassembleEnv(SawyerXYZEnv): + def __init__(self): + + liftThresh = 0.05 + hand_low = (-0.5, 0.40, 0.05) + hand_high = (0.5, 1, 0.5) + obj_low = (0.1, 0.75, 0.02) + obj_high = (0., 0.85, 0.02) + goal_low = (-0.1, 0.75, 0.1699) + goal_high = (0.1, 0.85, 0.1701) + + super().__init__( + self.model_name, + hand_low=hand_low, + hand_high=hand_high, + ) + + self.init_config = { + 'obj_init_angle': 0.3, + 'obj_init_pos': np.array([0, 0.8, 0.02]), + 'hand_init_pos': np.array((0, 0.6, 0.2), dtype=np.float32), + } + self.goal = np.array([0, 0.8, 0.17]) + self.obj_init_pos = self.init_config['obj_init_pos'] + self.obj_init_angle = self.init_config['obj_init_angle'] + self.hand_init_pos = self.init_config['hand_init_pos'] + + self.liftThresh = liftThresh + + self._random_reset_space = Box( + np.hstack((obj_low, goal_low)), + np.hstack((obj_high, goal_high)), + ) + self.goal_space = Box(np.array(goal_low), np.array(goal_high)) + + @property + def model_name(self): + return full_v1_path_for('sawyer_xyz/sawyer_assembly_peg.xml') + + @_assert_task_is_set + def step(self, action): + ob = super().step(action) + reward, _, reachDist, pickRew, _, placingDist, success = self.compute_reward(action, ob) + info = { + 'reachDist': reachDist, + 'pickRew': pickRew, + 'epRew': reward, + 'goalDist': placingDist, + 'success': success + } + + return ob, reward, False, info + + @property + def _target_site_config(self): + return [('pegTop', self._target_pos)] + + def _get_pos_objects(self): + return self.data.get_geom_xpos('RoundNut-8') + + def _get_obs_dict(self): + obs_dict = super()._get_obs_dict() + obs_dict['state_achieved_goal'] = self.get_body_com('RoundNut') + return obs_dict + + def reset_model(self): + self._reset_hand() + self._target_pos = self.goal.copy() + self.obj_init_pos = np.array(self.init_config['obj_init_pos']) + self.obj_init_angle = 
self.init_config['obj_init_angle'] + + if self.random_init: + goal_pos = self._get_state_rand_vec() + while np.linalg.norm(goal_pos[:2] - goal_pos[-3:-1]) < 0.1: + goal_pos = self._get_state_rand_vec() + self.obj_init_pos = goal_pos[:3] + self._target_pos = goal_pos[:3] + np.array([0, 0, 0.15]) + + peg_pos = self.obj_init_pos + np.array([0., 0., 0.03]) + peg_top_pos = self.obj_init_pos + np.array([0., 0., 0.08]) + self.sim.model.body_pos[self.model.body_name2id('peg')] = peg_pos + self.sim.model.site_pos[self.model.site_name2id('pegTop')] = peg_top_pos + self._set_obj_xyz(self.obj_init_pos) + self.objHeight = self.data.get_geom_xpos('RoundNut-8')[2] + self.heightTarget = self.objHeight + self.liftThresh + self.maxPlacingDist = np.linalg.norm(np.array([self.obj_init_pos[0], self.obj_init_pos[1], self.heightTarget]) - np.array(self._target_pos)) + self.heightTarget + + return self._get_obs() + + def _reset_hand(self): + super()._reset_hand(10) + + rightFinger, leftFinger = self._get_site_pos('rightEndEffector'), self._get_site_pos('leftEndEffector') + self.init_fingerCOM = (rightFinger + leftFinger)/2 + self.pickCompleted = False + + def compute_reward(self, actions, obs): + + graspPos = obs[3:6] + objPos = graspPos + + rightFinger, leftFinger = self._get_site_pos('rightEndEffector'), self._get_site_pos('leftEndEffector') + fingerCOM = (rightFinger + leftFinger)/2 + + heightTarget = self.heightTarget + placingGoal = self._target_pos + + reachDist = np.linalg.norm(graspPos - fingerCOM) + reachDistxy = np.linalg.norm(graspPos[:-1] - fingerCOM[:-1]) + zDist = np.abs(fingerCOM[-1] - self.init_fingerCOM[-1]) + + placingDist = np.linalg.norm(objPos - placingGoal) + + def reachReward(): + reachRew = -reachDist + if reachDistxy < 0.04: + reachRew = -reachDist + else: + reachRew = -reachDistxy - 2*zDist + + # incentive to close fingers when reachDist is small + if reachDist < 0.04: + reachRew = -reachDist + max(actions[-1],0)/50 + return reachRew, reachDist + + def 
pickCompletionCriteria(): + tolerance = 0.01 + if objPos[2] >= (heightTarget- tolerance) and reachDist < 0.04: + return True + else: + return False + + if pickCompletionCriteria(): + self.pickCompleted = True + + def objDropped(): + return (objPos[2] < (self.objHeight + 0.005)) and (placingDist >0.02) and (reachDist > 0.02) + + def orig_pickReward(): + hScale = 100 + if self.pickCompleted and not(objDropped()): + return hScale*heightTarget + elif (reachDist < 0.04) and (objPos[2]> (self.objHeight + 0.005)) : + return hScale* min(heightTarget, objPos[2]) + else: + return 0 + + def placeRewardMove(): + c1 = 1000 + c2 = 0.01 + c3 = 0.001 + + placeRew = 1000*(self.maxPlacingDist - placingDist) + c1*(np.exp(-(placingDist**2)/c2) + np.exp(-(placingDist**2)/c3)) + placeRew = max(placeRew,0) + cond = self.pickCompleted and (reachDist < 0.03) and not(objDropped()) + if cond: + return [placeRew, placingDist] + else: + return [0 , placingDist] + + + reachRew, reachDist = reachReward() + pickRew = orig_pickReward() + + peg_pos = self.sim.model.body_pos[self.model.body_name2id('peg')] + nut_pos = self.get_body_com('RoundNut') + if abs(nut_pos[0] - peg_pos[0]) > 0.05 or \ + abs(nut_pos[1] - peg_pos[1]) > 0.05: + placingDist = 0 + reachRew = 0 + reachDist = 0 + pickRew = heightTarget*100 + + placeRew , placingDist = placeRewardMove() + assert ((placeRew >=0) and (pickRew>=0)) + reward = reachRew + pickRew + placeRew + success = (abs(nut_pos[0] - peg_pos[0]) > 0.05 or abs(nut_pos[1] - peg_pos[1]) > 0.05) or placingDist < 0.02 + + return [reward, reachRew, reachDist, pickRew, placeRew, placingDist, float(success)] diff --git a/Metaworld/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back_side.py b/Metaworld/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back_side.py new file mode 100644 index 0000000000000000000000000000000000000000..6765ad61ebfa8ddc59788abb247f5c3ff4e20a1e --- /dev/null +++ 
b/Metaworld/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_back_side.py @@ -0,0 +1,119 @@ +import numpy as np +from gym.spaces import Box + +from metaworld.envs.asset_path_utils import full_v1_path_for +from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import SawyerXYZEnv, _assert_task_is_set + + +class SawyerPlateSlideBackSideEnv(SawyerXYZEnv): + + def __init__(self): + + goal_low = (-0.1, 0.6, 0.015) + goal_high = (0.1, 0.6, 0.015) + hand_low = (-0.5, 0.40, 0.05) + hand_high = (0.5, 1, 0.5) + obj_low = (-0.25, 0.6, 0.02) + obj_high = (-0.25, 0.6, 0.02) + + super().__init__( + self.model_name, + hand_low=hand_low, + hand_high=hand_high, + ) + + self.init_config = { + 'obj_init_angle': 0.3, + 'obj_init_pos': np.array([-0.25, 0.6, 0.02], dtype=np.float32), + 'hand_init_pos': np.array((0, 0.6, 0.2), dtype=np.float32), + } + self.goal = np.array([0., 0.6, 0.015]) + self.obj_init_pos = self.init_config['obj_init_pos'] + self.obj_init_angle = self.init_config['obj_init_angle'] + self.hand_init_pos = self.init_config['hand_init_pos'] + + self._random_reset_space = Box( + np.hstack((obj_low, goal_low)), + np.hstack((obj_high, goal_high)), + ) + self.goal_space = Box(np.array(goal_low), np.array(goal_high)) + + @property + def model_name(self): + return full_v1_path_for('sawyer_xyz/sawyer_plate_slide_sideway.xml') + + @_assert_task_is_set + def step(self, action): + ob = super().step(action) + reward, reachDist, pullDist = self.compute_reward(action, ob) + + info = { + 'reachDist': reachDist, + 'goalDist': pullDist, + 'epRew': reward, + 'pickRew': None, + 'success': float(pullDist <= 0.07) + } + + return ob, reward, False, info + + def _get_pos_objects(self): + return self.data.get_geom_xpos('objGeom') + + def _set_obj_xyz(self, pos): + qpos = self.data.qpos.flat.copy() + qvel = self.data.qvel.flat.copy() + qpos[9:11] = pos + self.set_state(qpos, qvel) + + def reset_model(self): + self._reset_hand() + self._target_pos = self.goal.copy() + self.obj_init_pos = 
self.init_config['obj_init_pos'] + self.objHeight = self.data.get_geom_xpos('objGeom')[2] + + if self.random_init: + obj_pos = self._get_state_rand_vec() + self.obj_init_pos = obj_pos[:3] + goal_pos = obj_pos[3:] + self._target_pos = goal_pos + + self.sim.model.body_pos[self.model.body_name2id('cabinet')] = self.obj_init_pos + self._set_obj_xyz(np.array([-0.2, 0.])) + self.maxDist = np.linalg.norm(self.data.get_geom_xpos('objGeom')[:-1] - self._target_pos[:-1]) + self.target_reward = 1000*self.maxDist + 1000*2 + + return self._get_obs() + + def _reset_hand(self): + super()._reset_hand(10) + + rightFinger, leftFinger = self._get_site_pos('rightEndEffector'), self._get_site_pos('leftEndEffector') + self.init_fingerCOM = (rightFinger + leftFinger)/2 + + def compute_reward(self, actions, obs): + del actions + + objPos = obs[3:6] + + rightFinger, leftFinger = self._get_site_pos('rightEndEffector'), self._get_site_pos('leftEndEffector') + fingerCOM = (rightFinger + leftFinger)/2 + + pullGoal = self._target_pos + + reachDist = np.linalg.norm(objPos - fingerCOM) + + pullDist = np.linalg.norm(objPos[:-1] - pullGoal[:-1]) + + c1 = 1000 + c2 = 0.01 + c3 = 0.001 + if reachDist < 0.05: + pullRew = 1000*(self.maxDist - pullDist) + c1*(np.exp(-(pullDist**2)/c2) + np.exp(-(pullDist**2)/c3)) + pullRew = max(pullRew, 0) + else: + pullRew = 0 + + reward = -reachDist + pullRew + + return [reward, reachDist, pullDist] diff --git a/Metaworld/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_side.py b/Metaworld/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_side.py new file mode 100644 index 0000000000000000000000000000000000000000..e68adcee6a1e7a46d03fc4af27d15769516c2a6a --- /dev/null +++ b/Metaworld/metaworld/envs/mujoco/sawyer_xyz/v1/sawyer_plate_slide_side.py @@ -0,0 +1,124 @@ +import numpy as np +from gym.spaces import Box + +from metaworld.envs.asset_path_utils import full_v1_path_for +from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import SawyerXYZEnv, 
_assert_task_is_set + + +class SawyerPlateSlideSideEnv(SawyerXYZEnv): + + def __init__(self): + + goal_low = (-0.3, 0.6, 0.02) + goal_high = (-0.25, 0.7, 0.02) + hand_low = (-0.5, 0.40, 0.05) + hand_high = (0.5, 1, 0.5) + obj_low = (0., 0.6, 0.015) + obj_high = (0., 0.6, 0.015) + + super().__init__( + self.model_name, + hand_low=hand_low, + hand_high=hand_high, + ) + + self.init_config = { + 'obj_init_angle': 0.3, + 'obj_init_pos': np.array([0., 0.6, 0.015], dtype=np.float32), + 'hand_init_pos': np.array((0, 0.6, 0.2), dtype=np.float32), + } + self.goal = np.array([-0.25, 0.6, 0.02]) + self.obj_init_pos = self.init_config['obj_init_pos'] + self.obj_init_angle = self.init_config['obj_init_angle'] + self.hand_init_pos = self.init_config['hand_init_pos'] + + self._random_reset_space = Box( + np.hstack((obj_low, goal_low)), + np.hstack((obj_high, goal_high)), + ) + self.goal_space = Box(np.array(goal_low), np.array(goal_high)) + + @property + def model_name(self): + return full_v1_path_for('sawyer_xyz/sawyer_plate_slide_sideway.xml') + + @_assert_task_is_set + def step(self, action): + ob = super().step(action) + reward, reachDist, pullDist = self.compute_reward(action, ob) + + info = { + 'reachDist': reachDist, + 'goalDist': pullDist, + 'epRew': reward, + 'pickRew': None, + 'success': float(pullDist <= 0.08) + } + + return ob, reward, False, info + + def _get_pos_objects(self): + return self.data.get_geom_xpos('objGeom') + + def _set_objCOM_marker(self): + objPos = self.data.get_geom_xpos('handle') + self.data.site_xpos[self.model.site_name2id('objSite')] = ( + objPos + ) + + def _set_obj_xyz(self, pos): + qpos = self.data.qpos.flat.copy() + qvel = self.data.qvel.flat.copy() + qpos[9:11] = pos + self.set_state(qpos, qvel) + + def reset_model(self): + self._reset_hand() + self._target_pos = self.goal.copy() + self.obj_init_pos = self.init_config['obj_init_pos'] + self.objHeight = self.data.get_geom_xpos('objGeom')[2] + + if self.random_init: + obj_pos = 
self._get_state_rand_vec() + self.obj_init_pos = obj_pos[:3] + goal_pos = obj_pos[3:] + self._target_pos = goal_pos + + self.sim.model.body_pos[self.model.body_name2id('cabinet')] = self._target_pos + self._set_obj_xyz(np.zeros(2)) + self.maxDist = np.linalg.norm(self.obj_init_pos[:-1] - self._target_pos[:-1]) + self.target_reward = 1000*self.maxDist + 1000*2 + + return self._get_obs() + + def _reset_hand(self): + super()._reset_hand(10) + + rightFinger, leftFinger = self._get_site_pos('rightEndEffector'), self._get_site_pos('leftEndEffector') + self.init_fingerCOM = (rightFinger + leftFinger)/2 + + def compute_reward(self, actions, obs): + del actions + + objPos = obs[3:6] + + rightFinger, leftFinger = self._get_site_pos('rightEndEffector'), self._get_site_pos('leftEndEffector') + fingerCOM = (rightFinger + leftFinger)/2 + + pullGoal = self._target_pos + + reachDist = np.linalg.norm(objPos - fingerCOM) + + pullDist = np.linalg.norm(objPos[:-1] - pullGoal[:-1]) + + c1 = 1000 + c2 = 0.01 + c3 = 0.001 + if reachDist < 0.05: + pullRew = 1000*(self.maxDist - pullDist) + c1*(np.exp(-(pullDist**2)/c2) + np.exp(-(pullDist**2)/c3)) + pullRew = max(pullRew, 0) + else: + pullRew = 0 + reward = -reachDist + pullRew + + return [reward, reachDist, pullDist] diff --git a/Metaworld/metaworld/envs/reward_utils.py b/Metaworld/metaworld/envs/reward_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0f1b729f4246c78265c56f12a0ccee435e0c5204 --- /dev/null +++ b/Metaworld/metaworld/envs/reward_utils.py @@ -0,0 +1,220 @@ +"""A set of reward utilities written by the authors of dm_control""" + +from multiprocessing import Value +import numpy as np + +# The value returned by tolerance() at `margin` distance from `bounds` interval. +_DEFAULT_VALUE_AT_MARGIN = 0.1 + + +def _sigmoids(x, value_at_1, sigmoid): + """Returns 1 when `x` == 0, between 0 and 1 otherwise. + + Args: + x: A scalar or numpy array. 
+ value_at_1: A float between 0 and 1 specifying the output when `x` == 1. + sigmoid: String, choice of sigmoid type. + + Returns: + A numpy array with values between 0.0 and 1.0. + + Raises: + ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and + `quadratic` sigmoids which allow `value_at_1` == 0. + ValueError: If `sigmoid` is of an unknown type. + """ + if sigmoid in ('cosine', 'linear', 'quadratic'): + if not 0 <= value_at_1 < 1: + raise ValueError( + '`value_at_1` must be nonnegative and smaller than 1, ' + 'got {}.'.format(value_at_1)) + else: + if not 0 < value_at_1 < 1: + raise ValueError('`value_at_1` must be strictly between 0 and 1, ' + 'got {}.'.format(value_at_1)) + + if sigmoid == 'gaussian': + scale = np.sqrt(-2 * np.log(value_at_1)) + return np.exp(-0.5 * (x * scale)**2) + + elif sigmoid == 'hyperbolic': + scale = np.arccosh(1 / value_at_1) + return 1 / np.cosh(x * scale) + + elif sigmoid == 'long_tail': + scale = np.sqrt(1 / value_at_1 - 1) + return 1 / ((x * scale)**2 + 1) + + elif sigmoid == 'reciprocal': + scale = 1 / value_at_1 - 1 + return 1 / (abs(x) * scale + 1) + + elif sigmoid == 'cosine': + scale = np.arccos(2 * value_at_1 - 1) / np.pi + scaled_x = x * scale + return np.where( + abs(scaled_x) < 1, (1 + np.cos(np.pi * scaled_x)) / 2, 0.0) + + elif sigmoid == 'linear': + scale = 1 - value_at_1 + scaled_x = x * scale + return np.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0) + + elif sigmoid == 'quadratic': + scale = np.sqrt(1 - value_at_1) + scaled_x = x * scale + return np.where(abs(scaled_x) < 1, 1 - scaled_x**2, 0.0) + + elif sigmoid == 'tanh_squared': + scale = np.arctanh(np.sqrt(1 - value_at_1)) + return 1 - np.tanh(x * scale)**2 + + else: + raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid)) + + +def tolerance(x, + bounds=(0.0, 0.0), + margin=0.0, + sigmoid='gaussian', + value_at_margin=_DEFAULT_VALUE_AT_MARGIN): + """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise. 
+ + Args: + x: A scalar or numpy array. + bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for + the target interval. These can be infinite if the interval is unbounded + at one or both ends, or they can be equal to one another if the target + value is exact. + margin: Float. Parameter that controls how steeply the output decreases as + `x` moves out-of-bounds. + * If `margin == 0` then the output will be 0 for all values of `x` + outside of `bounds`. + * If `margin > 0` then the output will decrease sigmoidally with + increasing distance from the nearest bound. + sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian', + 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'. + value_at_margin: A float between 0 and 1 specifying the output value when + the distance from `x` to the nearest bound is equal to `margin`. Ignored + if `margin == 0`. + + Returns: + A float or numpy array with values between 0.0 and 1.0. + + Raises: + ValueError: If `bounds[0] > bounds[1]`. + ValueError: If `margin` is negative. + """ + lower, upper = bounds + if lower > upper: + raise ValueError('Lower bound must be <= upper bound.') + if margin < 0: + raise ValueError('`margin` must be non-negative. Current value: {}'.format(margin)) + + in_bounds = np.logical_and(lower <= x, x <= upper) + if margin == 0: + value = np.where(in_bounds, 1.0, 0.0) + else: + d = np.where(x < lower, lower - x, x - upper) / margin + value = np.where(in_bounds, 1.0, _sigmoids(d, value_at_margin, + sigmoid)) + + return float(value) if np.isscalar(x) else value + + +def inverse_tolerance(x, + bounds=(0.0, 0.0), + margin=0.0, + sigmoid='reciprocal'): + """Returns 0 when `x` falls inside the bounds, between 1 and 0 otherwise. + + Args: + x: A scalar or numpy array. + bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for + the target interval. 
These can be infinite if the interval is unbounded + at one or both ends, or they can be equal to one another if the target + value is exact. + margin: Float. Parameter that controls how steeply the output decreases as + `x` moves out-of-bounds. + * If `margin == 0` then the output will be 0 for all values of `x` + outside of `bounds`. + * If `margin > 0` then the output will decrease sigmoidally with + increasing distance from the nearest bound. + sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian', + 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'. + value_at_margin: A float between 0 and 1 specifying the output value when + the distance from `x` to the nearest bound is equal to `margin`. Ignored + if `margin == 0`. + + Returns: + A float or numpy array with values between 0.0 and 1.0. + + Raises: + ValueError: If `bounds[0] > bounds[1]`. + ValueError: If `margin` is negative. + """ + bound = tolerance(x, + bounds=bounds, + margin=margin, + sigmoid=sigmoid, + value_at_margin=0) + return 1 - bound + + +def rect_prism_tolerance(curr, zero, one): + """Computes a reward if curr is inside a rectangluar prism region. + + The 3d points curr and zero specify 2 diagonal corners of a rectangular + prism that represents the decreasing region. + + one represents the corner of the prism that has a reward of 1. + zero represents the diagonal opposite corner of the prism that has a reward + of 0. + Curr is the point that the prism reward region is being applied for. + + Args: + curr(np.ndarray): The point who's reward is being assessed. + shape is (3,). + zero(np.ndarray): One corner of the rectangular prism, with reward 0. + shape is (3,) + one(np.ndarray): The diagonal opposite corner of one, with reward 1. 
+ shape is (3,) + """ + in_range = lambda a, b, c: float(b <= a <=c) if c >= b else float(c <= a <= b) + in_prism = (in_range(curr[0], zero[0], one[0]) and + in_range(curr[1], zero[1], one[1]) and + in_range(curr[2], zero[2], one[2])) + if in_prism: + diff = one - zero + x_scale = (curr[0] - zero[0]) / diff[0] + y_scale = (curr[1] - zero[1]) / diff[1] + z_scale = (curr[2] - zero[2]) / diff[2] + return x_scale * y_scale * z_scale + # return 0.01 + else: + return 1. + + + +def hamacher_product(a, b): + """The hamacher (t-norm) product of a and b. + + computes (a * b) / ((a + b) - (a * b)) + + Args: + a (float): 1st term of hamacher product. + b (float): 2nd term of hamacher product. + Raises: + ValueError: a and b must range between 0 and 1 + + Returns: + float: The hammacher product of a and b + """ + if not ((0. <= a <= 1.) and (0. <= b <= 1.)): + raise ValueError("a and b must range between 0 and 1") + + denominator = a + b - (a * b) + h_prod = ((a * b) / denominator) if denominator > 0 else 0 + + assert 0. <= h_prod <= 1. 
+ return h_prod diff --git a/Metaworld/metaworld/policies/__pycache__/__init__.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06e2ce816d284fd66ab8a5a05cfb479f0843630a Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/__init__.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_bin_picking_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_bin_picking_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6d17bfa2ebc083091eec2511230f99534d5488d Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_bin_picking_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d594a1af72137ab4f485e50735fdf7c3603b2de Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_box_close_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c5cf2866e345dede2cffdd11d3977dd4bb52aa4 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_topdown_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ad3df955e35fcfe3beb234e3009a84ecbb2fa35 Binary files /dev/null and 
b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8959bd275a948d645f886a918d16be6564a60866 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_wall_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_wall_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3b7b91bae2e64fa46d5b7bc35a417d896be9fe0 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_wall_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_wall_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_wall_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca937b5fb4fc718e1afe66664a6e522c454112be Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_button_press_wall_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50b12382099cc814593e9785f78cc9c0e3bb731d Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_pull_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v2_policy.cpython-38.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..1ad652877b99db7ee89e1f9bbd2015555e9332a7 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_coffee_push_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65e536ea40d478068b65381e9f8e85f17383bf46 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_dial_turn_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_disassemble_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_disassemble_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..473a7a0616a7e589ee2f6e860bb748c126e7b10c Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_disassemble_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_disassemble_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_disassemble_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11b120a9bcc75d267dd89ab2e9256126e7da7166 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_disassemble_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_close_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_door_close_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ba8932c49e22a5af8aa94f87823c7900d1aab6f Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_close_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_lock_v1_policy.cpython-38.pyc 
b/Metaworld/metaworld/policies/__pycache__/sawyer_door_lock_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52d851bb8ba6f3f45ad9a128fcab1eda1c9a1129 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_lock_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_lock_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_door_lock_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..715873efefd81f8a5acf56d5fb1de39c6c0b51b8 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_lock_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0aaad9aaade8c4f89b9193849ee168801109bd6a Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_open_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_unlock_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_door_unlock_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9384bd1c38a3ac30447a8f900e729e4ae890abbb Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_unlock_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_door_unlock_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_door_unlock_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f51ace0ecfa74a13928ff819bf4f8d09589417e Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_door_unlock_v2_policy.cpython-38.pyc differ diff --git 
a/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ce4432c4806b544301e71143fd927e59488b0f6 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_close_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_open_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_open_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3ca2500533d1c26d04859ff51fb558d19ae774d9 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_drawer_open_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d08e552bdcebbe68e5cf26837db444e9d7c78ee Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_faucet_close_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_hammer_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_hammer_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..37b85b05b13ee2fd77abfa4ca6d7678dc33de094 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_hammer_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_hammer_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_hammer_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f8bbd5daa775c85932e48801c247263f36810f4 Binary files /dev/null and 
b/Metaworld/metaworld/policies/__pycache__/sawyer_hammer_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b944d70bda58f917bc44179f10f75bb0366dabb Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..deab4f5972f7dceef01d1d8dc15243b66aa8c71a Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_press_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6dc6c573688b376e8da51a3fe8643ca5462b9f7 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1b098ff479135fcbe5ccbb793ea60218744f077 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_handle_pull_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_peg_insertion_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_insertion_side_v2_policy.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..97c4233d1b80d94f4011f6e478b4b5770697220a Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_insertion_side_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80e444bb774c79cde2502e43c9c899f2985c284b Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_peg_unplug_side_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_pick_out_of_hole_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_out_of_hole_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..129a034229ad939dc34e7e3a444650712607718b Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_out_of_hole_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_pick_out_of_hole_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_out_of_hole_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7dc3820d0aed4f55c0b88b4a2d83f5834fb1ba2e Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_out_of_hole_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_pick_place_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_place_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a360aeef9ab46deb0ba089739e8c8dc540b2c99 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_place_v2_policy.cpython-38.pyc differ diff --git 
a/Metaworld/metaworld/policies/__pycache__/sawyer_pick_place_wall_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_place_wall_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6493cbff9935c8d9a2314f662486a54eb82205f Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_pick_place_wall_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_side_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_side_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ec9acb5cd9bd6bc8c6ab6d419176f95e60338c0 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_side_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65d1bff66fdc72f9a79bf9bd4d80e26e70578f14 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_back_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d811bae0f8fa951a2f4cb0770e36132d87f4ac28 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_side_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v1_policy.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..e67123e079e8b58e5433b67cb53a3d68cda1be11 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_plate_slide_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_push_back_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_push_back_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ecf758668a39e207fa1813eb92c8df35bb15d09 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_push_back_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_push_back_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_push_back_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89c78957757a10887426f5f11f0293299d854beb Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_push_back_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_push_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_push_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53c8cf5467771b8c9331f92f429db30bc5e78a00 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_push_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_push_wall_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_push_wall_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a2566bf7bd2017d51c44be8ae8af411334423586 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_push_wall_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_reach_wall_v2_policy.cpython-38.pyc 
b/Metaworld/metaworld/policies/__pycache__/sawyer_reach_wall_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7f304a3196574d85f89f70c7148b9a7c2ea22d4 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_reach_wall_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_shelf_place_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_shelf_place_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ce00e646fdffbd99bac706e5c6180192abacb65 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_shelf_place_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_shelf_place_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_shelf_place_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39818e495a4ec2c7cdf834d4e9d38a2b1caf9c28 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_shelf_place_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_stick_pull_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_pull_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aca760c80e1241ff14bb0346240393999564b44d Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_pull_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_stick_pull_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_pull_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72f061909f72805dadf7cdab363e71c2a348cca1 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_pull_v2_policy.cpython-38.pyc differ diff --git 
a/Metaworld/metaworld/policies/__pycache__/sawyer_stick_push_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_push_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df9c90af912b906790f81ff83d80195d0dc8f36c Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_push_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_stick_push_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_push_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62d88a5e9c08260857332f299b31b821d38f02e0 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_stick_push_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_sweep_into_v1_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_sweep_into_v1_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ec11bb9f55f3373c6d6f6afeec927704d33d464 Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_sweep_into_v1_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_sweep_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_sweep_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b736164b1eb513f38d202cb10e7705b9c79670a Binary files /dev/null and b/Metaworld/metaworld/policies/__pycache__/sawyer_sweep_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/metaworld/policies/__pycache__/sawyer_window_close_v2_policy.cpython-38.pyc b/Metaworld/metaworld/policies/__pycache__/sawyer_window_close_v2_policy.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9d45a4b92b69fafd6a557f76a31216c9fdd6e7f Binary files /dev/null and 
b/Metaworld/metaworld/policies/__pycache__/sawyer_window_close_v2_policy.cpython-38.pyc differ diff --git a/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/point_cloud/6.0.0 b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/point_cloud/6.0.0 new file mode 100644 index 0000000000000000000000000000000000000000..ad9a5751878d0a3d1916eea10985dde479f7ee70 --- /dev/null +++ b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/point_cloud/6.0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ea9a2c7f546a4807e9f66fab8f4e3dee251ea4390a45d935015ff918e76d60 +size 1225348 diff --git a/gym-0.21.0/gym.egg-info/top_level.txt b/gym-0.21.0/gym.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e6c2dd43fd01404d4f7beb122961c70d52e4c2f --- /dev/null +++ b/gym-0.21.0/gym.egg-info/top_level.txt @@ -0,0 +1 @@ +gym diff --git a/gym-0.21.0/gym/__pycache__/version.cpython-38.pyc b/gym-0.21.0/gym/__pycache__/version.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b508610526ab3d1926aa6dd19c0a3485f34c14cb Binary files /dev/null and b/gym-0.21.0/gym/__pycache__/version.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/error.py b/gym-0.21.0/gym/error.py new file mode 100644 index 0000000000000000000000000000000000000000..5884e5911b4e08118de6d3fcca630a4a77430639 --- /dev/null +++ b/gym-0.21.0/gym/error.py @@ -0,0 +1,226 @@ +import sys + + +class Error(Exception): + pass + + +# Local errors + + +class Unregistered(Error): + """Raised when the user requests an item from the registry that does + not actually exist. + """ + + pass + + +class UnregisteredEnv(Unregistered): + """Raised when the user requests an env from the registry that does + not actually exist. + """ + + pass + + +class UnregisteredBenchmark(Unregistered): + """Raised when the user requests an env from the registry that does + not actually exist. 
+ """ + + pass + + +class DeprecatedEnv(Error): + """Raised when the user requests an env from the registry with an + older version number than the latest env with the same name. + """ + + pass + + +class UnseedableEnv(Error): + """Raised when the user tries to seed an env that does not support + seeding. + """ + + pass + + +class DependencyNotInstalled(Error): + pass + + +class UnsupportedMode(Exception): + """Raised when the user requests a rendering mode not supported by the + environment. + """ + + pass + + +class ResetNeeded(Exception): + """When the monitor is active, raised when the user tries to step an + environment that's already done. + """ + + pass + + +class ResetNotAllowed(Exception): + """When the monitor is active, raised when the user tries to step an + environment that's not yet done. + """ + + pass + + +class InvalidAction(Exception): + """Raised when the user performs an action not contained within the + action space + """ + + pass + + +# API errors + + +class APIError(Error): + def __init__( + self, + message=None, + http_body=None, + http_status=None, + json_body=None, + headers=None, + ): + super(APIError, self).__init__(message) + + if http_body and hasattr(http_body, "decode"): + try: + http_body = http_body.decode("utf-8") + except: + http_body = ( + "" + ) + + self._message = message + self.http_body = http_body + self.http_status = http_status + self.json_body = json_body + self.headers = headers or {} + self.request_id = self.headers.get("request-id", None) + + def __unicode__(self): + if self.request_id is not None: + msg = self._message or "" + return u"Request {0}: {1}".format(self.request_id, msg) + else: + return self._message + + def __str__(self): + try: # Python 2 + return unicode(self).encode("utf-8") + except NameError: # Python 3 + return self.__unicode__() + + +class APIConnectionError(APIError): + pass + + +class InvalidRequestError(APIError): + def __init__( + self, + message, + param, + http_body=None, + http_status=None, 
+ json_body=None, + headers=None, + ): + super(InvalidRequestError, self).__init__( + message, http_body, http_status, json_body, headers + ) + self.param = param + + +class AuthenticationError(APIError): + pass + + +class RateLimitError(APIError): + pass + + +# Video errors + + +class VideoRecorderError(Error): + pass + + +class InvalidFrame(Error): + pass + + +# Wrapper errors + + +class DoubleWrapperError(Error): + pass + + +class WrapAfterConfigureError(Error): + pass + + +class RetriesExceededError(Error): + pass + + +# Vectorized environments errors + + +class AlreadyPendingCallError(Exception): + """ + Raised when `reset`, or `step` is called asynchronously (e.g. with + `reset_async`, or `step_async` respectively), and `reset_async`, or + `step_async` (respectively) is called again (without a complete call to + `reset_wait`, or `step_wait` respectively). + """ + + def __init__(self, message, name): + super(AlreadyPendingCallError, self).__init__(message) + self.name = name + + +class NoAsyncCallError(Exception): + """ + Raised when an asynchronous `reset`, or `step` is not running, but + `reset_wait`, or `step_wait` (respectively) is called. + """ + + def __init__(self, message, name): + super(NoAsyncCallError, self).__init__(message) + self.name = name + + +class ClosedEnvironmentError(Exception): + """ + Trying to call `reset`, or `step`, while the environment is closed. + """ + + pass + + +class CustomSpaceError(Exception): + """ + The space is a custom gym.Space instance, and is not supported by + `AsyncVectorEnv` with `shared_memory=True`. 
+ """ + + pass diff --git a/gym-0.21.0/tests/vector/test_spaces.py b/gym-0.21.0/tests/vector/test_spaces.py new file mode 100644 index 0000000000000000000000000000000000000000..9a53cc88b7323aa862b1e4ce6c64aaebc8640341 --- /dev/null +++ b/gym-0.21.0/tests/vector/test_spaces.py @@ -0,0 +1,105 @@ +import pytest +import numpy as np + +from gym.spaces import Box, MultiDiscrete, Tuple, Dict +from tests.vector.utils import spaces, custom_spaces, CustomSpace + +from gym.vector.utils.spaces import batch_space + +expected_batch_spaces_4 = [ + Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float64), + Box(low=0.0, high=10.0, shape=(4, 1), dtype=np.float32), + Box( + low=np.array( + [[-1.0, 0.0, 0.0], [-1.0, 0.0, 0.0], [-1.0, 0.0, 0.0], [-1.0, 0.0, 0.0]] + ), + high=np.array( + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]] + ), + dtype=np.float32, + ), + Box( + low=np.array( + [ + [[-1.0, 0.0], [0.0, -1.0]], + [[-1.0, 0.0], [0.0, -1.0]], + [[-1.0, 0.0], [0.0, -1]], + [[-1.0, 0.0], [0.0, -1.0]], + ] + ), + high=np.ones((4, 2, 2)), + dtype=np.float32, + ), + Box(low=0, high=255, shape=(4,), dtype=np.uint8), + Box(low=0, high=255, shape=(4, 32, 32, 3), dtype=np.uint8), + MultiDiscrete([2, 2, 2, 2]), + Tuple((MultiDiscrete([3, 3, 3, 3]), MultiDiscrete([5, 5, 5, 5]))), + Tuple( + ( + MultiDiscrete([7, 7, 7, 7]), + Box( + low=np.array([[0.0, -1.0], [0.0, -1.0], [0.0, -1.0], [0.0, -1]]), + high=np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]), + dtype=np.float32, + ), + ) + ), + Box( + low=np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]), + high=np.array([[10, 12, 16], [10, 12, 16], [10, 12, 16], [10, 12, 16]]), + dtype=np.int64, + ), + Box(low=0, high=1, shape=(4, 19), dtype=np.int8), + Dict( + { + "position": MultiDiscrete([23, 23, 23, 23]), + "velocity": Box(low=0.0, high=1.0, shape=(4, 1), dtype=np.float32), + } + ), + Dict( + { + "position": Dict( + { + "x": MultiDiscrete([29, 29, 29, 29]), + "y": MultiDiscrete([31, 31, 31, 31]), + } + ), + 
"velocity": Tuple( + ( + MultiDiscrete([37, 37, 37, 37]), + Box(low=0, high=255, shape=(4,), dtype=np.uint8), + ) + ), + } + ), +] + +expected_custom_batch_spaces_4 = [ + Tuple((CustomSpace(), CustomSpace(), CustomSpace(), CustomSpace())), + Tuple( + ( + Tuple((CustomSpace(), CustomSpace(), CustomSpace(), CustomSpace())), + Box(low=0, high=255, shape=(4,), dtype=np.uint8), + ) + ), +] + + +@pytest.mark.parametrize( + "space,expected_batch_space_4", + list(zip(spaces, expected_batch_spaces_4)), + ids=[space.__class__.__name__ for space in spaces], +) +def test_batch_space(space, expected_batch_space_4): + batch_space_4 = batch_space(space, n=4) + assert batch_space_4 == expected_batch_space_4 + + +@pytest.mark.parametrize( + "space,expected_batch_space_4", + list(zip(custom_spaces, expected_custom_batch_spaces_4)), + ids=[space.__class__.__name__ for space in custom_spaces], +) +def test_batch_space_custom_space(space, expected_batch_space_4): + batch_space_4 = batch_space(space, n=4) + assert batch_space_4 == expected_batch_space_4