diff --git a/Metaworld/.gitignore b/Metaworld/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..03b6378d8e1d01f4370e6123f1ec3d58935d6bc6 --- /dev/null +++ b/Metaworld/.gitignore @@ -0,0 +1,148 @@ +# Plots and figures +scripts/figures/ +scripts/movies* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/_apidoc/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + +# metaworld-specific stuff below this line +MUJOCO_LOG.TXT + +# Other IDEs and editors +*.sublime-project +*.sublime-workspace +.project +.pydevproject +*.swp +*.orig +.idea +.settings +.vscode/ + +# MacOS +.DS_Store + +# no 
submodules, please +.gitmodules + +# pipenv +# Including these for now, since pipenv is not the authoritative environment +# tool +Pipfile +Pipfile.lock diff --git a/Metaworld/LICENSE b/Metaworld/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..524f4b05f6a2a5221bda5978ce7431bf7ff14455 --- /dev/null +++ b/Metaworld/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Meta-World Team + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Metaworld/MANIFEST.in b/Metaworld/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..3770244cd741c8454b9245bfc80b28d526b635d9 --- /dev/null +++ b/Metaworld/MANIFEST.in @@ -0,0 +1,2 @@ +graft metaworld/envs/assets_v1 +graft metaworld/envs/assets_v2 diff --git a/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..36937ecc98ad1f39b4c762ba8a9718816bb16074 Binary files /dev/null and b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 differ diff --git a/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 new file mode 100644 index 0000000000000000000000000000000000000000..34059363a17c8f22ce3238f8450a3216d1555f0e Binary files /dev/null and b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 differ diff --git a/VRL3/src/cfgs_adroit/task/hammer.yaml b/VRL3/src/cfgs_adroit/task/hammer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18c49e7dc5d0094c7dad4fc5aea1553dbacbc54f --- /dev/null +++ b/VRL3/src/cfgs_adroit/task/hammer.yaml @@ -0,0 +1,4 @@ +num_train_frames: 4100000 +task_name: hammer-v0 +agent: + encoder_lr_scale: 1 \ No newline at end of file diff --git a/VRL3/src/cfgs_adroit/task/pen.yaml b/VRL3/src/cfgs_adroit/task/pen.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6834a504321d0142aba6ce41d860f8ee44b68a7e --- /dev/null +++ b/VRL3/src/cfgs_adroit/task/pen.yaml @@ -0,0 +1,4 @@ +num_train_frames: 4100000 +task_name: pen-v0 +agent: + encoder_lr_scale: 1 \ No newline at end of file diff --git a/VRL3/src/rrl_local/rrl_utils.py b/VRL3/src/rrl_local/rrl_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..05d943348eb2c607605122af23d683cd2082534d --- /dev/null +++ b/VRL3/src/rrl_local/rrl_utils.py @@ -0,0 +1,85 @@ +# Copyright (c) Rutav Shah, Indian Institute of Technlogy Kharagpur +# Copyright (c) Facebook, Inc. and its affiliates + +from mjrl.utils.gym_env import GymEnv +from rrl_local.rrl_multicam import BasicAdroitEnv +import os + +def make_basic_env(env, cam_list=[], from_pixels=False, hybrid_state=None, test_image=False, channels_first=False, + num_repeats=1, num_frames=1): + e = GymEnv(env) + env_kwargs = None + if from_pixels : # TODO here they first check if it's from pixels... if pixel and not resnet then use 84x84?? + height = 84 + width = 84 + latent_dim = height*width*len(cam_list)*3 + # RRL class instance is environment wrapper... + e = BasicAdroitEnv(e, cameras=cam_list, + height=height, width=width, latent_dim=latent_dim, hybrid_state=hybrid_state, + test_image=test_image, channels_first=channels_first, num_repeats=num_repeats, num_frames=num_frames) + env_kwargs = {'rrl_kwargs' : e.env_kwargs} + # if not from pixels... then it's simpler + return e, env_kwargs + +def make_env(env, cam_list=[], from_pixels=False, encoder_type=None, hybrid_state=None,) : + # TODO why is encoder type put into the environment? + e = GymEnv(env) + env_kwargs = None + if from_pixels : # TODO here they first check if it's from pixels... if pixel and not resnet then use 84x84?? + height = 84 + width = 84 + latent_dim = height*width*len(cam_list)*3 + + if encoder_type and encoder_type == 'resnet34': + assert from_pixels==True + height = 256 + width = 256 + latent_dim = 512*len(cam_list) #TODO each camera provides an image? + if from_pixels: + # RRL class instance is environment wrapper... 
+ e = BasicAdroitEnv(e, cameras=cam_list, encoder_type=encoder_type, + height=height, width=width, latent_dim=latent_dim, hybrid_state=hybrid_state) + env_kwargs = {'rrl_kwargs' : e.env_kwargs} + return e, env_kwargs + +def make_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + + +def preprocess_args(args): + job_data = {} + job_data['seed'] = args.seed + job_data['env'] = args.env + job_data['output'] = args.output + job_data['from_pixels'] = args.from_pixels + job_data['hybrid_state'] = args.hybrid_state + job_data['stack_frames'] = args.stack_frames + job_data['encoder_type'] = args.encoder_type + job_data['cam1'] = args.cam1 + job_data['cam2'] = args.cam2 + job_data['cam3'] = args.cam3 + job_data['algorithm'] = args.algorithm + job_data['num_cpu'] = args.num_cpu + job_data['save_freq'] = args.save_freq + job_data['eval_rollouts'] = args.eval_rollouts + job_data['demo_file'] = args.demo_file + job_data['bc_batch_size'] = args.bc_batch_size + job_data['bc_epochs'] = args.bc_epochs + job_data['bc_learn_rate'] = args.bc_learn_rate + #job_data['policy_size'] = args.policy_size + job_data['policy_size'] = tuple(map(int, args.policy_size.split(', '))) + job_data['vf_batch_size'] = args.vf_batch_size + job_data['vf_epochs'] = args.vf_epochs + job_data['vf_learn_rate'] = args.vf_learn_rate + job_data['rl_step_size'] = args.rl_step_size + job_data['rl_gamma'] = args.rl_gamma + job_data['rl_gae'] = args.rl_gae + job_data['rl_num_traj'] = args.rl_num_traj + job_data['rl_num_iter'] = args.rl_num_iter + job_data['lam_0'] = args.lam_0 + job_data['lam_1'] = args.lam_1 + print(job_data) + return job_data + + diff --git a/VRL3/src/transfer_util.py b/VRL3/src/transfer_util.py new file mode 100644 index 0000000000000000000000000000000000000000..99691715eb98bd7463044e3ca0bca78efc1d1044 --- /dev/null +++ b/VRL3/src/transfer_util.py @@ -0,0 +1,50 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +from torchvision import datasets, models, transforms + +def set_parameter_requires_grad(model, n_resblock_finetune): + assert n_resblock_finetune in (0, 1, 2, 3, 4, 5) + for param in model.parameters(): + param.requires_grad = False + + for name, param in model.named_parameters(): + condition = (n_resblock_finetune >= 1 and 'layer4' in name) or (n_resblock_finetune >= 2 and 'layer3' in name) or \ + (n_resblock_finetune >= 3 and 'layer2' in name) or (n_resblock_finetune >= 4 and 'layer1' in name) or \ + (n_resblock_finetune >= 5) + + if condition: + param.requires_grad = True + + for name, param in model.named_parameters(): + if 'bn' in name: + param.requires_grad = False + +def initialize_model(model_name, n_resblock_finetune, use_pretrained=True): + # Initialize these variables which will be set in this if statement. Each of these + # variables is model specific. + model_ft = None + input_size = 0 + + if model_name == "resnet18": + """ Resnet18 + """ + model_ft = models.resnet18(pretrained=use_pretrained) + set_parameter_requires_grad(model_ft, n_resblock_finetune) + feature_size = model_ft.fc.in_features + input_size = 224 + elif model_name == 'resnet34': + model_ft = models.resnet34(pretrained=use_pretrained) + set_parameter_requires_grad(model_ft, n_resblock_finetune) + feature_size = model_ft.fc.in_features + input_size = 224 + else: + print("Invalid model name, exiting...") + exit() + + return model_ft, input_size, feature_size + + + + + diff --git a/gym-0.21.0/.gitignore b/gym-0.21.0/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a4ebf2a52774af7cbe8f56641f5fe9c3dc4f87b0 --- /dev/null +++ b/gym-0.21.0/.gitignore @@ -0,0 +1,39 @@ +*.swp +*.pyc +*.py~ +.DS_Store +.cache +.pytest_cache/ + +# Setuptools distribution and build folders. +/dist/ +/build + +# Virtualenv +/env + +# Python egg metadata, regenerated from source files by setuptools. 
+/*.egg-info + +*.sublime-project +*.sublime-workspace + +logs/ + +.ipynb_checkpoints +ghostdriver.log + +junk +MUJOCO_LOG.txt + +rllab_mujoco + +tutorial/*.html + +# IDE files +.eggs +.tox + +# PyCharm project files +.idea +vizdoom.ini diff --git a/gym-0.21.0/CODE_OF_CONDUCT.rst b/gym-0.21.0/CODE_OF_CONDUCT.rst new file mode 100644 index 0000000000000000000000000000000000000000..e208081cae3a9aefd992b3eff96b3809a29c0f34 --- /dev/null +++ b/gym-0.21.0/CODE_OF_CONDUCT.rst @@ -0,0 +1,13 @@ +OpenAI Gym is dedicated to providing a harassment-free experience for +everyone, regardless of gender, gender identity and expression, sexual +orientation, disability, physical appearance, body size, age, race, or +religion. We do not tolerate harassment of participants in any form. + +This code of conduct applies to all OpenAI Gym spaces (including Gist +comments) both online and off. Anyone who violates this code of +conduct may be sanctioned or expelled from these spaces at the +discretion of the OpenAI team. + +We may add additional rules over time, which will be made clearly +available to participants. Participants are responsible for knowing +and abiding by these rules. diff --git a/gym-0.21.0/bin/docker_entrypoint b/gym-0.21.0/bin/docker_entrypoint new file mode 100644 index 0000000000000000000000000000000000000000..a5fe2c9f0ca075c9767057b131faaf572ac4bf95 --- /dev/null +++ b/gym-0.21.0/bin/docker_entrypoint @@ -0,0 +1,26 @@ +#!/bin/bash +# This script is the entrypoint for our Docker image. + +set -ex + +# Set up display; otherwise rendering will fail +Xvfb -screen 0 1024x768x24 & +export DISPLAY=:0 + +# Wait for the file to come up +display=0 +file="/tmp/.X11-unix/X$display" +for i in $(seq 1 10); do + if [ -e "$file" ]; then + break + fi + + echo "Waiting for $file to be created (try $i/10)" + sleep "$i" +done +if ! 
[ -e "$file" ]; then + echo "Timing out: $file was not created" + exit 1 +fi + +exec "$@" diff --git a/gym-0.21.0/docs/api.md b/gym-0.21.0/docs/api.md new file mode 100644 index 0000000000000000000000000000000000000000..68749f0edf27e7ea01bbd4ae91287cffff287902 --- /dev/null +++ b/gym-0.21.0/docs/api.md @@ -0,0 +1,123 @@ +# API +## Initializing Environments +Initializing environment is very easy in Gym and can be done via: + +```python +import gym +env = gym.make('CartPole-v0') +``` + +## Interacting with the Environment +This example will run an instance of `CartPole-v0` environment for 1000 timesteps, rendering the environment at each step. You should see a window pop up rendering the classic [cart-pole](https://www.youtube.com/watch?v=J7E6_my3CHk&ab_channel=TylerStreeter) problem + +```python +import gym +env = gym.make('CartPole-v0') +env.reset() +for _ in range(1000): + env.render() # by default `mode="human"`(GUI), you can pass `mode="rbg_array"` to retrieve an image instead + env.step(env.action_space.sample()) # take a random action +env.close() +``` + +The output should look something like this + +![cartpole-no-reset](https://user-images.githubusercontent.com/28860173/129241283-70069f7c-453d-4670-a226-854203bd8a1b.gif) + + +The commonly used methods are: + +`reset()` resets the environment to its initial state and returns the observation corresponding to the initial state +`step(action)` takes an action as an input and implements that action in the environment. This method returns a set of four values +`render()` renders the environment + +- `observation` (**object**) : an environment specific object representation your observation of the environment after the step is taken. Its often aliased as the next state after the action has been taken +- `reward`(**float**) : immediate reward achieved by the previous action. 
Actual value and range will varies between environments, but the final goal is always to increase your total reward +- `done`(**boolean**): whether it’s time to `reset` the environment again. Most (but not all) tasks are divided up into well-defined episodes, and `done` being `True` indicates the episode has terminated. (For example, perhaps the pole tipped too far, or you lost your last life.) +- `info`(**dict**) : This provides general information helpful for debugging or additional information depending on the environment, such as the raw probabilities behind the environment’s last state change + + +## Additional Environment API + +- `action_space`: this attribute gives the format of valid actions. It is of datatype `Space` provided by Gym. (For ex: If the action space is of type `Discrete` and gives the value `Discrete(2)`, this means there are two valid discrete actions 0 & 1 ) +```python +print(env.action_space) +#> Discrete(2) + +print(env.observation_space) +#> Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32) + +``` +- `observation_space`: this attribute gives the format of valid observations. It if of datatype `Space` provided by Gym. (For ex: if the observation space is of type `Box` and the shape of the object is `(4,)`, this denotes a valid observation will be an array of 4 numbers). We can check the box bounds as well with attributes + +```python +print(env.observation_space.high) +#> array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38], dtype=float32) + +print(env.observation_space.low) +#> array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38], dtype=float32) +``` +- There are multiple types of Space types inherently available in gym: + - `Box` describes an n-dimensional continuous space. Its a bounded space where we can define the upper and lower limit which describe the valid values our observations can take. 
+ - `Discrete` describes a discrete space where { 0, 1, ......., n-1} are the possible values our observation/action can take. + - `Dict` represents a dictionary of simple spaces. + - `Tuple` represents a tuple of simple spaces + - `MultiBinary` creates a n-shape binary space. Argument n can be a number or a `list` of numbers + - `MultiDiscrete` consists of a series of `Discrete` action spaces with different number of actions in each element + ```python + observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32) + print(observation_space.sample()) + #> [ 1.6952509 -0.4399011 -0.7981693] + + observation_space = Discrete(4) + print(observation_space.sample()) + #> 1 + + observation_space = Dict({"position": Discrete(2), "velocity": Discrete(3)}) + print(observation_space.sample()) + #> OrderedDict([('position', 0), ('velocity', 1)]) + + observation_space = Tuple((Discrete(2), Discrete(3))) + print(observation_space.sample()) + #> (1, 2) + + observation_space = MultiBinary(5) + print(observation_space.sample()) + #> [1 1 1 0 1] + + observation_space = MultiDiscrete([ 5, 2, 2 ]) + print(observation_space.sample()) + #> [3 0 0] + ``` +- `reward_range`: returns a tuple corresponding to min and max possible rewards. Default range is set to `[-inf,+inf]`. You can set it if you want a narrower range +- `close()` : Override close in your subclass to perform any necessary cleanup +- `seed()`: Sets the seed for this env's random number generator + + +### Unwrapping an environment +If you have a wrapped environment, and you want to get the unwrapped environment underneath all the layers of wrappers (so that you can manually call a function or change some underlying aspect of the environment), you can use the `.unwrapped` attribute. If the environment is already a base environment, the `.unwrapped` attribute will just return itself. 
+ +```python +base_env = env.unwrapped +``` + +### Vectorized Environment +Vectorized Environments are a way of stacking multiple independent environments, so that instead of training on one environment, our agent can train on multiple environments at a time. Each `observation` returned from a vectorized environment is a batch of observations for each sub-environment, and `step` is also expected to receive a batch of actions for each sub-environment. + +**NOTE:** All sub-environments should share the identical observation and action spaces. A vector of multiple different environments is not supported + +Gym Vector API consists of two types of vectorized environments: + +- `AsyncVectorEnv` runs multiple environments in parallel. It uses `multiprocessing` processes, and pipes for communication. +- `SyncVectorEnv`runs multiple environments serially + +```python +import gym +env = gym.vector.make('CartPole-v1', 3,asynchronous=True) # Creates an Asynchronous env +env.reset() +#> array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827], +#> [ 0.03073904, 0.00145001, -0.03088818, -0.03131252], +#> [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]], +#> dtype=float32) + +``` diff --git a/gym-0.21.0/docs/toy_text/frozen_lake.md b/gym-0.21.0/docs/toy_text/frozen_lake.md new file mode 100644 index 0000000000000000000000000000000000000000..b84daf7ec1a7830efa1c6cbe8f2685b0d7e15e91 --- /dev/null +++ b/gym-0.21.0/docs/toy_text/frozen_lake.md @@ -0,0 +1,70 @@ +Frozen Lake +--- +|Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import| +| ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------| +|Frozen Lake|Discrete|(1,)|(0,3)|(1,)|(0,nrows*ncolumns)| |from gym.envs.toy_text import frozen_lake| +--- + + +Frozen lake involves crossing a frozen lake from Start(S) to goal(G) without falling into any holes(H). 
The agent may not always move in the intended direction due to the slippery nature of the frozen lake. + +The agent take a 1-element vector for actions. +The action space is `(dir)`, where `dir` decides direction to move in which can be: + +- 0: LEFT +- 1: DOWN +- 2: RIGHT +- 3: UP + +The observation is a value representing the agents current position as + + current_row * nrows + current_col + +**Rewards:** + +Reward schedule: +- Reach goal(G): +1 +- Reach hole(H): 0 + +### Arguments + +``` +gym.make('FrozenLake-v0', desc=None,map_name="4x4", is_slippery=True) +``` + +`desc`: Used to specify custom map for frozen lake. For example, + + desc=["SFFF", "FHFH", "FFFH", "HFFG"]. + +`map_name`: ID to use any of the preloaded maps. + + "4x4":[ + "SFFF", + "FHFH", + "FFFH", + "HFFG" + ] + + "8x8": [ + "SFFFFFFF", + "FFFFFFFF", + "FFFHFFFF", + "FFFFFHFF", + "FFFHFFFF", + "FHHFFFHF", + "FHFFHFHF", + "FFFHFFFG", + ] + + + + +`is_slippery`: True/False. If True will move in intended direction with probability of 1/3 else will move in either perpendicular direction with equal probability of 1/3 in both directions. + + For example, if action is left and is_slippery is True, then: + - P(move left)=1/3 + - P(move up)=1/3 + - P(move down)=1/3 +### Version History + +* v0: Initial versions release (1.0.0) diff --git a/gym-0.21.0/docs/tutorials.md b/gym-0.21.0/docs/tutorials.md new file mode 100644 index 0000000000000000000000000000000000000000..7abc9543e8224fcc69dd824a1fad396f20d91aa2 --- /dev/null +++ b/gym-0.21.0/docs/tutorials.md @@ -0,0 +1,17 @@ +## Getting Started With OpenAI Gym: The Basic Building Blocks + +https://blog.paperspace.com/getting-started-with-openai-gym/ + +A good starting point explaining all the basic building blocks of the Gym API. + + + +## Reinforcement Q-Learning from Scratch in Python with OpenAI Gym +Good Algorithmic Introduction to Reinforcement Learning showcasing how to use Gym API for Training Agents. 
+ +https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/ + + +## Tutorial: An Introduction to Reinforcement Learning Using OpenAI Gym + +https://www.gocoder.one/blog/rl-tutorial-with-openai-gym diff --git a/gym-0.21.0/gym/core.py b/gym-0.21.0/gym/core.py new file mode 100644 index 0000000000000000000000000000000000000000..ae7b32c355714a1fc2b5ea2f0d4ee01f4845fcf0 --- /dev/null +++ b/gym-0.21.0/gym/core.py @@ -0,0 +1,357 @@ +from abc import abstractmethod + +import gym +from gym import error +from gym.utils import closer + + +class Env(object): + """The main OpenAI Gym class. It encapsulates an environment with + arbitrary behind-the-scenes dynamics. An environment can be + partially or fully observed. + + The main API methods that users of this class need to know are: + + step + reset + render + close + seed + + And set the following attributes: + + action_space: The Space object corresponding to valid actions + observation_space: The Space object corresponding to valid observations + reward_range: A tuple corresponding to the min and max possible rewards + + Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range. + + The methods are accessed publicly as "step", "reset", etc... + """ + + # Set this in SOME subclasses + metadata = {"render.modes": []} + reward_range = (-float("inf"), float("inf")) + spec = None + + # Set these in ALL subclasses + action_space = None + observation_space = None + + @abstractmethod + def step(self, action): + """Run one timestep of the environment's dynamics. When end of + episode is reached, you are responsible for calling `reset()` + to reset this environment's state. + + Accepts an action and returns a tuple (observation, reward, done, info). 
+ + Args: + action (object): an action provided by the agent + + Returns: + observation (object): agent's observation of the current environment + reward (float) : amount of reward returned after previous action + done (bool): whether the episode has ended, in which case further step() calls will return undefined results + info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning) + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Resets the environment to an initial state and returns an initial + observation. + + Note that this function should not reset the environment's random + number generator(s); random variables in the environment's state should + be sampled independently between multiple calls to `reset()`. In other + words, each call of `reset()` should yield an environment suitable for + a new episode, independent of previous episodes. + + Returns: + observation (object): the initial observation. + """ + raise NotImplementedError + + @abstractmethod + def render(self, mode="human"): + """Renders the environment. + + The set of supported modes varies per environment. (And some + environments do not support rendering at all.) By convention, + if mode is: + + - human: render to the current display or terminal and + return nothing. Usually for human consumption. + - rgb_array: Return an numpy.ndarray with shape (x, y, 3), + representing RGB values for an x-by-y pixel image, suitable + for turning into a video. + - ansi: Return a string (str) or StringIO.StringIO containing a + terminal-style text representation. The text can include newlines + and ANSI escape sequences (e.g. for colors). + + Note: + Make sure that your class's metadata 'render.modes' key includes + the list of supported modes. It's recommended to call super() + in implementations to use the functionality of this method. 
+ + Args: + mode (str): the mode to render with + + Example: + + class MyEnv(Env): + metadata = {'render.modes': ['human', 'rgb_array']} + + def render(self, mode='human'): + if mode == 'rgb_array': + return np.array(...) # return RGB frame suitable for video + elif mode == 'human': + ... # pop up a window and render + else: + super(MyEnv, self).render(mode=mode) # just raise an exception + """ + raise NotImplementedError + + def close(self): + """Override close in your subclass to perform any necessary cleanup. + + Environments will automatically close() themselves when + garbage collected or when the program exits. + """ + pass + + def seed(self, seed=None): + """Sets the seed for this env's random number generator(s). + + Note: + Some environments use multiple pseudorandom number generators. + We want to capture all such seeds used in order to ensure that + there aren't accidental correlations between multiple generators. + + Returns: + list: Returns the list of seeds used in this env's random + number generators. The first value in the list should be the + "main" seed, or the value which a reproducer should pass to + 'seed'. Often, the main seed equals the provided 'seed', but + this won't be true if seed=None, for example. + """ + return + + @property + def unwrapped(self): + """Completely unwrap this env. + + Returns: + gym.Env: The base non-wrapped gym.Env instance + """ + return self + + def __str__(self): + if self.spec is None: + return "<{} instance>".format(type(self).__name__) + else: + return "<{}<{}>>".format(type(self).__name__, self.spec.id) + + def __enter__(self): + """Support with-statement for the environment.""" + return self + + def __exit__(self, *args): + """Support with-statement for the environment.""" + self.close() + # propagate exception + return False + + +class GoalEnv(Env): + """A goal-based environment. It functions just as any regular OpenAI Gym environment but it + imposes a required structure on the observation_space. 
More concretely, the observation + space is required to contain at least three elements, namely `observation`, `desired_goal`, and + `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve. + `achieved_goal` is the goal that it currently achieved instead. `observation` contains the + actual observations of the environment as per usual. + """ + + def reset(self): + # Enforce that each GoalEnv uses a Goal-compatible observation space. + if not isinstance(self.observation_space, gym.spaces.Dict): + raise error.Error( + "GoalEnv requires an observation space of type gym.spaces.Dict" + ) + for key in ["observation", "achieved_goal", "desired_goal"]: + if key not in self.observation_space.spaces: + raise error.Error( + 'GoalEnv requires the "{}" key to be part of the observation dictionary.'.format( + key + ) + ) + + @abstractmethod + def compute_reward(self, achieved_goal, desired_goal, info): + """Compute the step reward. This externalizes the reward function and makes + it dependent on a desired goal and the one that was achieved. If you wish to include + additional rewards that are independent of the goal, you can include the necessary values + to derive it in 'info' and compute it accordingly. + + Args: + achieved_goal (object): the goal that was achieved during execution + desired_goal (object): the desired goal that we asked the agent to attempt to achieve + info (dict): an info dictionary with additional information + + Returns: + float: The reward that corresponds to the provided achieved goal w.r.t. to the desired + goal. Note that the following should always hold true: + + ob, reward, done, info = env.step() + assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info) + """ + raise NotImplementedError + + +class Wrapper(Env): + """Wraps the environment to allow a modular transformation. + + This class is the base class for all wrappers. 
The subclass could override + some methods to change the behavior of the original environment without touching the + original code. + + .. note:: + + Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`. + + """ + + def __init__(self, env): + self.env = env + + self._action_space = None + self._observation_space = None + self._reward_range = None + self._metadata = None + + def __getattr__(self, name): + if name.startswith("_"): + raise AttributeError( + "attempted to get missing private attribute '{}'".format(name) + ) + return getattr(self.env, name) + + @property + def spec(self): + return self.env.spec + + @classmethod + def class_name(cls): + return cls.__name__ + + @property + def action_space(self): + if self._action_space is None: + return self.env.action_space + return self._action_space + + @action_space.setter + def action_space(self, space): + self._action_space = space + + @property + def observation_space(self): + if self._observation_space is None: + return self.env.observation_space + return self._observation_space + + @observation_space.setter + def observation_space(self, space): + self._observation_space = space + + @property + def reward_range(self): + if self._reward_range is None: + return self.env.reward_range + return self._reward_range + + @reward_range.setter + def reward_range(self, value): + self._reward_range = value + + @property + def metadata(self): + if self._metadata is None: + return self.env.metadata + return self._metadata + + @metadata.setter + def metadata(self, value): + self._metadata = value + + def step(self, action): + return self.env.step(action) + + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + def render(self, mode="human", **kwargs): + return self.env.render(mode, **kwargs) + + def close(self): + return self.env.close() + + def seed(self, seed=None): + return self.env.seed(seed) + + def compute_reward(self, achieved_goal, desired_goal, info): + return 
self.env.compute_reward(achieved_goal, desired_goal, info) + + def __str__(self): + return "<{}{}>".format(type(self).__name__, self.env) + + def __repr__(self): + return str(self) + + @property + def unwrapped(self): + return self.env.unwrapped + + +class ObservationWrapper(Wrapper): + def reset(self, **kwargs): + observation = self.env.reset(**kwargs) + return self.observation(observation) + + def step(self, action): + observation, reward, done, info = self.env.step(action) + return self.observation(observation), reward, done, info + + @abstractmethod + def observation(self, observation): + raise NotImplementedError + + +class RewardWrapper(Wrapper): + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + def step(self, action): + observation, reward, done, info = self.env.step(action) + return observation, self.reward(reward), done, info + + @abstractmethod + def reward(self, reward): + raise NotImplementedError + + +class ActionWrapper(Wrapper): + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + def step(self, action): + return self.env.step(self.action(action)) + + @abstractmethod + def action(self, action): + raise NotImplementedError + + @abstractmethod + def reverse_action(self, action): + raise NotImplementedError diff --git a/gym-0.21.0/gym/envs/box2d/bipedal_walker.py b/gym-0.21.0/gym/envs/box2d/bipedal_walker.py new file mode 100644 index 0000000000000000000000000000000000000000..34b564f217c13d0b76138bc5eba021cdba3718d1 --- /dev/null +++ b/gym-0.21.0/gym/envs/box2d/bipedal_walker.py @@ -0,0 +1,658 @@ +import sys +import math + +import numpy as np +import Box2D +from Box2D.b2 import ( + edgeShape, + circleShape, + fixtureDef, + polygonShape, + revoluteJointDef, + contactListener, +) + +import gym +from gym import spaces +from gym.utils import colorize, seeding, EzPickle + +# This is simple 4-joints walker robot environment. +# +# There are two versions: +# +# - Normal, with slightly uneven terrain. 
+# +# - Hardcore with ladders, stumps, pitfalls. +# +# Reward is given for moving forward, total 300+ points up to the far end. If the robot falls, +# it gets -100. Applying motor torque costs a small amount of points, a more optimal agent +# will get a better score. +# +# Heuristic is provided for testing; it's also useful to get demonstrations to +# learn from. To run heuristic: +# +# python gym/envs/box2d/bipedal_walker.py +# +# State consists of hull angle speed, angular velocity, horizontal speed, vertical speed, +# position of joints and joints angular speed, legs contact with ground, and 10 lidar +# rangefinder measurements to help to deal with the hardcore version. There are no coordinates +# in the state vector. Lidar is less useful in the normal version, but it works. +# +# To solve the game you need to get 300 points in 1600 time steps. +# +# To solve the hardcore version you need 300 points in 2000 time steps. +# +# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym. 
+ +FPS = 50 +SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well + +MOTORS_TORQUE = 80 +SPEED_HIP = 4 +SPEED_KNEE = 6 +LIDAR_RANGE = 160 / SCALE + +INITIAL_RANDOM = 5 + +HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)] +LEG_DOWN = -8 / SCALE +LEG_W, LEG_H = 8 / SCALE, 34 / SCALE + +VIEWPORT_W = 600 +VIEWPORT_H = 400 + +TERRAIN_STEP = 14 / SCALE +TERRAIN_LENGTH = 200 # in steps +TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4 +TERRAIN_GRASS = 10 # low long are grass spots, in steps +TERRAIN_STARTPAD = 20 # in steps +FRICTION = 2.5 + +HULL_FD = fixtureDef( + shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]), + density=5.0, + friction=0.1, + categoryBits=0x0020, + maskBits=0x001, # collide only with ground + restitution=0.0, +) # 0.99 bouncy + +LEG_FD = fixtureDef( + shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)), + density=1.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001, +) + +LOWER_FD = fixtureDef( + shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)), + density=1.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001, +) + + +class ContactDetector(contactListener): + def __init__(self, env): + contactListener.__init__(self) + self.env = env + + def BeginContact(self, contact): + if ( + self.env.hull == contact.fixtureA.body + or self.env.hull == contact.fixtureB.body + ): + self.env.game_over = True + for leg in [self.env.legs[1], self.env.legs[3]]: + if leg in [contact.fixtureA.body, contact.fixtureB.body]: + leg.ground_contact = True + + def EndContact(self, contact): + for leg in [self.env.legs[1], self.env.legs[3]]: + if leg in [contact.fixtureA.body, contact.fixtureB.body]: + leg.ground_contact = False + + +class BipedalWalker(gym.Env, EzPickle): + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS} + + hardcore = False + + def __init__(self): + EzPickle.__init__(self) + self.seed() + self.viewer = None + + self.world = Box2D.b2World() + 
self.terrain = None + self.hull = None + + self.prev_shaping = None + + self.fd_polygon = fixtureDef( + shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]), + friction=FRICTION, + ) + + self.fd_edge = fixtureDef( + shape=edgeShape(vertices=[(0, 0), (1, 1)]), + friction=FRICTION, + categoryBits=0x0001, + ) + + high = np.array([np.inf] * 24).astype(np.float32) + self.action_space = spaces.Box( + np.array([-1, -1, -1, -1]).astype(np.float32), + np.array([1, 1, 1, 1]).astype(np.float32), + ) + self.observation_space = spaces.Box(-high, high) + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _destroy(self): + if not self.terrain: + return + self.world.contactListener = None + for t in self.terrain: + self.world.DestroyBody(t) + self.terrain = [] + self.world.DestroyBody(self.hull) + self.hull = None + for leg in self.legs: + self.world.DestroyBody(leg) + self.legs = [] + self.joints = [] + + def _generate_terrain(self, hardcore): + GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) + state = GRASS + velocity = 0.0 + y = TERRAIN_HEIGHT + counter = TERRAIN_STARTPAD + oneshot = False + self.terrain = [] + self.terrain_x = [] + self.terrain_y = [] + for i in range(TERRAIN_LENGTH): + x = i * TERRAIN_STEP + self.terrain_x.append(x) + + if state == GRASS and not oneshot: + velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) + if i > TERRAIN_STARTPAD: + velocity += self.np_random.uniform(-1, 1) / SCALE # 1 + y += velocity + + elif state == PIT and oneshot: + counter = self.np_random.randint(3, 5) + poly = [ + (x, y), + (x + TERRAIN_STEP, y), + (x + TERRAIN_STEP, y - 4 * TERRAIN_STEP), + (x, y - 4 * TERRAIN_STEP), + ] + self.fd_polygon.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + + self.fd_polygon.shape.vertices = [ + (p[0] + TERRAIN_STEP * counter, p[1]) for p in poly + ] + t = 
self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + counter += 2 + original_y = y + + elif state == PIT and not oneshot: + y = original_y + if counter > 1: + y -= 4 * TERRAIN_STEP + + elif state == STUMP and oneshot: + counter = self.np_random.randint(1, 3) + poly = [ + (x, y), + (x + counter * TERRAIN_STEP, y), + (x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP), + (x, y + counter * TERRAIN_STEP), + ] + self.fd_polygon.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + + elif state == STAIRS and oneshot: + stair_height = +1 if self.np_random.rand() > 0.5 else -1 + stair_width = self.np_random.randint(4, 5) + stair_steps = self.np_random.randint(3, 5) + original_y = y + for s in range(stair_steps): + poly = [ + ( + x + (s * stair_width) * TERRAIN_STEP, + y + (s * stair_height) * TERRAIN_STEP, + ), + ( + x + ((1 + s) * stair_width) * TERRAIN_STEP, + y + (s * stair_height) * TERRAIN_STEP, + ), + ( + x + ((1 + s) * stair_width) * TERRAIN_STEP, + y + (-1 + s * stair_height) * TERRAIN_STEP, + ), + ( + x + (s * stair_width) * TERRAIN_STEP, + y + (-1 + s * stair_height) * TERRAIN_STEP, + ), + ] + self.fd_polygon.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + counter = stair_steps * stair_width + + elif state == STAIRS and not oneshot: + s = stair_steps * stair_width - counter - stair_height + n = s / stair_width + y = original_y + (n * stair_height) * TERRAIN_STEP + + oneshot = False + self.terrain_y.append(y) + counter -= 1 + if counter == 0: + counter = self.np_random.randint(TERRAIN_GRASS / 2, TERRAIN_GRASS) + if state == GRASS and hardcore: + state = self.np_random.randint(1, _STATES_) + oneshot = True + else: + state = GRASS + oneshot = True + + self.terrain_poly = 
[] + for i in range(TERRAIN_LENGTH - 1): + poly = [ + (self.terrain_x[i], self.terrain_y[i]), + (self.terrain_x[i + 1], self.terrain_y[i + 1]), + ] + self.fd_edge.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_edge) + color = (0.3, 1.0 if i % 2 == 0 else 0.8, 0.3) + t.color1 = color + t.color2 = color + self.terrain.append(t) + color = (0.4, 0.6, 0.3) + poly += [(poly[1][0], 0), (poly[0][0], 0)] + self.terrain_poly.append((poly, color)) + self.terrain.reverse() + + def _generate_clouds(self): + # Sorry for the clouds, couldn't resist + self.cloud_poly = [] + for i in range(TERRAIN_LENGTH // 20): + x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP + y = VIEWPORT_H / SCALE * 3 / 4 + poly = [ + ( + x + + 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5) + + self.np_random.uniform(0, 5 * TERRAIN_STEP), + y + + 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5) + + self.np_random.uniform(0, 5 * TERRAIN_STEP), + ) + for a in range(5) + ] + x1 = min([p[0] for p in poly]) + x2 = max([p[0] for p in poly]) + self.cloud_poly.append((poly, x1, x2)) + + def reset(self): + self._destroy() + self.world.contactListener_bug_workaround = ContactDetector(self) + self.world.contactListener = self.world.contactListener_bug_workaround + self.game_over = False + self.prev_shaping = None + self.scroll = 0.0 + self.lidar_render = 0 + + W = VIEWPORT_W / SCALE + H = VIEWPORT_H / SCALE + + self._generate_terrain(self.hardcore) + self._generate_clouds() + + init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2 + init_y = TERRAIN_HEIGHT + 2 * LEG_H + self.hull = self.world.CreateDynamicBody( + position=(init_x, init_y), fixtures=HULL_FD + ) + self.hull.color1 = (0.5, 0.4, 0.9) + self.hull.color2 = (0.3, 0.3, 0.5) + self.hull.ApplyForceToCenter( + (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True + ) + + self.legs = [] + self.joints = [] + for i in [-1, +1]: + leg = self.world.CreateDynamicBody( + position=(init_x, init_y - LEG_H / 2 - LEG_DOWN), + angle=(i * 
0.05), + fixtures=LEG_FD, + ) + leg.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0) + leg.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0) + rjd = revoluteJointDef( + bodyA=self.hull, + bodyB=leg, + localAnchorA=(0, LEG_DOWN), + localAnchorB=(0, LEG_H / 2), + enableMotor=True, + enableLimit=True, + maxMotorTorque=MOTORS_TORQUE, + motorSpeed=i, + lowerAngle=-0.8, + upperAngle=1.1, + ) + self.legs.append(leg) + self.joints.append(self.world.CreateJoint(rjd)) + + lower = self.world.CreateDynamicBody( + position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN), + angle=(i * 0.05), + fixtures=LOWER_FD, + ) + lower.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0) + lower.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0) + rjd = revoluteJointDef( + bodyA=leg, + bodyB=lower, + localAnchorA=(0, -LEG_H / 2), + localAnchorB=(0, LEG_H / 2), + enableMotor=True, + enableLimit=True, + maxMotorTorque=MOTORS_TORQUE, + motorSpeed=1, + lowerAngle=-1.6, + upperAngle=-0.1, + ) + lower.ground_contact = False + self.legs.append(lower) + self.joints.append(self.world.CreateJoint(rjd)) + + self.drawlist = self.terrain + self.legs + [self.hull] + + class LidarCallback(Box2D.b2.rayCastCallback): + def ReportFixture(self, fixture, point, normal, fraction): + if (fixture.filterData.categoryBits & 1) == 0: + return -1 + self.p2 = point + self.fraction = fraction + return fraction + + self.lidar = [LidarCallback() for _ in range(10)] + + return self.step(np.array([0, 0, 0, 0]))[0] + + def step(self, action): + # self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help + control_speed = False # Should be easier as well + if control_speed: + self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1)) + self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1)) + self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1)) + self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], 
-1, 1)) + else: + self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0])) + self.joints[0].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1) + ) + self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1])) + self.joints[1].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1) + ) + self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2])) + self.joints[2].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1) + ) + self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3])) + self.joints[3].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1) + ) + + self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) + + pos = self.hull.position + vel = self.hull.linearVelocity + + for i in range(10): + self.lidar[i].fraction = 1.0 + self.lidar[i].p1 = pos + self.lidar[i].p2 = ( + pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE, + pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE, + ) + self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2) + + state = [ + self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible. 
+ 2.0 * self.hull.angularVelocity / FPS, + 0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range + 0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS, + self.joints[ + 0 + ].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too) + self.joints[0].speed / SPEED_HIP, + self.joints[1].angle + 1.0, + self.joints[1].speed / SPEED_KNEE, + 1.0 if self.legs[1].ground_contact else 0.0, + self.joints[2].angle, + self.joints[2].speed / SPEED_HIP, + self.joints[3].angle + 1.0, + self.joints[3].speed / SPEED_KNEE, + 1.0 if self.legs[3].ground_contact else 0.0, + ] + state += [l.fraction for l in self.lidar] + assert len(state) == 24 + + self.scroll = pos.x - VIEWPORT_W / SCALE / 5 + + shaping = ( + 130 * pos[0] / SCALE + ) # moving forward is a way to receive reward (normalized to get 300 on completion) + shaping -= 5.0 * abs( + state[0] + ) # keep head straight, other than that and falling, any behavior is unpunished + + reward = 0 + if self.prev_shaping is not None: + reward = shaping - self.prev_shaping + self.prev_shaping = shaping + + for a in action: + reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1) + # normalized to about -50.0 using heuristic, more optimal agent should spend less + + done = False + if self.game_over or pos[0] < 0: + reward = -100 + done = True + if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP: + done = True + return np.array(state, dtype=np.float32), reward, done, {} + + def render(self, mode="human"): + from gym.envs.classic_control import rendering + + if self.viewer is None: + self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H) + self.viewer.set_bounds( + self.scroll, VIEWPORT_W / SCALE + self.scroll, 0, VIEWPORT_H / SCALE + ) + + self.viewer.draw_polygon( + [ + (self.scroll, 0), + (self.scroll + VIEWPORT_W / SCALE, 0), + (self.scroll + VIEWPORT_W / SCALE, VIEWPORT_H / SCALE), + (self.scroll, VIEWPORT_H / SCALE), + ], + color=(0.9, 0.9, 1.0), 
+ ) + for poly, x1, x2 in self.cloud_poly: + if x2 < self.scroll / 2: + continue + if x1 > self.scroll / 2 + VIEWPORT_W / SCALE: + continue + self.viewer.draw_polygon( + [(p[0] + self.scroll / 2, p[1]) for p in poly], color=(1, 1, 1) + ) + for poly, color in self.terrain_poly: + if poly[1][0] < self.scroll: + continue + if poly[0][0] > self.scroll + VIEWPORT_W / SCALE: + continue + self.viewer.draw_polygon(poly, color=color) + + self.lidar_render = (self.lidar_render + 1) % 100 + i = self.lidar_render + if i < 2 * len(self.lidar): + l = ( + self.lidar[i] + if i < len(self.lidar) + else self.lidar[len(self.lidar) - i - 1] + ) + self.viewer.draw_polyline([l.p1, l.p2], color=(1, 0, 0), linewidth=1) + + for obj in self.drawlist: + for f in obj.fixtures: + trans = f.body.transform + if type(f.shape) is circleShape: + t = rendering.Transform(translation=trans * f.shape.pos) + self.viewer.draw_circle( + f.shape.radius, 30, color=obj.color1 + ).add_attr(t) + self.viewer.draw_circle( + f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2 + ).add_attr(t) + else: + path = [trans * v for v in f.shape.vertices] + self.viewer.draw_polygon(path, color=obj.color1) + path.append(path[0]) + self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) + + flagy1 = TERRAIN_HEIGHT + flagy2 = flagy1 + 50 / SCALE + x = TERRAIN_STEP * 3 + self.viewer.draw_polyline( + [(x, flagy1), (x, flagy2)], color=(0, 0, 0), linewidth=2 + ) + f = [ + (x, flagy2), + (x, flagy2 - 10 / SCALE), + (x + 25 / SCALE, flagy2 - 5 / SCALE), + ] + self.viewer.draw_polygon(f, color=(0.9, 0.2, 0)) + self.viewer.draw_polyline(f + [f[0]], color=(0, 0, 0), linewidth=2) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def close(self): + if self.viewer is not None: + self.viewer.close() + self.viewer = None + + +class BipedalWalkerHardcore(BipedalWalker): + hardcore = True + + +if __name__ == "__main__": + # Heurisic: suboptimal, have no notion of balance. 
+ env = BipedalWalker() + env.reset() + steps = 0 + total_reward = 0 + a = np.array([0.0, 0.0, 0.0, 0.0]) + STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3 + SPEED = 0.29 # Will fall forward on higher speed + state = STAY_ON_ONE_LEG + moving_leg = 0 + supporting_leg = 1 - moving_leg + SUPPORT_KNEE_ANGLE = +0.1 + supporting_knee_angle = SUPPORT_KNEE_ANGLE + while True: + s, r, done, info = env.step(a) + total_reward += r + if steps % 20 == 0 or done: + print("\naction " + str(["{:+0.2f}".format(x) for x in a])) + print("step {} total_reward {:+0.2f}".format(steps, total_reward)) + print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4]])) + print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9]])) + print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]])) + steps += 1 + + contact0 = s[8] + contact1 = s[13] + moving_s_base = 4 + 5 * moving_leg + supporting_s_base = 4 + 5 * supporting_leg + + hip_targ = [None, None] # -0.8 .. +1.1 + knee_targ = [None, None] # -0.6 .. +0.9 + hip_todo = [0.0, 0.0] + knee_todo = [0.0, 0.0] + + if state == STAY_ON_ONE_LEG: + hip_targ[moving_leg] = 1.1 + knee_targ[moving_leg] = -0.6 + supporting_knee_angle += 0.03 + if s[2] > SPEED: + supporting_knee_angle += 0.03 + supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE) + knee_targ[supporting_leg] = supporting_knee_angle + if s[supporting_s_base + 0] < 0.10: # supporting leg is behind + state = PUT_OTHER_DOWN + if state == PUT_OTHER_DOWN: + hip_targ[moving_leg] = +0.1 + knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE + knee_targ[supporting_leg] = supporting_knee_angle + if s[moving_s_base + 4]: + state = PUSH_OFF + supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE) + if state == PUSH_OFF: + knee_targ[moving_leg] = supporting_knee_angle + knee_targ[supporting_leg] = +1.0 + if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED: + state = STAY_ON_ONE_LEG + moving_leg = 1 - moving_leg + supporting_leg = 1 - moving_leg + + if hip_targ[0]: + 
hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5] + if hip_targ[1]: + hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10] + if knee_targ[0]: + knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7] + if knee_targ[1]: + knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12] + + hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1] # PID to keep head strait + hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1] + knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations + knee_todo[1] -= 15.0 * s[3] + + a[0] = hip_todo[0] + a[1] = knee_todo[0] + a[2] = hip_todo[1] + a[3] = knee_todo[1] + a = np.clip(0.5 * a, -1.0, 1.0) + + env.render() + if done: + break diff --git a/gym-0.21.0/gym/envs/classic_control/cartpole.py b/gym-0.21.0/gym/envs/classic_control/cartpole.py new file mode 100644 index 0000000000000000000000000000000000000000..e11adcba6f413017e8661e8990dde668a63a129c --- /dev/null +++ b/gym-0.21.0/gym/envs/classic_control/cartpole.py @@ -0,0 +1,234 @@ +""" +Classic cart-pole system implemented by Rich Sutton et al. +Copied from http://incompleteideas.net/sutton/book/code/pole.c +permalink: https://perma.cc/C9ZM-652R +""" + +import math +import gym +from gym import spaces, logger +from gym.utils import seeding +import numpy as np + + +class CartPoleEnv(gym.Env): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along + a frictionless track. The pendulum starts upright, and the goal is to + prevent it from falling over by increasing and reducing the cart's + velocity. 
+ + Source: + This environment corresponds to the version of the cart-pole problem + described by Barto, Sutton, and Anderson + + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -0.418 rad (-24 deg) 0.418 rad (24 deg) + 3 Pole Angular Velocity -Inf Inf + + Actions: + Type: Discrete(2) + Num Action + 0 Push cart to the left + 1 Push cart to the right + + Note: The amount the velocity that is reduced or increased is not + fixed; it depends on the angle the pole is pointing. This is because + the center of gravity of the pole increases the amount of energy needed + to move the cart underneath it + + Reward: + Reward is 1 for every step taken, including the termination step + + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + + Episode Termination: + Pole Angle is more than 12 degrees. + Cart Position is more than 2.4 (center of the cart reaches the edge of + the display). + Episode length is greater than 200. + Solved Requirements: + Considered solved when the average return is greater than or equal to + 195.0 over 100 consecutive trials. + """ + + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50} + + def __init__(self): + self.gravity = 9.8 + self.masscart = 1.0 + self.masspole = 0.1 + self.total_mass = self.masspole + self.masscart + self.length = 0.5 # actually half the pole's length + self.polemass_length = self.masspole * self.length + self.force_mag = 10.0 + self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = "euler" + + # Angle at which to fail the episode + self.theta_threshold_radians = 12 * 2 * math.pi / 360 + self.x_threshold = 2.4 + + # Angle limit set to 2 * theta_threshold_radians so failing observation + # is still within bounds. 
+ high = np.array( + [ + self.x_threshold * 2, + np.finfo(np.float32).max, + self.theta_threshold_radians * 2, + np.finfo(np.float32).max, + ], + dtype=np.float32, + ) + + self.action_space = spaces.Discrete(2) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.seed() + self.viewer = None + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + err_msg = "%r (%s) invalid" % (action, type(action)) + assert self.action_space.contains(action), err_msg + + x, x_dot, theta, theta_dot = self.state + force = self.force_mag if action == 1 else -self.force_mag + costheta = math.cos(theta) + sintheta = math.sin(theta) + + # For the interested reader: + # https://coneural.org/florian/papers/05_cart_pole.pdf + temp = ( + force + self.polemass_length * theta_dot ** 2 * sintheta + ) / self.total_mass + thetaacc = (self.gravity * sintheta - costheta * temp) / ( + self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass) + ) + xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass + + if self.kinematics_integrator == "euler": + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot + + self.state = (x, x_dot, theta, theta_dot) + + done = bool( + x < -self.x_threshold + or x > self.x_threshold + or theta < -self.theta_threshold_radians + or theta > self.theta_threshold_radians + ) + + if not done: + reward = 1.0 + elif self.steps_beyond_done is None: + # Pole just fell! 
+ self.steps_beyond_done = 0 + reward = 1.0 + else: + if self.steps_beyond_done == 0: + logger.warn( + "You are calling 'step()' even though this " + "environment has already returned done = True. You " + "should always call 'reset()' once you receive 'done = " + "True' -- any further steps are undefined behavior." + ) + self.steps_beyond_done += 1 + reward = 0.0 + + return np.array(self.state, dtype=np.float32), reward, done, {} + + def reset(self): + self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) + self.steps_beyond_done = None + return np.array(self.state, dtype=np.float32) + + def render(self, mode="human"): + screen_width = 600 + screen_height = 400 + + world_width = self.x_threshold * 2 + scale = screen_width / world_width + carty = 100 # TOP OF CART + polewidth = 10.0 + polelen = scale * (2 * self.length) + cartwidth = 50.0 + cartheight = 30.0 + + if self.viewer is None: + from gym.envs.classic_control import rendering + + self.viewer = rendering.Viewer(screen_width, screen_height) + l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 + axleoffset = cartheight / 4.0 + cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + self.carttrans = rendering.Transform() + cart.add_attr(self.carttrans) + self.viewer.add_geom(cart) + l, r, t, b = ( + -polewidth / 2, + polewidth / 2, + polelen - polewidth / 2, + -polewidth / 2, + ) + pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + pole.set_color(0.8, 0.6, 0.4) + self.poletrans = rendering.Transform(translation=(0, axleoffset)) + pole.add_attr(self.poletrans) + pole.add_attr(self.carttrans) + self.viewer.add_geom(pole) + self.axle = rendering.make_circle(polewidth / 2) + self.axle.add_attr(self.poletrans) + self.axle.add_attr(self.carttrans) + self.axle.set_color(0.5, 0.5, 0.8) + self.viewer.add_geom(self.axle) + self.track = rendering.Line((0, carty), (screen_width, carty)) + self.track.set_color(0, 0, 0) + self.viewer.add_geom(self.track) + + 
self._pole_geom = pole + + if self.state is None: + return None + + # Edit the pole polygon vertex + pole = self._pole_geom + l, r, t, b = ( + -polewidth / 2, + polewidth / 2, + polelen - polewidth / 2, + -polewidth / 2, + ) + pole.v = [(l, b), (l, t), (r, t), (r, b)] + + x = self.state + cartx = x[0] * scale + screen_width / 2.0 # MIDDLE OF CART + self.carttrans.set_translation(cartx, carty) + self.poletrans.set_rotation(-x[2]) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/gym-0.21.0/gym/envs/classic_control/mountain_car.py b/gym-0.21.0/gym/envs/classic_control/mountain_car.py new file mode 100644 index 0000000000000000000000000000000000000000..e096774b00cf316451db524a3c223609f992cf66 --- /dev/null +++ b/gym-0.21.0/gym/envs/classic_control/mountain_car.py @@ -0,0 +1,177 @@ +""" +http://incompleteideas.net/MountainCar/MountainCar1.cp +permalink: https://perma.cc/6Z2N-PFWC +""" +import math + +import numpy as np + +import gym +from gym import spaces +from gym.utils import seeding + + +class MountainCarEnv(gym.Env): + """ + Description: + The agent (a car) is started at the bottom of a valley. For any given + state the agent may choose to accelerate to the left, right or cease + any acceleration. + + Source: + The environment appeared first in Andrew Moore's PhD Thesis (1990). + + Observation: + Type: Box(2) + Num Observation Min Max + 0 Car Position -1.2 0.6 + 1 Car Velocity -0.07 0.07 + + Actions: + Type: Discrete(3) + Num Action + 0 Accelerate to the Left + 1 Don't accelerate + 2 Accelerate to the Right + + Note: This does not affect the amount of velocity affected by the + gravitational pull acting on the car. + + Reward: + Reward of 0 is awarded if the agent reached the flag (position = 0.5) + on top of the mountain. + Reward of -1 is awarded if the position of the agent is less than 0.5. 
+ + Starting State: + The position of the car is assigned a uniform random value in + [-0.6 , -0.4]. + The starting velocity of the car is always assigned to 0. + + Episode Termination: + The car position is more than 0.5 + Episode length is greater than 200 + """ + + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30} + + def __init__(self, goal_velocity=0): + self.min_position = -1.2 + self.max_position = 0.6 + self.max_speed = 0.07 + self.goal_position = 0.5 + self.goal_velocity = goal_velocity + + self.force = 0.001 + self.gravity = 0.0025 + + self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32) + self.high = np.array([self.max_position, self.max_speed], dtype=np.float32) + + self.viewer = None + + self.action_space = spaces.Discrete(3) + self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32) + + self.seed() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + assert self.action_space.contains(action), "%r (%s) invalid" % ( + action, + type(action), + ) + + position, velocity = self.state + velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity) + velocity = np.clip(velocity, -self.max_speed, self.max_speed) + position += velocity + position = np.clip(position, self.min_position, self.max_position) + if position == self.min_position and velocity < 0: + velocity = 0 + + done = bool(position >= self.goal_position and velocity >= self.goal_velocity) + reward = -1.0 + + self.state = (position, velocity) + return np.array(self.state, dtype=np.float32), reward, done, {} + + def reset(self): + self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0]) + return np.array(self.state, dtype=np.float32) + + def _height(self, xs): + return np.sin(3 * xs) * 0.45 + 0.55 + + def render(self, mode="human"): + screen_width = 600 + screen_height = 400 + + world_width = self.max_position - 
self.min_position + scale = screen_width / world_width + carwidth = 40 + carheight = 20 + + if self.viewer is None: + from gym.envs.classic_control import rendering + + self.viewer = rendering.Viewer(screen_width, screen_height) + xs = np.linspace(self.min_position, self.max_position, 100) + ys = self._height(xs) + xys = list(zip((xs - self.min_position) * scale, ys * scale)) + + self.track = rendering.make_polyline(xys) + self.track.set_linewidth(4) + self.viewer.add_geom(self.track) + + clearance = 10 + + l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0 + car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + car.add_attr(rendering.Transform(translation=(0, clearance))) + self.cartrans = rendering.Transform() + car.add_attr(self.cartrans) + self.viewer.add_geom(car) + frontwheel = rendering.make_circle(carheight / 2.5) + frontwheel.set_color(0.5, 0.5, 0.5) + frontwheel.add_attr( + rendering.Transform(translation=(carwidth / 4, clearance)) + ) + frontwheel.add_attr(self.cartrans) + self.viewer.add_geom(frontwheel) + backwheel = rendering.make_circle(carheight / 2.5) + backwheel.add_attr( + rendering.Transform(translation=(-carwidth / 4, clearance)) + ) + backwheel.add_attr(self.cartrans) + backwheel.set_color(0.5, 0.5, 0.5) + self.viewer.add_geom(backwheel) + flagx = (self.goal_position - self.min_position) * scale + flagy1 = self._height(self.goal_position) * scale + flagy2 = flagy1 + 50 + flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2)) + self.viewer.add_geom(flagpole) + flag = rendering.FilledPolygon( + [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)] + ) + flag.set_color(0.8, 0.8, 0) + self.viewer.add_geom(flag) + + pos = self.state[0] + self.cartrans.set_translation( + (pos - self.min_position) * scale, self._height(pos) * scale + ) + self.cartrans.set_rotation(math.cos(3 * pos)) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def get_keys_to_action(self): + # Control with left and right 
arrow keys. + return {(): 1, (276,): 0, (275,): 2, (275, 276): 1} + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/gym-0.21.0/gym/envs/classic_control/pendulum.py b/gym-0.21.0/gym/envs/classic_control/pendulum.py new file mode 100644 index 0000000000000000000000000000000000000000..10ced4d43d218fbf0bb344b3bbc006fcaa7f8362 --- /dev/null +++ b/gym-0.21.0/gym/envs/classic_control/pendulum.py @@ -0,0 +1,94 @@ +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +from os import path + + +class PendulumEnv(gym.Env): + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30} + + def __init__(self, g=10.0): + self.max_speed = 8 + self.max_torque = 2.0 + self.dt = 0.05 + self.g = g + self.m = 1.0 + self.l = 1.0 + self.viewer = None + + high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32) + self.action_space = spaces.Box( + low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32 + ) + self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32) + + self.seed() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, u): + th, thdot = self.state # th := theta + + g = self.g + m = self.m + l = self.l + dt = self.dt + + u = np.clip(u, -self.max_torque, self.max_torque)[0] + self.last_u = u # for rendering + costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2) + + newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt + newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) + newth = th + newthdot * dt + + self.state = np.array([newth, newthdot]) + return self._get_obs(), -costs, False, {} + + def reset(self): + high = np.array([np.pi, 1]) + self.state = self.np_random.uniform(low=-high, high=high) + self.last_u = None + return self._get_obs() + + def _get_obs(self): + theta, thetadot = self.state + return 
np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32) + + def render(self, mode="human"): + if self.viewer is None: + from gym.envs.classic_control import rendering + + self.viewer = rendering.Viewer(500, 500) + self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2) + rod = rendering.make_capsule(1, 0.2) + rod.set_color(0.8, 0.3, 0.3) + self.pole_transform = rendering.Transform() + rod.add_attr(self.pole_transform) + self.viewer.add_geom(rod) + axle = rendering.make_circle(0.05) + axle.set_color(0, 0, 0) + self.viewer.add_geom(axle) + fname = path.join(path.dirname(__file__), "assets/clockwise.png") + self.img = rendering.Image(fname, 1.0, 1.0) + self.imgtrans = rendering.Transform() + self.img.add_attr(self.imgtrans) + + self.viewer.add_onetime(self.img) + self.pole_transform.set_rotation(self.state[0] + np.pi / 2) + if self.last_u is not None: + self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None + + +def angle_normalize(x): + return ((x + np.pi) % (2 * np.pi)) - np.pi diff --git a/gym-0.21.0/gym/envs/mujoco/ant.py b/gym-0.21.0/gym/envs/mujoco/ant.py new file mode 100644 index 0000000000000000000000000000000000000000..ae0a2f8dcf9846d6c28ced183e99bab1789f7014 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/ant.py @@ -0,0 +1,56 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + + +class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5) + utils.EzPickle.__init__(self) + + def step(self, a): + xposbefore = self.get_body_com("torso")[0] + self.do_simulation(a, self.frame_skip) + xposafter = self.get_body_com("torso")[0] + forward_reward = (xposafter - xposbefore) / self.dt + ctrl_cost = 0.5 * np.square(a).sum() + contact_cost = ( + 0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 
1))) + ) + survive_reward = 1.0 + reward = forward_reward - ctrl_cost - contact_cost + survive_reward + state = self.state_vector() + notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0 + done = not notdone + ob = self._get_obs() + return ( + ob, + reward, + done, + dict( + reward_forward=forward_reward, + reward_ctrl=-ctrl_cost, + reward_contact=-contact_cost, + reward_survive=survive_reward, + ), + ) + + def _get_obs(self): + return np.concatenate( + [ + self.sim.data.qpos.flat[2:], + self.sim.data.qvel.flat, + np.clip(self.sim.data.cfrc_ext, -1, 1).flat, + ] + ) + + def reset_model(self): + qpos = self.init_qpos + self.np_random.uniform( + size=self.model.nq, low=-0.1, high=0.1 + ) + qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1 + self.set_state(qpos, qvel) + return self._get_obs() + + def viewer_setup(self): + self.viewer.cam.distance = self.model.stat.extent * 0.5 diff --git a/gym-0.21.0/gym/envs/mujoco/assets/thrower.xml b/gym-0.21.0/gym/envs/mujoco/assets/thrower.xml new file mode 100644 index 0000000000000000000000000000000000000000..b68f256054915f4df2a9318bf5db721062aa072e --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/assets/thrower.xml @@ -0,0 +1,127 @@ + + + diff --git a/gym-0.21.0/gym/envs/mujoco/hopper.py b/gym-0.21.0/gym/envs/mujoco/hopper.py new file mode 100644 index 0000000000000000000000000000000000000000..46bf0a7f5d9968b84ab31d94f2688ce4ede1f101 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/hopper.py @@ -0,0 +1,48 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + + +class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4) + utils.EzPickle.__init__(self) + + def step(self, a): + posbefore = self.sim.data.qpos[0] + self.do_simulation(a, self.frame_skip) + posafter, height, ang = self.sim.data.qpos[0:3] + alive_bonus = 1.0 + reward = (posafter - posbefore) / self.dt + reward += alive_bonus + 
reward -= 1e-3 * np.square(a).sum() + s = self.state_vector() + done = not ( + np.isfinite(s).all() + and (np.abs(s[2:]) < 100).all() + and (height > 0.7) + and (abs(ang) < 0.2) + ) + ob = self._get_obs() + return ob, reward, done, {} + + def _get_obs(self): + return np.concatenate( + [self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)] + ) + + def reset_model(self): + qpos = self.init_qpos + self.np_random.uniform( + low=-0.005, high=0.005, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=-0.005, high=0.005, size=self.model.nv + ) + self.set_state(qpos, qvel) + return self._get_obs() + + def viewer_setup(self): + self.viewer.cam.trackbodyid = 2 + self.viewer.cam.distance = self.model.stat.extent * 0.75 + self.viewer.cam.lookat[2] = 1.15 + self.viewer.cam.elevation = -20 diff --git a/gym-0.21.0/gym/envs/mujoco/hopper_v3.py b/gym-0.21.0/gym/envs/mujoco/hopper_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..8bb80024124daea23dbb5efbb0d80e5ea0f38418 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/hopper_v3.py @@ -0,0 +1,137 @@ +import numpy as np +from gym.envs.mujoco import mujoco_env +from gym import utils + + +DEFAULT_CAMERA_CONFIG = { + "trackbodyid": 2, + "distance": 3.0, + "lookat": np.array((0.0, 0.0, 1.15)), + "elevation": -20.0, +} + + +class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__( + self, + xml_file="hopper.xml", + forward_reward_weight=1.0, + ctrl_cost_weight=1e-3, + healthy_reward=1.0, + terminate_when_unhealthy=True, + healthy_state_range=(-100.0, 100.0), + healthy_z_range=(0.7, float("inf")), + healthy_angle_range=(-0.2, 0.2), + reset_noise_scale=5e-3, + exclude_current_positions_from_observation=True, + ): + utils.EzPickle.__init__(**locals()) + + self._forward_reward_weight = forward_reward_weight + + self._ctrl_cost_weight = ctrl_cost_weight + + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + + 
self._healthy_state_range = healthy_state_range + self._healthy_z_range = healthy_z_range + self._healthy_angle_range = healthy_angle_range + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + + @property + def healthy_reward(self): + return ( + float(self.is_healthy or self._terminate_when_unhealthy) + * self._healthy_reward + ) + + def control_cost(self, action): + control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) + return control_cost + + @property + def is_healthy(self): + z, angle = self.sim.data.qpos[1:3] + state = self.state_vector()[2:] + + min_state, max_state = self._healthy_state_range + min_z, max_z = self._healthy_z_range + min_angle, max_angle = self._healthy_angle_range + + healthy_state = np.all(np.logical_and(min_state < state, state < max_state)) + healthy_z = min_z < z < max_z + healthy_angle = min_angle < angle < max_angle + + is_healthy = all((healthy_state, healthy_z, healthy_angle)) + + return is_healthy + + @property + def done(self): + done = not self.is_healthy if self._terminate_when_unhealthy else False + return done + + def _get_obs(self): + position = self.sim.data.qpos.flat.copy() + velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10) + + if self._exclude_current_positions_from_observation: + position = position[1:] + + observation = np.concatenate((position, velocity)).ravel() + return observation + + def step(self, action): + x_position_before = self.sim.data.qpos[0] + self.do_simulation(action, self.frame_skip) + x_position_after = self.sim.data.qpos[0] + x_velocity = (x_position_after - x_position_before) / self.dt + + ctrl_cost = self.control_cost(action) + + forward_reward = self._forward_reward_weight * x_velocity + healthy_reward = self.healthy_reward + + rewards = forward_reward + healthy_reward + costs = ctrl_cost + + observation = self._get_obs() + 
reward = rewards - costs + done = self.done + info = { + "x_position": x_position_after, + "x_velocity": x_velocity, + } + + return observation, reward, done, info + + def reset_model(self): + noise_low = -self._reset_noise_scale + noise_high = self._reset_noise_scale + + qpos = self.init_qpos + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nv + ) + + self.set_state(qpos, qvel) + + observation = self._get_obs() + return observation + + def viewer_setup(self): + for key, value in DEFAULT_CAMERA_CONFIG.items(): + if isinstance(value, np.ndarray): + getattr(self.viewer.cam, key)[:] = value + else: + setattr(self.viewer.cam, key, value) diff --git a/gym-0.21.0/gym/envs/mujoco/pusher.py b/gym-0.21.0/gym/envs/mujoco/pusher.py new file mode 100644 index 0000000000000000000000000000000000000000..e82adc6634bcad40e02ca9e0ce70ecd2b71041d2 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/pusher.py @@ -0,0 +1,63 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + +import mujoco_py + + +class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + utils.EzPickle.__init__(self) + mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5) + + def step(self, a): + vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") + vec_2 = self.get_body_com("object") - self.get_body_com("goal") + + reward_near = -np.linalg.norm(vec_1) + reward_dist = -np.linalg.norm(vec_2) + reward_ctrl = -np.square(a).sum() + reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near + + self.do_simulation(a, self.frame_skip) + ob = self._get_obs() + done = False + return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) + + def viewer_setup(self): + self.viewer.cam.trackbodyid = -1 + self.viewer.cam.distance = 4.0 + + def reset_model(self): + qpos = self.init_qpos + + self.goal_pos = 
np.asarray([0, 0]) + while True: + self.cylinder_pos = np.concatenate( + [ + self.np_random.uniform(low=-0.3, high=0, size=1), + self.np_random.uniform(low=-0.2, high=0.2, size=1), + ] + ) + if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: + break + + qpos[-4:-2] = self.cylinder_pos + qpos[-2:] = self.goal_pos + qvel = self.init_qvel + self.np_random.uniform( + low=-0.005, high=0.005, size=self.model.nv + ) + qvel[-4:] = 0 + self.set_state(qpos, qvel) + return self._get_obs() + + def _get_obs(self): + return np.concatenate( + [ + self.sim.data.qpos.flat[:7], + self.sim.data.qvel.flat[:7], + self.get_body_com("tips_arm"), + self.get_body_com("object"), + self.get_body_com("goal"), + ] + ) diff --git a/gym-0.21.0/gym/envs/mujoco/swimmer_v3.py b/gym-0.21.0/gym/envs/mujoco/swimmer_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..8cf794d4791b28d72891a597a06f5fa0f4786c9a --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/swimmer_v3.py @@ -0,0 +1,94 @@ +import numpy as np +from gym.envs.mujoco import mujoco_env +from gym import utils + + +DEFAULT_CAMERA_CONFIG = {} + + +class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__( + self, + xml_file="swimmer.xml", + forward_reward_weight=1.0, + ctrl_cost_weight=1e-4, + reset_noise_scale=0.1, + exclude_current_positions_from_observation=True, + ): + utils.EzPickle.__init__(**locals()) + + self._forward_reward_weight = forward_reward_weight + self._ctrl_cost_weight = ctrl_cost_weight + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + + def control_cost(self, action): + control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) + return control_cost + + def step(self, action): + xy_position_before = self.sim.data.qpos[0:2].copy() + self.do_simulation(action, self.frame_skip) + xy_position_after = 
self.sim.data.qpos[0:2].copy() + + xy_velocity = (xy_position_after - xy_position_before) / self.dt + x_velocity, y_velocity = xy_velocity + + forward_reward = self._forward_reward_weight * x_velocity + + ctrl_cost = self.control_cost(action) + + observation = self._get_obs() + reward = forward_reward - ctrl_cost + done = False + info = { + "reward_fwd": forward_reward, + "reward_ctrl": -ctrl_cost, + "x_position": xy_position_after[0], + "y_position": xy_position_after[1], + "distance_from_origin": np.linalg.norm(xy_position_after, ord=2), + "x_velocity": x_velocity, + "y_velocity": y_velocity, + "forward_reward": forward_reward, + } + + return observation, reward, done, info + + def _get_obs(self): + position = self.sim.data.qpos.flat.copy() + velocity = self.sim.data.qvel.flat.copy() + + if self._exclude_current_positions_from_observation: + position = position[2:] + + observation = np.concatenate([position, velocity]).ravel() + return observation + + def reset_model(self): + noise_low = -self._reset_noise_scale + noise_high = self._reset_noise_scale + + qpos = self.init_qpos + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nv + ) + + self.set_state(qpos, qvel) + + observation = self._get_obs() + return observation + + def viewer_setup(self): + for key, value in DEFAULT_CAMERA_CONFIG.items(): + if isinstance(value, np.ndarray): + getattr(self.viewer.cam, key)[:] = value + else: + setattr(self.viewer.cam, key, value) diff --git a/gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml b/gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml new file mode 100644 index 0000000000000000000000000000000000000000..c73d6249f3d809336c91798ee10dd57df73de4df --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml b/gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml new file mode 100644 index 0000000000000000000000000000000000000000..b649f102917eb404c7278c5e7588eea6f878a364 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml b/gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml new file mode 100644 index 0000000000000000000000000000000000000000..71f6dfe621860e86569f00895bb1e73286f4558f --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml b/gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml new file mode 100644 index 0000000000000000000000000000000000000000..f27f265551c741c4eab32d238869d5c43357cb84 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml @@ -0,0 +1,254 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl new file mode 100644 index 0000000000000000000000000000000000000000..7bc5e20e0687f593401d369224d6f36d78f37534 Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl differ diff --git 
a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl new file mode 100644 index 0000000000000000000000000000000000000000..4faedd75409416ac79c8481c067bddff06eac19c Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl new file mode 100644 index 0000000000000000000000000000000000000000..535cf4dbca59590f813c6dcc8abbd10bc7810a3a Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/fetch/reach.py b/gym-0.21.0/gym/envs/robotics/fetch/reach.py new file mode 100644 index 0000000000000000000000000000000000000000..67520b95064cb675a2430d7b035a4fd8dd9b89c7 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/fetch/reach.py @@ -0,0 +1,32 @@ +import os +from gym import utils +from gym.envs.robotics import fetch_env + + +# Ensure we get the path separator correct on windows +MODEL_XML_PATH = os.path.join("fetch", "reach.xml") + + +class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle): + def __init__(self, reward_type="sparse"): + initial_qpos = { + "robot0:slide0": 0.4049, + "robot0:slide1": 0.48, + "robot0:slide2": 0.0, + } + fetch_env.FetchEnv.__init__( + self, + MODEL_XML_PATH, + has_object=False, + block_gripper=True, + n_substeps=20, + gripper_extra_height=0.2, + target_in_the_air=True, + target_offset=0.0, + obj_range=0.15, + target_range=0.15, + distance_threshold=0.05, + initial_qpos=initial_qpos, + reward_type=reward_type, + ) + utils.EzPickle.__init__(self, reward_type=reward_type) diff --git a/gym-0.21.0/gym/envs/robotics/hand/__init__.py b/gym-0.21.0/gym/envs/robotics/hand/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/gym-0.21.0/gym/envs/robotics/hand/manipulate.py b/gym-0.21.0/gym/envs/robotics/hand/manipulate.py new file mode 100644 index 0000000000000000000000000000000000000000..e3eca062d8b96b9a1dc1afbae74de0d8cdcb6fde --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/hand/manipulate.py @@ -0,0 +1,354 @@ +import os +import numpy as np + +from gym import utils, error +from gym.envs.robotics import rotations, hand_env +from gym.envs.robotics.utils import robot_get_obs + +try: + import mujoco_py +except ImportError as e: + raise error.DependencyNotInstalled( + "{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format( + e + ) + ) + + +def quat_from_angle_and_axis(angle, axis): + assert axis.shape == (3,) + axis /= np.linalg.norm(axis) + quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis]) + quat /= np.linalg.norm(quat) + return quat + + +# Ensure we get the path separator correct on windows +MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block.xml") +MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg.xml") +MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen.xml") + + +class ManipulateEnv(hand_env.HandEnv): + def __init__( + self, + model_path, + target_position, + target_rotation, + target_position_range, + reward_type, + initial_qpos=None, + randomize_initial_position=True, + randomize_initial_rotation=True, + distance_threshold=0.01, + rotation_threshold=0.1, + n_substeps=20, + relative_control=False, + ignore_z_target_rotation=False, + ): + """Initializes a new Hand manipulation environment. + + Args: + model_path (string): path to the environments XML file + target_position (string): the type of target position: + - ignore: target position is fully ignored, i.e. 
the object can be positioned arbitrarily + - fixed: target position is set to the initial position of the object + - random: target position is fully randomized according to target_position_range + target_rotation (string): the type of target rotation: + - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily + - fixed: target rotation is set to the initial rotation of the object + - xyz: fully randomized target rotation around the X, Y and Z axis + - z: fully randomized target rotation around the Z axis + - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y + ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored + target_position_range (np.array of shape (3, 2)): range of the target_position randomization + reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense + initial_qpos (dict): a dictionary of joint names and values that define the initial configuration + randomize_initial_position (boolean): whether or not to randomize the initial position of the object + randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object + distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved + rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved + n_substeps (int): number of substeps the simulation runs on every call to step + relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state + """ + self.target_position = target_position + self.target_rotation = target_rotation + self.target_position_range = target_position_range + self.parallel_quats = [ + rotations.euler2quat(r) for r in rotations.get_parallel_rotations() + ] + self.randomize_initial_rotation = randomize_initial_rotation + self.randomize_initial_position = 
randomize_initial_position + self.distance_threshold = distance_threshold + self.rotation_threshold = rotation_threshold + self.reward_type = reward_type + self.ignore_z_target_rotation = ignore_z_target_rotation + + assert self.target_position in ["ignore", "fixed", "random"] + assert self.target_rotation in ["ignore", "fixed", "xyz", "z", "parallel"] + initial_qpos = initial_qpos or {} + + hand_env.HandEnv.__init__( + self, + model_path, + n_substeps=n_substeps, + initial_qpos=initial_qpos, + relative_control=relative_control, + ) + + def _get_achieved_goal(self): + # Object position and rotation. + object_qpos = self.sim.data.get_joint_qpos("object:joint") + assert object_qpos.shape == (7,) + return object_qpos + + def _goal_distance(self, goal_a, goal_b): + assert goal_a.shape == goal_b.shape + assert goal_a.shape[-1] == 7 + + d_pos = np.zeros_like(goal_a[..., 0]) + d_rot = np.zeros_like(goal_b[..., 0]) + if self.target_position != "ignore": + delta_pos = goal_a[..., :3] - goal_b[..., :3] + d_pos = np.linalg.norm(delta_pos, axis=-1) + + if self.target_rotation != "ignore": + quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] + + if self.ignore_z_target_rotation: + # Special case: We want to ignore the Z component of the rotation. + # This code here assumes Euler angles with xyz convention. We first transform + # to euler, then set the Z component to be equal between the two, and finally + # transform back into quaternions. + euler_a = rotations.quat2euler(quat_a) + euler_b = rotations.quat2euler(quat_b) + euler_a[2] = euler_b[2] + quat_a = rotations.euler2quat(euler_a) + + # Subtract quaternions and extract angle between them. 
+ quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) + angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0)) + d_rot = angle_diff + assert d_pos.shape == d_rot.shape + return d_pos, d_rot + + # GoalEnv methods + # ---------------------------- + + def compute_reward(self, achieved_goal, goal, info): + if self.reward_type == "sparse": + success = self._is_success(achieved_goal, goal).astype(np.float32) + return success - 1.0 + else: + d_pos, d_rot = self._goal_distance(achieved_goal, goal) + # We weigh the difference in position to avoid that `d_pos` (in meters) is completely + # dominated by `d_rot` (in radians). + return -(10.0 * d_pos + d_rot) + + # RobotEnv methods + # ---------------------------- + + def _is_success(self, achieved_goal, desired_goal): + d_pos, d_rot = self._goal_distance(achieved_goal, desired_goal) + achieved_pos = (d_pos < self.distance_threshold).astype(np.float32) + achieved_rot = (d_rot < self.rotation_threshold).astype(np.float32) + achieved_both = achieved_pos * achieved_rot + return achieved_both + + def _env_setup(self, initial_qpos): + for name, value in initial_qpos.items(): + self.sim.data.set_joint_qpos(name, value) + self.sim.forward() + + def _reset_sim(self): + self.sim.set_state(self.initial_state) + self.sim.forward() + + initial_qpos = self.sim.data.get_joint_qpos("object:joint").copy() + initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:] + assert initial_qpos.shape == (7,) + assert initial_pos.shape == (3,) + assert initial_quat.shape == (4,) + initial_qpos = None + + # Randomization initial rotation. 
+ if self.randomize_initial_rotation: + if self.target_rotation == "z": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + offset_quat = quat_from_angle_and_axis(angle, axis) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation == "parallel": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + z_quat = quat_from_angle_and_axis(angle, axis) + parallel_quat = self.parallel_quats[ + self.np_random.randint(len(self.parallel_quats)) + ] + offset_quat = rotations.quat_mul(z_quat, parallel_quat) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation in ["xyz", "ignore"]: + angle = self.np_random.uniform(-np.pi, np.pi) + axis = self.np_random.uniform(-1.0, 1.0, size=3) + offset_quat = quat_from_angle_and_axis(angle, axis) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation == "fixed": + pass + else: + raise error.Error( + 'Unknown target_rotation option "{}".'.format(self.target_rotation) + ) + + # Randomize initial position. + if self.randomize_initial_position: + if self.target_position != "fixed": + initial_pos += self.np_random.normal(size=3, scale=0.005) + + initial_quat /= np.linalg.norm(initial_quat) + initial_qpos = np.concatenate([initial_pos, initial_quat]) + self.sim.data.set_joint_qpos("object:joint", initial_qpos) + + def is_on_palm(): + self.sim.forward() + cube_middle_idx = self.sim.model.site_name2id("object:center") + cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx] + is_on_palm = cube_middle_pos[2] > 0.04 + return is_on_palm + + # Run the simulation for a bunch of timesteps to let everything settle in. + for _ in range(10): + self._set_action(np.zeros(20)) + try: + self.sim.step() + except mujoco_py.MujocoException: + return False + return is_on_palm() + + def _sample_goal(self): + # Select a goal for the object position. 
+ target_pos = None + if self.target_position == "random": + assert self.target_position_range.shape == (3, 2) + offset = self.np_random.uniform( + self.target_position_range[:, 0], self.target_position_range[:, 1] + ) + assert offset.shape == (3,) + target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + offset + elif self.target_position in ["ignore", "fixed"]: + target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + else: + raise error.Error( + 'Unknown target_position option "{}".'.format(self.target_position) + ) + assert target_pos is not None + assert target_pos.shape == (3,) + + # Select a goal for the object rotation. + target_quat = None + if self.target_rotation == "z": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + target_quat = quat_from_angle_and_axis(angle, axis) + elif self.target_rotation == "parallel": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + target_quat = quat_from_angle_and_axis(angle, axis) + parallel_quat = self.parallel_quats[ + self.np_random.randint(len(self.parallel_quats)) + ] + target_quat = rotations.quat_mul(target_quat, parallel_quat) + elif self.target_rotation == "xyz": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = self.np_random.uniform(-1.0, 1.0, size=3) + target_quat = quat_from_angle_and_axis(angle, axis) + elif self.target_rotation in ["ignore", "fixed"]: + target_quat = self.sim.data.get_joint_qpos("object:joint") + else: + raise error.Error( + 'Unknown target_rotation option "{}".'.format(self.target_rotation) + ) + assert target_quat is not None + assert target_quat.shape == (4,) + + target_quat /= np.linalg.norm(target_quat) # normalized quaternion + goal = np.concatenate([target_pos, target_quat]) + return goal + + def _render_callback(self): + # Assign current state to target object but offset a bit so that the actual object + # is not obscured. 
+ goal = self.goal.copy() + assert goal.shape == (7,) + if self.target_position == "ignore": + # Move the object to the side since we do not care about it's position. + goal[0] += 0.15 + self.sim.data.set_joint_qpos("target:joint", goal) + self.sim.data.set_joint_qvel("target:joint", np.zeros(6)) + + if "object_hidden" in self.sim.model.geom_names: + hidden_id = self.sim.model.geom_name2id("object_hidden") + self.sim.model.geom_rgba[hidden_id, 3] = 1.0 + self.sim.forward() + + def _get_obs(self): + robot_qpos, robot_qvel = robot_get_obs(self.sim) + object_qvel = self.sim.data.get_joint_qvel("object:joint") + achieved_goal = ( + self._get_achieved_goal().ravel() + ) # this contains the object position + rotation + observation = np.concatenate( + [robot_qpos, robot_qvel, object_qvel, achieved_goal] + ) + return { + "observation": observation.copy(), + "achieved_goal": achieved_goal.copy(), + "desired_goal": self.goal.ravel().copy(), + } + + +class HandBlockEnv(ManipulateEnv, utils.EzPickle): + def __init__( + self, target_position="random", target_rotation="xyz", reward_type="sparse" + ): + utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) + ManipulateEnv.__init__( + self, + model_path=MANIPULATE_BLOCK_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, + ) + + +class HandEggEnv(ManipulateEnv, utils.EzPickle): + def __init__( + self, target_position="random", target_rotation="xyz", reward_type="sparse" + ): + utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) + ManipulateEnv.__init__( + self, + model_path=MANIPULATE_EGG_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, + ) + + +class HandPenEnv(ManipulateEnv, utils.EzPickle): + def __init__( + self, 
target_position="random", target_rotation="xyz", reward_type="sparse" + ): + utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) + ManipulateEnv.__init__( + self, + model_path=MANIPULATE_PEN_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + randomize_initial_rotation=False, + reward_type=reward_type, + ignore_z_target_rotation=True, + distance_threshold=0.05, + ) diff --git a/gym-0.21.0/gym/envs/robotics/rotations.py b/gym-0.21.0/gym/envs/robotics/rotations.py new file mode 100644 index 0000000000000000000000000000000000000000..fe241a10885716ab7caaf4a5ecaaa690f416ed54 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/rotations.py @@ -0,0 +1,387 @@ +# Copyright (c) 2009-2017, Matthew Brett and Christoph Gohlke +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Many methods borrow heavily or entirely from transforms3d:
# https://github.com/matthew-brett/transforms3d
# They have mostly been modified to support batched operations.

import numpy as np
import itertools

"""
Rotations
=========

Note: these have caused many subtle bugs in the past.
Be careful while updating these methods and while using them in clever ways.

See MuJoCo documentation here: http://mujoco.org/book/modeling.html#COrientation

Conventions
-----------
    - All functions accept batches as well as individual rotations
    - All rotation conventions match respective MuJoCo defaults
    - All angles are in radians
    - Matrices follow LR convention
    - Euler Angles are all relative with 'xyz' axes ordering
    - See specific representation for more information

Representations
---------------

Euler
    There are many euler angle frames -- here we will strive to use the default
    in MuJoCo, which is eulerseq='xyz'.
    This frame is a relative rotating frame, about x, y, and z axes in order.
    Relative rotating means that after we rotate about x, then we use the
    new (rotated) y, and the same for z.

Quaternions
    These are defined in terms of rotation (angle) about a unit vector (x, y, z)
    We use the following convention:
    q0 = cos(angle / 2)
    q1 = sin(angle / 2) * x
    q2 = sin(angle / 2) * y
    q3 = sin(angle / 2) * z
    This is also sometimes called qw, qx, qy, qz.
    Note that quaternions are ambiguous, because we can represent a rotation by
    angle about vector <x, y, z> and -angle about vector <-x, -y, -z>.
    To choose between these, we pick "first nonzero positive", where we
    make the first nonzero element of the quaternion positive.
    This can result in mismatches if you're converting a quaternion that is not
    "first nonzero positive" to a different representation and back.

Axis Angle
    (Not currently implemented)
    These are very straightforward.  Rotation is angle about a unit vector.

XY Axes
    (Not currently implemented)
    We are given x axis and y axis, and z axis is cross product of x and y.

Z Axis
    This is NOT RECOMMENDED.  Defines a unit vector for the Z axis,
    but rotation about this axis is not well defined.
    Instead pick a fixed reference direction for another axis (e.g. X)
    and calculate the other (e.g. Y = Z cross-product X),
    then use XY Axes rotation instead.

SO3
    (Not currently implemented)
    While not supported by MuJoCo, this representation has a lot of nice features.
    We expect to add support for these in the future.

TODO / Missing
--------------
    - Rotation integration or derivatives (e.g. velocity conversions)
    - More representations (SO3, etc)
    - Random sampling (e.g. sample uniform random rotation)
    - Performance benchmarks/measurements
    - (Maybe) define everything as to/from matrices, for simplicity
"""

# For testing whether a number is close to zero.
_FLOAT_EPS = np.finfo(np.float64).eps
_EPS4 = _FLOAT_EPS * 4.0


def euler2mat(euler):
    """Convert Euler Angles to Rotation Matrix.  See rotation notes above."""
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)

    ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0]
    si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
    ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64)
    mat[..., 2, 2] = cj * ck
    mat[..., 2, 1] = sj * sc - cs
    mat[..., 2, 0] = sj * cc + ss
    mat[..., 1, 2] = cj * sk
    mat[..., 1, 1] = sj * ss + cc
    mat[..., 1, 0] = sj * cs - sc
    mat[..., 0, 2] = -sj
    mat[..., 0, 1] = cj * si
    mat[..., 0, 0] = cj * ci
    return mat


def euler2quat(euler):
    """Convert Euler Angles to Quaternions.  See rotation notes above."""
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)

    ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2
    si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
    ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64)
    quat[..., 0] = cj * cc + sj * ss
    quat[..., 3] = cj * sc - sj * cs
    quat[..., 2] = -(cj * ss + sj * cc)
    quat[..., 1] = cj * cs - sj * sc
    return quat


def mat2euler(mat):
    """Convert Rotation Matrix to Euler Angles.  See rotation notes above."""
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2])
    # Gimbal-lock guard: when cy ~ 0, the x and z rotations are degenerate, so
    # the x angle is fixed to 0 and z absorbs the full rotation.
    condition = cy > _EPS4
    euler = np.empty(mat.shape[:-1], dtype=np.float64)
    euler[..., 2] = np.where(
        condition,
        -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
        -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]),
    )
    # Both np.where branches of the original were the identical expression,
    # so the select is dropped; the result is unchanged.
    euler[..., 1] = -np.arctan2(-mat[..., 0, 2], cy)
    euler[..., 0] = np.where(
        condition, -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 0.0
    )
    return euler


def mat2quat(mat):
    """Convert Rotation Matrix to Quaternion.  See rotation notes above."""
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
    Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
    Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2]
    # Fill only lower half of symmetric matrix
    K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64)
    K[..., 0, 0] = Qxx - Qyy - Qzz
    K[..., 1, 0] = Qyx + Qxy
    K[..., 1, 1] = Qyy - Qxx - Qzz
    K[..., 2, 0] = Qzx + Qxz
    K[..., 2, 1] = Qzy + Qyz
    K[..., 2, 2] = Qzz - Qxx - Qyy
    K[..., 3, 0] = Qyz - Qzy
    K[..., 3, 1] = Qzx - Qxz
    K[..., 3, 2] = Qxy - Qyx
    K[..., 3, 3] = Qxx + Qyy + Qzz
    K /= 3.0
    # TODO: vectorize this -- probably could be made faster
    q = np.empty(K.shape[:-2] + (4,))
    it = np.nditer(q[..., 0], flags=["multi_index"])
    while not it.finished:
        # Use Hermitian eigenvectors, values for speed
        vals, vecs = np.linalg.eigh(K[it.multi_index])
        # Select largest eigenvector, reorder to w,x,y,z quaternion
        q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)]
        # Prefer quaternion with positive w
        # (q * -1 corresponds to same rotation as q)
        if q[it.multi_index][0] < 0:
            q[it.multi_index] *= -1
        it.iternext()
    return q


def quat2euler(quat):
    """Convert Quaternion to Euler Angles.  See rotation notes above."""
    return mat2euler(quat2mat(quat))


def subtract_euler(e1, e2):
    """Return the Euler-angle difference between rotations ``e1`` and ``e2``."""
    assert e1.shape == e2.shape
    assert e1.shape[-1] == 3
    q1 = euler2quat(e1)
    q2 = euler2quat(e2)
    q_diff = quat_mul(q1, quat_conjugate(q2))
    return quat2euler(q_diff)


def quat2mat(quat):
    """Convert Quaternion to Rotation Matrix.  See rotation notes above.

    (The original docstring said "Euler Angles", which was wrong.)
    """
    quat = np.asarray(quat, dtype=np.float64)
    assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)

    w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3]
    Nq = np.sum(quat * quat, axis=-1)
    s = 2.0 / Nq
    X, Y, Z = x * s, y * s, z * s
    wX, wY, wZ = w * X, w * Y, w * Z
    xX, xY, xZ = x * X, x * Y, x * Z
    yY, yZ, zZ = y * Y, y * Z, z * Z

    mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64)
    mat[..., 0, 0] = 1.0 - (yY + zZ)
    mat[..., 0, 1] = xY - wZ
    mat[..., 0, 2] = xZ + wY
    mat[..., 1, 0] = xY + wZ
    mat[..., 1, 1] = 1.0 - (xX + zZ)
    mat[..., 1, 2] = yZ - wX
    mat[..., 2, 0] = xZ - wY
    mat[..., 2, 1] = yZ + wX
    mat[..., 2, 2] = 1.0 - (xX + yY)
    # Near-zero-norm quaternions fall back to the identity rotation.
    return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3))


def quat_conjugate(q):
    """Return the conjugate (inverse rotation for unit quaternions) of ``q``."""
    inv_q = -q
    inv_q[..., 0] *= -1
    return inv_q


def quat_mul(q0, q1):
    """Hamilton product of two (possibly batched) wxyz quaternions."""
    assert q0.shape == q1.shape
    assert q0.shape[-1] == 4
    assert q1.shape[-1] == 4

    w0 = q0[..., 0]
    x0 = q0[..., 1]
    y0 = q0[..., 2]
    z0 = q0[..., 3]

    w1 = q1[..., 0]
    x1 = q1[..., 1]
    y1 = q1[..., 2]
    z1 = q1[..., 3]

    w = w0 * w1 - x0 * x1 - y0 * y1 - z0 * z1
    x = w0 * x1 + x0 * w1 + y0 * z1 - z0 * y1
    y = w0 * y1 + y0 * w1 + z0 * x1 - x0 * z1
    z = w0 * z1 + z0 * w1 + x0 * y1 - y0 * x1
    q = np.array([w, x, y, z])
    if q.ndim == 2:
        # np.array stacked the components on axis 0; restore batch-first order.
        q = q.swapaxes(0, 1)
    assert q.shape == q0.shape
    return q


def quat_rot_vec(q, v0):
    """Rotate 3-vector ``v0`` by quaternion ``q`` (via q * v * q^-1)."""
    q_v0 = np.array([0, v0[0], v0[1], v0[2]])
    q_v = quat_mul(q, quat_mul(q_v0, quat_conjugate(q)))
    v = q_v[1:]
    return v


def quat_identity():
    """Return the identity quaternion [1, 0, 0, 0]."""
    return np.array([1, 0, 0, 0])


def quat2axisangle(quat):
    """Convert a single quaternion to an (axis, angle) pair."""
    theta = 0
    axis = np.array([0, 0, 1])
    sin_theta = np.linalg.norm(quat[1:])

    if sin_theta > 0.0001:
        theta = 2 * np.arcsin(sin_theta)
        theta *= 1 if quat[0] >= 0 else -1
        axis = quat[1:] / sin_theta

    return axis, theta


def euler2point_euler(euler):
    """Encode Euler angles as (sin, cos) pairs -> shape (..., 6)."""
    _euler = euler.copy()
    if len(_euler.shape) < 2:
        _euler = np.expand_dims(_euler, 0)
    assert _euler.shape[1] == 3
    _euler_sin = np.sin(_euler)
    _euler_cos = np.cos(_euler)
    return np.concatenate([_euler_sin, _euler_cos], axis=-1)


def point_euler2euler(euler):
    """Inverse of :func:`euler2point_euler`: (sin, cos) pairs -> angles."""
    _euler = euler.copy()
    if len(_euler.shape) < 2:
        _euler = np.expand_dims(_euler, 0)
    assert _euler.shape[1] == 6
    angle = np.arctan(_euler[..., :3] / _euler[..., 3:])
    angle[_euler[..., 3:] < 0] += np.pi
    return angle


def quat2point_quat(quat):
    """Encode a wxyz quaternion as (sin(angle), cos(angle), axis*...) -> (..., 5)."""
    # Should be in qw, qx, qy, qz
    _quat = quat.copy()
    if len(_quat.shape) < 2:
        _quat = np.expand_dims(_quat, 0)
    assert _quat.shape[1] == 4
    angle = np.arccos(_quat[:, [0]]) * 2
    xyz = _quat[:, 1:]
    xyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (xyz / np.sin(angle / 2))[
        np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
    ]
    return np.concatenate([np.sin(angle), np.cos(angle), xyz], axis=-1)


def point_quat2quat(quat):
    """Inverse of :func:`quat2point_quat`."""
    _quat = quat.copy()
    if len(_quat.shape) < 2:
        _quat = np.expand_dims(_quat, 0)
    assert _quat.shape[1] == 5
    angle = np.arctan(_quat[:, [0]] / _quat[:, [1]])
    qw = np.cos(angle / 2)

    qxyz = _quat[:, 2:]
    qxyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (qxyz * np.sin(angle / 2))[
        np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
    ]
    return np.concatenate([qw, qxyz], axis=-1)


def normalize_angles(angles):
    """Puts angles in [-pi, pi] range."""
    angles = angles.copy()
    if angles.size > 0:
        angles = (angles + np.pi) % (2 * np.pi) - np.pi
        assert -np.pi - 1e-6 <= angles.min() and angles.max() <= np.pi + 1e-6
    return angles


def round_to_straight_angles(angles):
    """Returns closest angle modulo 90 degrees"""
    angles = np.round(angles / (np.pi / 2)) * (np.pi / 2)
    return normalize_angles(angles)


def get_parallel_rotations():
    """Return the 24 canonical axis-aligned (multiple-of-90-degree) rotations."""
    mult90 = [0, np.pi / 2, -np.pi / 2, np.pi]
    parallel_rotations = []
    for euler in itertools.product(mult90, repeat=3):
        canonical = mat2euler(euler2mat(euler))
        canonical = np.round(canonical / (np.pi / 2))
        # Collapse the ambiguous -pi / +pi representations to a single one.
        if canonical[0] == -2:
            canonical[0] = 2
        if canonical[2] == -2:
            canonical[2] = 2
        canonical *= np.pi / 2
        if all([(canonical != rot).any() for rot in parallel_rotations]):
            parallel_rotations += [canonical]
    assert len(parallel_rotations) == 24
    return parallel_rotations
# --- gym/spaces/discrete.py (class header visible at the end of the previous
# hunk; the whole class is reconstructed here for completeness) ---
import numpy as np
from .space import Space


class Discrete(Space):
    r"""A discrete space in :math:`\{ 0, 1, \dots, n-1 \}`.

    (Fixed the original ``\\dots`` double escape, which rendered a literal
    backslash inside the raw docstring.)

    Example::

        >>> Discrete(2)

    """

    def __init__(self, n, seed=None):
        """Create a Discrete space with ``n`` elements.

        Samples are scalars (empty shape) of dtype int64.
        """
        assert n >= 0
        self.n = n
        super(Discrete, self).__init__((), np.int64, seed)

    def sample(self):
        """Return a uniformly random integer in ``[0, n)``."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """Return True if ``x`` is a Python int or scalar numpy integer in ``[0, n)``."""
        if isinstance(x, int):
            as_int = x
        elif isinstance(x, (np.generic, np.ndarray)) and (
            x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
        ):
            as_int = int(x)
        else:
            return False
        return as_int >= 0 and as_int < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        return isinstance(other, Discrete) and self.n == other.n


# --- gym/utils/__init__.py (its final import lines continue in the next hunk) ---
"""A set of common utilities used within the environments. These are
not intended as API functions, and will not remain stable over time.
"""

# These submodules should not have any import-time dependencies.
# We want this since we use `utils` during our import-time sanity checks
# that verify that our dependencies are actually present.
Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python + +import os +from contextlib import contextmanager + +# We would ideally atomically replace any existing file with the new +# version. However, on Windows there's no Python-only solution prior +# to Python 3.3. (This library includes a C extension to do so: +# https://pypi.python.org/pypi/pyosreplace/0.1.) +# +# Correspondingly, we make a best effort, but on Python < 3.3 use a +# replace method which could result in the file temporarily +# disappearing. +import sys + +if sys.version_info >= (3, 3): + # Python 3.3 and up have a native `replace` method + from os import replace +elif sys.platform.startswith("win"): + + def replace(src, dst): + # TODO: on Windows, this will raise if the file is in use, + # which is possible. We'll need to make this more robust over + # time. + try: + os.remove(dst) + except OSError: + pass + os.rename(src, dst) + + +else: + # POSIX rename() is always atomic + from os import rename as replace + + +@contextmanager +def atomic_write(filepath, binary=False, fsync=False): + """Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked. 
+ + :param filepath: the file path to be opened + :param binary: whether to open the file in a binary mode instead of textual + :param fsync: whether to force write the file to disk + """ + + tmppath = filepath + "~" + while os.path.isfile(tmppath): + tmppath += "~" + try: + with open(tmppath, "wb" if binary else "w") as file: + yield file + if fsync: + file.flush() + os.fsync(file.fileno()) + replace(tmppath, filepath) + finally: + try: + os.remove(tmppath) + except (IOError, OSError): + pass diff --git a/gym-0.21.0/gym/utils/closer.py b/gym-0.21.0/gym/utils/closer.py new file mode 100644 index 0000000000000000000000000000000000000000..635b65f0c7c2fd29451a46d42a683d7017dfb0a8 --- /dev/null +++ b/gym-0.21.0/gym/utils/closer.py @@ -0,0 +1,68 @@ +import atexit +import threading +import weakref + + +class Closer(object): + """A registry that ensures your objects get closed, whether manually, + upon garbage collection, or upon exit. To work properly, your + objects need to cooperate and do something like the following: + + ``` + closer = Closer() + class Example(object): + def __init__(self): + self._id = closer.register(self) + + def close(self): + # Probably worth making idempotent too! + ... + closer.unregister(self._id) + + def __del__(self): + self.close() + ``` + + That is, your objects should: + + - register() themselves and save the returned ID + - unregister() themselves upon close() + - include a __del__ method which close()'s the object + """ + + def __init__(self, atexit_register=True): + self.lock = threading.Lock() + self.next_id = -1 + self.closeables = weakref.WeakValueDictionary() + + if atexit_register: + atexit.register(self.close) + + def generate_next_id(self): + with self.lock: + self.next_id += 1 + return self.next_id + + def register(self, closeable): + """Registers an object with a 'close' method. + + Returns: + int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired. 
+ """ + assert hasattr(closeable, "close"), "No close method for {}".format(closeable) + + next_id = self.generate_next_id() + self.closeables[next_id] = closeable + return next_id + + def unregister(self, id): + assert id is not None + if id in self.closeables: + del self.closeables[id] + + def close(self): + # Explicitly fetch all monitors first so that they can't disappear while + # we iterate. cf. http://stackoverflow.com/a/12429620 + closeables = list(self.closeables.values()) + for closeable in closeables: + closeable.close() diff --git a/gym-0.21.0/gym/utils/play.py b/gym-0.21.0/gym/utils/play.py new file mode 100644 index 0000000000000000000000000000000000000000..2e06f4a32e650a3e223c25a8da33d0b7a46d169a --- /dev/null +++ b/gym-0.21.0/gym/utils/play.py @@ -0,0 +1,196 @@ +import gym +import pygame +import matplotlib +import argparse +from gym import logger + +try: + matplotlib.use("TkAgg") + import matplotlib.pyplot as plt +except ImportError as e: + logger.warn("failed to set matplotlib backend, plotting will not work: %s" % str(e)) + plt = None + +from collections import deque +from pygame.locals import VIDEORESIZE + + +def display_arr(screen, arr, video_size, transpose): + arr_min, arr_max = arr.min(), arr.max() + arr = 255.0 * (arr - arr_min) / (arr_max - arr_min) + pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr) + pyg_img = pygame.transform.scale(pyg_img, video_size) + screen.blit(pyg_img, (0, 0)) + + +def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None): + """Allows one to play the game using keyboard. + + To simply play the game use: + + play(gym.make("Pong-v4")) + + Above code works also if env is wrapped, so it's particularly useful in + verifying that the frame-level preprocessing does not render the game + unplayable. + + If you wish to plot real time statistics as you play, you can use + gym.utils.play.PlayPlot. 
def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Allows one to play the game using keyboard.

    To simply play the game use:

        play(gym.make("Pong-v4"))

    Above code works also if env is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game
    unplayable.

    If you wish to plot real time statistics as you play, you can use
    gym.utils.play.PlayPlot. Here's a sample code for plotting the reward
    for last 5 second of gameplay.

        def callback(obs_t, obs_tp1, action, rew, done, info):
            return [rew,]
        plotter = PlayPlot(callback, 30 * 5, ["reward"])

        env = gym.make("Pong-v4")
        play(env, callback=plotter.callback)


    Arguments
    ---------
    env: gym.Env
        Environment to use for playing.
    transpose: bool
        If True the output of observation is transposed.
        Defaults to true.
    fps: int
        Maximum number of steps of the environment to execute every second.
        Defaults to 30.
    zoom: float
        Make screen edge this many times bigger
    callback: lambda or None
        Callback if a callback is provided it will be executed after
        every step. It takes the following input:
            obs_t: observation before performing action
            obs_tp1: observation after performing action
            action: action that was executed
            rew: reward that was received
            done: whether the environment is done or not
            info: debug info
    keys_to_action: dict: tuple(int) -> int or None
        Mapping from keys pressed to action performed.
        For example if pressing 'w' and space at the same time is supposed
        to trigger action number 2 then key_to_action dict would look like this:

            {
                # ...
                tuple(sorted([ord('w'), ord(' ')])): 2
                # ...
            }

        (The original example read ``sorted(ord('w'), ord(' ')) -> 2``, which
        is not valid -- ``sorted`` takes a single iterable, and the lookup key
        must be a sorted tuple, as the implementation below shows.)
        If None, default key_to_action mapping for that env is used, if provided.
    """
    env.reset()
    rendered = env.render(mode="rgb_array")

    # Resolve the key mapping: explicit argument, env hook, or fail loudly.
    if keys_to_action is None:
        if hasattr(env, "get_keys_to_action"):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, "get_keys_to_action"):
            keys_to_action = env.unwrapped.get_keys_to_action()
        else:
            assert False, (
                env.spec.id
                + " does not have explicit key to action mapping, "
                + "please specify one manually"
            )
    relevant_keys = set(sum(map(list, keys_to_action.keys()), []))

    # Window size follows the rendered frame (note: shape is (h, w, 3)).
    video_size = [rendered.shape[1], rendered.shape[0]]
    if zoom is not None:
        video_size = int(video_size[0] * zoom), int(video_size[1] * zoom)

    pressed_keys = []
    running = True
    env_done = True

    screen = pygame.display.set_mode(video_size)
    clock = pygame.time.Clock()

    while running:
        if env_done:
            env_done = False
            obs = env.reset()
        else:
            # Currently held keys select the action; unmapped combos act as 0.
            action = keys_to_action.get(tuple(sorted(pressed_keys)), 0)
            prev_obs = obs
            obs, rew, env_done, info = env.step(action)
            if callback is not None:
                callback(prev_obs, obs, action, rew, env_done, info)
        if obs is not None:
            rendered = env.render(mode="rgb_array")
            display_arr(screen, rendered, transpose=transpose, video_size=video_size)

        # process pygame events
        for event in pygame.event.get():
            # test events, set key states
            if event.type == pygame.KEYDOWN:
                if event.key in relevant_keys:
                    pressed_keys.append(event.key)
                elif event.key == 27:  # ESC quits
                    running = False
            elif event.type == pygame.KEYUP:
                if event.key in relevant_keys:
                    pressed_keys.remove(event.key)
            elif event.type == pygame.QUIT:
                running = False
            elif event.type == VIDEORESIZE:
                video_size = event.size
                screen = pygame.display.set_mode(video_size)
                print(video_size)

        pygame.display.flip()
        clock.tick(fps)
    pygame.quit()


class PlayPlot(object):
    """Live scatter plot of per-step statistics produced by a user callback.

    ``callback`` receives (obs_t, obs_tp1, action, rew, done, info) and must
    return one value per plot in ``plot_names``; the last
    ``horizon_timesteps`` values of each series are shown.
    """

    def __init__(self, callback, horizon_timesteps, plot_names):
        self.data_callback = callback
        self.horizon_timesteps = horizon_timesteps
        self.plot_names = plot_names

        assert plt is not None, "matplotlib backend failed, plotting will not work"

        num_plots = len(self.plot_names)
        self.fig, self.ax = plt.subplots(num_plots)
        if num_plots == 1:
            # plt.subplots(1) returns a bare Axes; normalize to a list.
            self.ax = [self.ax]
        for axis, name in zip(self.ax, plot_names):
            axis.set_title(name)
        self.t = 0
        self.cur_plot = [None for _ in range(num_plots)]
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]

    def callback(self, obs_t, obs_tp1, action, rew, done, info):
        """Record one step's statistics and redraw every plot."""
        points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
        for point, data_series in zip(points, self.data):
            data_series.append(point)
        self.t += 1

        xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t

        for i, plot in enumerate(self.cur_plot):
            if plot is not None:
                plot.remove()
            self.cur_plot[i] = self.ax[i].scatter(
                range(xmin, xmax), list(self.data[i]), c="blue"
            )
            self.ax[i].set_xlim(xmin, xmax)
        plt.pause(0.000001)


def main():
    """CLI entry point: play the given env with keyboard controls."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--env",
        type=str,
        default="MontezumaRevengeNoFrameskip-v4",
        help="Define Environment",
    )
    args = parser.parse_args()
    env = gym.make(args.env)
    play(env, zoom=4, fps=60)


if __name__ == "__main__":
    main()

# NOTE(review): the diff then adds binary gym/vector __pycache__/*.pyc files;
# binary payloads are not reproduced.
b/gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a6a9be2a6781c17f13f1716bf7ec57c4d9ca8a9 Binary files /dev/null and b/gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/vector/utils/__pycache__/shared_memory.cpython-38.pyc b/gym-0.21.0/gym/vector/utils/__pycache__/shared_memory.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71cbda2ea184a17f9443394e0e9c8fab5dbd988a Binary files /dev/null and b/gym-0.21.0/gym/vector/utils/__pycache__/shared_memory.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/vector/utils/misc.py b/gym-0.21.0/gym/vector/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..aa38253dabf1aadebbe89243d2888fd3fa290948 --- /dev/null +++ b/gym-0.21.0/gym/vector/utils/misc.py @@ -0,0 +1,44 @@ +import contextlib +import os + +__all__ = ["CloudpickleWrapper", "clear_mpi_env_vars"] + + +class CloudpickleWrapper(object): + def __init__(self, fn): + self.fn = fn + + def __getstate__(self): + import cloudpickle + + return cloudpickle.dumps(self.fn) + + def __setstate__(self, ob): + import pickle + + self.fn = pickle.loads(ob) + + def __call__(self): + return self.fn() + + +@contextlib.contextmanager +def clear_mpi_env_vars(): + """ + `from mpi4py import MPI` will call `MPI_Init` by default. If the child + process has MPI environment variables, MPI will think that the child process + is an MPI process just like the parent and do bad things such as hang. + + This context manager is a hacky way to clear those environment variables + temporarily such as when we are starting multiprocessing Processes. 
+ """ + removed_environment = {} + for k, v in list(os.environ.items()): + for prefix in ["OMPI_", "PMI_"]: + if k.startswith(prefix): + removed_environment[k] = v + del os.environ[k] + try: + yield + finally: + os.environ.update(removed_environment) diff --git a/gym-0.21.0/gym/vector/utils/spaces.py b/gym-0.21.0/gym/vector/utils/spaces.py new file mode 100644 index 0000000000000000000000000000000000000000..ac4727de5a08fa3d77e14177bf38b3d94fa5fe4e --- /dev/null +++ b/gym-0.21.0/gym/vector/utils/spaces.py @@ -0,0 +1,89 @@ +import numpy as np +from collections import OrderedDict + +from gym.spaces import Space, Box, Discrete, MultiDiscrete, MultiBinary, Tuple, Dict + +_BaseGymSpaces = (Box, Discrete, MultiDiscrete, MultiBinary) +__all__ = ["_BaseGymSpaces", "batch_space"] + + +def batch_space(space, n=1): + """Create a (batched) space, containing multiple copies of a single space. + + Parameters + ---------- + space : `gym.spaces.Space` instance + Space (e.g. the observation space) for a single environment in the + vectorized environment. + + n : int + Number of environments in the vectorized environment. + + Returns + ------- + batched_space : `gym.spaces.Space` instance + Space (e.g. the observation space) for a batch of environments in the + vectorized environment. + + Example + ------- + >>> from gym.spaces import Box, Dict + >>> space = Dict({ + ... 'position': Box(low=0, high=1, shape=(3,), dtype=np.float32), + ... 'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)}) + >>> batch_space(space, n=5) + Dict(position:Box(5, 3), velocity:Box(5, 2)) + """ + if isinstance(space, _BaseGymSpaces): + return batch_space_base(space, n=n) + elif isinstance(space, Tuple): + return batch_space_tuple(space, n=n) + elif isinstance(space, Dict): + return batch_space_dict(space, n=n) + elif isinstance(space, Space): + return batch_space_custom(space, n=n) + else: + raise ValueError( + "Cannot batch space with type `{0}`. 
The space must " + "be a valid `gym.Space` instance.".format(type(space)) + ) + + +def batch_space_base(space, n=1): + if isinstance(space, Box): + repeats = tuple([n] + [1] * space.low.ndim) + low, high = np.tile(space.low, repeats), np.tile(space.high, repeats) + return Box(low=low, high=high, dtype=space.dtype) + + elif isinstance(space, Discrete): + return MultiDiscrete(np.full((n,), space.n, dtype=space.dtype)) + + elif isinstance(space, MultiDiscrete): + repeats = tuple([n] + [1] * space.nvec.ndim) + high = np.tile(space.nvec, repeats) - 1 + return Box(low=np.zeros_like(high), high=high, dtype=space.dtype) + + elif isinstance(space, MultiBinary): + return Box(low=0, high=1, shape=(n,) + space.shape, dtype=space.dtype) + + else: + raise ValueError("Space type `{0}` is not supported.".format(type(space))) + + +def batch_space_tuple(space, n=1): + return Tuple(tuple(batch_space(subspace, n=n) for subspace in space.spaces)) + + +def batch_space_dict(space, n=1): + return Dict( + OrderedDict( + [ + (key, batch_space(subspace, n=n)) + for (key, subspace) in space.spaces.items() + ] + ) + ) + + +def batch_space_custom(space, n=1): + return Tuple(tuple(space for _ in range(n))) diff --git a/gym-0.21.0/gym/vector/vector_env.py b/gym-0.21.0/gym/vector/vector_env.py new file mode 100644 index 0000000000000000000000000000000000000000..d6bbaa7c38d35e5a7576c63b5c81400840f7236c --- /dev/null +++ b/gym-0.21.0/gym/vector/vector_env.py @@ -0,0 +1,203 @@ +import gym +from gym.spaces import Tuple +from gym.vector.utils.spaces import batch_space + +__all__ = ["VectorEnv"] + + +class VectorEnv(gym.Env): + r"""Base class for vectorized environments. + + Each observation returned from vectorized environment is a batch of observations + for each sub-environment. And :meth:`step` is also expected to receive a batch of + actions for each sub-environment. + + .. note:: + + All sub-environments should share the identical observation and action spaces. 
# --- gym/vector/vector_env.py (the class line and opening of the docstring are
# visible at the end of the previous hunk; both classes are reconstructed in
# full here) ---
class VectorEnv(gym.Env):
    r"""Base class for vectorized environments.

    Each observation returned from vectorized environment is a batch of observations
    for each sub-environment. And :meth:`step` is also expected to receive a batch of
    actions for each sub-environment.

    .. note::

        All sub-environments should share the identical observation and action spaces.
        In other words, a vector of multiple different environments is not supported.

    Parameters
    ----------
    num_envs : int
        Number of environments in the vectorized environment.

    observation_space : `gym.spaces.Space` instance
        Observation space of a single environment.

    action_space : `gym.spaces.Space` instance
        Action space of a single environment.
    """

    def __init__(self, num_envs, observation_space, action_space):
        super(VectorEnv, self).__init__()
        self.num_envs = num_envs
        self.is_vector_env = True
        # Batched spaces exposed to callers of reset()/step().
        self.observation_space = batch_space(observation_space, n=num_envs)
        self.action_space = Tuple((action_space,) * num_envs)

        self.closed = False
        self.viewer = None

        # The observation and action spaces of a single environment are
        # kept in separate properties
        self.single_observation_space = observation_space
        self.single_action_space = action_space

    def reset_async(self):
        """Start an asynchronous reset; no-op in the base class."""
        pass

    def reset_wait(self, **kwargs):
        """Finish an asynchronous reset; must be implemented by subclasses."""
        raise NotImplementedError()

    def reset(self):
        r"""Reset all sub-environments and return a batch of initial observations.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.
        """
        self.reset_async()
        return self.reset_wait()

    def step_async(self, actions):
        """Dispatch a batch of actions asynchronously; no-op in the base class."""
        pass

    def step_wait(self, **kwargs):
        """Collect the results of a dispatched step; must be implemented by subclasses."""
        raise NotImplementedError()

    def step(self, actions):
        r"""Take an action for each sub-environments.

        Parameters
        ----------
        actions : iterable of samples from `action_space`
            List of actions.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.

        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of rewards from the vectorized environment.

        dones : `np.ndarray` instance (dtype `np.bool_`)
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic information dicts from sub-environments.
        """
        self.step_async(actions)
        return self.step_wait()

    def close_extras(self, **kwargs):
        r"""Clean up the extra resources e.g. beyond what's in this base class."""
        # NOTE(review): the diffed version raised NotImplementedError here, but
        # close() -- and therefore __del__ during garbage collection -- calls
        # this unconditionally, so any subclass without extra resources would
        # crash on close.  A no-op default matches the documented contract
        # (and upstream gym's base class).
        pass

    def close(self, **kwargs):
        r"""Close all sub-environments and release resources.

        It also closes all the existing image viewers, then calls :meth:`close_extras` and set
        :attr:`closed` as ``True``.

        .. warning::

            This function itself does not close the environments, it should be handled
            in :meth:`close_extras`. This is generic for both synchronous and asynchronous
            vectorized environments.

        .. note::

            This will be automatically called when garbage collected or program exited.

        """
        if self.closed:
            return
        if self.viewer is not None:
            self.viewer.close()
        self.close_extras(**kwargs)
        self.closed = True

    def seed(self, seeds=None):
        """
        Parameters
        ----------
        seeds : list of int, or int, optional
            Random seed for each individual environment. If `seeds` is a list of
            length `num_envs`, then the items of the list are chosen as random
            seeds. If `seeds` is an int, then each environment uses the random
            seed `seeds + n`, where `n` is the index of the environment (between
            `0` and `num_envs - 1`).
        """
        pass

    def __del__(self):
        # getattr default True: if __init__ never ran, do not attempt a close.
        if not getattr(self, "closed", True):
            self.close(terminate=True)

    def __repr__(self):
        if self.spec is None:
            return "{}({})".format(self.__class__.__name__, self.num_envs)
        else:
            return "{}({}, {})".format(
                self.__class__.__name__, self.spec.id, self.num_envs
            )


class VectorEnvWrapper(VectorEnv):
    r"""Wraps the vectorized environment to allow a modular transformation.

    This class is the base class for all wrappers for vectorized environments. The subclass
    could override some methods to change the behavior of the original vectorized environment
    without touching the original code.

    .. note::

        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.

    """

    def __init__(self, env):
        assert isinstance(env, VectorEnv)
        self.env = env

    # explicitly forward the methods defined in VectorEnv
    # to self.env (instead of the base class)
    def reset_async(self):
        return self.env.reset_async()

    def reset_wait(self):
        return self.env.reset_wait()

    def step_async(self, actions):
        return self.env.step_async(actions)

    def step_wait(self):
        return self.env.step_wait()

    def close(self, **kwargs):
        return self.env.close(**kwargs)

    def close_extras(self, **kwargs):
        return self.env.close_extras(**kwargs)

    def seed(self, seeds=None):
        return self.env.seed(seeds)

    # implicitly forward all other methods and attributes to self.env
    def __getattr__(self, name):
        if name.startswith("_"):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name)
            )
        return getattr(self.env, name)

    @property
    def unwrapped(self):
        return self.env.unwrapped

    def __repr__(self):
        return "<{}, {}>".format(self.__class__.__name__, self.env)

# NOTE(review): the diff continues with a binary wrappers __pycache__/*.pyc file
# and the opening of gym/wrappers/atari_preprocessing.py, which is cut off at
# this chunk boundary and therefore not reconstructed.
+from gym.spaces import Box +from gym.wrappers import TimeLimit + +try: + import cv2 +except ImportError: + cv2 = None + + +class AtariPreprocessing(gym.Wrapper): + r"""Atari 2600 preprocessings. + + This class follows the guidelines in + Machado et al. (2018), "Revisiting the Arcade Learning Environment: + Evaluation Protocols and Open Problems for General Agents". + + Specifically: + + * NoopReset: obtain initial state by taking random number of no-ops on reset. + * Frame skipping: 4 by default + * Max-pooling: most recent two observations + * Termination signal when a life is lost: turned off by default. Not recommended by Machado et al. (2018). + * Resize to a square image: 84x84 by default + * Grayscale observation: optional + * Scale observation: optional + + Args: + env (Env): environment + noop_max (int): max number of no-ops + frame_skip (int): the frequency at which the agent experiences the game. + screen_size (int): resize Atari frame + terminal_on_life_loss (bool): if True, then step() returns done=True whenever a + life is lost. + grayscale_obs (bool): if True, then gray scale observation is returned, otherwise, RGB observation + is returned. + grayscale_newaxis (bool): if True and grayscale_obs=True, then a channel axis is added to + grayscale observations to make them 3-dimensional. + scale_obs (bool): if True, then observation normalized in range [0,1] is returned. It also limits memory + optimization benefits of FrameStack Wrapper. + """ + + def __init__( + self, + env, + noop_max=30, + frame_skip=4, + screen_size=84, + terminal_on_life_loss=False, + grayscale_obs=True, + grayscale_newaxis=False, + scale_obs=False, + ): + super().__init__(env) + assert ( + cv2 is not None + ), "opencv-python package not installed! 
Try running pip install gym[atari] to get dependencies for atari" + assert frame_skip > 0 + assert screen_size > 0 + assert noop_max >= 0 + if frame_skip > 1: + assert "NoFrameskip" in env.spec.id, ( + "disable frame-skipping in the original env. for more than one" + " frame-skip as it will be done by the wrapper" + ) + self.noop_max = noop_max + assert env.unwrapped.get_action_meanings()[0] == "NOOP" + + self.frame_skip = frame_skip + self.screen_size = screen_size + self.terminal_on_life_loss = terminal_on_life_loss + self.grayscale_obs = grayscale_obs + self.grayscale_newaxis = grayscale_newaxis + self.scale_obs = scale_obs + + # buffer of most recent two observations for max pooling + if grayscale_obs: + self.obs_buffer = [ + np.empty(env.observation_space.shape[:2], dtype=np.uint8), + np.empty(env.observation_space.shape[:2], dtype=np.uint8), + ] + else: + self.obs_buffer = [ + np.empty(env.observation_space.shape, dtype=np.uint8), + np.empty(env.observation_space.shape, dtype=np.uint8), + ] + + self.ale = env.unwrapped.ale + self.lives = 0 + self.game_over = False + + _low, _high, _obs_dtype = ( + (0, 255, np.uint8) if not scale_obs else (0, 1, np.float32) + ) + _shape = (screen_size, screen_size, 1 if grayscale_obs else 3) + if grayscale_obs and not grayscale_newaxis: + _shape = _shape[:-1] # Remove channel axis + self.observation_space = Box( + low=_low, high=_high, shape=_shape, dtype=_obs_dtype + ) + + def step(self, action): + R = 0.0 + + for t in range(self.frame_skip): + _, reward, done, info = self.env.step(action) + R += reward + self.game_over = done + + if self.terminal_on_life_loss: + new_lives = self.ale.lives() + done = done or new_lives < self.lives + self.lives = new_lives + + if done: + break + if t == self.frame_skip - 2: + if self.grayscale_obs: + self.ale.getScreenGrayscale(self.obs_buffer[1]) + else: + self.ale.getScreenRGB(self.obs_buffer[1]) + elif t == self.frame_skip - 1: + if self.grayscale_obs: + 
self.ale.getScreenGrayscale(self.obs_buffer[0]) + else: + self.ale.getScreenRGB(self.obs_buffer[0]) + return self._get_obs(), R, done, info + + def reset(self, **kwargs): + # NoopReset + self.env.reset(**kwargs) + noops = ( + self.env.unwrapped.np_random.randint(1, self.noop_max + 1) + if self.noop_max > 0 + else 0 + ) + for _ in range(noops): + _, _, done, _ = self.env.step(0) + if done: + self.env.reset(**kwargs) + + self.lives = self.ale.lives() + if self.grayscale_obs: + self.ale.getScreenGrayscale(self.obs_buffer[0]) + else: + self.ale.getScreenRGB(self.obs_buffer[0]) + self.obs_buffer[1].fill(0) + return self._get_obs() + + def _get_obs(self): + if self.frame_skip > 1: # more efficient in-place pooling + np.maximum(self.obs_buffer[0], self.obs_buffer[1], out=self.obs_buffer[0]) + obs = cv2.resize( + self.obs_buffer[0], + (self.screen_size, self.screen_size), + interpolation=cv2.INTER_AREA, + ) + + if self.scale_obs: + obs = np.asarray(obs, dtype=np.float32) / 255.0 + else: + obs = np.asarray(obs, dtype=np.uint8) + + if self.grayscale_obs and self.grayscale_newaxis: + obs = np.expand_dims(obs, axis=-1) # Add a channel axis + return obs diff --git a/gym-0.21.0/gym/wrappers/gray_scale_observation.py b/gym-0.21.0/gym/wrappers/gray_scale_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..5e789ba94f4360ab763a301d35be6f3726a31b6d --- /dev/null +++ b/gym-0.21.0/gym/wrappers/gray_scale_observation.py @@ -0,0 +1,34 @@ +import numpy as np +from gym.spaces import Box +from gym import ObservationWrapper + + +class GrayScaleObservation(ObservationWrapper): + r"""Convert the image observation from RGB to gray scale.""" + + def __init__(self, env, keep_dim=False): + super(GrayScaleObservation, self).__init__(env) + self.keep_dim = keep_dim + + assert ( + len(env.observation_space.shape) == 3 + and env.observation_space.shape[-1] == 3 + ) + + obs_shape = self.observation_space.shape[:2] + if self.keep_dim: + self.observation_space = Box( + 
low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8 + ) + else: + self.observation_space = Box( + low=0, high=255, shape=obs_shape, dtype=np.uint8 + ) + + def observation(self, observation): + import cv2 + + observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY) + if self.keep_dim: + observation = np.expand_dims(observation, -1) + return observation diff --git a/gym-0.21.0/gym/wrappers/monitoring/__pycache__/video_recorder.cpython-38.pyc b/gym-0.21.0/gym/wrappers/monitoring/__pycache__/video_recorder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25870b3e62dd05bb90fd5c1b3dc12a9038f77ffe Binary files /dev/null and b/gym-0.21.0/gym/wrappers/monitoring/__pycache__/video_recorder.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/wrappers/normalize.py b/gym-0.21.0/gym/wrappers/normalize.py new file mode 100644 index 0000000000000000000000000000000000000000..c4e9a07a1cf8e3e1eede9783e83acffd5e9191ca --- /dev/null +++ b/gym-0.21.0/gym/wrappers/normalize.py @@ -0,0 +1,105 @@ +import numpy as np +import gym + + +# taken from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py +class RunningMeanStd(object): + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm + def __init__(self, epsilon=1e-4, shape=()): + self.mean = np.zeros(shape, "float64") + self.var = np.ones(shape, "float64") + self.count = epsilon + + def update(self, x): + batch_mean = np.mean(x, axis=0) + batch_var = np.var(x, axis=0) + batch_count = x.shape[0] + self.update_from_moments(batch_mean, batch_var, batch_count) + + def update_from_moments(self, batch_mean, batch_var, batch_count): + self.mean, self.var, self.count = update_mean_var_count_from_moments( + self.mean, self.var, self.count, batch_mean, batch_var, batch_count + ) + + +def update_mean_var_count_from_moments( + mean, var, count, batch_mean, batch_var, batch_count +): + delta = batch_mean - mean + tot_count = 
count + batch_count + + new_mean = mean + delta * batch_count / tot_count + m_a = var * count + m_b = batch_var * batch_count + M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count + new_var = M2 / tot_count + new_count = tot_count + + return new_mean, new_var, new_count + + +class NormalizeObservation(gym.core.Wrapper): + def __init__( + self, + env, + epsilon=1e-8, + ): + super(NormalizeObservation, self).__init__(env) + self.num_envs = getattr(env, "num_envs", 1) + self.is_vector_env = getattr(env, "is_vector_env", False) + if self.is_vector_env: + self.obs_rms = RunningMeanStd(shape=self.single_observation_space.shape) + else: + self.obs_rms = RunningMeanStd(shape=self.observation_space.shape) + self.epsilon = epsilon + + def step(self, action): + obs, rews, dones, infos = self.env.step(action) + if self.is_vector_env: + obs = self.normalize(obs) + else: + obs = self.normalize(np.array([obs]))[0] + return obs, rews, dones, infos + + def reset(self): + obs = self.env.reset() + if self.is_vector_env: + obs = self.normalize(obs) + else: + obs = self.normalize(np.array([obs]))[0] + return obs + + def normalize(self, obs): + self.obs_rms.update(obs) + return (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.epsilon) + + +class NormalizeReward(gym.core.Wrapper): + def __init__( + self, + env, + gamma=0.99, + epsilon=1e-8, + ): + super(NormalizeReward, self).__init__(env) + self.num_envs = getattr(env, "num_envs", 1) + self.is_vector_env = getattr(env, "is_vector_env", False) + self.return_rms = RunningMeanStd(shape=()) + self.returns = np.zeros(self.num_envs) + self.gamma = gamma + self.epsilon = epsilon + + def step(self, action): + obs, rews, dones, infos = self.env.step(action) + if not self.is_vector_env: + rews = np.array([rews]) + self.returns = self.returns * self.gamma + rews + rews = self.normalize(rews) + self.returns[dones] = 0.0 + if not self.is_vector_env: + rews = rews[0] + return obs, rews, dones, infos + + def normalize(self, 
rews): + self.return_rms.update(self.returns) + return rews / np.sqrt(self.return_rms.var + self.epsilon) diff --git a/gym-0.21.0/gym/wrappers/record_episode_statistics.py b/gym-0.21.0/gym/wrappers/record_episode_statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..5d5d348d3a452dbccad077b19c084d733f25382f --- /dev/null +++ b/gym-0.21.0/gym/wrappers/record_episode_statistics.py @@ -0,0 +1,55 @@ +import time +from collections import deque +import numpy as np +import gym + + +class RecordEpisodeStatistics(gym.Wrapper): + def __init__(self, env, deque_size=100): + super(RecordEpisodeStatistics, self).__init__(env) + self.num_envs = getattr(env, "num_envs", 1) + self.t0 = time.perf_counter() + self.episode_count = 0 + self.episode_returns = None + self.episode_lengths = None + self.return_queue = deque(maxlen=deque_size) + self.length_queue = deque(maxlen=deque_size) + self.is_vector_env = getattr(env, "is_vector_env", False) + + def reset(self, **kwargs): + observations = super(RecordEpisodeStatistics, self).reset(**kwargs) + self.episode_returns = np.zeros(self.num_envs, dtype=np.float32) + self.episode_lengths = np.zeros(self.num_envs, dtype=np.int32) + return observations + + def step(self, action): + observations, rewards, dones, infos = super(RecordEpisodeStatistics, self).step( + action + ) + self.episode_returns += rewards + self.episode_lengths += 1 + if not self.is_vector_env: + infos = [infos] + dones = [dones] + for i in range(len(dones)): + if dones[i]: + infos[i] = infos[i].copy() + episode_return = self.episode_returns[i] + episode_length = self.episode_lengths[i] + episode_info = { + "r": episode_return, + "l": episode_length, + "t": round(time.perf_counter() - self.t0, 6), + } + infos[i]["episode"] = episode_info + self.return_queue.append(episode_return) + self.length_queue.append(episode_length) + self.episode_count += 1 + self.episode_returns[i] = 0 + self.episode_lengths[i] = 0 + return ( + observations, + rewards, + 
dones if self.is_vector_env else dones[0], + infos if self.is_vector_env else infos[0], + ) diff --git a/gym-0.21.0/gym/wrappers/rescale_action.py b/gym-0.21.0/gym/wrappers/rescale_action.py new file mode 100644 index 0000000000000000000000000000000000000000..6143d1c1554ad93c5c560f023cc945d1a79b080e --- /dev/null +++ b/gym-0.21.0/gym/wrappers/rescale_action.py @@ -0,0 +1,48 @@ +import numpy as np +import gym +from gym import spaces + + +class RescaleAction(gym.ActionWrapper): + r"""Rescales the continuous action space of the environment to a range [min_action, max_action]. + + Example:: + + >>> RescaleAction(env, min_action, max_action).action_space == Box(min_action, max_action) + True + + """ + + def __init__(self, env, min_action, max_action): + assert isinstance( + env.action_space, spaces.Box + ), "expected Box action space, got {}".format(type(env.action_space)) + assert np.less_equal(min_action, max_action).all(), (min_action, max_action) + + super(RescaleAction, self).__init__(env) + self.min_action = ( + np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + min_action + ) + self.max_action = ( + np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + max_action + ) + self.action_space = spaces.Box( + low=min_action, + high=max_action, + shape=env.action_space.shape, + dtype=env.action_space.dtype, + ) + + def action(self, action): + assert np.all(np.greater_equal(action, self.min_action)), ( + action, + self.min_action, + ) + assert np.all(np.less_equal(action, self.max_action)), (action, self.max_action) + low = self.env.action_space.low + high = self.env.action_space.high + action = low + (high - low) * ( + (action - self.min_action) / (self.max_action - self.min_action) + ) + action = np.clip(action, low, high) + return action diff --git a/gym-0.21.0/gym/wrappers/time_aware_observation.py b/gym-0.21.0/gym/wrappers/time_aware_observation.py new file mode 100644 index 
0000000000000000000000000000000000000000..71ec32f5bfce64b8910c6d906acb64bf14ee14e6 --- /dev/null +++ b/gym-0.21.0/gym/wrappers/time_aware_observation.py @@ -0,0 +1,32 @@ +import numpy as np +from gym.spaces import Box +from gym import ObservationWrapper + + +class TimeAwareObservation(ObservationWrapper): + r"""Augment the observation with current time step in the trajectory. + + .. note:: + Currently it only works with one-dimensional observation space. It doesn't + support pixel observation space yet. + + """ + + def __init__(self, env): + super(TimeAwareObservation, self).__init__(env) + assert isinstance(env.observation_space, Box) + assert env.observation_space.dtype == np.float32 + low = np.append(self.observation_space.low, 0.0) + high = np.append(self.observation_space.high, np.inf) + self.observation_space = Box(low, high, dtype=np.float32) + + def observation(self, observation): + return np.append(observation, self.t) + + def step(self, action): + self.t += 1 + return super(TimeAwareObservation, self).step(action) + + def reset(self, **kwargs): + self.t = 0 + return super(TimeAwareObservation, self).reset(**kwargs) diff --git a/gym-0.21.0/gym/wrappers/time_limit.py b/gym-0.21.0/gym/wrappers/time_limit.py new file mode 100644 index 0000000000000000000000000000000000000000..ee8cac96f4ef85b3fdfc0903d11260845d002bda --- /dev/null +++ b/gym-0.21.0/gym/wrappers/time_limit.py @@ -0,0 +1,27 @@ +import gym + + +class TimeLimit(gym.Wrapper): + def __init__(self, env, max_episode_steps=None): + super(TimeLimit, self).__init__(env) + if max_episode_steps is None and self.env.spec is not None: + max_episode_steps = env.spec.max_episode_steps + if self.env.spec is not None: + self.env.spec.max_episode_steps = max_episode_steps + self._max_episode_steps = max_episode_steps + self._elapsed_steps = None + + def step(self, action): + assert ( + self._elapsed_steps is not None + ), "Cannot call env.step() before calling reset()" + observation, reward, done, info = 
self.env.step(action) + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + info["TimeLimit.truncated"] = not done + done = True + return observation, reward, done, info + + def reset(self, **kwargs): + self._elapsed_steps = 0 + return self.env.reset(**kwargs) diff --git a/gym-0.21.0/requirements.txt b/gym-0.21.0/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6ec5e0cb9a81303ac6321de786bff3bcb992a73 --- /dev/null +++ b/gym-0.21.0/requirements.txt @@ -0,0 +1,9 @@ +ale-py~=0.7 +opencv-python>=3. +box2d-py==2.3.5 +mujoco_py>=1.50, <2.0 +scipy>=1.4.1 +numpy>=1.18.0 +pyglet>=1.4.0 +cloudpickle>=1.2.0 +lz4>=3.1.0 diff --git a/gym-0.21.0/test_requirements.txt b/gym-0.21.0/test_requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7f59ce32d8615b68d1aff6972e0245dfa8a110e3 --- /dev/null +++ b/gym-0.21.0/test_requirements.txt @@ -0,0 +1,3 @@ +lz4~=3.1 +pytest~=6.2 +pytest-forked~=1.3 diff --git a/gym-0.21.0/tests/envs/robotics/hand/__init__.py b/gym-0.21.0/tests/envs/robotics/hand/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/gym-0.21.0/tests/envs/robotics/hand/test_reach.py b/gym-0.21.0/tests/envs/robotics/hand/test_reach.py new file mode 100644 index 0000000000000000000000000000000000000000..8d360e7068068130c7efdc3e3c95ea93daf5d00e --- /dev/null +++ b/gym-0.21.0/tests/envs/robotics/hand/test_reach.py @@ -0,0 +1,18 @@ +import pickle + +import pytest + +from gym import envs +from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE + + +@pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) +def test_serialize_deserialize(): + env1 = envs.make("HandReach-v0", distance_threshold=1e-6) + env1.reset() + env2 = pickle.loads(pickle.dumps(env1)) + + assert env1.distance_threshold == env2.distance_threshold, ( + env1.distance_threshold, + env2.distance_threshold, + ) 
diff --git a/gym-0.21.0/tests/vector/test_vector_env_wrapper.py b/gym-0.21.0/tests/vector/test_vector_env_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..4571b7650ab9310dbe4af3271f658fa9800fd400 --- /dev/null +++ b/gym-0.21.0/tests/vector/test_vector_env_wrapper.py @@ -0,0 +1,20 @@ +import gym +from gym.vector import make +from gym.vector import VectorEnvWrapper + + +class DummyWrapper(VectorEnvWrapper): + def __init__(self, env): + self.env = env + self.counter = 0 + + def reset_async(self): + super().reset_async() + self.counter += 1 + + +def test_vector_env_wrapper_inheritance(): + env = make("FrozenLake-v1", asynchronous=False) + wrapped = DummyWrapper(env) + wrapped.reset() + assert wrapped.counter == 1 diff --git a/gym-0.21.0/tests/wrappers/test_filter_observation.py b/gym-0.21.0/tests/wrappers/test_filter_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..35beaad5650ad6f8c54828debad09648aff64395 --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_filter_observation.py @@ -0,0 +1,84 @@ +import pytest +import numpy as np + +import gym +from gym import spaces +from gym.wrappers.filter_observation import FilterObservation + + +class FakeEnvironment(gym.Env): + def __init__(self, observation_keys=("state")): + self.observation_space = spaces.Dict( + { + name: spaces.Box(shape=(2,), low=-1, high=1, dtype=np.float32) + for name in observation_keys + } + ) + self.action_space = spaces.Box(shape=(1,), low=-1, high=1, dtype=np.float32) + + def render(self, width=32, height=32, *args, **kwargs): + del args + del kwargs + image_shape = (height, width, 3) + return np.zeros(image_shape, dtype=np.uint8) + + def reset(self): + observation = self.observation_space.sample() + return observation + + def step(self, action): + del action + observation = self.observation_space.sample() + reward, terminal, info = 0.0, False, {} + return observation, reward, terminal, info + + +FILTER_OBSERVATION_TEST_CASES = ( + 
(("key1", "key2"), ("key1",)), + (("key1", "key2"), ("key1", "key2")), + (("key1",), None), + (("key1",), ("key1",)), +) + +ERROR_TEST_CASES = ( + ("key", ValueError, "All the filter_keys must be included..*"), + (False, TypeError, "'bool' object is not iterable"), + (1, TypeError, "'int' object is not iterable"), +) + + +class TestFilterObservation(object): + @pytest.mark.parametrize( + "observation_keys,filter_keys", FILTER_OBSERVATION_TEST_CASES + ) + def test_filter_observation(self, observation_keys, filter_keys): + env = FakeEnvironment(observation_keys=observation_keys) + + # Make sure we are testing the right environment for the test. + observation_space = env.observation_space + assert isinstance(observation_space, spaces.Dict) + + wrapped_env = FilterObservation(env, filter_keys=filter_keys) + + assert isinstance(wrapped_env.observation_space, spaces.Dict) + + if filter_keys is None: + filter_keys = tuple(observation_keys) + + assert len(wrapped_env.observation_space.spaces) == len(filter_keys) + assert tuple(wrapped_env.observation_space.spaces.keys()) == tuple(filter_keys) + + # Check that the added space item is consistent with the added observation. 
+ observation = wrapped_env.reset() + assert len(observation) == len(filter_keys) + + @pytest.mark.parametrize("filter_keys,error_type,error_match", ERROR_TEST_CASES) + def test_raises_with_incorrect_arguments( + self, filter_keys, error_type, error_match + ): + env = FakeEnvironment(observation_keys=("key1", "key2")) + + ValueError + + with pytest.raises(error_type, match=error_match): + FilterObservation(env, filter_keys=filter_keys) diff --git a/gym-0.21.0/tests/wrappers/test_record_episode_statistics.py b/gym-0.21.0/tests/wrappers/test_record_episode_statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..9e088064761caf50d496851e0f1f8233b3ea9c8f --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_record_episode_statistics.py @@ -0,0 +1,38 @@ +import pytest + +import gym +from gym.wrappers import RecordEpisodeStatistics + + +@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v1"]) +@pytest.mark.parametrize("deque_size", [2, 5]) +def test_record_episode_statistics(env_id, deque_size): + env = gym.make(env_id) + env = RecordEpisodeStatistics(env, deque_size) + + for n in range(5): + env.reset() + assert env.episode_returns[0] == 0.0 + assert env.episode_lengths[0] == 0 + for t in range(env.spec.max_episode_steps): + _, _, done, info = env.step(env.action_space.sample()) + if done: + assert "episode" in info + assert all([item in info["episode"] for item in ["r", "l", "t"]]) + break + assert len(env.return_queue) == deque_size + assert len(env.length_queue) == deque_size + + +@pytest.mark.parametrize("num_envs", [1, 4]) +def test_record_episode_statistics_with_vectorenv(num_envs): + envs = gym.vector.make("CartPole-v0", num_envs=num_envs, asynchronous=False) + envs = RecordEpisodeStatistics(envs) + envs.reset() + for _ in range(envs.env.envs[0].spec.max_episode_steps + 1): + _, _, dones, infos = envs.step(envs.action_space.sample()) + for idx, info in enumerate(infos): + if dones[idx]: + assert "episode" in info + assert 
all([item in info["episode"] for item in ["r", "l", "t"]]) + break diff --git a/gym-0.21.0/tests/wrappers/test_record_video.py b/gym-0.21.0/tests/wrappers/test_record_video.py new file mode 100644 index 0000000000000000000000000000000000000000..509fe908709b1f6a426b0c494c4755961b403023 --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_record_video.py @@ -0,0 +1,75 @@ +import pytest + +import os +import shutil +import gym +from gym.wrappers import ( + RecordEpisodeStatistics, + RecordVideo, + capped_cubic_video_schedule, +) + + +def test_record_video_using_default_trigger(): + + env = gym.make("CartPole-v1") + env = gym.wrappers.RecordVideo(env, "videos") + env.reset() + for _ in range(199): + action = env.action_space.sample() + _, _, done, _ = env.step(action) + if done: + env.reset() + env.close() + assert os.path.isdir("videos") + mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] + assert len(mp4_files) == sum( + [capped_cubic_video_schedule(i) for i in range(env.episode_id + 1)] + ) + shutil.rmtree("videos") + + +def test_record_video_step_trigger(): + env = gym.make("CartPole-v1") + env._max_episode_steps = 20 + env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0) + env.reset() + for _ in range(199): + action = env.action_space.sample() + _, _, done, _ = env.step(action) + if done: + env.reset() + env.close() + assert os.path.isdir("videos") + mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] + assert len(mp4_files) == 2 + shutil.rmtree("videos") + + +def make_env(gym_id, seed): + def thunk(): + env = gym.make(gym_id) + env._max_episode_steps = 20 + if seed == 1: + env = gym.wrappers.RecordVideo( + env, "videos", step_trigger=lambda x: x % 100 == 0 + ) + return env + + return thunk + + +def test_record_video_within_vector(): + envs = gym.vector.SyncVectorEnv([make_env("CartPole-v1", 1 + i) for i in range(2)]) + envs = gym.wrappers.RecordEpisodeStatistics(envs) + envs.reset() + 
for i in range(199): + _, _, _, infos = envs.step(envs.action_space.sample()) + for info in infos: + if "episode" in info.keys(): + print(f"episode_reward={info['episode']['r']}") + break + assert os.path.isdir("videos") + mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] + assert len(mp4_files) == 2 + shutil.rmtree("videos") diff --git a/gym-0.21.0/tests/wrappers/test_rescale_action.py b/gym-0.21.0/tests/wrappers/test_rescale_action.py new file mode 100644 index 0000000000000000000000000000000000000000..c33e5adc85ee946ea3422a51eec361ba6e5fc9df --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_rescale_action.py @@ -0,0 +1,32 @@ +import pytest + +import numpy as np + +import gym +from gym.wrappers import RescaleAction + + +def test_rescale_action(): + env = gym.make("CartPole-v1") + with pytest.raises(AssertionError): + env = RescaleAction(env, -1, 1) + del env + + env = gym.make("Pendulum-v1") + wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1) + + seed = 0 + env.seed(seed) + wrapped_env.seed(seed) + + obs = env.reset() + wrapped_obs = wrapped_env.reset() + assert np.allclose(obs, wrapped_obs) + + obs, reward, _, _ = env.step([1.5]) + with pytest.raises(AssertionError): + wrapped_env.step([1.5]) + wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75]) + + assert np.allclose(obs, wrapped_obs) + assert np.allclose(reward, wrapped_reward) diff --git a/gym-0.21.0/tests/wrappers/test_resize_observation.py b/gym-0.21.0/tests/wrappers/test_resize_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..688b47269ec31253512b953211ef6d61a341f88e --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_resize_observation.py @@ -0,0 +1,24 @@ +import pytest + +import gym +from gym.wrappers import ResizeObservation + +pytest.importorskip("gym.envs.atari") + + +@pytest.mark.parametrize( + "env_id", ["PongNoFrameskip-v0", "SpaceInvadersNoFrameskip-v0"] +) +@pytest.mark.parametrize("shape", [16, 32, (8, 5), [10, 7]]) 
+def test_resize_observation(env_id, shape): + env = gym.make(env_id) + env = ResizeObservation(env, shape) + + assert env.observation_space.shape[-1] == 3 + obs = env.reset() + if isinstance(shape, int): + assert env.observation_space.shape[:2] == (shape, shape) + assert obs.shape == (shape, shape, 3) + else: + assert env.observation_space.shape[:2] == tuple(shape) + assert obs.shape == tuple(shape) + (3,) diff --git a/gym-0.21.0/tests/wrappers/test_transform_observation.py b/gym-0.21.0/tests/wrappers/test_transform_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..6818cce77ea24b608c9e0d01e94e12a90ead3dfc --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_transform_observation.py @@ -0,0 +1,29 @@ +import pytest + +import numpy as np + +import gym +from gym.wrappers import TransformObservation + + +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) +def test_transform_observation(env_id): + affine_transform = lambda x: 3 * x + 2 + env = gym.make(env_id) + wrapped_env = TransformObservation( + gym.make(env_id), lambda obs: affine_transform(obs) + ) + + env.seed(0) + wrapped_env.seed(0) + + obs = env.reset() + wrapped_obs = wrapped_env.reset() + assert np.allclose(wrapped_obs, affine_transform(obs)) + + action = env.action_space.sample() + obs, reward, done, _ = env.step(action) + wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action) + assert np.allclose(wrapped_obs, affine_transform(obs)) + assert np.allclose(wrapped_reward, reward) + assert wrapped_done == done diff --git a/gym-0.21.0/tests/wrappers/test_transform_reward.py b/gym-0.21.0/tests/wrappers/test_transform_reward.py new file mode 100644 index 0000000000000000000000000000000000000000..6bc380cf2ce23f0fefee810c8b5fe5d921594535 --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_transform_reward.py @@ -0,0 +1,63 @@ +import pytest + +import numpy as np + +import gym +from gym.wrappers import TransformReward + + 
+@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) +def test_transform_reward(env_id): + # use case #1: scale + scales = [0.1, 200] + for scale in scales: + env = gym.make(env_id) + wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r) + action = env.action_space.sample() + + env.seed(0) + env.reset() + wrapped_env.seed(0) + wrapped_env.reset() + + _, reward, _, _ = env.step(action) + _, wrapped_reward, _, _ = wrapped_env.step(action) + + assert wrapped_reward == scale * reward + del env, wrapped_env + + # use case #2: clip + min_r = -0.0005 + max_r = 0.0002 + env = gym.make(env_id) + wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r)) + action = env.action_space.sample() + + env.seed(0) + env.reset() + wrapped_env.seed(0) + wrapped_env.reset() + + _, reward, _, _ = env.step(action) + _, wrapped_reward, _, _ = wrapped_env.step(action) + + assert abs(wrapped_reward) < abs(reward) + assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002 + del env, wrapped_env + + # use case #3: sign + env = gym.make(env_id) + wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r)) + + env.seed(0) + env.reset() + wrapped_env.seed(0) + wrapped_env.reset() + + for _ in range(1000): + action = env.action_space.sample() + _, wrapped_reward, done, _ = wrapped_env.step(action) + assert wrapped_reward in [-1.0, 0.0, 1.0] + if done: + break + del env, wrapped_env