diff --git a/Metaworld/.gitignore b/Metaworld/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..03b6378d8e1d01f4370e6123f1ec3d58935d6bc6 --- /dev/null +++ b/Metaworld/.gitignore @@ -0,0 +1,148 @@ +# Plots and figures +scripts/figures/ +scripts/movies* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/_apidoc/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + +# metaworld-specific stuff below this line +MUJOCO_LOG.TXT + +# Other IDEs and editors +*.sublime-project +*.sublime-workspace +.project +.pydevproject +*.swp +*.orig +.idea +.settings +.vscode/ + +# MacOS +.DS_Store + +# no 
submodules, please +.gitmodules + +# pipenv +# Including these for now, since pipenv is not the authoritative environment +# tool +Pipfile +Pipfile.lock diff --git a/Metaworld/LICENSE b/Metaworld/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..524f4b05f6a2a5221bda5978ce7431bf7ff14455 --- /dev/null +++ b/Metaworld/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Meta-World Team + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Metaworld/MANIFEST.in b/Metaworld/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..3770244cd741c8454b9245bfc80b28d526b635d9 --- /dev/null +++ b/Metaworld/MANIFEST.in @@ -0,0 +1,2 @@ +graft metaworld/envs/assets_v1 +graft metaworld/envs/assets_v2 diff --git a/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..36937ecc98ad1f39b4c762ba8a9718816bb16074 Binary files /dev/null and b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 differ diff --git a/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 new file mode 100644 index 0000000000000000000000000000000000000000..34059363a17c8f22ce3238f8450a3216d1555f0e Binary files /dev/null and b/Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 differ diff --git a/VRL3/src/cfgs_adroit/task/hammer.yaml b/VRL3/src/cfgs_adroit/task/hammer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18c49e7dc5d0094c7dad4fc5aea1553dbacbc54f --- /dev/null +++ b/VRL3/src/cfgs_adroit/task/hammer.yaml @@ -0,0 +1,4 @@ +num_train_frames: 4100000 +task_name: hammer-v0 +agent: + encoder_lr_scale: 1 \ No newline at end of file diff --git a/VRL3/src/cfgs_adroit/task/pen.yaml b/VRL3/src/cfgs_adroit/task/pen.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6834a504321d0142aba6ce41d860f8ee44b68a7e --- /dev/null +++ b/VRL3/src/cfgs_adroit/task/pen.yaml @@ -0,0 +1,4 @@ +num_train_frames: 4100000 +task_name: pen-v0 +agent: + encoder_lr_scale: 1 \ No newline at end of file diff --git a/VRL3/src/rrl_local/rrl_utils.py b/VRL3/src/rrl_local/rrl_utils.py new file mode 100644 index 
0000000000000000000000000000000000000000..05d943348eb2c607605122af23d683cd2082534d --- /dev/null +++ b/VRL3/src/rrl_local/rrl_utils.py @@ -0,0 +1,85 @@ +# Copyright (c) Rutav Shah, Indian Institute of Technlogy Kharagpur +# Copyright (c) Facebook, Inc. and its affiliates + +from mjrl.utils.gym_env import GymEnv +from rrl_local.rrl_multicam import BasicAdroitEnv +import os + +def make_basic_env(env, cam_list=[], from_pixels=False, hybrid_state=None, test_image=False, channels_first=False, + num_repeats=1, num_frames=1): + e = GymEnv(env) + env_kwargs = None + if from_pixels : # TODO here they first check if it's from pixels... if pixel and not resnet then use 84x84?? + height = 84 + width = 84 + latent_dim = height*width*len(cam_list)*3 + # RRL class instance is environment wrapper... + e = BasicAdroitEnv(e, cameras=cam_list, + height=height, width=width, latent_dim=latent_dim, hybrid_state=hybrid_state, + test_image=test_image, channels_first=channels_first, num_repeats=num_repeats, num_frames=num_frames) + env_kwargs = {'rrl_kwargs' : e.env_kwargs} + # if not from pixels... then it's simpler + return e, env_kwargs + +def make_env(env, cam_list=[], from_pixels=False, encoder_type=None, hybrid_state=None,) : + # TODO why is encoder type put into the environment? + e = GymEnv(env) + env_kwargs = None + if from_pixels : # TODO here they first check if it's from pixels... if pixel and not resnet then use 84x84?? + height = 84 + width = 84 + latent_dim = height*width*len(cam_list)*3 + + if encoder_type and encoder_type == 'resnet34': + assert from_pixels==True + height = 256 + width = 256 + latent_dim = 512*len(cam_list) #TODO each camera provides an image? + if from_pixels: + # RRL class instance is environment wrapper... 
+ e = BasicAdroitEnv(e, cameras=cam_list, encoder_type=encoder_type, + height=height, width=width, latent_dim=latent_dim, hybrid_state=hybrid_state) + env_kwargs = {'rrl_kwargs' : e.env_kwargs} + return e, env_kwargs + +def make_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + + +def preprocess_args(args): + job_data = {} + job_data['seed'] = args.seed + job_data['env'] = args.env + job_data['output'] = args.output + job_data['from_pixels'] = args.from_pixels + job_data['hybrid_state'] = args.hybrid_state + job_data['stack_frames'] = args.stack_frames + job_data['encoder_type'] = args.encoder_type + job_data['cam1'] = args.cam1 + job_data['cam2'] = args.cam2 + job_data['cam3'] = args.cam3 + job_data['algorithm'] = args.algorithm + job_data['num_cpu'] = args.num_cpu + job_data['save_freq'] = args.save_freq + job_data['eval_rollouts'] = args.eval_rollouts + job_data['demo_file'] = args.demo_file + job_data['bc_batch_size'] = args.bc_batch_size + job_data['bc_epochs'] = args.bc_epochs + job_data['bc_learn_rate'] = args.bc_learn_rate + #job_data['policy_size'] = args.policy_size + job_data['policy_size'] = tuple(map(int, args.policy_size.split(', '))) + job_data['vf_batch_size'] = args.vf_batch_size + job_data['vf_epochs'] = args.vf_epochs + job_data['vf_learn_rate'] = args.vf_learn_rate + job_data['rl_step_size'] = args.rl_step_size + job_data['rl_gamma'] = args.rl_gamma + job_data['rl_gae'] = args.rl_gae + job_data['rl_num_traj'] = args.rl_num_traj + job_data['rl_num_iter'] = args.rl_num_iter + job_data['lam_0'] = args.lam_0 + job_data['lam_1'] = args.lam_1 + print(job_data) + return job_data + + diff --git a/VRL3/src/transfer_util.py b/VRL3/src/transfer_util.py new file mode 100644 index 0000000000000000000000000000000000000000..99691715eb98bd7463044e3ca0bca78efc1d1044 --- /dev/null +++ b/VRL3/src/transfer_util.py @@ -0,0 +1,50 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +from torchvision import datasets, models, transforms + +def set_parameter_requires_grad(model, n_resblock_finetune): + assert n_resblock_finetune in (0, 1, 2, 3, 4, 5) + for param in model.parameters(): + param.requires_grad = False + + for name, param in model.named_parameters(): + condition = (n_resblock_finetune >= 1 and 'layer4' in name) or (n_resblock_finetune >= 2 and 'layer3' in name) or \ + (n_resblock_finetune >= 3 and 'layer2' in name) or (n_resblock_finetune >= 4 and 'layer1' in name) or \ + (n_resblock_finetune >= 5) + + if condition: + param.requires_grad = True + + for name, param in model.named_parameters(): + if 'bn' in name: + param.requires_grad = False + +def initialize_model(model_name, n_resblock_finetune, use_pretrained=True): + # Initialize these variables which will be set in this if statement. Each of these + # variables is model specific. + model_ft = None + input_size = 0 + + if model_name == "resnet18": + """ Resnet18 + """ + model_ft = models.resnet18(pretrained=use_pretrained) + set_parameter_requires_grad(model_ft, n_resblock_finetune) + feature_size = model_ft.fc.in_features + input_size = 224 + elif model_name == 'resnet34': + model_ft = models.resnet34(pretrained=use_pretrained) + set_parameter_requires_grad(model_ft, n_resblock_finetune) + feature_size = model_ft.fc.in_features + input_size = 224 + else: + print("Invalid model name, exiting...") + exit() + + return model_ft, input_size, feature_size + + + + + diff --git a/gym-0.21.0/.gitignore b/gym-0.21.0/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a4ebf2a52774af7cbe8f56641f5fe9c3dc4f87b0 --- /dev/null +++ b/gym-0.21.0/.gitignore @@ -0,0 +1,39 @@ +*.swp +*.pyc +*.py~ +.DS_Store +.cache +.pytest_cache/ + +# Setuptools distribution and build folders. +/dist/ +/build + +# Virtualenv +/env + +# Python egg metadata, regenerated from source files by setuptools. 
+/*.egg-info + +*.sublime-project +*.sublime-workspace + +logs/ + +.ipynb_checkpoints +ghostdriver.log + +junk +MUJOCO_LOG.txt + +rllab_mujoco + +tutorial/*.html + +# IDE files +.eggs +.tox + +# PyCharm project files +.idea +vizdoom.ini diff --git a/gym-0.21.0/CODE_OF_CONDUCT.rst b/gym-0.21.0/CODE_OF_CONDUCT.rst new file mode 100644 index 0000000000000000000000000000000000000000..e208081cae3a9aefd992b3eff96b3809a29c0f34 --- /dev/null +++ b/gym-0.21.0/CODE_OF_CONDUCT.rst @@ -0,0 +1,13 @@ +OpenAI Gym is dedicated to providing a harassment-free experience for +everyone, regardless of gender, gender identity and expression, sexual +orientation, disability, physical appearance, body size, age, race, or +religion. We do not tolerate harassment of participants in any form. + +This code of conduct applies to all OpenAI Gym spaces (including Gist +comments) both online and off. Anyone who violates this code of +conduct may be sanctioned or expelled from these spaces at the +discretion of the OpenAI team. + +We may add additional rules over time, which will be made clearly +available to participants. Participants are responsible for knowing +and abiding by these rules. diff --git a/gym-0.21.0/bin/docker_entrypoint b/gym-0.21.0/bin/docker_entrypoint new file mode 100644 index 0000000000000000000000000000000000000000..a5fe2c9f0ca075c9767057b131faaf572ac4bf95 --- /dev/null +++ b/gym-0.21.0/bin/docker_entrypoint @@ -0,0 +1,26 @@ +#!/bin/bash +# This script is the entrypoint for our Docker image. + +set -ex + +# Set up display; otherwise rendering will fail +Xvfb -screen 0 1024x768x24 & +export DISPLAY=:0 + +# Wait for the file to come up +display=0 +file="/tmp/.X11-unix/X$display" +for i in $(seq 1 10); do + if [ -e "$file" ]; then + break + fi + + echo "Waiting for $file to be created (try $i/10)" + sleep "$i" +done +if ! 
[ -e "$file" ]; then + echo "Timing out: $file was not created" + exit 1 +fi + +exec "$@" diff --git a/gym-0.21.0/docs/api.md b/gym-0.21.0/docs/api.md new file mode 100644 index 0000000000000000000000000000000000000000..68749f0edf27e7ea01bbd4ae91287cffff287902 --- /dev/null +++ b/gym-0.21.0/docs/api.md @@ -0,0 +1,123 @@ +# API +## Initializing Environments +Initializing environment is very easy in Gym and can be done via: + +```python +import gym +env = gym.make('CartPole-v0') +``` + +## Interacting with the Environment +This example will run an instance of `CartPole-v0` environment for 1000 timesteps, rendering the environment at each step. You should see a window pop up rendering the classic [cart-pole](https://www.youtube.com/watch?v=J7E6_my3CHk&ab_channel=TylerStreeter) problem + +```python +import gym +env = gym.make('CartPole-v0') +env.reset() +for _ in range(1000): + env.render() # by default `mode="human"`(GUI), you can pass `mode="rbg_array"` to retrieve an image instead + env.step(env.action_space.sample()) # take a random action +env.close() +``` + +The output should look something like this + +![cartpole-no-reset](https://user-images.githubusercontent.com/28860173/129241283-70069f7c-453d-4670-a226-854203bd8a1b.gif) + + +The commonly used methods are: + +`reset()` resets the environment to its initial state and returns the observation corresponding to the initial state +`step(action)` takes an action as an input and implements that action in the environment. This method returns a set of four values +`render()` renders the environment + +- `observation` (**object**) : an environment specific object representation your observation of the environment after the step is taken. Its often aliased as the next state after the action has been taken +- `reward`(**float**) : immediate reward achieved by the previous action. 
Actual value and range will varies between environments, but the final goal is always to increase your total reward +- `done`(**boolean**): whether it’s time to `reset` the environment again. Most (but not all) tasks are divided up into well-defined episodes, and `done` being `True` indicates the episode has terminated. (For example, perhaps the pole tipped too far, or you lost your last life.) +- `info`(**dict**) : This provides general information helpful for debugging or additional information depending on the environment, such as the raw probabilities behind the environment’s last state change + + +## Additional Environment API + +- `action_space`: this attribute gives the format of valid actions. It is of datatype `Space` provided by Gym. (For ex: If the action space is of type `Discrete` and gives the value `Discrete(2)`, this means there are two valid discrete actions 0 & 1 ) +```python +print(env.action_space) +#> Discrete(2) + +print(env.observation_space) +#> Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32) + +``` +- `observation_space`: this attribute gives the format of valid observations. It if of datatype `Space` provided by Gym. (For ex: if the observation space is of type `Box` and the shape of the object is `(4,)`, this denotes a valid observation will be an array of 4 numbers). We can check the box bounds as well with attributes + +```python +print(env.observation_space.high) +#> array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38], dtype=float32) + +print(env.observation_space.low) +#> array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38], dtype=float32) +``` +- There are multiple types of Space types inherently available in gym: + - `Box` describes an n-dimensional continuous space. Its a bounded space where we can define the upper and lower limit which describe the valid values our observations can take. 
+ - `Discrete` describes a discrete space where { 0, 1, ......., n-1} are the possible values our observation/action can take. + - `Dict` represents a dictionary of simple spaces. + - `Tuple` represents a tuple of simple spaces + - `MultiBinary` creates a n-shape binary space. Argument n can be a number or a `list` of numbers + - `MultiDiscrete` consists of a series of `Discrete` action spaces with different number of actions in each element + ```python + observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32) + print(observation_space.sample()) + #> [ 1.6952509 -0.4399011 -0.7981693] + + observation_space = Discrete(4) + print(observation_space.sample()) + #> 1 + + observation_space = Dict({"position": Discrete(2), "velocity": Discrete(3)}) + print(observation_space.sample()) + #> OrderedDict([('position', 0), ('velocity', 1)]) + + observation_space = Tuple((Discrete(2), Discrete(3))) + print(observation_space.sample()) + #> (1, 2) + + observation_space = MultiBinary(5) + print(observation_space.sample()) + #> [1 1 1 0 1] + + observation_space = MultiDiscrete([ 5, 2, 2 ]) + print(observation_space.sample()) + #> [3 0 0] + ``` +- `reward_range`: returns a tuple corresponding to min and max possible rewards. Default range is set to `[-inf,+inf]`. You can set it if you want a narrower range +- `close()` : Override close in your subclass to perform any necessary cleanup +- `seed()`: Sets the seed for this env's random number generator + + +### Unwrapping an environment +If you have a wrapped environment, and you want to get the unwrapped environment underneath all the layers of wrappers (so that you can manually call a function or change some underlying aspect of the environment), you can use the `.unwrapped` attribute. If the environment is already a base environment, the `.unwrapped` attribute will just return itself. 
+ +```python +base_env = env.unwrapped +``` + +### Vectorized Environment +Vectorized Environments are a way of stacking multiple independent environments, so that instead of training on one environment, our agent can train on multiple environments at a time. Each `observation` returned from a vectorized environment is a batch of observations for each sub-environment, and `step` is also expected to receive a batch of actions for each sub-environment. + +**NOTE:** All sub-environments should share the identical observation and action spaces. A vector of multiple different environments is not supported + +Gym Vector API consists of two types of vectorized environments: + +- `AsyncVectorEnv` runs multiple environments in parallel. It uses `multiprocessing` processes, and pipes for communication. +- `SyncVectorEnv`runs multiple environments serially + +```python +import gym +env = gym.vector.make('CartPole-v1', 3,asynchronous=True) # Creates an Asynchronous env +env.reset() +#> array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827], +#> [ 0.03073904, 0.00145001, -0.03088818, -0.03131252], +#> [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]], +#> dtype=float32) + +``` diff --git a/gym-0.21.0/docs/toy_text/frozen_lake.md b/gym-0.21.0/docs/toy_text/frozen_lake.md new file mode 100644 index 0000000000000000000000000000000000000000..b84daf7ec1a7830efa1c6cbe8f2685b0d7e15e91 --- /dev/null +++ b/gym-0.21.0/docs/toy_text/frozen_lake.md @@ -0,0 +1,70 @@ +Frozen Lake +--- +|Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import| +| ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------| +|Frozen Lake|Discrete|(1,)|(0,3)|(1,)|(0,nrows*ncolumns)| |from gym.envs.toy_text import frozen_lake| +--- + + +Frozen lake involves crossing a frozen lake from Start(S) to goal(G) without falling into any holes(H). 
The agent may not always move in the intended direction due to the slippery nature of the frozen lake. + +The agent take a 1-element vector for actions. +The action space is `(dir)`, where `dir` decides direction to move in which can be: + +- 0: LEFT +- 1: DOWN +- 2: RIGHT +- 3: UP + +The observation is a value representing the agents current position as + + current_row * nrows + current_col + +**Rewards:** + +Reward schedule: +- Reach goal(G): +1 +- Reach hole(H): 0 + +### Arguments + +``` +gym.make('FrozenLake-v0', desc=None,map_name="4x4", is_slippery=True) +``` + +`desc`: Used to specify custom map for frozen lake. For example, + + desc=["SFFF", "FHFH", "FFFH", "HFFG"]. + +`map_name`: ID to use any of the preloaded maps. + + "4x4":[ + "SFFF", + "FHFH", + "FFFH", + "HFFG" + ] + + "8x8": [ + "SFFFFFFF", + "FFFFFFFF", + "FFFHFFFF", + "FFFFFHFF", + "FFFHFFFF", + "FHHFFFHF", + "FHFFHFHF", + "FFFHFFFG", + ] + + + + +`is_slippery`: True/False. If True will move in intended direction with probability of 1/3 else will move in either perpendicular direction with equal probability of 1/3 in both directions. + + For example, if action is left and is_slippery is True, then: + - P(move left)=1/3 + - P(move up)=1/3 + - P(move down)=1/3 +### Version History + +* v0: Initial versions release (1.0.0) diff --git a/gym-0.21.0/docs/tutorials.md b/gym-0.21.0/docs/tutorials.md new file mode 100644 index 0000000000000000000000000000000000000000..7abc9543e8224fcc69dd824a1fad396f20d91aa2 --- /dev/null +++ b/gym-0.21.0/docs/tutorials.md @@ -0,0 +1,17 @@ +## Getting Started With OpenAI Gym: The Basic Building Blocks + +https://blog.paperspace.com/getting-started-with-openai-gym/ + +A good starting point explaining all the basic building blocks of the Gym API. + + + +## Reinforcement Q-Learning from Scratch in Python with OpenAI Gym +Good Algorithmic Introduction to Reinforcement Learning showcasing how to use Gym API for Training Agents. 
+ +https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/ + + +## Tutorial: An Introduction to Reinforcement Learning Using OpenAI Gym + +https://www.gocoder.one/blog/rl-tutorial-with-openai-gym diff --git a/gym-0.21.0/gym/core.py b/gym-0.21.0/gym/core.py new file mode 100644 index 0000000000000000000000000000000000000000..ae7b32c355714a1fc2b5ea2f0d4ee01f4845fcf0 --- /dev/null +++ b/gym-0.21.0/gym/core.py @@ -0,0 +1,357 @@ +from abc import abstractmethod + +import gym +from gym import error +from gym.utils import closer + + +class Env(object): + """The main OpenAI Gym class. It encapsulates an environment with + arbitrary behind-the-scenes dynamics. An environment can be + partially or fully observed. + + The main API methods that users of this class need to know are: + + step + reset + render + close + seed + + And set the following attributes: + + action_space: The Space object corresponding to valid actions + observation_space: The Space object corresponding to valid observations + reward_range: A tuple corresponding to the min and max possible rewards + + Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range. + + The methods are accessed publicly as "step", "reset", etc... + """ + + # Set this in SOME subclasses + metadata = {"render.modes": []} + reward_range = (-float("inf"), float("inf")) + spec = None + + # Set these in ALL subclasses + action_space = None + observation_space = None + + @abstractmethod + def step(self, action): + """Run one timestep of the environment's dynamics. When end of + episode is reached, you are responsible for calling `reset()` + to reset this environment's state. + + Accepts an action and returns a tuple (observation, reward, done, info). 
+ + Args: + action (object): an action provided by the agent + + Returns: + observation (object): agent's observation of the current environment + reward (float) : amount of reward returned after previous action + done (bool): whether the episode has ended, in which case further step() calls will return undefined results + info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning) + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Resets the environment to an initial state and returns an initial + observation. + + Note that this function should not reset the environment's random + number generator(s); random variables in the environment's state should + be sampled independently between multiple calls to `reset()`. In other + words, each call of `reset()` should yield an environment suitable for + a new episode, independent of previous episodes. + + Returns: + observation (object): the initial observation. + """ + raise NotImplementedError + + @abstractmethod + def render(self, mode="human"): + """Renders the environment. + + The set of supported modes varies per environment. (And some + environments do not support rendering at all.) By convention, + if mode is: + + - human: render to the current display or terminal and + return nothing. Usually for human consumption. + - rgb_array: Return an numpy.ndarray with shape (x, y, 3), + representing RGB values for an x-by-y pixel image, suitable + for turning into a video. + - ansi: Return a string (str) or StringIO.StringIO containing a + terminal-style text representation. The text can include newlines + and ANSI escape sequences (e.g. for colors). + + Note: + Make sure that your class's metadata 'render.modes' key includes + the list of supported modes. It's recommended to call super() + in implementations to use the functionality of this method. 
+ + Args: + mode (str): the mode to render with + + Example: + + class MyEnv(Env): + metadata = {'render.modes': ['human', 'rgb_array']} + + def render(self, mode='human'): + if mode == 'rgb_array': + return np.array(...) # return RGB frame suitable for video + elif mode == 'human': + ... # pop up a window and render + else: + super(MyEnv, self).render(mode=mode) # just raise an exception + """ + raise NotImplementedError + + def close(self): + """Override close in your subclass to perform any necessary cleanup. + + Environments will automatically close() themselves when + garbage collected or when the program exits. + """ + pass + + def seed(self, seed=None): + """Sets the seed for this env's random number generator(s). + + Note: + Some environments use multiple pseudorandom number generators. + We want to capture all such seeds used in order to ensure that + there aren't accidental correlations between multiple generators. + + Returns: + list: Returns the list of seeds used in this env's random + number generators. The first value in the list should be the + "main" seed, or the value which a reproducer should pass to + 'seed'. Often, the main seed equals the provided 'seed', but + this won't be true if seed=None, for example. + """ + return + + @property + def unwrapped(self): + """Completely unwrap this env. + + Returns: + gym.Env: The base non-wrapped gym.Env instance + """ + return self + + def __str__(self): + if self.spec is None: + return "<{} instance>".format(type(self).__name__) + else: + return "<{}<{}>>".format(type(self).__name__, self.spec.id) + + def __enter__(self): + """Support with-statement for the environment.""" + return self + + def __exit__(self, *args): + """Support with-statement for the environment.""" + self.close() + # propagate exception + return False + + +class GoalEnv(Env): + """A goal-based environment. It functions just as any regular OpenAI Gym environment but it + imposes a required structure on the observation_space. 
More concretely, the observation + space is required to contain at least three elements, namely `observation`, `desired_goal`, and + `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve. + `achieved_goal` is the goal that it currently achieved instead. `observation` contains the + actual observations of the environment as per usual. + """ + + def reset(self): + # Enforce that each GoalEnv uses a Goal-compatible observation space. + if not isinstance(self.observation_space, gym.spaces.Dict): + raise error.Error( + "GoalEnv requires an observation space of type gym.spaces.Dict" + ) + for key in ["observation", "achieved_goal", "desired_goal"]: + if key not in self.observation_space.spaces: + raise error.Error( + 'GoalEnv requires the "{}" key to be part of the observation dictionary.'.format( + key + ) + ) + + @abstractmethod + def compute_reward(self, achieved_goal, desired_goal, info): + """Compute the step reward. This externalizes the reward function and makes + it dependent on a desired goal and the one that was achieved. If you wish to include + additional rewards that are independent of the goal, you can include the necessary values + to derive it in 'info' and compute it accordingly. + + Args: + achieved_goal (object): the goal that was achieved during execution + desired_goal (object): the desired goal that we asked the agent to attempt to achieve + info (dict): an info dictionary with additional information + + Returns: + float: The reward that corresponds to the provided achieved goal w.r.t. to the desired + goal. Note that the following should always hold true: + + ob, reward, done, info = env.step() + assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info) + """ + raise NotImplementedError + + +class Wrapper(Env): + """Wraps the environment to allow a modular transformation. + + This class is the base class for all wrappers. 
The subclass could override + some methods to change the behavior of the original environment without touching the + original code. + + .. note:: + + Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`. + + """ + + def __init__(self, env): + self.env = env + + self._action_space = None + self._observation_space = None + self._reward_range = None + self._metadata = None + + def __getattr__(self, name): + if name.startswith("_"): + raise AttributeError( + "attempted to get missing private attribute '{}'".format(name) + ) + return getattr(self.env, name) + + @property + def spec(self): + return self.env.spec + + @classmethod + def class_name(cls): + return cls.__name__ + + @property + def action_space(self): + if self._action_space is None: + return self.env.action_space + return self._action_space + + @action_space.setter + def action_space(self, space): + self._action_space = space + + @property + def observation_space(self): + if self._observation_space is None: + return self.env.observation_space + return self._observation_space + + @observation_space.setter + def observation_space(self, space): + self._observation_space = space + + @property + def reward_range(self): + if self._reward_range is None: + return self.env.reward_range + return self._reward_range + + @reward_range.setter + def reward_range(self, value): + self._reward_range = value + + @property + def metadata(self): + if self._metadata is None: + return self.env.metadata + return self._metadata + + @metadata.setter + def metadata(self, value): + self._metadata = value + + def step(self, action): + return self.env.step(action) + + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + def render(self, mode="human", **kwargs): + return self.env.render(mode, **kwargs) + + def close(self): + return self.env.close() + + def seed(self, seed=None): + return self.env.seed(seed) + + def compute_reward(self, achieved_goal, desired_goal, info): + return 
self.env.compute_reward(achieved_goal, desired_goal, info) + + def __str__(self): + return "<{}{}>".format(type(self).__name__, self.env) + + def __repr__(self): + return str(self) + + @property + def unwrapped(self): + return self.env.unwrapped + + +class ObservationWrapper(Wrapper): + def reset(self, **kwargs): + observation = self.env.reset(**kwargs) + return self.observation(observation) + + def step(self, action): + observation, reward, done, info = self.env.step(action) + return self.observation(observation), reward, done, info + + @abstractmethod + def observation(self, observation): + raise NotImplementedError + + +class RewardWrapper(Wrapper): + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + def step(self, action): + observation, reward, done, info = self.env.step(action) + return observation, self.reward(reward), done, info + + @abstractmethod + def reward(self, reward): + raise NotImplementedError + + +class ActionWrapper(Wrapper): + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + def step(self, action): + return self.env.step(self.action(action)) + + @abstractmethod + def action(self, action): + raise NotImplementedError + + @abstractmethod + def reverse_action(self, action): + raise NotImplementedError diff --git a/gym-0.21.0/gym/envs/box2d/bipedal_walker.py b/gym-0.21.0/gym/envs/box2d/bipedal_walker.py new file mode 100644 index 0000000000000000000000000000000000000000..34b564f217c13d0b76138bc5eba021cdba3718d1 --- /dev/null +++ b/gym-0.21.0/gym/envs/box2d/bipedal_walker.py @@ -0,0 +1,658 @@ +import sys +import math + +import numpy as np +import Box2D +from Box2D.b2 import ( + edgeShape, + circleShape, + fixtureDef, + polygonShape, + revoluteJointDef, + contactListener, +) + +import gym +from gym import spaces +from gym.utils import colorize, seeding, EzPickle + +# This is simple 4-joints walker robot environment. +# +# There are two versions: +# +# - Normal, with slightly uneven terrain. 
+# +# - Hardcore with ladders, stumps, pitfalls. +# +# Reward is given for moving forward, total 300+ points up to the far end. If the robot falls, +# it gets -100. Applying motor torque costs a small amount of points, a more optimal agent +# will get a better score. +# +# Heuristic is provided for testing; it's also useful to get demonstrations to +# learn from. To run heuristic: +# +# python gym/envs/box2d/bipedal_walker.py +# +# State consists of hull angle speed, angular velocity, horizontal speed, vertical speed, +# position of joints and joints angular speed, legs contact with ground, and 10 lidar +# rangefinder measurements to help to deal with the hardcore version. There are no coordinates +# in the state vector. Lidar is less useful in the normal version, but it works. +# +# To solve the game you need to get 300 points in 1600 time steps. +# +# To solve the hardcore version you need 300 points in 2000 time steps. +# +# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym. 
+ +FPS = 50 +SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well + +MOTORS_TORQUE = 80 +SPEED_HIP = 4 +SPEED_KNEE = 6 +LIDAR_RANGE = 160 / SCALE + +INITIAL_RANDOM = 5 + +HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)] +LEG_DOWN = -8 / SCALE +LEG_W, LEG_H = 8 / SCALE, 34 / SCALE + +VIEWPORT_W = 600 +VIEWPORT_H = 400 + +TERRAIN_STEP = 14 / SCALE +TERRAIN_LENGTH = 200 # in steps +TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4 +TERRAIN_GRASS = 10 # low long are grass spots, in steps +TERRAIN_STARTPAD = 20 # in steps +FRICTION = 2.5 + +HULL_FD = fixtureDef( + shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]), + density=5.0, + friction=0.1, + categoryBits=0x0020, + maskBits=0x001, # collide only with ground + restitution=0.0, +) # 0.99 bouncy + +LEG_FD = fixtureDef( + shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)), + density=1.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001, +) + +LOWER_FD = fixtureDef( + shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)), + density=1.0, + restitution=0.0, + categoryBits=0x0020, + maskBits=0x001, +) + + +class ContactDetector(contactListener): + def __init__(self, env): + contactListener.__init__(self) + self.env = env + + def BeginContact(self, contact): + if ( + self.env.hull == contact.fixtureA.body + or self.env.hull == contact.fixtureB.body + ): + self.env.game_over = True + for leg in [self.env.legs[1], self.env.legs[3]]: + if leg in [contact.fixtureA.body, contact.fixtureB.body]: + leg.ground_contact = True + + def EndContact(self, contact): + for leg in [self.env.legs[1], self.env.legs[3]]: + if leg in [contact.fixtureA.body, contact.fixtureB.body]: + leg.ground_contact = False + + +class BipedalWalker(gym.Env, EzPickle): + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS} + + hardcore = False + + def __init__(self): + EzPickle.__init__(self) + self.seed() + self.viewer = None + + self.world = Box2D.b2World() + 
self.terrain = None + self.hull = None + + self.prev_shaping = None + + self.fd_polygon = fixtureDef( + shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]), + friction=FRICTION, + ) + + self.fd_edge = fixtureDef( + shape=edgeShape(vertices=[(0, 0), (1, 1)]), + friction=FRICTION, + categoryBits=0x0001, + ) + + high = np.array([np.inf] * 24).astype(np.float32) + self.action_space = spaces.Box( + np.array([-1, -1, -1, -1]).astype(np.float32), + np.array([1, 1, 1, 1]).astype(np.float32), + ) + self.observation_space = spaces.Box(-high, high) + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _destroy(self): + if not self.terrain: + return + self.world.contactListener = None + for t in self.terrain: + self.world.DestroyBody(t) + self.terrain = [] + self.world.DestroyBody(self.hull) + self.hull = None + for leg in self.legs: + self.world.DestroyBody(leg) + self.legs = [] + self.joints = [] + + def _generate_terrain(self, hardcore): + GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) + state = GRASS + velocity = 0.0 + y = TERRAIN_HEIGHT + counter = TERRAIN_STARTPAD + oneshot = False + self.terrain = [] + self.terrain_x = [] + self.terrain_y = [] + for i in range(TERRAIN_LENGTH): + x = i * TERRAIN_STEP + self.terrain_x.append(x) + + if state == GRASS and not oneshot: + velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) + if i > TERRAIN_STARTPAD: + velocity += self.np_random.uniform(-1, 1) / SCALE # 1 + y += velocity + + elif state == PIT and oneshot: + counter = self.np_random.randint(3, 5) + poly = [ + (x, y), + (x + TERRAIN_STEP, y), + (x + TERRAIN_STEP, y - 4 * TERRAIN_STEP), + (x, y - 4 * TERRAIN_STEP), + ] + self.fd_polygon.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + + self.fd_polygon.shape.vertices = [ + (p[0] + TERRAIN_STEP * counter, p[1]) for p in poly + ] + t = 
self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + counter += 2 + original_y = y + + elif state == PIT and not oneshot: + y = original_y + if counter > 1: + y -= 4 * TERRAIN_STEP + + elif state == STUMP and oneshot: + counter = self.np_random.randint(1, 3) + poly = [ + (x, y), + (x + counter * TERRAIN_STEP, y), + (x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP), + (x, y + counter * TERRAIN_STEP), + ] + self.fd_polygon.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + + elif state == STAIRS and oneshot: + stair_height = +1 if self.np_random.rand() > 0.5 else -1 + stair_width = self.np_random.randint(4, 5) + stair_steps = self.np_random.randint(3, 5) + original_y = y + for s in range(stair_steps): + poly = [ + ( + x + (s * stair_width) * TERRAIN_STEP, + y + (s * stair_height) * TERRAIN_STEP, + ), + ( + x + ((1 + s) * stair_width) * TERRAIN_STEP, + y + (s * stair_height) * TERRAIN_STEP, + ), + ( + x + ((1 + s) * stair_width) * TERRAIN_STEP, + y + (-1 + s * stair_height) * TERRAIN_STEP, + ), + ( + x + (s * stair_width) * TERRAIN_STEP, + y + (-1 + s * stair_height) * TERRAIN_STEP, + ), + ] + self.fd_polygon.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_polygon) + t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6) + self.terrain.append(t) + counter = stair_steps * stair_width + + elif state == STAIRS and not oneshot: + s = stair_steps * stair_width - counter - stair_height + n = s / stair_width + y = original_y + (n * stair_height) * TERRAIN_STEP + + oneshot = False + self.terrain_y.append(y) + counter -= 1 + if counter == 0: + counter = self.np_random.randint(TERRAIN_GRASS / 2, TERRAIN_GRASS) + if state == GRASS and hardcore: + state = self.np_random.randint(1, _STATES_) + oneshot = True + else: + state = GRASS + oneshot = True + + self.terrain_poly = 
[] + for i in range(TERRAIN_LENGTH - 1): + poly = [ + (self.terrain_x[i], self.terrain_y[i]), + (self.terrain_x[i + 1], self.terrain_y[i + 1]), + ] + self.fd_edge.shape.vertices = poly + t = self.world.CreateStaticBody(fixtures=self.fd_edge) + color = (0.3, 1.0 if i % 2 == 0 else 0.8, 0.3) + t.color1 = color + t.color2 = color + self.terrain.append(t) + color = (0.4, 0.6, 0.3) + poly += [(poly[1][0], 0), (poly[0][0], 0)] + self.terrain_poly.append((poly, color)) + self.terrain.reverse() + + def _generate_clouds(self): + # Sorry for the clouds, couldn't resist + self.cloud_poly = [] + for i in range(TERRAIN_LENGTH // 20): + x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP + y = VIEWPORT_H / SCALE * 3 / 4 + poly = [ + ( + x + + 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5) + + self.np_random.uniform(0, 5 * TERRAIN_STEP), + y + + 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5) + + self.np_random.uniform(0, 5 * TERRAIN_STEP), + ) + for a in range(5) + ] + x1 = min([p[0] for p in poly]) + x2 = max([p[0] for p in poly]) + self.cloud_poly.append((poly, x1, x2)) + + def reset(self): + self._destroy() + self.world.contactListener_bug_workaround = ContactDetector(self) + self.world.contactListener = self.world.contactListener_bug_workaround + self.game_over = False + self.prev_shaping = None + self.scroll = 0.0 + self.lidar_render = 0 + + W = VIEWPORT_W / SCALE + H = VIEWPORT_H / SCALE + + self._generate_terrain(self.hardcore) + self._generate_clouds() + + init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2 + init_y = TERRAIN_HEIGHT + 2 * LEG_H + self.hull = self.world.CreateDynamicBody( + position=(init_x, init_y), fixtures=HULL_FD + ) + self.hull.color1 = (0.5, 0.4, 0.9) + self.hull.color2 = (0.3, 0.3, 0.5) + self.hull.ApplyForceToCenter( + (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True + ) + + self.legs = [] + self.joints = [] + for i in [-1, +1]: + leg = self.world.CreateDynamicBody( + position=(init_x, init_y - LEG_H / 2 - LEG_DOWN), + angle=(i * 
0.05), + fixtures=LEG_FD, + ) + leg.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0) + leg.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0) + rjd = revoluteJointDef( + bodyA=self.hull, + bodyB=leg, + localAnchorA=(0, LEG_DOWN), + localAnchorB=(0, LEG_H / 2), + enableMotor=True, + enableLimit=True, + maxMotorTorque=MOTORS_TORQUE, + motorSpeed=i, + lowerAngle=-0.8, + upperAngle=1.1, + ) + self.legs.append(leg) + self.joints.append(self.world.CreateJoint(rjd)) + + lower = self.world.CreateDynamicBody( + position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN), + angle=(i * 0.05), + fixtures=LOWER_FD, + ) + lower.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0) + lower.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0) + rjd = revoluteJointDef( + bodyA=leg, + bodyB=lower, + localAnchorA=(0, -LEG_H / 2), + localAnchorB=(0, LEG_H / 2), + enableMotor=True, + enableLimit=True, + maxMotorTorque=MOTORS_TORQUE, + motorSpeed=1, + lowerAngle=-1.6, + upperAngle=-0.1, + ) + lower.ground_contact = False + self.legs.append(lower) + self.joints.append(self.world.CreateJoint(rjd)) + + self.drawlist = self.terrain + self.legs + [self.hull] + + class LidarCallback(Box2D.b2.rayCastCallback): + def ReportFixture(self, fixture, point, normal, fraction): + if (fixture.filterData.categoryBits & 1) == 0: + return -1 + self.p2 = point + self.fraction = fraction + return fraction + + self.lidar = [LidarCallback() for _ in range(10)] + + return self.step(np.array([0, 0, 0, 0]))[0] + + def step(self, action): + # self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help + control_speed = False # Should be easier as well + if control_speed: + self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1)) + self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1)) + self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1)) + self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], 
-1, 1)) + else: + self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0])) + self.joints[0].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1) + ) + self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1])) + self.joints[1].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1) + ) + self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2])) + self.joints[2].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1) + ) + self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3])) + self.joints[3].maxMotorTorque = float( + MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1) + ) + + self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) + + pos = self.hull.position + vel = self.hull.linearVelocity + + for i in range(10): + self.lidar[i].fraction = 1.0 + self.lidar[i].p1 = pos + self.lidar[i].p2 = ( + pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE, + pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE, + ) + self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2) + + state = [ + self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible. 
+ 2.0 * self.hull.angularVelocity / FPS, + 0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range + 0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS, + self.joints[ + 0 + ].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too) + self.joints[0].speed / SPEED_HIP, + self.joints[1].angle + 1.0, + self.joints[1].speed / SPEED_KNEE, + 1.0 if self.legs[1].ground_contact else 0.0, + self.joints[2].angle, + self.joints[2].speed / SPEED_HIP, + self.joints[3].angle + 1.0, + self.joints[3].speed / SPEED_KNEE, + 1.0 if self.legs[3].ground_contact else 0.0, + ] + state += [l.fraction for l in self.lidar] + assert len(state) == 24 + + self.scroll = pos.x - VIEWPORT_W / SCALE / 5 + + shaping = ( + 130 * pos[0] / SCALE + ) # moving forward is a way to receive reward (normalized to get 300 on completion) + shaping -= 5.0 * abs( + state[0] + ) # keep head straight, other than that and falling, any behavior is unpunished + + reward = 0 + if self.prev_shaping is not None: + reward = shaping - self.prev_shaping + self.prev_shaping = shaping + + for a in action: + reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1) + # normalized to about -50.0 using heuristic, more optimal agent should spend less + + done = False + if self.game_over or pos[0] < 0: + reward = -100 + done = True + if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP: + done = True + return np.array(state, dtype=np.float32), reward, done, {} + + def render(self, mode="human"): + from gym.envs.classic_control import rendering + + if self.viewer is None: + self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H) + self.viewer.set_bounds( + self.scroll, VIEWPORT_W / SCALE + self.scroll, 0, VIEWPORT_H / SCALE + ) + + self.viewer.draw_polygon( + [ + (self.scroll, 0), + (self.scroll + VIEWPORT_W / SCALE, 0), + (self.scroll + VIEWPORT_W / SCALE, VIEWPORT_H / SCALE), + (self.scroll, VIEWPORT_H / SCALE), + ], + color=(0.9, 0.9, 1.0), 
+ ) + for poly, x1, x2 in self.cloud_poly: + if x2 < self.scroll / 2: + continue + if x1 > self.scroll / 2 + VIEWPORT_W / SCALE: + continue + self.viewer.draw_polygon( + [(p[0] + self.scroll / 2, p[1]) for p in poly], color=(1, 1, 1) + ) + for poly, color in self.terrain_poly: + if poly[1][0] < self.scroll: + continue + if poly[0][0] > self.scroll + VIEWPORT_W / SCALE: + continue + self.viewer.draw_polygon(poly, color=color) + + self.lidar_render = (self.lidar_render + 1) % 100 + i = self.lidar_render + if i < 2 * len(self.lidar): + l = ( + self.lidar[i] + if i < len(self.lidar) + else self.lidar[len(self.lidar) - i - 1] + ) + self.viewer.draw_polyline([l.p1, l.p2], color=(1, 0, 0), linewidth=1) + + for obj in self.drawlist: + for f in obj.fixtures: + trans = f.body.transform + if type(f.shape) is circleShape: + t = rendering.Transform(translation=trans * f.shape.pos) + self.viewer.draw_circle( + f.shape.radius, 30, color=obj.color1 + ).add_attr(t) + self.viewer.draw_circle( + f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2 + ).add_attr(t) + else: + path = [trans * v for v in f.shape.vertices] + self.viewer.draw_polygon(path, color=obj.color1) + path.append(path[0]) + self.viewer.draw_polyline(path, color=obj.color2, linewidth=2) + + flagy1 = TERRAIN_HEIGHT + flagy2 = flagy1 + 50 / SCALE + x = TERRAIN_STEP * 3 + self.viewer.draw_polyline( + [(x, flagy1), (x, flagy2)], color=(0, 0, 0), linewidth=2 + ) + f = [ + (x, flagy2), + (x, flagy2 - 10 / SCALE), + (x + 25 / SCALE, flagy2 - 5 / SCALE), + ] + self.viewer.draw_polygon(f, color=(0.9, 0.2, 0)) + self.viewer.draw_polyline(f + [f[0]], color=(0, 0, 0), linewidth=2) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def close(self): + if self.viewer is not None: + self.viewer.close() + self.viewer = None + + +class BipedalWalkerHardcore(BipedalWalker): + hardcore = True + + +if __name__ == "__main__": + # Heurisic: suboptimal, have no notion of balance. 
+ env = BipedalWalker() + env.reset() + steps = 0 + total_reward = 0 + a = np.array([0.0, 0.0, 0.0, 0.0]) + STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3 + SPEED = 0.29 # Will fall forward on higher speed + state = STAY_ON_ONE_LEG + moving_leg = 0 + supporting_leg = 1 - moving_leg + SUPPORT_KNEE_ANGLE = +0.1 + supporting_knee_angle = SUPPORT_KNEE_ANGLE + while True: + s, r, done, info = env.step(a) + total_reward += r + if steps % 20 == 0 or done: + print("\naction " + str(["{:+0.2f}".format(x) for x in a])) + print("step {} total_reward {:+0.2f}".format(steps, total_reward)) + print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4]])) + print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9]])) + print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]])) + steps += 1 + + contact0 = s[8] + contact1 = s[13] + moving_s_base = 4 + 5 * moving_leg + supporting_s_base = 4 + 5 * supporting_leg + + hip_targ = [None, None] # -0.8 .. +1.1 + knee_targ = [None, None] # -0.6 .. +0.9 + hip_todo = [0.0, 0.0] + knee_todo = [0.0, 0.0] + + if state == STAY_ON_ONE_LEG: + hip_targ[moving_leg] = 1.1 + knee_targ[moving_leg] = -0.6 + supporting_knee_angle += 0.03 + if s[2] > SPEED: + supporting_knee_angle += 0.03 + supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE) + knee_targ[supporting_leg] = supporting_knee_angle + if s[supporting_s_base + 0] < 0.10: # supporting leg is behind + state = PUT_OTHER_DOWN + if state == PUT_OTHER_DOWN: + hip_targ[moving_leg] = +0.1 + knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE + knee_targ[supporting_leg] = supporting_knee_angle + if s[moving_s_base + 4]: + state = PUSH_OFF + supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE) + if state == PUSH_OFF: + knee_targ[moving_leg] = supporting_knee_angle + knee_targ[supporting_leg] = +1.0 + if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED: + state = STAY_ON_ONE_LEG + moving_leg = 1 - moving_leg + supporting_leg = 1 - moving_leg + + if hip_targ[0]: + 
hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5] + if hip_targ[1]: + hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10] + if knee_targ[0]: + knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7] + if knee_targ[1]: + knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12] + + hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1] # PID to keep head strait + hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1] + knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations + knee_todo[1] -= 15.0 * s[3] + + a[0] = hip_todo[0] + a[1] = knee_todo[0] + a[2] = hip_todo[1] + a[3] = knee_todo[1] + a = np.clip(0.5 * a, -1.0, 1.0) + + env.render() + if done: + break diff --git a/gym-0.21.0/gym/envs/classic_control/cartpole.py b/gym-0.21.0/gym/envs/classic_control/cartpole.py new file mode 100644 index 0000000000000000000000000000000000000000..e11adcba6f413017e8661e8990dde668a63a129c --- /dev/null +++ b/gym-0.21.0/gym/envs/classic_control/cartpole.py @@ -0,0 +1,234 @@ +""" +Classic cart-pole system implemented by Rich Sutton et al. +Copied from http://incompleteideas.net/sutton/book/code/pole.c +permalink: https://perma.cc/C9ZM-652R +""" + +import math +import gym +from gym import spaces, logger +from gym.utils import seeding +import numpy as np + + +class CartPoleEnv(gym.Env): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along + a frictionless track. The pendulum starts upright, and the goal is to + prevent it from falling over by increasing and reducing the cart's + velocity. 
+ + Source: + This environment corresponds to the version of the cart-pole problem + described by Barto, Sutton, and Anderson + + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -0.418 rad (-24 deg) 0.418 rad (24 deg) + 3 Pole Angular Velocity -Inf Inf + + Actions: + Type: Discrete(2) + Num Action + 0 Push cart to the left + 1 Push cart to the right + + Note: The amount the velocity that is reduced or increased is not + fixed; it depends on the angle the pole is pointing. This is because + the center of gravity of the pole increases the amount of energy needed + to move the cart underneath it + + Reward: + Reward is 1 for every step taken, including the termination step + + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + + Episode Termination: + Pole Angle is more than 12 degrees. + Cart Position is more than 2.4 (center of the cart reaches the edge of + the display). + Episode length is greater than 200. + Solved Requirements: + Considered solved when the average return is greater than or equal to + 195.0 over 100 consecutive trials. + """ + + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50} + + def __init__(self): + self.gravity = 9.8 + self.masscart = 1.0 + self.masspole = 0.1 + self.total_mass = self.masspole + self.masscart + self.length = 0.5 # actually half the pole's length + self.polemass_length = self.masspole * self.length + self.force_mag = 10.0 + self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = "euler" + + # Angle at which to fail the episode + self.theta_threshold_radians = 12 * 2 * math.pi / 360 + self.x_threshold = 2.4 + + # Angle limit set to 2 * theta_threshold_radians so failing observation + # is still within bounds. 
+ high = np.array( + [ + self.x_threshold * 2, + np.finfo(np.float32).max, + self.theta_threshold_radians * 2, + np.finfo(np.float32).max, + ], + dtype=np.float32, + ) + + self.action_space = spaces.Discrete(2) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.seed() + self.viewer = None + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + err_msg = "%r (%s) invalid" % (action, type(action)) + assert self.action_space.contains(action), err_msg + + x, x_dot, theta, theta_dot = self.state + force = self.force_mag if action == 1 else -self.force_mag + costheta = math.cos(theta) + sintheta = math.sin(theta) + + # For the interested reader: + # https://coneural.org/florian/papers/05_cart_pole.pdf + temp = ( + force + self.polemass_length * theta_dot ** 2 * sintheta + ) / self.total_mass + thetaacc = (self.gravity * sintheta - costheta * temp) / ( + self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass) + ) + xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass + + if self.kinematics_integrator == "euler": + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot + + self.state = (x, x_dot, theta, theta_dot) + + done = bool( + x < -self.x_threshold + or x > self.x_threshold + or theta < -self.theta_threshold_radians + or theta > self.theta_threshold_radians + ) + + if not done: + reward = 1.0 + elif self.steps_beyond_done is None: + # Pole just fell! 
+ self.steps_beyond_done = 0 + reward = 1.0 + else: + if self.steps_beyond_done == 0: + logger.warn( + "You are calling 'step()' even though this " + "environment has already returned done = True. You " + "should always call 'reset()' once you receive 'done = " + "True' -- any further steps are undefined behavior." + ) + self.steps_beyond_done += 1 + reward = 0.0 + + return np.array(self.state, dtype=np.float32), reward, done, {} + + def reset(self): + self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) + self.steps_beyond_done = None + return np.array(self.state, dtype=np.float32) + + def render(self, mode="human"): + screen_width = 600 + screen_height = 400 + + world_width = self.x_threshold * 2 + scale = screen_width / world_width + carty = 100 # TOP OF CART + polewidth = 10.0 + polelen = scale * (2 * self.length) + cartwidth = 50.0 + cartheight = 30.0 + + if self.viewer is None: + from gym.envs.classic_control import rendering + + self.viewer = rendering.Viewer(screen_width, screen_height) + l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 + axleoffset = cartheight / 4.0 + cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + self.carttrans = rendering.Transform() + cart.add_attr(self.carttrans) + self.viewer.add_geom(cart) + l, r, t, b = ( + -polewidth / 2, + polewidth / 2, + polelen - polewidth / 2, + -polewidth / 2, + ) + pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + pole.set_color(0.8, 0.6, 0.4) + self.poletrans = rendering.Transform(translation=(0, axleoffset)) + pole.add_attr(self.poletrans) + pole.add_attr(self.carttrans) + self.viewer.add_geom(pole) + self.axle = rendering.make_circle(polewidth / 2) + self.axle.add_attr(self.poletrans) + self.axle.add_attr(self.carttrans) + self.axle.set_color(0.5, 0.5, 0.8) + self.viewer.add_geom(self.axle) + self.track = rendering.Line((0, carty), (screen_width, carty)) + self.track.set_color(0, 0, 0) + self.viewer.add_geom(self.track) + + 
self._pole_geom = pole + + if self.state is None: + return None + + # Edit the pole polygon vertex + pole = self._pole_geom + l, r, t, b = ( + -polewidth / 2, + polewidth / 2, + polelen - polewidth / 2, + -polewidth / 2, + ) + pole.v = [(l, b), (l, t), (r, t), (r, b)] + + x = self.state + cartx = x[0] * scale + screen_width / 2.0 # MIDDLE OF CART + self.carttrans.set_translation(cartx, carty) + self.poletrans.set_rotation(-x[2]) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/gym-0.21.0/gym/envs/classic_control/mountain_car.py b/gym-0.21.0/gym/envs/classic_control/mountain_car.py new file mode 100644 index 0000000000000000000000000000000000000000..e096774b00cf316451db524a3c223609f992cf66 --- /dev/null +++ b/gym-0.21.0/gym/envs/classic_control/mountain_car.py @@ -0,0 +1,177 @@ +""" +http://incompleteideas.net/MountainCar/MountainCar1.cp +permalink: https://perma.cc/6Z2N-PFWC +""" +import math + +import numpy as np + +import gym +from gym import spaces +from gym.utils import seeding + + +class MountainCarEnv(gym.Env): + """ + Description: + The agent (a car) is started at the bottom of a valley. For any given + state the agent may choose to accelerate to the left, right or cease + any acceleration. + + Source: + The environment appeared first in Andrew Moore's PhD Thesis (1990). + + Observation: + Type: Box(2) + Num Observation Min Max + 0 Car Position -1.2 0.6 + 1 Car Velocity -0.07 0.07 + + Actions: + Type: Discrete(3) + Num Action + 0 Accelerate to the Left + 1 Don't accelerate + 2 Accelerate to the Right + + Note: This does not affect the amount of velocity affected by the + gravitational pull acting on the car. + + Reward: + Reward of 0 is awarded if the agent reached the flag (position = 0.5) + on top of the mountain. + Reward of -1 is awarded if the position of the agent is less than 0.5. 
+ + Starting State: + The position of the car is assigned a uniform random value in + [-0.6 , -0.4]. + The starting velocity of the car is always assigned to 0. + + Episode Termination: + The car position is more than 0.5 + Episode length is greater than 200 + """ + + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30} + + def __init__(self, goal_velocity=0): + self.min_position = -1.2 + self.max_position = 0.6 + self.max_speed = 0.07 + self.goal_position = 0.5 + self.goal_velocity = goal_velocity + + self.force = 0.001 + self.gravity = 0.0025 + + self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32) + self.high = np.array([self.max_position, self.max_speed], dtype=np.float32) + + self.viewer = None + + self.action_space = spaces.Discrete(3) + self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32) + + self.seed() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + assert self.action_space.contains(action), "%r (%s) invalid" % ( + action, + type(action), + ) + + position, velocity = self.state + velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity) + velocity = np.clip(velocity, -self.max_speed, self.max_speed) + position += velocity + position = np.clip(position, self.min_position, self.max_position) + if position == self.min_position and velocity < 0: + velocity = 0 + + done = bool(position >= self.goal_position and velocity >= self.goal_velocity) + reward = -1.0 + + self.state = (position, velocity) + return np.array(self.state, dtype=np.float32), reward, done, {} + + def reset(self): + self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0]) + return np.array(self.state, dtype=np.float32) + + def _height(self, xs): + return np.sin(3 * xs) * 0.45 + 0.55 + + def render(self, mode="human"): + screen_width = 600 + screen_height = 400 + + world_width = self.max_position - 
self.min_position + scale = screen_width / world_width + carwidth = 40 + carheight = 20 + + if self.viewer is None: + from gym.envs.classic_control import rendering + + self.viewer = rendering.Viewer(screen_width, screen_height) + xs = np.linspace(self.min_position, self.max_position, 100) + ys = self._height(xs) + xys = list(zip((xs - self.min_position) * scale, ys * scale)) + + self.track = rendering.make_polyline(xys) + self.track.set_linewidth(4) + self.viewer.add_geom(self.track) + + clearance = 10 + + l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0 + car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + car.add_attr(rendering.Transform(translation=(0, clearance))) + self.cartrans = rendering.Transform() + car.add_attr(self.cartrans) + self.viewer.add_geom(car) + frontwheel = rendering.make_circle(carheight / 2.5) + frontwheel.set_color(0.5, 0.5, 0.5) + frontwheel.add_attr( + rendering.Transform(translation=(carwidth / 4, clearance)) + ) + frontwheel.add_attr(self.cartrans) + self.viewer.add_geom(frontwheel) + backwheel = rendering.make_circle(carheight / 2.5) + backwheel.add_attr( + rendering.Transform(translation=(-carwidth / 4, clearance)) + ) + backwheel.add_attr(self.cartrans) + backwheel.set_color(0.5, 0.5, 0.5) + self.viewer.add_geom(backwheel) + flagx = (self.goal_position - self.min_position) * scale + flagy1 = self._height(self.goal_position) * scale + flagy2 = flagy1 + 50 + flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2)) + self.viewer.add_geom(flagpole) + flag = rendering.FilledPolygon( + [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)] + ) + flag.set_color(0.8, 0.8, 0) + self.viewer.add_geom(flag) + + pos = self.state[0] + self.cartrans.set_translation( + (pos - self.min_position) * scale, self._height(pos) * scale + ) + self.cartrans.set_rotation(math.cos(3 * pos)) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def get_keys_to_action(self): + # Control with left and right 
arrow keys. + return {(): 1, (276,): 0, (275,): 2, (275, 276): 1} + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/gym-0.21.0/gym/envs/classic_control/pendulum.py b/gym-0.21.0/gym/envs/classic_control/pendulum.py new file mode 100644 index 0000000000000000000000000000000000000000..10ced4d43d218fbf0bb344b3bbc006fcaa7f8362 --- /dev/null +++ b/gym-0.21.0/gym/envs/classic_control/pendulum.py @@ -0,0 +1,94 @@ +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np +from os import path + + +class PendulumEnv(gym.Env): + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30} + + def __init__(self, g=10.0): + self.max_speed = 8 + self.max_torque = 2.0 + self.dt = 0.05 + self.g = g + self.m = 1.0 + self.l = 1.0 + self.viewer = None + + high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32) + self.action_space = spaces.Box( + low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32 + ) + self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32) + + self.seed() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, u): + th, thdot = self.state # th := theta + + g = self.g + m = self.m + l = self.l + dt = self.dt + + u = np.clip(u, -self.max_torque, self.max_torque)[0] + self.last_u = u # for rendering + costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2) + + newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt + newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) + newth = th + newthdot * dt + + self.state = np.array([newth, newthdot]) + return self._get_obs(), -costs, False, {} + + def reset(self): + high = np.array([np.pi, 1]) + self.state = self.np_random.uniform(low=-high, high=high) + self.last_u = None + return self._get_obs() + + def _get_obs(self): + theta, thetadot = self.state + return 
np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32) + + def render(self, mode="human"): + if self.viewer is None: + from gym.envs.classic_control import rendering + + self.viewer = rendering.Viewer(500, 500) + self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2) + rod = rendering.make_capsule(1, 0.2) + rod.set_color(0.8, 0.3, 0.3) + self.pole_transform = rendering.Transform() + rod.add_attr(self.pole_transform) + self.viewer.add_geom(rod) + axle = rendering.make_circle(0.05) + axle.set_color(0, 0, 0) + self.viewer.add_geom(axle) + fname = path.join(path.dirname(__file__), "assets/clockwise.png") + self.img = rendering.Image(fname, 1.0, 1.0) + self.imgtrans = rendering.Transform() + self.img.add_attr(self.imgtrans) + + self.viewer.add_onetime(self.img) + self.pole_transform.set_rotation(self.state[0] + np.pi / 2) + if self.last_u is not None: + self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2) + + return self.viewer.render(return_rgb_array=mode == "rgb_array") + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None + + +def angle_normalize(x): + return ((x + np.pi) % (2 * np.pi)) - np.pi diff --git a/gym-0.21.0/gym/envs/mujoco/ant.py b/gym-0.21.0/gym/envs/mujoco/ant.py new file mode 100644 index 0000000000000000000000000000000000000000..ae0a2f8dcf9846d6c28ced183e99bab1789f7014 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/ant.py @@ -0,0 +1,56 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + + +class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5) + utils.EzPickle.__init__(self) + + def step(self, a): + xposbefore = self.get_body_com("torso")[0] + self.do_simulation(a, self.frame_skip) + xposafter = self.get_body_com("torso")[0] + forward_reward = (xposafter - xposbefore) / self.dt + ctrl_cost = 0.5 * np.square(a).sum() + contact_cost = ( + 0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 
1))) + ) + survive_reward = 1.0 + reward = forward_reward - ctrl_cost - contact_cost + survive_reward + state = self.state_vector() + notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0 + done = not notdone + ob = self._get_obs() + return ( + ob, + reward, + done, + dict( + reward_forward=forward_reward, + reward_ctrl=-ctrl_cost, + reward_contact=-contact_cost, + reward_survive=survive_reward, + ), + ) + + def _get_obs(self): + return np.concatenate( + [ + self.sim.data.qpos.flat[2:], + self.sim.data.qvel.flat, + np.clip(self.sim.data.cfrc_ext, -1, 1).flat, + ] + ) + + def reset_model(self): + qpos = self.init_qpos + self.np_random.uniform( + size=self.model.nq, low=-0.1, high=0.1 + ) + qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1 + self.set_state(qpos, qvel) + return self._get_obs() + + def viewer_setup(self): + self.viewer.cam.distance = self.model.stat.extent * 0.5 diff --git a/gym-0.21.0/gym/envs/mujoco/assets/thrower.xml b/gym-0.21.0/gym/envs/mujoco/assets/thrower.xml new file mode 100644 index 0000000000000000000000000000000000000000..b68f256054915f4df2a9318bf5db721062aa072e --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/assets/thrower.xml @@ -0,0 +1,127 @@ + + + diff --git a/gym-0.21.0/gym/envs/mujoco/hopper.py b/gym-0.21.0/gym/envs/mujoco/hopper.py new file mode 100644 index 0000000000000000000000000000000000000000..46bf0a7f5d9968b84ab31d94f2688ce4ede1f101 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/hopper.py @@ -0,0 +1,48 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + + +class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4) + utils.EzPickle.__init__(self) + + def step(self, a): + posbefore = self.sim.data.qpos[0] + self.do_simulation(a, self.frame_skip) + posafter, height, ang = self.sim.data.qpos[0:3] + alive_bonus = 1.0 + reward = (posafter - posbefore) / self.dt + reward += alive_bonus + 
reward -= 1e-3 * np.square(a).sum() + s = self.state_vector() + done = not ( + np.isfinite(s).all() + and (np.abs(s[2:]) < 100).all() + and (height > 0.7) + and (abs(ang) < 0.2) + ) + ob = self._get_obs() + return ob, reward, done, {} + + def _get_obs(self): + return np.concatenate( + [self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)] + ) + + def reset_model(self): + qpos = self.init_qpos + self.np_random.uniform( + low=-0.005, high=0.005, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=-0.005, high=0.005, size=self.model.nv + ) + self.set_state(qpos, qvel) + return self._get_obs() + + def viewer_setup(self): + self.viewer.cam.trackbodyid = 2 + self.viewer.cam.distance = self.model.stat.extent * 0.75 + self.viewer.cam.lookat[2] = 1.15 + self.viewer.cam.elevation = -20 diff --git a/gym-0.21.0/gym/envs/mujoco/hopper_v3.py b/gym-0.21.0/gym/envs/mujoco/hopper_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..8bb80024124daea23dbb5efbb0d80e5ea0f38418 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/hopper_v3.py @@ -0,0 +1,137 @@ +import numpy as np +from gym.envs.mujoco import mujoco_env +from gym import utils + + +DEFAULT_CAMERA_CONFIG = { + "trackbodyid": 2, + "distance": 3.0, + "lookat": np.array((0.0, 0.0, 1.15)), + "elevation": -20.0, +} + + +class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__( + self, + xml_file="hopper.xml", + forward_reward_weight=1.0, + ctrl_cost_weight=1e-3, + healthy_reward=1.0, + terminate_when_unhealthy=True, + healthy_state_range=(-100.0, 100.0), + healthy_z_range=(0.7, float("inf")), + healthy_angle_range=(-0.2, 0.2), + reset_noise_scale=5e-3, + exclude_current_positions_from_observation=True, + ): + utils.EzPickle.__init__(**locals()) + + self._forward_reward_weight = forward_reward_weight + + self._ctrl_cost_weight = ctrl_cost_weight + + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + + 
self._healthy_state_range = healthy_state_range + self._healthy_z_range = healthy_z_range + self._healthy_angle_range = healthy_angle_range + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + + @property + def healthy_reward(self): + return ( + float(self.is_healthy or self._terminate_when_unhealthy) + * self._healthy_reward + ) + + def control_cost(self, action): + control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) + return control_cost + + @property + def is_healthy(self): + z, angle = self.sim.data.qpos[1:3] + state = self.state_vector()[2:] + + min_state, max_state = self._healthy_state_range + min_z, max_z = self._healthy_z_range + min_angle, max_angle = self._healthy_angle_range + + healthy_state = np.all(np.logical_and(min_state < state, state < max_state)) + healthy_z = min_z < z < max_z + healthy_angle = min_angle < angle < max_angle + + is_healthy = all((healthy_state, healthy_z, healthy_angle)) + + return is_healthy + + @property + def done(self): + done = not self.is_healthy if self._terminate_when_unhealthy else False + return done + + def _get_obs(self): + position = self.sim.data.qpos.flat.copy() + velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10) + + if self._exclude_current_positions_from_observation: + position = position[1:] + + observation = np.concatenate((position, velocity)).ravel() + return observation + + def step(self, action): + x_position_before = self.sim.data.qpos[0] + self.do_simulation(action, self.frame_skip) + x_position_after = self.sim.data.qpos[0] + x_velocity = (x_position_after - x_position_before) / self.dt + + ctrl_cost = self.control_cost(action) + + forward_reward = self._forward_reward_weight * x_velocity + healthy_reward = self.healthy_reward + + rewards = forward_reward + healthy_reward + costs = ctrl_cost + + observation = self._get_obs() + 
reward = rewards - costs + done = self.done + info = { + "x_position": x_position_after, + "x_velocity": x_velocity, + } + + return observation, reward, done, info + + def reset_model(self): + noise_low = -self._reset_noise_scale + noise_high = self._reset_noise_scale + + qpos = self.init_qpos + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nv + ) + + self.set_state(qpos, qvel) + + observation = self._get_obs() + return observation + + def viewer_setup(self): + for key, value in DEFAULT_CAMERA_CONFIG.items(): + if isinstance(value, np.ndarray): + getattr(self.viewer.cam, key)[:] = value + else: + setattr(self.viewer.cam, key, value) diff --git a/gym-0.21.0/gym/envs/mujoco/pusher.py b/gym-0.21.0/gym/envs/mujoco/pusher.py new file mode 100644 index 0000000000000000000000000000000000000000..e82adc6634bcad40e02ca9e0ce70ecd2b71041d2 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/pusher.py @@ -0,0 +1,63 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + +import mujoco_py + + +class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + utils.EzPickle.__init__(self) + mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5) + + def step(self, a): + vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") + vec_2 = self.get_body_com("object") - self.get_body_com("goal") + + reward_near = -np.linalg.norm(vec_1) + reward_dist = -np.linalg.norm(vec_2) + reward_ctrl = -np.square(a).sum() + reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near + + self.do_simulation(a, self.frame_skip) + ob = self._get_obs() + done = False + return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) + + def viewer_setup(self): + self.viewer.cam.trackbodyid = -1 + self.viewer.cam.distance = 4.0 + + def reset_model(self): + qpos = self.init_qpos + + self.goal_pos = 
np.asarray([0, 0]) + while True: + self.cylinder_pos = np.concatenate( + [ + self.np_random.uniform(low=-0.3, high=0, size=1), + self.np_random.uniform(low=-0.2, high=0.2, size=1), + ] + ) + if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: + break + + qpos[-4:-2] = self.cylinder_pos + qpos[-2:] = self.goal_pos + qvel = self.init_qvel + self.np_random.uniform( + low=-0.005, high=0.005, size=self.model.nv + ) + qvel[-4:] = 0 + self.set_state(qpos, qvel) + return self._get_obs() + + def _get_obs(self): + return np.concatenate( + [ + self.sim.data.qpos.flat[:7], + self.sim.data.qvel.flat[:7], + self.get_body_com("tips_arm"), + self.get_body_com("object"), + self.get_body_com("goal"), + ] + ) diff --git a/gym-0.21.0/gym/envs/mujoco/swimmer_v3.py b/gym-0.21.0/gym/envs/mujoco/swimmer_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..8cf794d4791b28d72891a597a06f5fa0f4786c9a --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/swimmer_v3.py @@ -0,0 +1,94 @@ +import numpy as np +from gym.envs.mujoco import mujoco_env +from gym import utils + + +DEFAULT_CAMERA_CONFIG = {} + + +class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__( + self, + xml_file="swimmer.xml", + forward_reward_weight=1.0, + ctrl_cost_weight=1e-4, + reset_noise_scale=0.1, + exclude_current_positions_from_observation=True, + ): + utils.EzPickle.__init__(**locals()) + + self._forward_reward_weight = forward_reward_weight + self._ctrl_cost_weight = ctrl_cost_weight + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + + def control_cost(self, action): + control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) + return control_cost + + def step(self, action): + xy_position_before = self.sim.data.qpos[0:2].copy() + self.do_simulation(action, self.frame_skip) + xy_position_after = 
self.sim.data.qpos[0:2].copy() + + xy_velocity = (xy_position_after - xy_position_before) / self.dt + x_velocity, y_velocity = xy_velocity + + forward_reward = self._forward_reward_weight * x_velocity + + ctrl_cost = self.control_cost(action) + + observation = self._get_obs() + reward = forward_reward - ctrl_cost + done = False + info = { + "reward_fwd": forward_reward, + "reward_ctrl": -ctrl_cost, + "x_position": xy_position_after[0], + "y_position": xy_position_after[1], + "distance_from_origin": np.linalg.norm(xy_position_after, ord=2), + "x_velocity": x_velocity, + "y_velocity": y_velocity, + "forward_reward": forward_reward, + } + + return observation, reward, done, info + + def _get_obs(self): + position = self.sim.data.qpos.flat.copy() + velocity = self.sim.data.qvel.flat.copy() + + if self._exclude_current_positions_from_observation: + position = position[2:] + + observation = np.concatenate([position, velocity]).ravel() + return observation + + def reset_model(self): + noise_low = -self._reset_noise_scale + noise_high = self._reset_noise_scale + + qpos = self.init_qpos + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nv + ) + + self.set_state(qpos, qvel) + + observation = self._get_obs() + return observation + + def viewer_setup(self): + for key, value in DEFAULT_CAMERA_CONFIG.items(): + if isinstance(value, np.ndarray): + getattr(self.viewer.cam, key)[:] = value + else: + setattr(self.viewer.cam, key, value) diff --git a/gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml b/gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml new file mode 100644 index 0000000000000000000000000000000000000000..c73d6249f3d809336c91798ee10dd57df73de4df --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml b/gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml new file mode 100644 index 0000000000000000000000000000000000000000..b649f102917eb404c7278c5e7588eea6f878a364 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml b/gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml new file mode 100644 index 0000000000000000000000000000000000000000..71f6dfe621860e86569f00895bb1e73286f4558f --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml b/gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml new file mode 100644 index 0000000000000000000000000000000000000000..f27f265551c741c4eab32d238869d5c43357cb84 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml @@ -0,0 +1,254 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl new file mode 100644 index 0000000000000000000000000000000000000000..7bc5e20e0687f593401d369224d6f36d78f37534 Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl differ diff --git 
a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl new file mode 100644 index 0000000000000000000000000000000000000000..4faedd75409416ac79c8481c067bddff06eac19c Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl new file mode 100644 index 0000000000000000000000000000000000000000..535cf4dbca59590f813c6dcc8abbd10bc7810a3a Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/fetch/reach.py b/gym-0.21.0/gym/envs/robotics/fetch/reach.py new file mode 100644 index 0000000000000000000000000000000000000000..67520b95064cb675a2430d7b035a4fd8dd9b89c7 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/fetch/reach.py @@ -0,0 +1,32 @@ +import os +from gym import utils +from gym.envs.robotics import fetch_env + + +# Ensure we get the path separator correct on windows +MODEL_XML_PATH = os.path.join("fetch", "reach.xml") + + +class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle): + def __init__(self, reward_type="sparse"): + initial_qpos = { + "robot0:slide0": 0.4049, + "robot0:slide1": 0.48, + "robot0:slide2": 0.0, + } + fetch_env.FetchEnv.__init__( + self, + MODEL_XML_PATH, + has_object=False, + block_gripper=True, + n_substeps=20, + gripper_extra_height=0.2, + target_in_the_air=True, + target_offset=0.0, + obj_range=0.15, + target_range=0.15, + distance_threshold=0.05, + initial_qpos=initial_qpos, + reward_type=reward_type, + ) + utils.EzPickle.__init__(self, reward_type=reward_type) diff --git a/gym-0.21.0/gym/envs/robotics/hand/__init__.py b/gym-0.21.0/gym/envs/robotics/hand/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/gym-0.21.0/gym/envs/robotics/hand/manipulate.py b/gym-0.21.0/gym/envs/robotics/hand/manipulate.py new file mode 100644 index 0000000000000000000000000000000000000000..e3eca062d8b96b9a1dc1afbae74de0d8cdcb6fde --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/hand/manipulate.py @@ -0,0 +1,354 @@ +import os +import numpy as np + +from gym import utils, error +from gym.envs.robotics import rotations, hand_env +from gym.envs.robotics.utils import robot_get_obs + +try: + import mujoco_py +except ImportError as e: + raise error.DependencyNotInstalled( + "{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format( + e + ) + ) + + +def quat_from_angle_and_axis(angle, axis): + assert axis.shape == (3,) + axis /= np.linalg.norm(axis) + quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis]) + quat /= np.linalg.norm(quat) + return quat + + +# Ensure we get the path separator correct on windows +MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block.xml") +MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg.xml") +MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen.xml") + + +class ManipulateEnv(hand_env.HandEnv): + def __init__( + self, + model_path, + target_position, + target_rotation, + target_position_range, + reward_type, + initial_qpos=None, + randomize_initial_position=True, + randomize_initial_rotation=True, + distance_threshold=0.01, + rotation_threshold=0.1, + n_substeps=20, + relative_control=False, + ignore_z_target_rotation=False, + ): + """Initializes a new Hand manipulation environment. + + Args: + model_path (string): path to the environments XML file + target_position (string): the type of target position: + - ignore: target position is fully ignored, i.e. 
the object can be positioned arbitrarily + - fixed: target position is set to the initial position of the object + - random: target position is fully randomized according to target_position_range + target_rotation (string): the type of target rotation: + - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily + - fixed: target rotation is set to the initial rotation of the object + - xyz: fully randomized target rotation around the X, Y and Z axis + - z: fully randomized target rotation around the Z axis + - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y + ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored + target_position_range (np.array of shape (3, 2)): range of the target_position randomization + reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense + initial_qpos (dict): a dictionary of joint names and values that define the initial configuration + randomize_initial_position (boolean): whether or not to randomize the initial position of the object + randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object + distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved + rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved + n_substeps (int): number of substeps the simulation runs on every call to step + relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state + """ + self.target_position = target_position + self.target_rotation = target_rotation + self.target_position_range = target_position_range + self.parallel_quats = [ + rotations.euler2quat(r) for r in rotations.get_parallel_rotations() + ] + self.randomize_initial_rotation = randomize_initial_rotation + self.randomize_initial_position = 
randomize_initial_position + self.distance_threshold = distance_threshold + self.rotation_threshold = rotation_threshold + self.reward_type = reward_type + self.ignore_z_target_rotation = ignore_z_target_rotation + + assert self.target_position in ["ignore", "fixed", "random"] + assert self.target_rotation in ["ignore", "fixed", "xyz", "z", "parallel"] + initial_qpos = initial_qpos or {} + + hand_env.HandEnv.__init__( + self, + model_path, + n_substeps=n_substeps, + initial_qpos=initial_qpos, + relative_control=relative_control, + ) + + def _get_achieved_goal(self): + # Object position and rotation. + object_qpos = self.sim.data.get_joint_qpos("object:joint") + assert object_qpos.shape == (7,) + return object_qpos + + def _goal_distance(self, goal_a, goal_b): + assert goal_a.shape == goal_b.shape + assert goal_a.shape[-1] == 7 + + d_pos = np.zeros_like(goal_a[..., 0]) + d_rot = np.zeros_like(goal_b[..., 0]) + if self.target_position != "ignore": + delta_pos = goal_a[..., :3] - goal_b[..., :3] + d_pos = np.linalg.norm(delta_pos, axis=-1) + + if self.target_rotation != "ignore": + quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] + + if self.ignore_z_target_rotation: + # Special case: We want to ignore the Z component of the rotation. + # This code here assumes Euler angles with xyz convention. We first transform + # to euler, then set the Z component to be equal between the two, and finally + # transform back into quaternions. + euler_a = rotations.quat2euler(quat_a) + euler_b = rotations.quat2euler(quat_b) + euler_a[2] = euler_b[2] + quat_a = rotations.euler2quat(euler_a) + + # Subtract quaternions and extract angle between them. 
+ quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) + angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0)) + d_rot = angle_diff + assert d_pos.shape == d_rot.shape + return d_pos, d_rot + + # GoalEnv methods + # ---------------------------- + + def compute_reward(self, achieved_goal, goal, info): + if self.reward_type == "sparse": + success = self._is_success(achieved_goal, goal).astype(np.float32) + return success - 1.0 + else: + d_pos, d_rot = self._goal_distance(achieved_goal, goal) + # We weigh the difference in position to avoid that `d_pos` (in meters) is completely + # dominated by `d_rot` (in radians). + return -(10.0 * d_pos + d_rot) + + # RobotEnv methods + # ---------------------------- + + def _is_success(self, achieved_goal, desired_goal): + d_pos, d_rot = self._goal_distance(achieved_goal, desired_goal) + achieved_pos = (d_pos < self.distance_threshold).astype(np.float32) + achieved_rot = (d_rot < self.rotation_threshold).astype(np.float32) + achieved_both = achieved_pos * achieved_rot + return achieved_both + + def _env_setup(self, initial_qpos): + for name, value in initial_qpos.items(): + self.sim.data.set_joint_qpos(name, value) + self.sim.forward() + + def _reset_sim(self): + self.sim.set_state(self.initial_state) + self.sim.forward() + + initial_qpos = self.sim.data.get_joint_qpos("object:joint").copy() + initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:] + assert initial_qpos.shape == (7,) + assert initial_pos.shape == (3,) + assert initial_quat.shape == (4,) + initial_qpos = None + + # Randomization initial rotation. 
+ if self.randomize_initial_rotation: + if self.target_rotation == "z": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + offset_quat = quat_from_angle_and_axis(angle, axis) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation == "parallel": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + z_quat = quat_from_angle_and_axis(angle, axis) + parallel_quat = self.parallel_quats[ + self.np_random.randint(len(self.parallel_quats)) + ] + offset_quat = rotations.quat_mul(z_quat, parallel_quat) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation in ["xyz", "ignore"]: + angle = self.np_random.uniform(-np.pi, np.pi) + axis = self.np_random.uniform(-1.0, 1.0, size=3) + offset_quat = quat_from_angle_and_axis(angle, axis) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation == "fixed": + pass + else: + raise error.Error( + 'Unknown target_rotation option "{}".'.format(self.target_rotation) + ) + + # Randomize initial position. + if self.randomize_initial_position: + if self.target_position != "fixed": + initial_pos += self.np_random.normal(size=3, scale=0.005) + + initial_quat /= np.linalg.norm(initial_quat) + initial_qpos = np.concatenate([initial_pos, initial_quat]) + self.sim.data.set_joint_qpos("object:joint", initial_qpos) + + def is_on_palm(): + self.sim.forward() + cube_middle_idx = self.sim.model.site_name2id("object:center") + cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx] + is_on_palm = cube_middle_pos[2] > 0.04 + return is_on_palm + + # Run the simulation for a bunch of timesteps to let everything settle in. + for _ in range(10): + self._set_action(np.zeros(20)) + try: + self.sim.step() + except mujoco_py.MujocoException: + return False + return is_on_palm() + + def _sample_goal(self): + # Select a goal for the object position. 
+ target_pos = None + if self.target_position == "random": + assert self.target_position_range.shape == (3, 2) + offset = self.np_random.uniform( + self.target_position_range[:, 0], self.target_position_range[:, 1] + ) + assert offset.shape == (3,) + target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + offset + elif self.target_position in ["ignore", "fixed"]: + target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + else: + raise error.Error( + 'Unknown target_position option "{}".'.format(self.target_position) + ) + assert target_pos is not None + assert target_pos.shape == (3,) + + # Select a goal for the object rotation. + target_quat = None + if self.target_rotation == "z": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + target_quat = quat_from_angle_and_axis(angle, axis) + elif self.target_rotation == "parallel": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + target_quat = quat_from_angle_and_axis(angle, axis) + parallel_quat = self.parallel_quats[ + self.np_random.randint(len(self.parallel_quats)) + ] + target_quat = rotations.quat_mul(target_quat, parallel_quat) + elif self.target_rotation == "xyz": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = self.np_random.uniform(-1.0, 1.0, size=3) + target_quat = quat_from_angle_and_axis(angle, axis) + elif self.target_rotation in ["ignore", "fixed"]: + target_quat = self.sim.data.get_joint_qpos("object:joint") + else: + raise error.Error( + 'Unknown target_rotation option "{}".'.format(self.target_rotation) + ) + assert target_quat is not None + assert target_quat.shape == (4,) + + target_quat /= np.linalg.norm(target_quat) # normalized quaternion + goal = np.concatenate([target_pos, target_quat]) + return goal + + def _render_callback(self): + # Assign current state to target object but offset a bit so that the actual object + # is not obscured. 
+ goal = self.goal.copy() + assert goal.shape == (7,) + if self.target_position == "ignore": + # Move the object to the side since we do not care about it's position. + goal[0] += 0.15 + self.sim.data.set_joint_qpos("target:joint", goal) + self.sim.data.set_joint_qvel("target:joint", np.zeros(6)) + + if "object_hidden" in self.sim.model.geom_names: + hidden_id = self.sim.model.geom_name2id("object_hidden") + self.sim.model.geom_rgba[hidden_id, 3] = 1.0 + self.sim.forward() + + def _get_obs(self): + robot_qpos, robot_qvel = robot_get_obs(self.sim) + object_qvel = self.sim.data.get_joint_qvel("object:joint") + achieved_goal = ( + self._get_achieved_goal().ravel() + ) # this contains the object position + rotation + observation = np.concatenate( + [robot_qpos, robot_qvel, object_qvel, achieved_goal] + ) + return { + "observation": observation.copy(), + "achieved_goal": achieved_goal.copy(), + "desired_goal": self.goal.ravel().copy(), + } + + +class HandBlockEnv(ManipulateEnv, utils.EzPickle): + def __init__( + self, target_position="random", target_rotation="xyz", reward_type="sparse" + ): + utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) + ManipulateEnv.__init__( + self, + model_path=MANIPULATE_BLOCK_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, + ) + + +class HandEggEnv(ManipulateEnv, utils.EzPickle): + def __init__( + self, target_position="random", target_rotation="xyz", reward_type="sparse" + ): + utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) + ManipulateEnv.__init__( + self, + model_path=MANIPULATE_EGG_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, + ) + + +class HandPenEnv(ManipulateEnv, utils.EzPickle): + def __init__( + self, 
target_position="random", target_rotation="xyz", reward_type="sparse" + ): + utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) + ManipulateEnv.__init__( + self, + model_path=MANIPULATE_PEN_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + randomize_initial_rotation=False, + reward_type=reward_type, + ignore_z_target_rotation=True, + distance_threshold=0.05, + ) diff --git a/gym-0.21.0/gym/envs/robotics/rotations.py b/gym-0.21.0/gym/envs/robotics/rotations.py new file mode 100644 index 0000000000000000000000000000000000000000..fe241a10885716ab7caaf4a5ecaaa690f416ed54 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/rotations.py @@ -0,0 +1,387 @@ +# Copyright (c) 2009-2017, Matthew Brett and Christoph Gohlke +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Many methods borrow heavily or entirely from transforms3d:
# https://github.com/matthew-brett/transforms3d
# They have mostly been modified to support batched operations.

import numpy as np
import itertools

"""
Rotations
=========

Note: these have caused many subtle bugs in the past.
Be careful while updating these methods and while using them in clever ways.

See MuJoCo documentation here: http://mujoco.org/book/modeling.html#COrientation

Conventions
-----------
    - All functions accept batches as well as individual rotations
    - All rotation conventions match respective MuJoCo defaults
    - All angles are in radians
    - Matrices follow LR convention
    - Euler Angles are all relative with 'xyz' axes ordering
    - See specific representation for more information

Representations
---------------

Euler
    There are many euler angle frames -- here we will strive to use the default
    in MuJoCo, which is eulerseq='xyz'.
    This frame is a relative rotating frame, about x, y, and z axes in order.
    Relative rotating means that after we rotate about x, then we use the
    new (rotated) y, and the same for z.

Quaternions
    These are defined in terms of rotation (angle) about a unit vector (x, y, z)
    We use the following convention:
    q0 = cos(angle / 2)
    q1 = sin(angle / 2) * x
    q2 = sin(angle / 2) * y
    q3 = sin(angle / 2) * z
    This is also sometimes called qw, qx, qy, qz.
    Note that quaternions are ambiguous, because we can represent a rotation by
    angle about vector <x, y, z> and -angle about vector <-x, -y, -z>.
    To choose between these, we pick "first nonzero positive", where we
    make the first nonzero element of the quaternion positive.
    This can result in mismatches if you're converting a quaternion that is not
    "first nonzero positive" to a different representation and back.

Axis Angle
    (Not currently implemented)
    These are very straightforward.  Rotation is angle about a unit vector.

XY Axes
    (Not currently implemented)
    We are given x axis and y axis, and z axis is cross product of x and y.

Z Axis
    This is NOT RECOMMENDED.  Defines a unit vector for the Z axis,
    but rotation about this axis is not well defined.
    Instead pick a fixed reference direction for another axis (e.g. X)
    and calculate the other (e.g. Y = Z cross-product X),
    then use XY Axes rotation instead.

SO3
    (Not currently implemented)
    While not supported by MuJoCo, this representation has a lot of nice features.
    We expect to add support for these in the future.

TODO / Missing
--------------
    - Rotation integration or derivatives (e.g. velocity conversions)
    - More representations (SO3, etc)
    - Random sampling (e.g. sample uniform random rotation)
    - Performance benchmarks/measurements
    - (Maybe) define everything as to/from matrices, for simplicity
"""

# For testing whether a number is close to zero.
_FLOAT_EPS = np.finfo(np.float64).eps
_EPS4 = _FLOAT_EPS * 4.0


def euler2mat(euler):
    """Convert Euler Angles to Rotation Matrix.  See rotation notes above."""
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)

    ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0]
    si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
    ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64)
    mat[..., 2, 2] = cj * ck
    mat[..., 2, 1] = sj * sc - cs
    mat[..., 2, 0] = sj * cc + ss
    mat[..., 1, 2] = cj * sk
    mat[..., 1, 1] = sj * ss + cc
    mat[..., 1, 0] = sj * cs - sc
    mat[..., 0, 2] = -sj
    mat[..., 0, 1] = cj * si
    mat[..., 0, 0] = cj * ci
    return mat


def euler2quat(euler):
    """Convert Euler Angles to Quaternions.  See rotation notes above."""
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)

    ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2
    si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
    ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64)
    quat[..., 0] = cj * cc + sj * ss
    quat[..., 3] = cj * sc - sj * cs
    quat[..., 2] = -(cj * ss + sj * cc)
    quat[..., 1] = cj * cs - sj * sc
    return quat


def mat2euler(mat):
    """Convert Rotation Matrix to Euler Angles.  See rotation notes above."""
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2])
    # Gimbal-lock guard: when cy ~ 0, the x and z rotations are degenerate, so
    # the x angle is fixed to 0 and z absorbs the full rotation.
    condition = cy > _EPS4
    euler = np.empty(mat.shape[:-1], dtype=np.float64)
    euler[..., 2] = np.where(
        condition,
        -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
        -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]),
    )
    # Both np.where branches of the original were the identical expression,
    # so the select is dropped; the result is unchanged.
    euler[..., 1] = -np.arctan2(-mat[..., 0, 2], cy)
    euler[..., 0] = np.where(
        condition, -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 0.0
    )
    return euler


def mat2quat(mat):
    """Convert Rotation Matrix to Quaternion.  See rotation notes above."""
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
    Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
    Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2]
    # Fill only lower half of symmetric matrix
    K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64)
    K[..., 0, 0] = Qxx - Qyy - Qzz
    K[..., 1, 0] = Qyx + Qxy
    K[..., 1, 1] = Qyy - Qxx - Qzz
    K[..., 2, 0] = Qzx + Qxz
    K[..., 2, 1] = Qzy + Qyz
    K[..., 2, 2] = Qzz - Qxx - Qyy
    K[..., 3, 0] = Qyz - Qzy
    K[..., 3, 1] = Qzx - Qxz
    K[..., 3, 2] = Qxy - Qyx
    K[..., 3, 3] = Qxx + Qyy + Qzz
    K /= 3.0
    # TODO: vectorize this -- probably could be made faster
    q = np.empty(K.shape[:-2] + (4,))
    it = np.nditer(q[..., 0], flags=["multi_index"])
    while not it.finished:
        # Use Hermitian eigenvectors, values for speed
        vals, vecs = np.linalg.eigh(K[it.multi_index])
        # Select largest eigenvector, reorder to w,x,y,z quaternion
        q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)]
        # Prefer quaternion with positive w
        # (q * -1 corresponds to same rotation as q)
        if q[it.multi_index][0] < 0:
            q[it.multi_index] *= -1
        it.iternext()
    return q


def quat2euler(quat):
    """Convert Quaternion to Euler Angles.  See rotation notes above."""
    return mat2euler(quat2mat(quat))


def subtract_euler(e1, e2):
    """Return the Euler-angle difference between rotations ``e1`` and ``e2``."""
    assert e1.shape == e2.shape
    assert e1.shape[-1] == 3
    q1 = euler2quat(e1)
    q2 = euler2quat(e2)
    q_diff = quat_mul(q1, quat_conjugate(q2))
    return quat2euler(q_diff)


def quat2mat(quat):
    """Convert Quaternion to Rotation Matrix.  See rotation notes above.

    (The original docstring said "Euler Angles", which was wrong.)
    """
    quat = np.asarray(quat, dtype=np.float64)
    assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)

    w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3]
    Nq = np.sum(quat * quat, axis=-1)
    s = 2.0 / Nq
    X, Y, Z = x * s, y * s, z * s
    wX, wY, wZ = w * X, w * Y, w * Z
    xX, xY, xZ = x * X, x * Y, x * Z
    yY, yZ, zZ = y * Y, y * Z, z * Z

    mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64)
    mat[..., 0, 0] = 1.0 - (yY + zZ)
    mat[..., 0, 1] = xY - wZ
    mat[..., 0, 2] = xZ + wY
    mat[..., 1, 0] = xY + wZ
    mat[..., 1, 1] = 1.0 - (xX + zZ)
    mat[..., 1, 2] = yZ - wX
    mat[..., 2, 0] = xZ - wY
    mat[..., 2, 1] = yZ + wX
    mat[..., 2, 2] = 1.0 - (xX + yY)
    # Near-zero-norm quaternions fall back to the identity rotation.
    return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3))


def quat_conjugate(q):
    """Return the conjugate (inverse rotation for unit quaternions) of ``q``."""
    inv_q = -q
    inv_q[..., 0] *= -1
    return inv_q


def quat_mul(q0, q1):
    """Hamilton product of two (possibly batched) wxyz quaternions."""
    assert q0.shape == q1.shape
    assert q0.shape[-1] == 4
    assert q1.shape[-1] == 4

    w0 = q0[..., 0]
    x0 = q0[..., 1]
    y0 = q0[..., 2]
    z0 = q0[..., 3]

    w1 = q1[..., 0]
    x1 = q1[..., 1]
    y1 = q1[..., 2]
    z1 = q1[..., 3]

    w = w0 * w1 - x0 * x1 - y0 * y1 - z0 * z1
    x = w0 * x1 + x0 * w1 + y0 * z1 - z0 * y1
    y = w0 * y1 + y0 * w1 + z0 * x1 - x0 * z1
    z = w0 * z1 + z0 * w1 + x0 * y1 - y0 * x1
    q = np.array([w, x, y, z])
    if q.ndim == 2:
        # np.array stacked the components on axis 0; restore batch-first order.
        q = q.swapaxes(0, 1)
    assert q.shape == q0.shape
    return q


def quat_rot_vec(q, v0):
    """Rotate 3-vector ``v0`` by quaternion ``q`` (via q * v * q^-1)."""
    q_v0 = np.array([0, v0[0], v0[1], v0[2]])
    q_v = quat_mul(q, quat_mul(q_v0, quat_conjugate(q)))
    v = q_v[1:]
    return v


def quat_identity():
    """Return the identity quaternion [1, 0, 0, 0]."""
    return np.array([1, 0, 0, 0])


def quat2axisangle(quat):
    """Convert a single quaternion to an (axis, angle) pair."""
    theta = 0
    axis = np.array([0, 0, 1])
    sin_theta = np.linalg.norm(quat[1:])

    if sin_theta > 0.0001:
        theta = 2 * np.arcsin(sin_theta)
        theta *= 1 if quat[0] >= 0 else -1
        axis = quat[1:] / sin_theta

    return axis, theta


def euler2point_euler(euler):
    """Encode Euler angles as (sin, cos) pairs -> shape (..., 6)."""
    _euler = euler.copy()
    if len(_euler.shape) < 2:
        _euler = np.expand_dims(_euler, 0)
    assert _euler.shape[1] == 3
    _euler_sin = np.sin(_euler)
    _euler_cos = np.cos(_euler)
    return np.concatenate([_euler_sin, _euler_cos], axis=-1)


def point_euler2euler(euler):
    """Inverse of :func:`euler2point_euler`: (sin, cos) pairs -> angles."""
    _euler = euler.copy()
    if len(_euler.shape) < 2:
        _euler = np.expand_dims(_euler, 0)
    assert _euler.shape[1] == 6
    angle = np.arctan(_euler[..., :3] / _euler[..., 3:])
    angle[_euler[..., 3:] < 0] += np.pi
    return angle


def quat2point_quat(quat):
    """Encode a wxyz quaternion as (sin(angle), cos(angle), axis*...) -> (..., 5)."""
    # Should be in qw, qx, qy, qz
    _quat = quat.copy()
    if len(_quat.shape) < 2:
        _quat = np.expand_dims(_quat, 0)
    assert _quat.shape[1] == 4
    angle = np.arccos(_quat[:, [0]]) * 2
    xyz = _quat[:, 1:]
    xyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (xyz / np.sin(angle / 2))[
        np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
    ]
    return np.concatenate([np.sin(angle), np.cos(angle), xyz], axis=-1)


def point_quat2quat(quat):
    """Inverse of :func:`quat2point_quat`."""
    _quat = quat.copy()
    if len(_quat.shape) < 2:
        _quat = np.expand_dims(_quat, 0)
    assert _quat.shape[1] == 5
    angle = np.arctan(_quat[:, [0]] / _quat[:, [1]])
    qw = np.cos(angle / 2)

    qxyz = _quat[:, 2:]
    qxyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (qxyz * np.sin(angle / 2))[
        np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
    ]
    return np.concatenate([qw, qxyz], axis=-1)


def normalize_angles(angles):
    """Puts angles in [-pi, pi] range."""
    angles = angles.copy()
    if angles.size > 0:
        angles = (angles + np.pi) % (2 * np.pi) - np.pi
        assert -np.pi - 1e-6 <= angles.min() and angles.max() <= np.pi + 1e-6
    return angles


def round_to_straight_angles(angles):
    """Returns closest angle modulo 90 degrees"""
    angles = np.round(angles / (np.pi / 2)) * (np.pi / 2)
    return normalize_angles(angles)


def get_parallel_rotations():
    """Return the 24 canonical axis-aligned (multiple-of-90-degree) rotations."""
    mult90 = [0, np.pi / 2, -np.pi / 2, np.pi]
    parallel_rotations = []
    for euler in itertools.product(mult90, repeat=3):
        canonical = mat2euler(euler2mat(euler))
        canonical = np.round(canonical / (np.pi / 2))
        # Collapse the ambiguous -pi / +pi representations to a single one.
        if canonical[0] == -2:
            canonical[0] = 2
        if canonical[2] == -2:
            canonical[2] = 2
        canonical *= np.pi / 2
        if all([(canonical != rot).any() for rot in parallel_rotations]):
            parallel_rotations += [canonical]
    assert len(parallel_rotations) == 24
    return parallel_rotations
# --- gym/spaces/discrete.py (class header visible at the end of the previous
# hunk; the whole class is reconstructed here for completeness) ---
import numpy as np
from .space import Space


class Discrete(Space):
    r"""A discrete space in :math:`\{ 0, 1, \dots, n-1 \}`.

    (Fixed the original ``\\dots`` double escape, which rendered a literal
    backslash inside the raw docstring.)

    Example::

        >>> Discrete(2)

    """

    def __init__(self, n, seed=None):
        """Create a Discrete space with ``n`` elements.

        Samples are scalars (empty shape) of dtype int64.
        """
        assert n >= 0
        self.n = n
        super(Discrete, self).__init__((), np.int64, seed)

    def sample(self):
        """Return a uniformly random integer in ``[0, n)``."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """Return True if ``x`` is a Python int or scalar numpy integer in ``[0, n)``."""
        if isinstance(x, int):
            as_int = x
        elif isinstance(x, (np.generic, np.ndarray)) and (
            x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
        ):
            as_int = int(x)
        else:
            return False
        return as_int >= 0 and as_int < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        return isinstance(other, Discrete) and self.n == other.n


# --- gym/utils/__init__.py (its final import lines continue in the next hunk) ---
"""A set of common utilities used within the environments. These are
not intended as API functions, and will not remain stable over time.
"""

# These submodules should not have any import-time dependencies.
# We want this since we use `utils` during our import-time sanity checks
# that verify that our dependencies are actually present.
Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python + +import os +from contextlib import contextmanager + +# We would ideally atomically replace any existing file with the new +# version. However, on Windows there's no Python-only solution prior +# to Python 3.3. (This library includes a C extension to do so: +# https://pypi.python.org/pypi/pyosreplace/0.1.) +# +# Correspondingly, we make a best effort, but on Python < 3.3 use a +# replace method which could result in the file temporarily +# disappearing. +import sys + +if sys.version_info >= (3, 3): + # Python 3.3 and up have a native `replace` method + from os import replace +elif sys.platform.startswith("win"): + + def replace(src, dst): + # TODO: on Windows, this will raise if the file is in use, + # which is possible. We'll need to make this more robust over + # time. + try: + os.remove(dst) + except OSError: + pass + os.rename(src, dst) + + +else: + # POSIX rename() is always atomic + from os import rename as replace + + +@contextmanager +def atomic_write(filepath, binary=False, fsync=False): + """Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked. 
+ + :param filepath: the file path to be opened + :param binary: whether to open the file in a binary mode instead of textual + :param fsync: whether to force write the file to disk + """ + + tmppath = filepath + "~" + while os.path.isfile(tmppath): + tmppath += "~" + try: + with open(tmppath, "wb" if binary else "w") as file: + yield file + if fsync: + file.flush() + os.fsync(file.fileno()) + replace(tmppath, filepath) + finally: + try: + os.remove(tmppath) + except (IOError, OSError): + pass diff --git a/gym-0.21.0/gym/utils/closer.py b/gym-0.21.0/gym/utils/closer.py new file mode 100644 index 0000000000000000000000000000000000000000..635b65f0c7c2fd29451a46d42a683d7017dfb0a8 --- /dev/null +++ b/gym-0.21.0/gym/utils/closer.py @@ -0,0 +1,68 @@ +import atexit +import threading +import weakref + + +class Closer(object): + """A registry that ensures your objects get closed, whether manually, + upon garbage collection, or upon exit. To work properly, your + objects need to cooperate and do something like the following: + + ``` + closer = Closer() + class Example(object): + def __init__(self): + self._id = closer.register(self) + + def close(self): + # Probably worth making idempotent too! + ... + closer.unregister(self._id) + + def __del__(self): + self.close() + ``` + + That is, your objects should: + + - register() themselves and save the returned ID + - unregister() themselves upon close() + - include a __del__ method which close()'s the object + """ + + def __init__(self, atexit_register=True): + self.lock = threading.Lock() + self.next_id = -1 + self.closeables = weakref.WeakValueDictionary() + + if atexit_register: + atexit.register(self.close) + + def generate_next_id(self): + with self.lock: + self.next_id += 1 + return self.next_id + + def register(self, closeable): + """Registers an object with a 'close' method. + + Returns: + int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired. 
+ """ + assert hasattr(closeable, "close"), "No close method for {}".format(closeable) + + next_id = self.generate_next_id() + self.closeables[next_id] = closeable + return next_id + + def unregister(self, id): + assert id is not None + if id in self.closeables: + del self.closeables[id] + + def close(self): + # Explicitly fetch all monitors first so that they can't disappear while + # we iterate. cf. http://stackoverflow.com/a/12429620 + closeables = list(self.closeables.values()) + for closeable in closeables: + closeable.close() diff --git a/gym-0.21.0/gym/utils/play.py b/gym-0.21.0/gym/utils/play.py new file mode 100644 index 0000000000000000000000000000000000000000..2e06f4a32e650a3e223c25a8da33d0b7a46d169a --- /dev/null +++ b/gym-0.21.0/gym/utils/play.py @@ -0,0 +1,196 @@ +import gym +import pygame +import matplotlib +import argparse +from gym import logger + +try: + matplotlib.use("TkAgg") + import matplotlib.pyplot as plt +except ImportError as e: + logger.warn("failed to set matplotlib backend, plotting will not work: %s" % str(e)) + plt = None + +from collections import deque +from pygame.locals import VIDEORESIZE + + +def display_arr(screen, arr, video_size, transpose): + arr_min, arr_max = arr.min(), arr.max() + arr = 255.0 * (arr - arr_min) / (arr_max - arr_min) + pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr) + pyg_img = pygame.transform.scale(pyg_img, video_size) + screen.blit(pyg_img, (0, 0)) + + +def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None): + """Allows one to play the game using keyboard. + + To simply play the game use: + + play(gym.make("Pong-v4")) + + Above code works also if env is wrapped, so it's particularly useful in + verifying that the frame-level preprocessing does not render the game + unplayable. + + If you wish to plot real time statistics as you play, you can use + gym.utils.play.PlayPlot. 
def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Allows one to play the game using keyboard.

    To simply play the game use:

        play(gym.make("Pong-v4"))

    Above code works also if env is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game
    unplayable.

    If you wish to plot real time statistics as you play, you can use
    gym.utils.play.PlayPlot. Here's a sample code for plotting the reward
    for last 5 second of gameplay.

        def callback(obs_t, obs_tp1, action, rew, done, info):
            return [rew,]
        plotter = PlayPlot(callback, 30 * 5, ["reward"])

        env = gym.make("Pong-v4")
        play(env, callback=plotter.callback)


    Arguments
    ---------
    env: gym.Env
        Environment to use for playing.
    transpose: bool
        If True the output of observation is transposed.
        Defaults to true.
    fps: int
        Maximum number of steps of the environment to execute every second.
        Defaults to 30.
    zoom: float
        Make screen edge this many times bigger
    callback: lambda or None
        Callback if a callback is provided it will be executed after
        every step. It takes the following input:
            obs_t: observation before performing action
            obs_tp1: observation after performing action
            action: action that was executed
            rew: reward that was received
            done: whether the environment is done or not
            info: debug info
    keys_to_action: dict: tuple(int) -> int or None
        Mapping from keys pressed to action performed.
        For example if pressing 'w' and space at the same time is supposed
        to trigger action number 2 then key_to_action dict would look like this:

            {
                # ...
                tuple(sorted([ord('w'), ord(' ')])): 2
                # ...
            }

        (The original example read ``sorted(ord('w'), ord(' ')) -> 2``, which
        is not valid -- ``sorted`` takes a single iterable, and the lookup key
        must be a sorted tuple, as the implementation below shows.)
        If None, default key_to_action mapping for that env is used, if provided.
    """
    env.reset()
    rendered = env.render(mode="rgb_array")

    # Resolve the key mapping: explicit argument, env hook, or fail loudly.
    if keys_to_action is None:
        if hasattr(env, "get_keys_to_action"):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, "get_keys_to_action"):
            keys_to_action = env.unwrapped.get_keys_to_action()
        else:
            assert False, (
                env.spec.id
                + " does not have explicit key to action mapping, "
                + "please specify one manually"
            )
    relevant_keys = set(sum(map(list, keys_to_action.keys()), []))

    # Window size follows the rendered frame (note: shape is (h, w, 3)).
    video_size = [rendered.shape[1], rendered.shape[0]]
    if zoom is not None:
        video_size = int(video_size[0] * zoom), int(video_size[1] * zoom)

    pressed_keys = []
    running = True
    env_done = True

    screen = pygame.display.set_mode(video_size)
    clock = pygame.time.Clock()

    while running:
        if env_done:
            env_done = False
            obs = env.reset()
        else:
            # Currently held keys select the action; unmapped combos act as 0.
            action = keys_to_action.get(tuple(sorted(pressed_keys)), 0)
            prev_obs = obs
            obs, rew, env_done, info = env.step(action)
            if callback is not None:
                callback(prev_obs, obs, action, rew, env_done, info)
        if obs is not None:
            rendered = env.render(mode="rgb_array")
            display_arr(screen, rendered, transpose=transpose, video_size=video_size)

        # process pygame events
        for event in pygame.event.get():
            # test events, set key states
            if event.type == pygame.KEYDOWN:
                if event.key in relevant_keys:
                    pressed_keys.append(event.key)
                elif event.key == 27:  # ESC quits
                    running = False
            elif event.type == pygame.KEYUP:
                if event.key in relevant_keys:
                    pressed_keys.remove(event.key)
            elif event.type == pygame.QUIT:
                running = False
            elif event.type == VIDEORESIZE:
                video_size = event.size
                screen = pygame.display.set_mode(video_size)
                print(video_size)

        pygame.display.flip()
        clock.tick(fps)
    pygame.quit()


class PlayPlot(object):
    """Live scatter plot of per-step statistics produced by a user callback.

    ``callback`` receives (obs_t, obs_tp1, action, rew, done, info) and must
    return one value per plot in ``plot_names``; the last
    ``horizon_timesteps`` values of each series are shown.
    """

    def __init__(self, callback, horizon_timesteps, plot_names):
        self.data_callback = callback
        self.horizon_timesteps = horizon_timesteps
        self.plot_names = plot_names

        assert plt is not None, "matplotlib backend failed, plotting will not work"

        num_plots = len(self.plot_names)
        self.fig, self.ax = plt.subplots(num_plots)
        if num_plots == 1:
            # plt.subplots(1) returns a bare Axes; normalize to a list.
            self.ax = [self.ax]
        for axis, name in zip(self.ax, plot_names):
            axis.set_title(name)
        self.t = 0
        self.cur_plot = [None for _ in range(num_plots)]
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]

    def callback(self, obs_t, obs_tp1, action, rew, done, info):
        """Record one step's statistics and redraw every plot."""
        points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
        for point, data_series in zip(points, self.data):
            data_series.append(point)
        self.t += 1

        xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t

        for i, plot in enumerate(self.cur_plot):
            if plot is not None:
                plot.remove()
            self.cur_plot[i] = self.ax[i].scatter(
                range(xmin, xmax), list(self.data[i]), c="blue"
            )
            self.ax[i].set_xlim(xmin, xmax)
        plt.pause(0.000001)


def main():
    """CLI entry point: play the given env with keyboard controls."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--env",
        type=str,
        default="MontezumaRevengeNoFrameskip-v4",
        help="Define Environment",
    )
    args = parser.parse_args()
    env = gym.make(args.env)
    play(env, zoom=4, fps=60)


if __name__ == "__main__":
    main()

# NOTE(review): the diff then adds binary gym/vector __pycache__/*.pyc files;
# binary payloads are not reproduced.
b/gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a6a9be2a6781c17f13f1716bf7ec57c4d9ca8a9 Binary files /dev/null and b/gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/vector/utils/__pycache__/shared_memory.cpython-38.pyc b/gym-0.21.0/gym/vector/utils/__pycache__/shared_memory.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71cbda2ea184a17f9443394e0e9c8fab5dbd988a Binary files /dev/null and b/gym-0.21.0/gym/vector/utils/__pycache__/shared_memory.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/vector/utils/misc.py b/gym-0.21.0/gym/vector/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..aa38253dabf1aadebbe89243d2888fd3fa290948 --- /dev/null +++ b/gym-0.21.0/gym/vector/utils/misc.py @@ -0,0 +1,44 @@ +import contextlib +import os + +__all__ = ["CloudpickleWrapper", "clear_mpi_env_vars"] + + +class CloudpickleWrapper(object): + def __init__(self, fn): + self.fn = fn + + def __getstate__(self): + import cloudpickle + + return cloudpickle.dumps(self.fn) + + def __setstate__(self, ob): + import pickle + + self.fn = pickle.loads(ob) + + def __call__(self): + return self.fn() + + +@contextlib.contextmanager +def clear_mpi_env_vars(): + """ + `from mpi4py import MPI` will call `MPI_Init` by default. If the child + process has MPI environment variables, MPI will think that the child process + is an MPI process just like the parent and do bad things such as hang. + + This context manager is a hacky way to clear those environment variables + temporarily such as when we are starting multiprocessing Processes. 
+ """ + removed_environment = {} + for k, v in list(os.environ.items()): + for prefix in ["OMPI_", "PMI_"]: + if k.startswith(prefix): + removed_environment[k] = v + del os.environ[k] + try: + yield + finally: + os.environ.update(removed_environment) diff --git a/gym-0.21.0/gym/vector/utils/spaces.py b/gym-0.21.0/gym/vector/utils/spaces.py new file mode 100644 index 0000000000000000000000000000000000000000..ac4727de5a08fa3d77e14177bf38b3d94fa5fe4e --- /dev/null +++ b/gym-0.21.0/gym/vector/utils/spaces.py @@ -0,0 +1,89 @@ +import numpy as np +from collections import OrderedDict + +from gym.spaces import Space, Box, Discrete, MultiDiscrete, MultiBinary, Tuple, Dict + +_BaseGymSpaces = (Box, Discrete, MultiDiscrete, MultiBinary) +__all__ = ["_BaseGymSpaces", "batch_space"] + + +def batch_space(space, n=1): + """Create a (batched) space, containing multiple copies of a single space. + + Parameters + ---------- + space : `gym.spaces.Space` instance + Space (e.g. the observation space) for a single environment in the + vectorized environment. + + n : int + Number of environments in the vectorized environment. + + Returns + ------- + batched_space : `gym.spaces.Space` instance + Space (e.g. the observation space) for a batch of environments in the + vectorized environment. + + Example + ------- + >>> from gym.spaces import Box, Dict + >>> space = Dict({ + ... 'position': Box(low=0, high=1, shape=(3,), dtype=np.float32), + ... 'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)}) + >>> batch_space(space, n=5) + Dict(position:Box(5, 3), velocity:Box(5, 2)) + """ + if isinstance(space, _BaseGymSpaces): + return batch_space_base(space, n=n) + elif isinstance(space, Tuple): + return batch_space_tuple(space, n=n) + elif isinstance(space, Dict): + return batch_space_dict(space, n=n) + elif isinstance(space, Space): + return batch_space_custom(space, n=n) + else: + raise ValueError( + "Cannot batch space with type `{0}`. 
The space must " + "be a valid `gym.Space` instance.".format(type(space)) + ) + + +def batch_space_base(space, n=1): + if isinstance(space, Box): + repeats = tuple([n] + [1] * space.low.ndim) + low, high = np.tile(space.low, repeats), np.tile(space.high, repeats) + return Box(low=low, high=high, dtype=space.dtype) + + elif isinstance(space, Discrete): + return MultiDiscrete(np.full((n,), space.n, dtype=space.dtype)) + + elif isinstance(space, MultiDiscrete): + repeats = tuple([n] + [1] * space.nvec.ndim) + high = np.tile(space.nvec, repeats) - 1 + return Box(low=np.zeros_like(high), high=high, dtype=space.dtype) + + elif isinstance(space, MultiBinary): + return Box(low=0, high=1, shape=(n,) + space.shape, dtype=space.dtype) + + else: + raise ValueError("Space type `{0}` is not supported.".format(type(space))) + + +def batch_space_tuple(space, n=1): + return Tuple(tuple(batch_space(subspace, n=n) for subspace in space.spaces)) + + +def batch_space_dict(space, n=1): + return Dict( + OrderedDict( + [ + (key, batch_space(subspace, n=n)) + for (key, subspace) in space.spaces.items() + ] + ) + ) + + +def batch_space_custom(space, n=1): + return Tuple(tuple(space for _ in range(n))) diff --git a/gym-0.21.0/gym/vector/vector_env.py b/gym-0.21.0/gym/vector/vector_env.py new file mode 100644 index 0000000000000000000000000000000000000000..d6bbaa7c38d35e5a7576c63b5c81400840f7236c --- /dev/null +++ b/gym-0.21.0/gym/vector/vector_env.py @@ -0,0 +1,203 @@ +import gym +from gym.spaces import Tuple +from gym.vector.utils.spaces import batch_space + +__all__ = ["VectorEnv"] + + +class VectorEnv(gym.Env): + r"""Base class for vectorized environments. + + Each observation returned from vectorized environment is a batch of observations + for each sub-environment. And :meth:`step` is also expected to receive a batch of + actions for each sub-environment. + + .. note:: + + All sub-environments should share the identical observation and action spaces. 
# --- gym/vector/vector_env.py (the class line and opening of the docstring are
# visible at the end of the previous hunk; both classes are reconstructed in
# full here) ---
class VectorEnv(gym.Env):
    r"""Base class for vectorized environments.

    Each observation returned from vectorized environment is a batch of observations
    for each sub-environment. And :meth:`step` is also expected to receive a batch of
    actions for each sub-environment.

    .. note::

        All sub-environments should share the identical observation and action spaces.
        In other words, a vector of multiple different environments is not supported.

    Parameters
    ----------
    num_envs : int
        Number of environments in the vectorized environment.

    observation_space : `gym.spaces.Space` instance
        Observation space of a single environment.

    action_space : `gym.spaces.Space` instance
        Action space of a single environment.
    """

    def __init__(self, num_envs, observation_space, action_space):
        super(VectorEnv, self).__init__()
        self.num_envs = num_envs
        self.is_vector_env = True
        # Batched spaces exposed to callers of reset()/step().
        self.observation_space = batch_space(observation_space, n=num_envs)
        self.action_space = Tuple((action_space,) * num_envs)

        self.closed = False
        self.viewer = None

        # The observation and action spaces of a single environment are
        # kept in separate properties
        self.single_observation_space = observation_space
        self.single_action_space = action_space

    def reset_async(self):
        """Start an asynchronous reset; no-op in the base class."""
        pass

    def reset_wait(self, **kwargs):
        """Finish an asynchronous reset; must be implemented by subclasses."""
        raise NotImplementedError()

    def reset(self):
        r"""Reset all sub-environments and return a batch of initial observations.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.
        """
        self.reset_async()
        return self.reset_wait()

    def step_async(self, actions):
        """Dispatch a batch of actions asynchronously; no-op in the base class."""
        pass

    def step_wait(self, **kwargs):
        """Collect the results of a dispatched step; must be implemented by subclasses."""
        raise NotImplementedError()

    def step(self, actions):
        r"""Take an action for each sub-environments.

        Parameters
        ----------
        actions : iterable of samples from `action_space`
            List of actions.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.

        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of rewards from the vectorized environment.

        dones : `np.ndarray` instance (dtype `np.bool_`)
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic information dicts from sub-environments.
        """
        self.step_async(actions)
        return self.step_wait()

    def close_extras(self, **kwargs):
        r"""Clean up the extra resources e.g. beyond what's in this base class."""
        # NOTE(review): the diffed version raised NotImplementedError here, but
        # close() -- and therefore __del__ during garbage collection -- calls
        # this unconditionally, so any subclass without extra resources would
        # crash on close.  A no-op default matches the documented contract
        # (and upstream gym's base class).
        pass

    def close(self, **kwargs):
        r"""Close all sub-environments and release resources.

        It also closes all the existing image viewers, then calls :meth:`close_extras` and set
        :attr:`closed` as ``True``.

        .. warning::

            This function itself does not close the environments, it should be handled
            in :meth:`close_extras`. This is generic for both synchronous and asynchronous
            vectorized environments.

        .. note::

            This will be automatically called when garbage collected or program exited.

        """
        if self.closed:
            return
        if self.viewer is not None:
            self.viewer.close()
        self.close_extras(**kwargs)
        self.closed = True

    def seed(self, seeds=None):
        """
        Parameters
        ----------
        seeds : list of int, or int, optional
            Random seed for each individual environment. If `seeds` is a list of
            length `num_envs`, then the items of the list are chosen as random
            seeds. If `seeds` is an int, then each environment uses the random
            seed `seeds + n`, where `n` is the index of the environment (between
            `0` and `num_envs - 1`).
        """
        pass

    def __del__(self):
        # getattr default True: if __init__ never ran, do not attempt a close.
        if not getattr(self, "closed", True):
            self.close(terminate=True)

    def __repr__(self):
        if self.spec is None:
            return "{}({})".format(self.__class__.__name__, self.num_envs)
        else:
            return "{}({}, {})".format(
                self.__class__.__name__, self.spec.id, self.num_envs
            )


class VectorEnvWrapper(VectorEnv):
    r"""Wraps the vectorized environment to allow a modular transformation.

    This class is the base class for all wrappers for vectorized environments. The subclass
    could override some methods to change the behavior of the original vectorized environment
    without touching the original code.

    .. note::

        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.

    """

    def __init__(self, env):
        assert isinstance(env, VectorEnv)
        self.env = env

    # explicitly forward the methods defined in VectorEnv
    # to self.env (instead of the base class)
    def reset_async(self):
        return self.env.reset_async()

    def reset_wait(self):
        return self.env.reset_wait()

    def step_async(self, actions):
        return self.env.step_async(actions)

    def step_wait(self):
        return self.env.step_wait()

    def close(self, **kwargs):
        return self.env.close(**kwargs)

    def close_extras(self, **kwargs):
        return self.env.close_extras(**kwargs)

    def seed(self, seeds=None):
        return self.env.seed(seeds)

    # implicitly forward all other methods and attributes to self.env
    def __getattr__(self, name):
        if name.startswith("_"):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name)
            )
        return getattr(self.env, name)

    @property
    def unwrapped(self):
        return self.env.unwrapped

    def __repr__(self):
        return "<{}, {}>".format(self.__class__.__name__, self.env)

# NOTE(review): the diff continues with a binary wrappers __pycache__/*.pyc file
# and the opening of gym/wrappers/atari_preprocessing.py, which is cut off at
# this chunk boundary and therefore not reconstructed.
+from gym.spaces import Box +from gym.wrappers import TimeLimit + +try: + import cv2 +except ImportError: + cv2 = None + + +class AtariPreprocessing(gym.Wrapper): + r"""Atari 2600 preprocessings. + + This class follows the guidelines in + Machado et al. (2018), "Revisiting the Arcade Learning Environment: + Evaluation Protocols and Open Problems for General Agents". + + Specifically: + + * NoopReset: obtain initial state by taking random number of no-ops on reset. + * Frame skipping: 4 by default + * Max-pooling: most recent two observations + * Termination signal when a life is lost: turned off by default. Not recommended by Machado et al. (2018). + * Resize to a square image: 84x84 by default + * Grayscale observation: optional + * Scale observation: optional + + Args: + env (Env): environment + noop_max (int): max number of no-ops + frame_skip (int): the frequency at which the agent experiences the game. + screen_size (int): resize Atari frame + terminal_on_life_loss (bool): if True, then step() returns done=True whenever a + life is lost. + grayscale_obs (bool): if True, then gray scale observation is returned, otherwise, RGB observation + is returned. + grayscale_newaxis (bool): if True and grayscale_obs=True, then a channel axis is added to + grayscale observations to make them 3-dimensional. + scale_obs (bool): if True, then observation normalized in range [0,1] is returned. It also limits memory + optimization benefits of FrameStack Wrapper. + """ + + def __init__( + self, + env, + noop_max=30, + frame_skip=4, + screen_size=84, + terminal_on_life_loss=False, + grayscale_obs=True, + grayscale_newaxis=False, + scale_obs=False, + ): + super().__init__(env) + assert ( + cv2 is not None + ), "opencv-python package not installed! 
Try running pip install gym[atari] to get dependencies for atari" + assert frame_skip > 0 + assert screen_size > 0 + assert noop_max >= 0 + if frame_skip > 1: + assert "NoFrameskip" in env.spec.id, ( + "disable frame-skipping in the original env. for more than one" + " frame-skip as it will be done by the wrapper" + ) + self.noop_max = noop_max + assert env.unwrapped.get_action_meanings()[0] == "NOOP" + + self.frame_skip = frame_skip + self.screen_size = screen_size + self.terminal_on_life_loss = terminal_on_life_loss + self.grayscale_obs = grayscale_obs + self.grayscale_newaxis = grayscale_newaxis + self.scale_obs = scale_obs + + # buffer of most recent two observations for max pooling + if grayscale_obs: + self.obs_buffer = [ + np.empty(env.observation_space.shape[:2], dtype=np.uint8), + np.empty(env.observation_space.shape[:2], dtype=np.uint8), + ] + else: + self.obs_buffer = [ + np.empty(env.observation_space.shape, dtype=np.uint8), + np.empty(env.observation_space.shape, dtype=np.uint8), + ] + + self.ale = env.unwrapped.ale + self.lives = 0 + self.game_over = False + + _low, _high, _obs_dtype = ( + (0, 255, np.uint8) if not scale_obs else (0, 1, np.float32) + ) + _shape = (screen_size, screen_size, 1 if grayscale_obs else 3) + if grayscale_obs and not grayscale_newaxis: + _shape = _shape[:-1] # Remove channel axis + self.observation_space = Box( + low=_low, high=_high, shape=_shape, dtype=_obs_dtype + ) + + def step(self, action): + R = 0.0 + + for t in range(self.frame_skip): + _, reward, done, info = self.env.step(action) + R += reward + self.game_over = done + + if self.terminal_on_life_loss: + new_lives = self.ale.lives() + done = done or new_lives < self.lives + self.lives = new_lives + + if done: + break + if t == self.frame_skip - 2: + if self.grayscale_obs: + self.ale.getScreenGrayscale(self.obs_buffer[1]) + else: + self.ale.getScreenRGB(self.obs_buffer[1]) + elif t == self.frame_skip - 1: + if self.grayscale_obs: + 
self.ale.getScreenGrayscale(self.obs_buffer[0]) + else: + self.ale.getScreenRGB(self.obs_buffer[0]) + return self._get_obs(), R, done, info + + def reset(self, **kwargs): + # NoopReset + self.env.reset(**kwargs) + noops = ( + self.env.unwrapped.np_random.randint(1, self.noop_max + 1) + if self.noop_max > 0 + else 0 + ) + for _ in range(noops): + _, _, done, _ = self.env.step(0) + if done: + self.env.reset(**kwargs) + + self.lives = self.ale.lives() + if self.grayscale_obs: + self.ale.getScreenGrayscale(self.obs_buffer[0]) + else: + self.ale.getScreenRGB(self.obs_buffer[0]) + self.obs_buffer[1].fill(0) + return self._get_obs() + + def _get_obs(self): + if self.frame_skip > 1: # more efficient in-place pooling + np.maximum(self.obs_buffer[0], self.obs_buffer[1], out=self.obs_buffer[0]) + obs = cv2.resize( + self.obs_buffer[0], + (self.screen_size, self.screen_size), + interpolation=cv2.INTER_AREA, + ) + + if self.scale_obs: + obs = np.asarray(obs, dtype=np.float32) / 255.0 + else: + obs = np.asarray(obs, dtype=np.uint8) + + if self.grayscale_obs and self.grayscale_newaxis: + obs = np.expand_dims(obs, axis=-1) # Add a channel axis + return obs diff --git a/gym-0.21.0/gym/wrappers/gray_scale_observation.py b/gym-0.21.0/gym/wrappers/gray_scale_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..5e789ba94f4360ab763a301d35be6f3726a31b6d --- /dev/null +++ b/gym-0.21.0/gym/wrappers/gray_scale_observation.py @@ -0,0 +1,34 @@ +import numpy as np +from gym.spaces import Box +from gym import ObservationWrapper + + +class GrayScaleObservation(ObservationWrapper): + r"""Convert the image observation from RGB to gray scale.""" + + def __init__(self, env, keep_dim=False): + super(GrayScaleObservation, self).__init__(env) + self.keep_dim = keep_dim + + assert ( + len(env.observation_space.shape) == 3 + and env.observation_space.shape[-1] == 3 + ) + + obs_shape = self.observation_space.shape[:2] + if self.keep_dim: + self.observation_space = Box( + 
low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8 + ) + else: + self.observation_space = Box( + low=0, high=255, shape=obs_shape, dtype=np.uint8 + ) + + def observation(self, observation): + import cv2 + + observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY) + if self.keep_dim: + observation = np.expand_dims(observation, -1) + return observation diff --git a/gym-0.21.0/gym/wrappers/monitoring/__pycache__/video_recorder.cpython-38.pyc b/gym-0.21.0/gym/wrappers/monitoring/__pycache__/video_recorder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25870b3e62dd05bb90fd5c1b3dc12a9038f77ffe Binary files /dev/null and b/gym-0.21.0/gym/wrappers/monitoring/__pycache__/video_recorder.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/wrappers/normalize.py b/gym-0.21.0/gym/wrappers/normalize.py new file mode 100644 index 0000000000000000000000000000000000000000..c4e9a07a1cf8e3e1eede9783e83acffd5e9191ca --- /dev/null +++ b/gym-0.21.0/gym/wrappers/normalize.py @@ -0,0 +1,105 @@ +import numpy as np +import gym + + +# taken from https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py +class RunningMeanStd(object): + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm + def __init__(self, epsilon=1e-4, shape=()): + self.mean = np.zeros(shape, "float64") + self.var = np.ones(shape, "float64") + self.count = epsilon + + def update(self, x): + batch_mean = np.mean(x, axis=0) + batch_var = np.var(x, axis=0) + batch_count = x.shape[0] + self.update_from_moments(batch_mean, batch_var, batch_count) + + def update_from_moments(self, batch_mean, batch_var, batch_count): + self.mean, self.var, self.count = update_mean_var_count_from_moments( + self.mean, self.var, self.count, batch_mean, batch_var, batch_count + ) + + +def update_mean_var_count_from_moments( + mean, var, count, batch_mean, batch_var, batch_count +): + delta = batch_mean - mean + tot_count = 
count + batch_count + + new_mean = mean + delta * batch_count / tot_count + m_a = var * count + m_b = batch_var * batch_count + M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count + new_var = M2 / tot_count + new_count = tot_count + + return new_mean, new_var, new_count + + +class NormalizeObservation(gym.core.Wrapper): + def __init__( + self, + env, + epsilon=1e-8, + ): + super(NormalizeObservation, self).__init__(env) + self.num_envs = getattr(env, "num_envs", 1) + self.is_vector_env = getattr(env, "is_vector_env", False) + if self.is_vector_env: + self.obs_rms = RunningMeanStd(shape=self.single_observation_space.shape) + else: + self.obs_rms = RunningMeanStd(shape=self.observation_space.shape) + self.epsilon = epsilon + + def step(self, action): + obs, rews, dones, infos = self.env.step(action) + if self.is_vector_env: + obs = self.normalize(obs) + else: + obs = self.normalize(np.array([obs]))[0] + return obs, rews, dones, infos + + def reset(self): + obs = self.env.reset() + if self.is_vector_env: + obs = self.normalize(obs) + else: + obs = self.normalize(np.array([obs]))[0] + return obs + + def normalize(self, obs): + self.obs_rms.update(obs) + return (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.epsilon) + + +class NormalizeReward(gym.core.Wrapper): + def __init__( + self, + env, + gamma=0.99, + epsilon=1e-8, + ): + super(NormalizeReward, self).__init__(env) + self.num_envs = getattr(env, "num_envs", 1) + self.is_vector_env = getattr(env, "is_vector_env", False) + self.return_rms = RunningMeanStd(shape=()) + self.returns = np.zeros(self.num_envs) + self.gamma = gamma + self.epsilon = epsilon + + def step(self, action): + obs, rews, dones, infos = self.env.step(action) + if not self.is_vector_env: + rews = np.array([rews]) + self.returns = self.returns * self.gamma + rews + rews = self.normalize(rews) + self.returns[dones] = 0.0 + if not self.is_vector_env: + rews = rews[0] + return obs, rews, dones, infos + + def normalize(self, 
rews): + self.return_rms.update(self.returns) + return rews / np.sqrt(self.return_rms.var + self.epsilon) diff --git a/gym-0.21.0/gym/wrappers/record_episode_statistics.py b/gym-0.21.0/gym/wrappers/record_episode_statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..5d5d348d3a452dbccad077b19c084d733f25382f --- /dev/null +++ b/gym-0.21.0/gym/wrappers/record_episode_statistics.py @@ -0,0 +1,55 @@ +import time +from collections import deque +import numpy as np +import gym + + +class RecordEpisodeStatistics(gym.Wrapper): + def __init__(self, env, deque_size=100): + super(RecordEpisodeStatistics, self).__init__(env) + self.num_envs = getattr(env, "num_envs", 1) + self.t0 = time.perf_counter() + self.episode_count = 0 + self.episode_returns = None + self.episode_lengths = None + self.return_queue = deque(maxlen=deque_size) + self.length_queue = deque(maxlen=deque_size) + self.is_vector_env = getattr(env, "is_vector_env", False) + + def reset(self, **kwargs): + observations = super(RecordEpisodeStatistics, self).reset(**kwargs) + self.episode_returns = np.zeros(self.num_envs, dtype=np.float32) + self.episode_lengths = np.zeros(self.num_envs, dtype=np.int32) + return observations + + def step(self, action): + observations, rewards, dones, infos = super(RecordEpisodeStatistics, self).step( + action + ) + self.episode_returns += rewards + self.episode_lengths += 1 + if not self.is_vector_env: + infos = [infos] + dones = [dones] + for i in range(len(dones)): + if dones[i]: + infos[i] = infos[i].copy() + episode_return = self.episode_returns[i] + episode_length = self.episode_lengths[i] + episode_info = { + "r": episode_return, + "l": episode_length, + "t": round(time.perf_counter() - self.t0, 6), + } + infos[i]["episode"] = episode_info + self.return_queue.append(episode_return) + self.length_queue.append(episode_length) + self.episode_count += 1 + self.episode_returns[i] = 0 + self.episode_lengths[i] = 0 + return ( + observations, + rewards, + 
dones if self.is_vector_env else dones[0], + infos if self.is_vector_env else infos[0], + ) diff --git a/gym-0.21.0/gym/wrappers/rescale_action.py b/gym-0.21.0/gym/wrappers/rescale_action.py new file mode 100644 index 0000000000000000000000000000000000000000..6143d1c1554ad93c5c560f023cc945d1a79b080e --- /dev/null +++ b/gym-0.21.0/gym/wrappers/rescale_action.py @@ -0,0 +1,48 @@ +import numpy as np +import gym +from gym import spaces + + +class RescaleAction(gym.ActionWrapper): + r"""Rescales the continuous action space of the environment to a range [min_action, max_action]. + + Example:: + + >>> RescaleAction(env, min_action, max_action).action_space == Box(min_action, max_action) + True + + """ + + def __init__(self, env, min_action, max_action): + assert isinstance( + env.action_space, spaces.Box + ), "expected Box action space, got {}".format(type(env.action_space)) + assert np.less_equal(min_action, max_action).all(), (min_action, max_action) + + super(RescaleAction, self).__init__(env) + self.min_action = ( + np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + min_action + ) + self.max_action = ( + np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + max_action + ) + self.action_space = spaces.Box( + low=min_action, + high=max_action, + shape=env.action_space.shape, + dtype=env.action_space.dtype, + ) + + def action(self, action): + assert np.all(np.greater_equal(action, self.min_action)), ( + action, + self.min_action, + ) + assert np.all(np.less_equal(action, self.max_action)), (action, self.max_action) + low = self.env.action_space.low + high = self.env.action_space.high + action = low + (high - low) * ( + (action - self.min_action) / (self.max_action - self.min_action) + ) + action = np.clip(action, low, high) + return action diff --git a/gym-0.21.0/gym/wrappers/time_aware_observation.py b/gym-0.21.0/gym/wrappers/time_aware_observation.py new file mode 100644 index 
0000000000000000000000000000000000000000..71ec32f5bfce64b8910c6d906acb64bf14ee14e6 --- /dev/null +++ b/gym-0.21.0/gym/wrappers/time_aware_observation.py @@ -0,0 +1,32 @@ +import numpy as np +from gym.spaces import Box +from gym import ObservationWrapper + + +class TimeAwareObservation(ObservationWrapper): + r"""Augment the observation with current time step in the trajectory. + + .. note:: + Currently it only works with one-dimensional observation space. It doesn't + support pixel observation space yet. + + """ + + def __init__(self, env): + super(TimeAwareObservation, self).__init__(env) + assert isinstance(env.observation_space, Box) + assert env.observation_space.dtype == np.float32 + low = np.append(self.observation_space.low, 0.0) + high = np.append(self.observation_space.high, np.inf) + self.observation_space = Box(low, high, dtype=np.float32) + + def observation(self, observation): + return np.append(observation, self.t) + + def step(self, action): + self.t += 1 + return super(TimeAwareObservation, self).step(action) + + def reset(self, **kwargs): + self.t = 0 + return super(TimeAwareObservation, self).reset(**kwargs) diff --git a/gym-0.21.0/gym/wrappers/time_limit.py b/gym-0.21.0/gym/wrappers/time_limit.py new file mode 100644 index 0000000000000000000000000000000000000000..ee8cac96f4ef85b3fdfc0903d11260845d002bda --- /dev/null +++ b/gym-0.21.0/gym/wrappers/time_limit.py @@ -0,0 +1,27 @@ +import gym + + +class TimeLimit(gym.Wrapper): + def __init__(self, env, max_episode_steps=None): + super(TimeLimit, self).__init__(env) + if max_episode_steps is None and self.env.spec is not None: + max_episode_steps = env.spec.max_episode_steps + if self.env.spec is not None: + self.env.spec.max_episode_steps = max_episode_steps + self._max_episode_steps = max_episode_steps + self._elapsed_steps = None + + def step(self, action): + assert ( + self._elapsed_steps is not None + ), "Cannot call env.step() before calling reset()" + observation, reward, done, info = 
self.env.step(action) + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + info["TimeLimit.truncated"] = not done + done = True + return observation, reward, done, info + + def reset(self, **kwargs): + self._elapsed_steps = 0 + return self.env.reset(**kwargs) diff --git a/gym-0.21.0/requirements.txt b/gym-0.21.0/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6ec5e0cb9a81303ac6321de786bff3bcb992a73 --- /dev/null +++ b/gym-0.21.0/requirements.txt @@ -0,0 +1,9 @@ +ale-py~=0.7 +opencv-python>=3. +box2d-py==2.3.5 +mujoco_py>=1.50, <2.0 +scipy>=1.4.1 +numpy>=1.18.0 +pyglet>=1.4.0 +cloudpickle>=1.2.0 +lz4>=3.1.0 diff --git a/gym-0.21.0/test_requirements.txt b/gym-0.21.0/test_requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7f59ce32d8615b68d1aff6972e0245dfa8a110e3 --- /dev/null +++ b/gym-0.21.0/test_requirements.txt @@ -0,0 +1,3 @@ +lz4~=3.1 +pytest~=6.2 +pytest-forked~=1.3 diff --git a/gym-0.21.0/tests/envs/robotics/hand/__init__.py b/gym-0.21.0/tests/envs/robotics/hand/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/gym-0.21.0/tests/envs/robotics/hand/test_reach.py b/gym-0.21.0/tests/envs/robotics/hand/test_reach.py new file mode 100644 index 0000000000000000000000000000000000000000..8d360e7068068130c7efdc3e3c95ea93daf5d00e --- /dev/null +++ b/gym-0.21.0/tests/envs/robotics/hand/test_reach.py @@ -0,0 +1,18 @@ +import pickle + +import pytest + +from gym import envs +from tests.envs.spec_list import skip_mujoco, SKIP_MUJOCO_WARNING_MESSAGE + + +@pytest.mark.skipif(skip_mujoco, reason=SKIP_MUJOCO_WARNING_MESSAGE) +def test_serialize_deserialize(): + env1 = envs.make("HandReach-v0", distance_threshold=1e-6) + env1.reset() + env2 = pickle.loads(pickle.dumps(env1)) + + assert env1.distance_threshold == env2.distance_threshold, ( + env1.distance_threshold, + env2.distance_threshold, + ) 
diff --git a/gym-0.21.0/tests/vector/test_vector_env_wrapper.py b/gym-0.21.0/tests/vector/test_vector_env_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..4571b7650ab9310dbe4af3271f658fa9800fd400 --- /dev/null +++ b/gym-0.21.0/tests/vector/test_vector_env_wrapper.py @@ -0,0 +1,20 @@ +import gym +from gym.vector import make +from gym.vector import VectorEnvWrapper + + +class DummyWrapper(VectorEnvWrapper): + def __init__(self, env): + self.env = env + self.counter = 0 + + def reset_async(self): + super().reset_async() + self.counter += 1 + + +def test_vector_env_wrapper_inheritance(): + env = make("FrozenLake-v1", asynchronous=False) + wrapped = DummyWrapper(env) + wrapped.reset() + assert wrapped.counter == 1 diff --git a/gym-0.21.0/tests/wrappers/test_filter_observation.py b/gym-0.21.0/tests/wrappers/test_filter_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..35beaad5650ad6f8c54828debad09648aff64395 --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_filter_observation.py @@ -0,0 +1,84 @@ +import pytest +import numpy as np + +import gym +from gym import spaces +from gym.wrappers.filter_observation import FilterObservation + + +class FakeEnvironment(gym.Env): + def __init__(self, observation_keys=("state")): + self.observation_space = spaces.Dict( + { + name: spaces.Box(shape=(2,), low=-1, high=1, dtype=np.float32) + for name in observation_keys + } + ) + self.action_space = spaces.Box(shape=(1,), low=-1, high=1, dtype=np.float32) + + def render(self, width=32, height=32, *args, **kwargs): + del args + del kwargs + image_shape = (height, width, 3) + return np.zeros(image_shape, dtype=np.uint8) + + def reset(self): + observation = self.observation_space.sample() + return observation + + def step(self, action): + del action + observation = self.observation_space.sample() + reward, terminal, info = 0.0, False, {} + return observation, reward, terminal, info + + +FILTER_OBSERVATION_TEST_CASES = ( + 
(("key1", "key2"), ("key1",)), + (("key1", "key2"), ("key1", "key2")), + (("key1",), None), + (("key1",), ("key1",)), +) + +ERROR_TEST_CASES = ( + ("key", ValueError, "All the filter_keys must be included..*"), + (False, TypeError, "'bool' object is not iterable"), + (1, TypeError, "'int' object is not iterable"), +) + + +class TestFilterObservation(object): + @pytest.mark.parametrize( + "observation_keys,filter_keys", FILTER_OBSERVATION_TEST_CASES + ) + def test_filter_observation(self, observation_keys, filter_keys): + env = FakeEnvironment(observation_keys=observation_keys) + + # Make sure we are testing the right environment for the test. + observation_space = env.observation_space + assert isinstance(observation_space, spaces.Dict) + + wrapped_env = FilterObservation(env, filter_keys=filter_keys) + + assert isinstance(wrapped_env.observation_space, spaces.Dict) + + if filter_keys is None: + filter_keys = tuple(observation_keys) + + assert len(wrapped_env.observation_space.spaces) == len(filter_keys) + assert tuple(wrapped_env.observation_space.spaces.keys()) == tuple(filter_keys) + + # Check that the added space item is consistent with the added observation. 
+ observation = wrapped_env.reset() + assert len(observation) == len(filter_keys) + + @pytest.mark.parametrize("filter_keys,error_type,error_match", ERROR_TEST_CASES) + def test_raises_with_incorrect_arguments( + self, filter_keys, error_type, error_match + ): + env = FakeEnvironment(observation_keys=("key1", "key2")) + + ValueError + + with pytest.raises(error_type, match=error_match): + FilterObservation(env, filter_keys=filter_keys) diff --git a/gym-0.21.0/tests/wrappers/test_record_episode_statistics.py b/gym-0.21.0/tests/wrappers/test_record_episode_statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..9e088064761caf50d496851e0f1f8233b3ea9c8f --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_record_episode_statistics.py @@ -0,0 +1,38 @@ +import pytest + +import gym +from gym.wrappers import RecordEpisodeStatistics + + +@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v1"]) +@pytest.mark.parametrize("deque_size", [2, 5]) +def test_record_episode_statistics(env_id, deque_size): + env = gym.make(env_id) + env = RecordEpisodeStatistics(env, deque_size) + + for n in range(5): + env.reset() + assert env.episode_returns[0] == 0.0 + assert env.episode_lengths[0] == 0 + for t in range(env.spec.max_episode_steps): + _, _, done, info = env.step(env.action_space.sample()) + if done: + assert "episode" in info + assert all([item in info["episode"] for item in ["r", "l", "t"]]) + break + assert len(env.return_queue) == deque_size + assert len(env.length_queue) == deque_size + + +@pytest.mark.parametrize("num_envs", [1, 4]) +def test_record_episode_statistics_with_vectorenv(num_envs): + envs = gym.vector.make("CartPole-v0", num_envs=num_envs, asynchronous=False) + envs = RecordEpisodeStatistics(envs) + envs.reset() + for _ in range(envs.env.envs[0].spec.max_episode_steps + 1): + _, _, dones, infos = envs.step(envs.action_space.sample()) + for idx, info in enumerate(infos): + if dones[idx]: + assert "episode" in info + assert 
all([item in info["episode"] for item in ["r", "l", "t"]]) + break diff --git a/gym-0.21.0/tests/wrappers/test_record_video.py b/gym-0.21.0/tests/wrappers/test_record_video.py new file mode 100644 index 0000000000000000000000000000000000000000..509fe908709b1f6a426b0c494c4755961b403023 --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_record_video.py @@ -0,0 +1,75 @@ +import pytest + +import os +import shutil +import gym +from gym.wrappers import ( + RecordEpisodeStatistics, + RecordVideo, + capped_cubic_video_schedule, +) + + +def test_record_video_using_default_trigger(): + + env = gym.make("CartPole-v1") + env = gym.wrappers.RecordVideo(env, "videos") + env.reset() + for _ in range(199): + action = env.action_space.sample() + _, _, done, _ = env.step(action) + if done: + env.reset() + env.close() + assert os.path.isdir("videos") + mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] + assert len(mp4_files) == sum( + [capped_cubic_video_schedule(i) for i in range(env.episode_id + 1)] + ) + shutil.rmtree("videos") + + +def test_record_video_step_trigger(): + env = gym.make("CartPole-v1") + env._max_episode_steps = 20 + env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0) + env.reset() + for _ in range(199): + action = env.action_space.sample() + _, _, done, _ = env.step(action) + if done: + env.reset() + env.close() + assert os.path.isdir("videos") + mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] + assert len(mp4_files) == 2 + shutil.rmtree("videos") + + +def make_env(gym_id, seed): + def thunk(): + env = gym.make(gym_id) + env._max_episode_steps = 20 + if seed == 1: + env = gym.wrappers.RecordVideo( + env, "videos", step_trigger=lambda x: x % 100 == 0 + ) + return env + + return thunk + + +def test_record_video_within_vector(): + envs = gym.vector.SyncVectorEnv([make_env("CartPole-v1", 1 + i) for i in range(2)]) + envs = gym.wrappers.RecordEpisodeStatistics(envs) + envs.reset() + 
for i in range(199): + _, _, _, infos = envs.step(envs.action_space.sample()) + for info in infos: + if "episode" in info.keys(): + print(f"episode_reward={info['episode']['r']}") + break + assert os.path.isdir("videos") + mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] + assert len(mp4_files) == 2 + shutil.rmtree("videos") diff --git a/gym-0.21.0/tests/wrappers/test_rescale_action.py b/gym-0.21.0/tests/wrappers/test_rescale_action.py new file mode 100644 index 0000000000000000000000000000000000000000..c33e5adc85ee946ea3422a51eec361ba6e5fc9df --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_rescale_action.py @@ -0,0 +1,32 @@ +import pytest + +import numpy as np + +import gym +from gym.wrappers import RescaleAction + + +def test_rescale_action(): + env = gym.make("CartPole-v1") + with pytest.raises(AssertionError): + env = RescaleAction(env, -1, 1) + del env + + env = gym.make("Pendulum-v1") + wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1) + + seed = 0 + env.seed(seed) + wrapped_env.seed(seed) + + obs = env.reset() + wrapped_obs = wrapped_env.reset() + assert np.allclose(obs, wrapped_obs) + + obs, reward, _, _ = env.step([1.5]) + with pytest.raises(AssertionError): + wrapped_env.step([1.5]) + wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75]) + + assert np.allclose(obs, wrapped_obs) + assert np.allclose(reward, wrapped_reward) diff --git a/gym-0.21.0/tests/wrappers/test_resize_observation.py b/gym-0.21.0/tests/wrappers/test_resize_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..688b47269ec31253512b953211ef6d61a341f88e --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_resize_observation.py @@ -0,0 +1,24 @@ +import pytest + +import gym +from gym.wrappers import ResizeObservation + +pytest.importorskip("gym.envs.atari") + + +@pytest.mark.parametrize( + "env_id", ["PongNoFrameskip-v0", "SpaceInvadersNoFrameskip-v0"] +) +@pytest.mark.parametrize("shape", [16, 32, (8, 5), [10, 7]]) 
+def test_resize_observation(env_id, shape): + env = gym.make(env_id) + env = ResizeObservation(env, shape) + + assert env.observation_space.shape[-1] == 3 + obs = env.reset() + if isinstance(shape, int): + assert env.observation_space.shape[:2] == (shape, shape) + assert obs.shape == (shape, shape, 3) + else: + assert env.observation_space.shape[:2] == tuple(shape) + assert obs.shape == tuple(shape) + (3,) diff --git a/gym-0.21.0/tests/wrappers/test_transform_observation.py b/gym-0.21.0/tests/wrappers/test_transform_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..6818cce77ea24b608c9e0d01e94e12a90ead3dfc --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_transform_observation.py @@ -0,0 +1,29 @@ +import pytest + +import numpy as np + +import gym +from gym.wrappers import TransformObservation + + +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) +def test_transform_observation(env_id): + affine_transform = lambda x: 3 * x + 2 + env = gym.make(env_id) + wrapped_env = TransformObservation( + gym.make(env_id), lambda obs: affine_transform(obs) + ) + + env.seed(0) + wrapped_env.seed(0) + + obs = env.reset() + wrapped_obs = wrapped_env.reset() + assert np.allclose(wrapped_obs, affine_transform(obs)) + + action = env.action_space.sample() + obs, reward, done, _ = env.step(action) + wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action) + assert np.allclose(wrapped_obs, affine_transform(obs)) + assert np.allclose(wrapped_reward, reward) + assert wrapped_done == done diff --git a/gym-0.21.0/tests/wrappers/test_transform_reward.py b/gym-0.21.0/tests/wrappers/test_transform_reward.py new file mode 100644 index 0000000000000000000000000000000000000000..6bc380cf2ce23f0fefee810c8b5fe5d921594535 --- /dev/null +++ b/gym-0.21.0/tests/wrappers/test_transform_reward.py @@ -0,0 +1,63 @@ +import pytest + +import numpy as np + +import gym +from gym.wrappers import TransformReward + + 
+@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) +def test_transform_reward(env_id): + # use case #1: scale + scales = [0.1, 200] + for scale in scales: + env = gym.make(env_id) + wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r) + action = env.action_space.sample() + + env.seed(0) + env.reset() + wrapped_env.seed(0) + wrapped_env.reset() + + _, reward, _, _ = env.step(action) + _, wrapped_reward, _, _ = wrapped_env.step(action) + + assert wrapped_reward == scale * reward + del env, wrapped_env + + # use case #2: clip + min_r = -0.0005 + max_r = 0.0002 + env = gym.make(env_id) + wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r)) + action = env.action_space.sample() + + env.seed(0) + env.reset() + wrapped_env.seed(0) + wrapped_env.reset() + + _, reward, _, _ = env.step(action) + _, wrapped_reward, _, _ = wrapped_env.step(action) + + assert abs(wrapped_reward) < abs(reward) + assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002 + del env, wrapped_env + + # use case #3: sign + env = gym.make(env_id) + wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r)) + + env.seed(0) + env.reset() + wrapped_env.seed(0) + wrapped_env.reset() + + for _ in range(1000): + action = env.action_space.sample() + _, wrapped_reward, done, _ = wrapped_env.step(action) + assert wrapped_reward in [-1.0, 0.0, 1.0] + if done: + break + del env, wrapped_env