Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Metaworld/.gitignore +148 -0
- Metaworld/LICENSE +21 -0
- Metaworld/MANIFEST.in +2 -0
- Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 +0 -0
- Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 +0 -0
- VRL3/src/cfgs_adroit/task/hammer.yaml +4 -0
- VRL3/src/cfgs_adroit/task/pen.yaml +4 -0
- VRL3/src/rrl_local/rrl_utils.py +85 -0
- VRL3/src/transfer_util.py +50 -0
- gym-0.21.0/.gitignore +39 -0
- gym-0.21.0/CODE_OF_CONDUCT.rst +13 -0
- gym-0.21.0/bin/docker_entrypoint +26 -0
- gym-0.21.0/docs/api.md +123 -0
- gym-0.21.0/docs/toy_text/frozen_lake.md +70 -0
- gym-0.21.0/docs/tutorials.md +17 -0
- gym-0.21.0/gym/core.py +357 -0
- gym-0.21.0/gym/envs/box2d/bipedal_walker.py +658 -0
- gym-0.21.0/gym/envs/classic_control/cartpole.py +234 -0
- gym-0.21.0/gym/envs/classic_control/mountain_car.py +177 -0
- gym-0.21.0/gym/envs/classic_control/pendulum.py +94 -0
- gym-0.21.0/gym/envs/mujoco/ant.py +56 -0
- gym-0.21.0/gym/envs/mujoco/assets/thrower.xml +127 -0
- gym-0.21.0/gym/envs/mujoco/hopper.py +48 -0
- gym-0.21.0/gym/envs/mujoco/hopper_v3.py +137 -0
- gym-0.21.0/gym/envs/mujoco/pusher.py +63 -0
- gym-0.21.0/gym/envs/mujoco/swimmer_v3.py +94 -0
- gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml +26 -0
- gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml +42 -0
- gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml +34 -0
- gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml +254 -0
- gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl +0 -0
- gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl +0 -0
- gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl +0 -0
- gym-0.21.0/gym/envs/robotics/fetch/reach.py +32 -0
- gym-0.21.0/gym/envs/robotics/hand/__init__.py +0 -0
- gym-0.21.0/gym/envs/robotics/hand/manipulate.py +354 -0
- gym-0.21.0/gym/envs/robotics/rotations.py +387 -0
- gym-0.21.0/gym/spaces/discrete.py +37 -0
- gym-0.21.0/gym/utils/__init__.py +9 -0
- gym-0.21.0/gym/utils/__pycache__/closer.cpython-38.pyc +0 -0
- gym-0.21.0/gym/utils/__pycache__/colorize.cpython-38.pyc +0 -0
- gym-0.21.0/gym/utils/__pycache__/ezpickle.cpython-38.pyc +0 -0
- gym-0.21.0/gym/utils/__pycache__/json_utils.cpython-38.pyc +0 -0
- gym-0.21.0/gym/utils/__pycache__/seeding.cpython-38.pyc +0 -0
- gym-0.21.0/gym/utils/atomic_write.py +60 -0
- gym-0.21.0/gym/utils/closer.py +68 -0
- gym-0.21.0/gym/utils/play.py +196 -0
- gym-0.21.0/gym/vector/__pycache__/__init__.cpython-38.pyc +0 -0
- gym-0.21.0/gym/vector/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc +0 -0
Metaworld/.gitignore
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Plots and figures
|
| 2 |
+
scripts/figures/
|
| 3 |
+
scripts/movies*
|
| 4 |
+
|
| 5 |
+
# Byte-compiled / optimized / DLL files
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
|
| 10 |
+
# C extensions
|
| 11 |
+
*.so
|
| 12 |
+
|
| 13 |
+
# Distribution / packaging
|
| 14 |
+
.Python
|
| 15 |
+
build/
|
| 16 |
+
develop-eggs/
|
| 17 |
+
dist/
|
| 18 |
+
downloads/
|
| 19 |
+
eggs/
|
| 20 |
+
.eggs/
|
| 21 |
+
lib/
|
| 22 |
+
lib64/
|
| 23 |
+
parts/
|
| 24 |
+
sdist/
|
| 25 |
+
var/
|
| 26 |
+
wheels/
|
| 27 |
+
pip-wheel-metadata/
|
| 28 |
+
share/python-wheels/
|
| 29 |
+
*.egg-info/
|
| 30 |
+
.installed.cfg
|
| 31 |
+
*.egg
|
| 32 |
+
MANIFEST
|
| 33 |
+
|
| 34 |
+
# PyInstaller
|
| 35 |
+
# Usually these files are written by a python script from a template
|
| 36 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 37 |
+
*.manifest
|
| 38 |
+
*.spec
|
| 39 |
+
|
| 40 |
+
# Installer logs
|
| 41 |
+
pip-log.txt
|
| 42 |
+
pip-delete-this-directory.txt
|
| 43 |
+
|
| 44 |
+
# Unit test / coverage reports
|
| 45 |
+
htmlcov/
|
| 46 |
+
.tox/
|
| 47 |
+
.nox/
|
| 48 |
+
.coverage
|
| 49 |
+
.coverage.*
|
| 50 |
+
.cache
|
| 51 |
+
nosetests.xml
|
| 52 |
+
coverage.xml
|
| 53 |
+
*.cover
|
| 54 |
+
.hypothesis/
|
| 55 |
+
.pytest_cache/
|
| 56 |
+
|
| 57 |
+
# Translations
|
| 58 |
+
*.mo
|
| 59 |
+
*.pot
|
| 60 |
+
|
| 61 |
+
# Django stuff:
|
| 62 |
+
*.log
|
| 63 |
+
local_settings.py
|
| 64 |
+
db.sqlite3
|
| 65 |
+
|
| 66 |
+
# Flask stuff:
|
| 67 |
+
instance/
|
| 68 |
+
.webassets-cache
|
| 69 |
+
|
| 70 |
+
# Scrapy stuff:
|
| 71 |
+
.scrapy
|
| 72 |
+
|
| 73 |
+
# Sphinx documentation
|
| 74 |
+
docs/_build/
|
| 75 |
+
docs/_apidoc/
|
| 76 |
+
|
| 77 |
+
# PyBuilder
|
| 78 |
+
target/
|
| 79 |
+
|
| 80 |
+
# Jupyter Notebook
|
| 81 |
+
.ipynb_checkpoints
|
| 82 |
+
|
| 83 |
+
# IPython
|
| 84 |
+
profile_default/
|
| 85 |
+
ipython_config.py
|
| 86 |
+
|
| 87 |
+
# pyenv
|
| 88 |
+
.python-version
|
| 89 |
+
|
| 90 |
+
# celery beat schedule file
|
| 91 |
+
celerybeat-schedule
|
| 92 |
+
|
| 93 |
+
# SageMath parsed files
|
| 94 |
+
*.sage.py
|
| 95 |
+
|
| 96 |
+
# Environments
|
| 97 |
+
.env
|
| 98 |
+
.venv
|
| 99 |
+
env/
|
| 100 |
+
venv/
|
| 101 |
+
ENV/
|
| 102 |
+
env.bak/
|
| 103 |
+
venv.bak/
|
| 104 |
+
|
| 105 |
+
# Spyder project settings
|
| 106 |
+
.spyderproject
|
| 107 |
+
.spyproject
|
| 108 |
+
|
| 109 |
+
# Rope project settings
|
| 110 |
+
.ropeproject
|
| 111 |
+
|
| 112 |
+
# mkdocs documentation
|
| 113 |
+
/site
|
| 114 |
+
|
| 115 |
+
# mypy
|
| 116 |
+
.mypy_cache/
|
| 117 |
+
.dmypy.json
|
| 118 |
+
dmypy.json
|
| 119 |
+
|
| 120 |
+
# Pyre type checker
|
| 121 |
+
.pyre/
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
# metaworld-specific stuff below this line
|
| 125 |
+
MUJOCO_LOG.TXT
|
| 126 |
+
|
| 127 |
+
# Other IDEs and editors
|
| 128 |
+
*.sublime-project
|
| 129 |
+
*.sublime-workspace
|
| 130 |
+
.project
|
| 131 |
+
.pydevproject
|
| 132 |
+
*.swp
|
| 133 |
+
*.orig
|
| 134 |
+
.idea
|
| 135 |
+
.settings
|
| 136 |
+
.vscode/
|
| 137 |
+
|
| 138 |
+
# MacOS
|
| 139 |
+
.DS_Store
|
| 140 |
+
|
| 141 |
+
# no submodules, please
|
| 142 |
+
.gitmodules
|
| 143 |
+
|
| 144 |
+
# pipenv
|
| 145 |
+
# Including these for now, since pipenv is not the authoritative environment
|
| 146 |
+
# tool
|
| 147 |
+
Pipfile
|
| 148 |
+
Pipfile.lock
|
Metaworld/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2019 Meta-World Team
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
Metaworld/MANIFEST.in
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
graft metaworld/envs/assets_v1
|
| 2 |
+
graft metaworld/envs/assets_v2
|
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0
ADDED
|
Binary file (1.17 kB). View file
|
|
|
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0
ADDED
|
Binary file (1.17 kB). View file
|
|
|
VRL3/src/cfgs_adroit/task/hammer.yaml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
num_train_frames: 4100000
|
| 2 |
+
task_name: hammer-v0
|
| 3 |
+
agent:
|
| 4 |
+
encoder_lr_scale: 1
|
VRL3/src/cfgs_adroit/task/pen.yaml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
num_train_frames: 4100000
|
| 2 |
+
task_name: pen-v0
|
| 3 |
+
agent:
|
| 4 |
+
encoder_lr_scale: 1
|
VRL3/src/rrl_local/rrl_utils.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Rutav Shah, Indian Institute of Technlogy Kharagpur
|
| 2 |
+
# Copyright (c) Facebook, Inc. and its affiliates
|
| 3 |
+
|
| 4 |
+
from mjrl.utils.gym_env import GymEnv
|
| 5 |
+
from rrl_local.rrl_multicam import BasicAdroitEnv
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
def make_basic_env(env, cam_list=[], from_pixels=False, hybrid_state=None, test_image=False, channels_first=False,
|
| 9 |
+
num_repeats=1, num_frames=1):
|
| 10 |
+
e = GymEnv(env)
|
| 11 |
+
env_kwargs = None
|
| 12 |
+
if from_pixels : # TODO here they first check if it's from pixels... if pixel and not resnet then use 84x84??
|
| 13 |
+
height = 84
|
| 14 |
+
width = 84
|
| 15 |
+
latent_dim = height*width*len(cam_list)*3
|
| 16 |
+
# RRL class instance is environment wrapper...
|
| 17 |
+
e = BasicAdroitEnv(e, cameras=cam_list,
|
| 18 |
+
height=height, width=width, latent_dim=latent_dim, hybrid_state=hybrid_state,
|
| 19 |
+
test_image=test_image, channels_first=channels_first, num_repeats=num_repeats, num_frames=num_frames)
|
| 20 |
+
env_kwargs = {'rrl_kwargs' : e.env_kwargs}
|
| 21 |
+
# if not from pixels... then it's simpler
|
| 22 |
+
return e, env_kwargs
|
| 23 |
+
|
| 24 |
+
def make_env(env, cam_list=[], from_pixels=False, encoder_type=None, hybrid_state=None,) :
|
| 25 |
+
# TODO why is encoder type put into the environment?
|
| 26 |
+
e = GymEnv(env)
|
| 27 |
+
env_kwargs = None
|
| 28 |
+
if from_pixels : # TODO here they first check if it's from pixels... if pixel and not resnet then use 84x84??
|
| 29 |
+
height = 84
|
| 30 |
+
width = 84
|
| 31 |
+
latent_dim = height*width*len(cam_list)*3
|
| 32 |
+
|
| 33 |
+
if encoder_type and encoder_type == 'resnet34':
|
| 34 |
+
assert from_pixels==True
|
| 35 |
+
height = 256
|
| 36 |
+
width = 256
|
| 37 |
+
latent_dim = 512*len(cam_list) #TODO each camera provides an image?
|
| 38 |
+
if from_pixels:
|
| 39 |
+
# RRL class instance is environment wrapper...
|
| 40 |
+
e = BasicAdroitEnv(e, cameras=cam_list, encoder_type=encoder_type,
|
| 41 |
+
height=height, width=width, latent_dim=latent_dim, hybrid_state=hybrid_state)
|
| 42 |
+
env_kwargs = {'rrl_kwargs' : e.env_kwargs}
|
| 43 |
+
return e, env_kwargs
|
| 44 |
+
|
| 45 |
+
def make_dir(dir_path):
|
| 46 |
+
if not os.path.exists(dir_path):
|
| 47 |
+
os.makedirs(dir_path)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def preprocess_args(args):
|
| 51 |
+
job_data = {}
|
| 52 |
+
job_data['seed'] = args.seed
|
| 53 |
+
job_data['env'] = args.env
|
| 54 |
+
job_data['output'] = args.output
|
| 55 |
+
job_data['from_pixels'] = args.from_pixels
|
| 56 |
+
job_data['hybrid_state'] = args.hybrid_state
|
| 57 |
+
job_data['stack_frames'] = args.stack_frames
|
| 58 |
+
job_data['encoder_type'] = args.encoder_type
|
| 59 |
+
job_data['cam1'] = args.cam1
|
| 60 |
+
job_data['cam2'] = args.cam2
|
| 61 |
+
job_data['cam3'] = args.cam3
|
| 62 |
+
job_data['algorithm'] = args.algorithm
|
| 63 |
+
job_data['num_cpu'] = args.num_cpu
|
| 64 |
+
job_data['save_freq'] = args.save_freq
|
| 65 |
+
job_data['eval_rollouts'] = args.eval_rollouts
|
| 66 |
+
job_data['demo_file'] = args.demo_file
|
| 67 |
+
job_data['bc_batch_size'] = args.bc_batch_size
|
| 68 |
+
job_data['bc_epochs'] = args.bc_epochs
|
| 69 |
+
job_data['bc_learn_rate'] = args.bc_learn_rate
|
| 70 |
+
#job_data['policy_size'] = args.policy_size
|
| 71 |
+
job_data['policy_size'] = tuple(map(int, args.policy_size.split(', ')))
|
| 72 |
+
job_data['vf_batch_size'] = args.vf_batch_size
|
| 73 |
+
job_data['vf_epochs'] = args.vf_epochs
|
| 74 |
+
job_data['vf_learn_rate'] = args.vf_learn_rate
|
| 75 |
+
job_data['rl_step_size'] = args.rl_step_size
|
| 76 |
+
job_data['rl_gamma'] = args.rl_gamma
|
| 77 |
+
job_data['rl_gae'] = args.rl_gae
|
| 78 |
+
job_data['rl_num_traj'] = args.rl_num_traj
|
| 79 |
+
job_data['rl_num_iter'] = args.rl_num_iter
|
| 80 |
+
job_data['lam_0'] = args.lam_0
|
| 81 |
+
job_data['lam_1'] = args.lam_1
|
| 82 |
+
print(job_data)
|
| 83 |
+
return job_data
|
| 84 |
+
|
| 85 |
+
|
VRL3/src/transfer_util.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Microsoft Corporation.
|
| 2 |
+
# Licensed under the MIT License.
|
| 3 |
+
|
| 4 |
+
from torchvision import datasets, models, transforms
|
| 5 |
+
|
| 6 |
+
def set_parameter_requires_grad(model, n_resblock_finetune):
|
| 7 |
+
assert n_resblock_finetune in (0, 1, 2, 3, 4, 5)
|
| 8 |
+
for param in model.parameters():
|
| 9 |
+
param.requires_grad = False
|
| 10 |
+
|
| 11 |
+
for name, param in model.named_parameters():
|
| 12 |
+
condition = (n_resblock_finetune >= 1 and 'layer4' in name) or (n_resblock_finetune >= 2 and 'layer3' in name) or \
|
| 13 |
+
(n_resblock_finetune >= 3 and 'layer2' in name) or (n_resblock_finetune >= 4 and 'layer1' in name) or \
|
| 14 |
+
(n_resblock_finetune >= 5)
|
| 15 |
+
|
| 16 |
+
if condition:
|
| 17 |
+
param.requires_grad = True
|
| 18 |
+
|
| 19 |
+
for name, param in model.named_parameters():
|
| 20 |
+
if 'bn' in name:
|
| 21 |
+
param.requires_grad = False
|
| 22 |
+
|
| 23 |
+
def initialize_model(model_name, n_resblock_finetune, use_pretrained=True):
|
| 24 |
+
# Initialize these variables which will be set in this if statement. Each of these
|
| 25 |
+
# variables is model specific.
|
| 26 |
+
model_ft = None
|
| 27 |
+
input_size = 0
|
| 28 |
+
|
| 29 |
+
if model_name == "resnet18":
|
| 30 |
+
""" Resnet18
|
| 31 |
+
"""
|
| 32 |
+
model_ft = models.resnet18(pretrained=use_pretrained)
|
| 33 |
+
set_parameter_requires_grad(model_ft, n_resblock_finetune)
|
| 34 |
+
feature_size = model_ft.fc.in_features
|
| 35 |
+
input_size = 224
|
| 36 |
+
elif model_name == 'resnet34':
|
| 37 |
+
model_ft = models.resnet34(pretrained=use_pretrained)
|
| 38 |
+
set_parameter_requires_grad(model_ft, n_resblock_finetune)
|
| 39 |
+
feature_size = model_ft.fc.in_features
|
| 40 |
+
input_size = 224
|
| 41 |
+
else:
|
| 42 |
+
print("Invalid model name, exiting...")
|
| 43 |
+
exit()
|
| 44 |
+
|
| 45 |
+
return model_ft, input_size, feature_size
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
gym-0.21.0/.gitignore
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.swp
|
| 2 |
+
*.pyc
|
| 3 |
+
*.py~
|
| 4 |
+
.DS_Store
|
| 5 |
+
.cache
|
| 6 |
+
.pytest_cache/
|
| 7 |
+
|
| 8 |
+
# Setuptools distribution and build folders.
|
| 9 |
+
/dist/
|
| 10 |
+
/build
|
| 11 |
+
|
| 12 |
+
# Virtualenv
|
| 13 |
+
/env
|
| 14 |
+
|
| 15 |
+
# Python egg metadata, regenerated from source files by setuptools.
|
| 16 |
+
/*.egg-info
|
| 17 |
+
|
| 18 |
+
*.sublime-project
|
| 19 |
+
*.sublime-workspace
|
| 20 |
+
|
| 21 |
+
logs/
|
| 22 |
+
|
| 23 |
+
.ipynb_checkpoints
|
| 24 |
+
ghostdriver.log
|
| 25 |
+
|
| 26 |
+
junk
|
| 27 |
+
MUJOCO_LOG.txt
|
| 28 |
+
|
| 29 |
+
rllab_mujoco
|
| 30 |
+
|
| 31 |
+
tutorial/*.html
|
| 32 |
+
|
| 33 |
+
# IDE files
|
| 34 |
+
.eggs
|
| 35 |
+
.tox
|
| 36 |
+
|
| 37 |
+
# PyCharm project files
|
| 38 |
+
.idea
|
| 39 |
+
vizdoom.ini
|
gym-0.21.0/CODE_OF_CONDUCT.rst
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OpenAI Gym is dedicated to providing a harassment-free experience for
|
| 2 |
+
everyone, regardless of gender, gender identity and expression, sexual
|
| 3 |
+
orientation, disability, physical appearance, body size, age, race, or
|
| 4 |
+
religion. We do not tolerate harassment of participants in any form.
|
| 5 |
+
|
| 6 |
+
This code of conduct applies to all OpenAI Gym spaces (including Gist
|
| 7 |
+
comments) both online and off. Anyone who violates this code of
|
| 8 |
+
conduct may be sanctioned or expelled from these spaces at the
|
| 9 |
+
discretion of the OpenAI team.
|
| 10 |
+
|
| 11 |
+
We may add additional rules over time, which will be made clearly
|
| 12 |
+
available to participants. Participants are responsible for knowing
|
| 13 |
+
and abiding by these rules.
|
gym-0.21.0/bin/docker_entrypoint
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# This script is the entrypoint for our Docker image.
|
| 3 |
+
|
| 4 |
+
set -ex
|
| 5 |
+
|
| 6 |
+
# Set up display; otherwise rendering will fail
|
| 7 |
+
Xvfb -screen 0 1024x768x24 &
|
| 8 |
+
export DISPLAY=:0
|
| 9 |
+
|
| 10 |
+
# Wait for the file to come up
|
| 11 |
+
display=0
|
| 12 |
+
file="/tmp/.X11-unix/X$display"
|
| 13 |
+
for i in $(seq 1 10); do
|
| 14 |
+
if [ -e "$file" ]; then
|
| 15 |
+
break
|
| 16 |
+
fi
|
| 17 |
+
|
| 18 |
+
echo "Waiting for $file to be created (try $i/10)"
|
| 19 |
+
sleep "$i"
|
| 20 |
+
done
|
| 21 |
+
if ! [ -e "$file" ]; then
|
| 22 |
+
echo "Timing out: $file was not created"
|
| 23 |
+
exit 1
|
| 24 |
+
fi
|
| 25 |
+
|
| 26 |
+
exec "$@"
|
gym-0.21.0/docs/api.md
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# API
|
| 2 |
+
## Initializing Environments
|
| 3 |
+
Initializing environment is very easy in Gym and can be done via:
|
| 4 |
+
|
| 5 |
+
```python
|
| 6 |
+
import gym
|
| 7 |
+
env = gym.make('CartPole-v0')
|
| 8 |
+
```
|
| 9 |
+
|
| 10 |
+
## Interacting with the Environment
|
| 11 |
+
This example will run an instance of `CartPole-v0` environment for 1000 timesteps, rendering the environment at each step. You should see a window pop up rendering the classic [cart-pole](https://www.youtube.com/watch?v=J7E6_my3CHk&ab_channel=TylerStreeter) problem
|
| 12 |
+
|
| 13 |
+
```python
|
| 14 |
+
import gym
|
| 15 |
+
env = gym.make('CartPole-v0')
|
| 16 |
+
env.reset()
|
| 17 |
+
for _ in range(1000):
|
| 18 |
+
env.render() # by default `mode="human"`(GUI), you can pass `mode="rbg_array"` to retrieve an image instead
|
| 19 |
+
env.step(env.action_space.sample()) # take a random action
|
| 20 |
+
env.close()
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
The output should look something like this
|
| 24 |
+
|
| 25 |
+

|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
The commonly used methods are:
|
| 29 |
+
|
| 30 |
+
`reset()` resets the environment to its initial state and returns the observation corresponding to the initial state
|
| 31 |
+
`step(action)` takes an action as an input and implements that action in the environment. This method returns a set of four values
|
| 32 |
+
`render()` renders the environment
|
| 33 |
+
|
| 34 |
+
- `observation` (**object**) : an environment specific object representation your observation of the environment after the step is taken. Its often aliased as the next state after the action has been taken
|
| 35 |
+
- `reward`(**float**) : immediate reward achieved by the previous action. Actual value and range will varies between environments, but the final goal is always to increase your total reward
|
| 36 |
+
- `done`(**boolean**): whether it’s time to `reset` the environment again. Most (but not all) tasks are divided up into well-defined episodes, and `done` being `True` indicates the episode has terminated. (For example, perhaps the pole tipped too far, or you lost your last life.)
|
| 37 |
+
- `info`(**dict**) : This provides general information helpful for debugging or additional information depending on the environment, such as the raw probabilities behind the environment’s last state change
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
## Additional Environment API
|
| 41 |
+
|
| 42 |
+
- `action_space`: this attribute gives the format of valid actions. It is of datatype `Space` provided by Gym. (For ex: If the action space is of type `Discrete` and gives the value `Discrete(2)`, this means there are two valid discrete actions 0 & 1 )
|
| 43 |
+
```python
|
| 44 |
+
print(env.action_space)
|
| 45 |
+
#> Discrete(2)
|
| 46 |
+
|
| 47 |
+
print(env.observation_space)
|
| 48 |
+
#> Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
|
| 49 |
+
|
| 50 |
+
```
|
| 51 |
+
- `observation_space`: this attribute gives the format of valid observations. It if of datatype `Space` provided by Gym. (For ex: if the observation space is of type `Box` and the shape of the object is `(4,)`, this denotes a valid observation will be an array of 4 numbers). We can check the box bounds as well with attributes
|
| 52 |
+
|
| 53 |
+
```python
|
| 54 |
+
print(env.observation_space.high)
|
| 55 |
+
#> array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38], dtype=float32)
|
| 56 |
+
|
| 57 |
+
print(env.observation_space.low)
|
| 58 |
+
#> array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38], dtype=float32)
|
| 59 |
+
```
|
| 60 |
+
- There are multiple types of Space types inherently available in gym:
|
| 61 |
+
- `Box` describes an n-dimensional continuous space. Its a bounded space where we can define the upper and lower limit which describe the valid values our observations can take.
|
| 62 |
+
- `Discrete` describes a discrete space where { 0, 1, ......., n-1} are the possible values our observation/action can take.
|
| 63 |
+
- `Dict` represents a dictionary of simple spaces.
|
| 64 |
+
- `Tuple` represents a tuple of simple spaces
|
| 65 |
+
- `MultiBinary` creates a n-shape binary space. Argument n can be a number or a `list` of numbers
|
| 66 |
+
- `MultiDiscrete` consists of a series of `Discrete` action spaces with different number of actions in each element
|
| 67 |
+
```python
|
| 68 |
+
observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32)
|
| 69 |
+
print(observation_space.sample())
|
| 70 |
+
#> [ 1.6952509 -0.4399011 -0.7981693]
|
| 71 |
+
|
| 72 |
+
observation_space = Discrete(4)
|
| 73 |
+
print(observation_space.sample())
|
| 74 |
+
#> 1
|
| 75 |
+
|
| 76 |
+
observation_space = Dict({"position": Discrete(2), "velocity": Discrete(3)})
|
| 77 |
+
print(observation_space.sample())
|
| 78 |
+
#> OrderedDict([('position', 0), ('velocity', 1)])
|
| 79 |
+
|
| 80 |
+
observation_space = Tuple((Discrete(2), Discrete(3)))
|
| 81 |
+
print(observation_space.sample())
|
| 82 |
+
#> (1, 2)
|
| 83 |
+
|
| 84 |
+
observation_space = MultiBinary(5)
|
| 85 |
+
print(observation_space.sample())
|
| 86 |
+
#> [1 1 1 0 1]
|
| 87 |
+
|
| 88 |
+
observation_space = MultiDiscrete([ 5, 2, 2 ])
|
| 89 |
+
print(observation_space.sample())
|
| 90 |
+
#> [3 0 0]
|
| 91 |
+
```
|
| 92 |
+
- `reward_range`: returns a tuple corresponding to min and max possible rewards. Default range is set to `[-inf,+inf]`. You can set it if you want a narrower range
|
| 93 |
+
- `close()` : Override close in your subclass to perform any necessary cleanup
|
| 94 |
+
- `seed()`: Sets the seed for this env's random number generator
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
### Unwrapping an environment
|
| 98 |
+
If you have a wrapped environment, and you want to get the unwrapped environment underneath all the layers of wrappers (so that you can manually call a function or change some underlying aspect of the environment), you can use the `.unwrapped` attribute. If the environment is already a base environment, the `.unwrapped` attribute will just return itself.
|
| 99 |
+
|
| 100 |
+
```python
|
| 101 |
+
base_env = env.unwrapped
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
### Vectorized Environment
|
| 105 |
+
Vectorized Environments are a way of stacking multiple independent environments, so that instead of training on one environment, our agent can train on multiple environments at a time. Each `observation` returned from a vectorized environment is a batch of observations for each sub-environment, and `step` is also expected to receive a batch of actions for each sub-environment.
|
| 106 |
+
|
| 107 |
+
**NOTE:** All sub-environments should share the identical observation and action spaces. A vector of multiple different environments is not supported
|
| 108 |
+
|
| 109 |
+
Gym Vector API consists of two types of vectorized environments:
|
| 110 |
+
|
| 111 |
+
- `AsyncVectorEnv` runs multiple environments in parallel. It uses `multiprocessing` processes, and pipes for communication.
|
| 112 |
+
- `SyncVectorEnv`runs multiple environments serially
|
| 113 |
+
|
| 114 |
+
```python
|
| 115 |
+
import gym
|
| 116 |
+
env = gym.vector.make('CartPole-v1', 3,asynchronous=True) # Creates an Asynchronous env
|
| 117 |
+
env.reset()
|
| 118 |
+
#> array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827],
|
| 119 |
+
#> [ 0.03073904, 0.00145001, -0.03088818, -0.03131252],
|
| 120 |
+
#> [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]],
|
| 121 |
+
#> dtype=float32)
|
| 122 |
+
|
| 123 |
+
```
|
gym-0.21.0/docs/toy_text/frozen_lake.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Frozen Lake
|
| 2 |
+
---
|
| 3 |
+
|Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import|
|
| 4 |
+
| ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------|
|
| 5 |
+
|Frozen Lake|Discrete|(1,)|(0,3)|(1,)|(0,nrows*ncolumns)| |from gym.envs.toy_text import frozen_lake|
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
Frozen lake involves crossing a frozen lake from Start(S) to goal(G) without falling into any holes(H). The agent may not always move in the intended direction due to the slippery nature of the frozen lake.
|
| 10 |
+
|
| 11 |
+
The agent take a 1-element vector for actions.
|
| 12 |
+
The action space is `(dir)`, where `dir` decides direction to move in which can be:
|
| 13 |
+
|
| 14 |
+
- 0: LEFT
|
| 15 |
+
- 1: DOWN
|
| 16 |
+
- 2: RIGHT
|
| 17 |
+
- 3: UP
|
| 18 |
+
|
| 19 |
+
The observation is a value representing the agents current position as
|
| 20 |
+
|
| 21 |
+
current_row * nrows + current_col
|
| 22 |
+
|
| 23 |
+
**Rewards:**
|
| 24 |
+
|
| 25 |
+
Reward schedule:
|
| 26 |
+
- Reach goal(G): +1
|
| 27 |
+
- Reach hole(H): 0
|
| 28 |
+
|
| 29 |
+
### Arguments
|
| 30 |
+
|
| 31 |
+
```
|
| 32 |
+
gym.make('FrozenLake-v0', desc=None,map_name="4x4", is_slippery=True)
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
`desc`: Used to specify custom map for frozen lake. For example,
|
| 36 |
+
|
| 37 |
+
desc=["SFFF", "FHFH", "FFFH", "HFFG"].
|
| 38 |
+
|
| 39 |
+
`map_name`: ID to use any of the preloaded maps.
|
| 40 |
+
|
| 41 |
+
"4x4":[
|
| 42 |
+
"SFFF",
|
| 43 |
+
"FHFH",
|
| 44 |
+
"FFFH",
|
| 45 |
+
"HFFG"
|
| 46 |
+
]
|
| 47 |
+
|
| 48 |
+
"8x8": [
|
| 49 |
+
"SFFFFFFF",
|
| 50 |
+
"FFFFFFFF",
|
| 51 |
+
"FFFHFFFF",
|
| 52 |
+
"FFFFFHFF",
|
| 53 |
+
"FFFHFFFF",
|
| 54 |
+
"FHHFFFHF",
|
| 55 |
+
"FHFFHFHF",
|
| 56 |
+
"FFFHFFFG",
|
| 57 |
+
]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
`is_slippery`: True/False. If True will move in intended direction with probability of 1/3 else will move in either perpendicular direction with equal probability of 1/3 in both directions.
|
| 63 |
+
|
| 64 |
+
For example, if action is left and is_slippery is True, then:
|
| 65 |
+
- P(move left)=1/3
|
| 66 |
+
- P(move up)=1/3
|
| 67 |
+
- P(move down)=1/3
|
| 68 |
+
### Version History
|
| 69 |
+
|
| 70 |
+
* v0: Initial versions release (1.0.0)
|
gym-0.21.0/docs/tutorials.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Getting Started With OpenAI Gym: The Basic Building Blocks
|
| 2 |
+
|
| 3 |
+
https://blog.paperspace.com/getting-started-with-openai-gym/
|
| 4 |
+
|
| 5 |
+
A good starting point explaining all the basic building blocks of the Gym API.
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
## Reinforcement Q-Learning from Scratch in Python with OpenAI Gym
|
| 10 |
+
Good Algorithmic Introduction to Reinforcement Learning showcasing how to use Gym API for Training Agents.
|
| 11 |
+
|
| 12 |
+
https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
## Tutorial: An Introduction to Reinforcement Learning Using OpenAI Gym
|
| 16 |
+
|
| 17 |
+
https://www.gocoder.one/blog/rl-tutorial-with-openai-gym
|
gym-0.21.0/gym/core.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import abstractmethod
|
| 2 |
+
|
| 3 |
+
import gym
|
| 4 |
+
from gym import error
|
| 5 |
+
from gym.utils import closer
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Env(object):
|
| 9 |
+
"""The main OpenAI Gym class. It encapsulates an environment with
|
| 10 |
+
arbitrary behind-the-scenes dynamics. An environment can be
|
| 11 |
+
partially or fully observed.
|
| 12 |
+
|
| 13 |
+
The main API methods that users of this class need to know are:
|
| 14 |
+
|
| 15 |
+
step
|
| 16 |
+
reset
|
| 17 |
+
render
|
| 18 |
+
close
|
| 19 |
+
seed
|
| 20 |
+
|
| 21 |
+
And set the following attributes:
|
| 22 |
+
|
| 23 |
+
action_space: The Space object corresponding to valid actions
|
| 24 |
+
observation_space: The Space object corresponding to valid observations
|
| 25 |
+
reward_range: A tuple corresponding to the min and max possible rewards
|
| 26 |
+
|
| 27 |
+
Note: a default reward range set to [-inf,+inf] already exists. Set it if you want a narrower range.
|
| 28 |
+
|
| 29 |
+
The methods are accessed publicly as "step", "reset", etc...
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
# Set this in SOME subclasses
|
| 33 |
+
metadata = {"render.modes": []}
|
| 34 |
+
reward_range = (-float("inf"), float("inf"))
|
| 35 |
+
spec = None
|
| 36 |
+
|
| 37 |
+
# Set these in ALL subclasses
|
| 38 |
+
action_space = None
|
| 39 |
+
observation_space = None
|
| 40 |
+
|
| 41 |
+
@abstractmethod
|
| 42 |
+
def step(self, action):
|
| 43 |
+
"""Run one timestep of the environment's dynamics. When end of
|
| 44 |
+
episode is reached, you are responsible for calling `reset()`
|
| 45 |
+
to reset this environment's state.
|
| 46 |
+
|
| 47 |
+
Accepts an action and returns a tuple (observation, reward, done, info).
|
| 48 |
+
|
| 49 |
+
Args:
|
| 50 |
+
action (object): an action provided by the agent
|
| 51 |
+
|
| 52 |
+
Returns:
|
| 53 |
+
observation (object): agent's observation of the current environment
|
| 54 |
+
reward (float) : amount of reward returned after previous action
|
| 55 |
+
done (bool): whether the episode has ended, in which case further step() calls will return undefined results
|
| 56 |
+
info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
|
| 57 |
+
"""
|
| 58 |
+
raise NotImplementedError
|
| 59 |
+
|
| 60 |
+
@abstractmethod
|
| 61 |
+
def reset(self):
|
| 62 |
+
"""Resets the environment to an initial state and returns an initial
|
| 63 |
+
observation.
|
| 64 |
+
|
| 65 |
+
Note that this function should not reset the environment's random
|
| 66 |
+
number generator(s); random variables in the environment's state should
|
| 67 |
+
be sampled independently between multiple calls to `reset()`. In other
|
| 68 |
+
words, each call of `reset()` should yield an environment suitable for
|
| 69 |
+
a new episode, independent of previous episodes.
|
| 70 |
+
|
| 71 |
+
Returns:
|
| 72 |
+
observation (object): the initial observation.
|
| 73 |
+
"""
|
| 74 |
+
raise NotImplementedError
|
| 75 |
+
|
| 76 |
+
@abstractmethod
|
| 77 |
+
def render(self, mode="human"):
|
| 78 |
+
"""Renders the environment.
|
| 79 |
+
|
| 80 |
+
The set of supported modes varies per environment. (And some
|
| 81 |
+
environments do not support rendering at all.) By convention,
|
| 82 |
+
if mode is:
|
| 83 |
+
|
| 84 |
+
- human: render to the current display or terminal and
|
| 85 |
+
return nothing. Usually for human consumption.
|
| 86 |
+
- rgb_array: Return an numpy.ndarray with shape (x, y, 3),
|
| 87 |
+
representing RGB values for an x-by-y pixel image, suitable
|
| 88 |
+
for turning into a video.
|
| 89 |
+
- ansi: Return a string (str) or StringIO.StringIO containing a
|
| 90 |
+
terminal-style text representation. The text can include newlines
|
| 91 |
+
and ANSI escape sequences (e.g. for colors).
|
| 92 |
+
|
| 93 |
+
Note:
|
| 94 |
+
Make sure that your class's metadata 'render.modes' key includes
|
| 95 |
+
the list of supported modes. It's recommended to call super()
|
| 96 |
+
in implementations to use the functionality of this method.
|
| 97 |
+
|
| 98 |
+
Args:
|
| 99 |
+
mode (str): the mode to render with
|
| 100 |
+
|
| 101 |
+
Example:
|
| 102 |
+
|
| 103 |
+
class MyEnv(Env):
|
| 104 |
+
metadata = {'render.modes': ['human', 'rgb_array']}
|
| 105 |
+
|
| 106 |
+
def render(self, mode='human'):
|
| 107 |
+
if mode == 'rgb_array':
|
| 108 |
+
return np.array(...) # return RGB frame suitable for video
|
| 109 |
+
elif mode == 'human':
|
| 110 |
+
... # pop up a window and render
|
| 111 |
+
else:
|
| 112 |
+
super(MyEnv, self).render(mode=mode) # just raise an exception
|
| 113 |
+
"""
|
| 114 |
+
raise NotImplementedError
|
| 115 |
+
|
| 116 |
+
def close(self):
|
| 117 |
+
"""Override close in your subclass to perform any necessary cleanup.
|
| 118 |
+
|
| 119 |
+
Environments will automatically close() themselves when
|
| 120 |
+
garbage collected or when the program exits.
|
| 121 |
+
"""
|
| 122 |
+
pass
|
| 123 |
+
|
| 124 |
+
def seed(self, seed=None):
|
| 125 |
+
"""Sets the seed for this env's random number generator(s).
|
| 126 |
+
|
| 127 |
+
Note:
|
| 128 |
+
Some environments use multiple pseudorandom number generators.
|
| 129 |
+
We want to capture all such seeds used in order to ensure that
|
| 130 |
+
there aren't accidental correlations between multiple generators.
|
| 131 |
+
|
| 132 |
+
Returns:
|
| 133 |
+
list<bigint>: Returns the list of seeds used in this env's random
|
| 134 |
+
number generators. The first value in the list should be the
|
| 135 |
+
"main" seed, or the value which a reproducer should pass to
|
| 136 |
+
'seed'. Often, the main seed equals the provided 'seed', but
|
| 137 |
+
this won't be true if seed=None, for example.
|
| 138 |
+
"""
|
| 139 |
+
return
|
| 140 |
+
|
| 141 |
+
@property
|
| 142 |
+
def unwrapped(self):
|
| 143 |
+
"""Completely unwrap this env.
|
| 144 |
+
|
| 145 |
+
Returns:
|
| 146 |
+
gym.Env: The base non-wrapped gym.Env instance
|
| 147 |
+
"""
|
| 148 |
+
return self
|
| 149 |
+
|
| 150 |
+
def __str__(self):
|
| 151 |
+
if self.spec is None:
|
| 152 |
+
return "<{} instance>".format(type(self).__name__)
|
| 153 |
+
else:
|
| 154 |
+
return "<{}<{}>>".format(type(self).__name__, self.spec.id)
|
| 155 |
+
|
| 156 |
+
def __enter__(self):
|
| 157 |
+
"""Support with-statement for the environment."""
|
| 158 |
+
return self
|
| 159 |
+
|
| 160 |
+
def __exit__(self, *args):
|
| 161 |
+
"""Support with-statement for the environment."""
|
| 162 |
+
self.close()
|
| 163 |
+
# propagate exception
|
| 164 |
+
return False
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
class GoalEnv(Env):
|
| 168 |
+
"""A goal-based environment. It functions just as any regular OpenAI Gym environment but it
|
| 169 |
+
imposes a required structure on the observation_space. More concretely, the observation
|
| 170 |
+
space is required to contain at least three elements, namely `observation`, `desired_goal`, and
|
| 171 |
+
`achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
|
| 172 |
+
`achieved_goal` is the goal that it currently achieved instead. `observation` contains the
|
| 173 |
+
actual observations of the environment as per usual.
|
| 174 |
+
"""
|
| 175 |
+
|
| 176 |
+
def reset(self):
|
| 177 |
+
# Enforce that each GoalEnv uses a Goal-compatible observation space.
|
| 178 |
+
if not isinstance(self.observation_space, gym.spaces.Dict):
|
| 179 |
+
raise error.Error(
|
| 180 |
+
"GoalEnv requires an observation space of type gym.spaces.Dict"
|
| 181 |
+
)
|
| 182 |
+
for key in ["observation", "achieved_goal", "desired_goal"]:
|
| 183 |
+
if key not in self.observation_space.spaces:
|
| 184 |
+
raise error.Error(
|
| 185 |
+
'GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(
|
| 186 |
+
key
|
| 187 |
+
)
|
| 188 |
+
)
|
| 189 |
+
|
| 190 |
+
@abstractmethod
|
| 191 |
+
def compute_reward(self, achieved_goal, desired_goal, info):
|
| 192 |
+
"""Compute the step reward. This externalizes the reward function and makes
|
| 193 |
+
it dependent on a desired goal and the one that was achieved. If you wish to include
|
| 194 |
+
additional rewards that are independent of the goal, you can include the necessary values
|
| 195 |
+
to derive it in 'info' and compute it accordingly.
|
| 196 |
+
|
| 197 |
+
Args:
|
| 198 |
+
achieved_goal (object): the goal that was achieved during execution
|
| 199 |
+
desired_goal (object): the desired goal that we asked the agent to attempt to achieve
|
| 200 |
+
info (dict): an info dictionary with additional information
|
| 201 |
+
|
| 202 |
+
Returns:
|
| 203 |
+
float: The reward that corresponds to the provided achieved goal w.r.t. to the desired
|
| 204 |
+
goal. Note that the following should always hold true:
|
| 205 |
+
|
| 206 |
+
ob, reward, done, info = env.step()
|
| 207 |
+
assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info)
|
| 208 |
+
"""
|
| 209 |
+
raise NotImplementedError
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
class Wrapper(Env):
|
| 213 |
+
"""Wraps the environment to allow a modular transformation.
|
| 214 |
+
|
| 215 |
+
This class is the base class for all wrappers. The subclass could override
|
| 216 |
+
some methods to change the behavior of the original environment without touching the
|
| 217 |
+
original code.
|
| 218 |
+
|
| 219 |
+
.. note::
|
| 220 |
+
|
| 221 |
+
Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
|
| 222 |
+
|
| 223 |
+
"""
|
| 224 |
+
|
| 225 |
+
def __init__(self, env):
|
| 226 |
+
self.env = env
|
| 227 |
+
|
| 228 |
+
self._action_space = None
|
| 229 |
+
self._observation_space = None
|
| 230 |
+
self._reward_range = None
|
| 231 |
+
self._metadata = None
|
| 232 |
+
|
| 233 |
+
def __getattr__(self, name):
|
| 234 |
+
if name.startswith("_"):
|
| 235 |
+
raise AttributeError(
|
| 236 |
+
"attempted to get missing private attribute '{}'".format(name)
|
| 237 |
+
)
|
| 238 |
+
return getattr(self.env, name)
|
| 239 |
+
|
| 240 |
+
@property
|
| 241 |
+
def spec(self):
|
| 242 |
+
return self.env.spec
|
| 243 |
+
|
| 244 |
+
@classmethod
|
| 245 |
+
def class_name(cls):
|
| 246 |
+
return cls.__name__
|
| 247 |
+
|
| 248 |
+
@property
|
| 249 |
+
def action_space(self):
|
| 250 |
+
if self._action_space is None:
|
| 251 |
+
return self.env.action_space
|
| 252 |
+
return self._action_space
|
| 253 |
+
|
| 254 |
+
@action_space.setter
|
| 255 |
+
def action_space(self, space):
|
| 256 |
+
self._action_space = space
|
| 257 |
+
|
| 258 |
+
@property
|
| 259 |
+
def observation_space(self):
|
| 260 |
+
if self._observation_space is None:
|
| 261 |
+
return self.env.observation_space
|
| 262 |
+
return self._observation_space
|
| 263 |
+
|
| 264 |
+
@observation_space.setter
|
| 265 |
+
def observation_space(self, space):
|
| 266 |
+
self._observation_space = space
|
| 267 |
+
|
| 268 |
+
@property
|
| 269 |
+
def reward_range(self):
|
| 270 |
+
if self._reward_range is None:
|
| 271 |
+
return self.env.reward_range
|
| 272 |
+
return self._reward_range
|
| 273 |
+
|
| 274 |
+
@reward_range.setter
|
| 275 |
+
def reward_range(self, value):
|
| 276 |
+
self._reward_range = value
|
| 277 |
+
|
| 278 |
+
@property
|
| 279 |
+
def metadata(self):
|
| 280 |
+
if self._metadata is None:
|
| 281 |
+
return self.env.metadata
|
| 282 |
+
return self._metadata
|
| 283 |
+
|
| 284 |
+
@metadata.setter
|
| 285 |
+
def metadata(self, value):
|
| 286 |
+
self._metadata = value
|
| 287 |
+
|
| 288 |
+
def step(self, action):
|
| 289 |
+
return self.env.step(action)
|
| 290 |
+
|
| 291 |
+
def reset(self, **kwargs):
|
| 292 |
+
return self.env.reset(**kwargs)
|
| 293 |
+
|
| 294 |
+
def render(self, mode="human", **kwargs):
|
| 295 |
+
return self.env.render(mode, **kwargs)
|
| 296 |
+
|
| 297 |
+
def close(self):
|
| 298 |
+
return self.env.close()
|
| 299 |
+
|
| 300 |
+
def seed(self, seed=None):
|
| 301 |
+
return self.env.seed(seed)
|
| 302 |
+
|
| 303 |
+
def compute_reward(self, achieved_goal, desired_goal, info):
|
| 304 |
+
return self.env.compute_reward(achieved_goal, desired_goal, info)
|
| 305 |
+
|
| 306 |
+
def __str__(self):
|
| 307 |
+
return "<{}{}>".format(type(self).__name__, self.env)
|
| 308 |
+
|
| 309 |
+
def __repr__(self):
|
| 310 |
+
return str(self)
|
| 311 |
+
|
| 312 |
+
@property
|
| 313 |
+
def unwrapped(self):
|
| 314 |
+
return self.env.unwrapped
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
class ObservationWrapper(Wrapper):
|
| 318 |
+
def reset(self, **kwargs):
|
| 319 |
+
observation = self.env.reset(**kwargs)
|
| 320 |
+
return self.observation(observation)
|
| 321 |
+
|
| 322 |
+
def step(self, action):
|
| 323 |
+
observation, reward, done, info = self.env.step(action)
|
| 324 |
+
return self.observation(observation), reward, done, info
|
| 325 |
+
|
| 326 |
+
@abstractmethod
|
| 327 |
+
def observation(self, observation):
|
| 328 |
+
raise NotImplementedError
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
class RewardWrapper(Wrapper):
|
| 332 |
+
def reset(self, **kwargs):
|
| 333 |
+
return self.env.reset(**kwargs)
|
| 334 |
+
|
| 335 |
+
def step(self, action):
|
| 336 |
+
observation, reward, done, info = self.env.step(action)
|
| 337 |
+
return observation, self.reward(reward), done, info
|
| 338 |
+
|
| 339 |
+
@abstractmethod
|
| 340 |
+
def reward(self, reward):
|
| 341 |
+
raise NotImplementedError
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
class ActionWrapper(Wrapper):
|
| 345 |
+
def reset(self, **kwargs):
|
| 346 |
+
return self.env.reset(**kwargs)
|
| 347 |
+
|
| 348 |
+
def step(self, action):
|
| 349 |
+
return self.env.step(self.action(action))
|
| 350 |
+
|
| 351 |
+
@abstractmethod
|
| 352 |
+
def action(self, action):
|
| 353 |
+
raise NotImplementedError
|
| 354 |
+
|
| 355 |
+
@abstractmethod
|
| 356 |
+
def reverse_action(self, action):
|
| 357 |
+
raise NotImplementedError
|
gym-0.21.0/gym/envs/box2d/bipedal_walker.py
ADDED
|
@@ -0,0 +1,658 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import math
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import Box2D
|
| 6 |
+
from Box2D.b2 import (
|
| 7 |
+
edgeShape,
|
| 8 |
+
circleShape,
|
| 9 |
+
fixtureDef,
|
| 10 |
+
polygonShape,
|
| 11 |
+
revoluteJointDef,
|
| 12 |
+
contactListener,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
import gym
|
| 16 |
+
from gym import spaces
|
| 17 |
+
from gym.utils import colorize, seeding, EzPickle
|
| 18 |
+
|
| 19 |
+
# This is simple 4-joints walker robot environment.
|
| 20 |
+
#
|
| 21 |
+
# There are two versions:
|
| 22 |
+
#
|
| 23 |
+
# - Normal, with slightly uneven terrain.
|
| 24 |
+
#
|
| 25 |
+
# - Hardcore with ladders, stumps, pitfalls.
|
| 26 |
+
#
|
| 27 |
+
# Reward is given for moving forward, total 300+ points up to the far end. If the robot falls,
|
| 28 |
+
# it gets -100. Applying motor torque costs a small amount of points, more optimal agent
|
| 29 |
+
# will get better score.
|
| 30 |
+
#
|
| 31 |
+
# Heuristic is provided for testing, it's also useful to get demonstrations to
|
| 32 |
+
# learn from. To run heuristic:
|
| 33 |
+
#
|
| 34 |
+
# python gym/envs/box2d/bipedal_walker.py
|
| 35 |
+
#
|
| 36 |
+
# State consists of hull angle speed, angular velocity, horizontal speed, vertical speed,
|
| 37 |
+
# position of joints and joints angular speed, legs contact with ground, and 10 lidar
|
| 38 |
+
# rangefinder measurements to help to deal with the hardcore version. There's no coordinates
|
| 39 |
+
# in the state vector. Lidar is less useful in normal version, but it works.
|
| 40 |
+
#
|
| 41 |
+
# To solve the game you need to get 300 points in 1600 time steps.
|
| 42 |
+
#
|
| 43 |
+
# To solve hardcore version you need 300 points in 2000 time steps.
|
| 44 |
+
#
|
| 45 |
+
# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
|
| 46 |
+
|
| 47 |
+
FPS = 50
|
| 48 |
+
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
|
| 49 |
+
|
| 50 |
+
MOTORS_TORQUE = 80
|
| 51 |
+
SPEED_HIP = 4
|
| 52 |
+
SPEED_KNEE = 6
|
| 53 |
+
LIDAR_RANGE = 160 / SCALE
|
| 54 |
+
|
| 55 |
+
INITIAL_RANDOM = 5
|
| 56 |
+
|
| 57 |
+
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
|
| 58 |
+
LEG_DOWN = -8 / SCALE
|
| 59 |
+
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE
|
| 60 |
+
|
| 61 |
+
VIEWPORT_W = 600
|
| 62 |
+
VIEWPORT_H = 400
|
| 63 |
+
|
| 64 |
+
TERRAIN_STEP = 14 / SCALE
|
| 65 |
+
TERRAIN_LENGTH = 200 # in steps
|
| 66 |
+
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4
|
| 67 |
+
TERRAIN_GRASS = 10 # low long are grass spots, in steps
|
| 68 |
+
TERRAIN_STARTPAD = 20 # in steps
|
| 69 |
+
FRICTION = 2.5
|
| 70 |
+
|
| 71 |
+
HULL_FD = fixtureDef(
|
| 72 |
+
shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
|
| 73 |
+
density=5.0,
|
| 74 |
+
friction=0.1,
|
| 75 |
+
categoryBits=0x0020,
|
| 76 |
+
maskBits=0x001, # collide only with ground
|
| 77 |
+
restitution=0.0,
|
| 78 |
+
) # 0.99 bouncy
|
| 79 |
+
|
| 80 |
+
LEG_FD = fixtureDef(
|
| 81 |
+
shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
|
| 82 |
+
density=1.0,
|
| 83 |
+
restitution=0.0,
|
| 84 |
+
categoryBits=0x0020,
|
| 85 |
+
maskBits=0x001,
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
LOWER_FD = fixtureDef(
|
| 89 |
+
shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
|
| 90 |
+
density=1.0,
|
| 91 |
+
restitution=0.0,
|
| 92 |
+
categoryBits=0x0020,
|
| 93 |
+
maskBits=0x001,
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class ContactDetector(contactListener):
|
| 98 |
+
def __init__(self, env):
|
| 99 |
+
contactListener.__init__(self)
|
| 100 |
+
self.env = env
|
| 101 |
+
|
| 102 |
+
def BeginContact(self, contact):
|
| 103 |
+
if (
|
| 104 |
+
self.env.hull == contact.fixtureA.body
|
| 105 |
+
or self.env.hull == contact.fixtureB.body
|
| 106 |
+
):
|
| 107 |
+
self.env.game_over = True
|
| 108 |
+
for leg in [self.env.legs[1], self.env.legs[3]]:
|
| 109 |
+
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
|
| 110 |
+
leg.ground_contact = True
|
| 111 |
+
|
| 112 |
+
def EndContact(self, contact):
|
| 113 |
+
for leg in [self.env.legs[1], self.env.legs[3]]:
|
| 114 |
+
if leg in [contact.fixtureA.body, contact.fixtureB.body]:
|
| 115 |
+
leg.ground_contact = False
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class BipedalWalker(gym.Env, EzPickle):
|
| 119 |
+
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS}
|
| 120 |
+
|
| 121 |
+
hardcore = False
|
| 122 |
+
|
| 123 |
+
def __init__(self):
|
| 124 |
+
EzPickle.__init__(self)
|
| 125 |
+
self.seed()
|
| 126 |
+
self.viewer = None
|
| 127 |
+
|
| 128 |
+
self.world = Box2D.b2World()
|
| 129 |
+
self.terrain = None
|
| 130 |
+
self.hull = None
|
| 131 |
+
|
| 132 |
+
self.prev_shaping = None
|
| 133 |
+
|
| 134 |
+
self.fd_polygon = fixtureDef(
|
| 135 |
+
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
|
| 136 |
+
friction=FRICTION,
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
self.fd_edge = fixtureDef(
|
| 140 |
+
shape=edgeShape(vertices=[(0, 0), (1, 1)]),
|
| 141 |
+
friction=FRICTION,
|
| 142 |
+
categoryBits=0x0001,
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
high = np.array([np.inf] * 24).astype(np.float32)
|
| 146 |
+
self.action_space = spaces.Box(
|
| 147 |
+
np.array([-1, -1, -1, -1]).astype(np.float32),
|
| 148 |
+
np.array([1, 1, 1, 1]).astype(np.float32),
|
| 149 |
+
)
|
| 150 |
+
self.observation_space = spaces.Box(-high, high)
|
| 151 |
+
|
| 152 |
+
def seed(self, seed=None):
|
| 153 |
+
self.np_random, seed = seeding.np_random(seed)
|
| 154 |
+
return [seed]
|
| 155 |
+
|
| 156 |
+
def _destroy(self):
|
| 157 |
+
if not self.terrain:
|
| 158 |
+
return
|
| 159 |
+
self.world.contactListener = None
|
| 160 |
+
for t in self.terrain:
|
| 161 |
+
self.world.DestroyBody(t)
|
| 162 |
+
self.terrain = []
|
| 163 |
+
self.world.DestroyBody(self.hull)
|
| 164 |
+
self.hull = None
|
| 165 |
+
for leg in self.legs:
|
| 166 |
+
self.world.DestroyBody(leg)
|
| 167 |
+
self.legs = []
|
| 168 |
+
self.joints = []
|
| 169 |
+
|
| 170 |
+
def _generate_terrain(self, hardcore):
|
| 171 |
+
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
|
| 172 |
+
state = GRASS
|
| 173 |
+
velocity = 0.0
|
| 174 |
+
y = TERRAIN_HEIGHT
|
| 175 |
+
counter = TERRAIN_STARTPAD
|
| 176 |
+
oneshot = False
|
| 177 |
+
self.terrain = []
|
| 178 |
+
self.terrain_x = []
|
| 179 |
+
self.terrain_y = []
|
| 180 |
+
for i in range(TERRAIN_LENGTH):
|
| 181 |
+
x = i * TERRAIN_STEP
|
| 182 |
+
self.terrain_x.append(x)
|
| 183 |
+
|
| 184 |
+
if state == GRASS and not oneshot:
|
| 185 |
+
velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
|
| 186 |
+
if i > TERRAIN_STARTPAD:
|
| 187 |
+
velocity += self.np_random.uniform(-1, 1) / SCALE # 1
|
| 188 |
+
y += velocity
|
| 189 |
+
|
| 190 |
+
elif state == PIT and oneshot:
|
| 191 |
+
counter = self.np_random.randint(3, 5)
|
| 192 |
+
poly = [
|
| 193 |
+
(x, y),
|
| 194 |
+
(x + TERRAIN_STEP, y),
|
| 195 |
+
(x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
|
| 196 |
+
(x, y - 4 * TERRAIN_STEP),
|
| 197 |
+
]
|
| 198 |
+
self.fd_polygon.shape.vertices = poly
|
| 199 |
+
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
| 200 |
+
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
|
| 201 |
+
self.terrain.append(t)
|
| 202 |
+
|
| 203 |
+
self.fd_polygon.shape.vertices = [
|
| 204 |
+
(p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
|
| 205 |
+
]
|
| 206 |
+
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
| 207 |
+
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
|
| 208 |
+
self.terrain.append(t)
|
| 209 |
+
counter += 2
|
| 210 |
+
original_y = y
|
| 211 |
+
|
| 212 |
+
elif state == PIT and not oneshot:
|
| 213 |
+
y = original_y
|
| 214 |
+
if counter > 1:
|
| 215 |
+
y -= 4 * TERRAIN_STEP
|
| 216 |
+
|
| 217 |
+
elif state == STUMP and oneshot:
|
| 218 |
+
counter = self.np_random.randint(1, 3)
|
| 219 |
+
poly = [
|
| 220 |
+
(x, y),
|
| 221 |
+
(x + counter * TERRAIN_STEP, y),
|
| 222 |
+
(x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
|
| 223 |
+
(x, y + counter * TERRAIN_STEP),
|
| 224 |
+
]
|
| 225 |
+
self.fd_polygon.shape.vertices = poly
|
| 226 |
+
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
| 227 |
+
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
|
| 228 |
+
self.terrain.append(t)
|
| 229 |
+
|
| 230 |
+
elif state == STAIRS and oneshot:
|
| 231 |
+
stair_height = +1 if self.np_random.rand() > 0.5 else -1
|
| 232 |
+
stair_width = self.np_random.randint(4, 5)
|
| 233 |
+
stair_steps = self.np_random.randint(3, 5)
|
| 234 |
+
original_y = y
|
| 235 |
+
for s in range(stair_steps):
|
| 236 |
+
poly = [
|
| 237 |
+
(
|
| 238 |
+
x + (s * stair_width) * TERRAIN_STEP,
|
| 239 |
+
y + (s * stair_height) * TERRAIN_STEP,
|
| 240 |
+
),
|
| 241 |
+
(
|
| 242 |
+
x + ((1 + s) * stair_width) * TERRAIN_STEP,
|
| 243 |
+
y + (s * stair_height) * TERRAIN_STEP,
|
| 244 |
+
),
|
| 245 |
+
(
|
| 246 |
+
x + ((1 + s) * stair_width) * TERRAIN_STEP,
|
| 247 |
+
y + (-1 + s * stair_height) * TERRAIN_STEP,
|
| 248 |
+
),
|
| 249 |
+
(
|
| 250 |
+
x + (s * stair_width) * TERRAIN_STEP,
|
| 251 |
+
y + (-1 + s * stair_height) * TERRAIN_STEP,
|
| 252 |
+
),
|
| 253 |
+
]
|
| 254 |
+
self.fd_polygon.shape.vertices = poly
|
| 255 |
+
t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
|
| 256 |
+
t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
|
| 257 |
+
self.terrain.append(t)
|
| 258 |
+
counter = stair_steps * stair_width
|
| 259 |
+
|
| 260 |
+
elif state == STAIRS and not oneshot:
|
| 261 |
+
s = stair_steps * stair_width - counter - stair_height
|
| 262 |
+
n = s / stair_width
|
| 263 |
+
y = original_y + (n * stair_height) * TERRAIN_STEP
|
| 264 |
+
|
| 265 |
+
oneshot = False
|
| 266 |
+
self.terrain_y.append(y)
|
| 267 |
+
counter -= 1
|
| 268 |
+
if counter == 0:
|
| 269 |
+
counter = self.np_random.randint(TERRAIN_GRASS / 2, TERRAIN_GRASS)
|
| 270 |
+
if state == GRASS and hardcore:
|
| 271 |
+
state = self.np_random.randint(1, _STATES_)
|
| 272 |
+
oneshot = True
|
| 273 |
+
else:
|
| 274 |
+
state = GRASS
|
| 275 |
+
oneshot = True
|
| 276 |
+
|
| 277 |
+
self.terrain_poly = []
|
| 278 |
+
for i in range(TERRAIN_LENGTH - 1):
|
| 279 |
+
poly = [
|
| 280 |
+
(self.terrain_x[i], self.terrain_y[i]),
|
| 281 |
+
(self.terrain_x[i + 1], self.terrain_y[i + 1]),
|
| 282 |
+
]
|
| 283 |
+
self.fd_edge.shape.vertices = poly
|
| 284 |
+
t = self.world.CreateStaticBody(fixtures=self.fd_edge)
|
| 285 |
+
color = (0.3, 1.0 if i % 2 == 0 else 0.8, 0.3)
|
| 286 |
+
t.color1 = color
|
| 287 |
+
t.color2 = color
|
| 288 |
+
self.terrain.append(t)
|
| 289 |
+
color = (0.4, 0.6, 0.3)
|
| 290 |
+
poly += [(poly[1][0], 0), (poly[0][0], 0)]
|
| 291 |
+
self.terrain_poly.append((poly, color))
|
| 292 |
+
self.terrain.reverse()
|
| 293 |
+
|
| 294 |
+
def _generate_clouds(self):
|
| 295 |
+
# Sorry for the clouds, couldn't resist
|
| 296 |
+
self.cloud_poly = []
|
| 297 |
+
for i in range(TERRAIN_LENGTH // 20):
|
| 298 |
+
x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
|
| 299 |
+
y = VIEWPORT_H / SCALE * 3 / 4
|
| 300 |
+
poly = [
|
| 301 |
+
(
|
| 302 |
+
x
|
| 303 |
+
+ 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
|
| 304 |
+
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
|
| 305 |
+
y
|
| 306 |
+
+ 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
|
| 307 |
+
+ self.np_random.uniform(0, 5 * TERRAIN_STEP),
|
| 308 |
+
)
|
| 309 |
+
for a in range(5)
|
| 310 |
+
]
|
| 311 |
+
x1 = min([p[0] for p in poly])
|
| 312 |
+
x2 = max([p[0] for p in poly])
|
| 313 |
+
self.cloud_poly.append((poly, x1, x2))
|
| 314 |
+
|
| 315 |
+
def reset(self):
    """Rebuild the world and return the initial 24-element observation.

    Destroys any previous bodies, regenerates terrain and clouds, creates
    the hull and the two 2-segment legs with motorized revolute joints,
    installs fresh lidar callbacks, and bootstraps the first observation
    by taking one zero-torque step.
    """
    self._destroy()
    # Keeping a reference on self.world works around a pybox2d issue where
    # the listener can be garbage-collected while still registered.
    self.world.contactListener_bug_workaround = ContactDetector(self)
    self.world.contactListener = self.world.contactListener_bug_workaround
    self.game_over = False
    self.prev_shaping = None  # reward shaping baseline; set on first step()
    self.scroll = 0.0
    self.lidar_render = 0

    # NOTE: W and H are computed but unused here (left as in the original).
    W = VIEWPORT_W / SCALE
    H = VIEWPORT_H / SCALE

    self._generate_terrain(self.hardcore)
    self._generate_clouds()

    # Spawn the hull above the flat start pad, two leg-lengths up.
    init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
    init_y = TERRAIN_HEIGHT + 2 * LEG_H
    self.hull = self.world.CreateDynamicBody(
        position=(init_x, init_y), fixtures=HULL_FD
    )
    self.hull.color1 = (0.5, 0.4, 0.9)
    self.hull.color2 = (0.3, 0.3, 0.5)
    # Random horizontal kick so episodes don't all start identically.
    self.hull.ApplyForceToCenter(
        (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
    )

    self.legs = []
    self.joints = []
    # i = -1 / +1 distinguishes the two legs (slight opposite initial tilt).
    for i in [-1, +1]:
        # Upper leg segment, hinged to the hull at the hip.
        leg = self.world.CreateDynamicBody(
            position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
            angle=(i * 0.05),
            fixtures=LEG_FD,
        )
        leg.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
        leg.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
        rjd = revoluteJointDef(
            bodyA=self.hull,
            bodyB=leg,
            localAnchorA=(0, LEG_DOWN),
            localAnchorB=(0, LEG_H / 2),
            enableMotor=True,
            enableLimit=True,
            maxMotorTorque=MOTORS_TORQUE,
            motorSpeed=i,
            lowerAngle=-0.8,
            upperAngle=1.1,
        )
        self.legs.append(leg)
        self.joints.append(self.world.CreateJoint(rjd))

        # Lower leg segment, hinged to the upper one at the knee.
        lower = self.world.CreateDynamicBody(
            position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
            angle=(i * 0.05),
            fixtures=LOWER_FD,
        )
        lower.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
        lower.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
        rjd = revoluteJointDef(
            bodyA=leg,
            bodyB=lower,
            localAnchorA=(0, -LEG_H / 2),
            localAnchorB=(0, LEG_H / 2),
            enableMotor=True,
            enableLimit=True,
            maxMotorTorque=MOTORS_TORQUE,
            motorSpeed=1,
            lowerAngle=-1.6,
            upperAngle=-0.1,
        )
        # ground_contact is flipped by ContactDetector and read in step().
        lower.ground_contact = False
        self.legs.append(lower)
        self.joints.append(self.world.CreateJoint(rjd))

    self.drawlist = self.terrain + self.legs + [self.hull]

    class LidarCallback(Box2D.b2.rayCastCallback):
        # Records the nearest hit along a ray; used to build the 10 lidar
        # readings appended to the observation in step().
        def ReportFixture(self, fixture, point, normal, fraction):
            if (fixture.filterData.categoryBits & 1) == 0:
                return -1  # ignore fixtures not in category 1 (e.g. the robot)
            self.p2 = point
            self.fraction = fraction
            return fraction

    self.lidar = [LidarCallback() for _ in range(10)]

    # Take one no-op step so reset() returns a real observation.
    return self.step(np.array([0, 0, 0, 0]))[0]
|
| 403 |
+
def step(self, action):
    """Advance the physics by one frame.

    Parameters
    ----------
    action : array-like of 4 floats
        Commands for [hip1, knee1, hip2, knee2]; clipped to [-1, 1] when
        used as torque fractions.

    Returns
    -------
    tuple
        ``(state, reward, done, info)`` per the Gym API.  ``state`` is a
        24-element float32 vector: hull angle/angular velocity, normalized
        linear velocity, per-joint angle and speed, per-leg ground contact
        flags, and 10 lidar distance fractions.
    """
    # self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
    control_speed = False  # Should be easier as well
    if control_speed:
        # Interpret the action as target joint speeds.
        self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
        self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
        self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
        self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
    else:
        # Interpret the action as torque: direction via sign(), magnitude
        # via |action| clipped to [0, 1], at full motor speed.
        self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
        self.joints[0].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
        )
        self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
        self.joints[1].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
        )
        self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
        self.joints[2].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
        )
        self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
        self.joints[3].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
        )

    # One frame of simulation (velocity / position solver iterations).
    self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

    pos = self.hull.position
    vel = self.hull.linearVelocity

    # Cast 10 lidar rays fanning downward/forward from the hull.
    for i in range(10):
        self.lidar[i].fraction = 1.0  # 1.0 == no hit within range
        self.lidar[i].p1 = pos
        self.lidar[i].p2 = (
            pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
            pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
        )
        self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)

    state = [
        self.hull.angle,  # Normal angles up to 0.5 here, but sure more is possible.
        2.0 * self.hull.angularVelocity / FPS,
        0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS,  # Normalized to get -1..1 range
        0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
        self.joints[
            0
        ].angle,  # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
        self.joints[0].speed / SPEED_HIP,
        self.joints[1].angle + 1.0,
        self.joints[1].speed / SPEED_KNEE,
        1.0 if self.legs[1].ground_contact else 0.0,
        self.joints[2].angle,
        self.joints[2].speed / SPEED_HIP,
        self.joints[3].angle + 1.0,
        self.joints[3].speed / SPEED_KNEE,
        1.0 if self.legs[3].ground_contact else 0.0,
    ]
    state += [l.fraction for l in self.lidar]
    assert len(state) == 24

    # Camera scroll position used by render().
    self.scroll = pos.x - VIEWPORT_W / SCALE / 5

    shaping = (
        130 * pos[0] / SCALE
    )  # moving forward is a way to receive reward (normalized to get 300 on completion)
    shaping -= 5.0 * abs(
        state[0]
    )  # keep head straight, other than that and falling, any behavior is unpunished

    # Reward is the *change* in shaping since the previous step.
    reward = 0
    if self.prev_shaping is not None:
        reward = shaping - self.prev_shaping
    self.prev_shaping = shaping

    # Small torque cost per joint.
    for a in action:
        reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
        # normalized to about -50.0 using heuristic, more optimal agent should spend less

    done = False
    # Falling (hull contact, via ContactDetector) or going backwards past
    # the start is a failure with a fixed penalty.
    if self.game_over or pos[0] < 0:
        reward = -100
        done = True
    # Reaching the far end of the terrain finishes the episode.
    if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
        done = True
    return np.array(state, dtype=np.float32), reward, done, {}
|
| 490 |
+
def render(self, mode="human"):
    """Draw the current scene.

    Renders sky, clouds, terrain, one lidar ray (cycled over time), all
    bodies, and the start flag.  Returns an RGB array when
    ``mode == "rgb_array"``, otherwise whatever the viewer's render returns.
    """
    from gym.envs.classic_control import rendering

    if self.viewer is None:
        self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
    # Follow the hull: shift the visible window by the scroll computed in step().
    self.viewer.set_bounds(
        self.scroll, VIEWPORT_W / SCALE + self.scroll, 0, VIEWPORT_H / SCALE
    )

    # Sky backdrop covering the whole visible window.
    self.viewer.draw_polygon(
        [
            (self.scroll, 0),
            (self.scroll + VIEWPORT_W / SCALE, 0),
            (self.scroll + VIEWPORT_W / SCALE, VIEWPORT_H / SCALE),
            (self.scroll, VIEWPORT_H / SCALE),
        ],
        color=(0.9, 0.9, 1.0),
    )
    # Clouds scroll at half speed for a simple parallax effect; ones
    # outside the window are culled using their precomputed x extents.
    for poly, x1, x2 in self.cloud_poly:
        if x2 < self.scroll / 2:
            continue
        if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
            continue
        self.viewer.draw_polygon(
            [(p[0] + self.scroll / 2, p[1]) for p in poly], color=(1, 1, 1)
        )
    # Terrain polygons, culled against the window.
    for poly, color in self.terrain_poly:
        if poly[1][0] < self.scroll:
            continue
        if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
            continue
        self.viewer.draw_polygon(poly, color=color)

    # Show one lidar ray per frame, sweeping forward then backward,
    # then pausing (counter runs 0..99 but only 0..2*len-1 draw).
    self.lidar_render = (self.lidar_render + 1) % 100
    i = self.lidar_render
    if i < 2 * len(self.lidar):
        l = (
            self.lidar[i]
            if i < len(self.lidar)
            else self.lidar[len(self.lidar) - i - 1]
        )
        self.viewer.draw_polyline([l.p1, l.p2], color=(1, 0, 0), linewidth=1)

    # Draw every fixture of every body: circles as filled + outline,
    # polygons as filled + closed outline.
    for obj in self.drawlist:
        for f in obj.fixtures:
            trans = f.body.transform
            if type(f.shape) is circleShape:
                t = rendering.Transform(translation=trans * f.shape.pos)
                self.viewer.draw_circle(
                    f.shape.radius, 30, color=obj.color1
                ).add_attr(t)
                self.viewer.draw_circle(
                    f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2
                ).add_attr(t)
            else:
                path = [trans * v for v in f.shape.vertices]
                self.viewer.draw_polygon(path, color=obj.color1)
                path.append(path[0])
                self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)

    # Start flag near the left edge of the terrain.
    flagy1 = TERRAIN_HEIGHT
    flagy2 = flagy1 + 50 / SCALE
    x = TERRAIN_STEP * 3
    self.viewer.draw_polyline(
        [(x, flagy1), (x, flagy2)], color=(0, 0, 0), linewidth=2
    )
    f = [
        (x, flagy2),
        (x, flagy2 - 10 / SCALE),
        (x + 25 / SCALE, flagy2 - 5 / SCALE),
    ]
    self.viewer.draw_polygon(f, color=(0.9, 0.2, 0))
    self.viewer.draw_polyline(f + [f[0]], color=(0, 0, 0), linewidth=2)

    return self.viewer.render(return_rgb_array=mode == "rgb_array")
|
| 566 |
+
def close(self):
    """Shut down the rendering window, if one was ever opened.

    Safe to call multiple times; subsequent calls are no-ops.
    """
    viewer = self.viewer
    if viewer is None:
        return
    viewer.close()
    self.viewer = None
|
| 572 |
+
class BipedalWalkerHardcore(BipedalWalker):
    """BipedalWalker variant with obstacle-laden terrain.

    Setting ``hardcore = True`` makes ``_generate_terrain`` pick random
    non-grass terrain states (instead of always returning to grass), which
    produces a much harder course.  Everything else is inherited unchanged.
    """

    hardcore = True
|
| 576 |
+
if __name__ == "__main__":
    # Heurisic: suboptimal, have no notion of balance.
    #
    # A hand-tuned three-state walking controller used as a smoke test /
    # demo: hold one leg, swing the other forward, put it down, push off,
    # then swap legs.  PD terms below steer joints toward per-state targets.
    env = BipedalWalker()
    env.reset()
    steps = 0
    total_reward = 0
    a = np.array([0.0, 0.0, 0.0, 0.0])
    STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
    SPEED = 0.29  # Will fall forward on higher speed
    state = STAY_ON_ONE_LEG
    moving_leg = 0
    supporting_leg = 1 - moving_leg
    SUPPORT_KNEE_ANGLE = +0.1
    supporting_knee_angle = SUPPORT_KNEE_ANGLE
    while True:
        s, r, done, info = env.step(a)
        total_reward += r
        # Periodic progress printout.
        if steps % 20 == 0 or done:
            print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
            print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4]]))
            print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9]]))
            print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
        steps += 1

        # NOTE: contact0/contact1 are read but never used below (kept as-is).
        contact0 = s[8]
        contact1 = s[13]
        # Observation offsets: each leg occupies 5 slots starting at index 4.
        moving_s_base = 4 + 5 * moving_leg
        supporting_s_base = 4 + 5 * supporting_leg

        # Per-leg targets (None == no target this step) and PD outputs.
        hip_targ = [None, None]  # -0.8 .. +1.1
        knee_targ = [None, None]  # -0.6 .. +0.9
        hip_todo = [0.0, 0.0]
        knee_todo = [0.0, 0.0]

        if state == STAY_ON_ONE_LEG:
            # Swing the moving leg up/forward; slowly straighten the
            # supporting knee (faster when already moving fast enough).
            hip_targ[moving_leg] = 1.1
            knee_targ[moving_leg] = -0.6
            supporting_knee_angle += 0.03
            if s[2] > SPEED:
                supporting_knee_angle += 0.03
            supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
            knee_targ[supporting_leg] = supporting_knee_angle
            if s[supporting_s_base + 0] < 0.10:  # supporting leg is behind
                state = PUT_OTHER_DOWN
        if state == PUT_OTHER_DOWN:
            # Lower the moving leg until its foot touches the ground.
            hip_targ[moving_leg] = +0.1
            knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
            knee_targ[supporting_leg] = supporting_knee_angle
            if s[moving_s_base + 4]:  # ground-contact flag of the moving leg
                state = PUSH_OFF
                supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
        if state == PUSH_OFF:
            # Extend the (old) supporting knee to propel forward, then swap legs.
            knee_targ[moving_leg] = supporting_knee_angle
            knee_targ[supporting_leg] = +1.0
            if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
                state = STAY_ON_ONE_LEG
                moving_leg = 1 - moving_leg
                supporting_leg = 1 - moving_leg

        # PD control toward the targets (s[4]/s[9] hip angles, s[6]/s[11]
        # knee angles; the following slots are the matching joint speeds).
        if hip_targ[0]:
            hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
        if hip_targ[1]:
            hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
        if knee_targ[0]:
            knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
        if knee_targ[1]:
            knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]

        hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1]  # PID to keep head strait
        hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
        knee_todo[0] -= 15.0 * s[3]  # vertical speed, to damp oscillations
        knee_todo[1] -= 15.0 * s[3]

        a[0] = hip_todo[0]
        a[1] = knee_todo[0]
        a[2] = hip_todo[1]
        a[3] = knee_todo[1]
        a = np.clip(0.5 * a, -1.0, 1.0)

        env.render()
        if done:
            break
|
gym-0.21.0/gym/envs/classic_control/cartpole.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Classic cart-pole system implemented by Rich Sutton et al.
|
| 3 |
+
Copied from http://incompleteideas.net/sutton/book/code/pole.c
|
| 4 |
+
permalink: https://perma.cc/C9ZM-652R
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import math
|
| 8 |
+
import gym
|
| 9 |
+
from gym import spaces, logger
|
| 10 |
+
from gym.utils import seeding
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CartPoleEnv(gym.Env):
    """
    Description:
        A pole is attached by an un-actuated joint to a cart, which moves along
        a frictionless track. The pendulum starts upright, and the goal is to
        prevent it from falling over by increasing and reducing the cart's
        velocity.

    Source:
        This environment corresponds to the version of the cart-pole problem
        described by Barto, Sutton, and Anderson

    Observation:
        Type: Box(4)
        Num     Observation               Min                     Max
        0       Cart Position             -4.8                    4.8
        1       Cart Velocity             -Inf                    Inf
        2       Pole Angle                -0.418 rad (-24 deg)    0.418 rad (24 deg)
        3       Pole Angular Velocity     -Inf                    Inf

    Actions:
        Type: Discrete(2)
        Num   Action
        0     Push cart to the left
        1     Push cart to the right

        Note: The amount the velocity that is reduced or increased is not
        fixed; it depends on the angle the pole is pointing. This is because
        the center of gravity of the pole increases the amount of energy needed
        to move the cart underneath it

    Reward:
        Reward is 1 for every step taken, including the termination step

    Starting State:
        All observations are assigned a uniform random value in [-0.05..0.05]

    Episode Termination:
        Pole Angle is more than 12 degrees.
        Cart Position is more than 2.4 (center of the cart reaches the edge of
        the display).
        Episode length is greater than 200.
    Solved Requirements:
        Considered solved when the average return is greater than or equal to
        195.0 over 100 consecutive trials.
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}

    def __init__(self):
        # Physical constants of the cart-pole system.
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = "euler"

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None

        # None until the episode first terminates; afterwards counts extra
        # (undefined-behavior) steps so a warning fires exactly once.
        self.steps_beyond_done = None

    def seed(self, seed=None):
        """Seed the environment's RNG; returns the seed actually used."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Apply one push (0=left, 1=right) and integrate the dynamics.

        Returns ``(observation, reward, done, info)``; reward is 1.0 for
        every step up to and including termination, 0.0 afterwards.
        """
        err_msg = "%r (%s) invalid" % (action, type(action))
        assert self.action_space.contains(action), err_msg

        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (
            force + self.polemass_length * theta_dot ** 2 * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        # Integration order differs between the two schemes on purpose:
        # plain euler updates positions before velocities.
        if self.kinematics_integrator == "euler":
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot

        self.state = (x, x_dot, theta, theta_dot)

        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # Stepping past termination: warn once, then give zero reward.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state, dtype=np.float32), reward, done, {}

    def reset(self):
        """Sample a fresh initial state, uniform in [-0.05, 0.05]^4."""
        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state, dtype=np.float32)

    def render(self, mode="human"):
        """Draw the cart, pole, axle and track; lazily builds the viewer."""
        screen_width = 600
        screen_height = 400

        world_width = self.x_threshold * 2
        scale = screen_width / world_width
        carty = 100  # TOP OF CART
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.viewer is None:
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(screen_width, screen_height)
            l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
            axleoffset = cartheight / 4.0
            cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.carttrans = rendering.Transform()
            cart.add_attr(self.carttrans)
            self.viewer.add_geom(cart)
            l, r, t, b = (
                -polewidth / 2,
                polewidth / 2,
                polelen - polewidth / 2,
                -polewidth / 2,
            )
            pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            pole.set_color(0.8, 0.6, 0.4)
            self.poletrans = rendering.Transform(translation=(0, axleoffset))
            pole.add_attr(self.poletrans)
            # Pole inherits the cart's transform so it moves with the cart.
            pole.add_attr(self.carttrans)
            self.viewer.add_geom(pole)
            self.axle = rendering.make_circle(polewidth / 2)
            self.axle.add_attr(self.poletrans)
            self.axle.add_attr(self.carttrans)
            self.axle.set_color(0.5, 0.5, 0.8)
            self.viewer.add_geom(self.axle)
            self.track = rendering.Line((0, carty), (screen_width, carty))
            self.track.set_color(0, 0, 0)
            self.viewer.add_geom(self.track)

            self._pole_geom = pole

        if self.state is None:
            return None

        # Edit the pole polygon vertex
        pole = self._pole_geom
        l, r, t, b = (
            -polewidth / 2,
            polewidth / 2,
            polelen - polewidth / 2,
            -polewidth / 2,
        )
        pole.v = [(l, b), (l, t), (r, t), (r, b)]

        x = self.state
        cartx = x[0] * scale + screen_width / 2.0  # MIDDLE OF CART
        self.carttrans.set_translation(cartx, carty)
        self.poletrans.set_rotation(-x[2])

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close the viewer window, if one exists."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
|
gym-0.21.0/gym/envs/classic_control/mountain_car.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
http://incompleteideas.net/MountainCar/MountainCar1.cp
|
| 3 |
+
permalink: https://perma.cc/6Z2N-PFWC
|
| 4 |
+
"""
|
| 5 |
+
import math
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
import gym
|
| 10 |
+
from gym import spaces
|
| 11 |
+
from gym.utils import seeding
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class MountainCarEnv(gym.Env):
|
| 15 |
+
"""
|
| 16 |
+
Description:
|
| 17 |
+
The agent (a car) is started at the bottom of a valley. For any given
|
| 18 |
+
state the agent may choose to accelerate to the left, right or cease
|
| 19 |
+
any acceleration.
|
| 20 |
+
|
| 21 |
+
Source:
|
| 22 |
+
The environment appeared first in Andrew Moore's PhD Thesis (1990).
|
| 23 |
+
|
| 24 |
+
Observation:
|
| 25 |
+
Type: Box(2)
|
| 26 |
+
Num Observation Min Max
|
| 27 |
+
0 Car Position -1.2 0.6
|
| 28 |
+
1 Car Velocity -0.07 0.07
|
| 29 |
+
|
| 30 |
+
Actions:
|
| 31 |
+
Type: Discrete(3)
|
| 32 |
+
Num Action
|
| 33 |
+
0 Accelerate to the Left
|
| 34 |
+
1 Don't accelerate
|
| 35 |
+
2 Accelerate to the Right
|
| 36 |
+
|
| 37 |
+
Note: This does not affect the amount of velocity affected by the
|
| 38 |
+
gravitational pull acting on the car.
|
| 39 |
+
|
| 40 |
+
Reward:
|
| 41 |
+
Reward of 0 is awarded if the agent reached the flag (position = 0.5)
|
| 42 |
+
on top of the mountain.
|
| 43 |
+
Reward of -1 is awarded if the position of the agent is less than 0.5.
|
| 44 |
+
|
| 45 |
+
Starting State:
|
| 46 |
+
The position of the car is assigned a uniform random value in
|
| 47 |
+
[-0.6 , -0.4].
|
| 48 |
+
The starting velocity of the car is always assigned to 0.
|
| 49 |
+
|
| 50 |
+
Episode Termination:
|
| 51 |
+
The car position is more than 0.5
|
| 52 |
+
Episode length is greater than 200
|
| 53 |
+
"""
|
| 54 |
+
|
| 55 |
+
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}
|
| 56 |
+
|
| 57 |
+
def __init__(self, goal_velocity=0):
|
| 58 |
+
self.min_position = -1.2
|
| 59 |
+
self.max_position = 0.6
|
| 60 |
+
self.max_speed = 0.07
|
| 61 |
+
self.goal_position = 0.5
|
| 62 |
+
self.goal_velocity = goal_velocity
|
| 63 |
+
|
| 64 |
+
self.force = 0.001
|
| 65 |
+
self.gravity = 0.0025
|
| 66 |
+
|
| 67 |
+
self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
|
| 68 |
+
self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)
|
| 69 |
+
|
| 70 |
+
self.viewer = None
|
| 71 |
+
|
| 72 |
+
self.action_space = spaces.Discrete(3)
|
| 73 |
+
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
|
| 74 |
+
|
| 75 |
+
self.seed()
|
| 76 |
+
|
| 77 |
+
def seed(self, seed=None):
|
| 78 |
+
self.np_random, seed = seeding.np_random(seed)
|
| 79 |
+
return [seed]
|
| 80 |
+
|
| 81 |
+
def step(self, action):
|
| 82 |
+
assert self.action_space.contains(action), "%r (%s) invalid" % (
|
| 83 |
+
action,
|
| 84 |
+
type(action),
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
position, velocity = self.state
|
| 88 |
+
velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity)
|
| 89 |
+
velocity = np.clip(velocity, -self.max_speed, self.max_speed)
|
| 90 |
+
position += velocity
|
| 91 |
+
position = np.clip(position, self.min_position, self.max_position)
|
| 92 |
+
if position == self.min_position and velocity < 0:
|
| 93 |
+
velocity = 0
|
| 94 |
+
|
| 95 |
+
done = bool(position >= self.goal_position and velocity >= self.goal_velocity)
|
| 96 |
+
reward = -1.0
|
| 97 |
+
|
| 98 |
+
self.state = (position, velocity)
|
| 99 |
+
return np.array(self.state, dtype=np.float32), reward, done, {}
|
| 100 |
+
|
| 101 |
+
def reset(self):
|
| 102 |
+
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
|
| 103 |
+
return np.array(self.state, dtype=np.float32)
|
| 104 |
+
|
| 105 |
+
def _height(self, xs):
|
| 106 |
+
return np.sin(3 * xs) * 0.45 + 0.55
|
| 107 |
+
|
| 108 |
+
def render(self, mode="human"):
|
| 109 |
+
screen_width = 600
|
| 110 |
+
screen_height = 400
|
| 111 |
+
|
| 112 |
+
world_width = self.max_position - self.min_position
|
| 113 |
+
scale = screen_width / world_width
|
| 114 |
+
carwidth = 40
|
| 115 |
+
carheight = 20
|
| 116 |
+
|
| 117 |
+
if self.viewer is None:
|
| 118 |
+
from gym.envs.classic_control import rendering
|
| 119 |
+
|
| 120 |
+
self.viewer = rendering.Viewer(screen_width, screen_height)
|
| 121 |
+
xs = np.linspace(self.min_position, self.max_position, 100)
|
| 122 |
+
ys = self._height(xs)
|
| 123 |
+
xys = list(zip((xs - self.min_position) * scale, ys * scale))
|
| 124 |
+
|
| 125 |
+
self.track = rendering.make_polyline(xys)
|
| 126 |
+
self.track.set_linewidth(4)
|
| 127 |
+
self.viewer.add_geom(self.track)
|
| 128 |
+
|
| 129 |
+
clearance = 10
|
| 130 |
+
|
| 131 |
+
l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
|
| 132 |
+
car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
|
| 133 |
+
car.add_attr(rendering.Transform(translation=(0, clearance)))
|
| 134 |
+
self.cartrans = rendering.Transform()
|
| 135 |
+
car.add_attr(self.cartrans)
|
| 136 |
+
self.viewer.add_geom(car)
|
| 137 |
+
frontwheel = rendering.make_circle(carheight / 2.5)
|
| 138 |
+
frontwheel.set_color(0.5, 0.5, 0.5)
|
| 139 |
+
frontwheel.add_attr(
|
| 140 |
+
rendering.Transform(translation=(carwidth / 4, clearance))
|
| 141 |
+
)
|
| 142 |
+
frontwheel.add_attr(self.cartrans)
|
| 143 |
+
self.viewer.add_geom(frontwheel)
|
| 144 |
+
backwheel = rendering.make_circle(carheight / 2.5)
|
| 145 |
+
backwheel.add_attr(
|
| 146 |
+
rendering.Transform(translation=(-carwidth / 4, clearance))
|
| 147 |
+
)
|
| 148 |
+
backwheel.add_attr(self.cartrans)
|
| 149 |
+
backwheel.set_color(0.5, 0.5, 0.5)
|
| 150 |
+
self.viewer.add_geom(backwheel)
|
| 151 |
+
flagx = (self.goal_position - self.min_position) * scale
|
| 152 |
+
flagy1 = self._height(self.goal_position) * scale
|
| 153 |
+
flagy2 = flagy1 + 50
|
| 154 |
+
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
|
| 155 |
+
self.viewer.add_geom(flagpole)
|
| 156 |
+
flag = rendering.FilledPolygon(
|
| 157 |
+
[(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)]
|
| 158 |
+
)
|
| 159 |
+
flag.set_color(0.8, 0.8, 0)
|
| 160 |
+
self.viewer.add_geom(flag)
|
| 161 |
+
|
| 162 |
+
pos = self.state[0]
|
| 163 |
+
self.cartrans.set_translation(
|
| 164 |
+
(pos - self.min_position) * scale, self._height(pos) * scale
|
| 165 |
+
)
|
| 166 |
+
self.cartrans.set_rotation(math.cos(3 * pos))
|
| 167 |
+
|
| 168 |
+
return self.viewer.render(return_rgb_array=mode == "rgb_array")
|
| 169 |
+
|
| 170 |
+
def get_keys_to_action(self):
    """Return the keyboard-control mapping for ``gym.utils.play``.

    Keys are tuples of pressed pygame keycodes; values are discrete
    actions (0 = push left, 1 = no-op, 2 = push right).
    """
    # 276 = left arrow, 275 = right arrow; nothing pressed or both
    # pressed cancels out to the no-op action.
    left_key, right_key = 276, 275
    return {
        (): 1,
        (left_key,): 0,
        (right_key,): 2,
        (right_key, left_key): 1,
    }
|
| 173 |
+
|
| 174 |
+
def close(self):
    """Release the rendering window, if one was created."""
    if self.viewer:
        self.viewer.close()
        # Drop the reference so a later render() can recreate the viewer.
        self.viewer = None
|
gym-0.21.0/gym/envs/classic_control/pendulum.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gym
|
| 2 |
+
from gym import spaces
|
| 3 |
+
from gym.utils import seeding
|
| 4 |
+
import numpy as np
|
| 5 |
+
from os import path
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class PendulumEnv(gym.Env):
    """Classic pendulum swing-up task.

    The state is (theta, theta_dot); the observation is
    (cos(theta), sin(theta), theta_dot). The single continuous action is a
    torque clipped to [-max_torque, max_torque]. The reward is the negated
    quadratic cost on angle error, angular velocity, and control effort.
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}

    def __init__(self, g=10.0):
        # Physical parameters: gravity g, mass m, rod length l.
        self.max_speed = 8
        self.max_torque = 2.0
        self.dt = 0.05
        self.g = g
        self.m = 1.0
        self.l = 1.0
        self.viewer = None  # created lazily by render()

        # Observation bounds: cos/sin lie in [-1, 1]; velocity is clipped
        # to max_speed in step(), so these bounds are tight.
        high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(
            low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)

        self.seed()

    def seed(self, seed=None):
        """Seed the environment's random number generator; returns [seed]."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, u):
        """Apply torque ``u`` for one timestep; return (obs, reward, done, info).

        ``done`` is always False: episode length is expected to be enforced
        by a TimeLimit wrapper.
        """
        th, thdot = self.state  # th := theta

        g = self.g
        m = self.m
        l = self.l
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u  # for rendering
        # Cost penalizes angle error (wrapped to [-pi, pi)), velocity, and torque.
        costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)

        # Semi-implicit Euler integration of the pendulum dynamics:
        # the new velocity is used when updating the angle.
        newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
        newth = th + newthdot * dt

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(self):
        """Sample a uniform random initial state and return the first observation."""
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        return self._get_obs()

    def _get_obs(self):
        """Return the (cos(theta), sin(theta), theta_dot) observation."""
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)

    def render(self, mode="human"):
        """Render the pendulum; returns an RGB array when mode == "rgb_array"."""
        if self.viewer is None:
            # Import locally so headless training never touches the display stack.
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, 0.2)
            rod.set_color(0.8, 0.3, 0.3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(0.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1.0, 1.0)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)

        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u is not None:
            # Scale (and flip) the torque-arrow image by the last applied torque.
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Release the rendering window, if one was created."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def angle_normalize(x):
    """Wrap angle ``x`` (radians) into the half-open interval [-pi, pi)."""
    two_pi = 2 * np.pi
    shifted = (x + np.pi) % two_pi
    return shifted - np.pi
|
gym-0.21.0/gym/envs/mujoco/ant.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from gym import utils
|
| 3 |
+
from gym.envs.mujoco import mujoco_env
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo Ant: a four-legged agent rewarded for forward (+x) locomotion."""

    def __init__(self):
        # frame_skip=5: each env step advances the simulation by 5 frames.
        mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5)
        utils.EzPickle.__init__(self)

    def step(self, a):
        """Apply action ``a``; return (obs, reward, done, info).

        Reward = forward velocity - control cost - contact cost + survive bonus.
        The episode terminates when the state becomes non-finite or the torso
        height (state[2]) leaves the interval [0.2, 1.0].
        """
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]
        forward_reward = (xposafter - xposbefore) / self.dt
        ctrl_cost = 0.5 * np.square(a).sum()
        # External contact forces are clipped to [-1, 1] before being penalized.
        contact_cost = (
            0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        )
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
        notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
        done = not notdone
        ob = self._get_obs()
        return (
            ob,
            reward,
            done,
            dict(
                reward_forward=forward_reward,
                reward_ctrl=-ctrl_cost,
                reward_contact=-contact_cost,
                reward_survive=survive_reward,
            ),
        )

    def _get_obs(self):
        # Exclude the torso x/y position (qpos[:2]) so the policy cannot read
        # its absolute location; include clipped external contact forces.
        return np.concatenate(
            [
                self.sim.data.qpos.flat[2:],
                self.sim.data.qvel.flat,
                np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
            ]
        )

    def reset_model(self):
        """Reset to the initial pose with uniform position / Gaussian velocity noise."""
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-0.1, high=0.1
        )
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        # Zoom the camera to half of the model's bounding extent.
        self.viewer.cam.distance = self.model.stat.extent * 0.5
|
gym-0.21.0/gym/envs/mujoco/assets/thrower.xml
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<mujoco model="arm3d">
|
| 2 |
+
<compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
|
| 3 |
+
<option timestep="0.01" gravity="0 0 -9.81" iterations="20" integrator="Euler"/>
|
| 4 |
+
<default>
|
| 5 |
+
<joint armature='0.75' damping="1" limited="true"/>
|
| 6 |
+
<geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
|
| 7 |
+
</default>
|
| 8 |
+
|
| 9 |
+
<worldbody>
|
| 10 |
+
<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
|
| 11 |
+
|
| 12 |
+
<geom type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>
|
| 13 |
+
|
| 14 |
+
<body name="r_shoulder_pan_link" pos="0 -0.6 0">
|
| 15 |
+
<geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" density="0.0001"/>
|
| 16 |
+
<geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" density="0.0001"/>
|
| 17 |
+
<geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" density="0.0001"/>
|
| 18 |
+
<geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" density="0.0001"/>
|
| 19 |
+
<geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" density="1"/>
|
| 20 |
+
<joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-0.4854 1.214602" damping="1.0"/>
|
| 21 |
+
|
| 22 |
+
<body name="r_shoulder_lift_link" pos="0.1 0 0">
|
| 23 |
+
<geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" density="0.0001"/>
|
| 24 |
+
<joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 0.7963" damping="1.0"/>
|
| 25 |
+
|
| 26 |
+
<body name="r_upper_arm_roll_link" pos="0 0 0">
|
| 27 |
+
<geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
|
| 28 |
+
<joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="1.0"/>
|
| 29 |
+
|
| 30 |
+
<body name="r_upper_arm_link" pos="0 0 0">
|
| 31 |
+
<geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" density="0.0001"/>
|
| 32 |
+
|
| 33 |
+
<body name="r_elbow_flex_link" pos="0.4 0 0">
|
| 34 |
+
<geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" density="0.0001"/>
|
| 35 |
+
<joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.7 0.7" damping="1.0"/>
|
| 36 |
+
|
| 37 |
+
<body name="r_forearm_roll_link" pos="0 0 0">
|
| 38 |
+
<geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
|
| 39 |
+
<joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping="1.0" range="-1.5 1.5"/>
|
| 40 |
+
|
| 41 |
+
<body name="r_forearm_link" pos="0 0 0" axisangle="1 0 0 0.392">
|
| 42 |
+
<geom name="fa" type="capsule" fromto="0 0 0 0 0 0.291" size="0.05" density="0.0001"/>
|
| 43 |
+
|
| 44 |
+
<body name="r_wrist_flex_link" pos="0 0 0.321" axisangle="0 0 1 1.57">
|
| 45 |
+
<geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" density="0.0001"/>
|
| 46 |
+
<joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-1.0 1.0" damping="1.0" />
|
| 47 |
+
|
| 48 |
+
<body name="r_wrist_roll_link" pos="0 0 0" axisangle="0 1 0 -1.178">
|
| 49 |
+
<joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="0 1 0" damping="1.0" range="0 2.25"/>
|
| 50 |
+
<geom type="capsule" fromto="0 -0.05 0 0 0.05 0" size="0.01" contype="1" conaffinity="1" density="0.0001"/>
|
| 51 |
+
<body pos="0 0 0" axisangle="0 0 1 0.392">
|
| 52 |
+
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 53 |
+
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density=".0001"/>
|
| 54 |
+
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 55 |
+
</body>
|
| 56 |
+
<body pos="0 0 0" axisangle="0 0 1 1.57">
|
| 57 |
+
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 58 |
+
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 59 |
+
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 60 |
+
</body>
|
| 61 |
+
<body pos="0 0 0" axisangle="0 0 1 1.178">
|
| 62 |
+
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 63 |
+
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 64 |
+
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 65 |
+
</body>
|
| 66 |
+
<body pos="0 0 0" axisangle="0 0 1 0.785">
|
| 67 |
+
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 68 |
+
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 69 |
+
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 70 |
+
</body>
|
| 71 |
+
<body pos="0 0 0" axisangle="0 0 1 1.96">
|
| 72 |
+
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 73 |
+
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 74 |
+
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 75 |
+
</body>
|
| 76 |
+
<body pos="0 0 0" axisangle="0 0 1 2.355">
|
| 77 |
+
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 78 |
+
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 79 |
+
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 80 |
+
</body>
|
| 81 |
+
<body pos="0 0 0" axisangle="0 0 1 2.74">
|
| 82 |
+
<geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 83 |
+
<geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 84 |
+
<geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
|
| 85 |
+
</body>
|
| 86 |
+
</body>
|
| 87 |
+
</body>
|
| 88 |
+
</body>
|
| 89 |
+
</body>
|
| 90 |
+
</body>
|
| 91 |
+
</body>
|
| 92 |
+
</body>
|
| 93 |
+
</body>
|
| 94 |
+
</body>
|
| 95 |
+
|
| 96 |
+
<body name="goal" pos="0.575 0.5 -0.328">
|
| 97 |
+
<geom rgba="1 1 1 1" type="box" pos="0 0 0.005" size="0.075 0.075 0.001" contype="1" conaffinity="1" density="1000"/>
|
| 98 |
+
<geom rgba="1 1 1 1" type="box" pos="0.0 0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
|
| 99 |
+
<geom rgba="1 1 1 1" type="box" pos="0.0 -0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
|
| 100 |
+
<geom rgba="1 1 1 1" type="box" pos="0.075 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
|
| 101 |
+
<geom rgba="1 1 1 1" type="box" pos="-0.076 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
|
| 102 |
+
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 0.073 0.0075 0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
| 103 |
+
<geom rgba="1 1 1 1" type="capsule" fromto="0.073 -0.073 0.0075 0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
| 104 |
+
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 0.073 0.0075 -0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
| 105 |
+
<geom rgba="1 1 1 1" type="capsule" fromto="-0.073 -0.073 0.0075 -0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
|
| 106 |
+
<joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="1.0"/>
|
| 107 |
+
<joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="1.0"/>
|
| 108 |
+
</body>
|
| 109 |
+
|
| 110 |
+
<body name="ball" pos="0.5 -0.8 0.275">
|
| 111 |
+
<geom rgba="1. 1. 1. 1" type="sphere" size="0.03 0.03 0.1" density='25' contype="1" conaffinity="1"/>
|
| 112 |
+
<joint name="ball_free" type="free" armature='0' damping="0" limited="false"/>
|
| 113 |
+
</body>
|
| 114 |
+
|
| 115 |
+
</worldbody>
|
| 116 |
+
|
| 117 |
+
<actuator>
|
| 118 |
+
<motor joint="r_shoulder_pan_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
| 119 |
+
<motor joint="r_shoulder_lift_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
| 120 |
+
<motor joint="r_upper_arm_roll_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
| 121 |
+
<motor joint="r_elbow_flex_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
|
| 122 |
+
<motor joint="r_forearm_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
|
| 123 |
+
<motor joint="r_wrist_flex_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
|
| 124 |
+
<motor joint="r_wrist_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true"/>
|
| 125 |
+
</actuator>
|
| 126 |
+
|
| 127 |
+
</mujoco>
|
gym-0.21.0/gym/envs/mujoco/hopper.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from gym import utils
|
| 3 |
+
from gym.envs.mujoco import mujoco_env
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo Hopper (v2): a one-legged agent rewarded for forward (+x) hopping."""

    def __init__(self):
        # frame_skip=4: each env step advances the simulation by 4 frames.
        mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4)
        utils.EzPickle.__init__(self)

    def step(self, a):
        """Apply action ``a``; return (obs, reward, done, info).

        Reward = forward velocity + alive bonus - control cost. The episode
        terminates when the state is non-finite, any non-root coordinate
        reaches magnitude 100, the torso height drops to 0.7, or the torso
        pitch angle reaches magnitude 0.2 rad.
        """
        posbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        posafter, height, ang = self.sim.data.qpos[0:3]
        alive_bonus = 1.0
        reward = (posafter - posbefore) / self.dt
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()
        s = self.state_vector()
        done = not (
            np.isfinite(s).all()
            and (np.abs(s[2:]) < 100).all()
            and (height > 0.7)
            and (abs(ang) < 0.2)
        )
        ob = self._get_obs()
        return ob, reward, done, {}

    def _get_obs(self):
        # Exclude the root x position (qpos[0]); clip velocities to [-10, 10].
        return np.concatenate(
            [self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)]
        )

    def reset_model(self):
        """Reset to the initial pose with small uniform noise on qpos and qvel."""
        qpos = self.init_qpos + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        # Track the torso body with a slightly zoomed, elevated camera.
        self.viewer.cam.trackbodyid = 2
        self.viewer.cam.distance = self.model.stat.extent * 0.75
        self.viewer.cam.lookat[2] = 1.15
        self.viewer.cam.elevation = -20
|
gym-0.21.0/gym/envs/mujoco/hopper_v3.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from gym.envs.mujoco import mujoco_env
|
| 3 |
+
from gym import utils
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# Default camera pose applied by HopperEnv.viewer_setup().
DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 2,
    "distance": 3.0,
    "lookat": np.array((0.0, 0.0, 1.15)),
    "elevation": -20.0,
}


class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo Hopper (v3) with configurable reward weights and health ranges.

    Compared to v2, every reward term, the termination ("healthy") criteria,
    and the reset noise scale are constructor parameters.
    """

    def __init__(
        self,
        xml_file="hopper.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_state_range=(-100.0, 100.0),
        healthy_z_range=(0.7, float("inf")),
        healthy_angle_range=(-0.2, 0.2),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
    ):
        # EzPickle records the constructor arguments to support pickling.
        utils.EzPickle.__init__(**locals())

        self._forward_reward_weight = forward_reward_weight

        self._ctrl_cost_weight = ctrl_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy

        self._healthy_state_range = healthy_state_range
        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        mujoco_env.MujocoEnv.__init__(self, xml_file, 4)

    @property
    def healthy_reward(self):
        # The bonus is paid while healthy; when termination-on-unhealthy is
        # disabled, it is paid unconditionally (matches upstream semantics).
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        """Return the quadratic control-effort penalty for ``action``."""
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        """True while torso height, pitch angle, and all non-root state
        coordinates lie strictly inside their configured ranges."""
        z, angle = self.sim.data.qpos[1:3]
        state = self.state_vector()[2:]

        min_state, max_state = self._healthy_state_range
        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle

        is_healthy = all((healthy_state, healthy_z, healthy_angle))

        return is_healthy

    @property
    def done(self):
        # Episode only terminates on unhealthiness when configured to do so.
        done = not self.is_healthy if self._terminate_when_unhealthy else False
        return done

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            # Hide the root x position so the policy cannot read absolute location.
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        """Apply ``action``; return (obs, reward, done, info).

        reward = forward_reward_weight * x_velocity + healthy_reward
                 - ctrl_cost_weight * ||action||^2
        """
        x_position_before = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.sim.data.qpos[0]
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        done = self.done
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        return observation, reward, done, info

    def reset_model(self):
        """Reset to the initial pose with uniform noise of scale ``reset_noise_scale``."""
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        # Apply each camera setting; array-valued entries (e.g. lookat) are
        # copied element-wise into the existing camera buffer.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
|
gym-0.21.0/gym/envs/mujoco/pusher.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from gym import utils
|
| 3 |
+
from gym.envs.mujoco import mujoco_env
|
| 4 |
+
|
| 5 |
+
import mujoco_py
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo Pusher: a 7-DoF arm must push a cylinder onto a goal spot."""

    def __init__(self):
        utils.EzPickle.__init__(self)
        # frame_skip=5: each env step advances the simulation by 5 frames.
        mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5)

    def step(self, a):
        """Apply action ``a``; return (obs, reward, done, info).

        Reward combines object-to-goal distance, fingertip-to-object
        distance, and a control-effort penalty. NOTE: the reward is computed
        from the state *before* the simulation step; this matches the
        original implementation and is kept for backward compatibility.
        """
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")

        reward_near = -np.linalg.norm(vec_1)
        reward_dist = -np.linalg.norm(vec_2)
        reward_ctrl = -np.square(a).sum()
        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near

        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False  # episode length is enforced externally (TimeLimit wrapper)
        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)

    def viewer_setup(self):
        # Free camera (no body tracking), pulled back to see the whole table.
        self.viewer.cam.trackbodyid = -1
        self.viewer.cam.distance = 4.0

    def reset_model(self):
        """Randomize the cylinder position away from the goal and reset.

        Bug fix: copy ``init_qpos`` before writing into it. The original code
        aliased the array (``qpos = self.init_qpos``), so the in-place slice
        assignments below silently mutated the pristine initial state on every
        reset. The overwritten slots are re-set each reset, so rollout
        behavior is unchanged, but ``init_qpos`` is no longer corrupted.
        """
        qpos = self.init_qpos.copy()

        self.goal_pos = np.asarray([0, 0])
        # Rejection-sample a cylinder position at least 0.17 away from the goal.
        while True:
            self.cylinder_pos = np.concatenate(
                [
                    self.np_random.uniform(low=-0.3, high=0, size=1),
                    self.np_random.uniform(low=-0.2, high=0.2, size=1),
                ]
            )
            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
                break

        # The last four qpos slots are (cylinder x, y, goal x, y).
        qpos[-4:-2] = self.cylinder_pos
        qpos[-2:] = self.goal_pos
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[-4:] = 0  # cylinder and goal start at rest
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        # Observe only the 7 arm joints plus the key body positions.
        return np.concatenate(
            [
                self.sim.data.qpos.flat[:7],
                self.sim.data.qvel.flat[:7],
                self.get_body_com("tips_arm"),
                self.get_body_com("object"),
                self.get_body_com("goal"),
            ]
        )
|
gym-0.21.0/gym/envs/mujoco/swimmer_v3.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from gym.envs.mujoco import mujoco_env
|
| 3 |
+
from gym import utils
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# No camera overrides for Swimmer; viewer_setup() iterates this (empty) dict.
DEFAULT_CAMERA_CONFIG = {}


class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo Swimmer (v3): a multi-link swimmer rewarded for +x velocity.

    Reward weights, reset-noise scale, and position exclusion are
    constructor parameters. The episode never terminates on its own.
    """

    def __init__(
        self,
        xml_file="swimmer.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-4,
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
    ):
        # EzPickle records the constructor arguments to support pickling.
        utils.EzPickle.__init__(**locals())

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        mujoco_env.MujocoEnv.__init__(self, xml_file, 4)

    def control_cost(self, action):
        """Return the quadratic control-effort penalty for ``action``."""
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    def step(self, action):
        """Apply ``action``; return (obs, reward, done, info).

        reward = forward_reward_weight * x_velocity - ctrl_cost_weight * ||action||^2
        ``done`` is always False (length enforced by a TimeLimit wrapper).
        """
        xy_position_before = self.sim.data.qpos[0:2].copy()
        self.do_simulation(action, self.frame_skip)
        xy_position_after = self.sim.data.qpos[0:2].copy()

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        forward_reward = self._forward_reward_weight * x_velocity

        ctrl_cost = self.control_cost(action)

        observation = self._get_obs()
        reward = forward_reward - ctrl_cost
        done = False
        info = {
            "reward_fwd": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        return observation, reward, done, info

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()

        if self._exclude_current_positions_from_observation:
            # Hide the root x/y position so the policy cannot read absolute location.
            position = position[2:]

        observation = np.concatenate([position, velocity]).ravel()
        return observation

    def reset_model(self):
        """Reset to the initial pose with uniform noise of scale ``reset_noise_scale``."""
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        # Apply each camera setting; array-valued entries are copied in place.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
|
gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="utf-8"?>
|
| 2 |
+
<mujoco>
|
| 3 |
+
<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
|
| 4 |
+
<option timestep="0.002">
|
| 5 |
+
<flag warmstart="enable"></flag>
|
| 6 |
+
</option>
|
| 7 |
+
|
| 8 |
+
<include file="shared.xml"></include>
|
| 9 |
+
|
| 10 |
+
<worldbody>
|
| 11 |
+
<geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
|
| 12 |
+
<body name="floor0" pos="0.8 0.75 0">
|
| 13 |
+
<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
|
| 14 |
+
</body>
|
| 15 |
+
|
| 16 |
+
<include file="robot.xml"></include>
|
| 17 |
+
|
| 18 |
+
<body pos="1.3 0.75 0.2" name="table0">
|
| 19 |
+
<geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
|
| 20 |
+
</body>
|
| 21 |
+
|
| 22 |
+
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
|
| 23 |
+
</worldbody>
|
| 24 |
+
|
| 25 |
+
<actuator></actuator>
|
| 26 |
+
</mujoco>
|
gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="utf-8"?>
|
| 2 |
+
<mujoco>
|
| 3 |
+
<compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
|
| 4 |
+
<option timestep="0.002" iterations="20" apirate="200">
|
| 5 |
+
<flag warmstart="enable"></flag>
|
| 6 |
+
</option>
|
| 7 |
+
|
| 8 |
+
<include file="shared.xml"></include>
|
| 9 |
+
<include file="shared_touch_sensors_92.xml"></include>
|
| 10 |
+
|
| 11 |
+
<asset>
|
| 12 |
+
<include file="shared_asset.xml"></include>
|
| 13 |
+
|
| 14 |
+
<texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
|
| 15 |
+
<texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
|
| 16 |
+
|
| 17 |
+
<material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
|
| 18 |
+
<material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
|
| 19 |
+
<material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
|
| 20 |
+
</asset>
|
| 21 |
+
|
| 22 |
+
<worldbody>
|
| 23 |
+
<geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
|
| 24 |
+
<body name="floor0" pos="1 1 0"></body>
|
| 25 |
+
|
| 26 |
+
<include file="robot_touch_sensors_92.xml"></include>
|
| 27 |
+
|
| 28 |
+
<body name="object" pos="1 0.87 0.2">
|
| 29 |
+
<geom name="object" type="box" size="0.025 0.025 0.025" material="material:object" condim="4" density="567"></geom>
|
| 30 |
+
<geom name="object_hidden" type="box" size="0.024 0.024 0.024" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
|
| 31 |
+
<site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
|
| 32 |
+
<joint name="object:joint" type="free" damping="0.01"></joint>
|
| 33 |
+
</body>
|
| 34 |
+
<body name="target" pos="1 0.87 0.2">
|
| 35 |
+
<geom name="target" type="box" size="0.025 0.025 0.025" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
|
| 36 |
+
<site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
|
| 37 |
+
<joint name="target:joint" type="free" damping="0.01"></joint>
|
| 38 |
+
</body>
|
| 39 |
+
|
| 40 |
+
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
|
| 41 |
+
</worldbody>
|
| 42 |
+
</mujoco>
|
gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="utf-8"?>
|
| 2 |
+
<mujoco>
|
| 3 |
+
<compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
|
| 4 |
+
<option timestep="0.002" iterations="20" apirate="200">
|
| 5 |
+
<flag warmstart="enable"></flag>
|
| 6 |
+
</option>
|
| 7 |
+
|
| 8 |
+
<include file="shared.xml"></include>
|
| 9 |
+
|
| 10 |
+
<asset>
|
| 11 |
+
<include file="shared_asset.xml"></include>
|
| 12 |
+
</asset>
|
| 13 |
+
|
| 14 |
+
<worldbody>
|
| 15 |
+
<geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
|
| 16 |
+
<body name="floor0" pos="1 1 0">
|
| 17 |
+
<site name="target0" pos="0 0 0" size="0.005" rgba="1 0 0 1" type="sphere"></site>
|
| 18 |
+
<site name="target1" pos="0 0 0" size="0.005" rgba="0 1 0 1" type="sphere"></site>
|
| 19 |
+
<site name="target2" pos="0 0 0" size="0.005" rgba="0 0 1 1" type="sphere"></site>
|
| 20 |
+
<site name="target3" pos="0 0 0" size="0.005" rgba="1 1 0 1" type="sphere"></site>
|
| 21 |
+
<site name="target4" pos="0 0 0" size="0.005" rgba="1 0 1 1" type="sphere"></site>
|
| 22 |
+
|
| 23 |
+
<site name="finger0" pos="0 0 0" size="0.01" rgba="1 0 0 0.2" type="sphere"></site>
|
| 24 |
+
<site name="finger1" pos="0 0 0" size="0.01" rgba="0 1 0 0.2" type="sphere"></site>
|
| 25 |
+
<site name="finger2" pos="0 0 0" size="0.01" rgba="0 0 1 0.2" type="sphere"></site>
|
| 26 |
+
<site name="finger3" pos="0 0 0" size="0.01" rgba="1 1 0 0.2" type="sphere"></site>
|
| 27 |
+
<site name="finger4" pos="0 0 0" size="0.01" rgba="1 0 1 0.2" type="sphere"></site>
|
| 28 |
+
</body>
|
| 29 |
+
|
| 30 |
+
<include file="robot.xml"></include>
|
| 31 |
+
|
| 32 |
+
<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
|
| 33 |
+
</worldbody>
|
| 34 |
+
</mujoco>
|
gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
|
| 2 |
+
<mujoco>
|
| 3 |
+
<size njmax="500" nconmax="100" nuser_jnt="1" nuser_site="1" nuser_tendon="1" nuser_sensor="1" nuser_actuator="16" nstack="600000"></size>
|
| 4 |
+
|
| 5 |
+
<visual>
|
| 6 |
+
<map fogstart="3" fogend="5" force="0.1"></map>
|
| 7 |
+
<quality shadowsize="4096"></quality>
|
| 8 |
+
</visual>
|
| 9 |
+
|
| 10 |
+
<default>
|
| 11 |
+
<default class="robot0:asset_class">
|
| 12 |
+
<geom friction="1 0.005 0.001" condim="3" margin="0.0005" contype="1" conaffinity="1"></geom>
|
| 13 |
+
<joint limited="true" damping="0.1" armature="0.001" margin="0.01" frictionloss="0.001"></joint>
|
| 14 |
+
<site size="0.005" rgba="0.4 0.9 0.4 1"></site>
|
| 15 |
+
<general ctrllimited="true" forcelimited="true"></general>
|
| 16 |
+
</default>
|
| 17 |
+
<default class="robot0:D_Touch">
|
| 18 |
+
<site type="box" size="0.009 0.004 0.013" pos="0 -0.004 0.018" rgba="0.8 0.8 0.8 0.15" group="4"></site>
|
| 19 |
+
</default>
|
| 20 |
+
<default class="robot0:DC_Hand">
|
| 21 |
+
<geom material="robot0:MatColl" contype="1" conaffinity="0" group="4"></geom>
|
| 22 |
+
</default>
|
| 23 |
+
<default class="robot0:D_Vizual">
|
| 24 |
+
<geom material="robot0:MatViz" contype="0" conaffinity="0" group="1" type="mesh"></geom>
|
| 25 |
+
</default>
|
| 26 |
+
<default class="robot0:free">
|
| 27 |
+
<joint type="free" damping="0" armature="0" limited="false"></joint>
|
| 28 |
+
</default>
|
| 29 |
+
</default>
|
| 30 |
+
|
| 31 |
+
<contact>
|
| 32 |
+
<pair geom1="robot0:C_ffdistal" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 33 |
+
<pair geom1="robot0:C_ffmiddle" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 34 |
+
<pair geom1="robot0:C_ffproximal" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 35 |
+
<pair geom1="robot0:C_mfproximal" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 36 |
+
<pair geom1="robot0:C_mfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 37 |
+
<pair geom1="robot0:C_rfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 38 |
+
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 39 |
+
<pair geom1="robot0:C_palm0" geom2="robot0:C_thdistal" condim="1"></pair>
|
| 40 |
+
<pair geom1="robot0:C_mfdistal" geom2="robot0:C_ffdistal" condim="1"></pair>
|
| 41 |
+
<pair geom1="robot0:C_rfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
|
| 42 |
+
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
|
| 43 |
+
<pair geom1="robot0:C_mfproximal" geom2="robot0:C_ffproximal" condim="1"></pair>
|
| 44 |
+
<pair geom1="robot0:C_rfproximal" geom2="robot0:C_mfproximal" condim="1"></pair>
|
| 45 |
+
<pair geom1="robot0:C_lfproximal" geom2="robot0:C_rfproximal" condim="1"></pair>
|
| 46 |
+
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
|
| 47 |
+
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
|
| 48 |
+
<pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfmiddle" condim="1"></pair>
|
| 49 |
+
<pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfdistal" condim="1"></pair>
|
| 50 |
+
<pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfmiddle" condim="1"></pair>
|
| 51 |
+
</contact>
|
| 52 |
+
|
| 53 |
+
<tendon>
|
| 54 |
+
<fixed name="robot0:T_WRJ1r" limited="true" range="-0.032 0.032" user="1236">
|
| 55 |
+
<joint joint="robot0:WRJ1" coef="0.0325"></joint>
|
| 56 |
+
</fixed>
|
| 57 |
+
<fixed name="robot0:T_WRJ1l" limited="true" range="-0.032 0.032" user="1237">
|
| 58 |
+
<joint joint="robot0:WRJ1" coef="-0.0325"></joint>
|
| 59 |
+
</fixed>
|
| 60 |
+
<fixed name="robot0:T_WRJ0u" limited="true" range="-0.032 0.032" user="1236">
|
| 61 |
+
<joint joint="robot0:WRJ0" coef="0.0175"></joint>
|
| 62 |
+
</fixed>
|
| 63 |
+
<fixed name="robot0:T_WRJ0d" limited="true" range="-0.032 0.032" user="1237">
|
| 64 |
+
<joint joint="robot0:WRJ0" coef="-0.0175"></joint>
|
| 65 |
+
</fixed>
|
| 66 |
+
<fixed name="robot0:T_FFJ3r" limited="true" range="-0.018 0.018" user="1204">
|
| 67 |
+
<joint joint="robot0:FFJ3" coef="0.01"></joint>
|
| 68 |
+
</fixed>
|
| 69 |
+
<fixed name="robot0:T_FFJ3l" limited="true" range="-0.018 0.018" user="1205">
|
| 70 |
+
<joint joint="robot0:FFJ3" coef="-0.01"></joint>
|
| 71 |
+
</fixed>
|
| 72 |
+
<fixed name="robot0:T_FFJ2u" limited="true" range="-0.007 0.03" user="1202">
|
| 73 |
+
<joint joint="robot0:FFJ2" coef="0.01"></joint>
|
| 74 |
+
</fixed>
|
| 75 |
+
<fixed name="robot0:T_FFJ2d" limited="true" range="-0.03 0.007" user="1203">
|
| 76 |
+
<joint joint="robot0:FFJ2" coef="-0.01"></joint>
|
| 77 |
+
</fixed>
|
| 78 |
+
<fixed name="robot0:T_FFJ1c" limited="true" range="-0.001 0.001">
|
| 79 |
+
<joint joint="robot0:FFJ0" coef="0.00705"></joint>
|
| 80 |
+
<joint joint="robot0:FFJ1" coef="-0.00805"></joint>
|
| 81 |
+
</fixed>
|
| 82 |
+
<fixed name="robot0:T_FFJ1u" limited="true" range="-0.007 0.03" user="1200">
|
| 83 |
+
<joint joint="robot0:FFJ0" coef="0.00705"></joint>
|
| 84 |
+
<joint joint="robot0:FFJ1" coef="0.00805"></joint>
|
| 85 |
+
</fixed>
|
| 86 |
+
<fixed name="robot0:T_FFJ1d" limited="true" range="-0.03 0.007" user="1201">
|
| 87 |
+
<joint joint="robot0:FFJ0" coef="-0.00705"></joint>
|
| 88 |
+
<joint joint="robot0:FFJ1" coef="-0.00805"></joint>
|
| 89 |
+
</fixed>
|
| 90 |
+
<fixed name="robot0:T_MFJ3r" limited="true" range="-0.018 0.018" user="1210">
|
| 91 |
+
<joint joint="robot0:MFJ3" coef="0.01"></joint>
|
| 92 |
+
</fixed>
|
| 93 |
+
<fixed name="robot0:T_MFJ3l" limited="true" range="-0.018 0.018" user="1211">
|
| 94 |
+
<joint joint="robot0:MFJ3" coef="-0.01"></joint>
|
| 95 |
+
</fixed>
|
| 96 |
+
<fixed name="robot0:T_MFJ2u" limited="true" range="-0.007 0.03" user="1208">
|
| 97 |
+
<joint joint="robot0:MFJ2" coef="0.01"></joint>
|
| 98 |
+
</fixed>
|
| 99 |
+
<fixed name="robot0:T_MFJ2d" limited="true" range="-0.03 0.007" user="1209">
|
| 100 |
+
<joint joint="robot0:MFJ2" coef="-0.01"></joint>
|
| 101 |
+
</fixed>
|
| 102 |
+
<fixed name="robot0:T_MFJ1c" limited="true" range="-0.001 0.001">
|
| 103 |
+
<joint joint="robot0:MFJ0" coef="0.00705"></joint>
|
| 104 |
+
<joint joint="robot0:MFJ1" coef="-0.00805"></joint>
|
| 105 |
+
</fixed>
|
| 106 |
+
<fixed name="robot0:T_MFJ1u" limited="true" range="-0.007 0.03" user="1206">
|
| 107 |
+
<joint joint="robot0:MFJ0" coef="0.00705"></joint>
|
| 108 |
+
<joint joint="robot0:MFJ1" coef="0.00805"></joint>
|
| 109 |
+
</fixed>
|
| 110 |
+
<fixed name="robot0:T_MFJ1d" limited="true" range="-0.03 0.007" user="1207">
|
| 111 |
+
<joint joint="robot0:MFJ0" coef="-0.00705"></joint>
|
| 112 |
+
<joint joint="robot0:MFJ1" coef="-0.00805"></joint>
|
| 113 |
+
</fixed>
|
| 114 |
+
<fixed name="robot0:T_RFJ3r" limited="true" range="-0.018 0.018" user="1216">
|
| 115 |
+
<joint joint="robot0:RFJ3" coef="0.01"></joint>
|
| 116 |
+
</fixed>
|
| 117 |
+
<fixed name="robot0:T_RFJ3l" limited="true" range="-0.018 0.018" user="1217">
|
| 118 |
+
<joint joint="robot0:RFJ3" coef="-0.01"></joint>
|
| 119 |
+
</fixed>
|
| 120 |
+
<fixed name="robot0:T_RFJ2u" limited="true" range="-0.007 0.03" user="1214">
|
| 121 |
+
<joint joint="robot0:RFJ2" coef="0.01"></joint>
|
| 122 |
+
</fixed>
|
| 123 |
+
<fixed name="robot0:T_RFJ2d" limited="true" range="-0.03 0.007" user="1215">
|
| 124 |
+
<joint joint="robot0:RFJ2" coef="-0.01"></joint>
|
| 125 |
+
</fixed>
|
| 126 |
+
<fixed name="robot0:T_RFJ1c" limited="true" range="-0.001 0.001">
|
| 127 |
+
<joint joint="robot0:RFJ0" coef="0.00705"></joint>
|
| 128 |
+
<joint joint="robot0:RFJ1" coef="-0.00805"></joint>
|
| 129 |
+
</fixed>
|
| 130 |
+
<fixed name="robot0:T_RFJ1u" limited="true" range="-0.007 0.03" user="1212">
|
| 131 |
+
<joint joint="robot0:RFJ0" coef="0.00705"></joint>
|
| 132 |
+
<joint joint="robot0:RFJ1" coef="0.00805"></joint>
|
| 133 |
+
</fixed>
|
| 134 |
+
<fixed name="robot0:T_RFJ1d" limited="true" range="-0.03 0.007" user="1213">
|
| 135 |
+
<joint joint="robot0:RFJ0" coef="-0.00705"></joint>
|
| 136 |
+
<joint joint="robot0:RFJ1" coef="-0.00805"></joint>
|
| 137 |
+
</fixed>
|
| 138 |
+
<fixed name="robot0:T_LFJ4u" limited="true" range="-0.007 0.03" user="1224">
|
| 139 |
+
<joint joint="robot0:LFJ4" coef="0.01"></joint>
|
| 140 |
+
</fixed>
|
| 141 |
+
<fixed name="robot0:T_LFJ4d" limited="true" range="-0.03 0.007" user="1225">
|
| 142 |
+
<joint joint="robot0:LFJ4" coef="-0.01"></joint>
|
| 143 |
+
</fixed>
|
| 144 |
+
<fixed name="robot0:T_LFJ3r" limited="true" range="-0.018 0.018" user="1222">
|
| 145 |
+
<joint joint="robot0:LFJ3" coef="0.01"></joint>
|
| 146 |
+
</fixed>
|
| 147 |
+
<fixed name="robot0:T_LFJ3l" limited="true" range="-0.018 0.018" user="1223">
|
| 148 |
+
<joint joint="robot0:LFJ3" coef="-0.01"></joint>
|
| 149 |
+
</fixed>
|
| 150 |
+
<fixed name="robot0:T_LFJ2u" limited="true" range="-0.007 0.03" user="1220">
|
| 151 |
+
<joint joint="robot0:LFJ2" coef="0.01"></joint>
|
| 152 |
+
</fixed>
|
| 153 |
+
<fixed name="robot0:T_LFJ2d" limited="true" range="-0.03 0.007" user="1221">
|
| 154 |
+
<joint joint="robot0:LFJ2" coef="-0.01"></joint>
|
| 155 |
+
</fixed>
|
| 156 |
+
<fixed name="robot0:T_LFJ1c" limited="true" range="-0.001 0.001">
|
| 157 |
+
<joint joint="robot0:LFJ0" coef="0.00705"></joint>
|
| 158 |
+
<joint joint="robot0:LFJ1" coef="-0.00805"></joint>
|
| 159 |
+
</fixed>
|
| 160 |
+
<fixed name="robot0:T_LFJ1u" limited="true" range="-0.007 0.03" user="1218">
|
| 161 |
+
<joint joint="robot0:LFJ0" coef="0.00705"></joint>
|
| 162 |
+
<joint joint="robot0:LFJ1" coef="0.00805"></joint>
|
| 163 |
+
</fixed>
|
| 164 |
+
<fixed name="robot0:T_LFJ1d" limited="true" range="-0.03 0.007" user="1219">
|
| 165 |
+
<joint joint="robot0:LFJ0" coef="-0.00705"></joint>
|
| 166 |
+
<joint joint="robot0:LFJ1" coef="-0.00805"></joint>
|
| 167 |
+
</fixed>
|
| 168 |
+
<fixed name="robot0:T_THJ4a" limited="true" range="-0.018 0.018" user="1234">
|
| 169 |
+
<joint joint="robot0:THJ4" coef="0.01636"></joint>
|
| 170 |
+
</fixed>
|
| 171 |
+
<fixed name="robot0:T_THJ4c" limited="true" range="-0.018 0.018" user="1235">
|
| 172 |
+
<joint joint="robot0:THJ4" coef="-0.01636"></joint>
|
| 173 |
+
</fixed>
|
| 174 |
+
<fixed name="robot0:T_THJ3u" limited="true" range="-0.007 0.03" user="1232">
|
| 175 |
+
<joint joint="robot0:THJ3" coef="0.01"></joint>
|
| 176 |
+
</fixed>
|
| 177 |
+
<fixed name="robot0:T_THJ3d" limited="true" range="-0.03 0.007" user="1233">
|
| 178 |
+
<joint joint="robot0:THJ3" coef="-0.01"></joint>
|
| 179 |
+
</fixed>
|
| 180 |
+
<fixed name="robot0:T_THJ2u" limited="true" range="-0.018 0.018" user="1230">
|
| 181 |
+
<joint joint="robot0:THJ2" coef="0.011"></joint>
|
| 182 |
+
</fixed>
|
| 183 |
+
<fixed name="robot0:T_THJ2d" limited="true" range="-0.018 0.018" user="1231">
|
| 184 |
+
<joint joint="robot0:THJ2" coef="-0.011"></joint>
|
| 185 |
+
</fixed>
|
| 186 |
+
<fixed name="robot0:T_THJ1r" limited="true" range="-0.018 0.018" user="1228">
|
| 187 |
+
<joint joint="robot0:THJ1" coef="0.011"></joint>
|
| 188 |
+
</fixed>
|
| 189 |
+
<fixed name="robot0:T_THJ1l" limited="true" range="-0.018 0.018" user="1229">
|
| 190 |
+
<joint joint="robot0:THJ1" coef="-0.011"></joint>
|
| 191 |
+
</fixed>
|
| 192 |
+
<fixed name="robot0:T_THJ0r" limited="true" range="-0.03 0.007" user="1226">
|
| 193 |
+
<joint joint="robot0:THJ0" coef="0.009"></joint>
|
| 194 |
+
</fixed>
|
| 195 |
+
<fixed name="robot0:T_THJ0l" limited="true" range="-0.007 0.03" user="1227">
|
| 196 |
+
<joint joint="robot0:THJ0" coef="-0.009"></joint>
|
| 197 |
+
</fixed>
|
| 198 |
+
</tendon>
|
| 199 |
+
|
| 200 |
+
<sensor>
|
| 201 |
+
<jointpos name="robot0:Sjp_WRJ1" joint="robot0:WRJ1"></jointpos>
|
| 202 |
+
<jointpos name="robot0:Sjp_WRJ0" joint="robot0:WRJ0"></jointpos>
|
| 203 |
+
<jointpos name="robot0:Sjp_FFJ3" joint="robot0:FFJ3"></jointpos>
|
| 204 |
+
<jointpos name="robot0:Sjp_FFJ2" joint="robot0:FFJ2"></jointpos>
|
| 205 |
+
<jointpos name="robot0:Sjp_FFJ1" joint="robot0:FFJ1"></jointpos>
|
| 206 |
+
<jointpos name="robot0:Sjp_FFJ0" joint="robot0:FFJ0"></jointpos>
|
| 207 |
+
<jointpos name="robot0:Sjp_MFJ3" joint="robot0:MFJ3"></jointpos>
|
| 208 |
+
<jointpos name="robot0:Sjp_MFJ2" joint="robot0:MFJ2"></jointpos>
|
| 209 |
+
<jointpos name="robot0:Sjp_MFJ1" joint="robot0:MFJ1"></jointpos>
|
| 210 |
+
<jointpos name="robot0:Sjp_MFJ0" joint="robot0:MFJ0"></jointpos>
|
| 211 |
+
<jointpos name="robot0:Sjp_RFJ3" joint="robot0:RFJ3"></jointpos>
|
| 212 |
+
<jointpos name="robot0:Sjp_RFJ2" joint="robot0:RFJ2"></jointpos>
|
| 213 |
+
<jointpos name="robot0:Sjp_RFJ1" joint="robot0:RFJ1"></jointpos>
|
| 214 |
+
<jointpos name="robot0:Sjp_RFJ0" joint="robot0:RFJ0"></jointpos>
|
| 215 |
+
<jointpos name="robot0:Sjp_LFJ4" joint="robot0:LFJ4"></jointpos>
|
| 216 |
+
<jointpos name="robot0:Sjp_LFJ3" joint="robot0:LFJ3"></jointpos>
|
| 217 |
+
<jointpos name="robot0:Sjp_LFJ2" joint="robot0:LFJ2"></jointpos>
|
| 218 |
+
<jointpos name="robot0:Sjp_LFJ1" joint="robot0:LFJ1"></jointpos>
|
| 219 |
+
<jointpos name="robot0:Sjp_LFJ0" joint="robot0:LFJ0"></jointpos>
|
| 220 |
+
<jointpos name="robot0:Sjp_THJ4" joint="robot0:THJ4"></jointpos>
|
| 221 |
+
<jointpos name="robot0:Sjp_THJ3" joint="robot0:THJ3"></jointpos>
|
| 222 |
+
<jointpos name="robot0:Sjp_THJ2" joint="robot0:THJ2"></jointpos>
|
| 223 |
+
<jointpos name="robot0:Sjp_THJ1" joint="robot0:THJ1"></jointpos>
|
| 224 |
+
<jointpos name="robot0:Sjp_THJ0" joint="robot0:THJ0"></jointpos>
|
| 225 |
+
<touch name="robot0:ST_Tch_fftip" site="robot0:Tch_fftip"></touch>
|
| 226 |
+
<touch name="robot0:ST_Tch_mftip" site="robot0:Tch_mftip"></touch>
|
| 227 |
+
<touch name="robot0:ST_Tch_rftip" site="robot0:Tch_rftip"></touch>
|
| 228 |
+
<touch name="robot0:ST_Tch_lftip" site="robot0:Tch_lftip"></touch>
|
| 229 |
+
<touch name="robot0:ST_Tch_thtip" site="robot0:Tch_thtip"></touch>
|
| 230 |
+
</sensor>
|
| 231 |
+
|
| 232 |
+
<actuator>
|
| 233 |
+
<position name="robot0:A_WRJ1" class="robot0:asset_class" user="2038" joint="robot0:WRJ1" ctrlrange="-0.489 0.14" kp="5" forcerange="-4.785 4.785"></position>
|
| 234 |
+
<position name="robot0:A_WRJ0" class="robot0:asset_class" user="2036" joint="robot0:WRJ0" ctrlrange="-0.698 0.489" kp="5" forcerange="-2.175 2.175"></position>
|
| 235 |
+
<position name="robot0:A_FFJ3" class="robot0:asset_class" user="2004" joint="robot0:FFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
|
| 236 |
+
<position name="robot0:A_FFJ2" class="robot0:asset_class" user="2002" joint="robot0:FFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
|
| 237 |
+
<position name="robot0:A_FFJ1" class="robot0:asset_class" user="2000" joint="robot0:FFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
|
| 238 |
+
<position name="robot0:A_MFJ3" class="robot0:asset_class" user="2010" joint="robot0:MFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
|
| 239 |
+
<position name="robot0:A_MFJ2" class="robot0:asset_class" user="2008" joint="robot0:MFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
|
| 240 |
+
<position name="robot0:A_MFJ1" class="robot0:asset_class" user="2006" joint="robot0:MFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
|
| 241 |
+
<position name="robot0:A_RFJ3" class="robot0:asset_class" user="2016" joint="robot0:RFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
|
| 242 |
+
<position name="robot0:A_RFJ2" class="robot0:asset_class" user="2014" joint="robot0:RFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
|
| 243 |
+
<position name="robot0:A_RFJ1" class="robot0:asset_class" user="2012" joint="robot0:RFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
|
| 244 |
+
<position name="robot0:A_LFJ4" class="robot0:asset_class" user="2024" joint="robot0:LFJ4" ctrlrange="0 0.785" kp="1" forcerange="-0.9 0.9"></position>
|
| 245 |
+
<position name="robot0:A_LFJ3" class="robot0:asset_class" user="2022" joint="robot0:LFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
|
| 246 |
+
<position name="robot0:A_LFJ2" class="robot0:asset_class" user="2020" joint="robot0:LFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
|
| 247 |
+
<position name="robot0:A_LFJ1" class="robot0:asset_class" user="2018" joint="robot0:LFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
|
| 248 |
+
<position name="robot0:A_THJ4" class="robot0:asset_class" user="2034" joint="robot0:THJ4" ctrlrange="-1.047 1.047" kp="1" forcerange="-2.3722 2.3722"></position>
|
| 249 |
+
<position name="robot0:A_THJ3" class="robot0:asset_class" user="2032" joint="robot0:THJ3" ctrlrange="0 1.222" kp="1" forcerange="-1.45 1.45"></position>
|
| 250 |
+
<position name="robot0:A_THJ2" class="robot0:asset_class" user="2030" joint="robot0:THJ2" ctrlrange="-0.209 0.209" kp="1" forcerange="-0.99 0.99"></position>
|
| 251 |
+
<position name="robot0:A_THJ1" class="robot0:asset_class" user="2028" joint="robot0:THJ1" ctrlrange="-0.524 0.524" kp="1" forcerange="-0.99 0.99"></position>
|
| 252 |
+
<position name="robot0:A_THJ0" class="robot0:asset_class" user="2026" joint="robot0:THJ0" ctrlrange="-1.571 0" kp="1" forcerange="-0.81 0.81"></position>
|
| 253 |
+
</actuator>
|
| 254 |
+
</mujoco>
|
gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl
ADDED
|
Binary file (22.9 kB). View file
|
|
|
gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl
ADDED
|
Binary file (30.2 kB). View file
|
|
|
gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl
ADDED
|
Binary file (75.1 kB). View file
|
|
|
gym-0.21.0/gym/envs/robotics/fetch/reach.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from gym import utils
|
| 3 |
+
from gym.envs.robotics import fetch_env
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# Ensure we get the path separator correct on windows
|
| 7 |
+
MODEL_XML_PATH = os.path.join("fetch", "reach.xml")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
|
| 11 |
+
def __init__(self, reward_type="sparse"):
|
| 12 |
+
initial_qpos = {
|
| 13 |
+
"robot0:slide0": 0.4049,
|
| 14 |
+
"robot0:slide1": 0.48,
|
| 15 |
+
"robot0:slide2": 0.0,
|
| 16 |
+
}
|
| 17 |
+
fetch_env.FetchEnv.__init__(
|
| 18 |
+
self,
|
| 19 |
+
MODEL_XML_PATH,
|
| 20 |
+
has_object=False,
|
| 21 |
+
block_gripper=True,
|
| 22 |
+
n_substeps=20,
|
| 23 |
+
gripper_extra_height=0.2,
|
| 24 |
+
target_in_the_air=True,
|
| 25 |
+
target_offset=0.0,
|
| 26 |
+
obj_range=0.15,
|
| 27 |
+
target_range=0.15,
|
| 28 |
+
distance_threshold=0.05,
|
| 29 |
+
initial_qpos=initial_qpos,
|
| 30 |
+
reward_type=reward_type,
|
| 31 |
+
)
|
| 32 |
+
utils.EzPickle.__init__(self, reward_type=reward_type)
|
gym-0.21.0/gym/envs/robotics/hand/__init__.py
ADDED
|
File without changes
|
gym-0.21.0/gym/envs/robotics/hand/manipulate.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
from gym import utils, error
|
| 5 |
+
from gym.envs.robotics import rotations, hand_env
|
| 6 |
+
from gym.envs.robotics.utils import robot_get_obs
|
| 7 |
+
|
| 8 |
+
try:
|
| 9 |
+
import mujoco_py
|
| 10 |
+
except ImportError as e:
|
| 11 |
+
raise error.DependencyNotInstalled(
|
| 12 |
+
"{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
|
| 13 |
+
e
|
| 14 |
+
)
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def quat_from_angle_and_axis(angle, axis):
|
| 19 |
+
assert axis.shape == (3,)
|
| 20 |
+
axis /= np.linalg.norm(axis)
|
| 21 |
+
quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis])
|
| 22 |
+
quat /= np.linalg.norm(quat)
|
| 23 |
+
return quat
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Ensure we get the path separator correct on windows
|
| 27 |
+
MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block.xml")
|
| 28 |
+
MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg.xml")
|
| 29 |
+
MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen.xml")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class ManipulateEnv(hand_env.HandEnv):
    """Shadow-hand in-hand manipulation: bring an object to a target pose.

    The goal is a 7-vector (3D position + 4D quaternion). Position and/or
    rotation components can each be ignored, fixed, or randomized.
    """

    def __init__(
        self,
        model_path,
        target_position,
        target_rotation,
        target_position_range,
        reward_type,
        initial_qpos=None,
        randomize_initial_position=True,
        randomize_initial_rotation=True,
        distance_threshold=0.01,
        rotation_threshold=0.1,
        n_substeps=20,
        relative_control=False,
        ignore_z_target_rotation=False,
    ):
        """Initializes a new Hand manipulation environment.

        Args:
            model_path (string): path to the environments XML file
            target_position (string): the type of target position:
                - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily
                - fixed: target position is set to the initial position of the object
                - random: target position is fully randomized according to target_position_range
            target_rotation (string): the type of target rotation:
                - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily
                - fixed: target rotation is set to the initial rotation of the object
                - xyz: fully randomized target rotation around the X, Y and Z axis
                - z: fully randomized target rotation around the Z axis
                - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y
            ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored
            target_position_range (np.array of shape (3, 2)): range of the target_position randomization
            reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
            initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
            randomize_initial_position (boolean): whether or not to randomize the initial position of the object
            randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object
            distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved
            rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved
            n_substeps (int): number of substeps the simulation runs on every call to step
            relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state
        """
        self.target_position = target_position
        self.target_rotation = target_rotation
        self.target_position_range = target_position_range
        # Pre-compute the 24 axis-aligned ("parallel") orientations used by the
        # 'parallel' target_rotation mode.
        self.parallel_quats = [
            rotations.euler2quat(r) for r in rotations.get_parallel_rotations()
        ]
        self.randomize_initial_rotation = randomize_initial_rotation
        self.randomize_initial_position = randomize_initial_position
        self.distance_threshold = distance_threshold
        self.rotation_threshold = rotation_threshold
        self.reward_type = reward_type
        self.ignore_z_target_rotation = ignore_z_target_rotation

        assert self.target_position in ["ignore", "fixed", "random"]
        assert self.target_rotation in ["ignore", "fixed", "xyz", "z", "parallel"]
        initial_qpos = initial_qpos or {}

        hand_env.HandEnv.__init__(
            self,
            model_path,
            n_substeps=n_substeps,
            initial_qpos=initial_qpos,
            relative_control=relative_control,
        )

    def _get_achieved_goal(self):
        """Return the object's current pose as a 7-vector (pos + quat)."""
        # Object position and rotation.
        object_qpos = self.sim.data.get_joint_qpos("object:joint")
        assert object_qpos.shape == (7,)  # free joint: 3 pos + 4 quat
        return object_qpos

    def _goal_distance(self, goal_a, goal_b):
        """Return (positional distance, rotational distance) between two 7D goals.

        Components configured as 'ignore' contribute zero distance.
        """
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != "ignore":
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != "ignore":
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Special case: We want to ignore the Z component of the rotation.
                # This code here assumes Euler angles with xyz convention. We first transform
                # to euler, then set the Z component to be equal between the two, and finally
                # transform back into quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                euler_a[2] = euler_b[2]
                quat_a = rotations.euler2quat(euler_a)

            # Subtract quaternions and extract angle between them.
            quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot

    # GoalEnv methods
    # ----------------------------

    def compute_reward(self, achieved_goal, goal, info):
        """Sparse: 0 on success, -1 otherwise. Dense: weighted pos+rot distance."""
        if self.reward_type == "sparse":
            success = self._is_success(achieved_goal, goal).astype(np.float32)
            return success - 1.0
        else:
            d_pos, d_rot = self._goal_distance(achieved_goal, goal)
            # We weigh the difference in position to avoid that `d_pos` (in meters) is completely
            # dominated by `d_rot` (in radians).
            return -(10.0 * d_pos + d_rot)

    # RobotEnv methods
    # ----------------------------

    def _is_success(self, achieved_goal, desired_goal):
        """Success requires BOTH position and rotation within their thresholds."""
        d_pos, d_rot = self._goal_distance(achieved_goal, desired_goal)
        achieved_pos = (d_pos < self.distance_threshold).astype(np.float32)
        achieved_rot = (d_rot < self.rotation_threshold).astype(np.float32)
        achieved_both = achieved_pos * achieved_rot
        return achieved_both

    def _env_setup(self, initial_qpos):
        """Apply the initial joint configuration and propagate it through the sim."""
        for name, value in initial_qpos.items():
            self.sim.data.set_joint_qpos(name, value)
        self.sim.forward()

    def _reset_sim(self):
        """Reset the sim, randomize the object pose, and let physics settle.

        Returns False if the object left the palm (or MuJoCo became unstable),
        which tells the caller to retry the reset.
        """
        self.sim.set_state(self.initial_state)
        self.sim.forward()

        initial_qpos = self.sim.data.get_joint_qpos("object:joint").copy()
        initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
        assert initial_qpos.shape == (7,)
        assert initial_pos.shape == (3,)
        assert initial_quat.shape == (4,)
        # Invalidate; it is rebuilt below from the (possibly randomized) pos/quat.
        initial_qpos = None

        # Randomization initial rotation.
        if self.randomize_initial_rotation:
            if self.target_rotation == "z":
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = np.array([0.0, 0.0, 1.0])
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == "parallel":
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = np.array([0.0, 0.0, 1.0])
                z_quat = quat_from_angle_and_axis(angle, axis)
                parallel_quat = self.parallel_quats[
                    self.np_random.randint(len(self.parallel_quats))
                ]
                offset_quat = rotations.quat_mul(z_quat, parallel_quat)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation in ["xyz", "ignore"]:
                angle = self.np_random.uniform(-np.pi, np.pi)
                axis = self.np_random.uniform(-1.0, 1.0, size=3)
                offset_quat = quat_from_angle_and_axis(angle, axis)
                initial_quat = rotations.quat_mul(initial_quat, offset_quat)
            elif self.target_rotation == "fixed":
                pass
            else:
                raise error.Error(
                    'Unknown target_rotation option "{}".'.format(self.target_rotation)
                )

        # Randomize initial position.
        if self.randomize_initial_position:
            if self.target_position != "fixed":
                initial_pos += self.np_random.normal(size=3, scale=0.005)

        initial_quat /= np.linalg.norm(initial_quat)
        initial_qpos = np.concatenate([initial_pos, initial_quat])
        self.sim.data.set_joint_qpos("object:joint", initial_qpos)

        def is_on_palm():
            # The object is considered "held" while its center site stays
            # above the palm height threshold.
            self.sim.forward()
            cube_middle_idx = self.sim.model.site_name2id("object:center")
            cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
            is_on_palm = cube_middle_pos[2] > 0.04
            return is_on_palm

        # Run the simulation for a bunch of timesteps to let everything settle in.
        for _ in range(10):
            self._set_action(np.zeros(20))
            try:
                self.sim.step()
            except mujoco_py.MujocoException:
                return False
        return is_on_palm()

    def _sample_goal(self):
        """Sample a new 7D goal (position + quaternion) per the configured modes."""
        # Select a goal for the object position.
        target_pos = None
        if self.target_position == "random":
            assert self.target_position_range.shape == (3, 2)
            offset = self.np_random.uniform(
                self.target_position_range[:, 0], self.target_position_range[:, 1]
            )
            assert offset.shape == (3,)
            target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + offset
        elif self.target_position in ["ignore", "fixed"]:
            target_pos = self.sim.data.get_joint_qpos("object:joint")[:3]
        else:
            raise error.Error(
                'Unknown target_position option "{}".'.format(self.target_position)
            )
        assert target_pos is not None
        assert target_pos.shape == (3,)

        # Select a goal for the object rotation.
        target_quat = None
        if self.target_rotation == "z":
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0.0, 0.0, 1.0])
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation == "parallel":
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0.0, 0.0, 1.0])
            target_quat = quat_from_angle_and_axis(angle, axis)
            parallel_quat = self.parallel_quats[
                self.np_random.randint(len(self.parallel_quats))
            ]
            target_quat = rotations.quat_mul(target_quat, parallel_quat)
        elif self.target_rotation == "xyz":
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = self.np_random.uniform(-1.0, 1.0, size=3)
            target_quat = quat_from_angle_and_axis(angle, axis)
        elif self.target_rotation in ["ignore", "fixed"]:
            # Keep the object's current orientation. The full free-joint qpos is
            # 7 elements (pos + quat); slice off the position so we get just the
            # quaternion — without [3:] the shape assert below fails.
            target_quat = self.sim.data.get_joint_qpos("object:joint")[3:]
        else:
            raise error.Error(
                'Unknown target_rotation option "{}".'.format(self.target_rotation)
            )
        assert target_quat is not None
        assert target_quat.shape == (4,)

        target_quat /= np.linalg.norm(target_quat)  # normalized quaternion
        goal = np.concatenate([target_pos, target_quat])
        return goal

    def _render_callback(self):
        # Assign current state to target object but offset a bit so that the actual object
        # is not obscured.
        goal = self.goal.copy()
        assert goal.shape == (7,)
        if self.target_position == "ignore":
            # Move the object to the side since we do not care about its position.
            goal[0] += 0.15
        self.sim.data.set_joint_qpos("target:joint", goal)
        self.sim.data.set_joint_qvel("target:joint", np.zeros(6))

        if "object_hidden" in self.sim.model.geom_names:
            hidden_id = self.sim.model.geom_name2id("object_hidden")
            self.sim.model.geom_rgba[hidden_id, 3] = 1.0
        self.sim.forward()

    def _get_obs(self):
        """Observation: hand qpos/qvel, object qvel, and the achieved 7D pose."""
        robot_qpos, robot_qvel = robot_get_obs(self.sim)
        object_qvel = self.sim.data.get_joint_qvel("object:joint")
        achieved_goal = (
            self._get_achieved_goal().ravel()
        )  # this contains the object position + rotation
        observation = np.concatenate(
            [robot_qpos, robot_qvel, object_qvel, achieved_goal]
        )
        return {
            "observation": observation.copy(),
            "achieved_goal": achieved_goal.copy(),
            "desired_goal": self.goal.ravel().copy(),
        }
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
class HandBlockEnv(ManipulateEnv, utils.EzPickle):
    """In-hand manipulation of a block with the Shadow hand."""

    def __init__(
        self, target_position="random", target_rotation="xyz", reward_type="sparse"
    ):
        utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
        # Target-position sampling box (meters, per-axis min/max around the object).
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        ManipulateEnv.__init__(
            self,
            model_path=MANIPULATE_BLOCK_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            reward_type=reward_type,
        )
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
class HandEggEnv(ManipulateEnv, utils.EzPickle):
    """In-hand manipulation of an egg-shaped object with the Shadow hand."""

    def __init__(
        self, target_position="random", target_rotation="xyz", reward_type="sparse"
    ):
        utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
        # Target-position sampling box (meters, per-axis min/max around the object).
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        ManipulateEnv.__init__(
            self,
            model_path=MANIPULATE_EGG_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            reward_type=reward_type,
        )
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
class HandPenEnv(ManipulateEnv, utils.EzPickle):
    """In-hand manipulation of a pen with the Shadow hand.

    The pen's Z rotation (spin about its long axis) is ignored for goal
    checking, the initial rotation is not randomized, and the position
    threshold is looser than the block/egg variants.
    """

    def __init__(
        self, target_position="random", target_rotation="xyz", reward_type="sparse"
    ):
        utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
        # Target-position sampling box (meters, per-axis min/max around the object).
        position_range = np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)])
        ManipulateEnv.__init__(
            self,
            model_path=MANIPULATE_PEN_XML,
            target_position=target_position,
            target_rotation=target_rotation,
            target_position_range=position_range,
            randomize_initial_rotation=False,
            reward_type=reward_type,
            ignore_z_target_rotation=True,
            distance_threshold=0.05,
        )
|
gym-0.21.0/gym/envs/robotics/rotations.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2009-2017, Matthew Brett and Christoph Gohlke
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# Redistribution and use in source and binary forms, with or without
|
| 5 |
+
# modification, are permitted provided that the following conditions are
|
| 6 |
+
# met:
|
| 7 |
+
#
|
| 8 |
+
# 1. Redistributions of source code must retain the above copyright notice,
|
| 9 |
+
# this list of conditions and the following disclaimer.
|
| 10 |
+
#
|
| 11 |
+
# 2. Redistributions in binary form must reproduce the above copyright
|
| 12 |
+
# notice, this list of conditions and the following disclaimer in the
|
| 13 |
+
# documentation and/or other materials provided with the distribution.
|
| 14 |
+
#
|
| 15 |
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
| 16 |
+
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
| 17 |
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 18 |
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
| 19 |
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
| 20 |
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
| 21 |
+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
| 22 |
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
| 23 |
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
| 24 |
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
| 25 |
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 26 |
+
|
| 27 |
+
# Many methods borrow heavily or entirely from transforms3d:
|
| 28 |
+
# https://github.com/matthew-brett/transforms3d
|
| 29 |
+
# They have mostly been modified to support batched operations.
|
| 30 |
+
|
| 31 |
+
import numpy as np
|
| 32 |
+
import itertools
|
| 33 |
+
|
| 34 |
+
"""
|
| 35 |
+
Rotations
|
| 36 |
+
=========
|
| 37 |
+
|
| 38 |
+
Note: these have caused many subtle bugs in the past.
|
| 39 |
+
Be careful while updating these methods and while using them in clever ways.
|
| 40 |
+
|
| 41 |
+
See MuJoCo documentation here: http://mujoco.org/book/modeling.html#COrientation
|
| 42 |
+
|
| 43 |
+
Conventions
|
| 44 |
+
-----------
|
| 45 |
+
- All functions accept batches as well as individual rotations
|
| 46 |
+
- All rotation conventions match respective MuJoCo defaults
|
| 47 |
+
- All angles are in radians
|
| 48 |
+
- Matrices follow LR convention
|
| 49 |
+
- Euler Angles are all relative with 'xyz' axes ordering
|
| 50 |
+
- See specific representation for more information
|
| 51 |
+
|
| 52 |
+
Representations
|
| 53 |
+
---------------
|
| 54 |
+
|
| 55 |
+
Euler
|
| 56 |
+
There are many euler angle frames -- here we will strive to use the default
|
| 57 |
+
in MuJoCo, which is eulerseq='xyz'.
|
| 58 |
+
This frame is a relative rotating frame, about x, y, and z axes in order.
|
| 59 |
+
Relative rotating means that after we rotate about x, then we use the
|
| 60 |
+
new (rotated) y, and the same for z.
|
| 61 |
+
|
| 62 |
+
Quaternions
|
| 63 |
+
These are defined in terms of rotation (angle) about a unit vector (x, y, z)
|
| 64 |
+
We use the following <q0, q1, q2, q3> convention:
|
| 65 |
+
q0 = cos(angle / 2)
|
| 66 |
+
q1 = sin(angle / 2) * x
|
| 67 |
+
q2 = sin(angle / 2) * y
|
| 68 |
+
q3 = sin(angle / 2) * z
|
| 69 |
+
This is also sometimes called qw, qx, qy, qz.
|
| 70 |
+
Note that quaternions are ambiguous, because we can represent a rotation by
|
| 71 |
+
angle about vector <x, y, z> and -angle about vector <-x, -y, -z>.
|
| 72 |
+
To choose between these, we pick "first nonzero positive", where we
|
| 73 |
+
make the first nonzero element of the quaternion positive.
|
| 74 |
+
This can result in mismatches if you're converting an quaternion that is not
|
| 75 |
+
"first nonzero positive" to a different representation and back.
|
| 76 |
+
|
| 77 |
+
Axis Angle
|
| 78 |
+
(Not currently implemented)
|
| 79 |
+
These are very straightforward. Rotation is angle about a unit vector.
|
| 80 |
+
|
| 81 |
+
XY Axes
|
| 82 |
+
(Not currently implemented)
|
| 83 |
+
We are given x axis and y axis, and z axis is cross product of x and y.
|
| 84 |
+
|
| 85 |
+
Z Axis
|
| 86 |
+
This is NOT RECOMMENDED. Defines a unit vector for the Z axis,
|
| 87 |
+
but rotation about this axis is not well defined.
|
| 88 |
+
Instead pick a fixed reference direction for another axis (e.g. X)
|
| 89 |
+
and calculate the other (e.g. Y = Z cross-product X),
|
| 90 |
+
then use XY Axes rotation instead.
|
| 91 |
+
|
| 92 |
+
SO3
|
| 93 |
+
(Not currently implemented)
|
| 94 |
+
While not supported by MuJoCo, this representation has a lot of nice features.
|
| 95 |
+
We expect to add support for these in the future.
|
| 96 |
+
|
| 97 |
+
TODO / Missing
|
| 98 |
+
--------------
|
| 99 |
+
- Rotation integration or derivatives (e.g. velocity conversions)
|
| 100 |
+
- More representations (SO3, etc)
|
| 101 |
+
- Random sampling (e.g. sample uniform random rotation)
|
| 102 |
+
- Performance benchmarks/measurements
|
| 103 |
+
- (Maybe) define everything as to/from matrices, for simplicity
|
| 104 |
+
"""
|
| 105 |
+
|
| 106 |
+
# For testing whether a number is close to zero
|
| 107 |
+
_FLOAT_EPS = np.finfo(np.float64).eps
|
| 108 |
+
_EPS4 = _FLOAT_EPS * 4.0
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def euler2mat(euler):
    """Convert Euler angles (relative 'xyz' ordering) to 3x3 rotation matrices.

    Accepts arbitrary leading batch dimensions; the last axis must be 3.
    """
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)

    # Negated, reversed angle order matches the MuJoCo 'xyz' convention.
    a_i, a_j, a_k = -euler[..., 2], -euler[..., 1], -euler[..., 0]
    si, sj, sk = np.sin(a_i), np.sin(a_j), np.sin(a_k)
    ci, cj, ck = np.cos(a_i), np.cos(a_j), np.cos(a_k)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    # Assemble row by row, then stack into (..., 3, 3).
    row0 = np.stack([cj * ci, cj * si, -sj], axis=-1)
    row1 = np.stack([sj * cs - sc, sj * ss + cc, cj * sk], axis=-1)
    row2 = np.stack([sj * cc + ss, sj * sc - cs, cj * ck], axis=-1)
    return np.stack([row0, row1, row2], axis=-2)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def euler2quat(euler):
    """Convert Euler angles (relative 'xyz' ordering) to quaternions <w, x, y, z>.

    Accepts arbitrary leading batch dimensions; the last axis must be 3.
    """
    euler = np.asarray(euler, dtype=np.float64)
    assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)

    # Half-angles, with sign/order chosen to match the MuJoCo convention.
    h_i, h_j, h_k = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2
    si, sj, sk = np.sin(h_i), np.sin(h_j), np.sin(h_k)
    ci, cj, ck = np.cos(h_i), np.cos(h_j), np.cos(h_k)
    cc, cs = ci * ck, ci * sk
    sc, ss = si * ck, si * sk

    w = cj * cc + sj * ss
    x = cj * cs - sj * sc
    y = -(cj * ss + sj * cc)
    z = cj * sc - sj * cs
    return np.stack([w, x, y, z], axis=-1)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def mat2euler(mat):
    """Convert rotation matrices to Euler angles ('xyz' convention).

    Accepts arbitrary leading batch dimensions; the last two axes must be (3, 3).
    """
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    # Magnitude of the pitch cosine; near zero the matrix is gimbal-locked
    # and roll/yaw are not independent, so roll is pinned to 0 in that branch.
    cos_pitch = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2])
    stable = cos_pitch > _EPS4
    euler = np.empty(mat.shape[:-1], dtype=np.float64)
    euler[..., 2] = np.where(
        stable,
        -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
        -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]),
    )
    # Pitch uses the same formula in both branches, so no select is needed.
    euler[..., 1] = -np.arctan2(-mat[..., 0, 2], cos_pitch)
    euler[..., 0] = np.where(
        stable, -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 0.0
    )
    return euler
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def mat2quat(mat):
    """Convert Rotation Matrix to Quaternion. See rotation.py for notes.

    Uses the symmetric-matrix eigenvector method (Bar-Itzhack / transforms3d
    style): the quaternion is the eigenvector of K with the largest
    eigenvalue. Supports batched input; output quaternions are in
    <w, x, y, z> order and normalized so that w >= 0.
    """
    mat = np.asarray(mat, dtype=np.float64)
    assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)

    Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
    Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
    Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2]
    # Fill only lower half of symmetric matrix (eigh only reads that half).
    K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64)
    K[..., 0, 0] = Qxx - Qyy - Qzz
    K[..., 1, 0] = Qyx + Qxy
    K[..., 1, 1] = Qyy - Qxx - Qzz
    K[..., 2, 0] = Qzx + Qxz
    K[..., 2, 1] = Qzy + Qyz
    K[..., 2, 2] = Qzz - Qxx - Qyy
    K[..., 3, 0] = Qyz - Qzy
    K[..., 3, 1] = Qzx - Qxz
    K[..., 3, 2] = Qxy - Qyx
    K[..., 3, 3] = Qxx + Qyy + Qzz
    K /= 3.0
    # TODO: vectorize this -- probably could be made faster
    q = np.empty(K.shape[:-2] + (4,))
    # Iterate over every batch element; the multi_index addresses one 4x4 K.
    it = np.nditer(q[..., 0], flags=["multi_index"])
    while not it.finished:
        # Use Hermitian eigenvectors, values for speed
        vals, vecs = np.linalg.eigh(K[it.multi_index])
        # Select largest eigenvector, reorder to w,x,y,z quaternion
        q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)]
        # Prefer quaternion with positive w
        # (q * -1 corresponds to same rotation as q)
        if q[it.multi_index][0] < 0:
            q[it.multi_index] *= -1
        it.iternext()
    return q
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def quat2euler(quat):
    """Convert Quaternion(s) to Euler angles ('xyz' convention), via the matrix form."""
    return mat2euler(quat2mat(quat))
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def subtract_euler(e1, e2):
    """Return the Euler-angle rotation composing `e1` with the inverse of `e2`."""
    assert e1.shape == e2.shape
    assert e1.shape[-1] == 3
    # Compose in quaternion space to avoid Euler-arithmetic pitfalls.
    diff_quat = quat_mul(euler2quat(e1), quat_conjugate(euler2quat(e2)))
    return quat2euler(diff_quat)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def quat2mat(quat):
    """Convert quaternion(s) <w, x, y, z> to 3x3 rotation matrices.

    Degenerate (near-zero-norm) quaternions map to the identity matrix.
    Accepts arbitrary leading batch dimensions; the last axis must be 4.
    """
    quat = np.asarray(quat, dtype=np.float64)
    assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)

    w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3]
    norm_sq = np.sum(quat * quat, axis=-1)
    s = 2.0 / norm_sq
    X, Y, Z = x * s, y * s, z * s
    wX, wY, wZ = w * X, w * Y, w * Z
    xX, xY, xZ = x * X, x * Y, x * Z
    yY, yZ, zZ = y * Y, y * Z, z * Z

    # Assemble row by row, then stack into (..., 3, 3).
    row0 = np.stack([1.0 - (yY + zZ), xY - wZ, xZ + wY], axis=-1)
    row1 = np.stack([xY + wZ, 1.0 - (xX + zZ), yZ - wX], axis=-1)
    row2 = np.stack([xZ - wY, yZ + wX, 1.0 - (xX + yY)], axis=-1)
    mat = np.stack([row0, row1, row2], axis=-2)
    # Near-zero-norm quaternions divided by ~0 above; fall back to identity.
    return np.where((norm_sq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3))
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def quat_conjugate(q):
    """Return the conjugate <w, -x, -y, -z>; for unit quaternions this is the inverse."""
    conj = np.array(q, copy=True)
    conj[..., 1:] *= -1
    return conj
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def quat_mul(q0, q1):
    """Hamilton product q0 * q1 of quaternions in <w, x, y, z> order.

    Args:
        q0, q1 (np.ndarray of shape (..., 4)): quaternions with identical
            (arbitrary) leading batch dimensions.

    Returns:
        np.ndarray of shape (..., 4): the products, same shape as the inputs.
    """
    assert q0.shape == q1.shape
    assert q0.shape[-1] == 4
    assert q1.shape[-1] == 4

    w0, x0, y0, z0 = q0[..., 0], q0[..., 1], q0[..., 2], q0[..., 3]
    w1, x1, y1, z1 = q1[..., 0], q1[..., 1], q1[..., 2], q1[..., 3]

    w = w0 * w1 - x0 * x1 - y0 * y1 - z0 * z1
    x = w0 * x1 + x0 * w1 + y0 * z1 - z0 * y1
    y = w0 * y1 + y0 * w1 + z0 * x1 - x0 * z1
    z = w0 * z1 + z0 * w1 + x0 * y1 - y0 * x1
    # Stacking along the last axis preserves the batch shape for ANY number of
    # leading dimensions; the previous np.array([w, x, y, z]) + swapaxes trick
    # only handled 0 or 1 batch dimensions and tripped the shape assert beyond.
    q = np.stack([w, x, y, z], axis=-1)
    assert q.shape == q0.shape
    return q
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def quat_rot_vec(q, v0):
    """Rotate 3-vector `v0` by quaternion `q` via q * (0, v0) * conj(q)."""
    # Embed the vector as a pure quaternion (zero scalar part).
    v_quat = np.array([0, v0[0], v0[1], v0[2]])
    rotated = quat_mul(q, quat_mul(v_quat, quat_conjugate(q)))
    return rotated[1:]
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def quat_identity():
    """Return the identity quaternion <w, x, y, z> = <1, 0, 0, 0> (no rotation)."""
    return np.array([1] + [0] * 3)
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def quat2axisangle(quat):
    """Decompose a quaternion <w, x, y, z> into an (axis, angle) pair.

    Near-zero rotations return axis [0, 0, 1] and angle 0.
    """
    sin_half = np.linalg.norm(quat[1:])
    if sin_half <= 0.0001:
        # Rotation too small to define an axis reliably.
        return np.array([0, 0, 1]), 0
    angle = 2 * np.arcsin(sin_half)
    if quat[0] < 0:
        # Negative w flips the sense of rotation.
        angle = -angle
    return quat[1:] / sin_half, angle
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def euler2point_euler(euler):
    """Encode Euler angles as [sin(e), cos(e)] features, shape (N, 6)."""
    angles = euler.copy()
    if angles.ndim < 2:
        angles = np.expand_dims(angles, 0)
    assert angles.shape[1] == 3
    return np.concatenate([np.sin(angles), np.cos(angles)], axis=-1)
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
def point_euler2euler(euler):
    """Inverse of euler2point_euler: recover angles from [sin, cos] features."""
    feats = euler.copy()
    if feats.ndim < 2:
        feats = np.expand_dims(feats, 0)
    assert feats.shape[1] == 6
    angle = np.arctan(feats[..., :3] / feats[..., 3:])
    # arctan only covers (-pi/2, pi/2); shift by pi where cos < 0.
    angle[feats[..., 3:] < 0] += np.pi
    return angle
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
def quat2point_quat(quat):
    """Encode a quaternion as (sin(angle), cos(angle), axis) components.

    A leading batch dimension is added if the input is 1-D.
    """
    # Should be in qw, qx, qy, qz
    _quat = quat.copy()
    if len(_quat.shape) < 2:
        _quat = np.expand_dims(_quat, 0)
    assert _quat.shape[1] == 4
    # Rotation angle comes from the scalar part: qw = cos(angle / 2).
    angle = np.arccos(_quat[:, [0]]) * 2
    xyz = _quat[:, 1:]
    # Recover the unit axis by dividing out sin(angle / 2), but only in
    # rows where the rotation is non-degenerate (|sin| not ~0); degenerate
    # rows keep the raw vector part unchanged.
    xyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (xyz / np.sin(angle / 2))[
        np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
    ]
    return np.concatenate([np.sin(angle), np.cos(angle), xyz], axis=-1)
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def point_quat2quat(quat):
    """Decode (sin(angle), cos(angle), axis) components back into a quaternion.

    Inverse of ``quat2point_quat``; a leading batch dimension is added if
    the input is 1-D.
    """
    _quat = quat.copy()
    if len(_quat.shape) < 2:
        _quat = np.expand_dims(_quat, 0)
    assert _quat.shape[1] == 5
    # First two components encode sin(angle) and cos(angle).
    angle = np.arctan(_quat[:, [0]] / _quat[:, [1]])
    qw = np.cos(angle / 2)

    qxyz = _quat[:, 2:]
    # Rescale the axis by sin(angle / 2), but only in rows where the
    # rotation is non-degenerate; degenerate rows keep the raw components.
    qxyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (qxyz * np.sin(angle / 2))[
        np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
    ]
    return np.concatenate([qw, qxyz], axis=-1)
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
def normalize_angles(angles):
    """Puts angles in [-pi, pi] range."""
    wrapped = angles.copy()
    if wrapped.size > 0:
        wrapped = np.mod(wrapped + np.pi, 2 * np.pi) - np.pi
        # Sanity check the wrap (with a small tolerance for float error).
        assert wrapped.min() >= -np.pi - 1e-6 and wrapped.max() <= np.pi + 1e-6
    return wrapped
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def round_to_straight_angles(angles):
    """Returns closest angle modulo 90 degrees"""
    quarter_turn = np.pi / 2
    snapped = np.round(angles / quarter_turn) * quarter_turn
    return normalize_angles(snapped)
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
def get_parallel_rotations():
    """Enumerate the 24 distinct rotations mapping an axis-aligned box to itself.

    Each rotation is returned as an Euler-angle triple whose entries are
    multiples of 90 degrees.
    """
    mult90 = [0, np.pi / 2, -np.pi / 2, np.pi]
    parallel_rotations = []
    for euler in itertools.product(mult90, repeat=3):
        # Round-trip through a rotation matrix to get a canonical Euler
        # representation (different triples can encode the same rotation).
        canonical = mat2euler(euler2mat(euler))
        canonical = np.round(canonical / (np.pi / 2))
        # Treat -180 and +180 degrees as the same canonical value.
        if canonical[0] == -2:
            canonical[0] = 2
        if canonical[2] == -2:
            canonical[2] = 2
        canonical *= np.pi / 2
        # Keep only rotations not already collected.
        if all([(canonical != rot).any() for rot in parallel_rotations]):
            parallel_rotations += [canonical]
    assert len(parallel_rotations) == 24
    return parallel_rotations
|
gym-0.21.0/gym/spaces/discrete.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from .space import Space
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class Discrete(Space):
    r"""A discrete space in :math:`\{0, 1, \dots, n-1\}`.

    Example::

        >>> Discrete(2)

    """

    def __init__(self, n, seed=None):
        assert n >= 0
        self.n = n
        super(Discrete, self).__init__((), np.int64, seed)

    def sample(self):
        """Draw a uniform random element of the space."""
        return self.np_random.randint(self.n)

    def contains(self, x):
        """Return True if ``x`` is a valid element (Python int or integer scalar array)."""
        if isinstance(x, int):
            as_int = x
        elif isinstance(x, (np.generic, np.ndarray)) and (
            x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
        ):
            as_int = int(x)
        else:
            return False
        return 0 <= as_int < self.n

    def __repr__(self):
        return "Discrete(%d)" % self.n

    def __eq__(self, other):
        return isinstance(other, Discrete) and self.n == other.n
|
gym-0.21.0/gym/utils/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""A set of common utilities used within the environments. These are
|
| 2 |
+
not intended as API functions, and will not remain stable over time.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
# These submodules should not have any import-time dependencies.
|
| 6 |
+
# We want this since we use `utils` during our import-time sanity checks
|
| 7 |
+
# that verify that our dependencies are actually present.
|
| 8 |
+
from .colorize import colorize
|
| 9 |
+
from .ezpickle import EzPickle
|
gym-0.21.0/gym/utils/__pycache__/closer.cpython-38.pyc
ADDED
|
Binary file (2.39 kB). View file
|
|
|
gym-0.21.0/gym/utils/__pycache__/colorize.cpython-38.pyc
ADDED
|
Binary file (958 Bytes). View file
|
|
|
gym-0.21.0/gym/utils/__pycache__/ezpickle.cpython-38.pyc
ADDED
|
Binary file (1.51 kB). View file
|
|
|
gym-0.21.0/gym/utils/__pycache__/json_utils.cpython-38.pyc
ADDED
|
Binary file (635 Bytes). View file
|
|
|
gym-0.21.0/gym/utils/__pycache__/seeding.cpython-38.pyc
ADDED
|
Binary file (6.59 kB). View file
|
|
|
gym-0.21.0/gym/utils/atomic_write.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from contextlib import contextmanager
|
| 5 |
+
|
| 6 |
+
# We would ideally atomically replace any existing file with the new
|
| 7 |
+
# version. However, on Windows there's no Python-only solution prior
|
| 8 |
+
# to Python 3.3. (This library includes a C extension to do so:
|
| 9 |
+
# https://pypi.python.org/pypi/pyosreplace/0.1.)
|
| 10 |
+
#
|
| 11 |
+
# Correspondingly, we make a best effort, but on Python < 3.3 use a
|
| 12 |
+
# replace method which could result in the file temporarily
|
| 13 |
+
# disappearing.
|
| 14 |
+
import sys
|
| 15 |
+
|
| 16 |
+
# Select an atomic file-replace implementation for this platform/Python.
if sys.version_info >= (3, 3):
    # Python 3.3 and up have a native `replace` method
    from os import replace
elif sys.platform.startswith("win"):

    def replace(src, dst):
        """Best-effort replace for Windows on Python < 3.3 (remove then rename)."""
        # TODO: on Windows, this will raise if the file is in use,
        # which is possible. We'll need to make this more robust over
        # time.
        try:
            os.remove(dst)
        except OSError:
            pass
        os.rename(src, dst)


else:
    # POSIX rename() is always atomic
    from os import rename as replace
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@contextmanager
def atomic_write(filepath, binary=False, fsync=False):
    """Writeable file object that atomically updates a file (using a temporary file). In some cases (namely Python < 3.3 on Windows), this could result in an existing file being temporarily unlinked.

    :param filepath: the file path to be opened
    :param binary: whether to open the file in a binary mode instead of textual
    :param fsync: whether to force write the file to disk
    """
    # Pick a temp name alongside the target, extending with "~" until unused.
    temp_path = filepath + "~"
    while os.path.isfile(temp_path):
        temp_path += "~"
    try:
        mode = "wb" if binary else "w"
        with open(temp_path, mode) as handle:
            yield handle
            if fsync:
                handle.flush()
                os.fsync(handle.fileno())
        replace(temp_path, filepath)
    finally:
        # Clean up the temp file if anything failed; after a successful
        # replace it no longer exists and the remove harmlessly errors out.
        try:
            os.remove(temp_path)
        except (IOError, OSError):
            pass
|
gym-0.21.0/gym/utils/closer.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import atexit
|
| 2 |
+
import threading
|
| 3 |
+
import weakref
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Closer(object):
    """A registry that ensures your objects get closed, whether manually,
    upon garbage collection, or upon exit. To work properly, your
    objects need to cooperate and do something like the following:

    ```
    closer = Closer()
    class Example(object):
        def __init__(self):
            self._id = closer.register(self)

        def close(self):
            # Probably worth making idempotent too!
            ...
            closer.unregister(self._id)

        def __del__(self):
            self.close()
    ```

    That is, your objects should:

    - register() themselves and save the returned ID
    - unregister() themselves upon close()
    - include a __del__ method which close()'s the object
    """

    def __init__(self, atexit_register=True):
        self.lock = threading.Lock()
        self.next_id = -1
        # Weak values: registration alone must not keep objects alive.
        self.closeables = weakref.WeakValueDictionary()

        if atexit_register:
            atexit.register(self.close)

    def generate_next_id(self):
        """Atomically allocate and return a fresh registration ID."""
        with self.lock:
            self.next_id += 1
            # BUGFIX: return while still holding the lock. Previously the
            # value was read after releasing the lock, so a concurrent
            # increment could make two callers observe the same ID.
            return self.next_id

    def register(self, closeable):
        """Registers an object with a 'close' method.

        Returns:
            int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired.
        """
        assert hasattr(closeable, "close"), "No close method for {}".format(closeable)

        next_id = self.generate_next_id()
        self.closeables[next_id] = closeable
        return next_id

    def unregister(self, id):
        """Remove a registration; silently ignores unknown/collected IDs."""
        assert id is not None
        self.closeables.pop(id, None)

    def close(self):
        """Close every still-registered object."""
        # Explicitly fetch all monitors first so that they can't disappear while
        # we iterate. cf. http://stackoverflow.com/a/12429620
        closeables = list(self.closeables.values())
        for closeable in closeables:
            closeable.close()
|
gym-0.21.0/gym/utils/play.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gym
|
| 2 |
+
import pygame
|
| 3 |
+
import matplotlib
|
| 4 |
+
import argparse
|
| 5 |
+
from gym import logger
|
| 6 |
+
|
| 7 |
+
try:
|
| 8 |
+
matplotlib.use("TkAgg")
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
except ImportError as e:
|
| 11 |
+
logger.warn("failed to set matplotlib backend, plotting will not work: %s" % str(e))
|
| 12 |
+
plt = None
|
| 13 |
+
|
| 14 |
+
from collections import deque
|
| 15 |
+
from pygame.locals import VIDEORESIZE
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def display_arr(screen, arr, video_size, transpose):
    """Normalize ``arr`` to 0-255, optionally transpose, scale to ``video_size``, and blit onto ``screen``."""
    lo, hi = arr.min(), arr.max()
    normalized = 255.0 * (arr - lo) / (hi - lo)
    # pygame surfaces are (width, height); image arrays are (row, col).
    surface = pygame.surfarray.make_surface(
        normalized.swapaxes(0, 1) if transpose else normalized
    )
    surface = pygame.transform.scale(surface, video_size)
    screen.blit(surface, (0, 0))
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Allows one to play the game using keyboard.

    To simply play the game use:

        play(gym.make("Pong-v4"))

    Above code works also if env is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game
    unplayable.

    If you wish to plot real time statistics as you play, you can use
    gym.utils.play.PlayPlot. Here's a sample code for plotting the reward
    for last 5 second of gameplay.

        def callback(obs_t, obs_tp1, action, rew, done, info):
            return [rew,]
        plotter = PlayPlot(callback, 30 * 5, ["reward"])

        env = gym.make("Pong-v4")
        play(env, callback=plotter.callback)


    Arguments
    ---------
    env: gym.Env
        Environment to use for playing.
    transpose: bool
        If True the output of observation is transposed.
        Defaults to true.
    fps: int
        Maximum number of steps of the environment to execute every second.
        Defaults to 30.
    zoom: float
        Make screen edge this many times bigger
    callback: lambda or None
        Callback if a callback is provided it will be executed after
        every step. It takes the following input:
            obs_t: observation before performing action
            obs_tp1: observation after performing action
            action: action that was executed
            rew: reward that was received
            done: whether the environment is done or not
            info: debug info
    keys_to_action: dict: tuple(int) -> int or None
        Mapping from keys pressed to action performed.
        For example if pressed 'w' and space at the same time is supposed
        to trigger action number 2 then key_to_action dict would look like this:

            {
                # ...
                sorted(ord('w'), ord(' ')) -> 2
                # ...
            }
        If None, default key_to_action mapping for that env is used, if provided.
    """
    env.reset()
    rendered = env.render(mode="rgb_array")

    # Resolve the key->action mapping: explicit argument first, then the
    # env itself, then the unwrapped env; otherwise the env is unplayable.
    if keys_to_action is None:
        if hasattr(env, "get_keys_to_action"):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, "get_keys_to_action"):
            keys_to_action = env.unwrapped.get_keys_to_action()
        else:
            assert False, (
                env.spec.id
                + " does not have explicit key to action mapping, "
                + "please specify one manually"
            )
    # Flatten every key combination into the set of keys worth tracking.
    relevant_keys = set(sum(map(list, keys_to_action.keys()), []))

    # Window size follows the rendered frame (render gives rows x cols,
    # pygame wants width x height), optionally scaled by `zoom`.
    video_size = [rendered.shape[1], rendered.shape[0]]
    if zoom is not None:
        video_size = int(video_size[0] * zoom), int(video_size[1] * zoom)

    pressed_keys = []
    running = True
    env_done = True  # force an initial env.reset() on the first iteration

    screen = pygame.display.set_mode(video_size)
    clock = pygame.time.Clock()

    while running:
        if env_done:
            env_done = False
            obs = env.reset()
        else:
            # The currently-held key combination selects the action;
            # unmapped combinations default to action 0 (noop).
            action = keys_to_action.get(tuple(sorted(pressed_keys)), 0)
            prev_obs = obs
            obs, rew, env_done, info = env.step(action)
            if callback is not None:
                callback(prev_obs, obs, action, rew, env_done, info)
        if obs is not None:
            rendered = env.render(mode="rgb_array")
            display_arr(screen, rendered, transpose=transpose, video_size=video_size)

        # process pygame events
        for event in pygame.event.get():
            # test events, set key states
            if event.type == pygame.KEYDOWN:
                if event.key in relevant_keys:
                    pressed_keys.append(event.key)
                elif event.key == 27:
                    # key code 27 is ESC: quit the play loop
                    running = False
            elif event.type == pygame.KEYUP:
                if event.key in relevant_keys:
                    pressed_keys.remove(event.key)
            elif event.type == pygame.QUIT:
                running = False
            elif event.type == VIDEORESIZE:
                # Recreate the display surface at the new window size.
                video_size = event.size
                screen = pygame.display.set_mode(video_size)
                print(video_size)

        pygame.display.flip()
        clock.tick(fps)  # cap the loop at `fps` environment steps per second
    pygame.quit()
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
class PlayPlot(object):
    """Real-time scatter plots of per-step statistics produced by a callback.

    Pass ``plotter.callback`` as the ``callback`` argument of ``play`` to
    plot values over the most recent ``horizon_timesteps`` steps.
    """

    def __init__(self, callback, horizon_timesteps, plot_names):
        # `callback` maps (obs_t, obs_tp1, action, rew, done, info) to a
        # list of values, one per entry of `plot_names`.
        self.data_callback = callback
        self.horizon_timesteps = horizon_timesteps
        self.plot_names = plot_names

        assert plt is not None, "matplotlib backend failed, plotting will not work"

        num_plots = len(self.plot_names)
        self.fig, self.ax = plt.subplots(num_plots)
        if num_plots == 1:
            # plt.subplots returns a bare Axes for one plot; normalize to a list.
            self.ax = [self.ax]
        for axis, name in zip(self.ax, plot_names):
            axis.set_title(name)
        self.t = 0
        self.cur_plot = [None for _ in range(num_plots)]
        # One bounded history per plot; old points fall off automatically.
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]

    def callback(self, obs_t, obs_tp1, action, rew, done, info):
        # Append this step's values to each plot's history.
        points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
        for point, data_series in zip(points, self.data):
            data_series.append(point)
        self.t += 1

        # Sliding x-axis window over the most recent horizon_timesteps steps.
        xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t

        for i, plot in enumerate(self.cur_plot):
            if plot is not None:
                # Drop the previous scatter before drawing the new window.
                plot.remove()
            self.cur_plot[i] = self.ax[i].scatter(
                range(xmin, xmax), list(self.data[i]), c="blue"
            )
            self.ax[i].set_xlim(xmin, xmax)
        plt.pause(0.000001)
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def main():
    """Entry point: parse --env from the command line and play it interactively."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--env",
        type=str,
        default="MontezumaRevengeNoFrameskip-v4",
        help="Define Environment",
    )
    cli_args = parser.parse_args()
    environment = gym.make(cli_args.env)
    play(environment, zoom=4, fps=60)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
if __name__ == "__main__":
|
| 196 |
+
main()
|
gym-0.21.0/gym/vector/__pycache__/__init__.cpython-38.pyc
ADDED
|
Binary file (2.56 kB). View file
|
|
|
gym-0.21.0/gym/vector/utils/__pycache__/__init__.cpython-38.pyc
ADDED
|
Binary file (687 Bytes). View file
|
|
|
gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc
ADDED
|
Binary file (5.5 kB). View file
|
|
|