chenhaojun commited on
Commit
d68fa90
·
verified ·
1 Parent(s): 44e699a

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Metaworld/.gitignore +148 -0
  2. Metaworld/LICENSE +21 -0
  3. Metaworld/MANIFEST.in +2 -0
  4. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 +0 -0
  5. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 +0 -0
  6. VRL3/src/cfgs_adroit/task/hammer.yaml +4 -0
  7. VRL3/src/cfgs_adroit/task/pen.yaml +4 -0
  8. VRL3/src/rrl_local/rrl_utils.py +85 -0
  9. VRL3/src/transfer_util.py +50 -0
  10. gym-0.21.0/.gitignore +39 -0
  11. gym-0.21.0/CODE_OF_CONDUCT.rst +13 -0
  12. gym-0.21.0/bin/docker_entrypoint +26 -0
  13. gym-0.21.0/docs/api.md +123 -0
  14. gym-0.21.0/docs/toy_text/frozen_lake.md +70 -0
  15. gym-0.21.0/docs/tutorials.md +17 -0
  16. gym-0.21.0/gym/core.py +357 -0
  17. gym-0.21.0/gym/envs/box2d/bipedal_walker.py +658 -0
  18. gym-0.21.0/gym/envs/classic_control/cartpole.py +234 -0
  19. gym-0.21.0/gym/envs/classic_control/mountain_car.py +177 -0
  20. gym-0.21.0/gym/envs/classic_control/pendulum.py +94 -0
  21. gym-0.21.0/gym/envs/mujoco/ant.py +56 -0
  22. gym-0.21.0/gym/envs/mujoco/assets/thrower.xml +127 -0
  23. gym-0.21.0/gym/envs/mujoco/hopper.py +48 -0
  24. gym-0.21.0/gym/envs/mujoco/hopper_v3.py +137 -0
  25. gym-0.21.0/gym/envs/mujoco/pusher.py +63 -0
  26. gym-0.21.0/gym/envs/mujoco/swimmer_v3.py +94 -0
  27. gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml +26 -0
  28. gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml +42 -0
  29. gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml +34 -0
  30. gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml +254 -0
  31. gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl +0 -0
  32. gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl +0 -0
  33. gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl +0 -0
  34. gym-0.21.0/gym/envs/robotics/fetch/reach.py +32 -0
  35. gym-0.21.0/gym/envs/robotics/hand/__init__.py +0 -0
  36. gym-0.21.0/gym/envs/robotics/hand/manipulate.py +354 -0
  37. gym-0.21.0/gym/envs/robotics/rotations.py +387 -0
  38. gym-0.21.0/gym/spaces/discrete.py +37 -0
  39. gym-0.21.0/gym/utils/__init__.py +9 -0
  40. gym-0.21.0/gym/utils/__pycache__/closer.cpython-38.pyc +0 -0
  41. gym-0.21.0/gym/utils/__pycache__/colorize.cpython-38.pyc +0 -0
  42. gym-0.21.0/gym/utils/__pycache__/ezpickle.cpython-38.pyc +0 -0
  43. gym-0.21.0/gym/utils/__pycache__/json_utils.cpython-38.pyc +0 -0
  44. gym-0.21.0/gym/utils/__pycache__/seeding.cpython-38.pyc +0 -0
  45. gym-0.21.0/gym/utils/atomic_write.py +60 -0
  46. gym-0.21.0/gym/utils/closer.py +68 -0
  47. gym-0.21.0/gym/utils/play.py +196 -0
  48. gym-0.21.0/gym/vector/__pycache__/__init__.cpython-38.pyc +0 -0
  49. gym-0.21.0/gym/vector/utils/__pycache__/__init__.cpython-38.pyc +0 -0
  50. gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc +0 -0
Metaworld/.gitignore ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Plots and figures
2
+ scripts/figures/
3
+ scripts/movies*
4
+
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ pip-wheel-metadata/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+ docs/_apidoc/
76
+
77
+ # PyBuilder
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ .python-version
89
+
90
+ # celery beat schedule file
91
+ celerybeat-schedule
92
+
93
+ # SageMath parsed files
94
+ *.sage.py
95
+
96
+ # Environments
97
+ .env
98
+ .venv
99
+ env/
100
+ venv/
101
+ ENV/
102
+ env.bak/
103
+ venv.bak/
104
+
105
+ # Spyder project settings
106
+ .spyderproject
107
+ .spyproject
108
+
109
+ # Rope project settings
110
+ .ropeproject
111
+
112
+ # mkdocs documentation
113
+ /site
114
+
115
+ # mypy
116
+ .mypy_cache/
117
+ .dmypy.json
118
+ dmypy.json
119
+
120
+ # Pyre type checker
121
+ .pyre/
122
+
123
+
124
+ # metaworld-specific stuff below this line
125
+ MUJOCO_LOG.TXT
126
+
127
+ # Other IDEs and editors
128
+ *.sublime-project
129
+ *.sublime-workspace
130
+ .project
131
+ .pydevproject
132
+ *.swp
133
+ *.orig
134
+ .idea
135
+ .settings
136
+ .vscode/
137
+
138
+ # MacOS
139
+ .DS_Store
140
+
141
+ # no submodules, please
142
+ .gitmodules
143
+
144
+ # pipenv
145
+ # Including these for now, since pipenv is not the authoritative environment
146
+ # tool
147
+ Pipfile
148
+ Pipfile.lock
Metaworld/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Meta-World Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Metaworld/MANIFEST.in ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ graft metaworld/envs/assets_v1
2
+ graft metaworld/envs/assets_v2
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/1.0 ADDED
Binary file (1.17 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/11.0 ADDED
Binary file (1.17 kB). View file
 
VRL3/src/cfgs_adroit/task/hammer.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ num_train_frames: 4100000
2
+ task_name: hammer-v0
3
+ agent:
4
+ encoder_lr_scale: 1
VRL3/src/cfgs_adroit/task/pen.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ num_train_frames: 4100000
2
+ task_name: pen-v0
3
+ agent:
4
+ encoder_lr_scale: 1
VRL3/src/rrl_local/rrl_utils.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Rutav Shah, Indian Institute of Technology Kharagpur
2
+ # Copyright (c) Facebook, Inc. and its affiliates
3
+
4
+ from mjrl.utils.gym_env import GymEnv
5
+ from rrl_local.rrl_multicam import BasicAdroitEnv
6
+ import os
7
+
8
def make_basic_env(env, cam_list=None, from_pixels=False, hybrid_state=None, test_image=False,
                   channels_first=False, num_repeats=1, num_frames=1):
    """Create an Adroit environment, optionally wrapped for pixel observations.

    Args:
        env: environment id forwarded to ``GymEnv``.
        cam_list: camera names used to render pixel observations.
            Defaults to no cameras (``None`` -> fresh empty list).
        from_pixels: if True, wrap the env in ``BasicAdroitEnv`` so
            observations are images instead of raw state.
        hybrid_state: forwarded to ``BasicAdroitEnv`` — presumably mixes
            proprioceptive state into the observation; confirm against
            ``BasicAdroitEnv``.
        test_image, channels_first, num_repeats, num_frames: forwarded
            verbatim to ``BasicAdroitEnv``.

    Returns:
        tuple: ``(e, env_kwargs)`` where ``e`` is the (possibly wrapped)
        environment and ``env_kwargs`` is a dict of wrapper kwargs when
        pixel-based, otherwise ``None``.
    """
    # NOTE: the original used a mutable default (cam_list=[]); use None to
    # avoid the shared-mutable-default pitfall.
    if cam_list is None:
        cam_list = []
    e = GymEnv(env)
    env_kwargs = None
    if from_pixels:
        # 84x84 RGB per camera; latent_dim is the flattened pixel count.
        height = 84
        width = 84
        latent_dim = height * width * len(cam_list) * 3
        # BasicAdroitEnv is an environment wrapper providing image observations.
        e = BasicAdroitEnv(e, cameras=cam_list,
                           height=height, width=width, latent_dim=latent_dim,
                           hybrid_state=hybrid_state, test_image=test_image,
                           channels_first=channels_first, num_repeats=num_repeats,
                           num_frames=num_frames)
        env_kwargs = {'rrl_kwargs': e.env_kwargs}
    # State-based case: return the bare GymEnv with env_kwargs=None.
    return e, env_kwargs
23
+
24
def make_env(env, cam_list=None, from_pixels=False, encoder_type=None, hybrid_state=None):
    """Create an Adroit environment, optionally pixel-based with an encoder.

    Args:
        env: environment id forwarded to ``GymEnv``.
        cam_list: camera names used to render pixel observations.
            Defaults to no cameras (``None`` -> fresh empty list).
        from_pixels: if True, wrap the env in ``BasicAdroitEnv``.
        encoder_type: optional visual encoder name; only 'resnet34' changes
            the configuration here (256x256 inputs, 512-dim features).
            TODO(review): unclear why the encoder type lives in the env.
        hybrid_state: forwarded to ``BasicAdroitEnv``.

    Returns:
        tuple: ``(e, env_kwargs)`` — the (possibly wrapped) environment and
        the wrapper kwargs dict when pixel-based, otherwise ``None``.
    """
    # NOTE: the original used a mutable default (cam_list=[]); use None to
    # avoid the shared-mutable-default pitfall.
    if cam_list is None:
        cam_list = []
    e = GymEnv(env)
    env_kwargs = None
    if from_pixels:
        # Default pixel setup: 84x84 RGB frames from each camera.
        height = 84
        width = 84
        latent_dim = height * width * len(cam_list) * 3

    if encoder_type == 'resnet34':
        # resnet34 consumes 256x256 frames and emits a 512-dim feature per camera.
        assert from_pixels  # the encoder only makes sense for image observations
        height = 256
        width = 256
        latent_dim = 512 * len(cam_list)
    if from_pixels:
        # BasicAdroitEnv is an environment wrapper providing image observations.
        e = BasicAdroitEnv(e, cameras=cam_list, encoder_type=encoder_type,
                           height=height, width=width, latent_dim=latent_dim,
                           hybrid_state=hybrid_state)
        env_kwargs = {'rrl_kwargs': e.env_kwargs}
    return e, env_kwargs
44
+
45
def make_dir(dir_path):
    """Create ``dir_path`` (including parents) if it does not already exist.

    Uses ``exist_ok=True`` so there is no check-then-create race: the
    original ``os.path.exists`` guard could raise FileExistsError if the
    directory appeared between the check and ``makedirs``.
    """
    os.makedirs(dir_path, exist_ok=True)
48
+
49
+
50
def preprocess_args(args):
    """Flatten an argparse Namespace into a flat ``job_data`` dict.

    All fields are copied verbatim except ``policy_size``, which is parsed
    from a string like ``"32, 32"`` into a tuple of ints. The populated
    dict is printed (as before) for logging, then returned.

    Args:
        args: argparse.Namespace with the training CLI attributes.

    Returns:
        dict: job configuration consumed by the downstream training code.
    """
    # Keys in their original insertion order so the printed dict is stable.
    keys = [
        'seed', 'env', 'output', 'from_pixels', 'hybrid_state', 'stack_frames',
        'encoder_type', 'cam1', 'cam2', 'cam3', 'algorithm', 'num_cpu',
        'save_freq', 'eval_rollouts', 'demo_file', 'bc_batch_size', 'bc_epochs',
        'bc_learn_rate', 'policy_size', 'vf_batch_size', 'vf_epochs',
        'vf_learn_rate', 'rl_step_size', 'rl_gamma', 'rl_gae', 'rl_num_traj',
        'rl_num_iter', 'lam_0', 'lam_1',
    ]
    job_data = {}
    for key in keys:
        if key == 'policy_size':
            # The original split on the exact string ', ' and crashed on
            # inputs like "64,64"; normalize commas to spaces instead, so
            # "64, 64", "64,64" and "64 64" all parse.
            job_data[key] = tuple(
                int(part) for part in args.policy_size.replace(',', ' ').split())
        else:
            job_data[key] = getattr(args, key)
    print(job_data)
    return job_data
84
+
85
+
VRL3/src/transfer_util.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ from torchvision import datasets, models, transforms
5
+
6
def set_parameter_requires_grad(model, n_resblock_finetune):
    """Freeze a ResNet backbone except its last ``n_resblock_finetune`` stages.

    The depth selects how much of the network stays trainable: 0 freezes
    everything, 1 unfreezes layer4, 2 adds layer3, 3 adds layer2, 4 adds
    layer1, and 5 unfreezes every parameter. Parameters whose name contains
    'bn' (batch norm) are always kept frozen, regardless of depth.
    """
    assert n_resblock_finetune in (0, 1, 2, 3, 4, 5)

    # Deepest-first: slicing by the finetune depth yields the trainable stages.
    stages = ('layer4', 'layer3', 'layer2', 'layer1')
    trainable = stages[:n_resblock_finetune]

    for name, param in model.named_parameters():
        if 'bn' in name:
            # Batch-norm parameters are frozen no matter what.
            param.requires_grad = False
        elif n_resblock_finetune >= 5:
            param.requires_grad = True
        else:
            param.requires_grad = any(stage in name for stage in trainable)
22
+
23
+ def initialize_model(model_name, n_resblock_finetune, use_pretrained=True):
24
+ # Initialize these variables which will be set in this if statement. Each of these
25
+ # variables is model specific.
26
+ model_ft = None
27
+ input_size = 0
28
+
29
+ if model_name == "resnet18":
30
+ """ Resnet18
31
+ """
32
+ model_ft = models.resnet18(pretrained=use_pretrained)
33
+ set_parameter_requires_grad(model_ft, n_resblock_finetune)
34
+ feature_size = model_ft.fc.in_features
35
+ input_size = 224
36
+ elif model_name == 'resnet34':
37
+ model_ft = models.resnet34(pretrained=use_pretrained)
38
+ set_parameter_requires_grad(model_ft, n_resblock_finetune)
39
+ feature_size = model_ft.fc.in_features
40
+ input_size = 224
41
+ else:
42
+ print("Invalid model name, exiting...")
43
+ exit()
44
+
45
+ return model_ft, input_size, feature_size
46
+
47
+
48
+
49
+
50
+
gym-0.21.0/.gitignore ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.swp
2
+ *.pyc
3
+ *.py~
4
+ .DS_Store
5
+ .cache
6
+ .pytest_cache/
7
+
8
+ # Setuptools distribution and build folders.
9
+ /dist/
10
+ /build
11
+
12
+ # Virtualenv
13
+ /env
14
+
15
+ # Python egg metadata, regenerated from source files by setuptools.
16
+ /*.egg-info
17
+
18
+ *.sublime-project
19
+ *.sublime-workspace
20
+
21
+ logs/
22
+
23
+ .ipynb_checkpoints
24
+ ghostdriver.log
25
+
26
+ junk
27
+ MUJOCO_LOG.txt
28
+
29
+ rllab_mujoco
30
+
31
+ tutorial/*.html
32
+
33
+ # IDE files
34
+ .eggs
35
+ .tox
36
+
37
+ # PyCharm project files
38
+ .idea
39
+ vizdoom.ini
gym-0.21.0/CODE_OF_CONDUCT.rst ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OpenAI Gym is dedicated to providing a harassment-free experience for
2
+ everyone, regardless of gender, gender identity and expression, sexual
3
+ orientation, disability, physical appearance, body size, age, race, or
4
+ religion. We do not tolerate harassment of participants in any form.
5
+
6
+ This code of conduct applies to all OpenAI Gym spaces (including Gist
7
+ comments) both online and off. Anyone who violates this code of
8
+ conduct may be sanctioned or expelled from these spaces at the
9
+ discretion of the OpenAI team.
10
+
11
+ We may add additional rules over time, which will be made clearly
12
+ available to participants. Participants are responsible for knowing
13
+ and abiding by these rules.
gym-0.21.0/bin/docker_entrypoint ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# This script is the entrypoint for our Docker image: it starts a virtual
# X display, waits for its socket to appear, then execs the container command.

set -ex

# Set up display; otherwise rendering will fail
Xvfb -screen 0 1024x768x24 &
export DISPLAY=:0

# Wait for the X socket file to come up (up to 10 tries with an increasing
# sleep of 1..10 seconds between attempts)
display=0
file="/tmp/.X11-unix/X$display"
for i in $(seq 1 10); do
    if [ -e "$file" ]; then
        break
    fi

    echo "Waiting for $file to be created (try $i/10)"
    sleep "$i"
done
if ! [ -e "$file" ]; then
    echo "Timing out: $file was not created"
    exit 1
fi

# Replace this shell with the requested command so signals propagate directly
exec "$@"
gym-0.21.0/docs/api.md ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # API
2
+ ## Initializing Environments
3
+ Initializing environment is very easy in Gym and can be done via:
4
+
5
+ ```python
6
+ import gym
7
+ env = gym.make('CartPole-v0')
8
+ ```
9
+
10
+ ## Interacting with the Environment
11
+ This example will run an instance of `CartPole-v0` environment for 1000 timesteps, rendering the environment at each step. You should see a window pop up rendering the classic [cart-pole](https://www.youtube.com/watch?v=J7E6_my3CHk&ab_channel=TylerStreeter) problem
12
+
13
+ ```python
14
+ import gym
15
+ env = gym.make('CartPole-v0')
16
+ env.reset()
17
+ for _ in range(1000):
18
+ env.render() # by default `mode="human"`(GUI), you can pass `mode="rbg_array"` to retrieve an image instead
19
+ env.step(env.action_space.sample()) # take a random action
20
+ env.close()
21
+ ```
22
+
23
+ The output should look something like this
24
+
25
+ ![cartpole-no-reset](https://user-images.githubusercontent.com/28860173/129241283-70069f7c-453d-4670-a226-854203bd8a1b.gif)
26
+
27
+
28
+ The commonly used methods are:
29
+
30
+ `reset()` resets the environment to its initial state and returns the observation corresponding to the initial state
31
+ `step(action)` takes an action as an input and implements that action in the environment. This method returns a set of four values
32
+ `render()` renders the environment
33
+
34
+ - `observation` (**object**) : an environment specific object representation your observation of the environment after the step is taken. Its often aliased as the next state after the action has been taken
35
+ - `reward`(**float**) : immediate reward achieved by the previous action. Actual value and range will varies between environments, but the final goal is always to increase your total reward
36
+ - `done`(**boolean**): whether it’s time to `reset` the environment again. Most (but not all) tasks are divided up into well-defined episodes, and `done` being `True` indicates the episode has terminated. (For example, perhaps the pole tipped too far, or you lost your last life.)
37
+ - `info`(**dict**) : This provides general information helpful for debugging or additional information depending on the environment, such as the raw probabilities behind the environment’s last state change
38
+
39
+
40
+ ## Additional Environment API
41
+
42
+ - `action_space`: this attribute gives the format of valid actions. It is of datatype `Space` provided by Gym. (For ex: If the action space is of type `Discrete` and gives the value `Discrete(2)`, this means there are two valid discrete actions 0 & 1 )
43
+ ```python
44
+ print(env.action_space)
45
+ #> Discrete(2)
46
+
47
+ print(env.observation_space)
48
+ #> Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
49
+
50
+ ```
51
+ - `observation_space`: this attribute gives the format of valid observations. It if of datatype `Space` provided by Gym. (For ex: if the observation space is of type `Box` and the shape of the object is `(4,)`, this denotes a valid observation will be an array of 4 numbers). We can check the box bounds as well with attributes
52
+
53
+ ```python
54
+ print(env.observation_space.high)
55
+ #> array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38], dtype=float32)
56
+
57
+ print(env.observation_space.low)
58
+ #> array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38], dtype=float32)
59
+ ```
60
+ - There are multiple types of Space types inherently available in gym:
61
+ - `Box` describes an n-dimensional continuous space. Its a bounded space where we can define the upper and lower limit which describe the valid values our observations can take.
62
+ - `Discrete` describes a discrete space where { 0, 1, ......., n-1} are the possible values our observation/action can take.
63
+ - `Dict` represents a dictionary of simple spaces.
64
+ - `Tuple` represents a tuple of simple spaces
65
+ - `MultiBinary` creates a n-shape binary space. Argument n can be a number or a `list` of numbers
66
+ - `MultiDiscrete` consists of a series of `Discrete` action spaces with different number of actions in each element
67
+ ```python
68
+ observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32)
69
+ print(observation_space.sample())
70
+ #> [ 1.6952509 -0.4399011 -0.7981693]
71
+
72
+ observation_space = Discrete(4)
73
+ print(observation_space.sample())
74
+ #> 1
75
+
76
+ observation_space = Dict({"position": Discrete(2), "velocity": Discrete(3)})
77
+ print(observation_space.sample())
78
+ #> OrderedDict([('position', 0), ('velocity', 1)])
79
+
80
+ observation_space = Tuple((Discrete(2), Discrete(3)))
81
+ print(observation_space.sample())
82
+ #> (1, 2)
83
+
84
+ observation_space = MultiBinary(5)
85
+ print(observation_space.sample())
86
+ #> [1 1 1 0 1]
87
+
88
+ observation_space = MultiDiscrete([ 5, 2, 2 ])
89
+ print(observation_space.sample())
90
+ #> [3 0 0]
91
+ ```
92
+ - `reward_range`: returns a tuple corresponding to min and max possible rewards. Default range is set to `[-inf,+inf]`. You can set it if you want a narrower range
93
+ - `close()` : Override close in your subclass to perform any necessary cleanup
94
+ - `seed()`: Sets the seed for this env's random number generator
95
+
96
+
97
+ ### Unwrapping an environment
98
+ If you have a wrapped environment, and you want to get the unwrapped environment underneath all the layers of wrappers (so that you can manually call a function or change some underlying aspect of the environment), you can use the `.unwrapped` attribute. If the environment is already a base environment, the `.unwrapped` attribute will just return itself.
99
+
100
+ ```python
101
+ base_env = env.unwrapped
102
+ ```
103
+
104
+ ### Vectorized Environment
105
+ Vectorized Environments are a way of stacking multiple independent environments, so that instead of training on one environment, our agent can train on multiple environments at a time. Each `observation` returned from a vectorized environment is a batch of observations for each sub-environment, and `step` is also expected to receive a batch of actions for each sub-environment.
106
+
107
+ **NOTE:** All sub-environments should share the identical observation and action spaces. A vector of multiple different environments is not supported
108
+
109
+ Gym Vector API consists of two types of vectorized environments:
110
+
111
+ - `AsyncVectorEnv` runs multiple environments in parallel. It uses `multiprocessing` processes, and pipes for communication.
112
+ - `SyncVectorEnv`runs multiple environments serially
113
+
114
+ ```python
115
+ import gym
116
+ env = gym.vector.make('CartPole-v1', 3,asynchronous=True) # Creates an Asynchronous env
117
+ env.reset()
118
+ #> array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827],
119
+ #> [ 0.03073904, 0.00145001, -0.03088818, -0.03131252],
120
+ #> [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]],
121
+ #> dtype=float32)
122
+
123
+ ```
gym-0.21.0/docs/toy_text/frozen_lake.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Frozen Lake
2
+ ---
3
+ |Title|Action Type|Action Shape|Action Values|Observation Shape|Observation Values|Average Total Reward|Import|
4
+ | ----------- | -----------| ----------- | -----------| ----------- | -----------| ----------- | -----------|
5
+ |Frozen Lake|Discrete|(1,)|(0,3)|(1,)|(0,nrows*ncolumns)| |from gym.envs.toy_text import frozen_lake|
6
+ ---
7
+
8
+
9
+ Frozen lake involves crossing a frozen lake from Start(S) to goal(G) without falling into any holes(H). The agent may not always move in the intended direction due to the slippery nature of the frozen lake.
10
+
11
+ The agent takes a 1-element vector for actions.
12
+ The action space is `(dir)`, where `dir` decides direction to move in which can be:
13
+
14
+ - 0: LEFT
15
+ - 1: DOWN
16
+ - 2: RIGHT
17
+ - 3: UP
18
+
19
+ The observation is a value representing the agents current position as
20
+
21
+ current_row * nrows + current_col
22
+
23
+ **Rewards:**
24
+
25
+ Reward schedule:
26
+ - Reach goal(G): +1
27
+ - Reach hole(H): 0
28
+
29
+ ### Arguments
30
+
31
+ ```
32
+ gym.make('FrozenLake-v0', desc=None,map_name="4x4", is_slippery=True)
33
+ ```
34
+
35
+ `desc`: Used to specify custom map for frozen lake. For example,
36
+
37
+ desc=["SFFF", "FHFH", "FFFH", "HFFG"].
38
+
39
+ `map_name`: ID to use any of the preloaded maps.
40
+
41
+ "4x4":[
42
+ "SFFF",
43
+ "FHFH",
44
+ "FFFH",
45
+ "HFFG"
46
+ ]
47
+
48
+ "8x8": [
49
+ "SFFFFFFF",
50
+ "FFFFFFFF",
51
+ "FFFHFFFF",
52
+ "FFFFFHFF",
53
+ "FFFHFFFF",
54
+ "FHHFFFHF",
55
+ "FHFFHFHF",
56
+ "FFFHFFFG",
57
+ ]
58
+
59
+
60
+
61
+
62
+ `is_slippery`: True/False. If True will move in intended direction with probability of 1/3 else will move in either perpendicular direction with equal probability of 1/3 in both directions.
63
+
64
+ For example, if action is left and is_slippery is True, then:
65
+ - P(move left)=1/3
66
+ - P(move up)=1/3
67
+ - P(move down)=1/3
68
+ ### Version History
69
+
70
+ * v0: Initial versions release (1.0.0)
gym-0.21.0/docs/tutorials.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Getting Started With OpenAI Gym: The Basic Building Blocks
2
+
3
+ https://blog.paperspace.com/getting-started-with-openai-gym/
4
+
5
+ A good starting point explaining all the basic building blocks of the Gym API.
6
+
7
+
8
+
9
+ ## Reinforcement Q-Learning from Scratch in Python with OpenAI Gym
10
+ Good Algorithmic Introduction to Reinforcement Learning showcasing how to use Gym API for Training Agents.
11
+
12
+ https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/
13
+
14
+
15
+ ## Tutorial: An Introduction to Reinforcement Learning Using OpenAI Gym
16
+
17
+ https://www.gocoder.one/blog/rl-tutorial-with-openai-gym
gym-0.21.0/gym/core.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import abstractmethod
2
+
3
+ import gym
4
+ from gym import error
5
+ from gym.utils import closer
6
+
7
+
8
class Env(object):
    """Base class for all OpenAI Gym environments.

    An Env encapsulates a (possibly partially observed) dynamical system
    with arbitrary behind-the-scenes dynamics. Users interact through the
    public methods:

        step
        reset
        render
        close
        seed

    Subclasses are expected to set the attributes:

        action_space: The Space object corresponding to valid actions
        observation_space: The Space object corresponding to valid observations
        reward_range: A tuple corresponding to the min and max possible rewards

    Note: a default reward range of [-inf, +inf] already exists; set it
    only if you want a narrower range.
    """

    # Override in subclasses that support rendering.
    metadata = {"render.modes": []}
    reward_range = (-float("inf"), float("inf"))
    spec = None

    # Every concrete environment must define these.
    action_space = None
    observation_space = None

    @abstractmethod
    def step(self, action):
        """Advance the environment's dynamics by one timestep.

        When the end of an episode is reached, the caller is responsible
        for invoking `reset()` before stepping again.

        Args:
            action (object): an action provided by the agent

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case
                further step() calls will return undefined results
            info (dict): auxiliary diagnostic information (helpful for
                debugging, and sometimes learning)
        """
        raise NotImplementedError

    @abstractmethod
    def reset(self):
        """Reset the environment and return an initial observation.

        This must not reseed the environment's random number generator(s);
        random variables in the environment's state should be sampled
        independently between calls, so each `reset()` yields an episode
        independent of — not identical to — previous ones.

        Returns:
            observation (object): the initial observation.
        """
        raise NotImplementedError

    @abstractmethod
    def render(self, mode="human"):
        """Render the environment.

        The supported modes vary per environment (some support none). By
        convention, if mode is:

        - human: render to the current display or terminal and return
          nothing. Usually for human consumption.
        - rgb_array: return a numpy.ndarray with shape (x, y, 3) of RGB
          values, suitable for turning into a video.
        - ansi: return a str or StringIO.StringIO with a terminal-style
          text representation (may include newlines and ANSI escapes).

        Note:
            Advertise the supported modes in metadata['render.modes'], and
            call super() in implementations to reuse this method's behavior
            for unsupported modes.

        Args:
            mode (str): the mode to render with

        Example:

            class MyEnv(Env):
                metadata = {'render.modes': ['human', 'rgb_array']}

                def render(self, mode='human'):
                    if mode == 'rgb_array':
                        return np.array(...) # return RGB frame suitable for video
                    elif mode == 'human':
                        ... # pop up a window and render
                    else:
                        super(MyEnv, self).render(mode=mode) # just raise an exception
        """
        raise NotImplementedError

    def close(self):
        """Perform any necessary cleanup; override in your subclass.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """Set the seed for this env's random number generator(s).

        Note:
            Environments may use multiple pseudorandom generators; all
            seeds actually used should be returned so that accidental
            correlations between generators can be ruled out.

        Returns:
            list<bigint>: the seeds of this env's generators. The first
            entry should be the "main" seed a reproducer would pass back
            in; it often equals the provided seed, but not when seed=None,
            for example.
        """
        return

    @property
    def unwrapped(self):
        """Completely unwrap this env.

        Returns:
            gym.Env: The base non-wrapped gym.Env instance
        """
        return self

    def __str__(self):
        if self.spec is not None:
            return "<{}<{}>>".format(type(self).__name__, self.spec.id)
        return "<{} instance>".format(type(self).__name__)

    def __enter__(self):
        """Support with-statement: entering yields the environment itself."""
        return self

    def __exit__(self, *args):
        """Support with-statement: close on exit, never swallow exceptions."""
        self.close()
        # Returning False propagates any in-flight exception.
        return False
165
+
166
+
167
+ class GoalEnv(Env):
168
+ """A goal-based environment. It functions just as any regular OpenAI Gym environment but it
169
+ imposes a required structure on the observation_space. More concretely, the observation
170
+ space is required to contain at least three elements, namely `observation`, `desired_goal`, and
171
+ `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve.
172
+ `achieved_goal` is the goal that it currently achieved instead. `observation` contains the
173
+ actual observations of the environment as per usual.
174
+ """
175
+
176
+ def reset(self):
177
+ # Enforce that each GoalEnv uses a Goal-compatible observation space.
178
+ if not isinstance(self.observation_space, gym.spaces.Dict):
179
+ raise error.Error(
180
+ "GoalEnv requires an observation space of type gym.spaces.Dict"
181
+ )
182
+ for key in ["observation", "achieved_goal", "desired_goal"]:
183
+ if key not in self.observation_space.spaces:
184
+ raise error.Error(
185
+ 'GoalEnv requires the "{}" key to be part of the observation dictionary.'.format(
186
+ key
187
+ )
188
+ )
189
+
190
+ @abstractmethod
191
+ def compute_reward(self, achieved_goal, desired_goal, info):
192
+ """Compute the step reward. This externalizes the reward function and makes
193
+ it dependent on a desired goal and the one that was achieved. If you wish to include
194
+ additional rewards that are independent of the goal, you can include the necessary values
195
+ to derive it in 'info' and compute it accordingly.
196
+
197
+ Args:
198
+ achieved_goal (object): the goal that was achieved during execution
199
+ desired_goal (object): the desired goal that we asked the agent to attempt to achieve
200
+ info (dict): an info dictionary with additional information
201
+
202
+ Returns:
203
+ float: The reward that corresponds to the provided achieved goal w.r.t. to the desired
204
+ goal. Note that the following should always hold true:
205
+
206
+ ob, reward, done, info = env.step()
207
+ assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info)
208
+ """
209
+ raise NotImplementedError
210
+
211
+
212
+ class Wrapper(Env):
213
+ """Wraps the environment to allow a modular transformation.
214
+
215
+ This class is the base class for all wrappers. The subclass could override
216
+ some methods to change the behavior of the original environment without touching the
217
+ original code.
218
+
219
+ .. note::
220
+
221
+ Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
222
+
223
+ """
224
+
225
+ def __init__(self, env):
226
+ self.env = env
227
+
228
+ self._action_space = None
229
+ self._observation_space = None
230
+ self._reward_range = None
231
+ self._metadata = None
232
+
233
+ def __getattr__(self, name):
234
+ if name.startswith("_"):
235
+ raise AttributeError(
236
+ "attempted to get missing private attribute '{}'".format(name)
237
+ )
238
+ return getattr(self.env, name)
239
+
240
+ @property
241
+ def spec(self):
242
+ return self.env.spec
243
+
244
+ @classmethod
245
+ def class_name(cls):
246
+ return cls.__name__
247
+
248
+ @property
249
+ def action_space(self):
250
+ if self._action_space is None:
251
+ return self.env.action_space
252
+ return self._action_space
253
+
254
+ @action_space.setter
255
+ def action_space(self, space):
256
+ self._action_space = space
257
+
258
+ @property
259
+ def observation_space(self):
260
+ if self._observation_space is None:
261
+ return self.env.observation_space
262
+ return self._observation_space
263
+
264
+ @observation_space.setter
265
+ def observation_space(self, space):
266
+ self._observation_space = space
267
+
268
+ @property
269
+ def reward_range(self):
270
+ if self._reward_range is None:
271
+ return self.env.reward_range
272
+ return self._reward_range
273
+
274
+ @reward_range.setter
275
+ def reward_range(self, value):
276
+ self._reward_range = value
277
+
278
+ @property
279
+ def metadata(self):
280
+ if self._metadata is None:
281
+ return self.env.metadata
282
+ return self._metadata
283
+
284
+ @metadata.setter
285
+ def metadata(self, value):
286
+ self._metadata = value
287
+
288
+ def step(self, action):
289
+ return self.env.step(action)
290
+
291
+ def reset(self, **kwargs):
292
+ return self.env.reset(**kwargs)
293
+
294
+ def render(self, mode="human", **kwargs):
295
+ return self.env.render(mode, **kwargs)
296
+
297
+ def close(self):
298
+ return self.env.close()
299
+
300
+ def seed(self, seed=None):
301
+ return self.env.seed(seed)
302
+
303
+ def compute_reward(self, achieved_goal, desired_goal, info):
304
+ return self.env.compute_reward(achieved_goal, desired_goal, info)
305
+
306
+ def __str__(self):
307
+ return "<{}{}>".format(type(self).__name__, self.env)
308
+
309
+ def __repr__(self):
310
+ return str(self)
311
+
312
+ @property
313
+ def unwrapped(self):
314
+ return self.env.unwrapped
315
+
316
+
317
+ class ObservationWrapper(Wrapper):
318
+ def reset(self, **kwargs):
319
+ observation = self.env.reset(**kwargs)
320
+ return self.observation(observation)
321
+
322
+ def step(self, action):
323
+ observation, reward, done, info = self.env.step(action)
324
+ return self.observation(observation), reward, done, info
325
+
326
+ @abstractmethod
327
+ def observation(self, observation):
328
+ raise NotImplementedError
329
+
330
+
331
+ class RewardWrapper(Wrapper):
332
+ def reset(self, **kwargs):
333
+ return self.env.reset(**kwargs)
334
+
335
+ def step(self, action):
336
+ observation, reward, done, info = self.env.step(action)
337
+ return observation, self.reward(reward), done, info
338
+
339
+ @abstractmethod
340
+ def reward(self, reward):
341
+ raise NotImplementedError
342
+
343
+
344
+ class ActionWrapper(Wrapper):
345
+ def reset(self, **kwargs):
346
+ return self.env.reset(**kwargs)
347
+
348
+ def step(self, action):
349
+ return self.env.step(self.action(action))
350
+
351
+ @abstractmethod
352
+ def action(self, action):
353
+ raise NotImplementedError
354
+
355
+ @abstractmethod
356
+ def reverse_action(self, action):
357
+ raise NotImplementedError
gym-0.21.0/gym/envs/box2d/bipedal_walker.py ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import math
3
+
4
+ import numpy as np
5
+ import Box2D
6
+ from Box2D.b2 import (
7
+ edgeShape,
8
+ circleShape,
9
+ fixtureDef,
10
+ polygonShape,
11
+ revoluteJointDef,
12
+ contactListener,
13
+ )
14
+
15
+ import gym
16
+ from gym import spaces
17
+ from gym.utils import colorize, seeding, EzPickle
18
+
19
+ # This is simple 4-joints walker robot environment.
20
+ #
21
+ # There are two versions:
22
+ #
23
+ # - Normal, with slightly uneven terrain.
24
+ #
25
+ # - Hardcore with ladders, stumps, pitfalls.
26
+ #
27
+ # Reward is given for moving forward, total 300+ points up to the far end. If the robot falls,
28
+ # it gets -100. Applying motor torque costs a small amount of points, more optimal agent
29
+ # will get better score.
30
+ #
31
+ # Heuristic is provided for testing, it's also useful to get demonstrations to
32
+ # learn from. To run heuristic:
33
+ #
34
+ # python gym/envs/box2d/bipedal_walker.py
35
+ #
36
+ # State consists of hull angle speed, angular velocity, horizontal speed, vertical speed,
37
+ # position of joints and joints angular speed, legs contact with ground, and 10 lidar
38
+ # rangefinder measurements to help to deal with the hardcore version. There's no coordinates
39
+ # in the state vector. Lidar is less useful in normal version, but it works.
40
+ #
41
+ # To solve the game you need to get 300 points in 1600 time steps.
42
+ #
43
+ # To solve hardcore version you need 300 points in 2000 time steps.
44
+ #
45
+ # Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
46
+
47
+ FPS = 50
48
+ SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well
49
+
50
+ MOTORS_TORQUE = 80
51
+ SPEED_HIP = 4
52
+ SPEED_KNEE = 6
53
+ LIDAR_RANGE = 160 / SCALE
54
+
55
+ INITIAL_RANDOM = 5
56
+
57
+ HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
58
+ LEG_DOWN = -8 / SCALE
59
+ LEG_W, LEG_H = 8 / SCALE, 34 / SCALE
60
+
61
+ VIEWPORT_W = 600
62
+ VIEWPORT_H = 400
63
+
64
+ TERRAIN_STEP = 14 / SCALE
65
+ TERRAIN_LENGTH = 200 # in steps
66
+ TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4
67
+ TERRAIN_GRASS = 10 # low long are grass spots, in steps
68
+ TERRAIN_STARTPAD = 20 # in steps
69
+ FRICTION = 2.5
70
+
71
+ HULL_FD = fixtureDef(
72
+ shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
73
+ density=5.0,
74
+ friction=0.1,
75
+ categoryBits=0x0020,
76
+ maskBits=0x001, # collide only with ground
77
+ restitution=0.0,
78
+ ) # 0.99 bouncy
79
+
80
+ LEG_FD = fixtureDef(
81
+ shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
82
+ density=1.0,
83
+ restitution=0.0,
84
+ categoryBits=0x0020,
85
+ maskBits=0x001,
86
+ )
87
+
88
+ LOWER_FD = fixtureDef(
89
+ shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
90
+ density=1.0,
91
+ restitution=0.0,
92
+ categoryBits=0x0020,
93
+ maskBits=0x001,
94
+ )
95
+
96
+
97
+ class ContactDetector(contactListener):
98
+ def __init__(self, env):
99
+ contactListener.__init__(self)
100
+ self.env = env
101
+
102
+ def BeginContact(self, contact):
103
+ if (
104
+ self.env.hull == contact.fixtureA.body
105
+ or self.env.hull == contact.fixtureB.body
106
+ ):
107
+ self.env.game_over = True
108
+ for leg in [self.env.legs[1], self.env.legs[3]]:
109
+ if leg in [contact.fixtureA.body, contact.fixtureB.body]:
110
+ leg.ground_contact = True
111
+
112
+ def EndContact(self, contact):
113
+ for leg in [self.env.legs[1], self.env.legs[3]]:
114
+ if leg in [contact.fixtureA.body, contact.fixtureB.body]:
115
+ leg.ground_contact = False
116
+
117
+
118
+ class BipedalWalker(gym.Env, EzPickle):
119
+ metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS}
120
+
121
+ hardcore = False
122
+
123
+ def __init__(self):
124
+ EzPickle.__init__(self)
125
+ self.seed()
126
+ self.viewer = None
127
+
128
+ self.world = Box2D.b2World()
129
+ self.terrain = None
130
+ self.hull = None
131
+
132
+ self.prev_shaping = None
133
+
134
+ self.fd_polygon = fixtureDef(
135
+ shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
136
+ friction=FRICTION,
137
+ )
138
+
139
+ self.fd_edge = fixtureDef(
140
+ shape=edgeShape(vertices=[(0, 0), (1, 1)]),
141
+ friction=FRICTION,
142
+ categoryBits=0x0001,
143
+ )
144
+
145
+ high = np.array([np.inf] * 24).astype(np.float32)
146
+ self.action_space = spaces.Box(
147
+ np.array([-1, -1, -1, -1]).astype(np.float32),
148
+ np.array([1, 1, 1, 1]).astype(np.float32),
149
+ )
150
+ self.observation_space = spaces.Box(-high, high)
151
+
152
+ def seed(self, seed=None):
153
+ self.np_random, seed = seeding.np_random(seed)
154
+ return [seed]
155
+
156
+ def _destroy(self):
157
+ if not self.terrain:
158
+ return
159
+ self.world.contactListener = None
160
+ for t in self.terrain:
161
+ self.world.DestroyBody(t)
162
+ self.terrain = []
163
+ self.world.DestroyBody(self.hull)
164
+ self.hull = None
165
+ for leg in self.legs:
166
+ self.world.DestroyBody(leg)
167
+ self.legs = []
168
+ self.joints = []
169
+
170
+ def _generate_terrain(self, hardcore):
171
+ GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
172
+ state = GRASS
173
+ velocity = 0.0
174
+ y = TERRAIN_HEIGHT
175
+ counter = TERRAIN_STARTPAD
176
+ oneshot = False
177
+ self.terrain = []
178
+ self.terrain_x = []
179
+ self.terrain_y = []
180
+ for i in range(TERRAIN_LENGTH):
181
+ x = i * TERRAIN_STEP
182
+ self.terrain_x.append(x)
183
+
184
+ if state == GRASS and not oneshot:
185
+ velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
186
+ if i > TERRAIN_STARTPAD:
187
+ velocity += self.np_random.uniform(-1, 1) / SCALE # 1
188
+ y += velocity
189
+
190
+ elif state == PIT and oneshot:
191
+ counter = self.np_random.randint(3, 5)
192
+ poly = [
193
+ (x, y),
194
+ (x + TERRAIN_STEP, y),
195
+ (x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
196
+ (x, y - 4 * TERRAIN_STEP),
197
+ ]
198
+ self.fd_polygon.shape.vertices = poly
199
+ t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
200
+ t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
201
+ self.terrain.append(t)
202
+
203
+ self.fd_polygon.shape.vertices = [
204
+ (p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
205
+ ]
206
+ t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
207
+ t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
208
+ self.terrain.append(t)
209
+ counter += 2
210
+ original_y = y
211
+
212
+ elif state == PIT and not oneshot:
213
+ y = original_y
214
+ if counter > 1:
215
+ y -= 4 * TERRAIN_STEP
216
+
217
+ elif state == STUMP and oneshot:
218
+ counter = self.np_random.randint(1, 3)
219
+ poly = [
220
+ (x, y),
221
+ (x + counter * TERRAIN_STEP, y),
222
+ (x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
223
+ (x, y + counter * TERRAIN_STEP),
224
+ ]
225
+ self.fd_polygon.shape.vertices = poly
226
+ t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
227
+ t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
228
+ self.terrain.append(t)
229
+
230
+ elif state == STAIRS and oneshot:
231
+ stair_height = +1 if self.np_random.rand() > 0.5 else -1
232
+ stair_width = self.np_random.randint(4, 5)
233
+ stair_steps = self.np_random.randint(3, 5)
234
+ original_y = y
235
+ for s in range(stair_steps):
236
+ poly = [
237
+ (
238
+ x + (s * stair_width) * TERRAIN_STEP,
239
+ y + (s * stair_height) * TERRAIN_STEP,
240
+ ),
241
+ (
242
+ x + ((1 + s) * stair_width) * TERRAIN_STEP,
243
+ y + (s * stair_height) * TERRAIN_STEP,
244
+ ),
245
+ (
246
+ x + ((1 + s) * stair_width) * TERRAIN_STEP,
247
+ y + (-1 + s * stair_height) * TERRAIN_STEP,
248
+ ),
249
+ (
250
+ x + (s * stair_width) * TERRAIN_STEP,
251
+ y + (-1 + s * stair_height) * TERRAIN_STEP,
252
+ ),
253
+ ]
254
+ self.fd_polygon.shape.vertices = poly
255
+ t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
256
+ t.color1, t.color2 = (1, 1, 1), (0.6, 0.6, 0.6)
257
+ self.terrain.append(t)
258
+ counter = stair_steps * stair_width
259
+
260
+ elif state == STAIRS and not oneshot:
261
+ s = stair_steps * stair_width - counter - stair_height
262
+ n = s / stair_width
263
+ y = original_y + (n * stair_height) * TERRAIN_STEP
264
+
265
+ oneshot = False
266
+ self.terrain_y.append(y)
267
+ counter -= 1
268
+ if counter == 0:
269
+ counter = self.np_random.randint(TERRAIN_GRASS / 2, TERRAIN_GRASS)
270
+ if state == GRASS and hardcore:
271
+ state = self.np_random.randint(1, _STATES_)
272
+ oneshot = True
273
+ else:
274
+ state = GRASS
275
+ oneshot = True
276
+
277
+ self.terrain_poly = []
278
+ for i in range(TERRAIN_LENGTH - 1):
279
+ poly = [
280
+ (self.terrain_x[i], self.terrain_y[i]),
281
+ (self.terrain_x[i + 1], self.terrain_y[i + 1]),
282
+ ]
283
+ self.fd_edge.shape.vertices = poly
284
+ t = self.world.CreateStaticBody(fixtures=self.fd_edge)
285
+ color = (0.3, 1.0 if i % 2 == 0 else 0.8, 0.3)
286
+ t.color1 = color
287
+ t.color2 = color
288
+ self.terrain.append(t)
289
+ color = (0.4, 0.6, 0.3)
290
+ poly += [(poly[1][0], 0), (poly[0][0], 0)]
291
+ self.terrain_poly.append((poly, color))
292
+ self.terrain.reverse()
293
+
294
+ def _generate_clouds(self):
295
+ # Sorry for the clouds, couldn't resist
296
+ self.cloud_poly = []
297
+ for i in range(TERRAIN_LENGTH // 20):
298
+ x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
299
+ y = VIEWPORT_H / SCALE * 3 / 4
300
+ poly = [
301
+ (
302
+ x
303
+ + 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
304
+ + self.np_random.uniform(0, 5 * TERRAIN_STEP),
305
+ y
306
+ + 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
307
+ + self.np_random.uniform(0, 5 * TERRAIN_STEP),
308
+ )
309
+ for a in range(5)
310
+ ]
311
+ x1 = min([p[0] for p in poly])
312
+ x2 = max([p[0] for p in poly])
313
+ self.cloud_poly.append((poly, x1, x2))
314
+
315
+ def reset(self):
316
+ self._destroy()
317
+ self.world.contactListener_bug_workaround = ContactDetector(self)
318
+ self.world.contactListener = self.world.contactListener_bug_workaround
319
+ self.game_over = False
320
+ self.prev_shaping = None
321
+ self.scroll = 0.0
322
+ self.lidar_render = 0
323
+
324
+ W = VIEWPORT_W / SCALE
325
+ H = VIEWPORT_H / SCALE
326
+
327
+ self._generate_terrain(self.hardcore)
328
+ self._generate_clouds()
329
+
330
+ init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
331
+ init_y = TERRAIN_HEIGHT + 2 * LEG_H
332
+ self.hull = self.world.CreateDynamicBody(
333
+ position=(init_x, init_y), fixtures=HULL_FD
334
+ )
335
+ self.hull.color1 = (0.5, 0.4, 0.9)
336
+ self.hull.color2 = (0.3, 0.3, 0.5)
337
+ self.hull.ApplyForceToCenter(
338
+ (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
339
+ )
340
+
341
+ self.legs = []
342
+ self.joints = []
343
+ for i in [-1, +1]:
344
+ leg = self.world.CreateDynamicBody(
345
+ position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
346
+ angle=(i * 0.05),
347
+ fixtures=LEG_FD,
348
+ )
349
+ leg.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
350
+ leg.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
351
+ rjd = revoluteJointDef(
352
+ bodyA=self.hull,
353
+ bodyB=leg,
354
+ localAnchorA=(0, LEG_DOWN),
355
+ localAnchorB=(0, LEG_H / 2),
356
+ enableMotor=True,
357
+ enableLimit=True,
358
+ maxMotorTorque=MOTORS_TORQUE,
359
+ motorSpeed=i,
360
+ lowerAngle=-0.8,
361
+ upperAngle=1.1,
362
+ )
363
+ self.legs.append(leg)
364
+ self.joints.append(self.world.CreateJoint(rjd))
365
+
366
+ lower = self.world.CreateDynamicBody(
367
+ position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
368
+ angle=(i * 0.05),
369
+ fixtures=LOWER_FD,
370
+ )
371
+ lower.color1 = (0.6 - i / 10.0, 0.3 - i / 10.0, 0.5 - i / 10.0)
372
+ lower.color2 = (0.4 - i / 10.0, 0.2 - i / 10.0, 0.3 - i / 10.0)
373
+ rjd = revoluteJointDef(
374
+ bodyA=leg,
375
+ bodyB=lower,
376
+ localAnchorA=(0, -LEG_H / 2),
377
+ localAnchorB=(0, LEG_H / 2),
378
+ enableMotor=True,
379
+ enableLimit=True,
380
+ maxMotorTorque=MOTORS_TORQUE,
381
+ motorSpeed=1,
382
+ lowerAngle=-1.6,
383
+ upperAngle=-0.1,
384
+ )
385
+ lower.ground_contact = False
386
+ self.legs.append(lower)
387
+ self.joints.append(self.world.CreateJoint(rjd))
388
+
389
+ self.drawlist = self.terrain + self.legs + [self.hull]
390
+
391
+ class LidarCallback(Box2D.b2.rayCastCallback):
392
+ def ReportFixture(self, fixture, point, normal, fraction):
393
+ if (fixture.filterData.categoryBits & 1) == 0:
394
+ return -1
395
+ self.p2 = point
396
+ self.fraction = fraction
397
+ return fraction
398
+
399
+ self.lidar = [LidarCallback() for _ in range(10)]
400
+
401
+ return self.step(np.array([0, 0, 0, 0]))[0]
402
+
403
+ def step(self, action):
404
+ # self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
405
+ control_speed = False # Should be easier as well
406
+ if control_speed:
407
+ self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
408
+ self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
409
+ self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
410
+ self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
411
+ else:
412
+ self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
413
+ self.joints[0].maxMotorTorque = float(
414
+ MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
415
+ )
416
+ self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
417
+ self.joints[1].maxMotorTorque = float(
418
+ MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
419
+ )
420
+ self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
421
+ self.joints[2].maxMotorTorque = float(
422
+ MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
423
+ )
424
+ self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
425
+ self.joints[3].maxMotorTorque = float(
426
+ MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
427
+ )
428
+
429
+ self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
430
+
431
+ pos = self.hull.position
432
+ vel = self.hull.linearVelocity
433
+
434
+ for i in range(10):
435
+ self.lidar[i].fraction = 1.0
436
+ self.lidar[i].p1 = pos
437
+ self.lidar[i].p2 = (
438
+ pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
439
+ pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
440
+ )
441
+ self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
442
+
443
+ state = [
444
+ self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible.
445
+ 2.0 * self.hull.angularVelocity / FPS,
446
+ 0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range
447
+ 0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
448
+ self.joints[
449
+ 0
450
+ ].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
451
+ self.joints[0].speed / SPEED_HIP,
452
+ self.joints[1].angle + 1.0,
453
+ self.joints[1].speed / SPEED_KNEE,
454
+ 1.0 if self.legs[1].ground_contact else 0.0,
455
+ self.joints[2].angle,
456
+ self.joints[2].speed / SPEED_HIP,
457
+ self.joints[3].angle + 1.0,
458
+ self.joints[3].speed / SPEED_KNEE,
459
+ 1.0 if self.legs[3].ground_contact else 0.0,
460
+ ]
461
+ state += [l.fraction for l in self.lidar]
462
+ assert len(state) == 24
463
+
464
+ self.scroll = pos.x - VIEWPORT_W / SCALE / 5
465
+
466
+ shaping = (
467
+ 130 * pos[0] / SCALE
468
+ ) # moving forward is a way to receive reward (normalized to get 300 on completion)
469
+ shaping -= 5.0 * abs(
470
+ state[0]
471
+ ) # keep head straight, other than that and falling, any behavior is unpunished
472
+
473
+ reward = 0
474
+ if self.prev_shaping is not None:
475
+ reward = shaping - self.prev_shaping
476
+ self.prev_shaping = shaping
477
+
478
+ for a in action:
479
+ reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
480
+ # normalized to about -50.0 using heuristic, more optimal agent should spend less
481
+
482
+ done = False
483
+ if self.game_over or pos[0] < 0:
484
+ reward = -100
485
+ done = True
486
+ if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
487
+ done = True
488
+ return np.array(state, dtype=np.float32), reward, done, {}
489
+
490
+ def render(self, mode="human"):
491
+ from gym.envs.classic_control import rendering
492
+
493
+ if self.viewer is None:
494
+ self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
495
+ self.viewer.set_bounds(
496
+ self.scroll, VIEWPORT_W / SCALE + self.scroll, 0, VIEWPORT_H / SCALE
497
+ )
498
+
499
+ self.viewer.draw_polygon(
500
+ [
501
+ (self.scroll, 0),
502
+ (self.scroll + VIEWPORT_W / SCALE, 0),
503
+ (self.scroll + VIEWPORT_W / SCALE, VIEWPORT_H / SCALE),
504
+ (self.scroll, VIEWPORT_H / SCALE),
505
+ ],
506
+ color=(0.9, 0.9, 1.0),
507
+ )
508
+ for poly, x1, x2 in self.cloud_poly:
509
+ if x2 < self.scroll / 2:
510
+ continue
511
+ if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
512
+ continue
513
+ self.viewer.draw_polygon(
514
+ [(p[0] + self.scroll / 2, p[1]) for p in poly], color=(1, 1, 1)
515
+ )
516
+ for poly, color in self.terrain_poly:
517
+ if poly[1][0] < self.scroll:
518
+ continue
519
+ if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
520
+ continue
521
+ self.viewer.draw_polygon(poly, color=color)
522
+
523
+ self.lidar_render = (self.lidar_render + 1) % 100
524
+ i = self.lidar_render
525
+ if i < 2 * len(self.lidar):
526
+ l = (
527
+ self.lidar[i]
528
+ if i < len(self.lidar)
529
+ else self.lidar[len(self.lidar) - i - 1]
530
+ )
531
+ self.viewer.draw_polyline([l.p1, l.p2], color=(1, 0, 0), linewidth=1)
532
+
533
+ for obj in self.drawlist:
534
+ for f in obj.fixtures:
535
+ trans = f.body.transform
536
+ if type(f.shape) is circleShape:
537
+ t = rendering.Transform(translation=trans * f.shape.pos)
538
+ self.viewer.draw_circle(
539
+ f.shape.radius, 30, color=obj.color1
540
+ ).add_attr(t)
541
+ self.viewer.draw_circle(
542
+ f.shape.radius, 30, color=obj.color2, filled=False, linewidth=2
543
+ ).add_attr(t)
544
+ else:
545
+ path = [trans * v for v in f.shape.vertices]
546
+ self.viewer.draw_polygon(path, color=obj.color1)
547
+ path.append(path[0])
548
+ self.viewer.draw_polyline(path, color=obj.color2, linewidth=2)
549
+
550
+ flagy1 = TERRAIN_HEIGHT
551
+ flagy2 = flagy1 + 50 / SCALE
552
+ x = TERRAIN_STEP * 3
553
+ self.viewer.draw_polyline(
554
+ [(x, flagy1), (x, flagy2)], color=(0, 0, 0), linewidth=2
555
+ )
556
+ f = [
557
+ (x, flagy2),
558
+ (x, flagy2 - 10 / SCALE),
559
+ (x + 25 / SCALE, flagy2 - 5 / SCALE),
560
+ ]
561
+ self.viewer.draw_polygon(f, color=(0.9, 0.2, 0))
562
+ self.viewer.draw_polyline(f + [f[0]], color=(0, 0, 0), linewidth=2)
563
+
564
+ return self.viewer.render(return_rgb_array=mode == "rgb_array")
565
+
566
+ def close(self):
567
+ if self.viewer is not None:
568
+ self.viewer.close()
569
+ self.viewer = None
570
+
571
+
572
+ class BipedalWalkerHardcore(BipedalWalker):
573
+ hardcore = True
574
+
575
+
576
+ if __name__ == "__main__":
577
+ # Heurisic: suboptimal, have no notion of balance.
578
+ env = BipedalWalker()
579
+ env.reset()
580
+ steps = 0
581
+ total_reward = 0
582
+ a = np.array([0.0, 0.0, 0.0, 0.0])
583
+ STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
584
+ SPEED = 0.29 # Will fall forward on higher speed
585
+ state = STAY_ON_ONE_LEG
586
+ moving_leg = 0
587
+ supporting_leg = 1 - moving_leg
588
+ SUPPORT_KNEE_ANGLE = +0.1
589
+ supporting_knee_angle = SUPPORT_KNEE_ANGLE
590
+ while True:
591
+ s, r, done, info = env.step(a)
592
+ total_reward += r
593
+ if steps % 20 == 0 or done:
594
+ print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
595
+ print("step {} total_reward {:+0.2f}".format(steps, total_reward))
596
+ print("hull " + str(["{:+0.2f}".format(x) for x in s[0:4]]))
597
+ print("leg0 " + str(["{:+0.2f}".format(x) for x in s[4:9]]))
598
+ print("leg1 " + str(["{:+0.2f}".format(x) for x in s[9:14]]))
599
+ steps += 1
600
+
601
+ contact0 = s[8]
602
+ contact1 = s[13]
603
+ moving_s_base = 4 + 5 * moving_leg
604
+ supporting_s_base = 4 + 5 * supporting_leg
605
+
606
+ hip_targ = [None, None] # -0.8 .. +1.1
607
+ knee_targ = [None, None] # -0.6 .. +0.9
608
+ hip_todo = [0.0, 0.0]
609
+ knee_todo = [0.0, 0.0]
610
+
611
+ if state == STAY_ON_ONE_LEG:
612
+ hip_targ[moving_leg] = 1.1
613
+ knee_targ[moving_leg] = -0.6
614
+ supporting_knee_angle += 0.03
615
+ if s[2] > SPEED:
616
+ supporting_knee_angle += 0.03
617
+ supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
618
+ knee_targ[supporting_leg] = supporting_knee_angle
619
+ if s[supporting_s_base + 0] < 0.10: # supporting leg is behind
620
+ state = PUT_OTHER_DOWN
621
+ if state == PUT_OTHER_DOWN:
622
+ hip_targ[moving_leg] = +0.1
623
+ knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
624
+ knee_targ[supporting_leg] = supporting_knee_angle
625
+ if s[moving_s_base + 4]:
626
+ state = PUSH_OFF
627
+ supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
628
+ if state == PUSH_OFF:
629
+ knee_targ[moving_leg] = supporting_knee_angle
630
+ knee_targ[supporting_leg] = +1.0
631
+ if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
632
+ state = STAY_ON_ONE_LEG
633
+ moving_leg = 1 - moving_leg
634
+ supporting_leg = 1 - moving_leg
635
+
636
+ if hip_targ[0]:
637
+ hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
638
+ if hip_targ[1]:
639
+ hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
640
+ if knee_targ[0]:
641
+ knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
642
+ if knee_targ[1]:
643
+ knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]
644
+
645
+ hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1] # PID to keep head strait
646
+ hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
647
+ knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations
648
+ knee_todo[1] -= 15.0 * s[3]
649
+
650
+ a[0] = hip_todo[0]
651
+ a[1] = knee_todo[0]
652
+ a[2] = hip_todo[1]
653
+ a[3] = knee_todo[1]
654
+ a = np.clip(0.5 * a, -1.0, 1.0)
655
+
656
+ env.render()
657
+ if done:
658
+ break
gym-0.21.0/gym/envs/classic_control/cartpole.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Classic cart-pole system implemented by Rich Sutton et al.
3
+ Copied from http://incompleteideas.net/sutton/book/code/pole.c
4
+ permalink: https://perma.cc/C9ZM-652R
5
+ """
6
+
7
+ import math
8
+ import gym
9
+ from gym import spaces, logger
10
+ from gym.utils import seeding
11
+ import numpy as np
12
+
13
+
14
class CartPoleEnv(gym.Env):
    """
    Description:
        A pole is attached by an un-actuated joint to a cart, which moves along
        a frictionless track. The pendulum starts upright, and the goal is to
        prevent it from falling over by increasing and reducing the cart's
        velocity.

    Source:
        This environment corresponds to the version of the cart-pole problem
        described by Barto, Sutton, and Anderson

    Observation:
        Type: Box(4)
        Num     Observation               Min                  Max
        0       Cart Position             -4.8                 4.8
        1       Cart Velocity             -Inf                 Inf
        2       Pole Angle                -0.418 rad (-24 deg) 0.418 rad (24 deg)
        3       Pole Angular Velocity     -Inf                 Inf

    Actions:
        Type: Discrete(2)
        Num   Action
        0     Push cart to the left
        1     Push cart to the right

        Note: The amount the velocity that is reduced or increased is not
        fixed; it depends on the angle the pole is pointing. This is because
        the center of gravity of the pole increases the amount of energy needed
        to move the cart underneath it

    Reward:
        Reward is 1 for every step taken, including the termination step

    Starting State:
        All observations are assigned a uniform random value in [-0.05..0.05]

    Episode Termination:
        Pole Angle is more than 12 degrees.
        Cart Position is more than 2.4 (center of the cart reaches the edge of
        the display).
        Episode length is greater than 200.
    Solved Requirements:
        Considered solved when the average return is greater than or equal to
        195.0 over 100 consecutive trials.
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}

    def __init__(self):
        # Physical constants of the cart-pole system.
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = "euler"

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.seed()
        self.viewer = None
        # (x, x_dot, theta, theta_dot); populated by reset().
        self.state = None

        # None while the episode is live; counts extra step() calls after done.
        self.steps_beyond_done = None

    def seed(self, seed=None):
        """Seed the environment's RNG and return the seed actually used."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the dynamics one ``tau``-second tick under ``action``.

        Returns ``(observation, reward, done, info)`` per the gym API.
        """
        err_msg = "%r (%s) invalid" % (action, type(action))
        assert self.action_space.contains(action), err_msg

        x, x_dot, theta, theta_dot = self.state
        # Action 1 pushes right with +force_mag, action 0 pushes left.
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (
            force + self.polemass_length * theta_dot ** 2 * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        if self.kinematics_integrator == "euler":
            # Explicit Euler: positions updated with the *old* velocities.
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot

        self.state = (x, x_dot, theta, theta_dot)

        # Terminate when the cart leaves the track or the pole tips too far.
        done = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            # Warn once if the caller keeps stepping a finished episode.
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state, dtype=np.float32), reward, done, {}

    def reset(self):
        """Resample a start state near equilibrium and return the observation."""
        self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None
        return np.array(self.state, dtype=np.float32)

    def render(self, mode="human"):
        """Draw the cart and pole; return an RGB array when mode == "rgb_array"."""
        screen_width = 600
        screen_height = 400

        world_width = self.x_threshold * 2
        scale = screen_width / world_width  # pixels per world unit
        carty = 100  # TOP OF CART
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.viewer is None:
            # Lazily build the viewer and the static geometry on first call.
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(screen_width, screen_height)
            l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
            axleoffset = cartheight / 4.0
            cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            self.carttrans = rendering.Transform()
            cart.add_attr(self.carttrans)
            self.viewer.add_geom(cart)
            l, r, t, b = (
                -polewidth / 2,
                polewidth / 2,
                polelen - polewidth / 2,
                -polewidth / 2,
            )
            pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            pole.set_color(0.8, 0.6, 0.4)
            self.poletrans = rendering.Transform(translation=(0, axleoffset))
            pole.add_attr(self.poletrans)
            pole.add_attr(self.carttrans)
            self.viewer.add_geom(pole)
            self.axle = rendering.make_circle(polewidth / 2)
            self.axle.add_attr(self.poletrans)
            self.axle.add_attr(self.carttrans)
            self.axle.set_color(0.5, 0.5, 0.8)
            self.viewer.add_geom(self.axle)
            self.track = rendering.Line((0, carty), (screen_width, carty))
            self.track.set_color(0, 0, 0)
            self.viewer.add_geom(self.track)

            self._pole_geom = pole

        if self.state is None:
            return None

        # Edit the pole polygon vertex
        pole = self._pole_geom
        l, r, t, b = (
            -polewidth / 2,
            polewidth / 2,
            polelen - polewidth / 2,
            -polewidth / 2,
        )
        pole.v = [(l, b), (l, t), (r, t), (r, b)]

        x = self.state
        cartx = x[0] * scale + screen_width / 2.0  # MIDDLE OF CART
        self.carttrans.set_translation(cartx, carty)
        self.poletrans.set_rotation(-x[2])

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close and release the rendering window, if one was created."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
gym-0.21.0/gym/envs/classic_control/mountain_car.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ http://incompleteideas.net/MountainCar/MountainCar1.cp
3
+ permalink: https://perma.cc/6Z2N-PFWC
4
+ """
5
+ import math
6
+
7
+ import numpy as np
8
+
9
+ import gym
10
+ from gym import spaces
11
+ from gym.utils import seeding
12
+
13
+
14
class MountainCarEnv(gym.Env):
    """
    Description:
        The agent (a car) is started at the bottom of a valley. For any given
        state the agent may choose to accelerate to the left, right or cease
        any acceleration.

    Source:
        The environment appeared first in Andrew Moore's PhD Thesis (1990).

    Observation:
        Type: Box(2)
        Num    Observation               Min            Max
        0      Car Position              -1.2           0.6
        1      Car Velocity              -0.07          0.07

    Actions:
        Type: Discrete(3)
        Num    Action
        0      Accelerate to the Left
        1      Don't accelerate
        2      Accelerate to the Right

        Note: This does not affect the amount of velocity affected by the
        gravitational pull acting on the car.

    Reward:
         Reward of 0 is awarded if the agent reached the flag (position = 0.5)
         on top of the mountain.
         Reward of -1 is awarded if the position of the agent is less than 0.5.

    Starting State:
         The position of the car is assigned a uniform random value in
         [-0.6 , -0.4].
         The starting velocity of the car is always assigned to 0.

    Episode Termination:
         The car position is more than 0.5
         Episode length is greater than 200
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}

    def __init__(self, goal_velocity=0):
        # Track limits and success condition.
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5
        self.goal_velocity = goal_velocity

        # Magnitudes of the per-step action force and the gravity term.
        self.force = 0.001
        self.gravity = 0.0025

        self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
        self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)

        self.viewer = None

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)

        self.seed()

    def seed(self, seed=None):
        """Seed the environment's RNG and return the seed actually used."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance one step; returns (observation, reward, done, info)."""
        assert self.action_space.contains(action), "%r (%s) invalid" % (
            action,
            type(action),
        )

        position, velocity = self.state
        # (action - 1) maps {0, 1, 2} to thrust {-1, 0, +1}; the cos(3x) term
        # is the slope of the valley acting against the car.
        velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)
        # Inelastic collision with the left wall: kill leftward velocity.
        if position == self.min_position and velocity < 0:
            velocity = 0

        done = bool(position >= self.goal_position and velocity >= self.goal_velocity)
        reward = -1.0

        self.state = (position, velocity)
        return np.array(self.state, dtype=np.float32), reward, done, {}

    def reset(self):
        """Place the car near the valley bottom with zero velocity."""
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return np.array(self.state, dtype=np.float32)

    def _height(self, xs):
        """Hill height profile used for rendering (valley bottom near x = -pi/6)."""
        return np.sin(3 * xs) * 0.45 + 0.55

    def render(self, mode="human"):
        """Draw the track, car, and goal flag; RGB array for mode="rgb_array"."""
        screen_width = 600
        screen_height = 400

        world_width = self.max_position - self.min_position
        scale = screen_width / world_width  # pixels per world unit
        carwidth = 40
        carheight = 20

        if self.viewer is None:
            # Build the viewer and static geometry on first use.
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(screen_width, screen_height)
            xs = np.linspace(self.min_position, self.max_position, 100)
            ys = self._height(xs)
            xys = list(zip((xs - self.min_position) * scale, ys * scale))

            self.track = rendering.make_polyline(xys)
            self.track.set_linewidth(4)
            self.viewer.add_geom(self.track)

            clearance = 10

            l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
            car = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            car.add_attr(rendering.Transform(translation=(0, clearance)))
            self.cartrans = rendering.Transform()
            car.add_attr(self.cartrans)
            self.viewer.add_geom(car)
            frontwheel = rendering.make_circle(carheight / 2.5)
            frontwheel.set_color(0.5, 0.5, 0.5)
            frontwheel.add_attr(
                rendering.Transform(translation=(carwidth / 4, clearance))
            )
            frontwheel.add_attr(self.cartrans)
            self.viewer.add_geom(frontwheel)
            backwheel = rendering.make_circle(carheight / 2.5)
            backwheel.add_attr(
                rendering.Transform(translation=(-carwidth / 4, clearance))
            )
            backwheel.add_attr(self.cartrans)
            backwheel.set_color(0.5, 0.5, 0.5)
            self.viewer.add_geom(backwheel)
            flagx = (self.goal_position - self.min_position) * scale
            flagy1 = self._height(self.goal_position) * scale
            flagy2 = flagy1 + 50
            flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
            self.viewer.add_geom(flagpole)
            flag = rendering.FilledPolygon(
                [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)]
            )
            flag.set_color(0.8, 0.8, 0)
            self.viewer.add_geom(flag)

        pos = self.state[0]
        self.cartrans.set_translation(
            (pos - self.min_position) * scale, self._height(pos) * scale
        )
        # Tilt the car to follow the local slope of the hill.
        self.cartrans.set_rotation(math.cos(3 * pos))

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def get_keys_to_action(self):
        """Keyboard mapping for play mode."""
        # Control with left and right arrow keys.
        return {(): 1, (276,): 0, (275,): 2, (275, 276): 1}

    def close(self):
        """Close and release the rendering window, if one was created."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
gym-0.21.0/gym/envs/classic_control/pendulum.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ from gym import spaces
3
+ from gym.utils import seeding
4
+ import numpy as np
5
+ from os import path
6
+
7
+
8
class PendulumEnv(gym.Env):
    """Classic pendulum swing-up task.

    The internal state is the pendulum angle ``th`` and angular velocity
    ``thdot``; the observation is ``[cos(th), sin(th), thdot]``.  The single
    continuous action is a torque clipped to ``[-max_torque, max_torque]``.
    Each step incurs cost ``angle_normalize(th)**2 + 0.1*thdot**2 +
    0.001*u**2`` (minimized at the upright position ``th == 0``), returned as
    a negative reward; the episode never signals ``done``.
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}

    def __init__(self, g=10.0):
        # Physical parameters: speed/torque limits, timestep, gravity,
        # mass, and rod length.
        self.max_speed = 8
        self.max_torque = 2.0
        self.dt = 0.05
        self.g = g
        self.m = 1.0
        self.l = 1.0
        self.viewer = None

        high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
        self.action_space = spaces.Box(
            low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)

        self.seed()

    def seed(self, seed=None):
        """Seed the environment's RNG and return the seed actually used."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, u):
        """Apply torque ``u`` for one timestep; returns (obs, -cost, False, {})."""
        th, thdot = self.state  # th := theta

        g = self.g
        m = self.m
        l = self.l
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u  # for rendering
        # Quadratic cost on angle error, speed, and applied torque.
        costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2)

        # Semi-implicit Euler: update velocity first, then the angle.
        newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
        newth = th + newthdot * dt

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(self):
        """Sample theta in [-pi, pi] and thetadot in [-1, 1] uniformly."""
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        return self._get_obs()

    def _get_obs(self):
        """Return the observation [cos(theta), sin(theta), thetadot]."""
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)

    def render(self, mode="human"):
        """Draw the pendulum and a torque indicator; RGB array for "rgb_array"."""
        if self.viewer is None:
            # Build the viewer and static geometry on first use.
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            rod = rendering.make_capsule(1, 0.2)
            rod.set_color(0.8, 0.3, 0.3)
            self.pole_transform = rendering.Transform()
            rod.add_attr(self.pole_transform)
            self.viewer.add_geom(rod)
            axle = rendering.make_circle(0.05)
            axle.set_color(0, 0, 0)
            self.viewer.add_geom(axle)
            fname = path.join(path.dirname(__file__), "assets/clockwise.png")
            self.img = rendering.Image(fname, 1.0, 1.0)
            self.imgtrans = rendering.Transform()
            self.img.add_attr(self.imgtrans)

        self.viewer.add_onetime(self.img)
        self.pole_transform.set_rotation(self.state[0] + np.pi / 2)
        if self.last_u is not None:
            # Scale the arrow image with the magnitude/sign of the last torque.
            self.imgtrans.scale = (-self.last_u / 2, np.abs(self.last_u) / 2)

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close and release the rendering window, if one was created."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
91
+
92
+
93
def angle_normalize(x):
    """Map an angle in radians onto the equivalent value in [-pi, pi)."""
    two_pi = 2 * np.pi
    return np.mod(x + np.pi, two_pi) - np.pi
gym-0.21.0/gym/envs/mujoco/ant.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym import utils
3
+ from gym.envs.mujoco import mujoco_env
4
+
5
+
6
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo Ant: a quadruped rewarded for forward (+x) torso motion."""

    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5)
        utils.EzPickle.__init__(self)

    def step(self, a):
        """Run one control step; returns (obs, reward, done, reward breakdown)."""
        x_before = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        x_after = self.get_body_com("torso")[0]

        forward_reward = (x_after - x_before) / self.dt
        ctrl_cost = 0.5 * np.square(a).sum()
        clipped_cfrc = np.clip(self.sim.data.cfrc_ext, -1, 1)
        contact_cost = 0.5 * 1e-3 * np.sum(np.square(clipped_cfrc))
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward

        # Terminate on non-finite state or when the torso z leaves [0.2, 1.0].
        state = self.state_vector()
        healthy = np.isfinite(state).all() and 0.2 <= state[2] <= 1.0
        done = not healthy

        info = dict(
            reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost,
            reward_contact=-contact_cost,
            reward_survive=survive_reward,
        )
        return self._get_obs(), reward, done, info

    def _get_obs(self):
        """Observation: qpos[2:] (global x, y dropped), qvel, clipped contact forces."""
        data = self.sim.data
        pieces = [
            data.qpos.flat[2:],
            data.qvel.flat,
            np.clip(data.cfrc_ext, -1, 1).flat,
        ]
        return np.concatenate(pieces)

    def reset_model(self):
        """Perturb the initial pose and velocities with small noise; return obs."""
        pos_noise = self.np_random.uniform(size=self.model.nq, low=-0.1, high=0.1)
        qpos = self.init_qpos + pos_noise
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Zoom the camera to half the model's spatial extent."""
        self.viewer.cam.distance = self.model.stat.extent * 0.5
gym-0.21.0/gym/envs/mujoco/assets/thrower.xml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<mujoco model="arm3d">
    <!-- Thrower task: a 7-DoF arm with a basket-like "hand" must throw the
         free ball into the goal box sitting on the ground plane. -->
    <compiler inertiafromgeom="true" angle="radian" coordinate="local"/>
    <option timestep="0.01" gravity="0 0 -9.81" iterations="20" integrator="Euler"/>
    <default>
        <!-- Defaults for all joints/geoms; contype/conaffinity 0 disables
             contacts unless a geom overrides them. -->
        <joint armature='0.75' damping="1" limited="true"/>
        <geom friction=".8 .1 .1" density="300" margin="0.002" condim="1" contype="0" conaffinity="0"/>
    </default>

    <worldbody>
        <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>

        <!-- Ground plane (collidable). -->
        <geom type="plane" pos="0 0.5 -0.325" size="1 1 0.1" contype="1" conaffinity="1"/>

        <!-- Arm kinematic chain: shoulder pan/lift -> upper-arm roll ->
             elbow flex -> forearm roll -> wrist flex/roll. -->
        <body name="r_shoulder_pan_link" pos="0 -0.6 0">
            <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" density="0.0001"/>
            <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" density="0.0001"/>
            <geom name="e1p" type="sphere" rgba="0.1 0.1 0.1 1" pos="-0.06 0.09 0.2" size="0.03" density="0.0001"/>
            <geom name="e2p" type="sphere" rgba="0.1 0.1 0.1 1" pos=" 0.06 0.09 0.2" size="0.03" density="0.0001"/>
            <geom name="sp" type="capsule" fromto="0 0 -0.4 0 0 0.2" size="0.1" density="1"/>
            <joint name="r_shoulder_pan_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-0.4854 1.214602" damping="1.0"/>

            <body name="r_shoulder_lift_link" pos="0.1 0 0">
                <geom name="sl" type="capsule" fromto="0 -0.1 0 0 0.1 0" size="0.1" density="0.0001"/>
                <joint name="r_shoulder_lift_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.5236 0.7963" damping="1.0"/>

                <body name="r_upper_arm_roll_link" pos="0 0 0">
                    <geom name="uar" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
                    <joint name="r_upper_arm_roll_joint" type="hinge" pos="0 0 0" axis="1 0 0" range="-1.5 1.7" damping="1.0"/>

                    <body name="r_upper_arm_link" pos="0 0 0">
                        <geom name="ua" type="capsule" fromto="0 0 0 0.4 0 0" size="0.06" density="0.0001"/>

                        <body name="r_elbow_flex_link" pos="0.4 0 0">
                            <geom name="ef" type="capsule" fromto="0 -0.02 0 0.0 0.02 0" size="0.06" density="0.0001"/>
                            <joint name="r_elbow_flex_joint" type="hinge" pos="0 0 0" axis="0 1 0" range="-0.7 0.7" damping="1.0"/>

                            <body name="r_forearm_roll_link" pos="0 0 0">
                                <geom name="fr" type="capsule" fromto="-0.1 0 0 0.1 0 0" size="0.02" density="0.0001"/>
                                <joint name="r_forearm_roll_joint" type="hinge" limited="true" pos="0 0 0" axis="1 0 0" damping="1.0" range="-1.5 1.5"/>

                                <body name="r_forearm_link" pos="0 0 0" axisangle="1 0 0 0.392">
                                    <geom name="fa" type="capsule" fromto="0 0 0 0 0 0.291" size="0.05" density="0.0001"/>

                                    <body name="r_wrist_flex_link" pos="0 0 0.321" axisangle="0 0 1 1.57">
                                        <geom name="wf" type="capsule" fromto="0 -0.02 0 0 0.02 0" size="0.01" density="0.0001"/>
                                        <joint name="r_wrist_flex_joint" type="hinge" pos="0 0 0" axis="0 0 1" range="-1.0 1.0" damping="1.0" />

                                        <body name="r_wrist_roll_link" pos="0 0 0" axisangle="0 1 0 -1.178">
                                            <joint name="r_wrist_roll_joint" type="hinge" pos="0 0 0" limited="true" axis="0 1 0" damping="1.0" range="0 2.25"/>
                                            <geom type="capsule" fromto="0 -0.05 0 0 0.05 0" size="0.01" contype="1" conaffinity="1" density="0.0001"/>
                                            <!-- Eight collidable 3-capsule "fingers" fanned around the
                                                 wrist (rotated about z) form the throwing scoop. -->
                                            <body pos="0 0 0" axisangle="0 0 1 0.392">
                                                <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density=".0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                            </body>
                                            <body pos="0 0 0" axisangle="0 0 1 1.57">
                                                <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                            </body>
                                            <body pos="0 0 0" axisangle="0 0 1 1.178">
                                                <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                            </body>
                                            <body pos="0 0 0" axisangle="0 0 1 0.785">
                                                <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                            </body>
                                            <body pos="0 0 0" axisangle="0 0 1 1.96">
                                                <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                            </body>
                                            <body pos="0 0 0" axisangle="0 0 1 2.355">
                                                <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                            </body>
                                            <body pos="0 0 0" axisangle="0 0 1 2.74">
                                                <geom type="capsule" fromto="0 0.025 0 0 0.075 0.075" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.075 0 0.075 0.15" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                                <geom type="capsule" fromto="0 0.075 0.15 0 0 0.225" size="0.005" contype="1" conaffinity="1" density="0.0001"/>
                                            </body>
                                        </body>
                                    </body>
                                </body>
                            </body>
                        </body>
                    </body>
                </body>
            </body>
        </body>

        <!-- Goal: a shallow open box that can slide in the x-y plane. -->
        <body name="goal" pos="0.575 0.5 -0.328">
            <geom rgba="1 1 1 1" type="box" pos="0 0 0.005" size="0.075 0.075 0.001" contype="1" conaffinity="1" density="1000"/>
            <geom rgba="1 1 1 1" type="box" pos="0.0 0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
            <geom rgba="1 1 1 1" type="box" pos="0.0 -0.075 0.034" size="0.075 0.001 0.03" contype="1" conaffinity="0"/>
            <geom rgba="1 1 1 1" type="box" pos="0.075 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
            <geom rgba="1 1 1 1" type="box" pos="-0.076 0 0.034" size="0.001 0.075 0.03" contype="1" conaffinity="0"/>
            <geom rgba="1 1 1 1" type="capsule" fromto="0.073 0.073 0.0075 0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
            <geom rgba="1 1 1 1" type="capsule" fromto="0.073 -0.073 0.0075 0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
            <geom rgba="1 1 1 1" type="capsule" fromto="-0.073 0.073 0.0075 -0.073 0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
            <geom rgba="1 1 1 1" type="capsule" fromto="-0.073 -0.073 0.0075 -0.073 -0.073 0.06" size="0.005" contype="1" conaffinity="0"/>
            <joint name="goal_slidey" type="slide" pos="0 0 0" axis="0 1 0" range="-10.3213 10.3" damping="1.0"/>
            <joint name="goal_slidex" type="slide" pos="0 0 0" axis="1 0 0" range="-10.3213 10.3" damping="1.0"/>
        </body>

        <!-- The ball to be thrown: a free body with no damping. -->
        <body name="ball" pos="0.5 -0.8 0.275">
            <geom rgba="1. 1. 1. 1" type="sphere" size="0.03 0.03 0.1" density='25' contype="1" conaffinity="1"/>
            <joint name="ball_free" type="free" armature='0' damping="0" limited="false"/>
        </body>

    </worldbody>

    <!-- Torque motors, one per arm joint (the goal and ball are unactuated). -->
    <actuator>
        <motor joint="r_shoulder_pan_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
        <motor joint="r_shoulder_lift_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
        <motor joint="r_upper_arm_roll_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
        <motor joint="r_elbow_flex_joint" ctrlrange="-10.0 10.0" ctrllimited="true" />
        <motor joint="r_forearm_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
        <motor joint="r_wrist_flex_joint" ctrlrange="-15.0 15.0" ctrllimited="true" />
        <motor joint="r_wrist_roll_joint" ctrlrange="-15.0 15.0" ctrllimited="true"/>
    </actuator>

</mujoco>
gym-0.21.0/gym/envs/mujoco/hopper.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym import utils
3
+ from gym.envs.mujoco import mujoco_env
4
+
5
+
6
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo Hopper: a one-legged robot rewarded for hopping forward (+x)."""

    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 4)
        utils.EzPickle.__init__(self)

    def step(self, a):
        """Advance one control step; returns (obs, reward, done, {})."""
        x_before = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        x_after, height, angle = self.sim.data.qpos[0:3]

        alive_bonus = 1.0
        reward = (x_after - x_before) / self.dt
        reward += alive_bonus
        reward -= 1e-3 * np.square(a).sum()

        # Healthy iff state is finite and bounded, torso is high enough,
        # and the torso angle stays close to vertical.
        state = self.state_vector()
        healthy = (
            np.isfinite(state).all()
            and (np.abs(state[2:]) < 100).all()
            and height > 0.7
            and abs(angle) < 0.2
        )
        return self._get_obs(), reward, not healthy, {}

    def _get_obs(self):
        """Observation: qpos[1:] (global x dropped) and qvel clipped to [-10, 10]."""
        data = self.sim.data
        return np.concatenate(
            [data.qpos.flat[1:], np.clip(data.qvel.flat, -10, 10)]
        )

    def reset_model(self):
        """Jitter the initial state with uniform noise in [-5e-3, 5e-3]."""

        def jitter(size):
            # One draw per state vector, matching the RNG consumption order.
            return self.np_random.uniform(low=-0.005, high=0.005, size=size)

        self.set_state(
            self.init_qpos + jitter(self.model.nq),
            self.init_qvel + jitter(self.model.nv),
        )
        return self._get_obs()

    def viewer_setup(self):
        """Track the torso with a slightly raised, tilted camera."""
        cam = self.viewer.cam
        cam.trackbodyid = 2
        cam.distance = self.model.stat.extent * 0.75
        cam.lookat[2] = 1.15
        cam.elevation = -20
gym-0.21.0/gym/envs/mujoco/hopper_v3.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym.envs.mujoco import mujoco_env
3
+ from gym import utils
4
+
5
+
6
# Camera placement applied by viewer_setup(); ndarray entries are copied
# element-wise into the viewer camera, scalars are set as attributes.
DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 2,
    "distance": 3.0,
    "lookat": np.array((0.0, 0.0, 1.15)),
    "elevation": -20.0,
}


class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Configurable (v3-style) Hopper environment.

    Per-step reward is
    ``forward_reward_weight * x_velocity + healthy_reward - ctrl_cost``.
    The episode terminates (when ``terminate_when_unhealthy``) once the torso
    height, torso angle, or any state element leaves its healthy range.
    """

    def __init__(
        self,
        xml_file="hopper.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_state_range=(-100.0, 100.0),
        healthy_z_range=(0.7, float("inf")),
        healthy_angle_range=(-0.2, 0.2),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
    ):
        # locals() here is exactly self plus the constructor arguments;
        # EzPickle records them so the env can be pickled and rebuilt.
        utils.EzPickle.__init__(**locals())

        self._forward_reward_weight = forward_reward_weight

        self._ctrl_cost_weight = ctrl_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy

        self._healthy_state_range = healthy_state_range
        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        mujoco_env.MujocoEnv.__init__(self, xml_file, 4)

    @property
    def healthy_reward(self):
        """Alive bonus: paid every step when termination-on-unhealthy is
        enabled, otherwise only while the hopper is actually healthy."""
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        """Quadratic action penalty: ctrl_cost_weight * sum(action**2)."""
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        """True iff state elements (beyond x, z), torso height z, and torso
        angle are all inside their configured healthy ranges."""
        z, angle = self.sim.data.qpos[1:3]
        state = self.state_vector()[2:]

        min_state, max_state = self._healthy_state_range
        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle

        is_healthy = all((healthy_state, healthy_z, healthy_angle))

        return is_healthy

    @property
    def done(self):
        """Episode-termination flag derived from health (always False when
        terminate_when_unhealthy is disabled)."""
        done = not self.is_healthy if self._terminate_when_unhealthy else False
        return done

    def _get_obs(self):
        """Observation: qpos (optionally without the global x) followed by
        qvel clipped to [-10, 10]."""
        position = self.sim.data.qpos.flat.copy()
        velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        """Advance the simulation; returns (obs, reward, done, info)."""
        x_position_before = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.sim.data.qpos[0]
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        done = self.done
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        return observation, reward, done, info

    def reset_model(self):
        """Jitter the initial state with uniform noise of scale
        ``reset_noise_scale`` and return the first observation."""
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        """Apply DEFAULT_CAMERA_CONFIG to the viewer camera."""
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
gym-0.21.0/gym/envs/mujoco/pusher.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym import utils
3
+ from gym.envs.mujoco import mujoco_env
4
+
5
+ import mujoco_py
6
+
7
+
8
class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """MuJoCo 7-DoF arm that must push a cylinder onto a goal position."""

    def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, "pusher.xml", 5)

    def step(self, a):
        """Apply action `a` and return (obs, reward, done, info).

        Reward combines object-to-goal distance, fingertip-to-object
        distance, and a quadratic control penalty. Note the reward is
        computed from the state *before* stepping the simulation,
        matching the original implementation.
        """
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")

        reward_near = -np.linalg.norm(vec_1)
        reward_dist = -np.linalg.norm(vec_2)
        reward_ctrl = -np.square(a).sum()
        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near

        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False  # episode end is handled externally (TimeLimit wrapper)
        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = -1
        self.viewer.cam.distance = 4.0

    def reset_model(self):
        """Randomize the cylinder spawn (rejecting positions too close to
        the goal) and reset the arm with small velocity noise."""
        # BUGFIX: copy the initial pose. The original bound
        # `qpos = self.init_qpos` and then wrote into it, mutating the
        # cached initial state in place on every reset.
        qpos = self.init_qpos.copy()

        self.goal_pos = np.asarray([0, 0])
        while True:
            self.cylinder_pos = np.concatenate(
                [
                    self.np_random.uniform(low=-0.3, high=0, size=1),
                    self.np_random.uniform(low=-0.2, high=0.2, size=1),
                ]
            )
            # Require the cylinder to start at least 0.17 m from the goal.
            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
                break

        qpos[-4:-2] = self.cylinder_pos
        qpos[-2:] = self.goal_pos
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[-4:] = 0  # object and goal start at rest
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        """Observation: arm joint positions/velocities (first 7 DoF) plus
        the world positions of fingertips, object, and goal."""
        return np.concatenate(
            [
                self.sim.data.qpos.flat[:7],
                self.sim.data.qvel.flat[:7],
                self.get_body_com("tips_arm"),
                self.get_body_com("object"),
                self.get_body_com("goal"),
            ]
        )
gym-0.21.0/gym/envs/mujoco/swimmer_v3.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym.envs.mujoco import mujoco_env
3
+ from gym import utils
4
+
5
+
6
# Empty: the swimmer applies no camera overrides in viewer_setup.
DEFAULT_CAMERA_CONFIG = {}
7
+
8
+
9
class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Planar swimmer rewarded for forward (x) velocity minus a control cost."""

    def __init__(
        self,
        xml_file="swimmer.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-4,
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
    ):
        # Must run first so locals() holds exactly the constructor arguments.
        utils.EzPickle.__init__(**locals())

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._reset_noise_scale = reset_noise_scale
        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        mujoco_env.MujocoEnv.__init__(self, xml_file, 4)

    def control_cost(self, action):
        """Quadratic penalty on the applied torques."""
        return self._ctrl_cost_weight * np.sum(np.square(action))

    def step(self, action):
        """Advance one control step; reward = weighted x-velocity - control cost."""
        before = self.sim.data.qpos[0:2].copy()
        self.do_simulation(action, self.frame_skip)
        after = self.sim.data.qpos[0:2].copy()

        x_velocity, y_velocity = (after - before) / self.dt
        forward_reward = self._forward_reward_weight * x_velocity
        ctrl_cost = self.control_cost(action)

        observation = self._get_obs()
        info = {
            "reward_fwd": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "x_position": after[0],
            "y_position": after[1],
            "distance_from_origin": np.linalg.norm(after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }
        # The swimmer never terminates on its own; episode length is external.
        return observation, forward_reward - ctrl_cost, False, info

    def _get_obs(self):
        """Joint positions (optionally excluding the root x/y) then velocities."""
        qpos = self.sim.data.qpos.flat.copy()
        qvel = self.sim.data.qvel.flat.copy()
        if self._exclude_current_positions_from_observation:
            qpos = qpos[2:]
        return np.concatenate([qpos, qvel]).ravel()

    def reset_model(self):
        """Reset to the initial state plus uniform noise; return an observation."""
        scale = self._reset_noise_scale
        qpos = self.init_qpos + self.np_random.uniform(
            low=-scale, high=scale, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=-scale, high=scale, size=self.model.nv
        )
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Apply DEFAULT_CAMERA_CONFIG entries to the viewer camera."""
        for attr, val in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(val, np.ndarray):
                getattr(self.viewer.cam, attr)[:] = val
            else:
                setattr(self.viewer.cam, attr, val)
gym-0.21.0/gym/envs/robotics/assets/fetch/reach.xml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <mujoco>
3
+ <compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
4
+ <option timestep="0.002">
5
+ <flag warmstart="enable"></flag>
6
+ </option>
7
+
8
+ <include file="shared.xml"></include>
9
+
10
+ <worldbody>
11
+ <geom name="floor0" pos="0.8 0.75 0" size="0.85 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
12
+ <body name="floor0" pos="0.8 0.75 0">
13
+ <site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
14
+ </body>
15
+
16
+ <include file="robot.xml"></include>
17
+
18
+ <body pos="1.3 0.75 0.2" name="table0">
19
+ <geom size="0.25 0.35 0.2" type="box" mass="2000" material="table_mat"></geom>
20
+ </body>
21
+
22
+ <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
23
+ </worldbody>
24
+
25
+ <actuator></actuator>
26
+ </mujoco>
gym-0.21.0/gym/envs/robotics/assets/hand/manipulate_block_touch_sensors.xml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <mujoco>
3
+ <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
4
+ <option timestep="0.002" iterations="20" apirate="200">
5
+ <flag warmstart="enable"></flag>
6
+ </option>
7
+
8
+ <include file="shared.xml"></include>
9
+ <include file="shared_touch_sensors_92.xml"></include>
10
+
11
+ <asset>
12
+ <include file="shared_asset.xml"></include>
13
+
14
+ <texture name="texture:object" file="block.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
15
+ <texture name="texture:hidden" file="block_hidden.png" gridsize="3 4" gridlayout=".U..LFRB.D.."></texture>
16
+
17
+ <material name="material:object" texture="texture:object" specular="1" shininess="0.3" reflectance="0"></material>
18
+ <material name="material:hidden" texture="texture:hidden" specular="1" shininess="0.3" reflectance="0"></material>
19
+ <material name="material:target" texture="texture:object" specular="1" shininess="0.3" reflectance="0" rgba="1 1 1 0.5"></material>
20
+ </asset>
21
+
22
+ <worldbody>
23
+ <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
24
+ <body name="floor0" pos="1 1 0"></body>
25
+
26
+ <include file="robot_touch_sensors_92.xml"></include>
27
+
28
+ <body name="object" pos="1 0.87 0.2">
29
+ <geom name="object" type="box" size="0.025 0.025 0.025" material="material:object" condim="4" density="567"></geom>
30
+ <geom name="object_hidden" type="box" size="0.024 0.024 0.024" material="material:hidden" condim="4" contype="0" conaffinity="0" mass="0"></geom>
31
+ <site name="object:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
32
+ <joint name="object:joint" type="free" damping="0.01"></joint>
33
+ </body>
34
+ <body name="target" pos="1 0.87 0.2">
35
+ <geom name="target" type="box" size="0.025 0.025 0.025" material="material:target" condim="4" group="2" contype="0" conaffinity="0"></geom>
36
+ <site name="target:center" pos="0 0 0" rgba="1 0 0 0" size="0.01 0.01 0.01"></site>
37
+ <joint name="target:joint" type="free" damping="0.01"></joint>
38
+ </body>
39
+
40
+ <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 1 4" dir="0 0 -1" name="light0"></light>
41
+ </worldbody>
42
+ </mujoco>
gym-0.21.0/gym/envs/robotics/assets/hand/reach.xml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <mujoco>
3
+ <compiler angle="radian" coordinate="local" meshdir="../stls/hand" texturedir="../textures"></compiler>
4
+ <option timestep="0.002" iterations="20" apirate="200">
5
+ <flag warmstart="enable"></flag>
6
+ </option>
7
+
8
+ <include file="shared.xml"></include>
9
+
10
+ <asset>
11
+ <include file="shared_asset.xml"></include>
12
+ </asset>
13
+
14
+ <worldbody>
15
+ <geom name="floor0" pos="1 1 0" size="1 1 1" type="plane" condim="3" material="floor_mat"></geom>
16
+ <body name="floor0" pos="1 1 0">
17
+ <site name="target0" pos="0 0 0" size="0.005" rgba="1 0 0 1" type="sphere"></site>
18
+ <site name="target1" pos="0 0 0" size="0.005" rgba="0 1 0 1" type="sphere"></site>
19
+ <site name="target2" pos="0 0 0" size="0.005" rgba="0 0 1 1" type="sphere"></site>
20
+ <site name="target3" pos="0 0 0" size="0.005" rgba="1 1 0 1" type="sphere"></site>
21
+ <site name="target4" pos="0 0 0" size="0.005" rgba="1 0 1 1" type="sphere"></site>
22
+
23
+ <site name="finger0" pos="0 0 0" size="0.01" rgba="1 0 0 0.2" type="sphere"></site>
24
+ <site name="finger1" pos="0 0 0" size="0.01" rgba="0 1 0 0.2" type="sphere"></site>
25
+ <site name="finger2" pos="0 0 0" size="0.01" rgba="0 0 1 0.2" type="sphere"></site>
26
+ <site name="finger3" pos="0 0 0" size="0.01" rgba="1 1 0 0.2" type="sphere"></site>
27
+ <site name="finger4" pos="0 0 0" size="0.01" rgba="1 0 1 0.2" type="sphere"></site>
28
+ </body>
29
+
30
+ <include file="robot.xml"></include>
31
+
32
+ <light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
33
+ </worldbody>
34
+ </mujoco>
gym-0.21.0/gym/envs/robotics/assets/hand/shared.xml ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
2
+ <mujoco>
3
+ <size njmax="500" nconmax="100" nuser_jnt="1" nuser_site="1" nuser_tendon="1" nuser_sensor="1" nuser_actuator="16" nstack="600000"></size>
4
+
5
+ <visual>
6
+ <map fogstart="3" fogend="5" force="0.1"></map>
7
+ <quality shadowsize="4096"></quality>
8
+ </visual>
9
+
10
+ <default>
11
+ <default class="robot0:asset_class">
12
+ <geom friction="1 0.005 0.001" condim="3" margin="0.0005" contype="1" conaffinity="1"></geom>
13
+ <joint limited="true" damping="0.1" armature="0.001" margin="0.01" frictionloss="0.001"></joint>
14
+ <site size="0.005" rgba="0.4 0.9 0.4 1"></site>
15
+ <general ctrllimited="true" forcelimited="true"></general>
16
+ </default>
17
+ <default class="robot0:D_Touch">
18
+ <site type="box" size="0.009 0.004 0.013" pos="0 -0.004 0.018" rgba="0.8 0.8 0.8 0.15" group="4"></site>
19
+ </default>
20
+ <default class="robot0:DC_Hand">
21
+ <geom material="robot0:MatColl" contype="1" conaffinity="0" group="4"></geom>
22
+ </default>
23
+ <default class="robot0:D_Vizual">
24
+ <geom material="robot0:MatViz" contype="0" conaffinity="0" group="1" type="mesh"></geom>
25
+ </default>
26
+ <default class="robot0:free">
27
+ <joint type="free" damping="0" armature="0" limited="false"></joint>
28
+ </default>
29
+ </default>
30
+
31
+ <contact>
32
+ <pair geom1="robot0:C_ffdistal" geom2="robot0:C_thdistal" condim="1"></pair>
33
+ <pair geom1="robot0:C_ffmiddle" geom2="robot0:C_thdistal" condim="1"></pair>
34
+ <pair geom1="robot0:C_ffproximal" geom2="robot0:C_thdistal" condim="1"></pair>
35
+ <pair geom1="robot0:C_mfproximal" geom2="robot0:C_thdistal" condim="1"></pair>
36
+ <pair geom1="robot0:C_mfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
37
+ <pair geom1="robot0:C_rfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
38
+ <pair geom1="robot0:C_lfdistal" geom2="robot0:C_thdistal" condim="1"></pair>
39
+ <pair geom1="robot0:C_palm0" geom2="robot0:C_thdistal" condim="1"></pair>
40
+ <pair geom1="robot0:C_mfdistal" geom2="robot0:C_ffdistal" condim="1"></pair>
41
+ <pair geom1="robot0:C_rfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
42
+ <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
43
+ <pair geom1="robot0:C_mfproximal" geom2="robot0:C_ffproximal" condim="1"></pair>
44
+ <pair geom1="robot0:C_rfproximal" geom2="robot0:C_mfproximal" condim="1"></pair>
45
+ <pair geom1="robot0:C_lfproximal" geom2="robot0:C_rfproximal" condim="1"></pair>
46
+ <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfdistal" condim="1"></pair>
47
+ <pair geom1="robot0:C_lfdistal" geom2="robot0:C_mfdistal" condim="1"></pair>
48
+ <pair geom1="robot0:C_lfdistal" geom2="robot0:C_rfmiddle" condim="1"></pair>
49
+ <pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfdistal" condim="1"></pair>
50
+ <pair geom1="robot0:C_lfmiddle" geom2="robot0:C_rfmiddle" condim="1"></pair>
51
+ </contact>
52
+
53
+ <tendon>
54
+ <fixed name="robot0:T_WRJ1r" limited="true" range="-0.032 0.032" user="1236">
55
+ <joint joint="robot0:WRJ1" coef="0.0325"></joint>
56
+ </fixed>
57
+ <fixed name="robot0:T_WRJ1l" limited="true" range="-0.032 0.032" user="1237">
58
+ <joint joint="robot0:WRJ1" coef="-0.0325"></joint>
59
+ </fixed>
60
+ <fixed name="robot0:T_WRJ0u" limited="true" range="-0.032 0.032" user="1236">
61
+ <joint joint="robot0:WRJ0" coef="0.0175"></joint>
62
+ </fixed>
63
+ <fixed name="robot0:T_WRJ0d" limited="true" range="-0.032 0.032" user="1237">
64
+ <joint joint="robot0:WRJ0" coef="-0.0175"></joint>
65
+ </fixed>
66
+ <fixed name="robot0:T_FFJ3r" limited="true" range="-0.018 0.018" user="1204">
67
+ <joint joint="robot0:FFJ3" coef="0.01"></joint>
68
+ </fixed>
69
+ <fixed name="robot0:T_FFJ3l" limited="true" range="-0.018 0.018" user="1205">
70
+ <joint joint="robot0:FFJ3" coef="-0.01"></joint>
71
+ </fixed>
72
+ <fixed name="robot0:T_FFJ2u" limited="true" range="-0.007 0.03" user="1202">
73
+ <joint joint="robot0:FFJ2" coef="0.01"></joint>
74
+ </fixed>
75
+ <fixed name="robot0:T_FFJ2d" limited="true" range="-0.03 0.007" user="1203">
76
+ <joint joint="robot0:FFJ2" coef="-0.01"></joint>
77
+ </fixed>
78
+ <fixed name="robot0:T_FFJ1c" limited="true" range="-0.001 0.001">
79
+ <joint joint="robot0:FFJ0" coef="0.00705"></joint>
80
+ <joint joint="robot0:FFJ1" coef="-0.00805"></joint>
81
+ </fixed>
82
+ <fixed name="robot0:T_FFJ1u" limited="true" range="-0.007 0.03" user="1200">
83
+ <joint joint="robot0:FFJ0" coef="0.00705"></joint>
84
+ <joint joint="robot0:FFJ1" coef="0.00805"></joint>
85
+ </fixed>
86
+ <fixed name="robot0:T_FFJ1d" limited="true" range="-0.03 0.007" user="1201">
87
+ <joint joint="robot0:FFJ0" coef="-0.00705"></joint>
88
+ <joint joint="robot0:FFJ1" coef="-0.00805"></joint>
89
+ </fixed>
90
+ <fixed name="robot0:T_MFJ3r" limited="true" range="-0.018 0.018" user="1210">
91
+ <joint joint="robot0:MFJ3" coef="0.01"></joint>
92
+ </fixed>
93
+ <fixed name="robot0:T_MFJ3l" limited="true" range="-0.018 0.018" user="1211">
94
+ <joint joint="robot0:MFJ3" coef="-0.01"></joint>
95
+ </fixed>
96
+ <fixed name="robot0:T_MFJ2u" limited="true" range="-0.007 0.03" user="1208">
97
+ <joint joint="robot0:MFJ2" coef="0.01"></joint>
98
+ </fixed>
99
+ <fixed name="robot0:T_MFJ2d" limited="true" range="-0.03 0.007" user="1209">
100
+ <joint joint="robot0:MFJ2" coef="-0.01"></joint>
101
+ </fixed>
102
+ <fixed name="robot0:T_MFJ1c" limited="true" range="-0.001 0.001">
103
+ <joint joint="robot0:MFJ0" coef="0.00705"></joint>
104
+ <joint joint="robot0:MFJ1" coef="-0.00805"></joint>
105
+ </fixed>
106
+ <fixed name="robot0:T_MFJ1u" limited="true" range="-0.007 0.03" user="1206">
107
+ <joint joint="robot0:MFJ0" coef="0.00705"></joint>
108
+ <joint joint="robot0:MFJ1" coef="0.00805"></joint>
109
+ </fixed>
110
+ <fixed name="robot0:T_MFJ1d" limited="true" range="-0.03 0.007" user="1207">
111
+ <joint joint="robot0:MFJ0" coef="-0.00705"></joint>
112
+ <joint joint="robot0:MFJ1" coef="-0.00805"></joint>
113
+ </fixed>
114
+ <fixed name="robot0:T_RFJ3r" limited="true" range="-0.018 0.018" user="1216">
115
+ <joint joint="robot0:RFJ3" coef="0.01"></joint>
116
+ </fixed>
117
+ <fixed name="robot0:T_RFJ3l" limited="true" range="-0.018 0.018" user="1217">
118
+ <joint joint="robot0:RFJ3" coef="-0.01"></joint>
119
+ </fixed>
120
+ <fixed name="robot0:T_RFJ2u" limited="true" range="-0.007 0.03" user="1214">
121
+ <joint joint="robot0:RFJ2" coef="0.01"></joint>
122
+ </fixed>
123
+ <fixed name="robot0:T_RFJ2d" limited="true" range="-0.03 0.007" user="1215">
124
+ <joint joint="robot0:RFJ2" coef="-0.01"></joint>
125
+ </fixed>
126
+ <fixed name="robot0:T_RFJ1c" limited="true" range="-0.001 0.001">
127
+ <joint joint="robot0:RFJ0" coef="0.00705"></joint>
128
+ <joint joint="robot0:RFJ1" coef="-0.00805"></joint>
129
+ </fixed>
130
+ <fixed name="robot0:T_RFJ1u" limited="true" range="-0.007 0.03" user="1212">
131
+ <joint joint="robot0:RFJ0" coef="0.00705"></joint>
132
+ <joint joint="robot0:RFJ1" coef="0.00805"></joint>
133
+ </fixed>
134
+ <fixed name="robot0:T_RFJ1d" limited="true" range="-0.03 0.007" user="1213">
135
+ <joint joint="robot0:RFJ0" coef="-0.00705"></joint>
136
+ <joint joint="robot0:RFJ1" coef="-0.00805"></joint>
137
+ </fixed>
138
+ <fixed name="robot0:T_LFJ4u" limited="true" range="-0.007 0.03" user="1224">
139
+ <joint joint="robot0:LFJ4" coef="0.01"></joint>
140
+ </fixed>
141
+ <fixed name="robot0:T_LFJ4d" limited="true" range="-0.03 0.007" user="1225">
142
+ <joint joint="robot0:LFJ4" coef="-0.01"></joint>
143
+ </fixed>
144
+ <fixed name="robot0:T_LFJ3r" limited="true" range="-0.018 0.018" user="1222">
145
+ <joint joint="robot0:LFJ3" coef="0.01"></joint>
146
+ </fixed>
147
+ <fixed name="robot0:T_LFJ3l" limited="true" range="-0.018 0.018" user="1223">
148
+ <joint joint="robot0:LFJ3" coef="-0.01"></joint>
149
+ </fixed>
150
+ <fixed name="robot0:T_LFJ2u" limited="true" range="-0.007 0.03" user="1220">
151
+ <joint joint="robot0:LFJ2" coef="0.01"></joint>
152
+ </fixed>
153
+ <fixed name="robot0:T_LFJ2d" limited="true" range="-0.03 0.007" user="1221">
154
+ <joint joint="robot0:LFJ2" coef="-0.01"></joint>
155
+ </fixed>
156
+ <fixed name="robot0:T_LFJ1c" limited="true" range="-0.001 0.001">
157
+ <joint joint="robot0:LFJ0" coef="0.00705"></joint>
158
+ <joint joint="robot0:LFJ1" coef="-0.00805"></joint>
159
+ </fixed>
160
+ <fixed name="robot0:T_LFJ1u" limited="true" range="-0.007 0.03" user="1218">
161
+ <joint joint="robot0:LFJ0" coef="0.00705"></joint>
162
+ <joint joint="robot0:LFJ1" coef="0.00805"></joint>
163
+ </fixed>
164
+ <fixed name="robot0:T_LFJ1d" limited="true" range="-0.03 0.007" user="1219">
165
+ <joint joint="robot0:LFJ0" coef="-0.00705"></joint>
166
+ <joint joint="robot0:LFJ1" coef="-0.00805"></joint>
167
+ </fixed>
168
+ <fixed name="robot0:T_THJ4a" limited="true" range="-0.018 0.018" user="1234">
169
+ <joint joint="robot0:THJ4" coef="0.01636"></joint>
170
+ </fixed>
171
+ <fixed name="robot0:T_THJ4c" limited="true" range="-0.018 0.018" user="1235">
172
+ <joint joint="robot0:THJ4" coef="-0.01636"></joint>
173
+ </fixed>
174
+ <fixed name="robot0:T_THJ3u" limited="true" range="-0.007 0.03" user="1232">
175
+ <joint joint="robot0:THJ3" coef="0.01"></joint>
176
+ </fixed>
177
+ <fixed name="robot0:T_THJ3d" limited="true" range="-0.03 0.007" user="1233">
178
+ <joint joint="robot0:THJ3" coef="-0.01"></joint>
179
+ </fixed>
180
+ <fixed name="robot0:T_THJ2u" limited="true" range="-0.018 0.018" user="1230">
181
+ <joint joint="robot0:THJ2" coef="0.011"></joint>
182
+ </fixed>
183
+ <fixed name="robot0:T_THJ2d" limited="true" range="-0.018 0.018" user="1231">
184
+ <joint joint="robot0:THJ2" coef="-0.011"></joint>
185
+ </fixed>
186
+ <fixed name="robot0:T_THJ1r" limited="true" range="-0.018 0.018" user="1228">
187
+ <joint joint="robot0:THJ1" coef="0.011"></joint>
188
+ </fixed>
189
+ <fixed name="robot0:T_THJ1l" limited="true" range="-0.018 0.018" user="1229">
190
+ <joint joint="robot0:THJ1" coef="-0.011"></joint>
191
+ </fixed>
192
+ <fixed name="robot0:T_THJ0r" limited="true" range="-0.03 0.007" user="1226">
193
+ <joint joint="robot0:THJ0" coef="0.009"></joint>
194
+ </fixed>
195
+ <fixed name="robot0:T_THJ0l" limited="true" range="-0.007 0.03" user="1227">
196
+ <joint joint="robot0:THJ0" coef="-0.009"></joint>
197
+ </fixed>
198
+ </tendon>
199
+
200
+ <sensor>
201
+ <jointpos name="robot0:Sjp_WRJ1" joint="robot0:WRJ1"></jointpos>
202
+ <jointpos name="robot0:Sjp_WRJ0" joint="robot0:WRJ0"></jointpos>
203
+ <jointpos name="robot0:Sjp_FFJ3" joint="robot0:FFJ3"></jointpos>
204
+ <jointpos name="robot0:Sjp_FFJ2" joint="robot0:FFJ2"></jointpos>
205
+ <jointpos name="robot0:Sjp_FFJ1" joint="robot0:FFJ1"></jointpos>
206
+ <jointpos name="robot0:Sjp_FFJ0" joint="robot0:FFJ0"></jointpos>
207
+ <jointpos name="robot0:Sjp_MFJ3" joint="robot0:MFJ3"></jointpos>
208
+ <jointpos name="robot0:Sjp_MFJ2" joint="robot0:MFJ2"></jointpos>
209
+ <jointpos name="robot0:Sjp_MFJ1" joint="robot0:MFJ1"></jointpos>
210
+ <jointpos name="robot0:Sjp_MFJ0" joint="robot0:MFJ0"></jointpos>
211
+ <jointpos name="robot0:Sjp_RFJ3" joint="robot0:RFJ3"></jointpos>
212
+ <jointpos name="robot0:Sjp_RFJ2" joint="robot0:RFJ2"></jointpos>
213
+ <jointpos name="robot0:Sjp_RFJ1" joint="robot0:RFJ1"></jointpos>
214
+ <jointpos name="robot0:Sjp_RFJ0" joint="robot0:RFJ0"></jointpos>
215
+ <jointpos name="robot0:Sjp_LFJ4" joint="robot0:LFJ4"></jointpos>
216
+ <jointpos name="robot0:Sjp_LFJ3" joint="robot0:LFJ3"></jointpos>
217
+ <jointpos name="robot0:Sjp_LFJ2" joint="robot0:LFJ2"></jointpos>
218
+ <jointpos name="robot0:Sjp_LFJ1" joint="robot0:LFJ1"></jointpos>
219
+ <jointpos name="robot0:Sjp_LFJ0" joint="robot0:LFJ0"></jointpos>
220
+ <jointpos name="robot0:Sjp_THJ4" joint="robot0:THJ4"></jointpos>
221
+ <jointpos name="robot0:Sjp_THJ3" joint="robot0:THJ3"></jointpos>
222
+ <jointpos name="robot0:Sjp_THJ2" joint="robot0:THJ2"></jointpos>
223
+ <jointpos name="robot0:Sjp_THJ1" joint="robot0:THJ1"></jointpos>
224
+ <jointpos name="robot0:Sjp_THJ0" joint="robot0:THJ0"></jointpos>
225
+ <touch name="robot0:ST_Tch_fftip" site="robot0:Tch_fftip"></touch>
226
+ <touch name="robot0:ST_Tch_mftip" site="robot0:Tch_mftip"></touch>
227
+ <touch name="robot0:ST_Tch_rftip" site="robot0:Tch_rftip"></touch>
228
+ <touch name="robot0:ST_Tch_lftip" site="robot0:Tch_lftip"></touch>
229
+ <touch name="robot0:ST_Tch_thtip" site="robot0:Tch_thtip"></touch>
230
+ </sensor>
231
+
232
+ <actuator>
233
+ <position name="robot0:A_WRJ1" class="robot0:asset_class" user="2038" joint="robot0:WRJ1" ctrlrange="-0.489 0.14" kp="5" forcerange="-4.785 4.785"></position>
234
+ <position name="robot0:A_WRJ0" class="robot0:asset_class" user="2036" joint="robot0:WRJ0" ctrlrange="-0.698 0.489" kp="5" forcerange="-2.175 2.175"></position>
235
+ <position name="robot0:A_FFJ3" class="robot0:asset_class" user="2004" joint="robot0:FFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
236
+ <position name="robot0:A_FFJ2" class="robot0:asset_class" user="2002" joint="robot0:FFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
237
+ <position name="robot0:A_FFJ1" class="robot0:asset_class" user="2000" joint="robot0:FFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
238
+ <position name="robot0:A_MFJ3" class="robot0:asset_class" user="2010" joint="robot0:MFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
239
+ <position name="robot0:A_MFJ2" class="robot0:asset_class" user="2008" joint="robot0:MFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
240
+ <position name="robot0:A_MFJ1" class="robot0:asset_class" user="2006" joint="robot0:MFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
241
+ <position name="robot0:A_RFJ3" class="robot0:asset_class" user="2016" joint="robot0:RFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
242
+ <position name="robot0:A_RFJ2" class="robot0:asset_class" user="2014" joint="robot0:RFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
243
+ <position name="robot0:A_RFJ1" class="robot0:asset_class" user="2012" joint="robot0:RFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
244
+ <position name="robot0:A_LFJ4" class="robot0:asset_class" user="2024" joint="robot0:LFJ4" ctrlrange="0 0.785" kp="1" forcerange="-0.9 0.9"></position>
245
+ <position name="robot0:A_LFJ3" class="robot0:asset_class" user="2022" joint="robot0:LFJ3" ctrlrange="-0.349 0.349" kp="1" forcerange="-0.9 0.9"></position>
246
+ <position name="robot0:A_LFJ2" class="robot0:asset_class" user="2020" joint="robot0:LFJ2" ctrlrange="0 1.571" kp="1" forcerange="-0.9 0.9"></position>
247
+ <position name="robot0:A_LFJ1" class="robot0:asset_class" user="2018" joint="robot0:LFJ1" ctrlrange="0 1.571" kp="1" forcerange="-0.7245 0.7245"></position>
248
+ <position name="robot0:A_THJ4" class="robot0:asset_class" user="2034" joint="robot0:THJ4" ctrlrange="-1.047 1.047" kp="1" forcerange="-2.3722 2.3722"></position>
249
+ <position name="robot0:A_THJ3" class="robot0:asset_class" user="2032" joint="robot0:THJ3" ctrlrange="0 1.222" kp="1" forcerange="-1.45 1.45"></position>
250
+ <position name="robot0:A_THJ2" class="robot0:asset_class" user="2030" joint="robot0:THJ2" ctrlrange="-0.209 0.209" kp="1" forcerange="-0.99 0.99"></position>
251
+ <position name="robot0:A_THJ1" class="robot0:asset_class" user="2028" joint="robot0:THJ1" ctrlrange="-0.524 0.524" kp="1" forcerange="-0.99 0.99"></position>
252
+ <position name="robot0:A_THJ0" class="robot0:asset_class" user="2026" joint="robot0:THJ0" ctrlrange="-1.571 0" kp="1" forcerange="-0.81 0.81"></position>
253
+ </actuator>
254
+ </mujoco>
gym-0.21.0/gym/envs/robotics/assets/stls/hand/F2.stl ADDED
Binary file (22.9 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/knuckle.stl ADDED
Binary file (30.2 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl ADDED
Binary file (75.1 kB). View file
 
gym-0.21.0/gym/envs/robotics/fetch/reach.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from gym import utils
3
+ from gym.envs.robotics import fetch_env
4
+
5
+
6
# Build the model path with os.path.join so the separator is correct on Windows.
MODEL_XML_PATH = os.path.join("fetch", "reach.xml")
8
+
9
+
10
+ class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
11
+ def __init__(self, reward_type="sparse"):
12
+ initial_qpos = {
13
+ "robot0:slide0": 0.4049,
14
+ "robot0:slide1": 0.48,
15
+ "robot0:slide2": 0.0,
16
+ }
17
+ fetch_env.FetchEnv.__init__(
18
+ self,
19
+ MODEL_XML_PATH,
20
+ has_object=False,
21
+ block_gripper=True,
22
+ n_substeps=20,
23
+ gripper_extra_height=0.2,
24
+ target_in_the_air=True,
25
+ target_offset=0.0,
26
+ obj_range=0.15,
27
+ target_range=0.15,
28
+ distance_threshold=0.05,
29
+ initial_qpos=initial_qpos,
30
+ reward_type=reward_type,
31
+ )
32
+ utils.EzPickle.__init__(self, reward_type=reward_type)
gym-0.21.0/gym/envs/robotics/hand/__init__.py ADDED
File without changes
gym-0.21.0/gym/envs/robotics/hand/manipulate.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+
4
+ from gym import utils, error
5
+ from gym.envs.robotics import rotations, hand_env
6
+ from gym.envs.robotics.utils import robot_get_obs
7
+
8
+ try:
9
+ import mujoco_py
10
+ except ImportError as e:
11
+ raise error.DependencyNotInstalled(
12
+ "{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(
13
+ e
14
+ )
15
+ )
16
+
17
+
18
+ def quat_from_angle_and_axis(angle, axis):
19
+ assert axis.shape == (3,)
20
+ axis /= np.linalg.norm(axis)
21
+ quat = np.concatenate([[np.cos(angle / 2.0)], np.sin(angle / 2.0) * axis])
22
+ quat /= np.linalg.norm(quat)
23
+ return quat
24
+
25
+
26
+ # Ensure we get the path separator correct on windows
27
+ MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block.xml")
28
+ MANIPULATE_EGG_XML = os.path.join("hand", "manipulate_egg.xml")
29
+ MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen.xml")
30
+
31
+
32
+ class ManipulateEnv(hand_env.HandEnv):
33
+ def __init__(
34
+ self,
35
+ model_path,
36
+ target_position,
37
+ target_rotation,
38
+ target_position_range,
39
+ reward_type,
40
+ initial_qpos=None,
41
+ randomize_initial_position=True,
42
+ randomize_initial_rotation=True,
43
+ distance_threshold=0.01,
44
+ rotation_threshold=0.1,
45
+ n_substeps=20,
46
+ relative_control=False,
47
+ ignore_z_target_rotation=False,
48
+ ):
49
+ """Initializes a new Hand manipulation environment.
50
+
51
+ Args:
52
+ model_path (string): path to the environments XML file
53
+ target_position (string): the type of target position:
54
+ - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily
55
+ - fixed: target position is set to the initial position of the object
56
+ - random: target position is fully randomized according to target_position_range
57
+ target_rotation (string): the type of target rotation:
58
+ - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily
59
+ - fixed: target rotation is set to the initial rotation of the object
60
+ - xyz: fully randomized target rotation around the X, Y and Z axis
61
+ - z: fully randomized target rotation around the Z axis
62
+ - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y
63
+ ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored
64
+ target_position_range (np.array of shape (3, 2)): range of the target_position randomization
65
+ reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
66
+ initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
67
+ randomize_initial_position (boolean): whether or not to randomize the initial position of the object
68
+ randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object
69
+ distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved
70
+ rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved
71
+ n_substeps (int): number of substeps the simulation runs on every call to step
72
+ relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state
73
+ """
74
+ self.target_position = target_position
75
+ self.target_rotation = target_rotation
76
+ self.target_position_range = target_position_range
77
+ self.parallel_quats = [
78
+ rotations.euler2quat(r) for r in rotations.get_parallel_rotations()
79
+ ]
80
+ self.randomize_initial_rotation = randomize_initial_rotation
81
+ self.randomize_initial_position = randomize_initial_position
82
+ self.distance_threshold = distance_threshold
83
+ self.rotation_threshold = rotation_threshold
84
+ self.reward_type = reward_type
85
+ self.ignore_z_target_rotation = ignore_z_target_rotation
86
+
87
+ assert self.target_position in ["ignore", "fixed", "random"]
88
+ assert self.target_rotation in ["ignore", "fixed", "xyz", "z", "parallel"]
89
+ initial_qpos = initial_qpos or {}
90
+
91
+ hand_env.HandEnv.__init__(
92
+ self,
93
+ model_path,
94
+ n_substeps=n_substeps,
95
+ initial_qpos=initial_qpos,
96
+ relative_control=relative_control,
97
+ )
98
+
99
+ def _get_achieved_goal(self):
100
+ # Object position and rotation.
101
+ object_qpos = self.sim.data.get_joint_qpos("object:joint")
102
+ assert object_qpos.shape == (7,)
103
+ return object_qpos
104
+
105
+ def _goal_distance(self, goal_a, goal_b):
106
+ assert goal_a.shape == goal_b.shape
107
+ assert goal_a.shape[-1] == 7
108
+
109
+ d_pos = np.zeros_like(goal_a[..., 0])
110
+ d_rot = np.zeros_like(goal_b[..., 0])
111
+ if self.target_position != "ignore":
112
+ delta_pos = goal_a[..., :3] - goal_b[..., :3]
113
+ d_pos = np.linalg.norm(delta_pos, axis=-1)
114
+
115
+ if self.target_rotation != "ignore":
116
+ quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]
117
+
118
+ if self.ignore_z_target_rotation:
119
+ # Special case: We want to ignore the Z component of the rotation.
120
+ # This code here assumes Euler angles with xyz convention. We first transform
121
+ # to euler, then set the Z component to be equal between the two, and finally
122
+ # transform back into quaternions.
123
+ euler_a = rotations.quat2euler(quat_a)
124
+ euler_b = rotations.quat2euler(quat_b)
125
+ euler_a[2] = euler_b[2]
126
+ quat_a = rotations.euler2quat(euler_a)
127
+
128
+ # Subtract quaternions and extract angle between them.
129
+ quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
130
+ angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0))
131
+ d_rot = angle_diff
132
+ assert d_pos.shape == d_rot.shape
133
+ return d_pos, d_rot
134
+
135
+ # GoalEnv methods
136
+ # ----------------------------
137
+
138
+ def compute_reward(self, achieved_goal, goal, info):
139
+ if self.reward_type == "sparse":
140
+ success = self._is_success(achieved_goal, goal).astype(np.float32)
141
+ return success - 1.0
142
+ else:
143
+ d_pos, d_rot = self._goal_distance(achieved_goal, goal)
144
+ # We weigh the difference in position to avoid that `d_pos` (in meters) is completely
145
+ # dominated by `d_rot` (in radians).
146
+ return -(10.0 * d_pos + d_rot)
147
+
148
+ # RobotEnv methods
149
+ # ----------------------------
150
+
151
+ def _is_success(self, achieved_goal, desired_goal):
152
+ d_pos, d_rot = self._goal_distance(achieved_goal, desired_goal)
153
+ achieved_pos = (d_pos < self.distance_threshold).astype(np.float32)
154
+ achieved_rot = (d_rot < self.rotation_threshold).astype(np.float32)
155
+ achieved_both = achieved_pos * achieved_rot
156
+ return achieved_both
157
+
158
+ def _env_setup(self, initial_qpos):
159
+ for name, value in initial_qpos.items():
160
+ self.sim.data.set_joint_qpos(name, value)
161
+ self.sim.forward()
162
+
163
+ def _reset_sim(self):
164
+ self.sim.set_state(self.initial_state)
165
+ self.sim.forward()
166
+
167
+ initial_qpos = self.sim.data.get_joint_qpos("object:joint").copy()
168
+ initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
169
+ assert initial_qpos.shape == (7,)
170
+ assert initial_pos.shape == (3,)
171
+ assert initial_quat.shape == (4,)
172
+ initial_qpos = None
173
+
174
+ # Randomization initial rotation.
175
+ if self.randomize_initial_rotation:
176
+ if self.target_rotation == "z":
177
+ angle = self.np_random.uniform(-np.pi, np.pi)
178
+ axis = np.array([0.0, 0.0, 1.0])
179
+ offset_quat = quat_from_angle_and_axis(angle, axis)
180
+ initial_quat = rotations.quat_mul(initial_quat, offset_quat)
181
+ elif self.target_rotation == "parallel":
182
+ angle = self.np_random.uniform(-np.pi, np.pi)
183
+ axis = np.array([0.0, 0.0, 1.0])
184
+ z_quat = quat_from_angle_and_axis(angle, axis)
185
+ parallel_quat = self.parallel_quats[
186
+ self.np_random.randint(len(self.parallel_quats))
187
+ ]
188
+ offset_quat = rotations.quat_mul(z_quat, parallel_quat)
189
+ initial_quat = rotations.quat_mul(initial_quat, offset_quat)
190
+ elif self.target_rotation in ["xyz", "ignore"]:
191
+ angle = self.np_random.uniform(-np.pi, np.pi)
192
+ axis = self.np_random.uniform(-1.0, 1.0, size=3)
193
+ offset_quat = quat_from_angle_and_axis(angle, axis)
194
+ initial_quat = rotations.quat_mul(initial_quat, offset_quat)
195
+ elif self.target_rotation == "fixed":
196
+ pass
197
+ else:
198
+ raise error.Error(
199
+ 'Unknown target_rotation option "{}".'.format(self.target_rotation)
200
+ )
201
+
202
+ # Randomize initial position.
203
+ if self.randomize_initial_position:
204
+ if self.target_position != "fixed":
205
+ initial_pos += self.np_random.normal(size=3, scale=0.005)
206
+
207
+ initial_quat /= np.linalg.norm(initial_quat)
208
+ initial_qpos = np.concatenate([initial_pos, initial_quat])
209
+ self.sim.data.set_joint_qpos("object:joint", initial_qpos)
210
+
211
+ def is_on_palm():
212
+ self.sim.forward()
213
+ cube_middle_idx = self.sim.model.site_name2id("object:center")
214
+ cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
215
+ is_on_palm = cube_middle_pos[2] > 0.04
216
+ return is_on_palm
217
+
218
+ # Run the simulation for a bunch of timesteps to let everything settle in.
219
+ for _ in range(10):
220
+ self._set_action(np.zeros(20))
221
+ try:
222
+ self.sim.step()
223
+ except mujoco_py.MujocoException:
224
+ return False
225
+ return is_on_palm()
226
+
227
+ def _sample_goal(self):
228
+ # Select a goal for the object position.
229
+ target_pos = None
230
+ if self.target_position == "random":
231
+ assert self.target_position_range.shape == (3, 2)
232
+ offset = self.np_random.uniform(
233
+ self.target_position_range[:, 0], self.target_position_range[:, 1]
234
+ )
235
+ assert offset.shape == (3,)
236
+ target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + offset
237
+ elif self.target_position in ["ignore", "fixed"]:
238
+ target_pos = self.sim.data.get_joint_qpos("object:joint")[:3]
239
+ else:
240
+ raise error.Error(
241
+ 'Unknown target_position option "{}".'.format(self.target_position)
242
+ )
243
+ assert target_pos is not None
244
+ assert target_pos.shape == (3,)
245
+
246
+ # Select a goal for the object rotation.
247
+ target_quat = None
248
+ if self.target_rotation == "z":
249
+ angle = self.np_random.uniform(-np.pi, np.pi)
250
+ axis = np.array([0.0, 0.0, 1.0])
251
+ target_quat = quat_from_angle_and_axis(angle, axis)
252
+ elif self.target_rotation == "parallel":
253
+ angle = self.np_random.uniform(-np.pi, np.pi)
254
+ axis = np.array([0.0, 0.0, 1.0])
255
+ target_quat = quat_from_angle_and_axis(angle, axis)
256
+ parallel_quat = self.parallel_quats[
257
+ self.np_random.randint(len(self.parallel_quats))
258
+ ]
259
+ target_quat = rotations.quat_mul(target_quat, parallel_quat)
260
+ elif self.target_rotation == "xyz":
261
+ angle = self.np_random.uniform(-np.pi, np.pi)
262
+ axis = self.np_random.uniform(-1.0, 1.0, size=3)
263
+ target_quat = quat_from_angle_and_axis(angle, axis)
264
+ elif self.target_rotation in ["ignore", "fixed"]:
265
+ target_quat = self.sim.data.get_joint_qpos("object:joint")
266
+ else:
267
+ raise error.Error(
268
+ 'Unknown target_rotation option "{}".'.format(self.target_rotation)
269
+ )
270
+ assert target_quat is not None
271
+ assert target_quat.shape == (4,)
272
+
273
+ target_quat /= np.linalg.norm(target_quat) # normalized quaternion
274
+ goal = np.concatenate([target_pos, target_quat])
275
+ return goal
276
+
277
+ def _render_callback(self):
278
+ # Assign current state to target object but offset a bit so that the actual object
279
+ # is not obscured.
280
+ goal = self.goal.copy()
281
+ assert goal.shape == (7,)
282
+ if self.target_position == "ignore":
283
+ # Move the object to the side since we do not care about it's position.
284
+ goal[0] += 0.15
285
+ self.sim.data.set_joint_qpos("target:joint", goal)
286
+ self.sim.data.set_joint_qvel("target:joint", np.zeros(6))
287
+
288
+ if "object_hidden" in self.sim.model.geom_names:
289
+ hidden_id = self.sim.model.geom_name2id("object_hidden")
290
+ self.sim.model.geom_rgba[hidden_id, 3] = 1.0
291
+ self.sim.forward()
292
+
293
+ def _get_obs(self):
294
+ robot_qpos, robot_qvel = robot_get_obs(self.sim)
295
+ object_qvel = self.sim.data.get_joint_qvel("object:joint")
296
+ achieved_goal = (
297
+ self._get_achieved_goal().ravel()
298
+ ) # this contains the object position + rotation
299
+ observation = np.concatenate(
300
+ [robot_qpos, robot_qvel, object_qvel, achieved_goal]
301
+ )
302
+ return {
303
+ "observation": observation.copy(),
304
+ "achieved_goal": achieved_goal.copy(),
305
+ "desired_goal": self.goal.ravel().copy(),
306
+ }
307
+
308
+
309
+ class HandBlockEnv(ManipulateEnv, utils.EzPickle):
310
+ def __init__(
311
+ self, target_position="random", target_rotation="xyz", reward_type="sparse"
312
+ ):
313
+ utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
314
+ ManipulateEnv.__init__(
315
+ self,
316
+ model_path=MANIPULATE_BLOCK_XML,
317
+ target_position=target_position,
318
+ target_rotation=target_rotation,
319
+ target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
320
+ reward_type=reward_type,
321
+ )
322
+
323
+
324
+ class HandEggEnv(ManipulateEnv, utils.EzPickle):
325
+ def __init__(
326
+ self, target_position="random", target_rotation="xyz", reward_type="sparse"
327
+ ):
328
+ utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
329
+ ManipulateEnv.__init__(
330
+ self,
331
+ model_path=MANIPULATE_EGG_XML,
332
+ target_position=target_position,
333
+ target_rotation=target_rotation,
334
+ target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
335
+ reward_type=reward_type,
336
+ )
337
+
338
+
339
+ class HandPenEnv(ManipulateEnv, utils.EzPickle):
340
+ def __init__(
341
+ self, target_position="random", target_rotation="xyz", reward_type="sparse"
342
+ ):
343
+ utils.EzPickle.__init__(self, target_position, target_rotation, reward_type)
344
+ ManipulateEnv.__init__(
345
+ self,
346
+ model_path=MANIPULATE_PEN_XML,
347
+ target_position=target_position,
348
+ target_rotation=target_rotation,
349
+ target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]),
350
+ randomize_initial_rotation=False,
351
+ reward_type=reward_type,
352
+ ignore_z_target_rotation=True,
353
+ distance_threshold=0.05,
354
+ )
gym-0.21.0/gym/envs/robotics/rotations.py ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2009-2017, Matthew Brett and Christoph Gohlke
2
+ # All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are
6
+ # met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ #
11
+ # 2. Redistributions in binary form must reproduce the above copyright
12
+ # notice, this list of conditions and the following disclaimer in the
13
+ # documentation and/or other materials provided with the distribution.
14
+ #
15
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
16
+ # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17
+ # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18
+ # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
+ # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+
27
+ # Many methods borrow heavily or entirely from transforms3d:
28
+ # https://github.com/matthew-brett/transforms3d
29
+ # They have mostly been modified to support batched operations.
30
+
31
+ import numpy as np
32
+ import itertools
33
+
34
+ """
35
+ Rotations
36
+ =========
37
+
38
+ Note: these have caused many subtle bugs in the past.
39
+ Be careful while updating these methods and while using them in clever ways.
40
+
41
+ See MuJoCo documentation here: http://mujoco.org/book/modeling.html#COrientation
42
+
43
+ Conventions
44
+ -----------
45
+ - All functions accept batches as well as individual rotations
46
+ - All rotation conventions match respective MuJoCo defaults
47
+ - All angles are in radians
48
+ - Matricies follow LR convention
49
+ - Euler Angles are all relative with 'xyz' axes ordering
50
+ - See specific representation for more information
51
+
52
+ Representations
53
+ ---------------
54
+
55
+ Euler
56
+ There are many euler angle frames -- here we will strive to use the default
57
+ in MuJoCo, which is eulerseq='xyz'.
58
+ This frame is a relative rotating frame, about x, y, and z axes in order.
59
+ Relative rotating means that after we rotate about x, then we use the
60
+ new (rotated) y, and the same for z.
61
+
62
+ Quaternions
63
+ These are defined in terms of rotation (angle) about a unit vector (x, y, z)
64
+ We use the following <q0, q1, q2, q3> convention:
65
+ q0 = cos(angle / 2)
66
+ q1 = sin(angle / 2) * x
67
+ q2 = sin(angle / 2) * y
68
+ q3 = sin(angle / 2) * z
69
+ This is also sometimes called qw, qx, qy, qz.
70
+ Note that quaternions are ambiguous, because we can represent a rotation by
71
+ angle about vector <x, y, z> and -angle about vector <-x, -y, -z>.
72
+ To choose between these, we pick "first nonzero positive", where we
73
+ make the first nonzero element of the quaternion positive.
74
+ This can result in mismatches if you're converting an quaternion that is not
75
+ "first nonzero positive" to a different representation and back.
76
+
77
+ Axis Angle
78
+ (Not currently implemented)
79
+ These are very straightforward. Rotation is angle about a unit vector.
80
+
81
+ XY Axes
82
+ (Not currently implemented)
83
+ We are given x axis and y axis, and z axis is cross product of x and y.
84
+
85
+ Z Axis
86
+ This is NOT RECOMMENDED. Defines a unit vector for the Z axis,
87
+ but rotation about this axis is not well defined.
88
+ Instead pick a fixed reference direction for another axis (e.g. X)
89
+ and calculate the other (e.g. Y = Z cross-product X),
90
+ then use XY Axes rotation instead.
91
+
92
+ SO3
93
+ (Not currently implemented)
94
+ While not supported by MuJoCo, this representation has a lot of nice features.
95
+ We expect to add support for these in the future.
96
+
97
+ TODO / Missing
98
+ --------------
99
+ - Rotation integration or derivatives (e.g. velocity conversions)
100
+ - More representations (SO3, etc)
101
+ - Random sampling (e.g. sample uniform random rotation)
102
+ - Performance benchmarks/measurements
103
+ - (Maybe) define everything as to/from matricies, for simplicity
104
+ """
105
+
106
+ # For testing whether a number is close to zero
107
+ _FLOAT_EPS = np.finfo(np.float64).eps
108
+ _EPS4 = _FLOAT_EPS * 4.0
109
+
110
+
111
+ def euler2mat(euler):
112
+ """Convert Euler Angles to Rotation Matrix. See rotation.py for notes"""
113
+ euler = np.asarray(euler, dtype=np.float64)
114
+ assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler)
115
+
116
+ ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0]
117
+ si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
118
+ ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
119
+ cc, cs = ci * ck, ci * sk
120
+ sc, ss = si * ck, si * sk
121
+
122
+ mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64)
123
+ mat[..., 2, 2] = cj * ck
124
+ mat[..., 2, 1] = sj * sc - cs
125
+ mat[..., 2, 0] = sj * cc + ss
126
+ mat[..., 1, 2] = cj * sk
127
+ mat[..., 1, 1] = sj * ss + cc
128
+ mat[..., 1, 0] = sj * cs - sc
129
+ mat[..., 0, 2] = -sj
130
+ mat[..., 0, 1] = cj * si
131
+ mat[..., 0, 0] = cj * ci
132
+ return mat
133
+
134
+
135
+ def euler2quat(euler):
136
+ """Convert Euler Angles to Quaternions. See rotation.py for notes"""
137
+ euler = np.asarray(euler, dtype=np.float64)
138
+ assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler)
139
+
140
+ ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2
141
+ si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak)
142
+ ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak)
143
+ cc, cs = ci * ck, ci * sk
144
+ sc, ss = si * ck, si * sk
145
+
146
+ quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64)
147
+ quat[..., 0] = cj * cc + sj * ss
148
+ quat[..., 3] = cj * sc - sj * cs
149
+ quat[..., 2] = -(cj * ss + sj * cc)
150
+ quat[..., 1] = cj * cs - sj * sc
151
+ return quat
152
+
153
+
154
+ def mat2euler(mat):
155
+ """Convert Rotation Matrix to Euler Angles. See rotation.py for notes"""
156
+ mat = np.asarray(mat, dtype=np.float64)
157
+ assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
158
+
159
+ cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2])
160
+ condition = cy > _EPS4
161
+ euler = np.empty(mat.shape[:-1], dtype=np.float64)
162
+ euler[..., 2] = np.where(
163
+ condition,
164
+ -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]),
165
+ -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1]),
166
+ )
167
+ euler[..., 1] = np.where(
168
+ condition, -np.arctan2(-mat[..., 0, 2], cy), -np.arctan2(-mat[..., 0, 2], cy)
169
+ )
170
+ euler[..., 0] = np.where(
171
+ condition, -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), 0.0
172
+ )
173
+ return euler
174
+
175
+
176
+ def mat2quat(mat):
177
+ """Convert Rotation Matrix to Quaternion. See rotation.py for notes"""
178
+ mat = np.asarray(mat, dtype=np.float64)
179
+ assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat)
180
+
181
+ Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2]
182
+ Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2]
183
+ Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2]
184
+ # Fill only lower half of symmetric matrix
185
+ K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64)
186
+ K[..., 0, 0] = Qxx - Qyy - Qzz
187
+ K[..., 1, 0] = Qyx + Qxy
188
+ K[..., 1, 1] = Qyy - Qxx - Qzz
189
+ K[..., 2, 0] = Qzx + Qxz
190
+ K[..., 2, 1] = Qzy + Qyz
191
+ K[..., 2, 2] = Qzz - Qxx - Qyy
192
+ K[..., 3, 0] = Qyz - Qzy
193
+ K[..., 3, 1] = Qzx - Qxz
194
+ K[..., 3, 2] = Qxy - Qyx
195
+ K[..., 3, 3] = Qxx + Qyy + Qzz
196
+ K /= 3.0
197
+ # TODO: vectorize this -- probably could be made faster
198
+ q = np.empty(K.shape[:-2] + (4,))
199
+ it = np.nditer(q[..., 0], flags=["multi_index"])
200
+ while not it.finished:
201
+ # Use Hermitian eigenvectors, values for speed
202
+ vals, vecs = np.linalg.eigh(K[it.multi_index])
203
+ # Select largest eigenvector, reorder to w,x,y,z quaternion
204
+ q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)]
205
+ # Prefer quaternion with positive w
206
+ # (q * -1 corresponds to same rotation as q)
207
+ if q[it.multi_index][0] < 0:
208
+ q[it.multi_index] *= -1
209
+ it.iternext()
210
+ return q
211
+
212
+
213
+ def quat2euler(quat):
214
+ """Convert Quaternion to Euler Angles. See rotation.py for notes"""
215
+ return mat2euler(quat2mat(quat))
216
+
217
+
218
+ def subtract_euler(e1, e2):
219
+ assert e1.shape == e2.shape
220
+ assert e1.shape[-1] == 3
221
+ q1 = euler2quat(e1)
222
+ q2 = euler2quat(e2)
223
+ q_diff = quat_mul(q1, quat_conjugate(q2))
224
+ return quat2euler(q_diff)
225
+
226
+
227
+ def quat2mat(quat):
228
+ """Convert Quaternion to Euler Angles. See rotation.py for notes"""
229
+ quat = np.asarray(quat, dtype=np.float64)
230
+ assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat)
231
+
232
+ w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3]
233
+ Nq = np.sum(quat * quat, axis=-1)
234
+ s = 2.0 / Nq
235
+ X, Y, Z = x * s, y * s, z * s
236
+ wX, wY, wZ = w * X, w * Y, w * Z
237
+ xX, xY, xZ = x * X, x * Y, x * Z
238
+ yY, yZ, zZ = y * Y, y * Z, z * Z
239
+
240
+ mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64)
241
+ mat[..., 0, 0] = 1.0 - (yY + zZ)
242
+ mat[..., 0, 1] = xY - wZ
243
+ mat[..., 0, 2] = xZ + wY
244
+ mat[..., 1, 0] = xY + wZ
245
+ mat[..., 1, 1] = 1.0 - (xX + zZ)
246
+ mat[..., 1, 2] = yZ - wX
247
+ mat[..., 2, 0] = xZ - wY
248
+ mat[..., 2, 1] = yZ + wX
249
+ mat[..., 2, 2] = 1.0 - (xX + yY)
250
+ return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3))
251
+
252
+
253
+ def quat_conjugate(q):
254
+ inv_q = -q
255
+ inv_q[..., 0] *= -1
256
+ return inv_q
257
+
258
+
259
+ def quat_mul(q0, q1):
260
+ assert q0.shape == q1.shape
261
+ assert q0.shape[-1] == 4
262
+ assert q1.shape[-1] == 4
263
+
264
+ w0 = q0[..., 0]
265
+ x0 = q0[..., 1]
266
+ y0 = q0[..., 2]
267
+ z0 = q0[..., 3]
268
+
269
+ w1 = q1[..., 0]
270
+ x1 = q1[..., 1]
271
+ y1 = q1[..., 2]
272
+ z1 = q1[..., 3]
273
+
274
+ w = w0 * w1 - x0 * x1 - y0 * y1 - z0 * z1
275
+ x = w0 * x1 + x0 * w1 + y0 * z1 - z0 * y1
276
+ y = w0 * y1 + y0 * w1 + z0 * x1 - x0 * z1
277
+ z = w0 * z1 + z0 * w1 + x0 * y1 - y0 * x1
278
+ q = np.array([w, x, y, z])
279
+ if q.ndim == 2:
280
+ q = q.swapaxes(0, 1)
281
+ assert q.shape == q0.shape
282
+ return q
283
+
284
+
285
+ def quat_rot_vec(q, v0):
286
+ q_v0 = np.array([0, v0[0], v0[1], v0[2]])
287
+ q_v = quat_mul(q, quat_mul(q_v0, quat_conjugate(q)))
288
+ v = q_v[1:]
289
+ return v
290
+
291
+
292
+ def quat_identity():
293
+ return np.array([1, 0, 0, 0])
294
+
295
+
296
+ def quat2axisangle(quat):
297
+ theta = 0
298
+ axis = np.array([0, 0, 1])
299
+ sin_theta = np.linalg.norm(quat[1:])
300
+
301
+ if sin_theta > 0.0001:
302
+ theta = 2 * np.arcsin(sin_theta)
303
+ theta *= 1 if quat[0] >= 0 else -1
304
+ axis = quat[1:] / sin_theta
305
+
306
+ return axis, theta
307
+
308
+
309
+ def euler2point_euler(euler):
310
+ _euler = euler.copy()
311
+ if len(_euler.shape) < 2:
312
+ _euler = np.expand_dims(_euler, 0)
313
+ assert _euler.shape[1] == 3
314
+ _euler_sin = np.sin(_euler)
315
+ _euler_cos = np.cos(_euler)
316
+ return np.concatenate([_euler_sin, _euler_cos], axis=-1)
317
+
318
+
319
+ def point_euler2euler(euler):
320
+ _euler = euler.copy()
321
+ if len(_euler.shape) < 2:
322
+ _euler = np.expand_dims(_euler, 0)
323
+ assert _euler.shape[1] == 6
324
+ angle = np.arctan(_euler[..., :3] / _euler[..., 3:])
325
+ angle[_euler[..., 3:] < 0] += np.pi
326
+ return angle
327
+
328
+
329
+ def quat2point_quat(quat):
330
+ # Should be in qw, qx, qy, qz
331
+ _quat = quat.copy()
332
+ if len(_quat.shape) < 2:
333
+ _quat = np.expand_dims(_quat, 0)
334
+ assert _quat.shape[1] == 4
335
+ angle = np.arccos(_quat[:, [0]]) * 2
336
+ xyz = _quat[:, 1:]
337
+ xyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (xyz / np.sin(angle / 2))[
338
+ np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
339
+ ]
340
+ return np.concatenate([np.sin(angle), np.cos(angle), xyz], axis=-1)
341
+
342
+
343
+ def point_quat2quat(quat):
344
+ _quat = quat.copy()
345
+ if len(_quat.shape) < 2:
346
+ _quat = np.expand_dims(_quat, 0)
347
+ assert _quat.shape[1] == 5
348
+ angle = np.arctan(_quat[:, [0]] / _quat[:, [1]])
349
+ qw = np.cos(angle / 2)
350
+
351
+ qxyz = _quat[:, 2:]
352
+ qxyz[np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5] = (qxyz * np.sin(angle / 2))[
353
+ np.squeeze(np.abs(np.sin(angle / 2))) >= 1e-5
354
+ ]
355
+ return np.concatenate([qw, qxyz], axis=-1)
356
+
357
+
358
+ def normalize_angles(angles):
359
+ """Puts angles in [-pi, pi] range."""
360
+ angles = angles.copy()
361
+ if angles.size > 0:
362
+ angles = (angles + np.pi) % (2 * np.pi) - np.pi
363
+ assert -np.pi - 1e-6 <= angles.min() and angles.max() <= np.pi + 1e-6
364
+ return angles
365
+
366
+
367
+ def round_to_straight_angles(angles):
368
+ """Returns closest angle modulo 90 degrees"""
369
+ angles = np.round(angles / (np.pi / 2)) * (np.pi / 2)
370
+ return normalize_angles(angles)
371
+
372
+
373
+ def get_parallel_rotations():
374
+ mult90 = [0, np.pi / 2, -np.pi / 2, np.pi]
375
+ parallel_rotations = []
376
+ for euler in itertools.product(mult90, repeat=3):
377
+ canonical = mat2euler(euler2mat(euler))
378
+ canonical = np.round(canonical / (np.pi / 2))
379
+ if canonical[0] == -2:
380
+ canonical[0] = 2
381
+ if canonical[2] == -2:
382
+ canonical[2] = 2
383
+ canonical *= np.pi / 2
384
+ if all([(canonical != rot).any() for rot in parallel_rotations]):
385
+ parallel_rotations += [canonical]
386
+ assert len(parallel_rotations) == 24
387
+ return parallel_rotations
gym-0.21.0/gym/spaces/discrete.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from .space import Space
3
+
4
+
5
+ class Discrete(Space):
6
+ r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
7
+
8
+ Example::
9
+
10
+ >>> Discrete(2)
11
+
12
+ """
13
+
14
+ def __init__(self, n, seed=None):
15
+ assert n >= 0
16
+ self.n = n
17
+ super(Discrete, self).__init__((), np.int64, seed)
18
+
19
+ def sample(self):
20
+ return self.np_random.randint(self.n)
21
+
22
+ def contains(self, x):
23
+ if isinstance(x, int):
24
+ as_int = x
25
+ elif isinstance(x, (np.generic, np.ndarray)) and (
26
+ x.dtype.char in np.typecodes["AllInteger"] and x.shape == ()
27
+ ):
28
+ as_int = int(x)
29
+ else:
30
+ return False
31
+ return as_int >= 0 and as_int < self.n
32
+
33
+ def __repr__(self):
34
+ return "Discrete(%d)" % self.n
35
+
36
+ def __eq__(self, other):
37
+ return isinstance(other, Discrete) and self.n == other.n
gym-0.21.0/gym/utils/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """A set of common utilities used within the environments. These are
2
+ not intended as API functions, and will not remain stable over time.
3
+ """
4
+
5
+ # These submodules should not have any import-time dependencies.
6
+ # We want this since we use `utils` during our import-time sanity checks
7
+ # that verify that our dependencies are actually present.
8
+ from .colorize import colorize
9
+ from .ezpickle import EzPickle
gym-0.21.0/gym/utils/__pycache__/closer.cpython-38.pyc ADDED
Binary file (2.39 kB). View file
 
gym-0.21.0/gym/utils/__pycache__/colorize.cpython-38.pyc ADDED
Binary file (958 Bytes). View file
 
gym-0.21.0/gym/utils/__pycache__/ezpickle.cpython-38.pyc ADDED
Binary file (1.51 kB). View file
 
gym-0.21.0/gym/utils/__pycache__/json_utils.cpython-38.pyc ADDED
Binary file (635 Bytes). View file
 
gym-0.21.0/gym/utils/__pycache__/seeding.cpython-38.pyc ADDED
Binary file (6.59 kB). View file
 
gym-0.21.0/gym/utils/atomic_write.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Based on http://stackoverflow.com/questions/2333872/atomic-writing-to-file-with-python
2
+
3
+ import os
4
+ from contextlib import contextmanager
5
+
6
+ # We would ideally atomically replace any existing file with the new
7
+ # version. However, on Windows there's no Python-only solution prior
8
+ # to Python 3.3. (This library includes a C extension to do so:
9
+ # https://pypi.python.org/pypi/pyosreplace/0.1.)
10
+ #
11
+ # Correspondingly, we make a best effort, but on Python < 3.3 use a
12
+ # replace method which could result in the file temporarily
13
+ # disappearing.
14
+ import sys
15
+
16
# Select the best available file-replacement primitive for this platform.
if sys.version_info >= (3, 3):
    # Python 3.3 and up have a native `replace` method
    from os import replace
elif sys.platform.startswith("win"):

    def replace(src, dst):
        # Two-step remove + rename: NOT atomic — `dst` may briefly not
        # exist between the two calls. Best effort only on old Windows.
        # TODO: on Windows, this will raise if the file is in use,
        # which is possible. We'll need to make this more robust over
        # time.
        try:
            os.remove(dst)
        except OSError:
            pass
        os.rename(src, dst)


else:
    # POSIX rename() is always atomic
    from os import rename as replace
35
+
36
+
37
@contextmanager
def atomic_write(filepath, binary=False, fsync=False):
    """Context manager yielding a file object whose contents atomically
    replace ``filepath`` on successful exit, via a temporary sibling file.
    In some cases (namely Python < 3.3 on Windows) the existing file may be
    temporarily unlinked rather than replaced atomically.

    :param filepath: the file path to be opened
    :param binary: whether to open the file in a binary mode instead of textual
    :param fsync: whether to force write the file to disk
    """
    # Pick a "~"-suffixed sibling name that does not collide with an
    # existing file.
    tmppath = filepath + "~"
    while os.path.isfile(tmppath):
        tmppath += "~"
    mode = "wb" if binary else "w"
    try:
        with open(tmppath, mode) as handle:
            yield handle
            if fsync:
                # Push the bytes to disk before the rename makes them live.
                handle.flush()
                os.fsync(handle.fileno())
        replace(tmppath, filepath)
    finally:
        # Best-effort cleanup: after a successful replace the temp file is
        # already gone and os.remove raises, which we deliberately swallow.
        try:
            os.remove(tmppath)
        except (IOError, OSError):
            pass
gym-0.21.0/gym/utils/closer.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import atexit
2
+ import threading
3
+ import weakref
4
+
5
+
6
class Closer(object):
    """A registry that guarantees registered objects get closed — manually,
    on garbage collection, or at interpreter exit. Cooperating objects
    should look like:

    ```
    closer = Closer()
    class Example(object):
        def __init__(self):
            self._id = closer.register(self)

        def close(self):
            # Probably worth making idempotent too!
            ...
            closer.unregister(self._id)

        def __del__(self):
            self.close()
    ```

    In other words, an object should:

    - register() itself and keep the returned ID
    - unregister() itself from close()
    - define __del__ so collection triggers close()
    """

    def __init__(self, atexit_register=True):
        self.lock = threading.Lock()
        self.next_id = -1
        # Weak references only: a registered object that becomes garbage is
        # dropped from the registry instead of being kept alive by it.
        self.closeables = weakref.WeakValueDictionary()

        if atexit_register:
            atexit.register(self.close)

    def generate_next_id(self):
        """Atomically allocate and return the next registration ID."""
        with self.lock:
            self.next_id += 1
            return self.next_id

    def register(self, closeable):
        """Registers an object with a 'close' method.

        Returns:
            int: The registration ID of this object. It is the caller's responsibility to save this ID if early closing is desired.
        """
        assert hasattr(closeable, "close"), "No close method for {}".format(closeable)

        new_id = self.generate_next_id()
        self.closeables[new_id] = closeable
        return new_id

    def unregister(self, id):
        """Remove a previously registered object; no-op if already gone."""
        assert id is not None
        # pop() quietly handles IDs that were never registered or whose
        # object has already been garbage collected.
        self.closeables.pop(id, None)

    def close(self):
        """Close every still-registered object."""
        # Snapshot the values first so entries can't disappear while we
        # iterate. cf. http://stackoverflow.com/a/12429620
        for closeable in list(self.closeables.values()):
            closeable.close()
gym-0.21.0/gym/utils/play.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import pygame
3
+ import matplotlib
4
+ import argparse
5
+ from gym import logger
6
+
7
+ try:
8
+ matplotlib.use("TkAgg")
9
+ import matplotlib.pyplot as plt
10
+ except ImportError as e:
11
+ logger.warn("failed to set matplotlib backend, plotting will not work: %s" % str(e))
12
+ plt = None
13
+
14
+ from collections import deque
15
+ from pygame.locals import VIDEORESIZE
16
+
17
+
18
def display_arr(screen, arr, video_size, transpose):
    """Normalize an image array to [0, 255] and blit it onto the screen.

    :param screen: pygame display surface to draw on
    :param arr: observation image as a numpy array
    :param video_size: (width, height) to scale the image to
    :param transpose: if True, swap the first two axes before building the
        surface (row-major image -> pygame's column-major layout)
    """
    arr_min, arr_max = arr.min(), arr.max()
    # Guard against a constant image: dividing by (arr_max - arr_min) == 0
    # would fill the frame with NaNs; render it as all-black instead.
    span = arr_max - arr_min
    arr = 255.0 * (arr - arr_min) / (span if span else 1.0)
    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
    pyg_img = pygame.transform.scale(pyg_img, video_size)
    screen.blit(pyg_img, (0, 0))
24
+
25
+
26
def play(env, transpose=True, fps=30, zoom=None, callback=None, keys_to_action=None):
    """Allows one to play the game using keyboard.

    To simply play the game use:

        play(gym.make("Pong-v4"))

    Above code works also if env is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game
    unplayable.

    If you wish to plot real time statistics as you play, you can use
    gym.utils.play.PlayPlot. Here's a sample code for plotting the reward
    for last 5 second of gameplay.

        def callback(obs_t, obs_tp1, action, rew, done, info):
            return [rew,]
        plotter = PlayPlot(callback, 30 * 5, ["reward"])

        env = gym.make("Pong-v4")
        play(env, callback=plotter.callback)


    Arguments
    ---------
    env: gym.Env
        Environment to use for playing.
    transpose: bool
        If True the output of observation is transposed.
        Defaults to true.
    fps: int
        Maximum number of steps of the environment to execute every second.
        Defaults to 30.
    zoom: float
        Make screen edge this many times bigger
    callback: lambda or None
        Callback if a callback is provided it will be executed after
        every step. It takes the following input:
            obs_t: observation before performing action
            obs_tp1: observation after performing action
            action: action that was executed
            rew: reward that was received
            done: whether the environment is done or not
            info: debug info
    keys_to_action: dict: tuple(int) -> int or None
        Mapping from keys pressed to action performed.
        For example if pressed 'w' and space at the same time is supposed
        to trigger action number 2 then key_to_action dict would look like this:

            {
                # ...
                tuple(sorted((ord('w'), ord(' ')))): 2
                # ...
            }
        If None, default key_to_action mapping for that env is used, if provided.
    """
    # Render once up front to discover the frame size before opening a window.
    env.reset()
    rendered = env.render(mode="rgb_array")

    # Resolve the key->action mapping: explicit argument first, then the
    # (possibly wrapped) environment's own mapping.
    if keys_to_action is None:
        if hasattr(env, "get_keys_to_action"):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, "get_keys_to_action"):
            keys_to_action = env.unwrapped.get_keys_to_action()
        else:
            assert False, (
                env.spec.id
                + " does not have explicit key to action mapping, "
                + "please specify one manually"
            )
    # Flatten all key combos into the set of individual keys we care about.
    relevant_keys = set(sum(map(list, keys_to_action.keys()), []))

    # Window size is (width, height); the rendered array is (height, width, ...).
    video_size = [rendered.shape[1], rendered.shape[0]]
    if zoom is not None:
        video_size = int(video_size[0] * zoom), int(video_size[1] * zoom)

    pressed_keys = []
    running = True
    # Start "done" so the first loop iteration resets the environment.
    env_done = True

    screen = pygame.display.set_mode(video_size)
    clock = pygame.time.Clock()

    while running:
        if env_done:
            env_done = False
            obs = env.reset()
        else:
            # Action 0 (usually no-op) when the pressed combo is unmapped.
            action = keys_to_action.get(tuple(sorted(pressed_keys)), 0)
            prev_obs = obs
            obs, rew, env_done, info = env.step(action)
            if callback is not None:
                callback(prev_obs, obs, action, rew, env_done, info)
        if obs is not None:
            rendered = env.render(mode="rgb_array")
            display_arr(screen, rendered, transpose=transpose, video_size=video_size)

        # process pygame events
        for event in pygame.event.get():
            # test events, set key states
            if event.type == pygame.KEYDOWN:
                if event.key in relevant_keys:
                    pressed_keys.append(event.key)
                elif event.key == 27:
                    # Escape key quits.
                    running = False
            elif event.type == pygame.KEYUP:
                if event.key in relevant_keys:
                    pressed_keys.remove(event.key)
            elif event.type == pygame.QUIT:
                running = False
            elif event.type == VIDEORESIZE:
                # Recreate the display surface at the new window size.
                video_size = event.size
                screen = pygame.display.set_mode(video_size)
                print(video_size)

        pygame.display.flip()
        # Cap the loop at `fps` environment steps per second.
        clock.tick(fps)
    pygame.quit()
144
+
145
+
146
class PlayPlot(object):
    """Live scatter plot of per-step statistics for use with ``play``.

    ``callback`` (the constructor argument) maps a transition
    ``(obs_t, obs_tp1, action, rew, done, info)`` to a list of scalar data
    points, one per named plot; this object's :meth:`callback` method is
    what gets passed to ``play`` and appends those points to rolling
    windows of length ``horizon_timesteps``.
    """

    def __init__(self, callback, horizon_timesteps, plot_names):
        # User-supplied transition -> list-of-scalars function.
        self.data_callback = callback
        self.horizon_timesteps = horizon_timesteps
        self.plot_names = plot_names

        assert plt is not None, "matplotlib backend failed, plotting will not work"

        num_plots = len(self.plot_names)
        self.fig, self.ax = plt.subplots(num_plots)
        # plt.subplots returns a bare Axes (not a list) when num_plots == 1.
        if num_plots == 1:
            self.ax = [self.ax]
        for axis, name in zip(self.ax, plot_names):
            axis.set_title(name)
        # Global step counter; also the x coordinate of the newest point.
        self.t = 0
        # Current scatter artist per plot (replaced on every callback).
        self.cur_plot = [None for _ in range(num_plots)]
        # One rolling window of data points per plot.
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]

    def callback(self, obs_t, obs_tp1, action, rew, done, info):
        """Record one transition and redraw all plots.

        Signature matches what ``play`` expects from its ``callback``
        argument.
        """
        points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
        for point, data_series in zip(points, self.data):
            data_series.append(point)
        self.t += 1

        # Visible x-range: the last `horizon_timesteps` steps.
        xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t

        for i, plot in enumerate(self.cur_plot):
            # Drop the previous frame's scatter before drawing the new one.
            if plot is not None:
                plot.remove()
            self.cur_plot[i] = self.ax[i].scatter(
                range(xmin, xmax), list(self.data[i]), c="blue"
            )
            self.ax[i].set_xlim(xmin, xmax)
        # Tiny pause lets the matplotlib GUI event loop refresh the figure.
        plt.pause(0.000001)
180
+
181
+
182
def main():
    """Command-line entry point: play a gym environment with the keyboard."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--env",
        type=str,
        default="MontezumaRevengeNoFrameskip-v4",
        help="Define Environment",
    )
    parsed = arg_parser.parse_args()
    play(gym.make(parsed.env), zoom=4, fps=60)
193
+
194
+
195
+ if __name__ == "__main__":
196
+ main()
gym-0.21.0/gym/vector/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (2.56 kB). View file
 
gym-0.21.0/gym/vector/utils/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (687 Bytes). View file
 
gym-0.21.0/gym/vector/utils/__pycache__/numpy_utils.cpython-38.pyc ADDED
Binary file (5.5 kB). View file