chenhaojun commited on
Commit
64ad37d
·
verified ·
1 Parent(s): d68fa90

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/1.0 +0 -0
  2. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/3.0 +0 -0
  3. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/depth/.zarray +24 -0
  4. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/.zarray +22 -0
  5. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/1.0 +0 -0
  6. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/17.0 +0 -0
  7. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/3.0 +0 -0
  8. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/4.0 +0 -0
  9. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/5.0 +0 -0
  10. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/0.0 +0 -0
  11. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/12.0 +0 -0
  12. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/13.0 +0 -0
  13. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/15.0 +0 -0
  14. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/5.0 +0 -0
  15. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/7.0 +0 -0
  16. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/9.0 +0 -0
  17. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/11.0.0.0 +0 -0
  18. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/13.0.0.0 +0 -0
  19. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/15.0.0.0 +0 -0
  20. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/2.0.0.0 +0 -0
  21. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/3.0.0.0 +0 -0
  22. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/5.0.0.0 +0 -0
  23. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/1.0 +0 -0
  24. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/11.0 +0 -0
  25. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/8.0 +0 -0
  26. gym-0.21.0/gym/__pycache__/__init__.cpython-38.pyc +0 -0
  27. gym-0.21.0/gym/__pycache__/error.cpython-38.pyc +0 -0
  28. gym-0.21.0/gym/__pycache__/logger.cpython-38.pyc +0 -0
  29. gym-0.21.0/gym/envs/__init__.py +646 -0
  30. gym-0.21.0/gym/envs/__pycache__/__init__.cpython-38.pyc +0 -0
  31. gym-0.21.0/gym/envs/box2d/car_racing.py +652 -0
  32. gym-0.21.0/gym/envs/mujoco/ant_v3.py +148 -0
  33. gym-0.21.0/gym/envs/mujoco/humanoid.py +72 -0
  34. gym-0.21.0/gym/envs/mujoco/humanoidstandup.py +64 -0
  35. gym-0.21.0/gym/envs/mujoco/walker2d_v3.py +130 -0
  36. gym-0.21.0/gym/envs/robotics/assets/fetch/slide.xml +32 -0
  37. gym-0.21.0/gym/envs/robotics/assets/hand/shared_asset.xml +26 -0
  38. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl +0 -0
  39. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/gripper_link.stl +0 -0
  40. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl +0 -0
  41. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl +0 -0
  42. gym-0.21.0/gym/envs/robotics/assets/stls/hand/F1.stl +0 -0
  43. gym-0.21.0/gym/envs/robotics/assets/stls/hand/TH2_z.stl +0 -0
  44. gym-0.21.0/gym/envs/robotics/assets/stls/hand/palm.stl +0 -0
  45. gym-0.21.0/gym/envs/toy_text/discrete.py +61 -0
  46. gym-0.21.0/gym/envs/unittest/memorize_digits.py +146 -0
  47. gym-0.21.0/gym/spaces/__init__.py +26 -0
  48. gym-0.21.0/gym/spaces/__pycache__/__init__.cpython-38.pyc +0 -0
  49. gym-0.21.0/gym/spaces/__pycache__/box.cpython-38.pyc +0 -0
  50. gym-0.21.0/gym/spaces/__pycache__/dict.cpython-38.pyc +0 -0
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/1.0 ADDED
Binary file (1.18 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/3.0 ADDED
Binary file (1.17 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/depth/.zarray ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 100,
4
+ 128,
5
+ 128
6
+ ],
7
+ "compressor": {
8
+ "blocksize": 0,
9
+ "clevel": 3,
10
+ "cname": "zstd",
11
+ "id": "blosc",
12
+ "shuffle": 1
13
+ },
14
+ "dtype": "<f4",
15
+ "fill_value": 0.0,
16
+ "filters": null,
17
+ "order": "C",
18
+ "shape": [
19
+ 2000,
20
+ 128,
21
+ 128
22
+ ],
23
+ "zarr_format": 2
24
+ }
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/.zarray ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 100,
4
+ 39
5
+ ],
6
+ "compressor": {
7
+ "blocksize": 0,
8
+ "clevel": 3,
9
+ "cname": "zstd",
10
+ "id": "blosc",
11
+ "shuffle": 1
12
+ },
13
+ "dtype": "<f4",
14
+ "fill_value": 0.0,
15
+ "filters": null,
16
+ "order": "C",
17
+ "shape": [
18
+ 2000,
19
+ 39
20
+ ],
21
+ "zarr_format": 2
22
+ }
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/1.0 ADDED
Binary file (2.59 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/17.0 ADDED
Binary file (2.71 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/3.0 ADDED
Binary file (2.76 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/4.0 ADDED
Binary file (2.67 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/5.0 ADDED
Binary file (2.52 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/0.0 ADDED
Binary file (1.24 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/12.0 ADDED
Binary file (1.23 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/13.0 ADDED
Binary file (1.19 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/15.0 ADDED
Binary file (1.2 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/5.0 ADDED
Binary file (1.19 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/7.0 ADDED
Binary file (1.2 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/9.0 ADDED
Binary file (1.21 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/11.0.0.0 ADDED
Binary file (603 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/13.0.0.0 ADDED
Binary file (591 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/15.0.0.0 ADDED
Binary file (567 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/2.0.0.0 ADDED
Binary file (912 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/3.0.0.0 ADDED
Binary file (583 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/5.0.0.0 ADDED
Binary file (591 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/1.0 ADDED
Binary file (2.31 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/11.0 ADDED
Binary file (2.31 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/8.0 ADDED
Binary file (2.82 kB). View file
 
gym-0.21.0/gym/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (750 Bytes). View file
 
gym-0.21.0/gym/__pycache__/error.cpython-38.pyc ADDED
Binary file (6.65 kB). View file
 
gym-0.21.0/gym/__pycache__/logger.cpython-38.pyc ADDED
Binary file (1.18 kB). View file
 
gym-0.21.0/gym/envs/__init__.py ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gym.envs.registration import (
2
+ registry,
3
+ register,
4
+ make,
5
+ spec,
6
+ load_env_plugins as _load_env_plugins,
7
+ )
8
+
9
+ # Hook to load plugins from entry points
10
+ _load_env_plugins()
11
+
12
+
13
+ # Classic
14
+ # ----------------------------------------
15
+
16
+ register(
17
+ id="CartPole-v0",
18
+ entry_point="gym.envs.classic_control:CartPoleEnv",
19
+ max_episode_steps=200,
20
+ reward_threshold=195.0,
21
+ )
22
+
23
+ register(
24
+ id="CartPole-v1",
25
+ entry_point="gym.envs.classic_control:CartPoleEnv",
26
+ max_episode_steps=500,
27
+ reward_threshold=475.0,
28
+ )
29
+
30
+ register(
31
+ id="MountainCar-v0",
32
+ entry_point="gym.envs.classic_control:MountainCarEnv",
33
+ max_episode_steps=200,
34
+ reward_threshold=-110.0,
35
+ )
36
+
37
+ register(
38
+ id="MountainCarContinuous-v0",
39
+ entry_point="gym.envs.classic_control:Continuous_MountainCarEnv",
40
+ max_episode_steps=999,
41
+ reward_threshold=90.0,
42
+ )
43
+
44
+ register(
45
+ id="Pendulum-v1",
46
+ entry_point="gym.envs.classic_control:PendulumEnv",
47
+ max_episode_steps=200,
48
+ )
49
+
50
+ register(
51
+ id="Acrobot-v1",
52
+ entry_point="gym.envs.classic_control:AcrobotEnv",
53
+ reward_threshold=-100.0,
54
+ max_episode_steps=500,
55
+ )
56
+
57
+ # Box2d
58
+ # ----------------------------------------
59
+
60
+ register(
61
+ id="LunarLander-v2",
62
+ entry_point="gym.envs.box2d:LunarLander",
63
+ max_episode_steps=1000,
64
+ reward_threshold=200,
65
+ )
66
+
67
+ register(
68
+ id="LunarLanderContinuous-v2",
69
+ entry_point="gym.envs.box2d:LunarLanderContinuous",
70
+ max_episode_steps=1000,
71
+ reward_threshold=200,
72
+ )
73
+
74
+ register(
75
+ id="BipedalWalker-v3",
76
+ entry_point="gym.envs.box2d:BipedalWalker",
77
+ max_episode_steps=1600,
78
+ reward_threshold=300,
79
+ )
80
+
81
+ register(
82
+ id="BipedalWalkerHardcore-v3",
83
+ entry_point="gym.envs.box2d:BipedalWalkerHardcore",
84
+ max_episode_steps=2000,
85
+ reward_threshold=300,
86
+ )
87
+
88
+ register(
89
+ id="CarRacing-v0",
90
+ entry_point="gym.envs.box2d:CarRacing",
91
+ max_episode_steps=1000,
92
+ reward_threshold=900,
93
+ )
94
+
95
+ # Toy Text
96
+ # ----------------------------------------
97
+
98
+ register(
99
+ id="Blackjack-v1",
100
+ entry_point="gym.envs.toy_text:BlackjackEnv",
101
+ kwargs={"sab": True, "natural": False},
102
+ )
103
+
104
+ register(
105
+ id="FrozenLake-v1",
106
+ entry_point="gym.envs.toy_text:FrozenLakeEnv",
107
+ kwargs={"map_name": "4x4"},
108
+ max_episode_steps=100,
109
+ reward_threshold=0.70, # optimum = 0.74
110
+ )
111
+
112
+ register(
113
+ id="FrozenLake8x8-v1",
114
+ entry_point="gym.envs.toy_text:FrozenLakeEnv",
115
+ kwargs={"map_name": "8x8"},
116
+ max_episode_steps=200,
117
+ reward_threshold=0.85, # optimum = 0.91
118
+ )
119
+
120
+ register(
121
+ id="CliffWalking-v0",
122
+ entry_point="gym.envs.toy_text:CliffWalkingEnv",
123
+ )
124
+
125
+ register(
126
+ id="Taxi-v3",
127
+ entry_point="gym.envs.toy_text:TaxiEnv",
128
+ reward_threshold=8, # optimum = 8.46
129
+ max_episode_steps=200,
130
+ )
131
+
132
+ # Mujoco
133
+ # ----------------------------------------
134
+
135
+ # 2D
136
+
137
+ register(
138
+ id="Reacher-v2",
139
+ entry_point="gym.envs.mujoco:ReacherEnv",
140
+ max_episode_steps=50,
141
+ reward_threshold=-3.75,
142
+ )
143
+
144
+ register(
145
+ id="Pusher-v2",
146
+ entry_point="gym.envs.mujoco:PusherEnv",
147
+ max_episode_steps=100,
148
+ reward_threshold=0.0,
149
+ )
150
+
151
+ register(
152
+ id="Thrower-v2",
153
+ entry_point="gym.envs.mujoco:ThrowerEnv",
154
+ max_episode_steps=100,
155
+ reward_threshold=0.0,
156
+ )
157
+
158
+ register(
159
+ id="Striker-v2",
160
+ entry_point="gym.envs.mujoco:StrikerEnv",
161
+ max_episode_steps=100,
162
+ reward_threshold=0.0,
163
+ )
164
+
165
+ register(
166
+ id="InvertedPendulum-v2",
167
+ entry_point="gym.envs.mujoco:InvertedPendulumEnv",
168
+ max_episode_steps=1000,
169
+ reward_threshold=950.0,
170
+ )
171
+
172
+ register(
173
+ id="InvertedDoublePendulum-v2",
174
+ entry_point="gym.envs.mujoco:InvertedDoublePendulumEnv",
175
+ max_episode_steps=1000,
176
+ reward_threshold=9100.0,
177
+ )
178
+
179
+ register(
180
+ id="HalfCheetah-v2",
181
+ entry_point="gym.envs.mujoco:HalfCheetahEnv",
182
+ max_episode_steps=1000,
183
+ reward_threshold=4800.0,
184
+ )
185
+
186
+ register(
187
+ id="HalfCheetah-v3",
188
+ entry_point="gym.envs.mujoco.half_cheetah_v3:HalfCheetahEnv",
189
+ max_episode_steps=1000,
190
+ reward_threshold=4800.0,
191
+ )
192
+
193
+ register(
194
+ id="Hopper-v2",
195
+ entry_point="gym.envs.mujoco:HopperEnv",
196
+ max_episode_steps=1000,
197
+ reward_threshold=3800.0,
198
+ )
199
+
200
+ register(
201
+ id="Hopper-v3",
202
+ entry_point="gym.envs.mujoco.hopper_v3:HopperEnv",
203
+ max_episode_steps=1000,
204
+ reward_threshold=3800.0,
205
+ )
206
+
207
+ register(
208
+ id="Swimmer-v2",
209
+ entry_point="gym.envs.mujoco:SwimmerEnv",
210
+ max_episode_steps=1000,
211
+ reward_threshold=360.0,
212
+ )
213
+
214
+ register(
215
+ id="Swimmer-v3",
216
+ entry_point="gym.envs.mujoco.swimmer_v3:SwimmerEnv",
217
+ max_episode_steps=1000,
218
+ reward_threshold=360.0,
219
+ )
220
+
221
+ register(
222
+ id="Walker2d-v2",
223
+ max_episode_steps=1000,
224
+ entry_point="gym.envs.mujoco:Walker2dEnv",
225
+ )
226
+
227
+ register(
228
+ id="Walker2d-v3",
229
+ max_episode_steps=1000,
230
+ entry_point="gym.envs.mujoco.walker2d_v3:Walker2dEnv",
231
+ )
232
+
233
+ register(
234
+ id="Ant-v2",
235
+ entry_point="gym.envs.mujoco:AntEnv",
236
+ max_episode_steps=1000,
237
+ reward_threshold=6000.0,
238
+ )
239
+
240
+ register(
241
+ id="Ant-v3",
242
+ entry_point="gym.envs.mujoco.ant_v3:AntEnv",
243
+ max_episode_steps=1000,
244
+ reward_threshold=6000.0,
245
+ )
246
+
247
+ register(
248
+ id="Humanoid-v2",
249
+ entry_point="gym.envs.mujoco:HumanoidEnv",
250
+ max_episode_steps=1000,
251
+ )
252
+
253
+ register(
254
+ id="Humanoid-v3",
255
+ entry_point="gym.envs.mujoco.humanoid_v3:HumanoidEnv",
256
+ max_episode_steps=1000,
257
+ )
258
+
259
+ register(
260
+ id="HumanoidStandup-v2",
261
+ entry_point="gym.envs.mujoco:HumanoidStandupEnv",
262
+ max_episode_steps=1000,
263
+ )
264
+
265
+ # Robotics
266
+ # ----------------------------------------
267
+
268
+
269
+ def _merge(a, b):
270
+ a.update(b)
271
+ return a
272
+
273
+
274
+ for reward_type in ["sparse", "dense"]:
275
+ suffix = "Dense" if reward_type == "dense" else ""
276
+ kwargs = {
277
+ "reward_type": reward_type,
278
+ }
279
+
280
+ # Fetch
281
+ register(
282
+ id="FetchSlide{}-v1".format(suffix),
283
+ entry_point="gym.envs.robotics:FetchSlideEnv",
284
+ kwargs=kwargs,
285
+ max_episode_steps=50,
286
+ )
287
+
288
+ register(
289
+ id="FetchPickAndPlace{}-v1".format(suffix),
290
+ entry_point="gym.envs.robotics:FetchPickAndPlaceEnv",
291
+ kwargs=kwargs,
292
+ max_episode_steps=50,
293
+ )
294
+
295
+ register(
296
+ id="FetchReach{}-v1".format(suffix),
297
+ entry_point="gym.envs.robotics:FetchReachEnv",
298
+ kwargs=kwargs,
299
+ max_episode_steps=50,
300
+ )
301
+
302
+ register(
303
+ id="FetchPush{}-v1".format(suffix),
304
+ entry_point="gym.envs.robotics:FetchPushEnv",
305
+ kwargs=kwargs,
306
+ max_episode_steps=50,
307
+ )
308
+
309
+ # Hand
310
+ register(
311
+ id="HandReach{}-v0".format(suffix),
312
+ entry_point="gym.envs.robotics:HandReachEnv",
313
+ kwargs=kwargs,
314
+ max_episode_steps=50,
315
+ )
316
+
317
+ register(
318
+ id="HandManipulateBlockRotateZ{}-v0".format(suffix),
319
+ entry_point="gym.envs.robotics:HandBlockEnv",
320
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "z"}, kwargs),
321
+ max_episode_steps=100,
322
+ )
323
+
324
+ register(
325
+ id="HandManipulateBlockRotateZTouchSensors{}-v0".format(suffix),
326
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
327
+ kwargs=_merge(
328
+ {
329
+ "target_position": "ignore",
330
+ "target_rotation": "z",
331
+ "touch_get_obs": "boolean",
332
+ },
333
+ kwargs,
334
+ ),
335
+ max_episode_steps=100,
336
+ )
337
+
338
+ register(
339
+ id="HandManipulateBlockRotateZTouchSensors{}-v1".format(suffix),
340
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
341
+ kwargs=_merge(
342
+ {
343
+ "target_position": "ignore",
344
+ "target_rotation": "z",
345
+ "touch_get_obs": "sensordata",
346
+ },
347
+ kwargs,
348
+ ),
349
+ max_episode_steps=100,
350
+ )
351
+
352
+ register(
353
+ id="HandManipulateBlockRotateParallel{}-v0".format(suffix),
354
+ entry_point="gym.envs.robotics:HandBlockEnv",
355
+ kwargs=_merge(
356
+ {"target_position": "ignore", "target_rotation": "parallel"}, kwargs
357
+ ),
358
+ max_episode_steps=100,
359
+ )
360
+
361
+ register(
362
+ id="HandManipulateBlockRotateParallelTouchSensors{}-v0".format(suffix),
363
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
364
+ kwargs=_merge(
365
+ {
366
+ "target_position": "ignore",
367
+ "target_rotation": "parallel",
368
+ "touch_get_obs": "boolean",
369
+ },
370
+ kwargs,
371
+ ),
372
+ max_episode_steps=100,
373
+ )
374
+
375
+ register(
376
+ id="HandManipulateBlockRotateParallelTouchSensors{}-v1".format(suffix),
377
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
378
+ kwargs=_merge(
379
+ {
380
+ "target_position": "ignore",
381
+ "target_rotation": "parallel",
382
+ "touch_get_obs": "sensordata",
383
+ },
384
+ kwargs,
385
+ ),
386
+ max_episode_steps=100,
387
+ )
388
+
389
+ register(
390
+ id="HandManipulateBlockRotateXYZ{}-v0".format(suffix),
391
+ entry_point="gym.envs.robotics:HandBlockEnv",
392
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "xyz"}, kwargs),
393
+ max_episode_steps=100,
394
+ )
395
+
396
+ register(
397
+ id="HandManipulateBlockRotateXYZTouchSensors{}-v0".format(suffix),
398
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
399
+ kwargs=_merge(
400
+ {
401
+ "target_position": "ignore",
402
+ "target_rotation": "xyz",
403
+ "touch_get_obs": "boolean",
404
+ },
405
+ kwargs,
406
+ ),
407
+ max_episode_steps=100,
408
+ )
409
+
410
+ register(
411
+ id="HandManipulateBlockRotateXYZTouchSensors{}-v1".format(suffix),
412
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
413
+ kwargs=_merge(
414
+ {
415
+ "target_position": "ignore",
416
+ "target_rotation": "xyz",
417
+ "touch_get_obs": "sensordata",
418
+ },
419
+ kwargs,
420
+ ),
421
+ max_episode_steps=100,
422
+ )
423
+
424
+ register(
425
+ id="HandManipulateBlockFull{}-v0".format(suffix),
426
+ entry_point="gym.envs.robotics:HandBlockEnv",
427
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
428
+ max_episode_steps=100,
429
+ )
430
+
431
+ # Alias for "Full"
432
+ register(
433
+ id="HandManipulateBlock{}-v0".format(suffix),
434
+ entry_point="gym.envs.robotics:HandBlockEnv",
435
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
436
+ max_episode_steps=100,
437
+ )
438
+
439
+ register(
440
+ id="HandManipulateBlockTouchSensors{}-v0".format(suffix),
441
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
442
+ kwargs=_merge(
443
+ {
444
+ "target_position": "random",
445
+ "target_rotation": "xyz",
446
+ "touch_get_obs": "boolean",
447
+ },
448
+ kwargs,
449
+ ),
450
+ max_episode_steps=100,
451
+ )
452
+
453
+ register(
454
+ id="HandManipulateBlockTouchSensors{}-v1".format(suffix),
455
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
456
+ kwargs=_merge(
457
+ {
458
+ "target_position": "random",
459
+ "target_rotation": "xyz",
460
+ "touch_get_obs": "sensordata",
461
+ },
462
+ kwargs,
463
+ ),
464
+ max_episode_steps=100,
465
+ )
466
+
467
+ register(
468
+ id="HandManipulateEggRotate{}-v0".format(suffix),
469
+ entry_point="gym.envs.robotics:HandEggEnv",
470
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "xyz"}, kwargs),
471
+ max_episode_steps=100,
472
+ )
473
+
474
+ register(
475
+ id="HandManipulateEggRotateTouchSensors{}-v0".format(suffix),
476
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
477
+ kwargs=_merge(
478
+ {
479
+ "target_position": "ignore",
480
+ "target_rotation": "xyz",
481
+ "touch_get_obs": "boolean",
482
+ },
483
+ kwargs,
484
+ ),
485
+ max_episode_steps=100,
486
+ )
487
+
488
+ register(
489
+ id="HandManipulateEggRotateTouchSensors{}-v1".format(suffix),
490
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
491
+ kwargs=_merge(
492
+ {
493
+ "target_position": "ignore",
494
+ "target_rotation": "xyz",
495
+ "touch_get_obs": "sensordata",
496
+ },
497
+ kwargs,
498
+ ),
499
+ max_episode_steps=100,
500
+ )
501
+
502
+ register(
503
+ id="HandManipulateEggFull{}-v0".format(suffix),
504
+ entry_point="gym.envs.robotics:HandEggEnv",
505
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
506
+ max_episode_steps=100,
507
+ )
508
+
509
+ # Alias for "Full"
510
+ register(
511
+ id="HandManipulateEgg{}-v0".format(suffix),
512
+ entry_point="gym.envs.robotics:HandEggEnv",
513
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
514
+ max_episode_steps=100,
515
+ )
516
+
517
+ register(
518
+ id="HandManipulateEggTouchSensors{}-v0".format(suffix),
519
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
520
+ kwargs=_merge(
521
+ {
522
+ "target_position": "random",
523
+ "target_rotation": "xyz",
524
+ "touch_get_obs": "boolean",
525
+ },
526
+ kwargs,
527
+ ),
528
+ max_episode_steps=100,
529
+ )
530
+
531
+ register(
532
+ id="HandManipulateEggTouchSensors{}-v1".format(suffix),
533
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
534
+ kwargs=_merge(
535
+ {
536
+ "target_position": "random",
537
+ "target_rotation": "xyz",
538
+ "touch_get_obs": "sensordata",
539
+ },
540
+ kwargs,
541
+ ),
542
+ max_episode_steps=100,
543
+ )
544
+
545
+ register(
546
+ id="HandManipulatePenRotate{}-v0".format(suffix),
547
+ entry_point="gym.envs.robotics:HandPenEnv",
548
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "xyz"}, kwargs),
549
+ max_episode_steps=100,
550
+ )
551
+
552
+ register(
553
+ id="HandManipulatePenRotateTouchSensors{}-v0".format(suffix),
554
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
555
+ kwargs=_merge(
556
+ {
557
+ "target_position": "ignore",
558
+ "target_rotation": "xyz",
559
+ "touch_get_obs": "boolean",
560
+ },
561
+ kwargs,
562
+ ),
563
+ max_episode_steps=100,
564
+ )
565
+
566
+ register(
567
+ id="HandManipulatePenRotateTouchSensors{}-v1".format(suffix),
568
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
569
+ kwargs=_merge(
570
+ {
571
+ "target_position": "ignore",
572
+ "target_rotation": "xyz",
573
+ "touch_get_obs": "sensordata",
574
+ },
575
+ kwargs,
576
+ ),
577
+ max_episode_steps=100,
578
+ )
579
+
580
+ register(
581
+ id="HandManipulatePenFull{}-v0".format(suffix),
582
+ entry_point="gym.envs.robotics:HandPenEnv",
583
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
584
+ max_episode_steps=100,
585
+ )
586
+
587
+ # Alias for "Full"
588
+ register(
589
+ id="HandManipulatePen{}-v0".format(suffix),
590
+ entry_point="gym.envs.robotics:HandPenEnv",
591
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
592
+ max_episode_steps=100,
593
+ )
594
+
595
+ register(
596
+ id="HandManipulatePenTouchSensors{}-v0".format(suffix),
597
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
598
+ kwargs=_merge(
599
+ {
600
+ "target_position": "random",
601
+ "target_rotation": "xyz",
602
+ "touch_get_obs": "boolean",
603
+ },
604
+ kwargs,
605
+ ),
606
+ max_episode_steps=100,
607
+ )
608
+
609
+ register(
610
+ id="HandManipulatePenTouchSensors{}-v1".format(suffix),
611
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
612
+ kwargs=_merge(
613
+ {
614
+ "target_position": "random",
615
+ "target_rotation": "xyz",
616
+ "touch_get_obs": "sensordata",
617
+ },
618
+ kwargs,
619
+ ),
620
+ max_episode_steps=100,
621
+ )
622
+
623
+ # Unit test
624
+ # ---------
625
+
626
+ register(
627
+ id="CubeCrash-v0",
628
+ entry_point="gym.envs.unittest:CubeCrash",
629
+ reward_threshold=0.9,
630
+ )
631
+ register(
632
+ id="CubeCrashSparse-v0",
633
+ entry_point="gym.envs.unittest:CubeCrashSparse",
634
+ reward_threshold=0.9,
635
+ )
636
+ register(
637
+ id="CubeCrashScreenBecomesBlack-v0",
638
+ entry_point="gym.envs.unittest:CubeCrashScreenBecomesBlack",
639
+ reward_threshold=0.9,
640
+ )
641
+
642
+ register(
643
+ id="MemorizeDigits-v0",
644
+ entry_point="gym.envs.unittest:MemorizeDigits",
645
+ reward_threshold=20,
646
+ )
gym-0.21.0/gym/envs/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (7.21 kB). View file
 
gym-0.21.0/gym/envs/box2d/car_racing.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Easiest continuous control task to learn from pixels, a top-down racing
3
+ environment.
4
+ Discrete control is reasonable in this environment as well, on/off
5
+ discretization is fine.
6
+
7
+ State consists of STATE_W x STATE_H pixels.
8
+
9
+ The reward is -0.1 every frame and +1000/N for every track tile visited, where
10
+ N is the total number of tiles visited in the track. For example, if you have
11
+ finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points.
12
+
13
+ The game is solved when the agent consistently gets 900+ points. The generated
14
+ track is random every episode.
15
+
16
+ The episode finishes when all the tiles are visited. The car also can go
17
+ outside of the PLAYFIELD - that is far off the track, then it will get -100
18
+ and die.
19
+
20
+ Some indicators are shown at the bottom of the window along with the state RGB
21
+ buffer. From left to right: the true speed, four ABS sensors, the steering
22
+ wheel position and gyroscope.
23
+
24
+ To play yourself (it's rather fast for humans), type:
25
+
26
+ python gym/envs/box2d/car_racing.py
27
+
28
+ Remember it's a powerful rear-wheel drive car - don't press the accelerator
29
+ and turn at the same time.
30
+
31
+ Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
32
+ """
33
+ import sys
34
+ import math
35
+ import numpy as np
36
+
37
+ import Box2D
38
+ from Box2D.b2 import fixtureDef
39
+ from Box2D.b2 import polygonShape
40
+ from Box2D.b2 import contactListener
41
+
42
+ import gym
43
+ from gym import spaces
44
+ from gym.envs.box2d.car_dynamics import Car
45
+ from gym.utils import seeding, EzPickle
46
+
47
+ import pyglet
48
+
49
+ pyglet.options["debug_gl"] = False
50
+ from pyglet import gl
51
+
52
+ STATE_W = 96 # less than Atari 160x192
53
+ STATE_H = 96
54
+ VIDEO_W = 600
55
+ VIDEO_H = 400
56
+ WINDOW_W = 1000
57
+ WINDOW_H = 800
58
+
59
+ SCALE = 6.0 # Track scale
60
+ TRACK_RAD = 900 / SCALE # Track is heavily morphed circle with this radius
61
+ PLAYFIELD = 2000 / SCALE # Game over boundary
62
+ FPS = 50 # Frames per second
63
+ ZOOM = 2.7 # Camera zoom
64
+ ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)
65
+
66
+
67
+ TRACK_DETAIL_STEP = 21 / SCALE
68
+ TRACK_TURN_RATE = 0.31
69
+ TRACK_WIDTH = 40 / SCALE
70
+ BORDER = 8 / SCALE
71
+ BORDER_MIN_COUNT = 4
72
+
73
+ ROAD_COLOR = [0.4, 0.4, 0.4]
74
+
75
+
76
+ class FrictionDetector(contactListener):
77
+ def __init__(self, env):
78
+ contactListener.__init__(self)
79
+ self.env = env
80
+
81
+ def BeginContact(self, contact):
82
+ self._contact(contact, True)
83
+
84
+ def EndContact(self, contact):
85
+ self._contact(contact, False)
86
+
87
+ def _contact(self, contact, begin):
88
+ tile = None
89
+ obj = None
90
+ u1 = contact.fixtureA.body.userData
91
+ u2 = contact.fixtureB.body.userData
92
+ if u1 and "road_friction" in u1.__dict__:
93
+ tile = u1
94
+ obj = u2
95
+ if u2 and "road_friction" in u2.__dict__:
96
+ tile = u2
97
+ obj = u1
98
+ if not tile:
99
+ return
100
+
101
+ tile.color[0] = ROAD_COLOR[0]
102
+ tile.color[1] = ROAD_COLOR[1]
103
+ tile.color[2] = ROAD_COLOR[2]
104
+ if not obj or "tiles" not in obj.__dict__:
105
+ return
106
+ if begin:
107
+ obj.tiles.add(tile)
108
+ if not tile.road_visited:
109
+ tile.road_visited = True
110
+ self.env.reward += 1000.0 / len(self.env.track)
111
+ self.env.tile_visited_count += 1
112
+ else:
113
+ obj.tiles.remove(tile)
114
+
115
+
116
+ class CarRacing(gym.Env, EzPickle):
117
+ metadata = {
118
+ "render.modes": ["human", "rgb_array", "state_pixels"],
119
+ "video.frames_per_second": FPS,
120
+ }
121
+
122
+ def __init__(self, verbose=1):
123
+ EzPickle.__init__(self)
124
+ self.seed()
125
+ self.contactListener_keepref = FrictionDetector(self)
126
+ self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
127
+ self.viewer = None
128
+ self.invisible_state_window = None
129
+ self.invisible_video_window = None
130
+ self.road = None
131
+ self.car = None
132
+ self.reward = 0.0
133
+ self.prev_reward = 0.0
134
+ self.verbose = verbose
135
+ self.fd_tile = fixtureDef(
136
+ shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
137
+ )
138
+
139
+ self.action_space = spaces.Box(
140
+ np.array([-1, 0, 0]).astype(np.float32),
141
+ np.array([+1, +1, +1]).astype(np.float32),
142
+ ) # steer, gas, brake
143
+
144
+ self.observation_space = spaces.Box(
145
+ low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
146
+ )
147
+
148
+ def seed(self, seed=None):
149
+ self.np_random, seed = seeding.np_random(seed)
150
+ return [seed]
151
+
152
    def _destroy(self):
        # Tear down all Box2D bodies from a previous episode.  No-op on the
        # very first reset, before any road has been built.
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()
159
+
160
    def _create_track(self):
        """Procedurally generate one closed-loop track.

        Populates ``self.road`` (Box2D static sensor bodies),
        ``self.road_poly`` (render polygons) and ``self.track``.
        Returns True on success, False when the generated loop is not
        well formed — the caller (reset) retries until success.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            # Pin the first and last checkpoints so the loop closes at a
            # known angle/radius.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            # Safety valve: give up after 2500 iterations rather than loop
            # forever on a degenerate checkpoint layout.
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1 : i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            # userData points back at the body so FrictionDetector can find
            # the tile attributes set below.
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            # Sensor fixtures report contacts but exert no collision force.
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))
                )
        self.track = track
        return True
345
+
346
    def reset(self):
        """Start a new episode; return the first observation."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; retry until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many"
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        # step(None) advances physics once without control input and
        # yields the initial state frame.
        return self.step(None)[0]
366
+
367
    def step(self, action):
        """Advance the simulation by one frame.

        Parameters
        ----------
        action : sequence of 3 floats, or None
            [steer, gas, brake].  None performs a physics step without
            control input (used once from reset()).

        Returns
        -------
        tuple
            (observation, step_reward, done, info_dict)
        """
        if action is not None:
            # Steering sign is flipped so positive action steers right.
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            # Heavy penalty for leaving the playfield entirely.
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}
396
+
397
+ def render(self, mode="human"):
398
+ assert mode in ["human", "state_pixels", "rgb_array"]
399
+ if self.viewer is None:
400
+ from gym.envs.classic_control import rendering
401
+
402
+ self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
403
+ self.score_label = pyglet.text.Label(
404
+ "0000",
405
+ font_size=36,
406
+ x=20,
407
+ y=WINDOW_H * 2.5 / 40.00,
408
+ anchor_x="left",
409
+ anchor_y="center",
410
+ color=(255, 255, 255, 255),
411
+ )
412
+ self.transform = rendering.Transform()
413
+
414
+ if "t" not in self.__dict__:
415
+ return # reset() not called yet
416
+
417
+ # Animate zoom first second:
418
+ zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
419
+ scroll_x = self.car.hull.position[0]
420
+ scroll_y = self.car.hull.position[1]
421
+ angle = -self.car.hull.angle
422
+ vel = self.car.hull.linearVelocity
423
+ if np.linalg.norm(vel) > 0.5:
424
+ angle = math.atan2(vel[0], vel[1])
425
+ self.transform.set_scale(zoom, zoom)
426
+ self.transform.set_translation(
427
+ WINDOW_W / 2
428
+ - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)),
429
+ WINDOW_H / 4
430
+ - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle)),
431
+ )
432
+ self.transform.set_rotation(angle)
433
+
434
+ self.car.draw(self.viewer, mode != "state_pixels")
435
+
436
+ arr = None
437
+ win = self.viewer.window
438
+ win.switch_to()
439
+ win.dispatch_events()
440
+
441
+ win.clear()
442
+ t = self.transform
443
+ if mode == "rgb_array":
444
+ VP_W = VIDEO_W
445
+ VP_H = VIDEO_H
446
+ elif mode == "state_pixels":
447
+ VP_W = STATE_W
448
+ VP_H = STATE_H
449
+ else:
450
+ pixel_scale = 1
451
+ if hasattr(win.context, "_nscontext"):
452
+ pixel_scale = (
453
+ win.context._nscontext.view().backingScaleFactor()
454
+ ) # pylint: disable=protected-access
455
+ VP_W = int(pixel_scale * WINDOW_W)
456
+ VP_H = int(pixel_scale * WINDOW_H)
457
+
458
+ gl.glViewport(0, 0, VP_W, VP_H)
459
+ t.enable()
460
+ self.render_road()
461
+ for geom in self.viewer.onetime_geoms:
462
+ geom.render()
463
+ self.viewer.onetime_geoms = []
464
+ t.disable()
465
+ self.render_indicators(WINDOW_W, WINDOW_H)
466
+
467
+ if mode == "human":
468
+ win.flip()
469
+ return self.viewer.isopen
470
+
471
+ image_data = (
472
+ pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
473
+ )
474
+ arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
475
+ arr = arr.reshape(VP_H, VP_W, 4)
476
+ arr = arr[::-1, :, 0:3]
477
+
478
+ return arr
479
+
480
+ def close(self):
481
+ if self.viewer is not None:
482
+ self.viewer.close()
483
+ self.viewer = None
484
+
485
    def render_road(self):
        """Draw the green field, the lighter grass checker pattern and all
        road/kerb polygons as a single GL_QUADS vertex list."""
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        # One big quad covering the whole playfield.
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        # Lighter-green checker squares laid over the field.
        k = PLAYFIELD / 20.0
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend(
                    [
                        k * x + k,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + k,
                        0,
                        k * x + k,
                        k * y + k,
                        0,
                    ]
                )

        # Road tiles and kerb quads accumulated by _create_track().
        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()
533
+
534
    def render_indicators(self, W, H):
        """Draw the HUD strip along the bottom of a W x H window: speed,
        per-wheel ABS sensors, steering angle, angular velocity and the
        numeric score."""
        s = W / 40.0
        h = H / 40.0
        # Black background bar across the bottom of the window.
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            # Bar of height proportional to val at horizontal slot `place`.
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    place * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h,
                    0,
                    (place + 0) * s,
                    h,
                    0,
                ]
            )

        def horiz_ind(place, val, color):
            # Horizontal bar of width proportional to val at slot `place`.
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    (place + 0) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    2 * h,
                    0,
                    (place + 0) * s,
                    2 * h,
                    0,
                ]
            )

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3, ("v3f", polygons), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
597
+
598
+
599
if __name__ == "__main__":
    # Interactive demo: drive the car with the arrow keys.  Enter restarts
    # the episode.
    from pyglet.window import key

    # Shared action vector [steer, gas, brake] mutated by the key handlers.
    a = np.array([0.0, 0.0, 0.0])

    def key_press(k, mod):
        global restart
        if k == 0xFF0D:  # Enter key
            restart = True
        if k == key.LEFT:
            a[0] = -1.0
        if k == key.RIGHT:
            a[0] = +1.0
        if k == key.UP:
            a[1] = +1.0
        if k == key.DOWN:
            a[2] = +0.8  # set 1.0 for wheels to block to zero rotation

    def key_release(k, mod):
        # Only clear steering if the released key matches the current
        # direction, so overlapping left/right presses behave sanely.
        if k == key.LEFT and a[0] == -1.0:
            a[0] = 0
        if k == key.RIGHT and a[0] == +1.0:
            a[0] = 0
        if k == key.UP:
            a[1] = 0
        if k == key.DOWN:
            a[2] = 0

    env = CarRacing()
    env.render()
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release
    record_video = False
    if record_video:
        from gym.wrappers.monitor import Monitor

        env = Monitor(env, "/tmp/video-test", force=True)
    isopen = True
    while isopen:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        while True:
            s, r, done, info = env.step(a)
            total_reward += r
            if steps % 200 == 0 or done:
                print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
                print("step {} total_reward {:+0.2f}".format(steps, total_reward))
            steps += 1
            isopen = env.render()
            if done or restart or isopen == False:
                break
    env.close()
gym-0.21.0/gym/envs/mujoco/ant_v3.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym import utils
3
+ from gym.envs.mujoco import mujoco_env
4
+
5
+
6
# Default camera settings applied attribute-by-attribute in viewer_setup().
DEFAULT_CAMERA_CONFIG = {
    "distance": 4.0,
}
9
+
10
+
11
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Ant locomotion task (v3).

    Reward = forward (x) velocity + survival bonus - control cost -
    contact-force cost.  The episode terminates when the ant leaves the
    healthy height range (if ``terminate_when_unhealthy``).
    """

    def __init__(
        self,
        xml_file="ant.xml",
        ctrl_cost_weight=0.5,
        contact_cost_weight=5e-4,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.2, 1.0),
        contact_force_range=(-1.0, 1.0),
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
    ):
        # EzPickle stores the constructor arguments so the env can be
        # re-created when pickled.
        utils.EzPickle.__init__(**locals())

        self._ctrl_cost_weight = ctrl_cost_weight
        self._contact_cost_weight = contact_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range

        self._contact_force_range = contact_force_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # frame_skip=5: each env step advances the simulator 5 frames.
        mujoco_env.MujocoEnv.__init__(self, xml_file, 5)

    @property
    def healthy_reward(self):
        """Survival bonus; paid on every step when termination is enabled,
        and only while actually healthy otherwise."""
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        """Quadratic penalty on the action vector."""
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def contact_forces(self):
        """External contact forces, clipped to the configured range."""
        raw_contact_forces = self.sim.data.cfrc_ext
        min_value, max_value = self._contact_force_range
        contact_forces = np.clip(raw_contact_forces, min_value, max_value)
        return contact_forces

    @property
    def contact_cost(self):
        """Quadratic penalty on the clipped contact forces."""
        contact_cost = self._contact_cost_weight * np.sum(
            np.square(self.contact_forces)
        )
        return contact_cost

    @property
    def is_healthy(self):
        # Healthy = all state values finite and torso height within range.
        state = self.state_vector()
        min_z, max_z = self._healthy_z_range
        is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
        return is_healthy

    @property
    def done(self):
        done = not self.is_healthy if self._terminate_when_unhealthy else False
        return done

    def step(self, action):
        """Apply ``action`` for ``frame_skip`` frames and return
        (observation, reward, done, info)."""
        xy_position_before = self.get_body_com("torso")[:2].copy()
        self.do_simulation(action, self.frame_skip)
        xy_position_after = self.get_body_com("torso")[:2].copy()

        # Finite-difference velocity over the whole env step.
        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost

        forward_reward = x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost + contact_cost

        reward = rewards - costs
        done = self.done
        observation = self._get_obs()
        info = {
            "reward_forward": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "reward_contact": -contact_cost,
            "reward_survive": healthy_reward,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        return observation, reward, done, info

    def _get_obs(self):
        """Flat observation: positions, velocities, clipped contact forces."""
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()
        contact_force = self.contact_forces.flat.copy()

        if self._exclude_current_positions_from_observation:
            # Drop the first two qpos entries so the observation does not
            # encode absolute planar position.
            position = position[2:]

        observations = np.concatenate((position, velocity, contact_force))

        return observations

    def reset_model(self):
        """Re-initialise qpos/qvel around the defaults with small noise."""
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        # Velocities get Gaussian (not uniform) noise.
        qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
            self.model.nv
        )
        self.set_state(qpos, qvel)

        observation = self._get_obs()

        return observation

    def viewer_setup(self):
        # Apply DEFAULT_CAMERA_CONFIG; ndarray values (e.g. lookat) are
        # copied element-wise, scalars assigned directly.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
gym-0.21.0/gym/envs/mujoco/humanoid.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym.envs.mujoco import mujoco_env
3
+ from gym import utils
4
+
5
+
6
def mass_center(model, sim):
    """Return the x-coordinate of the model's centre of mass.

    The per-body masses weight the per-body COM positions (``xipos``);
    only the first (x) component of the weighted mean is returned.
    """
    body_mass = np.expand_dims(model.body_mass, 1)
    weighted_positions = body_mass * sim.data.xipos
    center = np.sum(weighted_positions, 0) / np.sum(body_mass)
    return center[0]
10
+
11
+
12
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Humanoid locomotion task: reward forward motion of the centre of
    mass plus an alive bonus, minus control and impact costs."""

    def __init__(self):
        # frame_skip=5: each env step advances the simulator 5 frames.
        mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        # Flat observation: joint positions (first two qpos entries
        # dropped, so absolute planar position is excluded), velocities,
        # body inertias/velocities, actuator and external contact forces.
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` frames; return
        (obs, reward, done, info)."""
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        pos_after = mass_center(self.model, self.sim)
        alive_bonus = 5.0
        data = self.sim.data
        # Forward (x) velocity of the centre of mass.
        lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        # Impact cost is capped at 10 so hard contacts can't dominate.
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
        qpos = self.sim.data.qpos
        # Episode ends when torso height leaves the [1.0, 2.0] band.
        done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
        return (
            self._get_obs(),
            reward,
            done,
            dict(
                reward_linvel=lin_vel_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_alive=alive_bonus,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        """Re-initialise state near the defaults with small uniform noise."""
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        # Track the torso from a slightly elevated side view.
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 2.0
        self.viewer.cam.elevation = -20
gym-0.21.0/gym/envs/mujoco/humanoidstandup.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gym.envs.mujoco import mujoco_env
2
+ from gym import utils
3
+ import numpy as np
4
+
5
+
6
class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Humanoid stand-up task: reward torso height gained per unit time,
    minus control and impact costs.  Episodes never terminate early."""

    def __init__(self):
        # frame_skip=5: each env step advances the simulator 5 frames.
        mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        # Flat observation: joint positions (first two qpos entries
        # dropped), velocities, body inertias/velocities, actuator and
        # external contact forces.
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` frames; return
        (obs, reward, done, info).  ``done`` is always False."""
        self.do_simulation(a, self.frame_skip)
        pos_after = self.sim.data.qpos[2]
        data = self.sim.data
        # Reward is proportional to torso height per simulator timestep.
        uph_cost = (pos_after - 0) / self.model.opt.timestep

        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        # Impact cost is capped at 10 so hard contacts can't dominate.
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

        done = bool(False)
        return (
            self._get_obs(),
            reward,
            done,
            dict(
                reward_linup=uph_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        """Re-initialise state near the defaults with small uniform noise."""
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        # Track the torso from a slightly elevated side view.
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 0.8925
        self.viewer.cam.elevation = -20
gym-0.21.0/gym/envs/mujoco/walker2d_v3.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym.envs.mujoco import mujoco_env
3
+ from gym import utils
4
+
5
+
6
# Default camera pose applied in viewer_setup(); ndarray values (lookat)
# are copied element-wise into the viewer camera, scalars assigned directly.
DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 2,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 1.15)),
    "elevation": -20.0,
}
12
+
13
+
14
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Walker2d locomotion task (v3).

    Reward = weighted forward (x) velocity + survival bonus - control
    cost.  The episode terminates when torso height or angle leaves the
    healthy ranges (if ``terminate_when_unhealthy``).
    """

    def __init__(
        self,
        xml_file="walker2d.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.8, 2.0),
        healthy_angle_range=(-1.0, 1.0),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
    ):
        # EzPickle stores the constructor arguments so the env can be
        # re-created when pickled.
        utils.EzPickle.__init__(**locals())

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy

        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # frame_skip=4: each env step advances the simulator 4 frames.
        mujoco_env.MujocoEnv.__init__(self, xml_file, 4)

    @property
    def healthy_reward(self):
        """Survival bonus; paid on every step when termination is enabled,
        and only while actually healthy otherwise."""
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        """Quadratic penalty on the action vector."""
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        # qpos[1] is torso height, qpos[2] its pitch angle; both must be
        # strictly inside their healthy ranges.
        z, angle = self.sim.data.qpos[1:3]

        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle
        is_healthy = healthy_z and healthy_angle

        return is_healthy

    @property
    def done(self):
        done = not self.is_healthy if self._terminate_when_unhealthy else False
        return done

    def _get_obs(self):
        """Flat observation: positions and velocities (clipped to ±10)."""
        position = self.sim.data.qpos.flat.copy()
        velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            # Drop qpos[0] so the observation does not encode absolute
            # x position.
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        """Apply ``action`` for ``frame_skip`` frames and return
        (observation, reward, done, info)."""
        x_position_before = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.sim.data.qpos[0]
        # Finite-difference forward velocity over the whole env step.
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        done = self.done
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        return observation, reward, done, info

    def reset_model(self):
        """Re-initialise qpos/qvel around the defaults with small uniform
        noise."""
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        # Apply DEFAULT_CAMERA_CONFIG; ndarray values (lookat) are copied
        # element-wise, scalars assigned directly.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
gym-0.21.0/gym/envs/robotics/assets/fetch/slide.xml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<?xml version="1.0" encoding="utf-8"?>
<!-- Fetch "slide" task scene: a floor plane with a target site, the shared
     Fetch robot (included from robot.xml/shared.xml), a low-friction table
     and a free-floating puck (object0) to be slid to the target. -->
<mujoco>
	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
	<option timestep="0.002">
		<flag warmstart="enable"></flag>
	</option>

	<include file="shared.xml"></include>

	<worldbody>
		<geom name="floor0" pos="1 0.75 0" size="1.05 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
		<body name="floor0" pos="1 0.75 0">
			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
		</body>

		<include file="robot.xml"></include>

		<body name="table0" pos="1.32441906 0.75018422 0.2">
			<geom size="0.625 0.45 0.2" type="box" condim="3" name="table0" material="table_mat" mass="2000" friction="0.1 0.005 0.0001"></geom>
		</body>

		<body name="object0" pos="0.025 0.025 0.02">
			<joint name="object0:joint" type="free" damping="0.01"></joint>
			<geom size="0.025 0.02" type="cylinder" condim="3" name="object0" material="puck_mat" friction="0.1 0.005 0.0001" mass="2"></geom>
			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
		</body>

		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
	</worldbody>

	<actuator></actuator>
</mujoco>
gym-0.21.0/gym/envs/robotics/assets/hand/shared_asset.xml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
2
+ <mujoco>
3
+ <texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
4
+
5
+ <texture name="robot0:texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.15 0.2" width="512" height="512"></texture>
6
+ <texture name="robot0:texgeom" type="cube" builtin="flat" mark="cross" width="127" height="127" rgb1="0.3 0.6 0.5" rgb2="0.3 0.6 0.5" markrgb="0 0 0" random="0.01"></texture>
7
+
8
+ <material name="robot0:MatGnd" reflectance="0.5" texture="robot0:texplane" texrepeat="1 1" texuniform="true"></material>
9
+ <material name="robot0:MatColl" specular="1" shininess="0.3" reflectance="0.5" rgba="0.4 0.5 0.6 1"></material>
10
+ <material name="robot0:MatViz" specular="0.75" shininess="0.1" reflectance="0.5" rgba="0.93 0.93 0.93 1"></material>
11
+ <material name="robot0:object" texture="robot0:texgeom" texuniform="false"></material>
12
+ <material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 0"></material>
13
+
14
+ <mesh name="robot0:forearm" file="forearm_electric.stl"></mesh>
15
+ <mesh name="robot0:forearm_cvx" file="forearm_electric_cvx.stl"></mesh>
16
+ <mesh name="robot0:wrist" scale="0.001 0.001 0.001" file="wrist.stl"></mesh>
17
+ <mesh name="robot0:palm" scale="0.001 0.001 0.001" file="palm.stl"></mesh>
18
+ <mesh name="robot0:knuckle" scale="0.001 0.001 0.001" file="knuckle.stl"></mesh>
19
+ <mesh name="robot0:F3" scale="0.001 0.001 0.001" file="F3.stl"></mesh>
20
+ <mesh name="robot0:F2" scale="0.001 0.001 0.001" file="F2.stl"></mesh>
21
+ <mesh name="robot0:F1" scale="0.001 0.001 0.001" file="F1.stl"></mesh>
22
+ <mesh name="robot0:lfmetacarpal" scale="0.001 0.001 0.001" file="lfmetacarpal.stl"></mesh>
23
+ <mesh name="robot0:TH3_z" scale="0.001 0.001 0.001" file="TH3_z.stl"></mesh>
24
+ <mesh name="robot0:TH2_z" scale="0.001 0.001 0.001" file="TH2_z.stl"></mesh>
25
+ <mesh name="robot0:TH1_z" scale="0.001 0.001 0.001" file="TH1_z.stl"></mesh>
26
+ </mujoco>
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl ADDED
Binary file (48.7 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/gripper_link.stl ADDED
Binary file (75.4 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl ADDED
Binary file (68.7 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl ADDED
Binary file (49.8 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/F1.stl ADDED
Binary file (17.1 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/TH2_z.stl ADDED
Binary file (63.2 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/palm.stl ADDED
Binary file (348 kB). View file
 
gym-0.21.0/gym/envs/toy_text/discrete.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from gym import Env, spaces
4
+ from gym.utils import seeding
5
+
6
+
7
+ def categorical_sample(prob_n, np_random):
8
+ """
9
+ Sample from categorical distribution
10
+ Each row specifies class probabilities
11
+ """
12
+ prob_n = np.asarray(prob_n)
13
+ csprob_n = np.cumsum(prob_n)
14
+ return (csprob_n > np_random.rand()).argmax()
15
+
16
+
17
+ class DiscreteEnv(Env):
18
+
19
+ """
20
+ Has the following members
21
+ - nS: number of states
22
+ - nA: number of actions
23
+ - P: transitions (*)
24
+ - isd: initial state distribution (**)
25
+
26
+ (*) dictionary of lists, where
27
+ P[s][a] == [(probability, nextstate, reward, done), ...]
28
+ (**) list or array of length nS
29
+
30
+
31
+ """
32
+
33
+ def __init__(self, nS, nA, P, isd):
34
+ self.P = P
35
+ self.isd = isd
36
+ self.lastaction = None # for rendering
37
+ self.nS = nS
38
+ self.nA = nA
39
+
40
+ self.action_space = spaces.Discrete(self.nA)
41
+ self.observation_space = spaces.Discrete(self.nS)
42
+
43
+ self.seed()
44
+ self.s = categorical_sample(self.isd, self.np_random)
45
+
46
+ def seed(self, seed=None):
47
+ self.np_random, seed = seeding.np_random(seed)
48
+ return [seed]
49
+
50
+ def reset(self):
51
+ self.s = categorical_sample(self.isd, self.np_random)
52
+ self.lastaction = None
53
+ return int(self.s)
54
+
55
+ def step(self, a):
56
+ transitions = self.P[self.s][a]
57
+ i = categorical_sample([t[0] for t in transitions], self.np_random)
58
+ p, s, r, d = transitions[i]
59
+ self.s = s
60
+ self.lastaction = a
61
+ return (int(s), r, d, {"prob": p})
gym-0.21.0/gym/envs/unittest/memorize_digits.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gym
3
+ from gym import spaces
4
+ from gym.utils import seeding
5
+
6
+ # Unit test environment for CNNs.
7
+ # Looks like this (RGB observations):
8
+ #
9
+ # ---------------------------
10
+ # | |
11
+ # | ****** |
12
+ # | ****** |
13
+ # | ** ** |
14
+ # | ** ** |
15
+ # | ** |
16
+ # | ** |
17
+ # | **** |
18
+ # | **** |
19
+ # | **** |
20
+ # | **** |
21
+ # | ********** |
22
+ # | ********** |
23
+ # | |
24
+ # ---------------------------
25
+ #
26
+ # Agent should hit action 2 to gain reward. Catches off-by-one errors in your agent.
27
+ #
28
+ # To see how it works, run:
29
+ #
30
+ # python examples/agents/keyboard_agent.py MemorizeDigits-v0
31
+
32
+ FIELD_W = 32
33
+ FIELD_H = 24
34
+
35
+ bogus_mnist = [
36
+ [" **** ", "* *", "* *", "* *", "* *", " **** "],
37
+ [" ** ", " * * ", " * ", " * ", " * ", " *** "],
38
+ [" **** ", "* *", " *", " *** ", "** ", "******"],
39
+ [" **** ", "* *", " ** ", " *", "* *", " **** "],
40
+ [" * * ", " * * ", " * * ", " **** ", " * ", " * "],
41
+ [" **** ", " * ", " **** ", " * ", " * ", " **** "],
42
+ [" *** ", " * ", " **** ", " * * ", " * * ", " **** "],
43
+ [" **** ", " * ", " * ", " * ", " * ", " * "],
44
+ [" **** ", "* *", " **** ", "* *", "* *", " **** "],
45
+ [" **** ", "* *", "* *", " *****", " *", " **** "],
46
+ ]
47
+
48
+ color_black = np.array((0, 0, 0)).astype("float32")
49
+ color_white = np.array((255, 255, 255)).astype("float32")
50
+
51
+
52
+ class MemorizeDigits(gym.Env):
53
+ metadata = {
54
+ "render.modes": ["human", "rgb_array"],
55
+ "video.frames_per_second": 60,
56
+ "video.res_w": FIELD_W,
57
+ "video.res_h": FIELD_H,
58
+ }
59
+
60
+ use_random_colors = False
61
+
62
+ def __init__(self):
63
+ self.seed()
64
+ self.viewer = None
65
+ self.observation_space = spaces.Box(
66
+ 0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8
67
+ )
68
+ self.action_space = spaces.Discrete(10)
69
+ self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
70
+ for digit in range(10):
71
+ for y in range(6):
72
+ self.bogus_mnist[digit, y, :] = [
73
+ ord(char) for char in bogus_mnist[digit][y]
74
+ ]
75
+ self.reset()
76
+
77
+ def seed(self, seed=None):
78
+ self.np_random, seed = seeding.np_random(seed)
79
+ return [seed]
80
+
81
+ def random_color(self):
82
+ return np.array(
83
+ [
84
+ self.np_random.randint(low=0, high=255),
85
+ self.np_random.randint(low=0, high=255),
86
+ self.np_random.randint(low=0, high=255),
87
+ ]
88
+ ).astype("uint8")
89
+
90
+ def reset(self):
91
+ self.digit_x = self.np_random.randint(low=FIELD_W // 5, high=FIELD_W // 5 * 4)
92
+ self.digit_y = self.np_random.randint(low=FIELD_H // 5, high=FIELD_H // 5 * 4)
93
+ self.color_bg = self.random_color() if self.use_random_colors else color_black
94
+ self.step_n = 0
95
+ while 1:
96
+ self.color_digit = (
97
+ self.random_color() if self.use_random_colors else color_white
98
+ )
99
+ if np.linalg.norm(self.color_digit - self.color_bg) < 50:
100
+ continue
101
+ break
102
+ self.digit = -1
103
+ return self.step(0)[0]
104
+
105
+ def step(self, action):
106
+ reward = -1
107
+ done = False
108
+ self.step_n += 1
109
+ if self.digit == -1:
110
+ pass
111
+ else:
112
+ if self.digit == action:
113
+ reward = +1
114
+ done = self.step_n > 20 and 0 == self.np_random.randint(low=0, high=5)
115
+ self.digit = self.np_random.randint(low=0, high=10)
116
+ obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
117
+ obs[:, :, :] = self.color_bg
118
+ digit_img = np.zeros((6, 6, 3), dtype=np.uint8)
119
+ digit_img[:] = self.color_bg
120
+ xxx = self.bogus_mnist[self.digit] == 42
121
+ digit_img[xxx] = self.color_digit
122
+ obs[
123
+ self.digit_y - 3 : self.digit_y + 3, self.digit_x - 3 : self.digit_x + 3
124
+ ] = digit_img
125
+ self.last_obs = obs
126
+ return obs, reward, done, {}
127
+
128
+ def render(self, mode="human"):
129
+ if mode == "rgb_array":
130
+ return self.last_obs
131
+
132
+ elif mode == "human":
133
+ from gym.envs.classic_control import rendering
134
+
135
+ if self.viewer is None:
136
+ self.viewer = rendering.SimpleImageViewer()
137
+ self.viewer.imshow(self.last_obs)
138
+ return self.viewer.isopen
139
+
140
+ else:
141
+ assert 0, "Render mode '%s' is not supported" % mode
142
+
143
+ def close(self):
144
+ if self.viewer is not None:
145
+ self.viewer.close()
146
+ self.viewer = None
gym-0.21.0/gym/spaces/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gym.spaces.space import Space
2
+ from gym.spaces.box import Box
3
+ from gym.spaces.discrete import Discrete
4
+ from gym.spaces.multi_discrete import MultiDiscrete
5
+ from gym.spaces.multi_binary import MultiBinary
6
+ from gym.spaces.tuple import Tuple
7
+ from gym.spaces.dict import Dict
8
+
9
+ from gym.spaces.utils import flatdim
10
+ from gym.spaces.utils import flatten_space
11
+ from gym.spaces.utils import flatten
12
+ from gym.spaces.utils import unflatten
13
+
14
+ __all__ = [
15
+ "Space",
16
+ "Box",
17
+ "Discrete",
18
+ "MultiDiscrete",
19
+ "MultiBinary",
20
+ "Tuple",
21
+ "Dict",
22
+ "flatdim",
23
+ "flatten_space",
24
+ "flatten",
25
+ "unflatten",
26
+ ]
gym-0.21.0/gym/spaces/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (785 Bytes). View file
 
gym-0.21.0/gym/spaces/__pycache__/box.cpython-38.pyc ADDED
Binary file (4.98 kB). View file
 
gym-0.21.0/gym/spaces/__pycache__/dict.cpython-38.pyc ADDED
Binary file (5.81 kB). View file