chenhaojun commited on
Commit
64ad37d
·
verified ·
1 Parent(s): d68fa90

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/1.0 +0 -0
  2. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/3.0 +0 -0
  3. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/depth/.zarray +24 -0
  4. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/.zarray +22 -0
  5. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/1.0 +0 -0
  6. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/17.0 +0 -0
  7. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/3.0 +0 -0
  8. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/4.0 +0 -0
  9. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/5.0 +0 -0
  10. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/0.0 +0 -0
  11. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/12.0 +0 -0
  12. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/13.0 +0 -0
  13. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/15.0 +0 -0
  14. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/5.0 +0 -0
  15. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/7.0 +0 -0
  16. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/9.0 +0 -0
  17. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/11.0.0.0 +0 -0
  18. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/13.0.0.0 +0 -0
  19. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/15.0.0.0 +0 -0
  20. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/2.0.0.0 +0 -0
  21. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/3.0.0.0 +0 -0
  22. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/5.0.0.0 +0 -0
  23. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/1.0 +0 -0
  24. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/11.0 +0 -0
  25. Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/8.0 +0 -0
  26. gym-0.21.0/gym/__pycache__/__init__.cpython-38.pyc +0 -0
  27. gym-0.21.0/gym/__pycache__/error.cpython-38.pyc +0 -0
  28. gym-0.21.0/gym/__pycache__/logger.cpython-38.pyc +0 -0
  29. gym-0.21.0/gym/envs/__init__.py +646 -0
  30. gym-0.21.0/gym/envs/__pycache__/__init__.cpython-38.pyc +0 -0
  31. gym-0.21.0/gym/envs/box2d/car_racing.py +652 -0
  32. gym-0.21.0/gym/envs/mujoco/ant_v3.py +148 -0
  33. gym-0.21.0/gym/envs/mujoco/humanoid.py +72 -0
  34. gym-0.21.0/gym/envs/mujoco/humanoidstandup.py +64 -0
  35. gym-0.21.0/gym/envs/mujoco/walker2d_v3.py +130 -0
  36. gym-0.21.0/gym/envs/robotics/assets/fetch/slide.xml +32 -0
  37. gym-0.21.0/gym/envs/robotics/assets/hand/shared_asset.xml +26 -0
  38. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl +0 -0
  39. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/gripper_link.stl +0 -0
  40. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl +0 -0
  41. gym-0.21.0/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl +0 -0
  42. gym-0.21.0/gym/envs/robotics/assets/stls/hand/F1.stl +0 -0
  43. gym-0.21.0/gym/envs/robotics/assets/stls/hand/TH2_z.stl +0 -0
  44. gym-0.21.0/gym/envs/robotics/assets/stls/hand/palm.stl +0 -0
  45. gym-0.21.0/gym/envs/toy_text/discrete.py +61 -0
  46. gym-0.21.0/gym/envs/unittest/memorize_digits.py +146 -0
  47. gym-0.21.0/gym/spaces/__init__.py +26 -0
  48. gym-0.21.0/gym/spaces/__pycache__/__init__.cpython-38.pyc +0 -0
  49. gym-0.21.0/gym/spaces/__pycache__/box.cpython-38.pyc +0 -0
  50. gym-0.21.0/gym/spaces/__pycache__/dict.cpython-38.pyc +0 -0
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/1.0 ADDED
Binary file (1.18 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/3.0 ADDED
Binary file (1.17 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/depth/.zarray ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 100,
4
+ 128,
5
+ 128
6
+ ],
7
+ "compressor": {
8
+ "blocksize": 0,
9
+ "clevel": 3,
10
+ "cname": "zstd",
11
+ "id": "blosc",
12
+ "shuffle": 1
13
+ },
14
+ "dtype": "<f4",
15
+ "fill_value": 0.0,
16
+ "filters": null,
17
+ "order": "C",
18
+ "shape": [
19
+ 2000,
20
+ 128,
21
+ 128
22
+ ],
23
+ "zarr_format": 2
24
+ }
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/.zarray ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunks": [
3
+ 100,
4
+ 39
5
+ ],
6
+ "compressor": {
7
+ "blocksize": 0,
8
+ "clevel": 3,
9
+ "cname": "zstd",
10
+ "id": "blosc",
11
+ "shuffle": 1
12
+ },
13
+ "dtype": "<f4",
14
+ "fill_value": 0.0,
15
+ "filters": null,
16
+ "order": "C",
17
+ "shape": [
18
+ 2000,
19
+ 39
20
+ ],
21
+ "zarr_format": 2
22
+ }
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/1.0 ADDED
Binary file (2.59 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/17.0 ADDED
Binary file (2.71 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/3.0 ADDED
Binary file (2.76 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/4.0 ADDED
Binary file (2.67 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/full_state/5.0 ADDED
Binary file (2.52 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/0.0 ADDED
Binary file (1.24 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/12.0 ADDED
Binary file (1.23 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/13.0 ADDED
Binary file (1.19 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/15.0 ADDED
Binary file (1.2 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/5.0 ADDED
Binary file (1.19 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/7.0 ADDED
Binary file (1.2 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/action/9.0 ADDED
Binary file (1.21 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/11.0.0.0 ADDED
Binary file (603 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/13.0.0.0 ADDED
Binary file (591 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/15.0.0.0 ADDED
Binary file (567 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/2.0.0.0 ADDED
Binary file (912 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/3.0.0.0 ADDED
Binary file (583 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/img/5.0.0.0 ADDED
Binary file (591 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/1.0 ADDED
Binary file (2.31 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/11.0 ADDED
Binary file (2.31 kB). View file
 
Metaworld/zarr_path: data/metaworld_door-open_expert.zarr/data/state/8.0 ADDED
Binary file (2.82 kB). View file
 
gym-0.21.0/gym/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (750 Bytes). View file
 
gym-0.21.0/gym/__pycache__/error.cpython-38.pyc ADDED
Binary file (6.65 kB). View file
 
gym-0.21.0/gym/__pycache__/logger.cpython-38.pyc ADDED
Binary file (1.18 kB). View file
 
gym-0.21.0/gym/envs/__init__.py ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gym.envs.registration import (
2
+ registry,
3
+ register,
4
+ make,
5
+ spec,
6
+ load_env_plugins as _load_env_plugins,
7
+ )
8
+
9
+ # Hook to load plugins from entry points
10
+ _load_env_plugins()
11
+
12
+
13
+ # Classic
14
+ # ----------------------------------------
15
+
16
+ register(
17
+ id="CartPole-v0",
18
+ entry_point="gym.envs.classic_control:CartPoleEnv",
19
+ max_episode_steps=200,
20
+ reward_threshold=195.0,
21
+ )
22
+
23
+ register(
24
+ id="CartPole-v1",
25
+ entry_point="gym.envs.classic_control:CartPoleEnv",
26
+ max_episode_steps=500,
27
+ reward_threshold=475.0,
28
+ )
29
+
30
+ register(
31
+ id="MountainCar-v0",
32
+ entry_point="gym.envs.classic_control:MountainCarEnv",
33
+ max_episode_steps=200,
34
+ reward_threshold=-110.0,
35
+ )
36
+
37
+ register(
38
+ id="MountainCarContinuous-v0",
39
+ entry_point="gym.envs.classic_control:Continuous_MountainCarEnv",
40
+ max_episode_steps=999,
41
+ reward_threshold=90.0,
42
+ )
43
+
44
+ register(
45
+ id="Pendulum-v1",
46
+ entry_point="gym.envs.classic_control:PendulumEnv",
47
+ max_episode_steps=200,
48
+ )
49
+
50
+ register(
51
+ id="Acrobot-v1",
52
+ entry_point="gym.envs.classic_control:AcrobotEnv",
53
+ reward_threshold=-100.0,
54
+ max_episode_steps=500,
55
+ )
56
+
57
+ # Box2d
58
+ # ----------------------------------------
59
+
60
+ register(
61
+ id="LunarLander-v2",
62
+ entry_point="gym.envs.box2d:LunarLander",
63
+ max_episode_steps=1000,
64
+ reward_threshold=200,
65
+ )
66
+
67
+ register(
68
+ id="LunarLanderContinuous-v2",
69
+ entry_point="gym.envs.box2d:LunarLanderContinuous",
70
+ max_episode_steps=1000,
71
+ reward_threshold=200,
72
+ )
73
+
74
+ register(
75
+ id="BipedalWalker-v3",
76
+ entry_point="gym.envs.box2d:BipedalWalker",
77
+ max_episode_steps=1600,
78
+ reward_threshold=300,
79
+ )
80
+
81
+ register(
82
+ id="BipedalWalkerHardcore-v3",
83
+ entry_point="gym.envs.box2d:BipedalWalkerHardcore",
84
+ max_episode_steps=2000,
85
+ reward_threshold=300,
86
+ )
87
+
88
+ register(
89
+ id="CarRacing-v0",
90
+ entry_point="gym.envs.box2d:CarRacing",
91
+ max_episode_steps=1000,
92
+ reward_threshold=900,
93
+ )
94
+
95
+ # Toy Text
96
+ # ----------------------------------------
97
+
98
+ register(
99
+ id="Blackjack-v1",
100
+ entry_point="gym.envs.toy_text:BlackjackEnv",
101
+ kwargs={"sab": True, "natural": False},
102
+ )
103
+
104
+ register(
105
+ id="FrozenLake-v1",
106
+ entry_point="gym.envs.toy_text:FrozenLakeEnv",
107
+ kwargs={"map_name": "4x4"},
108
+ max_episode_steps=100,
109
+ reward_threshold=0.70, # optimum = 0.74
110
+ )
111
+
112
+ register(
113
+ id="FrozenLake8x8-v1",
114
+ entry_point="gym.envs.toy_text:FrozenLakeEnv",
115
+ kwargs={"map_name": "8x8"},
116
+ max_episode_steps=200,
117
+ reward_threshold=0.85, # optimum = 0.91
118
+ )
119
+
120
+ register(
121
+ id="CliffWalking-v0",
122
+ entry_point="gym.envs.toy_text:CliffWalkingEnv",
123
+ )
124
+
125
+ register(
126
+ id="Taxi-v3",
127
+ entry_point="gym.envs.toy_text:TaxiEnv",
128
+ reward_threshold=8, # optimum = 8.46
129
+ max_episode_steps=200,
130
+ )
131
+
132
+ # Mujoco
133
+ # ----------------------------------------
134
+
135
+ # 2D
136
+
137
+ register(
138
+ id="Reacher-v2",
139
+ entry_point="gym.envs.mujoco:ReacherEnv",
140
+ max_episode_steps=50,
141
+ reward_threshold=-3.75,
142
+ )
143
+
144
+ register(
145
+ id="Pusher-v2",
146
+ entry_point="gym.envs.mujoco:PusherEnv",
147
+ max_episode_steps=100,
148
+ reward_threshold=0.0,
149
+ )
150
+
151
+ register(
152
+ id="Thrower-v2",
153
+ entry_point="gym.envs.mujoco:ThrowerEnv",
154
+ max_episode_steps=100,
155
+ reward_threshold=0.0,
156
+ )
157
+
158
+ register(
159
+ id="Striker-v2",
160
+ entry_point="gym.envs.mujoco:StrikerEnv",
161
+ max_episode_steps=100,
162
+ reward_threshold=0.0,
163
+ )
164
+
165
+ register(
166
+ id="InvertedPendulum-v2",
167
+ entry_point="gym.envs.mujoco:InvertedPendulumEnv",
168
+ max_episode_steps=1000,
169
+ reward_threshold=950.0,
170
+ )
171
+
172
+ register(
173
+ id="InvertedDoublePendulum-v2",
174
+ entry_point="gym.envs.mujoco:InvertedDoublePendulumEnv",
175
+ max_episode_steps=1000,
176
+ reward_threshold=9100.0,
177
+ )
178
+
179
+ register(
180
+ id="HalfCheetah-v2",
181
+ entry_point="gym.envs.mujoco:HalfCheetahEnv",
182
+ max_episode_steps=1000,
183
+ reward_threshold=4800.0,
184
+ )
185
+
186
+ register(
187
+ id="HalfCheetah-v3",
188
+ entry_point="gym.envs.mujoco.half_cheetah_v3:HalfCheetahEnv",
189
+ max_episode_steps=1000,
190
+ reward_threshold=4800.0,
191
+ )
192
+
193
+ register(
194
+ id="Hopper-v2",
195
+ entry_point="gym.envs.mujoco:HopperEnv",
196
+ max_episode_steps=1000,
197
+ reward_threshold=3800.0,
198
+ )
199
+
200
+ register(
201
+ id="Hopper-v3",
202
+ entry_point="gym.envs.mujoco.hopper_v3:HopperEnv",
203
+ max_episode_steps=1000,
204
+ reward_threshold=3800.0,
205
+ )
206
+
207
+ register(
208
+ id="Swimmer-v2",
209
+ entry_point="gym.envs.mujoco:SwimmerEnv",
210
+ max_episode_steps=1000,
211
+ reward_threshold=360.0,
212
+ )
213
+
214
+ register(
215
+ id="Swimmer-v3",
216
+ entry_point="gym.envs.mujoco.swimmer_v3:SwimmerEnv",
217
+ max_episode_steps=1000,
218
+ reward_threshold=360.0,
219
+ )
220
+
221
+ register(
222
+ id="Walker2d-v2",
223
+ max_episode_steps=1000,
224
+ entry_point="gym.envs.mujoco:Walker2dEnv",
225
+ )
226
+
227
+ register(
228
+ id="Walker2d-v3",
229
+ max_episode_steps=1000,
230
+ entry_point="gym.envs.mujoco.walker2d_v3:Walker2dEnv",
231
+ )
232
+
233
+ register(
234
+ id="Ant-v2",
235
+ entry_point="gym.envs.mujoco:AntEnv",
236
+ max_episode_steps=1000,
237
+ reward_threshold=6000.0,
238
+ )
239
+
240
+ register(
241
+ id="Ant-v3",
242
+ entry_point="gym.envs.mujoco.ant_v3:AntEnv",
243
+ max_episode_steps=1000,
244
+ reward_threshold=6000.0,
245
+ )
246
+
247
+ register(
248
+ id="Humanoid-v2",
249
+ entry_point="gym.envs.mujoco:HumanoidEnv",
250
+ max_episode_steps=1000,
251
+ )
252
+
253
+ register(
254
+ id="Humanoid-v3",
255
+ entry_point="gym.envs.mujoco.humanoid_v3:HumanoidEnv",
256
+ max_episode_steps=1000,
257
+ )
258
+
259
+ register(
260
+ id="HumanoidStandup-v2",
261
+ entry_point="gym.envs.mujoco:HumanoidStandupEnv",
262
+ max_episode_steps=1000,
263
+ )
264
+
265
+ # Robotics
266
+ # ----------------------------------------
267
+
268
+
269
+ def _merge(a, b):
270
+ a.update(b)
271
+ return a
272
+
273
+
274
+ for reward_type in ["sparse", "dense"]:
275
+ suffix = "Dense" if reward_type == "dense" else ""
276
+ kwargs = {
277
+ "reward_type": reward_type,
278
+ }
279
+
280
+ # Fetch
281
+ register(
282
+ id="FetchSlide{}-v1".format(suffix),
283
+ entry_point="gym.envs.robotics:FetchSlideEnv",
284
+ kwargs=kwargs,
285
+ max_episode_steps=50,
286
+ )
287
+
288
+ register(
289
+ id="FetchPickAndPlace{}-v1".format(suffix),
290
+ entry_point="gym.envs.robotics:FetchPickAndPlaceEnv",
291
+ kwargs=kwargs,
292
+ max_episode_steps=50,
293
+ )
294
+
295
+ register(
296
+ id="FetchReach{}-v1".format(suffix),
297
+ entry_point="gym.envs.robotics:FetchReachEnv",
298
+ kwargs=kwargs,
299
+ max_episode_steps=50,
300
+ )
301
+
302
+ register(
303
+ id="FetchPush{}-v1".format(suffix),
304
+ entry_point="gym.envs.robotics:FetchPushEnv",
305
+ kwargs=kwargs,
306
+ max_episode_steps=50,
307
+ )
308
+
309
+ # Hand
310
+ register(
311
+ id="HandReach{}-v0".format(suffix),
312
+ entry_point="gym.envs.robotics:HandReachEnv",
313
+ kwargs=kwargs,
314
+ max_episode_steps=50,
315
+ )
316
+
317
+ register(
318
+ id="HandManipulateBlockRotateZ{}-v0".format(suffix),
319
+ entry_point="gym.envs.robotics:HandBlockEnv",
320
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "z"}, kwargs),
321
+ max_episode_steps=100,
322
+ )
323
+
324
+ register(
325
+ id="HandManipulateBlockRotateZTouchSensors{}-v0".format(suffix),
326
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
327
+ kwargs=_merge(
328
+ {
329
+ "target_position": "ignore",
330
+ "target_rotation": "z",
331
+ "touch_get_obs": "boolean",
332
+ },
333
+ kwargs,
334
+ ),
335
+ max_episode_steps=100,
336
+ )
337
+
338
+ register(
339
+ id="HandManipulateBlockRotateZTouchSensors{}-v1".format(suffix),
340
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
341
+ kwargs=_merge(
342
+ {
343
+ "target_position": "ignore",
344
+ "target_rotation": "z",
345
+ "touch_get_obs": "sensordata",
346
+ },
347
+ kwargs,
348
+ ),
349
+ max_episode_steps=100,
350
+ )
351
+
352
+ register(
353
+ id="HandManipulateBlockRotateParallel{}-v0".format(suffix),
354
+ entry_point="gym.envs.robotics:HandBlockEnv",
355
+ kwargs=_merge(
356
+ {"target_position": "ignore", "target_rotation": "parallel"}, kwargs
357
+ ),
358
+ max_episode_steps=100,
359
+ )
360
+
361
+ register(
362
+ id="HandManipulateBlockRotateParallelTouchSensors{}-v0".format(suffix),
363
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
364
+ kwargs=_merge(
365
+ {
366
+ "target_position": "ignore",
367
+ "target_rotation": "parallel",
368
+ "touch_get_obs": "boolean",
369
+ },
370
+ kwargs,
371
+ ),
372
+ max_episode_steps=100,
373
+ )
374
+
375
+ register(
376
+ id="HandManipulateBlockRotateParallelTouchSensors{}-v1".format(suffix),
377
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
378
+ kwargs=_merge(
379
+ {
380
+ "target_position": "ignore",
381
+ "target_rotation": "parallel",
382
+ "touch_get_obs": "sensordata",
383
+ },
384
+ kwargs,
385
+ ),
386
+ max_episode_steps=100,
387
+ )
388
+
389
+ register(
390
+ id="HandManipulateBlockRotateXYZ{}-v0".format(suffix),
391
+ entry_point="gym.envs.robotics:HandBlockEnv",
392
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "xyz"}, kwargs),
393
+ max_episode_steps=100,
394
+ )
395
+
396
+ register(
397
+ id="HandManipulateBlockRotateXYZTouchSensors{}-v0".format(suffix),
398
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
399
+ kwargs=_merge(
400
+ {
401
+ "target_position": "ignore",
402
+ "target_rotation": "xyz",
403
+ "touch_get_obs": "boolean",
404
+ },
405
+ kwargs,
406
+ ),
407
+ max_episode_steps=100,
408
+ )
409
+
410
+ register(
411
+ id="HandManipulateBlockRotateXYZTouchSensors{}-v1".format(suffix),
412
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
413
+ kwargs=_merge(
414
+ {
415
+ "target_position": "ignore",
416
+ "target_rotation": "xyz",
417
+ "touch_get_obs": "sensordata",
418
+ },
419
+ kwargs,
420
+ ),
421
+ max_episode_steps=100,
422
+ )
423
+
424
+ register(
425
+ id="HandManipulateBlockFull{}-v0".format(suffix),
426
+ entry_point="gym.envs.robotics:HandBlockEnv",
427
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
428
+ max_episode_steps=100,
429
+ )
430
+
431
+ # Alias for "Full"
432
+ register(
433
+ id="HandManipulateBlock{}-v0".format(suffix),
434
+ entry_point="gym.envs.robotics:HandBlockEnv",
435
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
436
+ max_episode_steps=100,
437
+ )
438
+
439
+ register(
440
+ id="HandManipulateBlockTouchSensors{}-v0".format(suffix),
441
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
442
+ kwargs=_merge(
443
+ {
444
+ "target_position": "random",
445
+ "target_rotation": "xyz",
446
+ "touch_get_obs": "boolean",
447
+ },
448
+ kwargs,
449
+ ),
450
+ max_episode_steps=100,
451
+ )
452
+
453
+ register(
454
+ id="HandManipulateBlockTouchSensors{}-v1".format(suffix),
455
+ entry_point="gym.envs.robotics:HandBlockTouchSensorsEnv",
456
+ kwargs=_merge(
457
+ {
458
+ "target_position": "random",
459
+ "target_rotation": "xyz",
460
+ "touch_get_obs": "sensordata",
461
+ },
462
+ kwargs,
463
+ ),
464
+ max_episode_steps=100,
465
+ )
466
+
467
+ register(
468
+ id="HandManipulateEggRotate{}-v0".format(suffix),
469
+ entry_point="gym.envs.robotics:HandEggEnv",
470
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "xyz"}, kwargs),
471
+ max_episode_steps=100,
472
+ )
473
+
474
+ register(
475
+ id="HandManipulateEggRotateTouchSensors{}-v0".format(suffix),
476
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
477
+ kwargs=_merge(
478
+ {
479
+ "target_position": "ignore",
480
+ "target_rotation": "xyz",
481
+ "touch_get_obs": "boolean",
482
+ },
483
+ kwargs,
484
+ ),
485
+ max_episode_steps=100,
486
+ )
487
+
488
+ register(
489
+ id="HandManipulateEggRotateTouchSensors{}-v1".format(suffix),
490
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
491
+ kwargs=_merge(
492
+ {
493
+ "target_position": "ignore",
494
+ "target_rotation": "xyz",
495
+ "touch_get_obs": "sensordata",
496
+ },
497
+ kwargs,
498
+ ),
499
+ max_episode_steps=100,
500
+ )
501
+
502
+ register(
503
+ id="HandManipulateEggFull{}-v0".format(suffix),
504
+ entry_point="gym.envs.robotics:HandEggEnv",
505
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
506
+ max_episode_steps=100,
507
+ )
508
+
509
+ # Alias for "Full"
510
+ register(
511
+ id="HandManipulateEgg{}-v0".format(suffix),
512
+ entry_point="gym.envs.robotics:HandEggEnv",
513
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
514
+ max_episode_steps=100,
515
+ )
516
+
517
+ register(
518
+ id="HandManipulateEggTouchSensors{}-v0".format(suffix),
519
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
520
+ kwargs=_merge(
521
+ {
522
+ "target_position": "random",
523
+ "target_rotation": "xyz",
524
+ "touch_get_obs": "boolean",
525
+ },
526
+ kwargs,
527
+ ),
528
+ max_episode_steps=100,
529
+ )
530
+
531
+ register(
532
+ id="HandManipulateEggTouchSensors{}-v1".format(suffix),
533
+ entry_point="gym.envs.robotics:HandEggTouchSensorsEnv",
534
+ kwargs=_merge(
535
+ {
536
+ "target_position": "random",
537
+ "target_rotation": "xyz",
538
+ "touch_get_obs": "sensordata",
539
+ },
540
+ kwargs,
541
+ ),
542
+ max_episode_steps=100,
543
+ )
544
+
545
+ register(
546
+ id="HandManipulatePenRotate{}-v0".format(suffix),
547
+ entry_point="gym.envs.robotics:HandPenEnv",
548
+ kwargs=_merge({"target_position": "ignore", "target_rotation": "xyz"}, kwargs),
549
+ max_episode_steps=100,
550
+ )
551
+
552
+ register(
553
+ id="HandManipulatePenRotateTouchSensors{}-v0".format(suffix),
554
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
555
+ kwargs=_merge(
556
+ {
557
+ "target_position": "ignore",
558
+ "target_rotation": "xyz",
559
+ "touch_get_obs": "boolean",
560
+ },
561
+ kwargs,
562
+ ),
563
+ max_episode_steps=100,
564
+ )
565
+
566
+ register(
567
+ id="HandManipulatePenRotateTouchSensors{}-v1".format(suffix),
568
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
569
+ kwargs=_merge(
570
+ {
571
+ "target_position": "ignore",
572
+ "target_rotation": "xyz",
573
+ "touch_get_obs": "sensordata",
574
+ },
575
+ kwargs,
576
+ ),
577
+ max_episode_steps=100,
578
+ )
579
+
580
+ register(
581
+ id="HandManipulatePenFull{}-v0".format(suffix),
582
+ entry_point="gym.envs.robotics:HandPenEnv",
583
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
584
+ max_episode_steps=100,
585
+ )
586
+
587
+ # Alias for "Full"
588
+ register(
589
+ id="HandManipulatePen{}-v0".format(suffix),
590
+ entry_point="gym.envs.robotics:HandPenEnv",
591
+ kwargs=_merge({"target_position": "random", "target_rotation": "xyz"}, kwargs),
592
+ max_episode_steps=100,
593
+ )
594
+
595
+ register(
596
+ id="HandManipulatePenTouchSensors{}-v0".format(suffix),
597
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
598
+ kwargs=_merge(
599
+ {
600
+ "target_position": "random",
601
+ "target_rotation": "xyz",
602
+ "touch_get_obs": "boolean",
603
+ },
604
+ kwargs,
605
+ ),
606
+ max_episode_steps=100,
607
+ )
608
+
609
+ register(
610
+ id="HandManipulatePenTouchSensors{}-v1".format(suffix),
611
+ entry_point="gym.envs.robotics:HandPenTouchSensorsEnv",
612
+ kwargs=_merge(
613
+ {
614
+ "target_position": "random",
615
+ "target_rotation": "xyz",
616
+ "touch_get_obs": "sensordata",
617
+ },
618
+ kwargs,
619
+ ),
620
+ max_episode_steps=100,
621
+ )
622
+
623
+ # Unit test
624
+ # ---------
625
+
626
+ register(
627
+ id="CubeCrash-v0",
628
+ entry_point="gym.envs.unittest:CubeCrash",
629
+ reward_threshold=0.9,
630
+ )
631
+ register(
632
+ id="CubeCrashSparse-v0",
633
+ entry_point="gym.envs.unittest:CubeCrashSparse",
634
+ reward_threshold=0.9,
635
+ )
636
+ register(
637
+ id="CubeCrashScreenBecomesBlack-v0",
638
+ entry_point="gym.envs.unittest:CubeCrashScreenBecomesBlack",
639
+ reward_threshold=0.9,
640
+ )
641
+
642
+ register(
643
+ id="MemorizeDigits-v0",
644
+ entry_point="gym.envs.unittest:MemorizeDigits",
645
+ reward_threshold=20,
646
+ )
gym-0.21.0/gym/envs/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (7.21 kB). View file
 
gym-0.21.0/gym/envs/box2d/car_racing.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Easiest continuous control task to learn from pixels, a top-down racing
3
+ environment.
4
+ Discrete control is reasonable in this environment as well, on/off
5
+ discretization is fine.
6
+
7
+ State consists of STATE_W x STATE_H pixels.
8
+
9
+ The reward is -0.1 every frame and +1000/N for every track tile visited, where
10
+ N is the total number of tiles visited in the track. For example, if you have
11
+ finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points.
12
+
13
+ The game is solved when the agent consistently gets 900+ points. The generated
14
+ track is random every episode.
15
+
16
+ The episode finishes when all the tiles are visited. The car also can go
17
+ outside of the PLAYFIELD - that is far off the track, then it will get -100
18
+ and die.
19
+
20
+ Some indicators are shown at the bottom of the window along with the state RGB
21
+ buffer. From left to right: the true speed, four ABS sensors, the steering
22
+ wheel position and gyroscope.
23
+
24
+ To play yourself (it's rather fast for humans), type:
25
+
26
+ python gym/envs/box2d/car_racing.py
27
+
28
+ Remember it's a powerful rear-wheel drive car - don't press the accelerator
29
+ and turn at the same time.
30
+
31
+ Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
32
+ """
33
+ import sys
34
+ import math
35
+ import numpy as np
36
+
37
+ import Box2D
38
+ from Box2D.b2 import fixtureDef
39
+ from Box2D.b2 import polygonShape
40
+ from Box2D.b2 import contactListener
41
+
42
+ import gym
43
+ from gym import spaces
44
+ from gym.envs.box2d.car_dynamics import Car
45
+ from gym.utils import seeding, EzPickle
46
+
47
+ import pyglet
48
+
49
+ pyglet.options["debug_gl"] = False
50
+ from pyglet import gl
51
+
52
+ STATE_W = 96 # less than Atari 160x192
53
+ STATE_H = 96
54
+ VIDEO_W = 600
55
+ VIDEO_H = 400
56
+ WINDOW_W = 1000
57
+ WINDOW_H = 800
58
+
59
+ SCALE = 6.0 # Track scale
60
+ TRACK_RAD = 900 / SCALE # Track is heavily morphed circle with this radius
61
+ PLAYFIELD = 2000 / SCALE # Game over boundary
62
+ FPS = 50 # Frames per second
63
+ ZOOM = 2.7 # Camera zoom
64
+ ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)
65
+
66
+
67
+ TRACK_DETAIL_STEP = 21 / SCALE
68
+ TRACK_TURN_RATE = 0.31
69
+ TRACK_WIDTH = 40 / SCALE
70
+ BORDER = 8 / SCALE
71
+ BORDER_MIN_COUNT = 4
72
+
73
+ ROAD_COLOR = [0.4, 0.4, 0.4]
74
+
75
+
76
+ class FrictionDetector(contactListener):
77
+ def __init__(self, env):
78
+ contactListener.__init__(self)
79
+ self.env = env
80
+
81
+ def BeginContact(self, contact):
82
+ self._contact(contact, True)
83
+
84
+ def EndContact(self, contact):
85
+ self._contact(contact, False)
86
+
87
+ def _contact(self, contact, begin):
88
+ tile = None
89
+ obj = None
90
+ u1 = contact.fixtureA.body.userData
91
+ u2 = contact.fixtureB.body.userData
92
+ if u1 and "road_friction" in u1.__dict__:
93
+ tile = u1
94
+ obj = u2
95
+ if u2 and "road_friction" in u2.__dict__:
96
+ tile = u2
97
+ obj = u1
98
+ if not tile:
99
+ return
100
+
101
+ tile.color[0] = ROAD_COLOR[0]
102
+ tile.color[1] = ROAD_COLOR[1]
103
+ tile.color[2] = ROAD_COLOR[2]
104
+ if not obj or "tiles" not in obj.__dict__:
105
+ return
106
+ if begin:
107
+ obj.tiles.add(tile)
108
+ if not tile.road_visited:
109
+ tile.road_visited = True
110
+ self.env.reward += 1000.0 / len(self.env.track)
111
+ self.env.tile_visited_count += 1
112
+ else:
113
+ obj.tiles.remove(tile)
114
+
115
+
116
+ class CarRacing(gym.Env, EzPickle):
117
+ metadata = {
118
+ "render.modes": ["human", "rgb_array", "state_pixels"],
119
+ "video.frames_per_second": FPS,
120
+ }
121
+
122
+ def __init__(self, verbose=1):
123
+ EzPickle.__init__(self)
124
+ self.seed()
125
+ self.contactListener_keepref = FrictionDetector(self)
126
+ self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
127
+ self.viewer = None
128
+ self.invisible_state_window = None
129
+ self.invisible_video_window = None
130
+ self.road = None
131
+ self.car = None
132
+ self.reward = 0.0
133
+ self.prev_reward = 0.0
134
+ self.verbose = verbose
135
+ self.fd_tile = fixtureDef(
136
+ shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
137
+ )
138
+
139
+ self.action_space = spaces.Box(
140
+ np.array([-1, 0, 0]).astype(np.float32),
141
+ np.array([+1, +1, +1]).astype(np.float32),
142
+ ) # steer, gas, brake
143
+
144
+ self.observation_space = spaces.Box(
145
+ low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
146
+ )
147
+
148
+ def seed(self, seed=None):
149
+ self.np_random, seed = seeding.np_random(seed)
150
+ return [seed]
151
+
152
    def _destroy(self):
        # Tear down all Box2D bodies from a previous episode.  No-op on the
        # very first reset, before any road has been built.
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()
159
+
160
    def _create_track(self):
        """Procedurally generate one closed-loop track.

        Populates ``self.road`` (Box2D static sensor bodies),
        ``self.road_poly`` (render polygons) and ``self.track``.
        Returns True on success, False when the generated loop is not
        well formed — the caller (reset) retries until success.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            # Pin the first and last checkpoints so the loop closes at a
            # known angle/radius.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            # Safety valve: give up after 2500 iterations rather than loop
            # forever on a degenerate checkpoint layout.
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1 : i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            # userData points back at the body so FrictionDetector can find
            # the tile attributes set below.
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            # Sensor fixtures report contacts but exert no collision force.
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))
                )
        self.track = track
        return True
345
+
346
    def reset(self):
        """Start a new episode; return the first observation."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; retry until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many"
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        # step(None) advances physics once without control input and
        # yields the initial state frame.
        return self.step(None)[0]
366
+
367
    def step(self, action):
        """Advance the simulation by one frame.

        Parameters
        ----------
        action : sequence of 3 floats, or None
            [steer, gas, brake].  None performs a physics step without
            control input (used once from reset()).

        Returns
        -------
        tuple
            (observation, step_reward, done, info_dict)
        """
        if action is not None:
            # Steering sign is flipped so positive action steers right.
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            # Heavy penalty for leaving the playfield entirely.
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}
396
+
397
+ def render(self, mode="human"):
398
+ assert mode in ["human", "state_pixels", "rgb_array"]
399
+ if self.viewer is None:
400
+ from gym.envs.classic_control import rendering
401
+
402
+ self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
403
+ self.score_label = pyglet.text.Label(
404
+ "0000",
405
+ font_size=36,
406
+ x=20,
407
+ y=WINDOW_H * 2.5 / 40.00,
408
+ anchor_x="left",
409
+ anchor_y="center",
410
+ color=(255, 255, 255, 255),
411
+ )
412
+ self.transform = rendering.Transform()
413
+
414
+ if "t" not in self.__dict__:
415
+ return # reset() not called yet
416
+
417
+ # Animate zoom first second:
418
+ zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
419
+ scroll_x = self.car.hull.position[0]
420
+ scroll_y = self.car.hull.position[1]
421
+ angle = -self.car.hull.angle
422
+ vel = self.car.hull.linearVelocity
423
+ if np.linalg.norm(vel) > 0.5:
424
+ angle = math.atan2(vel[0], vel[1])
425
+ self.transform.set_scale(zoom, zoom)
426
+ self.transform.set_translation(
427
+ WINDOW_W / 2
428
+ - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)),
429
+ WINDOW_H / 4
430
+ - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle)),
431
+ )
432
+ self.transform.set_rotation(angle)
433
+
434
+ self.car.draw(self.viewer, mode != "state_pixels")
435
+
436
+ arr = None
437
+ win = self.viewer.window
438
+ win.switch_to()
439
+ win.dispatch_events()
440
+
441
+ win.clear()
442
+ t = self.transform
443
+ if mode == "rgb_array":
444
+ VP_W = VIDEO_W
445
+ VP_H = VIDEO_H
446
+ elif mode == "state_pixels":
447
+ VP_W = STATE_W
448
+ VP_H = STATE_H
449
+ else:
450
+ pixel_scale = 1
451
+ if hasattr(win.context, "_nscontext"):
452
+ pixel_scale = (
453
+ win.context._nscontext.view().backingScaleFactor()
454
+ ) # pylint: disable=protected-access
455
+ VP_W = int(pixel_scale * WINDOW_W)
456
+ VP_H = int(pixel_scale * WINDOW_H)
457
+
458
+ gl.glViewport(0, 0, VP_W, VP_H)
459
+ t.enable()
460
+ self.render_road()
461
+ for geom in self.viewer.onetime_geoms:
462
+ geom.render()
463
+ self.viewer.onetime_geoms = []
464
+ t.disable()
465
+ self.render_indicators(WINDOW_W, WINDOW_H)
466
+
467
+ if mode == "human":
468
+ win.flip()
469
+ return self.viewer.isopen
470
+
471
+ image_data = (
472
+ pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
473
+ )
474
+ arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="")
475
+ arr = arr.reshape(VP_H, VP_W, 4)
476
+ arr = arr[::-1, :, 0:3]
477
+
478
+ return arr
479
+
480
+ def close(self):
481
+ if self.viewer is not None:
482
+ self.viewer.close()
483
+ self.viewer = None
484
+
485
    def render_road(self):
        """Draw the green field, the lighter grass checker pattern and all
        road/kerb polygons as a single GL_QUADS vertex list."""
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        # One big quad covering the whole playfield.
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        # Lighter-green checker squares laid over the field.
        k = PLAYFIELD / 20.0
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend(
                    [
                        k * x + k,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + k,
                        0,
                        k * x + k,
                        k * y + k,
                        0,
                    ]
                )

        # Road tiles and kerb quads accumulated by _create_track().
        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()
533
+
534
    def render_indicators(self, W, H):
        """Draw the HUD strip along the bottom of a W x H window: speed,
        per-wheel ABS sensors, steering angle, angular velocity and the
        numeric score."""
        s = W / 40.0
        h = H / 40.0
        # Black background bar across the bottom of the window.
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            # Bar of height proportional to val at horizontal slot `place`.
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    place * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h,
                    0,
                    (place + 0) * s,
                    h,
                    0,
                ]
            )

        def horiz_ind(place, val, color):
            # Horizontal bar of width proportional to val at slot `place`.
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    (place + 0) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    2 * h,
                    0,
                    (place + 0) * s,
                    2 * h,
                    0,
                ]
            )

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3, ("v3f", polygons), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
597
+
598
+
599
if __name__ == "__main__":
    # Interactive demo: drive the car with the arrow keys.  Enter restarts
    # the episode.
    from pyglet.window import key

    # Shared action vector [steer, gas, brake] mutated by the key handlers.
    a = np.array([0.0, 0.0, 0.0])

    def key_press(k, mod):
        global restart
        if k == 0xFF0D:  # Enter key
            restart = True
        if k == key.LEFT:
            a[0] = -1.0
        if k == key.RIGHT:
            a[0] = +1.0
        if k == key.UP:
            a[1] = +1.0
        if k == key.DOWN:
            a[2] = +0.8  # set 1.0 for wheels to block to zero rotation

    def key_release(k, mod):
        # Only clear steering if the released key matches the current
        # direction, so overlapping left/right presses behave sanely.
        if k == key.LEFT and a[0] == -1.0:
            a[0] = 0
        if k == key.RIGHT and a[0] == +1.0:
            a[0] = 0
        if k == key.UP:
            a[1] = 0
        if k == key.DOWN:
            a[2] = 0

    env = CarRacing()
    env.render()
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release
    record_video = False
    if record_video:
        from gym.wrappers.monitor import Monitor

        env = Monitor(env, "/tmp/video-test", force=True)
    isopen = True
    while isopen:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        while True:
            s, r, done, info = env.step(a)
            total_reward += r
            if steps % 200 == 0 or done:
                print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
                print("step {} total_reward {:+0.2f}".format(steps, total_reward))
            steps += 1
            isopen = env.render()
            if done or restart or isopen == False:
                break
    env.close()
gym-0.21.0/gym/envs/mujoco/ant_v3.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym import utils
3
+ from gym.envs.mujoco import mujoco_env
4
+
5
+
6
# Default camera settings applied attribute-by-attribute in viewer_setup().
DEFAULT_CAMERA_CONFIG = {
    "distance": 4.0,
}
9
+
10
+
11
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Ant locomotion task (v3).

    Reward = forward (x) velocity + survival bonus - control cost -
    contact-force cost.  The episode terminates when the ant leaves the
    healthy height range (if ``terminate_when_unhealthy``).
    """

    def __init__(
        self,
        xml_file="ant.xml",
        ctrl_cost_weight=0.5,
        contact_cost_weight=5e-4,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.2, 1.0),
        contact_force_range=(-1.0, 1.0),
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
    ):
        # EzPickle stores the constructor arguments so the env can be
        # re-created when pickled.
        utils.EzPickle.__init__(**locals())

        self._ctrl_cost_weight = ctrl_cost_weight
        self._contact_cost_weight = contact_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range

        self._contact_force_range = contact_force_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # frame_skip=5: each env step advances the simulator 5 frames.
        mujoco_env.MujocoEnv.__init__(self, xml_file, 5)

    @property
    def healthy_reward(self):
        """Survival bonus; paid on every step when termination is enabled,
        and only while actually healthy otherwise."""
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        """Quadratic penalty on the action vector."""
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def contact_forces(self):
        """External contact forces, clipped to the configured range."""
        raw_contact_forces = self.sim.data.cfrc_ext
        min_value, max_value = self._contact_force_range
        contact_forces = np.clip(raw_contact_forces, min_value, max_value)
        return contact_forces

    @property
    def contact_cost(self):
        """Quadratic penalty on the clipped contact forces."""
        contact_cost = self._contact_cost_weight * np.sum(
            np.square(self.contact_forces)
        )
        return contact_cost

    @property
    def is_healthy(self):
        # Healthy = all state values finite and torso height within range.
        state = self.state_vector()
        min_z, max_z = self._healthy_z_range
        is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
        return is_healthy

    @property
    def done(self):
        done = not self.is_healthy if self._terminate_when_unhealthy else False
        return done

    def step(self, action):
        """Apply ``action`` for ``frame_skip`` frames and return
        (observation, reward, done, info)."""
        xy_position_before = self.get_body_com("torso")[:2].copy()
        self.do_simulation(action, self.frame_skip)
        xy_position_after = self.get_body_com("torso")[:2].copy()

        # Finite-difference velocity over the whole env step.
        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost

        forward_reward = x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost + contact_cost

        reward = rewards - costs
        done = self.done
        observation = self._get_obs()
        info = {
            "reward_forward": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "reward_contact": -contact_cost,
            "reward_survive": healthy_reward,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        return observation, reward, done, info

    def _get_obs(self):
        """Flat observation: positions, velocities, clipped contact forces."""
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()
        contact_force = self.contact_forces.flat.copy()

        if self._exclude_current_positions_from_observation:
            # Drop the first two qpos entries so the observation does not
            # encode absolute planar position.
            position = position[2:]

        observations = np.concatenate((position, velocity, contact_force))

        return observations

    def reset_model(self):
        """Re-initialise qpos/qvel around the defaults with small noise."""
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        # Velocities get Gaussian (not uniform) noise.
        qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn(
            self.model.nv
        )
        self.set_state(qpos, qvel)

        observation = self._get_obs()

        return observation

    def viewer_setup(self):
        # Apply DEFAULT_CAMERA_CONFIG; ndarray values (e.g. lookat) are
        # copied element-wise, scalars assigned directly.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
gym-0.21.0/gym/envs/mujoco/humanoid.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym.envs.mujoco import mujoco_env
3
+ from gym import utils
4
+
5
+
6
def mass_center(model, sim):
    """Return the x-coordinate of the model's centre of mass.

    The per-body masses weight the per-body COM positions (``xipos``);
    only the first (x) component of the weighted mean is returned.
    """
    body_mass = np.expand_dims(model.body_mass, 1)
    weighted_positions = body_mass * sim.data.xipos
    center = np.sum(weighted_positions, 0) / np.sum(body_mass)
    return center[0]
10
+
11
+
12
class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Humanoid locomotion task: reward forward motion of the centre of
    mass plus an alive bonus, minus control and impact costs."""

    def __init__(self):
        # frame_skip=5: each env step advances the simulator 5 frames.
        mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        # Flat observation: joint positions (first two qpos entries
        # dropped, so absolute planar position is excluded), velocities,
        # body inertias/velocities, actuator and external contact forces.
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` frames; return
        (obs, reward, done, info)."""
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        pos_after = mass_center(self.model, self.sim)
        alive_bonus = 5.0
        data = self.sim.data
        # Forward (x) velocity of the centre of mass.
        lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        # Impact cost is capped at 10 so hard contacts can't dominate.
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
        qpos = self.sim.data.qpos
        # Episode ends when torso height leaves the [1.0, 2.0] band.
        done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
        return (
            self._get_obs(),
            reward,
            done,
            dict(
                reward_linvel=lin_vel_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_alive=alive_bonus,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        """Re-initialise state near the defaults with small uniform noise."""
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        # Track the torso from a slightly elevated side view.
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 2.0
        self.viewer.cam.elevation = -20
gym-0.21.0/gym/envs/mujoco/humanoidstandup.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gym.envs.mujoco import mujoco_env
2
+ from gym import utils
3
+ import numpy as np
4
+
5
+
6
class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Humanoid stand-up task: reward torso height gained per unit time,
    minus control and impact costs.  Episodes never terminate early."""

    def __init__(self):
        # frame_skip=5: each env step advances the simulator 5 frames.
        mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5)
        utils.EzPickle.__init__(self)

    def _get_obs(self):
        # Flat observation: joint positions (first two qpos entries
        # dropped), velocities, body inertias/velocities, actuator and
        # external contact forces.
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` frames; return
        (obs, reward, done, info).  ``done`` is always False."""
        self.do_simulation(a, self.frame_skip)
        pos_after = self.sim.data.qpos[2]
        data = self.sim.data
        # Reward is proportional to torso height per simulator timestep.
        uph_cost = (pos_after - 0) / self.model.opt.timestep

        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        # Impact cost is capped at 10 so hard contacts can't dominate.
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

        done = bool(False)
        return (
            self._get_obs(),
            reward,
            done,
            dict(
                reward_linup=uph_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        """Re-initialise state near the defaults with small uniform noise."""
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        # Track the torso from a slightly elevated side view.
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 0.8925
        self.viewer.cam.elevation = -20
gym-0.21.0/gym/envs/mujoco/walker2d_v3.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from gym.envs.mujoco import mujoco_env
3
+ from gym import utils
4
+
5
+
6
# Default camera pose applied in viewer_setup(); ndarray values (lookat)
# are copied element-wise into the viewer camera, scalars assigned directly.
DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 2,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 1.15)),
    "elevation": -20.0,
}
12
+
13
+
14
class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """Walker2d locomotion task (v3).

    Reward = weighted forward (x) velocity + survival bonus - control
    cost.  The episode terminates when torso height or angle leaves the
    healthy ranges (if ``terminate_when_unhealthy``).
    """

    def __init__(
        self,
        xml_file="walker2d.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.8, 2.0),
        healthy_angle_range=(-1.0, 1.0),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
    ):
        # EzPickle stores the constructor arguments so the env can be
        # re-created when pickled.
        utils.EzPickle.__init__(**locals())

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy

        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range

        self._reset_noise_scale = reset_noise_scale

        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # frame_skip=4: each env step advances the simulator 4 frames.
        mujoco_env.MujocoEnv.__init__(self, xml_file, 4)

    @property
    def healthy_reward(self):
        """Survival bonus; paid on every step when termination is enabled,
        and only while actually healthy otherwise."""
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        """Quadratic penalty on the action vector."""
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        # qpos[1] is torso height, qpos[2] its pitch angle; both must be
        # strictly inside their healthy ranges.
        z, angle = self.sim.data.qpos[1:3]

        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle
        is_healthy = healthy_z and healthy_angle

        return is_healthy

    @property
    def done(self):
        done = not self.is_healthy if self._terminate_when_unhealthy else False
        return done

    def _get_obs(self):
        """Flat observation: positions and velocities (clipped to ±10)."""
        position = self.sim.data.qpos.flat.copy()
        velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            # Drop qpos[0] so the observation does not encode absolute
            # x position.
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        """Apply ``action`` for ``frame_skip`` frames and return
        (observation, reward, done, info)."""
        x_position_before = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.sim.data.qpos[0]
        # Finite-difference forward velocity over the whole env step.
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        done = self.done
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        return observation, reward, done, info

    def reset_model(self):
        """Re-initialise qpos/qvel around the defaults with small uniform
        noise."""
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        # Apply DEFAULT_CAMERA_CONFIG; ndarray values (lookat) are copied
        # element-wise, scalars assigned directly.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)
gym-0.21.0/gym/envs/robotics/assets/fetch/slide.xml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<?xml version="1.0" encoding="utf-8"?>
<!-- Fetch "slide" task scene: a floor plane with a target site, the shared
     Fetch robot (included from robot.xml/shared.xml), a low-friction table
     and a free-floating puck (object0) to be slid to the target. -->
<mujoco>
	<compiler angle="radian" coordinate="local" meshdir="../stls/fetch" texturedir="../textures"></compiler>
	<option timestep="0.002">
		<flag warmstart="enable"></flag>
	</option>

	<include file="shared.xml"></include>

	<worldbody>
		<geom name="floor0" pos="1 0.75 0" size="1.05 0.7 1" type="plane" condim="3" material="floor_mat"></geom>
		<body name="floor0" pos="1 0.75 0">
			<site name="target0" pos="0 0 0.5" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
		</body>

		<include file="robot.xml"></include>

		<body name="table0" pos="1.32441906 0.75018422 0.2">
			<geom size="0.625 0.45 0.2" type="box" condim="3" name="table0" material="table_mat" mass="2000" friction="0.1 0.005 0.0001"></geom>
		</body>

		<body name="object0" pos="0.025 0.025 0.02">
			<joint name="object0:joint" type="free" damping="0.01"></joint>
			<geom size="0.025 0.02" type="cylinder" condim="3" name="object0" material="puck_mat" friction="0.1 0.005 0.0001" mass="2"></geom>
			<site name="object0" pos="0 0 0" size="0.02 0.02 0.02" rgba="1 0 0 1" type="sphere"></site>
		</body>

		<light directional="true" ambient="0.2 0.2 0.2" diffuse="0.8 0.8 0.8" specular="0.3 0.3 0.3" castshadow="false" pos="0 0 4" dir="0 0 -1" name="light0"></light>
	</worldbody>

	<actuator></actuator>
</mujoco>
gym-0.21.0/gym/envs/robotics/assets/hand/shared_asset.xml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- See LICENSE.md for legal notices. LICENSE.md must be kept together with this file. -->
2
+ <mujoco>
3
+ <texture type="skybox" builtin="gradient" rgb1="0.44 0.85 0.56" rgb2="0.46 0.87 0.58" width="32" height="32"></texture>
4
+
5
+ <texture name="robot0:texplane" type="2d" builtin="checker" rgb1="0.2 0.3 0.4" rgb2="0.1 0.15 0.2" width="512" height="512"></texture>
6
+ <texture name="robot0:texgeom" type="cube" builtin="flat" mark="cross" width="127" height="127" rgb1="0.3 0.6 0.5" rgb2="0.3 0.6 0.5" markrgb="0 0 0" random="0.01"></texture>
7
+
8
+ <material name="robot0:MatGnd" reflectance="0.5" texture="robot0:texplane" texrepeat="1 1" texuniform="true"></material>
9
+ <material name="robot0:MatColl" specular="1" shininess="0.3" reflectance="0.5" rgba="0.4 0.5 0.6 1"></material>
10
+ <material name="robot0:MatViz" specular="0.75" shininess="0.1" reflectance="0.5" rgba="0.93 0.93 0.93 1"></material>
11
+ <material name="robot0:object" texture="robot0:texgeom" texuniform="false"></material>
12
+ <material name="floor_mat" specular="0" shininess="0.5" reflectance="0" rgba="0.2 0.2 0.2 0"></material>
13
+
14
+ <mesh name="robot0:forearm" file="forearm_electric.stl"></mesh>
15
+ <mesh name="robot0:forearm_cvx" file="forearm_electric_cvx.stl"></mesh>
16
+ <mesh name="robot0:wrist" scale="0.001 0.001 0.001" file="wrist.stl"></mesh>
17
+ <mesh name="robot0:palm" scale="0.001 0.001 0.001" file="palm.stl"></mesh>
18
+ <mesh name="robot0:knuckle" scale="0.001 0.001 0.001" file="knuckle.stl"></mesh>
19
+ <mesh name="robot0:F3" scale="0.001 0.001 0.001" file="F3.stl"></mesh>
20
+ <mesh name="robot0:F2" scale="0.001 0.001 0.001" file="F2.stl"></mesh>
21
+ <mesh name="robot0:F1" scale="0.001 0.001 0.001" file="F1.stl"></mesh>
22
+ <mesh name="robot0:lfmetacarpal" scale="0.001 0.001 0.001" file="lfmetacarpal.stl"></mesh>
23
+ <mesh name="robot0:TH3_z" scale="0.001 0.001 0.001" file="TH3_z.stl"></mesh>
24
+ <mesh name="robot0:TH2_z" scale="0.001 0.001 0.001" file="TH2_z.stl"></mesh>
25
+ <mesh name="robot0:TH1_z" scale="0.001 0.001 0.001" file="TH1_z.stl"></mesh>
26
+ </mujoco>
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl ADDED
Binary file (48.7 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/gripper_link.stl ADDED
Binary file (75.4 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl ADDED
Binary file (68.7 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl ADDED
Binary file (49.8 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/F1.stl ADDED
Binary file (17.1 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/TH2_z.stl ADDED
Binary file (63.2 kB). View file
 
gym-0.21.0/gym/envs/robotics/assets/stls/hand/palm.stl ADDED
Binary file (348 kB). View file
 
gym-0.21.0/gym/envs/toy_text/discrete.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from gym import Env, spaces
4
+ from gym.utils import seeding
5
+
6
+
7
+ def categorical_sample(prob_n, np_random):
8
+ """
9
+ Sample from categorical distribution
10
+ Each row specifies class probabilities
11
+ """
12
+ prob_n = np.asarray(prob_n)
13
+ csprob_n = np.cumsum(prob_n)
14
+ return (csprob_n > np_random.rand()).argmax()
15
+
16
+
17
+ class DiscreteEnv(Env):
18
+
19
+ """
20
+ Has the following members
21
+ - nS: number of states
22
+ - nA: number of actions
23
+ - P: transitions (*)
24
+ - isd: initial state distribution (**)
25
+
26
+ (*) dictionary of lists, where
27
+ P[s][a] == [(probability, nextstate, reward, done), ...]
28
+ (**) list or array of length nS
29
+
30
+
31
+ """
32
+
33
+ def __init__(self, nS, nA, P, isd):
34
+ self.P = P
35
+ self.isd = isd
36
+ self.lastaction = None # for rendering
37
+ self.nS = nS
38
+ self.nA = nA
39
+
40
+ self.action_space = spaces.Discrete(self.nA)
41
+ self.observation_space = spaces.Discrete(self.nS)
42
+
43
+ self.seed()
44
+ self.s = categorical_sample(self.isd, self.np_random)
45
+
46
+ def seed(self, seed=None):
47
+ self.np_random, seed = seeding.np_random(seed)
48
+ return [seed]
49
+
50
+ def reset(self):
51
+ self.s = categorical_sample(self.isd, self.np_random)
52
+ self.lastaction = None
53
+ return int(self.s)
54
+
55
+ def step(self, a):
56
+ transitions = self.P[self.s][a]
57
+ i = categorical_sample([t[0] for t in transitions], self.np_random)
58
+ p, s, r, d = transitions[i]
59
+ self.s = s
60
+ self.lastaction = a
61
+ return (int(s), r, d, {"prob": p})
gym-0.21.0/gym/envs/unittest/memorize_digits.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gym
3
+ from gym import spaces
4
+ from gym.utils import seeding
5
+
6
+ # Unit test environment for CNNs.
7
+ # Looks like this (RGB observations):
8
+ #
9
+ # ---------------------------
10
+ # | |
11
+ # | ****** |
12
+ # | ****** |
13
+ # | ** ** |
14
+ # | ** ** |
15
+ # | ** |
16
+ # | ** |
17
+ # | **** |
18
+ # | **** |
19
+ # | **** |
20
+ # | **** |
21
+ # | ********** |
22
+ # | ********** |
23
+ # | |
24
+ # ---------------------------
25
+ #
26
+ # Agent should hit action 2 to gain reward. Catches off-by-one errors in your agent.
27
+ #
28
+ # To see how it works, run:
29
+ #
30
+ # python examples/agents/keyboard_agent.py MemorizeDigits-v0
31
+
32
+ FIELD_W = 32
33
+ FIELD_H = 24
34
+
35
+ bogus_mnist = [
36
+ [" **** ", "* *", "* *", "* *", "* *", " **** "],
37
+ [" ** ", " * * ", " * ", " * ", " * ", " *** "],
38
+ [" **** ", "* *", " *", " *** ", "** ", "******"],
39
+ [" **** ", "* *", " ** ", " *", "* *", " **** "],
40
+ [" * * ", " * * ", " * * ", " **** ", " * ", " * "],
41
+ [" **** ", " * ", " **** ", " * ", " * ", " **** "],
42
+ [" *** ", " * ", " **** ", " * * ", " * * ", " **** "],
43
+ [" **** ", " * ", " * ", " * ", " * ", " * "],
44
+ [" **** ", "* *", " **** ", "* *", "* *", " **** "],
45
+ [" **** ", "* *", "* *", " *****", " *", " **** "],
46
+ ]
47
+
48
+ color_black = np.array((0, 0, 0)).astype("float32")
49
+ color_white = np.array((255, 255, 255)).astype("float32")
50
+
51
+
52
+ class MemorizeDigits(gym.Env):
53
+ metadata = {
54
+ "render.modes": ["human", "rgb_array"],
55
+ "video.frames_per_second": 60,
56
+ "video.res_w": FIELD_W,
57
+ "video.res_h": FIELD_H,
58
+ }
59
+
60
+ use_random_colors = False
61
+
62
+ def __init__(self):
63
+ self.seed()
64
+ self.viewer = None
65
+ self.observation_space = spaces.Box(
66
+ 0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8
67
+ )
68
+ self.action_space = spaces.Discrete(10)
69
+ self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
70
+ for digit in range(10):
71
+ for y in range(6):
72
+ self.bogus_mnist[digit, y, :] = [
73
+ ord(char) for char in bogus_mnist[digit][y]
74
+ ]
75
+ self.reset()
76
+
77
+ def seed(self, seed=None):
78
+ self.np_random, seed = seeding.np_random(seed)
79
+ return [seed]
80
+
81
+ def random_color(self):
82
+ return np.array(
83
+ [
84
+ self.np_random.randint(low=0, high=255),
85
+ self.np_random.randint(low=0, high=255),
86
+ self.np_random.randint(low=0, high=255),
87
+ ]
88
+ ).astype("uint8")
89
+
90
+ def reset(self):
91
+ self.digit_x = self.np_random.randint(low=FIELD_W // 5, high=FIELD_W // 5 * 4)
92
+ self.digit_y = self.np_random.randint(low=FIELD_H // 5, high=FIELD_H // 5 * 4)
93
+ self.color_bg = self.random_color() if self.use_random_colors else color_black
94
+ self.step_n = 0
95
+ while 1:
96
+ self.color_digit = (
97
+ self.random_color() if self.use_random_colors else color_white
98
+ )
99
+ if np.linalg.norm(self.color_digit - self.color_bg) < 50:
100
+ continue
101
+ break
102
+ self.digit = -1
103
+ return self.step(0)[0]
104
+
105
+ def step(self, action):
106
+ reward = -1
107
+ done = False
108
+ self.step_n += 1
109
+ if self.digit == -1:
110
+ pass
111
+ else:
112
+ if self.digit == action:
113
+ reward = +1
114
+ done = self.step_n > 20 and 0 == self.np_random.randint(low=0, high=5)
115
+ self.digit = self.np_random.randint(low=0, high=10)
116
+ obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
117
+ obs[:, :, :] = self.color_bg
118
+ digit_img = np.zeros((6, 6, 3), dtype=np.uint8)
119
+ digit_img[:] = self.color_bg
120
+ xxx = self.bogus_mnist[self.digit] == 42
121
+ digit_img[xxx] = self.color_digit
122
+ obs[
123
+ self.digit_y - 3 : self.digit_y + 3, self.digit_x - 3 : self.digit_x + 3
124
+ ] = digit_img
125
+ self.last_obs = obs
126
+ return obs, reward, done, {}
127
+
128
+ def render(self, mode="human"):
129
+ if mode == "rgb_array":
130
+ return self.last_obs
131
+
132
+ elif mode == "human":
133
+ from gym.envs.classic_control import rendering
134
+
135
+ if self.viewer is None:
136
+ self.viewer = rendering.SimpleImageViewer()
137
+ self.viewer.imshow(self.last_obs)
138
+ return self.viewer.isopen
139
+
140
+ else:
141
+ assert 0, "Render mode '%s' is not supported" % mode
142
+
143
+ def close(self):
144
+ if self.viewer is not None:
145
+ self.viewer.close()
146
+ self.viewer = None
gym-0.21.0/gym/spaces/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gym.spaces.space import Space
2
+ from gym.spaces.box import Box
3
+ from gym.spaces.discrete import Discrete
4
+ from gym.spaces.multi_discrete import MultiDiscrete
5
+ from gym.spaces.multi_binary import MultiBinary
6
+ from gym.spaces.tuple import Tuple
7
+ from gym.spaces.dict import Dict
8
+
9
+ from gym.spaces.utils import flatdim
10
+ from gym.spaces.utils import flatten_space
11
+ from gym.spaces.utils import flatten
12
+ from gym.spaces.utils import unflatten
13
+
14
+ __all__ = [
15
+ "Space",
16
+ "Box",
17
+ "Discrete",
18
+ "MultiDiscrete",
19
+ "MultiBinary",
20
+ "Tuple",
21
+ "Dict",
22
+ "flatdim",
23
+ "flatten_space",
24
+ "flatten",
25
+ "unflatten",
26
+ ]
gym-0.21.0/gym/spaces/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (785 Bytes). View file
 
gym-0.21.0/gym/spaces/__pycache__/box.cpython-38.pyc ADDED
Binary file (4.98 kB). View file
 
gym-0.21.0/gym/spaces/__pycache__/dict.cpython-38.pyc ADDED
Binary file (5.81 kB). View file