diff --git a/Metaworld/Makefile b/Metaworld/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4ceed49f5f42964064b8d946bd4727bd8851f7d6 --- /dev/null +++ b/Metaworld/Makefile @@ -0,0 +1,74 @@ +SHELL := /bin/bash + +.PHONY: help test ci-job build-ci build-headlessrun-ci run-headless + +.DEFAULT_GOAL := help + +# Set the environment variable MJKEY with the contents of the file specified by +# MJKEY_PATH. +MJKEY_PATH ?= ~/.mujoco/mjkey.txt + +test: ## Run the CI test suite locally +test: RUN_CMD = pytest -v +test: run + @echo "Running test suite..." + +check-memory: ## Profile memory usage +check-memory: RUN_CMD = scripts/profile_memory_usage.py +check-memory: run + @echo "Profiling memory usage..." + +ci-job: + pytest -n 0 --cov=metaworld -v -m 'not large and not skip_on_ci' + coverage xml + # bash <(curl -s https://codecov.io/bash) + +ci-deploy-docker: + echo "${DOCKER_API_KEY}" | docker login -u "${DOCKER_USERNAME}" \ + --password-stdin + docker tag "${TAG}" rlworkgroup/metaworld-ci:latest + docker push rlworkgroup/metaworld-ci + +build-ci: TAG ?= rlworkgroup/metaworld-ci:latest +build-ci: docker/Dockerfile + TAG=${TAG} \ + docker build \ + --cache-from rlworkgroup/metaworld-ci:latest \ + -f docker/Dockerfile \ + -t ${TAG} \ + ${ADD_ARGS} \ + . 
+ +run-ci: TAG ?= rlworkgroup/metaworld-ci +run-ci: + docker run \ + -e TRAVIS_BRANCH \ + -e TRAVIS_PULL_REQUEST \ + -e TRAVIS_COMMIT_RANGE \ + -e TRAVIS \ + -e MJKEY \ + --memory 7500m \ + --memory-swap 7500m \ + ${ADD_ARGS} \ + ${TAG} ${RUN_CMD} + +run: ## Run the Docker container on a local machine +run: CONTAINER_NAME ?= metaworld-ci +run: build-ci + docker run \ + -it \ + --rm \ + -e MJKEY="$$(cat $(MJKEY_PATH))" \ + --memory 7500m \ + --memory-swap 7500m \ + --name $(CONTAINER_NAME) \ + ${ADD_ARGS} \ + rlworkgroup/metaworld-ci $(RUN_CMD) + +# Help target +# See https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html +help: ## Display this message +help: + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) \ + | sort \ + | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/Metaworld/setup.cfg b/Metaworld/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9085b6b9e7b3cdaa9754b5a2ce0591fa083b74f2 --- /dev/null +++ b/Metaworld/setup.cfg @@ -0,0 +1,5 @@ +[tool:pytest] +addopts = -n auto -s +markers = + large: marks tests as large (deselect with '-m "not large"') + skip_on_ci: marks tests that should be skipped on the CI diff --git a/Metaworld/setup.py b/Metaworld/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..1dac2a41393b915ee81dc25b8aebebe58a257f98 --- /dev/null +++ b/Metaworld/setup.py @@ -0,0 +1,33 @@ +from setuptools import find_packages, setup + + +# Required dependencies +required = [ + # Please keep alphabetized + 'gym>=0.15.4', + 'mujoco-py<2.2,>=2.0', + 'numpy>=1.18', +] + + +# Development dependencies +extras = dict() +extras['dev'] = [ + # Please keep alphabetized + 'ipdb', + 'memory_profiler', + 'pylint', + 'pyquaternion==0.9.5', + 'pytest>=4.4.0', # Required for pytest-xdist + 'pytest-xdist', +] + + +setup( + name='metaworld', + version='0.1.0', + packages=find_packages(), + include_package_data=True, + install_requires=required, + 
extras_require=extras, +) diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_hand-insert_expert.zarr/data/.zgroup b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_hand-insert_expert.zarr/data/.zgroup new file mode 100644 index 0000000000000000000000000000000000000000..3b7daf227c1687f28bc23b69f183e27ce9a475c1 --- /dev/null +++ b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_hand-insert_expert.zarr/data/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_hand-insert_expert.zarr/data/action/.zarray b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_hand-insert_expert.zarr/data/action/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..88743f3db80197f5462f98b9e078621c7c162692 --- /dev/null +++ b/Metaworld/zarr_path: /data/haojun/datasets/3d-dp/metaworld_hand-insert_expert.zarr/data/action/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 100, + 4 + ], + "compressor": { + "blocksize": 0, + "clevel": 3, + "cname": "zstd", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " 0 else 1.0 + w = self.world.CreateDynamicBody( + position=(init_x + wx * SIZE, init_y + wy * SIZE), + angle=init_angle, + fixtures=fixtureDef( + shape=polygonShape( + vertices=[ + (x * front_k * SIZE, y * front_k * SIZE) + for x, y in WHEEL_POLY + ] + ), + density=0.1, + categoryBits=0x0020, + maskBits=0x001, + restitution=0.0, + ), + ) + w.wheel_rad = front_k * WHEEL_R * SIZE + w.color = WHEEL_COLOR + w.gas = 0.0 + w.brake = 0.0 + w.steer = 0.0 + w.phase = 0.0 # wheel angle + w.omega = 0.0 # angular velocity + w.skid_start = None + w.skid_particle = None + rjd = revoluteJointDef( + bodyA=self.hull, + bodyB=w, + localAnchorA=(wx * SIZE, wy * SIZE), + localAnchorB=(0, 0), + enableMotor=True, + enableLimit=True, + maxMotorTorque=180 * 900 * SIZE * SIZE, + motorSpeed=0, + lowerAngle=-0.4, + upperAngle=+0.4, + ) + w.joint = 
self.world.CreateJoint(rjd) + w.tiles = set() + w.userData = w + self.wheels.append(w) + self.drawlist = self.wheels + [self.hull] + self.particles = [] + + def gas(self, gas): + """control: rear wheel drive + + Args: + gas (float): How much gas gets applied. Gets clipped between 0 and 1. + """ + gas = np.clip(gas, 0, 1) + for w in self.wheels[2:4]: + diff = gas - w.gas + if diff > 0.1: + diff = 0.1 # gradually increase, but stop immediately + w.gas += diff + + def brake(self, b): + """control: brake + + Args: + b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation""" + for w in self.wheels: + w.brake = b + + def steer(self, s): + """control: steer + + Args: + s (-1..1): target position, it takes time to rotate steering wheel from side-to-side""" + self.wheels[0].steer = s + self.wheels[1].steer = s + + def step(self, dt): + for w in self.wheels: + # Steer each wheel + dir = np.sign(w.steer - w.joint.angle) + val = abs(w.steer - w.joint.angle) + w.joint.motorSpeed = dir * min(50.0 * val, 3.0) + + # Position => friction_limit + grass = True + friction_limit = FRICTION_LIMIT * 0.6 # Grass friction if no tile + for tile in w.tiles: + friction_limit = max( + friction_limit, FRICTION_LIMIT * tile.road_friction + ) + grass = False + + # Force + forw = w.GetWorldVector((0, 1)) + side = w.GetWorldVector((1, 0)) + v = w.linearVelocity + vf = forw[0] * v[0] + forw[1] * v[1] # forward speed + vs = side[0] * v[0] + side[1] * v[1] # side speed + + # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy + # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power + # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega + + # add small coef not to divide by zero + w.omega += ( + dt + * ENGINE_POWER + * w.gas + / WHEEL_MOMENT_OF_INERTIA + / (abs(w.omega) + 5.0) + ) + self.fuel_spent += dt * ENGINE_POWER * w.gas + + if w.brake >= 0.9: + w.omega = 0 + elif w.brake > 0: + BRAKE_FORCE = 15 # radians per second + dir = 
-np.sign(w.omega) + val = BRAKE_FORCE * w.brake + if abs(val) > abs(w.omega): + val = abs(w.omega) # low speed => same as = 0 + w.omega += dir * val + w.phase += w.omega * dt + + vr = w.omega * w.wheel_rad # rotating wheel speed + f_force = -vf + vr # force direction is direction of speed difference + p_force = -vs + + # Physically correct is to always apply friction_limit until speed is equal. + # But dt is finite, that will lead to oscillations if difference is already near zero. + + # Random coefficient to cut oscillations in few steps (have no effect on friction_limit) + f_force *= 205000 * SIZE * SIZE + p_force *= 205000 * SIZE * SIZE + force = np.sqrt(np.square(f_force) + np.square(p_force)) + + # Skid trace + if abs(force) > 2.0 * friction_limit: + if ( + w.skid_particle + and w.skid_particle.grass == grass + and len(w.skid_particle.poly) < 30 + ): + w.skid_particle.poly.append((w.position[0], w.position[1])) + elif w.skid_start is None: + w.skid_start = w.position + else: + w.skid_particle = self._create_particle( + w.skid_start, w.position, grass + ) + w.skid_start = None + else: + w.skid_start = None + w.skid_particle = None + + if abs(force) > friction_limit: + f_force /= force + p_force /= force + force = friction_limit # Correct physics here + f_force *= force + p_force *= force + + w.omega -= dt * f_force * w.wheel_rad / WHEEL_MOMENT_OF_INERTIA + + w.ApplyForceToCenter( + ( + p_force * side[0] + f_force * forw[0], + p_force * side[1] + f_force * forw[1], + ), + True, + ) + + def draw(self, viewer, draw_particles=True): + if draw_particles: + for p in self.particles: + viewer.draw_polyline(p.poly, color=p.color, linewidth=5) + for obj in self.drawlist: + for f in obj.fixtures: + trans = f.body.transform + path = [trans * v for v in f.shape.vertices] + viewer.draw_polygon(path, color=obj.color) + if "phase" not in obj.__dict__: + continue + a1 = obj.phase + a2 = obj.phase + 1.2 # radians + s1 = math.sin(a1) + s2 = math.sin(a2) + c1 = math.cos(a1) + c2 = 
math.cos(a2) + if s1 > 0 and s2 > 0: + continue + if s1 > 0: + c1 = np.sign(c1) + if s2 > 0: + c2 = np.sign(c2) + white_poly = [ + (-WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE), + (+WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE), + (+WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE), + (-WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE), + ] + viewer.draw_polygon([trans * v for v in white_poly], color=WHEEL_WHITE) + + def _create_particle(self, point1, point2, grass): + class Particle: + pass + + p = Particle() + p.color = WHEEL_COLOR if not grass else MUD_COLOR + p.ttl = 1 + p.poly = [(point1[0], point1[1]), (point2[0], point2[1])] + p.grass = grass + self.particles.append(p) + while len(self.particles) > 30: + self.particles.pop(0) + return p + + def destroy(self): + self.world.DestroyBody(self.hull) + self.hull = None + for w in self.wheels: + self.world.DestroyBody(w) + self.wheels = [] diff --git a/gym-0.21.0/gym/envs/mujoco/assets/inverted_pendulum.xml b/gym-0.21.0/gym/envs/mujoco/assets/inverted_pendulum.xml new file mode 100644 index 0000000000000000000000000000000000000000..396a0b3499aa03165efa0ea1f34f157dd275c834 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/assets/inverted_pendulum.xml @@ -0,0 +1,27 @@ + + + + + + + + + \ No newline at end of file diff --git a/gym-0.21.0/gym/envs/mujoco/assets/pusher.xml b/gym-0.21.0/gym/envs/mujoco/assets/pusher.xml new file mode 100644 index 0000000000000000000000000000000000000000..31a5ef706065f42c6fa62201edf800cc005b50e7 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/assets/pusher.xml @@ -0,0 +1,91 @@ + + + diff --git a/gym-0.21.0/gym/envs/mujoco/half_cheetah.py b/gym-0.21.0/gym/envs/mujoco/half_cheetah.py new file mode 100644 index 0000000000000000000000000000000000000000..7044e94597b2213783dac24f75b54590bafea2aa --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/half_cheetah.py @@ -0,0 +1,39 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + + +class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def 
__init__(self): + mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 5) + utils.EzPickle.__init__(self) + + def step(self, action): + xposbefore = self.sim.data.qpos[0] + self.do_simulation(action, self.frame_skip) + xposafter = self.sim.data.qpos[0] + ob = self._get_obs() + reward_ctrl = -0.1 * np.square(action).sum() + reward_run = (xposafter - xposbefore) / self.dt + reward = reward_ctrl + reward_run + done = False + return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl) + + def _get_obs(self): + return np.concatenate( + [ + self.sim.data.qpos.flat[1:], + self.sim.data.qvel.flat, + ] + ) + + def reset_model(self): + qpos = self.init_qpos + self.np_random.uniform( + low=-0.1, high=0.1, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1 + self.set_state(qpos, qvel) + return self._get_obs() + + def viewer_setup(self): + self.viewer.cam.distance = self.model.stat.extent * 0.5 diff --git a/gym-0.21.0/gym/envs/mujoco/humanoid_v3.py b/gym-0.21.0/gym/envs/mujoco/humanoid_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..94a809d3ee9bd6d5270bec1186d7811ca59183e5 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/humanoid_v3.py @@ -0,0 +1,162 @@ +import numpy as np +from gym.envs.mujoco import mujoco_env +from gym import utils + + +DEFAULT_CAMERA_CONFIG = { + "trackbodyid": 1, + "distance": 4.0, + "lookat": np.array((0.0, 0.0, 2.0)), + "elevation": -20.0, +} + + +def mass_center(model, sim): + mass = np.expand_dims(model.body_mass, axis=1) + xpos = sim.data.xipos + return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy() + + +class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__( + self, + xml_file="humanoid.xml", + forward_reward_weight=1.25, + ctrl_cost_weight=0.1, + contact_cost_weight=5e-7, + contact_cost_range=(-np.inf, 10.0), + healthy_reward=5.0, + terminate_when_unhealthy=True, + healthy_z_range=(1.0, 2.0), + reset_noise_scale=1e-2, + 
exclude_current_positions_from_observation=True, + ): + utils.EzPickle.__init__(**locals()) + + self._forward_reward_weight = forward_reward_weight + self._ctrl_cost_weight = ctrl_cost_weight + self._contact_cost_weight = contact_cost_weight + self._contact_cost_range = contact_cost_range + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + self._healthy_z_range = healthy_z_range + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + mujoco_env.MujocoEnv.__init__(self, xml_file, 5) + + @property + def healthy_reward(self): + return ( + float(self.is_healthy or self._terminate_when_unhealthy) + * self._healthy_reward + ) + + def control_cost(self, action): + control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl)) + return control_cost + + @property + def contact_cost(self): + contact_forces = self.sim.data.cfrc_ext + contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces)) + min_cost, max_cost = self._contact_cost_range + contact_cost = np.clip(contact_cost, min_cost, max_cost) + return contact_cost + + @property + def is_healthy(self): + min_z, max_z = self._healthy_z_range + is_healthy = min_z < self.sim.data.qpos[2] < max_z + + return is_healthy + + @property + def done(self): + done = (not self.is_healthy) if self._terminate_when_unhealthy else False + return done + + def _get_obs(self): + position = self.sim.data.qpos.flat.copy() + velocity = self.sim.data.qvel.flat.copy() + + com_inertia = self.sim.data.cinert.flat.copy() + com_velocity = self.sim.data.cvel.flat.copy() + + actuator_forces = self.sim.data.qfrc_actuator.flat.copy() + external_contact_forces = self.sim.data.cfrc_ext.flat.copy() + + if self._exclude_current_positions_from_observation: + position = position[2:] + + return np.concatenate( + ( + position, + velocity, + com_inertia, + com_velocity, + 
actuator_forces, + external_contact_forces, + ) + ) + + def step(self, action): + xy_position_before = mass_center(self.model, self.sim) + self.do_simulation(action, self.frame_skip) + xy_position_after = mass_center(self.model, self.sim) + + xy_velocity = (xy_position_after - xy_position_before) / self.dt + x_velocity, y_velocity = xy_velocity + + ctrl_cost = self.control_cost(action) + contact_cost = self.contact_cost + + forward_reward = self._forward_reward_weight * x_velocity + healthy_reward = self.healthy_reward + + rewards = forward_reward + healthy_reward + costs = ctrl_cost + contact_cost + + observation = self._get_obs() + reward = rewards - costs + done = self.done + info = { + "reward_linvel": forward_reward, + "reward_quadctrl": -ctrl_cost, + "reward_alive": healthy_reward, + "reward_impact": -contact_cost, + "x_position": xy_position_after[0], + "y_position": xy_position_after[1], + "distance_from_origin": np.linalg.norm(xy_position_after, ord=2), + "x_velocity": x_velocity, + "y_velocity": y_velocity, + "forward_reward": forward_reward, + } + + return observation, reward, done, info + + def reset_model(self): + noise_low = -self._reset_noise_scale + noise_high = self._reset_noise_scale + + qpos = self.init_qpos + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nv + ) + self.set_state(qpos, qvel) + + observation = self._get_obs() + return observation + + def viewer_setup(self): + for key, value in DEFAULT_CAMERA_CONFIG.items(): + if isinstance(value, np.ndarray): + getattr(self.viewer.cam, key)[:] = value + else: + setattr(self.viewer.cam, key, value) diff --git a/gym-0.21.0/gym/envs/mujoco/thrower.py b/gym-0.21.0/gym/envs/mujoco/thrower.py new file mode 100644 index 0000000000000000000000000000000000000000..b8b21a22abf123e743cb8542eefbf7dfcc39fec5 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/thrower.py @@ -0,0 +1,67 
# gym/envs/mujoco/thrower.py
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class ThrowerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """7-DoF arm that throws a ball toward a goal position on the ground.

    The reward is the negative distance from the ball to the goal (frozen at
    the landing point once the ball has hit the ground) plus a small control
    penalty (0.002 * -||a||^2).
    """

    def __init__(self):
        utils.EzPickle.__init__(self)
        # These must exist before MujocoEnv.__init__, which performs an
        # initial step() while building the observation space (gym 0.21).
        self._ball_hit_ground = False
        self._ball_hit_location = None
        mujoco_env.MujocoEnv.__init__(self, "thrower.xml", 5)

    def step(self, a):
        ball_xy = self.get_body_com("ball")[:2]
        goal_xy = self.get_body_com("goal")[:2]

        # Detect the first frame in which the ball has dropped below the
        # ground plane (z < -0.25).
        if not self._ball_hit_ground and self.get_body_com("ball")[2] < -0.25:
            self._ball_hit_ground = True
            # FIX: copy the position. get_body_com returns a live view into
            # simulator state (mujoco-py body_xpos), so without the copy the
            # stored "landing location" would keep tracking the ball after
            # impact instead of staying frozen. TODO confirm view semantics
            # against the installed mujoco-py version.
            self._ball_hit_location = self.get_body_com("ball").copy()

        # After landing, the distance penalty is frozen at the landing spot;
        # while the ball is in flight it tracks the ball itself.
        if self._ball_hit_ground:
            ball_hit_xy = self._ball_hit_location[:2]
            reward_dist = -np.linalg.norm(ball_hit_xy - goal_xy)
        else:
            reward_dist = -np.linalg.norm(ball_xy - goal_xy)
        reward_ctrl = -np.square(a).sum()

        # NOTE: as in the original implementation, the reward is computed
        # from the state *before* this step's simulation is advanced.
        reward = reward_dist + 0.002 * reward_ctrl
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        done = False
        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)

    def viewer_setup(self):
        # Track the root body from a fixed distance.
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = 4.0

    def reset_model(self):
        self._ball_hit_ground = False
        self._ball_hit_location = None

        # FIX: copy before mutating. The original bound qpos directly to
        # self.init_qpos and then wrote qpos[-9:-7] in place, silently
        # mutating the pristine reference pose shared by every reset.
        qpos = self.init_qpos.copy()
        self.goal = np.array(
            [
                self.np_random.uniform(low=-0.3, high=0.3),
                self.np_random.uniform(low=-0.3, high=0.3),
            ]
        )

        # Entries [-9:-7] of qpos hold the goal's xy position -- presumably
        # per thrower.xml's joint layout; verify against the model file.
        qpos[-9:-7] = self.goal
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[7:] = 0  # only the 7 arm joints keep their randomized velocity
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        # Arm joint positions/velocities plus wrist, ball and goal positions.
        return np.concatenate(
            [
                self.sim.data.qpos.flat[:7],
                self.sim.data.qvel.flat[:7],
                self.get_body_com("r_wrist_roll_link"),
                self.get_body_com("ball"),
                self.get_body_com("goal"),
            ]
        )
# (binary file gym/utils/__pycache__/atomic_write.cpython-38.pyc omitted)

# ----- gym/utils/ezpickle.py -----
class EzPickle(object):
    """Make an object picklable through its constructor arguments.

    A subclass calls ``EzPickle.__init__(self, *args, **kwargs)`` with the
    exact arguments it was constructed with; unpickling then rebuilds the
    instance by calling the constructor again with the stored arguments.

    Example usage:

        class Dog(Animal, EzPickle):
            def __init__(self, furcolor, tailkind="bushy"):
                Animal.__init__()
                EzPickle.__init__(furcolor, tailkind)
                ...

    When this object is unpickled, a new Dog is constructed from the saved
    furcolor and tailkind. Whether it is still the same dog is left to the
    philosophers. Generally needed only for environments wrapping C/C++
    code, such as MuJoCo and Atari.
    """

    def __init__(self, *args, **kwargs):
        # Stash the constructor call so __getstate__ can expose it.
        self._ezpickle_args = args
        self._ezpickle_kwargs = kwargs

    def __getstate__(self):
        # Pickle only the recorded constructor arguments, never live state.
        return {
            "_ezpickle_args": self._ezpickle_args,
            "_ezpickle_kwargs": self._ezpickle_kwargs,
        }

    def __setstate__(self, d):
        # Re-run the constructor, then adopt the fresh instance's attributes.
        rebuilt = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
        self.__dict__.update(rebuilt.__dict__)


# ----- gym/vector/sync_vector_env.py -----
import numpy as np
from copy import deepcopy

from gym import logger
from gym.vector.vector_env import VectorEnv
from gym.vector.utils import concatenate, create_empty_array

__all__ = ["SyncVectorEnv"]


class SyncVectorEnv(VectorEnv):
    """Vectorized environment that steps its sub-environments one by one.

    Parameters
    ----------
    env_fns : iterable of callable
        Factory functions, one per sub-environment.

    observation_space : `gym.spaces.Space` instance, optional
        Observation space of a single environment. Defaults to the first
        environment's observation space.

    action_space : `gym.spaces.Space` instance, optional
        Action space of a single environment. Defaults to the first
        environment's action space.

    copy : bool (default: `True`)
        When `True`, `reset` and `step` return a copy of the batched
        observations instead of the internal buffer.
    """

    def __init__(self, env_fns, observation_space=None, action_space=None, copy=True):
        self.env_fns = env_fns
        self.envs = [make_env() for make_env in env_fns]
        self.copy = copy
        self.metadata = self.envs[0].metadata

        # Fall back to the first environment's spaces when not provided.
        if observation_space is None or action_space is None:
            observation_space = observation_space or self.envs[0].observation_space
            action_space = action_space or self.envs[0].action_space
        super(SyncVectorEnv, self).__init__(
            num_envs=len(env_fns),
            observation_space=observation_space,
            action_space=action_space,
        )

        self._check_observation_spaces()
        # Pre-allocated batch buffer plus per-env reward/done scratch arrays.
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros
        )
        self._rewards = np.zeros((self.num_envs,), dtype=np.float64)
        self._dones = np.zeros((self.num_envs,), dtype=np.bool_)
        self._actions = None

    def seed(self, seeds=None):
        # Accept None (seed nothing specific), a single int (consecutive
        # seeds per environment), or one explicit seed per environment.
        if seeds is None:
            seeds = [None] * self.num_envs
        if isinstance(seeds, int):
            seeds = [seeds + offset for offset in range(self.num_envs)]
        assert len(seeds) == self.num_envs

        for env, seed in zip(self.envs, seeds):
            env.seed(seed)

    def reset_wait(self):
        self._dones[:] = False
        observations = [env.reset() for env in self.envs]
        self.observations = concatenate(
            observations, self.observations, self.single_observation_space
        )

        return deepcopy(self.observations) if self.copy else self.observations

    def step_async(self, actions):
        self._actions = actions

    def step_wait(self):
        observations, infos = [], []
        for i, (env, action) in enumerate(zip(self.envs, self._actions)):
            observation, self._rewards[i], self._dones[i], info = env.step(action)
            # Auto-reset any finished environment so the batch stays live.
            if self._dones[i]:
                observation = env.reset()
            observations.append(observation)
            infos.append(info)
        self.observations = concatenate(
            observations, self.observations, self.single_observation_space
        )

        return (
            deepcopy(self.observations) if self.copy else self.observations,
            np.copy(self._rewards),
            np.copy(self._dones),
            infos,
        )

    def close_extras(self, **kwargs):
        for env in self.envs:
            env.close()

    def _check_observation_spaces(self):
        # All sub-environments must share the single observation space,
        # otherwise their observations cannot be batched into one buffer.
        if all(
            env.observation_space == self.single_observation_space
            for env in self.envs
        ):
            return True
        raise RuntimeError(
            "Some environments have an observation space "
            "different from `{0}`. In order to batch observations, the "
            "observation spaces from all environments must be "
            "equal.".format(self.single_observation_space)
        )


# ----- gym/vector/utils/shared_memory.py -----
import numpy as np
import multiprocessing as mp
from ctypes import c_bool
from collections import OrderedDict

from gym import logger
from gym.spaces import Tuple, Dict
from gym.error import CustomSpaceError
from gym.vector.utils.spaces import _BaseGymSpaces

__all__ = ["create_shared_memory", "read_from_shared_memory", "write_to_shared_memory"]


def create_shared_memory(space, n=1, ctx=mp):
    """Allocate a shared-memory object able to hold `n` samples of `space`.

    Parameters
    ----------
    space : `gym.spaces.Space` instance
        Observation space of a single environment in the vectorized
        environment.

    n : int
        Number of environments in the vectorized environment (i.e. the
        number of processes).

    ctx : `multiprocessing` context
        Context for multiprocessing.

    Returns
    -------
    shared_memory : dict, tuple, or `multiprocessing.Array` instance
        Shared object across processes, mirroring the structure of `space`.
    """
    if isinstance(space, _BaseGymSpaces):
        return create_base_shared_memory(space, n=n, ctx=ctx)
    if isinstance(space, Tuple):
        return create_tuple_shared_memory(space, n=n, ctx=ctx)
    if isinstance(space, Dict):
        return create_dict_shared_memory(space, n=n, ctx=ctx)
    raise CustomSpaceError(
        "Cannot create a shared memory for space with "
        "type `{0}`. Shared memory only supports "
        "default Gym spaces (e.g. `Box`, `Tuple`, "
        "`Dict`, etc...), and does not support custom "
        "Gym spaces.".format(type(space))
    )


def create_base_shared_memory(space, n=1, ctx=mp):
    # ctypes has no typecode for numpy booleans, so map '?' onto c_bool.
    typecode = space.dtype.char
    if typecode in "?":
        typecode = c_bool
    return ctx.Array(typecode, n * int(np.prod(space.shape)))


def create_tuple_shared_memory(space, n=1, ctx=mp):
    # One shared block per subspace, in tuple order.
    return tuple(
        create_shared_memory(subspace, n=n, ctx=ctx) for subspace in space.spaces
    )


def create_dict_shared_memory(space, n=1, ctx=mp):
    # One shared block per key, preserving the space's key order.
    return OrderedDict(
        [
            (key, create_shared_memory(subspace, n=n, ctx=ctx))
            for (key, subspace) in space.spaces.items()
        ]
    )


def read_from_shared_memory(shared_memory, space, n=1):
    """View the batch of observations in shared memory as numpy arrays.

    Parameters
    ----------
    shared_memory : dict, tuple, or `multiprocessing.Array` instance
        Shared object across processes, created with `create_shared_memory`,
        holding the observations from the vectorized environment.

    space : `gym.spaces.Space` instance
        Observation space of a single environment in the vectorized
        environment.

    n : int
        Number of environments in the vectorized environment (i.e. the
        number of processes).

    Returns
    -------
    observations : dict, tuple or `np.ndarray` instance
        Batch of observations as a (possibly nested) numpy array.

    Notes
    -----
    The returned arrays share memory with `shared_memory`: changes to one
    are visible in the other. Use `np.copy` to avoid this side effect.
    """
    if isinstance(space, _BaseGymSpaces):
        return read_base_from_shared_memory(shared_memory, space, n=n)
    if isinstance(space, Tuple):
        return read_tuple_from_shared_memory(shared_memory, space, n=n)
    if isinstance(space, Dict):
        return read_dict_from_shared_memory(shared_memory, space, n=n)
    raise CustomSpaceError(
        "Cannot read from a shared memory for space with "
        "type `{0}`. Shared memory only supports "
        "default Gym spaces (e.g. `Box`, `Tuple`, "
        "`Dict`, etc...), and does not support custom "
        "Gym spaces.".format(type(space))
    )


def read_base_from_shared_memory(shared_memory, space, n=1):
    # Zero-copy view over the flat buffer, reshaped to (n, *space.shape).
    return np.frombuffer(shared_memory.get_obj(), dtype=space.dtype).reshape(
        (n,) + space.shape
    )


def read_tuple_from_shared_memory(shared_memory, space, n=1):
    return tuple(
        read_from_shared_memory(memory, subspace, n=n)
        for (memory, subspace) in zip(shared_memory, space.spaces)
    )


def read_dict_from_shared_memory(shared_memory, space, n=1):
    return OrderedDict(
        [
            (key, read_from_shared_memory(shared_memory[key], subspace, n=n))
            for (key, subspace) in space.spaces.items()
        ]
    )


def write_to_shared_memory(index, value, shared_memory, space):
    """Write one environment's observation into the shared batch.

    Parameters
    ----------
    index : int
        Index of the environment (must be in `[0, num_envs)`).

    value : sample from `space`
        Observation of the single environment to write to shared memory.

    shared_memory : dict, tuple, or `multiprocessing.Array` instance
        Shared object across processes, created with `create_shared_memory`,
        holding the observations from the vectorized environment.

    space : `gym.spaces.Space` instance
        Observation space of a single environment in the vectorized
        environment.

    Returns
    -------
    `None`
    """
    if isinstance(space, _BaseGymSpaces):
        write_base_to_shared_memory(index, value, shared_memory, space)
    elif isinstance(space, Tuple):
        write_tuple_to_shared_memory(index, value, shared_memory, space)
    elif isinstance(space, Dict):
        write_dict_to_shared_memory(index, value, shared_memory, space)
    else:
        raise CustomSpaceError(
            "Cannot write to a shared memory for space with "
            "type `{0}`. Shared memory only supports "
            "default Gym spaces (e.g. `Box`, `Tuple`, "
            "`Dict`, etc...), and does not support custom "
            "Gym spaces.".format(type(space))
        )


def write_base_to_shared_memory(index, value, shared_memory, space):
    # Copy the flattened value into this environment's slice of the buffer.
    size = int(np.prod(space.shape))
    flat = np.frombuffer(shared_memory.get_obj(), dtype=space.dtype)
    np.copyto(
        flat[index * size : (index + 1) * size],
        np.asarray(value, dtype=space.dtype).flatten(),
    )


def write_tuple_to_shared_memory(index, values, shared_memory, space):
    for value, memory, subspace in zip(values, shared_memory, space.spaces):
        write_to_shared_memory(index, value, memory, subspace)


def write_dict_to_shared_memory(index, values, shared_memory, space):
    for key, subspace in space.spaces.items():
        write_to_shared_memory(index, values[key], shared_memory[key], subspace)