diff --git a/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/1.0 b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..df75a86e7db4c780c95123f427867a79ed16f85e Binary files /dev/null and b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/1.0 differ diff --git a/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/3.0 b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/3.0 new file mode 100644 index 0000000000000000000000000000000000000000..74ccee150470f5ed5dfb0ec275c1fe82e3b45eda Binary files /dev/null and b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/action/3.0 differ diff --git a/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/depth/.zarray b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/depth/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..0a63fa64cd39147af12642b1b70c03062384d8ea --- /dev/null +++ b/Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/depth/.zarray @@ -0,0 +1,24 @@ +{ + "chunks": [ + 100, + 128, + 128 + ], + "compressor": { + "blocksize": 0, + "clevel": 3, + "cname": "zstd", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " 0: + laps += 1 + visited_other_side = False + if alpha < 0: + visited_other_side = True + alpha += 2 * math.pi + + while True: # Find destination from checkpoints + failed = True + + while True: + dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)] + if alpha <= dest_alpha: + failed = False + break + dest_i += 1 + if dest_i % len(checkpoints) == 0: + break + + if not failed: + break + + alpha -= 2 * math.pi + continue + + r1x = math.cos(beta) + r1y = math.sin(beta) + p1x = -r1y + p1y = r1x + dest_dx = dest_x - x # vector towards destination + dest_dy = dest_y - y + # destination vector projected on rad: + proj = r1x * dest_dx + r1y * dest_dy + while beta 
- alpha > 1.5 * math.pi: + beta -= 2 * math.pi + while beta - alpha < -1.5 * math.pi: + beta += 2 * math.pi + prev_beta = beta + proj *= SCALE + if proj > 0.3: + beta -= min(TRACK_TURN_RATE, abs(0.001 * proj)) + if proj < -0.3: + beta += min(TRACK_TURN_RATE, abs(0.001 * proj)) + x += p1x * TRACK_DETAIL_STEP + y += p1y * TRACK_DETAIL_STEP + track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y)) + if laps > 4: + break + no_freeze -= 1 + if no_freeze == 0: + break + + # Find closed loop range i1..i2, first loop should be ignored, second is OK + i1, i2 = -1, -1 + i = len(track) + while True: + i -= 1 + if i == 0: + return False # Failed + pass_through_start = ( + track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha + ) + if pass_through_start and i2 == -1: + i2 = i + elif pass_through_start and i1 == -1: + i1 = i + break + if self.verbose == 1: + print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1)) + assert i1 != -1 + assert i2 != -1 + + track = track[i1 : i2 - 1] + + first_beta = track[0][1] + first_perp_x = math.cos(first_beta) + first_perp_y = math.sin(first_beta) + # Length of perpendicular jump to put together head and tail + well_glued_together = np.sqrt( + np.square(first_perp_x * (track[0][2] - track[-1][2])) + + np.square(first_perp_y * (track[0][3] - track[-1][3])) + ) + if well_glued_together > TRACK_DETAIL_STEP: + return False + + # Red-white border on hard turns + border = [False] * len(track) + for i in range(len(track)): + good = True + oneside = 0 + for neg in range(BORDER_MIN_COUNT): + beta1 = track[i - neg - 0][1] + beta2 = track[i - neg - 1][1] + good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2 + oneside += np.sign(beta1 - beta2) + good &= abs(oneside) == BORDER_MIN_COUNT + border[i] = good + for i in range(len(track)): + for neg in range(BORDER_MIN_COUNT): + border[i - neg] |= border[i] + + # Create tiles + for i in range(len(track)): + alpha1, beta1, x1, y1 = track[i] + alpha2, beta2, x2, y2 = track[i - 1] 
+ road1_l = ( + x1 - TRACK_WIDTH * math.cos(beta1), + y1 - TRACK_WIDTH * math.sin(beta1), + ) + road1_r = ( + x1 + TRACK_WIDTH * math.cos(beta1), + y1 + TRACK_WIDTH * math.sin(beta1), + ) + road2_l = ( + x2 - TRACK_WIDTH * math.cos(beta2), + y2 - TRACK_WIDTH * math.sin(beta2), + ) + road2_r = ( + x2 + TRACK_WIDTH * math.cos(beta2), + y2 + TRACK_WIDTH * math.sin(beta2), + ) + vertices = [road1_l, road1_r, road2_r, road2_l] + self.fd_tile.shape.vertices = vertices + t = self.world.CreateStaticBody(fixtures=self.fd_tile) + t.userData = t + c = 0.01 * (i % 3) + t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c] + t.road_visited = False + t.road_friction = 1.0 + t.fixtures[0].sensor = True + self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color)) + self.road.append(t) + if border[i]: + side = np.sign(beta2 - beta1) + b1_l = ( + x1 + side * TRACK_WIDTH * math.cos(beta1), + y1 + side * TRACK_WIDTH * math.sin(beta1), + ) + b1_r = ( + x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1), + y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1), + ) + b2_l = ( + x2 + side * TRACK_WIDTH * math.cos(beta2), + y2 + side * TRACK_WIDTH * math.sin(beta2), + ) + b2_r = ( + x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2), + y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2), + ) + self.road_poly.append( + ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0)) + ) + self.track = track + return True + + def reset(self): + self._destroy() + self.reward = 0.0 + self.prev_reward = 0.0 + self.tile_visited_count = 0 + self.t = 0.0 + self.road_poly = [] + + while True: + success = self._create_track() + if success: + break + if self.verbose == 1: + print( + "retry to generate track (normal if there are not many" + "instances of this message)" + ) + self.car = Car(self.world, *self.track[0][1:4]) + + return self.step(None)[0] + + def step(self, action): + if action is not None: + self.car.steer(-action[0]) + self.car.gas(action[1]) + 
self.car.brake(action[2]) + + self.car.step(1.0 / FPS) + self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) + self.t += 1.0 / FPS + + self.state = self.render("state_pixels") + + step_reward = 0 + done = False + if action is not None: # First step without action, called from reset() + self.reward -= 0.1 + # We actually don't want to count fuel spent, we want car to be faster. + # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER + self.car.fuel_spent = 0.0 + step_reward = self.reward - self.prev_reward + self.prev_reward = self.reward + if self.tile_visited_count == len(self.track): + done = True + x, y = self.car.hull.position + if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: + done = True + step_reward = -100 + + return self.state, step_reward, done, {} + + def render(self, mode="human"): + assert mode in ["human", "state_pixels", "rgb_array"] + if self.viewer is None: + from gym.envs.classic_control import rendering + + self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H) + self.score_label = pyglet.text.Label( + "0000", + font_size=36, + x=20, + y=WINDOW_H * 2.5 / 40.00, + anchor_x="left", + anchor_y="center", + color=(255, 255, 255, 255), + ) + self.transform = rendering.Transform() + + if "t" not in self.__dict__: + return # reset() not called yet + + # Animate zoom first second: + zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1) + scroll_x = self.car.hull.position[0] + scroll_y = self.car.hull.position[1] + angle = -self.car.hull.angle + vel = self.car.hull.linearVelocity + if np.linalg.norm(vel) > 0.5: + angle = math.atan2(vel[0], vel[1]) + self.transform.set_scale(zoom, zoom) + self.transform.set_translation( + WINDOW_W / 2 + - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)), + WINDOW_H / 4 + - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle)), + ) + self.transform.set_rotation(angle) + + self.car.draw(self.viewer, mode != "state_pixels") + + arr = None + win = self.viewer.window + 
win.switch_to() + win.dispatch_events() + + win.clear() + t = self.transform + if mode == "rgb_array": + VP_W = VIDEO_W + VP_H = VIDEO_H + elif mode == "state_pixels": + VP_W = STATE_W + VP_H = STATE_H + else: + pixel_scale = 1 + if hasattr(win.context, "_nscontext"): + pixel_scale = ( + win.context._nscontext.view().backingScaleFactor() + ) # pylint: disable=protected-access + VP_W = int(pixel_scale * WINDOW_W) + VP_H = int(pixel_scale * WINDOW_H) + + gl.glViewport(0, 0, VP_W, VP_H) + t.enable() + self.render_road() + for geom in self.viewer.onetime_geoms: + geom.render() + self.viewer.onetime_geoms = [] + t.disable() + self.render_indicators(WINDOW_W, WINDOW_H) + + if mode == "human": + win.flip() + return self.viewer.isopen + + image_data = ( + pyglet.image.get_buffer_manager().get_color_buffer().get_image_data() + ) + arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep="") + arr = arr.reshape(VP_H, VP_W, 4) + arr = arr[::-1, :, 0:3] + + return arr + + def close(self): + if self.viewer is not None: + self.viewer.close() + self.viewer = None + + def render_road(self): + colors = [0.4, 0.8, 0.4, 1.0] * 4 + polygons_ = [ + +PLAYFIELD, + +PLAYFIELD, + 0, + +PLAYFIELD, + -PLAYFIELD, + 0, + -PLAYFIELD, + -PLAYFIELD, + 0, + -PLAYFIELD, + +PLAYFIELD, + 0, + ] + + k = PLAYFIELD / 20.0 + colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20) + for x in range(-20, 20, 2): + for y in range(-20, 20, 2): + polygons_.extend( + [ + k * x + k, + k * y + 0, + 0, + k * x + 0, + k * y + 0, + 0, + k * x + 0, + k * y + k, + 0, + k * x + k, + k * y + k, + 0, + ] + ) + + for poly, color in self.road_poly: + colors.extend([color[0], color[1], color[2], 1] * len(poly)) + for p in poly: + polygons_.extend([p[0], p[1], 0]) + + vl = pyglet.graphics.vertex_list( + len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors) + ) # gl.GL_QUADS, + vl.draw(gl.GL_QUADS) + vl.delete() + + def render_indicators(self, W, H): + s = W / 40.0 + h = H / 40.0 + colors = [0, 0, 0, 1] * 4 + polygons = 
[W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0] + + def vertical_ind(place, val, color): + colors.extend([color[0], color[1], color[2], 1] * 4) + polygons.extend( + [ + place * s, + h + h * val, + 0, + (place + 1) * s, + h + h * val, + 0, + (place + 1) * s, + h, + 0, + (place + 0) * s, + h, + 0, + ] + ) + + def horiz_ind(place, val, color): + colors.extend([color[0], color[1], color[2], 1] * 4) + polygons.extend( + [ + (place + 0) * s, + 4 * h, + 0, + (place + val) * s, + 4 * h, + 0, + (place + val) * s, + 2 * h, + 0, + (place + 0) * s, + 2 * h, + 0, + ] + ) + + true_speed = np.sqrt( + np.square(self.car.hull.linearVelocity[0]) + + np.square(self.car.hull.linearVelocity[1]) + ) + + vertical_ind(5, 0.02 * true_speed, (1, 1, 1)) + vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1)) # ABS sensors + vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1)) + vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1)) + vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1)) + horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0)) + horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0)) + vl = pyglet.graphics.vertex_list( + len(polygons) // 3, ("v3f", polygons), ("c4f", colors) + ) # gl.GL_QUADS, + vl.draw(gl.GL_QUADS) + vl.delete() + self.score_label.text = "%04i" % self.reward + self.score_label.draw() + + +if __name__ == "__main__": + from pyglet.window import key + + a = np.array([0.0, 0.0, 0.0]) + + def key_press(k, mod): + global restart + if k == 0xFF0D: + restart = True + if k == key.LEFT: + a[0] = -1.0 + if k == key.RIGHT: + a[0] = +1.0 + if k == key.UP: + a[1] = +1.0 + if k == key.DOWN: + a[2] = +0.8 # set 1.0 for wheels to block to zero rotation + + def key_release(k, mod): + if k == key.LEFT and a[0] == -1.0: + a[0] = 0 + if k == key.RIGHT and a[0] == +1.0: + a[0] = 0 + if k == key.UP: + a[1] = 0 + if k == key.DOWN: + a[2] = 0 + + env = CarRacing() + env.render() + env.viewer.window.on_key_press = key_press + 
env.viewer.window.on_key_release = key_release + record_video = False + if record_video: + from gym.wrappers.monitor import Monitor + + env = Monitor(env, "/tmp/video-test", force=True) + isopen = True + while isopen: + env.reset() + total_reward = 0.0 + steps = 0 + restart = False + while True: + s, r, done, info = env.step(a) + total_reward += r + if steps % 200 == 0 or done: + print("\naction " + str(["{:+0.2f}".format(x) for x in a])) + print("step {} total_reward {:+0.2f}".format(steps, total_reward)) + steps += 1 + isopen = env.render() + if done or restart or isopen == False: + break + env.close() diff --git a/gym-0.21.0/gym/envs/mujoco/ant_v3.py b/gym-0.21.0/gym/envs/mujoco/ant_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..473f85daada5929482718169313bb1cbb33ae55c --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/ant_v3.py @@ -0,0 +1,148 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + + +DEFAULT_CAMERA_CONFIG = { + "distance": 4.0, +} + + +class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__( + self, + xml_file="ant.xml", + ctrl_cost_weight=0.5, + contact_cost_weight=5e-4, + healthy_reward=1.0, + terminate_when_unhealthy=True, + healthy_z_range=(0.2, 1.0), + contact_force_range=(-1.0, 1.0), + reset_noise_scale=0.1, + exclude_current_positions_from_observation=True, + ): + utils.EzPickle.__init__(**locals()) + + self._ctrl_cost_weight = ctrl_cost_weight + self._contact_cost_weight = contact_cost_weight + + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + self._healthy_z_range = healthy_z_range + + self._contact_force_range = contact_force_range + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + mujoco_env.MujocoEnv.__init__(self, xml_file, 5) + + @property + def healthy_reward(self): + return ( + float(self.is_healthy or 
self._terminate_when_unhealthy) + * self._healthy_reward + ) + + def control_cost(self, action): + control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) + return control_cost + + @property + def contact_forces(self): + raw_contact_forces = self.sim.data.cfrc_ext + min_value, max_value = self._contact_force_range + contact_forces = np.clip(raw_contact_forces, min_value, max_value) + return contact_forces + + @property + def contact_cost(self): + contact_cost = self._contact_cost_weight * np.sum( + np.square(self.contact_forces) + ) + return contact_cost + + @property + def is_healthy(self): + state = self.state_vector() + min_z, max_z = self._healthy_z_range + is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z + return is_healthy + + @property + def done(self): + done = not self.is_healthy if self._terminate_when_unhealthy else False + return done + + def step(self, action): + xy_position_before = self.get_body_com("torso")[:2].copy() + self.do_simulation(action, self.frame_skip) + xy_position_after = self.get_body_com("torso")[:2].copy() + + xy_velocity = (xy_position_after - xy_position_before) / self.dt + x_velocity, y_velocity = xy_velocity + + ctrl_cost = self.control_cost(action) + contact_cost = self.contact_cost + + forward_reward = x_velocity + healthy_reward = self.healthy_reward + + rewards = forward_reward + healthy_reward + costs = ctrl_cost + contact_cost + + reward = rewards - costs + done = self.done + observation = self._get_obs() + info = { + "reward_forward": forward_reward, + "reward_ctrl": -ctrl_cost, + "reward_contact": -contact_cost, + "reward_survive": healthy_reward, + "x_position": xy_position_after[0], + "y_position": xy_position_after[1], + "distance_from_origin": np.linalg.norm(xy_position_after, ord=2), + "x_velocity": x_velocity, + "y_velocity": y_velocity, + "forward_reward": forward_reward, + } + + return observation, reward, done, info + + def _get_obs(self): + position = self.sim.data.qpos.flat.copy() 
+ velocity = self.sim.data.qvel.flat.copy() + contact_force = self.contact_forces.flat.copy() + + if self._exclude_current_positions_from_observation: + position = position[2:] + + observations = np.concatenate((position, velocity, contact_force)) + + return observations + + def reset_model(self): + noise_low = -self._reset_noise_scale + noise_high = self._reset_noise_scale + + qpos = self.init_qpos + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nq + ) + qvel = self.init_qvel + self._reset_noise_scale * self.np_random.randn( + self.model.nv + ) + self.set_state(qpos, qvel) + + observation = self._get_obs() + + return observation + + def viewer_setup(self): + for key, value in DEFAULT_CAMERA_CONFIG.items(): + if isinstance(value, np.ndarray): + getattr(self.viewer.cam, key)[:] = value + else: + setattr(self.viewer.cam, key, value) diff --git a/gym-0.21.0/gym/envs/mujoco/humanoid.py b/gym-0.21.0/gym/envs/mujoco/humanoid.py new file mode 100644 index 0000000000000000000000000000000000000000..ae8f1eaee21945f7e2e5ac2515d1dee0cd74c089 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/humanoid.py @@ -0,0 +1,72 @@ +import numpy as np +from gym.envs.mujoco import mujoco_env +from gym import utils + + +def mass_center(model, sim): + mass = np.expand_dims(model.body_mass, 1) + xpos = sim.data.xipos + return (np.sum(mass * xpos, 0) / np.sum(mass))[0] + + +class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + mujoco_env.MujocoEnv.__init__(self, "humanoid.xml", 5) + utils.EzPickle.__init__(self) + + def _get_obs(self): + data = self.sim.data + return np.concatenate( + [ + data.qpos.flat[2:], + data.qvel.flat, + data.cinert.flat, + data.cvel.flat, + data.qfrc_actuator.flat, + data.cfrc_ext.flat, + ] + ) + + def step(self, a): + pos_before = mass_center(self.model, self.sim) + self.do_simulation(a, self.frame_skip) + pos_after = mass_center(self.model, self.sim) + alive_bonus = 5.0 + data = self.sim.data + lin_vel_cost = 1.25 * 
(pos_after - pos_before) / self.dt + quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() + quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum() + quad_impact_cost = min(quad_impact_cost, 10) + reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus + qpos = self.sim.data.qpos + done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0)) + return ( + self._get_obs(), + reward, + done, + dict( + reward_linvel=lin_vel_cost, + reward_quadctrl=-quad_ctrl_cost, + reward_alive=alive_bonus, + reward_impact=-quad_impact_cost, + ), + ) + + def reset_model(self): + c = 0.01 + self.set_state( + self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), + self.init_qvel + + self.np_random.uniform( + low=-c, + high=c, + size=self.model.nv, + ), + ) + return self._get_obs() + + def viewer_setup(self): + self.viewer.cam.trackbodyid = 1 + self.viewer.cam.distance = self.model.stat.extent * 1.0 + self.viewer.cam.lookat[2] = 2.0 + self.viewer.cam.elevation = -20 diff --git a/gym-0.21.0/gym/envs/mujoco/humanoidstandup.py b/gym-0.21.0/gym/envs/mujoco/humanoidstandup.py new file mode 100644 index 0000000000000000000000000000000000000000..7117ab79124d78691ae3d96b83878e148d52468a --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/humanoidstandup.py @@ -0,0 +1,64 @@ +from gym.envs.mujoco import mujoco_env +from gym import utils +import numpy as np + + +class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self): + mujoco_env.MujocoEnv.__init__(self, "humanoidstandup.xml", 5) + utils.EzPickle.__init__(self) + + def _get_obs(self): + data = self.sim.data + return np.concatenate( + [ + data.qpos.flat[2:], + data.qvel.flat, + data.cinert.flat, + data.cvel.flat, + data.qfrc_actuator.flat, + data.cfrc_ext.flat, + ] + ) + + def step(self, a): + self.do_simulation(a, self.frame_skip) + pos_after = self.sim.data.qpos[2] + data = self.sim.data + uph_cost = (pos_after - 0) / self.model.opt.timestep + + quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() + 
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum() + quad_impact_cost = min(quad_impact_cost, 10) + reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 + + done = bool(False) + return ( + self._get_obs(), + reward, + done, + dict( + reward_linup=uph_cost, + reward_quadctrl=-quad_ctrl_cost, + reward_impact=-quad_impact_cost, + ), + ) + + def reset_model(self): + c = 0.01 + self.set_state( + self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), + self.init_qvel + + self.np_random.uniform( + low=-c, + high=c, + size=self.model.nv, + ), + ) + return self._get_obs() + + def viewer_setup(self): + self.viewer.cam.trackbodyid = 1 + self.viewer.cam.distance = self.model.stat.extent * 1.0 + self.viewer.cam.lookat[2] = 0.8925 + self.viewer.cam.elevation = -20 diff --git a/gym-0.21.0/gym/envs/mujoco/walker2d_v3.py b/gym-0.21.0/gym/envs/mujoco/walker2d_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..eee6bb7d4ffdbbd4774d705e65c97fd11453e7f2 --- /dev/null +++ b/gym-0.21.0/gym/envs/mujoco/walker2d_v3.py @@ -0,0 +1,130 @@ +import numpy as np +from gym.envs.mujoco import mujoco_env +from gym import utils + + +DEFAULT_CAMERA_CONFIG = { + "trackbodyid": 2, + "distance": 4.0, + "lookat": np.array((0.0, 0.0, 1.15)), + "elevation": -20.0, +} + + +class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__( + self, + xml_file="walker2d.xml", + forward_reward_weight=1.0, + ctrl_cost_weight=1e-3, + healthy_reward=1.0, + terminate_when_unhealthy=True, + healthy_z_range=(0.8, 2.0), + healthy_angle_range=(-1.0, 1.0), + reset_noise_scale=5e-3, + exclude_current_positions_from_observation=True, + ): + utils.EzPickle.__init__(**locals()) + + self._forward_reward_weight = forward_reward_weight + self._ctrl_cost_weight = ctrl_cost_weight + + self._healthy_reward = healthy_reward + self._terminate_when_unhealthy = terminate_when_unhealthy + + self._healthy_z_range = healthy_z_range + self._healthy_angle_range = 
healthy_angle_range + + self._reset_noise_scale = reset_noise_scale + + self._exclude_current_positions_from_observation = ( + exclude_current_positions_from_observation + ) + + mujoco_env.MujocoEnv.__init__(self, xml_file, 4) + + @property + def healthy_reward(self): + return ( + float(self.is_healthy or self._terminate_when_unhealthy) + * self._healthy_reward + ) + + def control_cost(self, action): + control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) + return control_cost + + @property + def is_healthy(self): + z, angle = self.sim.data.qpos[1:3] + + min_z, max_z = self._healthy_z_range + min_angle, max_angle = self._healthy_angle_range + + healthy_z = min_z < z < max_z + healthy_angle = min_angle < angle < max_angle + is_healthy = healthy_z and healthy_angle + + return is_healthy + + @property + def done(self): + done = not self.is_healthy if self._terminate_when_unhealthy else False + return done + + def _get_obs(self): + position = self.sim.data.qpos.flat.copy() + velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10) + + if self._exclude_current_positions_from_observation: + position = position[1:] + + observation = np.concatenate((position, velocity)).ravel() + return observation + + def step(self, action): + x_position_before = self.sim.data.qpos[0] + self.do_simulation(action, self.frame_skip) + x_position_after = self.sim.data.qpos[0] + x_velocity = (x_position_after - x_position_before) / self.dt + + ctrl_cost = self.control_cost(action) + + forward_reward = self._forward_reward_weight * x_velocity + healthy_reward = self.healthy_reward + + rewards = forward_reward + healthy_reward + costs = ctrl_cost + + observation = self._get_obs() + reward = rewards - costs + done = self.done + info = { + "x_position": x_position_after, + "x_velocity": x_velocity, + } + + return observation, reward, done, info + + def reset_model(self): + noise_low = -self._reset_noise_scale + noise_high = self._reset_noise_scale + + qpos = self.init_qpos + 
self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nq + ) + qvel = self.init_qvel + self.np_random.uniform( + low=noise_low, high=noise_high, size=self.model.nv + ) + + self.set_state(qpos, qvel) + + observation = self._get_obs() + return observation + + def viewer_setup(self): + for key, value in DEFAULT_CAMERA_CONFIG.items(): + if isinstance(value, np.ndarray): + getattr(self.viewer.cam, key)[:] = value + else: + setattr(self.viewer.cam, key, value) diff --git a/gym-0.21.0/gym/envs/robotics/assets/fetch/slide.xml b/gym-0.21.0/gym/envs/robotics/assets/fetch/slide.xml new file mode 100644 index 0000000000000000000000000000000000000000..efbfb51bd06b34257d27afd107df0d7c15619fdf --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/fetch/slide.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/hand/shared_asset.xml b/gym-0.21.0/gym/envs/robotics/assets/hand/shared_asset.xml new file mode 100644 index 0000000000000000000000000000000000000000..ec9a0b08b641fa520e4b09e8d8ff3772c2962ac7 --- /dev/null +++ b/gym-0.21.0/gym/envs/robotics/assets/hand/shared_asset.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl new file mode 100644 index 0000000000000000000000000000000000000000..b0eea0777a10b5aad2e9237c888883944492c42e Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/gripper_link.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/gripper_link.stl new file mode 100644 index 0000000000000000000000000000000000000000..8a1487401a6b3f85edd115fd76419851d0001a6f Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/gripper_link.stl differ diff 
--git a/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl new file mode 100644 index 0000000000000000000000000000000000000000..5c1752487efc163453d32996e667e63713a6a398 Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl new file mode 100644 index 0000000000000000000000000000000000000000..3215d2e1de0f271dfdfc8abb2c00c47cafaabe45 Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F1.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F1.stl new file mode 100644 index 0000000000000000000000000000000000000000..515d3c90169fb7b7a5ecb344bb63267d5352ad8c Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/F1.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/TH2_z.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/TH2_z.stl new file mode 100644 index 0000000000000000000000000000000000000000..5ace8388b9fad8fc9f4a30d25a00592a312864f1 Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/TH2_z.stl differ diff --git a/gym-0.21.0/gym/envs/robotics/assets/stls/hand/palm.stl b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/palm.stl new file mode 100644 index 0000000000000000000000000000000000000000..65e47eb65dad73dee839c05a7b1b2c285541defb Binary files /dev/null and b/gym-0.21.0/gym/envs/robotics/assets/stls/hand/palm.stl differ diff --git a/gym-0.21.0/gym/envs/toy_text/discrete.py b/gym-0.21.0/gym/envs/toy_text/discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..a49263f0c6077ef893195f3ff20fc9d50de83656 --- /dev/null +++ 
b/gym-0.21.0/gym/envs/toy_text/discrete.py @@ -0,0 +1,61 @@ +import numpy as np + +from gym import Env, spaces +from gym.utils import seeding + + +def categorical_sample(prob_n, np_random): + """ + Sample from categorical distribution + Each row specifies class probabilities + """ + prob_n = np.asarray(prob_n) + csprob_n = np.cumsum(prob_n) + return (csprob_n > np_random.rand()).argmax() + + +class DiscreteEnv(Env): + + """ + Has the following members + - nS: number of states + - nA: number of actions + - P: transitions (*) + - isd: initial state distribution (**) + + (*) dictionary of lists, where + P[s][a] == [(probability, nextstate, reward, done), ...] + (**) list or array of length nS + + + """ + + def __init__(self, nS, nA, P, isd): + self.P = P + self.isd = isd + self.lastaction = None # for rendering + self.nS = nS + self.nA = nA + + self.action_space = spaces.Discrete(self.nA) + self.observation_space = spaces.Discrete(self.nS) + + self.seed() + self.s = categorical_sample(self.isd, self.np_random) + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def reset(self): + self.s = categorical_sample(self.isd, self.np_random) + self.lastaction = None + return int(self.s) + + def step(self, a): + transitions = self.P[self.s][a] + i = categorical_sample([t[0] for t in transitions], self.np_random) + p, s, r, d = transitions[i] + self.s = s + self.lastaction = a + return (int(s), r, d, {"prob": p}) diff --git a/gym-0.21.0/gym/envs/unittest/memorize_digits.py b/gym-0.21.0/gym/envs/unittest/memorize_digits.py new file mode 100644 index 0000000000000000000000000000000000000000..6b48c9f5fc2cb6ff8b66c21898c02edec22e87aa --- /dev/null +++ b/gym-0.21.0/gym/envs/unittest/memorize_digits.py @@ -0,0 +1,146 @@ +import numpy as np +import gym +from gym import spaces +from gym.utils import seeding + +# Unit test environment for CNNs. 
+# Looks like this (RGB observations): +# +# --------------------------- +# | | +# | ****** | +# | ****** | +# | ** ** | +# | ** ** | +# | ** | +# | ** | +# | **** | +# | **** | +# | **** | +# | **** | +# | ********** | +# | ********** | +# | | +# --------------------------- +# +# Agent should hit action 2 to gain reward. Catches off-by-one errors in your agent. +# +# To see how it works, run: +# +# python examples/agents/keyboard_agent.py MemorizeDigits-v0 + +FIELD_W = 32 +FIELD_H = 24 + +bogus_mnist = [ + [" **** ", "* *", "* *", "* *", "* *", " **** "], + [" ** ", " * * ", " * ", " * ", " * ", " *** "], + [" **** ", "* *", " *", " *** ", "** ", "******"], + [" **** ", "* *", " ** ", " *", "* *", " **** "], + [" * * ", " * * ", " * * ", " **** ", " * ", " * "], + [" **** ", " * ", " **** ", " * ", " * ", " **** "], + [" *** ", " * ", " **** ", " * * ", " * * ", " **** "], + [" **** ", " * ", " * ", " * ", " * ", " * "], + [" **** ", "* *", " **** ", "* *", "* *", " **** "], + [" **** ", "* *", "* *", " *****", " *", " **** "], +] + +color_black = np.array((0, 0, 0)).astype("float32") +color_white = np.array((255, 255, 255)).astype("float32") + + +class MemorizeDigits(gym.Env): + metadata = { + "render.modes": ["human", "rgb_array"], + "video.frames_per_second": 60, + "video.res_w": FIELD_W, + "video.res_h": FIELD_H, + } + + use_random_colors = False + + def __init__(self): + self.seed() + self.viewer = None + self.observation_space = spaces.Box( + 0, 255, (FIELD_H, FIELD_W, 3), dtype=np.uint8 + ) + self.action_space = spaces.Discrete(10) + self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8) + for digit in range(10): + for y in range(6): + self.bogus_mnist[digit, y, :] = [ + ord(char) for char in bogus_mnist[digit][y] + ] + self.reset() + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def random_color(self): + return np.array( + [ + self.np_random.randint(low=0, high=255), + self.np_random.randint(low=0, 
high=255), + self.np_random.randint(low=0, high=255), + ] + ).astype("uint8") + + def reset(self): + self.digit_x = self.np_random.randint(low=FIELD_W // 5, high=FIELD_W // 5 * 4) + self.digit_y = self.np_random.randint(low=FIELD_H // 5, high=FIELD_H // 5 * 4) + self.color_bg = self.random_color() if self.use_random_colors else color_black + self.step_n = 0 + while 1: + self.color_digit = ( + self.random_color() if self.use_random_colors else color_white + ) + if np.linalg.norm(self.color_digit - self.color_bg) < 50: + continue + break + self.digit = -1 + return self.step(0)[0] + + def step(self, action): + reward = -1 + done = False + self.step_n += 1 + if self.digit == -1: + pass + else: + if self.digit == action: + reward = +1 + done = self.step_n > 20 and 0 == self.np_random.randint(low=0, high=5) + self.digit = self.np_random.randint(low=0, high=10) + obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8) + obs[:, :, :] = self.color_bg + digit_img = np.zeros((6, 6, 3), dtype=np.uint8) + digit_img[:] = self.color_bg + xxx = self.bogus_mnist[self.digit] == 42 + digit_img[xxx] = self.color_digit + obs[ + self.digit_y - 3 : self.digit_y + 3, self.digit_x - 3 : self.digit_x + 3 + ] = digit_img + self.last_obs = obs + return obs, reward, done, {} + + def render(self, mode="human"): + if mode == "rgb_array": + return self.last_obs + + elif mode == "human": + from gym.envs.classic_control import rendering + + if self.viewer is None: + self.viewer = rendering.SimpleImageViewer() + self.viewer.imshow(self.last_obs) + return self.viewer.isopen + + else: + assert 0, "Render mode '%s' is not supported" % mode + + def close(self): + if self.viewer is not None: + self.viewer.close() + self.viewer = None diff --git a/gym-0.21.0/gym/spaces/__init__.py b/gym-0.21.0/gym/spaces/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..45f403d0e4c96e151194a9015199cc9cda526125 --- /dev/null +++ b/gym-0.21.0/gym/spaces/__init__.py @@ -0,0 +1,26 @@ +from 
gym.spaces.space import Space +from gym.spaces.box import Box +from gym.spaces.discrete import Discrete +from gym.spaces.multi_discrete import MultiDiscrete +from gym.spaces.multi_binary import MultiBinary +from gym.spaces.tuple import Tuple +from gym.spaces.dict import Dict + +from gym.spaces.utils import flatdim +from gym.spaces.utils import flatten_space +from gym.spaces.utils import flatten +from gym.spaces.utils import unflatten + +__all__ = [ + "Space", + "Box", + "Discrete", + "MultiDiscrete", + "MultiBinary", + "Tuple", + "Dict", + "flatdim", + "flatten_space", + "flatten", + "unflatten", +] diff --git a/gym-0.21.0/gym/spaces/__pycache__/__init__.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04bb53dbc806886e4e5c53eb333e7a667139eead Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/__init__.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/spaces/__pycache__/box.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/box.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58847061856abb9698d277aa2290e48d31dbee9d Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/box.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/spaces/__pycache__/dict.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/dict.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7e64fb333f7afedbb8656d1365d9fb1410df999 Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/dict.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/spaces/__pycache__/multi_binary.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/multi_binary.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9aeddfc06c669c8590ef258f085adb9418987119 Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/multi_binary.cpython-38.pyc differ diff --git 
a/gym-0.21.0/gym/spaces/__pycache__/multi_discrete.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/multi_discrete.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44e4ee8b8dc2331e7d897b2f2f0e00b2e7748801 Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/multi_discrete.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/spaces/__pycache__/space.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/space.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8afd18b34b0d085b49a6981da001b7f2aa161121 Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/space.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/spaces/__pycache__/tuple.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/tuple.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd8f84d0aa2d64e0732ddeb839f135667f60bbbd Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/tuple.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/spaces/__pycache__/utils.cpython-38.pyc b/gym-0.21.0/gym/spaces/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8b364c78360d40c84c05d1bd331639e6afbdb6c Binary files /dev/null and b/gym-0.21.0/gym/spaces/__pycache__/utils.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/spaces/dict.py b/gym-0.21.0/gym/spaces/dict.py new file mode 100644 index 0000000000000000000000000000000000000000..b7b7389573bb88925cd03ad79f8d4c0ae6bef5be --- /dev/null +++ b/gym-0.21.0/gym/spaces/dict.py @@ -0,0 +1,158 @@ +from collections import OrderedDict +import numpy as np +from .space import Space + + +class Dict(Space): + """ + A dictionary of simpler spaces. 
+ + Example usage: + self.observation_space = spaces.Dict({"position": spaces.Discrete(2), "velocity": spaces.Discrete(3)}) + + Example usage [nested]: + self.nested_observation_space = spaces.Dict({ + 'sensors': spaces.Dict({ + 'position': spaces.Box(low=-100, high=100, shape=(3,)), + 'velocity': spaces.Box(low=-1, high=1, shape=(3,)), + 'front_cam': spaces.Tuple(( + spaces.Box(low=0, high=1, shape=(10, 10, 3)), + spaces.Box(low=0, high=1, shape=(10, 10, 3)) + )), + 'rear_cam': spaces.Box(low=0, high=1, shape=(10, 10, 3)), + }), + 'ext_controller': spaces.MultiDiscrete((5, 2, 2)), + 'inner_state':spaces.Dict({ + 'charge': spaces.Discrete(100), + 'system_checks': spaces.MultiBinary(10), + 'job_status': spaces.Dict({ + 'task': spaces.Discrete(5), + 'progress': spaces.Box(low=0, high=100, shape=()), + }) + }) + }) + """ + + def __init__(self, spaces=None, seed=None, **spaces_kwargs): + assert (spaces is None) or ( + not spaces_kwargs + ), "Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)" + + if spaces is None: + spaces = spaces_kwargs + if isinstance(spaces, dict) and not isinstance(spaces, OrderedDict): + spaces = OrderedDict(sorted(list(spaces.items()))) + if isinstance(spaces, list): + spaces = OrderedDict(spaces) + self.spaces = spaces + for space in spaces.values(): + assert isinstance( + space, Space + ), "Values of the dict should be instances of gym.Space" + super(Dict, self).__init__( + None, None, seed + ) # None for shape and dtype, since it'll require special handling + + def seed(self, seed=None): + seeds = [] + if isinstance(seed, dict): + for key, seed_key in zip(self.spaces, seed): + assert key == seed_key, print( + "Key value", + seed_key, + "in passed seed dict did not match key value", + key, + "in spaces Dict.", + ) + seeds += self.spaces[key].seed(seed[seed_key]) + elif isinstance(seed, int): + seeds = super().seed(seed) + try: + subseeds = self.np_random.choice( + np.iinfo(int).max, + size=len(self.spaces), + replace=False, # unique 
subseed for each subspace + ) + except ValueError: + subseeds = self.np_random.choice( + np.iinfo(int).max, + size=len(self.spaces), + replace=True, # we get more than INT_MAX subspaces + ) + + for subspace, subseed in zip(self.spaces.values(), subseeds): + seeds.append(subspace.seed(int(subseed))[0]) + elif seed is None: + for space in self.spaces.values(): + seeds += space.seed(seed) + else: + raise TypeError("Passed seed not of an expected type: dict or int or None") + + return seeds + + def sample(self): + return OrderedDict([(k, space.sample()) for k, space in self.spaces.items()]) + + def contains(self, x): + if not isinstance(x, dict) or len(x) != len(self.spaces): + return False + for k, space in self.spaces.items(): + if k not in x: + return False + if not space.contains(x[k]): + return False + return True + + def __getitem__(self, key): + return self.spaces[key] + + def __setitem__(self, key, value): + self.spaces[key] = value + + def __iter__(self): + for key in self.spaces: + yield key + + def __len__(self): + return len(self.spaces) + + def __contains__(self, item): + return self.contains(item) + + def __repr__(self): + return ( + "Dict(" + + ", ".join([str(k) + ":" + str(s) for k, s in self.spaces.items()]) + + ")" + ) + + def to_jsonable(self, sample_n): + # serialize as dict-repr of vectors + return { + key: space.to_jsonable([sample[key] for sample in sample_n]) + for key, space in self.spaces.items() + } + + def from_jsonable(self, sample_n): + dict_of_list = {} + for key, space in self.spaces.items(): + dict_of_list[key] = space.from_jsonable(sample_n[key]) + ret = [] + for i, _ in enumerate(dict_of_list[key]): + entry = {} + for key, value in dict_of_list.items(): + entry[key] = value[i] + ret.append(entry) + return ret + + def __eq__(self, other): + return isinstance(other, Dict) and self.spaces == other.spaces + + def keys(self): + return self.spaces.keys() + + def values(self): + return self.spaces.values() + + def items(self): + return 
self.spaces.items() diff --git a/gym-0.21.0/gym/spaces/multi_binary.py b/gym-0.21.0/gym/spaces/multi_binary.py new file mode 100644 index 0000000000000000000000000000000000000000..d8b315a1c88789c5dcb2fe7fde53437acd15122b --- /dev/null +++ b/gym-0.21.0/gym/spaces/multi_binary.py @@ -0,0 +1,57 @@ +import numpy as np +from .space import Space + + +class MultiBinary(Space): + """ + An n-shape binary space. + + The argument to MultiBinary defines n, which could be a number or a `list` of numbers. + + Example Usage: + + >> self.observation_space = spaces.MultiBinary(5) + + >> self.observation_space.sample() + + array([0,1,0,1,0], dtype =int8) + + >> self.observation_space = spaces.MultiBinary([3,2]) + + >> self.observation_space.sample() + + array([[0, 0], + [0, 1], + [1, 1]], dtype=int8) + + """ + + def __init__(self, n, seed=None): + self.n = n + if type(n) in [tuple, list, np.ndarray]: + input_n = n + else: + input_n = (n,) + super(MultiBinary, self).__init__(input_n, np.int8, seed) + + def sample(self): + return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype) + + def contains(self, x): + if isinstance(x, list) or isinstance(x, tuple): + x = np.array(x) # Promote list to array for contains check + if self.shape != x.shape: + return False + return ((x == 0) | (x == 1)).all() + + def to_jsonable(self, sample_n): + return np.array(sample_n).tolist() + + def from_jsonable(self, sample_n): + return [np.asarray(sample) for sample in sample_n] + + def __repr__(self): + return "MultiBinary({})".format(self.n) + + def __eq__(self, other): + return isinstance(other, MultiBinary) and self.n == other.n diff --git a/gym-0.21.0/gym/spaces/multi_discrete.py b/gym-0.21.0/gym/spaces/multi_discrete.py new file mode 100644 index 0000000000000000000000000000000000000000..fdfdb19fd510d1d1565c247733617b90a68978d3 --- /dev/null +++ b/gym-0.21.0/gym/spaces/multi_discrete.py @@ -0,0 +1,73 @@ +import numpy as np +from gym.logger import warn +from .space import Space +from 
.discrete import Discrete + + +class MultiDiscrete(Space): + """ + - The multi-discrete action space consists of a series of discrete action spaces with different number of actions in each + - It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space + - It is parametrized by passing an array of positive integers specifying number of actions for each discrete action space + + Note: Some environment wrappers assume a value of 0 always represents the NOOP action. + + e.g. Nintendo Game Controller + - Can be conceptualized as 3 discrete action spaces: + + 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 + 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 + 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 + + - Can be initialized as + + MultiDiscrete([ 5, 2, 2 ]) + + """ + + def __init__(self, nvec, dtype=np.int64, seed=None): + """ + nvec: vector of counts of each categorical variable + """ + assert (np.array(nvec) > 0).all(), "nvec (counts) have to be positive" + self.nvec = np.asarray(nvec, dtype=dtype) + + super(MultiDiscrete, self).__init__(self.nvec.shape, dtype, seed) + + def sample(self): + return (self.np_random.random_sample(self.nvec.shape) * self.nvec).astype( + self.dtype + ) + + def contains(self, x): + if isinstance(x, list): + x = np.array(x) # Promote list to array for contains check + # if nvec is uint32 and space dtype is uint32, then 0 <= x < self.nvec guarantees that x + # is within correct bounds for space dtype (even though x does not have to be unsigned) + return x.shape == self.shape and (0 <= x).all() and (x < self.nvec).all() + + def to_jsonable(self, sample_n): + return [sample.tolist() for sample in sample_n] + + def from_jsonable(self, sample_n): + return np.array(sample_n) + + def __repr__(self): + return "MultiDiscrete({})".format(self.nvec) + + def __getitem__(self, index): + nvec = 
self.nvec[index] + if nvec.ndim == 0: + subspace = Discrete(nvec) + else: + subspace = MultiDiscrete(nvec, self.dtype) + subspace.np_random.set_state(self.np_random.get_state()) # for reproducibility + return subspace + + def __len__(self): + if self.nvec.ndim >= 2: + warn("Get length of a multi-dimensional MultiDiscrete space.") + return len(self.nvec) + + def __eq__(self, other): + return isinstance(other, MultiDiscrete) and np.all(self.nvec == other.nvec) diff --git a/gym-0.21.0/gym/spaces/space.py b/gym-0.21.0/gym/spaces/space.py new file mode 100644 index 0000000000000000000000000000000000000000..4c077064390888285514524ad3c28d0bf901a987 --- /dev/null +++ b/gym-0.21.0/gym/spaces/space.py @@ -0,0 +1,90 @@ +from gym.utils import seeding + + +class Space(object): + """Defines the observation and action spaces, so you can write generic + code that applies to any Env. For example, you can choose a random + action. + + WARNING - Custom observation & action spaces can inherit from the `Space` + class. However, most use-cases should be covered by the existing space + classes (e.g. `Box`, `Discrete`, etc...), and container classes (`Tuple` & + `Dict`). Note that parametrized probability distributions (through the + `sample()` method), and batching functions (in `gym.vector.VectorEnv`), are + only well-defined for instances of spaces provided in gym by default. + Moreover, some implementations of Reinforcement Learning algorithms might + not handle custom spaces properly. Use custom spaces with care. + """ + + def __init__(self, shape=None, dtype=None, seed=None): + import numpy as np # takes about 300-400ms to import, so we load lazily + + self._shape = None if shape is None else tuple(shape) + self.dtype = None if dtype is None else np.dtype(dtype) + self._np_random = None + if seed is not None: + self.seed(seed) + + @property + def np_random(self): + """Lazily seed the rng since this is expensive and only needed if + sampling from this space. 
+ """ + if self._np_random is None: + self.seed() + + return self._np_random + + @property + def shape(self): + """Return the shape of the space as an immutable property""" + return self._shape + + def sample(self): + """Randomly sample an element of this space. Can be + uniform or non-uniform sampling based on boundedness of space.""" + raise NotImplementedError + + def seed(self, seed=None): + """Seed the PRNG of this space.""" + self._np_random, seed = seeding.np_random(seed) + return [seed] + + def contains(self, x): + """ + Return boolean specifying if x is a valid + member of this space + """ + raise NotImplementedError + + def __contains__(self, x): + return self.contains(x) + + def __setstate__(self, state): + # Don't mutate the original state + state = dict(state) + + # Allow for loading of legacy states. + # See: + # https://github.com/openai/gym/pull/2397 -- shape + # https://github.com/openai/gym/pull/1913 -- np_random + # + if "shape" in state: + state["_shape"] = state["shape"] + del state["shape"] + if "np_random" in state: + state["_np_random"] = state["np_random"] + del state["np_random"] + + # Update our state + self.__dict__.update(state) + + def to_jsonable(self, sample_n): + """Convert a batch of samples from this space to a JSONable data type.""" + # By default, assume identity is JSONable + return sample_n + + def from_jsonable(self, sample_n): + """Convert a JSONable data type to a batch of samples from this space.""" + # By default, assume identity is JSONable + return sample_n diff --git a/gym-0.21.0/gym/spaces/tuple.py b/gym-0.21.0/gym/spaces/tuple.py new file mode 100644 index 0000000000000000000000000000000000000000..bb3133a506329148a4d6c280982eb0f0d5276619 --- /dev/null +++ b/gym-0.21.0/gym/spaces/tuple.py @@ -0,0 +1,92 @@ +import numpy as np +from .space import Space + + +class Tuple(Space): + """ + A tuple (i.e., product) of simpler spaces + + Example usage: + self.observation_space = spaces.Tuple((spaces.Discrete(2), 
spaces.Discrete(3))) + """ + + def __init__(self, spaces, seed=None): + self.spaces = spaces + for space in spaces: + assert isinstance( + space, Space + ), "Elements of the tuple must be instances of gym.Space" + super(Tuple, self).__init__(None, None, seed) + + def seed(self, seed=None): + seeds = [] + + if isinstance(seed, list): + for i, space in enumerate(self.spaces): + seeds += space.seed(seed[i]) + elif isinstance(seed, int): + seeds = super().seed(seed) + try: + subseeds = self.np_random.choice( + np.iinfo(int).max, + size=len(self.spaces), + replace=False, # unique subseed for each subspace + ) + except ValueError: + subseeds = self.np_random.choice( + np.iinfo(int).max, + size=len(self.spaces), + replace=True, # we get more than INT_MAX subspaces + ) + + for subspace, subseed in zip(self.spaces, subseeds): + seeds.append(subspace.seed(int(subseed))[0]) + elif seed is None: + for space in self.spaces: + seeds += space.seed(seed) + else: + raise TypeError("Passed seed not of an expected type: list or int or None") + + return seeds + + def sample(self): + return tuple([space.sample() for space in self.spaces]) + + def contains(self, x): + if isinstance(x, list): + x = tuple(x) # Promote list to tuple for contains check + return ( + isinstance(x, tuple) + and len(x) == len(self.spaces) + and all(space.contains(part) for (space, part) in zip(self.spaces, x)) + ) + + def __repr__(self): + return "Tuple(" + ", ".join([str(s) for s in self.spaces]) + ")" + + def to_jsonable(self, sample_n): + # serialize as list-repr of tuple of vectors + return [ + space.to_jsonable([sample[i] for sample in sample_n]) + for i, space in enumerate(self.spaces) + ] + + def from_jsonable(self, sample_n): + return [ + sample + for sample in zip( + *[ + space.from_jsonable(sample_n[i]) + for i, space in enumerate(self.spaces) + ] + ) + ] + + def __getitem__(self, index): + return self.spaces[index] + + def __len__(self): + return len(self.spaces) + + def __eq__(self, other): + return 
isinstance(other, Tuple) and self.spaces == other.spaces diff --git a/gym-0.21.0/gym/vector/__init__.py b/gym-0.21.0/gym/vector/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c42a25caf4f02e32619362c999454b60a29e82f1 --- /dev/null +++ b/gym-0.21.0/gym/vector/__init__.py @@ -0,0 +1,66 @@ +try: + from collections.abc import Iterable +except ImportError: + Iterable = (tuple, list) + +from gym.vector.async_vector_env import AsyncVectorEnv +from gym.vector.sync_vector_env import SyncVectorEnv +from gym.vector.vector_env import VectorEnv, VectorEnvWrapper + +__all__ = ["AsyncVectorEnv", "SyncVectorEnv", "VectorEnv", "VectorEnvWrapper", "make"] + + +def make(id, num_envs=1, asynchronous=True, wrappers=None, **kwargs): + """Create a vectorized environment from multiple copies of an environment, + from its id + + Parameters + ---------- + id : str + The environment ID. This must be a valid ID from the registry. + + num_envs : int + Number of copies of the environment. + + asynchronous : bool (default: `True`) + If `True`, wraps the environments in an `AsyncVectorEnv` (which uses + `multiprocessing` to run the environments in parallel). If `False`, + wraps the environments in a `SyncVectorEnv`. + + wrappers : Callable or Iterable of Callables (default: `None`) + If not `None`, then apply the wrappers to each internal + environment during creation. + + Returns + ------- + env : `gym.vector.VectorEnv` instance + The vectorized environment. 
+ + Example + ------- + >>> import gym + >>> env = gym.vector.make('CartPole-v1', 3) + >>> env.reset() + array([[-0.04456399, 0.04653909, 0.01326909, -0.02099827], + [ 0.03073904, 0.00145001, -0.03088818, -0.03131252], + [ 0.03468829, 0.01500225, 0.01230312, 0.01825218]], + dtype=float32) + """ + from gym.envs import make as make_ + + def _make_env(): + env = make_(id, **kwargs) + if wrappers is not None: + if callable(wrappers): + env = wrappers(env) + elif isinstance(wrappers, Iterable) and all( + [callable(w) for w in wrappers] + ): + for wrapper in wrappers: + env = wrapper(env) + else: + raise NotImplementedError + return env + + env_fns = [_make_env for _ in range(num_envs)] + return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns) diff --git a/gym-0.21.0/gym/vector/__pycache__/async_vector_env.cpython-38.pyc b/gym-0.21.0/gym/vector/__pycache__/async_vector_env.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c78c9a8b19377a1ac469f4ed3c67dc0b63292de4 Binary files /dev/null and b/gym-0.21.0/gym/vector/__pycache__/async_vector_env.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/vector/__pycache__/sync_vector_env.cpython-38.pyc b/gym-0.21.0/gym/vector/__pycache__/sync_vector_env.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..32644401551c8b51dcec69bad8775b5203dde9f2 Binary files /dev/null and b/gym-0.21.0/gym/vector/__pycache__/sync_vector_env.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/vector/utils/numpy_utils.py b/gym-0.21.0/gym/vector/utils/numpy_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2465ab24946e243d2f18084c9a38bbd30d51cdde --- /dev/null +++ b/gym-0.21.0/gym/vector/utils/numpy_utils.py @@ -0,0 +1,146 @@ +import numpy as np + +from gym.spaces import Space, Tuple, Dict +from gym.vector.utils.spaces import _BaseGymSpaces +from collections import OrderedDict + +__all__ = ["concatenate", "create_empty_array"] + + +def 
concatenate(items, out, space): + """Concatenate multiple samples from space into a single object. + + Parameters + ---------- + items : iterable of samples of `space` + Samples to be concatenated. + + out : tuple, dict, or `np.ndarray` + The output object. This object is a (possibly nested) numpy array. + + space : `gym.spaces.Space` instance + Observation space of a single environment in the vectorized environment. + + Returns + ------- + out : tuple, dict, or `np.ndarray` + The output object. This object is a (possibly nested) numpy array. + + Example + ------- + >>> from gym.spaces import Box + >>> space = Box(low=0, high=1, shape=(3,), dtype=np.float32) + >>> out = np.zeros((2, 3), dtype=np.float32) + >>> items = [space.sample() for _ in range(2)] + >>> concatenate(items, out, space) + array([[0.6348213 , 0.28607962, 0.60760117], + [0.87383074, 0.192658 , 0.2148103 ]], dtype=float32) + """ + assert isinstance(items, (list, tuple)) + if isinstance(space, _BaseGymSpaces): + return concatenate_base(items, out, space) + elif isinstance(space, Tuple): + return concatenate_tuple(items, out, space) + elif isinstance(space, Dict): + return concatenate_dict(items, out, space) + elif isinstance(space, Space): + return concatenate_custom(items, out, space) + else: + raise ValueError( + "Space of type `{0}` is not a valid `gym.Space` " + "instance.".format(type(space)) + ) + + +def concatenate_base(items, out, space): + return np.stack(items, axis=0, out=out) + + +def concatenate_tuple(items, out, space): + return tuple( + concatenate([item[i] for item in items], out[i], subspace) + for (i, subspace) in enumerate(space.spaces) + ) + + +def concatenate_dict(items, out, space): + return OrderedDict( + [ + (key, concatenate([item[key] for item in items], out[key], subspace)) + for (key, subspace) in space.spaces.items() + ] + ) + + +def concatenate_custom(items, out, space): + return tuple(items) + + +def create_empty_array(space, n=1, fn=np.zeros): + """Create an empty 
(possibly nested) numpy array. + + Parameters + ---------- + space : `gym.spaces.Space` instance + Observation space of a single environment in the vectorized environment. + + n : int + Number of environments in the vectorized environment. If `None`, creates + an empty sample from `space`. + + fn : callable + Function to apply when creating the empty numpy array. Examples of such + functions are `np.empty` or `np.zeros`. + + Returns + ------- + out : tuple, dict, or `np.ndarray` + The output object. This object is a (possibly nested) numpy array. + + Example + ------- + >>> from gym.spaces import Box, Dict + >>> space = Dict({ + ... 'position': Box(low=0, high=1, shape=(3,), dtype=np.float32), + ... 'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)}) + >>> create_empty_array(space, n=2, fn=np.zeros) + OrderedDict([('position', array([[0., 0., 0.], + [0., 0., 0.]], dtype=float32)), + ('velocity', array([[0., 0.], + [0., 0.]], dtype=float32))]) + """ + if isinstance(space, _BaseGymSpaces): + return create_empty_array_base(space, n=n, fn=fn) + elif isinstance(space, Tuple): + return create_empty_array_tuple(space, n=n, fn=fn) + elif isinstance(space, Dict): + return create_empty_array_dict(space, n=n, fn=fn) + elif isinstance(space, Space): + return create_empty_array_custom(space, n=n, fn=fn) + else: + raise ValueError( + "Space of type `{0}` is not a valid `gym.Space` " + "instance.".format(type(space)) + ) + + +def create_empty_array_base(space, n=1, fn=np.zeros): + shape = space.shape if (n is None) else (n,) + space.shape + return fn(shape, dtype=space.dtype) + + +def create_empty_array_tuple(space, n=1, fn=np.zeros): + return tuple(create_empty_array(subspace, n=n, fn=fn) for subspace in space.spaces) + + +def create_empty_array_dict(space, n=1, fn=np.zeros): + return OrderedDict( + [ + (key, create_empty_array(subspace, n=n, fn=fn)) + for (key, subspace) in space.spaces.items() + ] + ) + + +def create_empty_array_custom(space, n=1, fn=np.zeros): + 
return None diff --git a/gym-0.21.0/gym/wrappers/README.md b/gym-0.21.0/gym/wrappers/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d5307a1582ccb54420c8c8d81834553934240f8e --- /dev/null +++ b/gym-0.21.0/gym/wrappers/README.md @@ -0,0 +1,25 @@ +# Wrappers + +Wrappers are used to transform an environment in a modular way: + +```python +env = gym.make('Pong-v0') +env = MyWrapper(env) +``` + +Note that we may later restructure any of the files in this directory, +but will keep the wrappers available at the wrappers' top-level +folder. So for example, you should access `MyWrapper` as follows: + +```python +from gym.wrappers import MyWrapper +``` + +## Quick tips for writing your own wrapper + +- Don't forget to call `super(class_name, self).__init__(env)` if you override the wrapper's `__init__` function +- You can access the inner environment with `self.unwrapped` +- You can access the previous layer using `self.env` +- The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer +- Create a wrapped function for at least one of the following: `__init__(self, env)`, `step`, `reset`, `render`, `close`, or `seed` +- Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) diff --git a/gym-0.21.0/gym/wrappers/__init__.py b/gym-0.21.0/gym/wrappers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5b215bf51c5652231f4bbd32677563bdc9846cae --- /dev/null +++ b/gym-0.21.0/gym/wrappers/__init__.py @@ -0,0 +1,18 @@ +from gym import error +from gym.wrappers.monitor import Monitor +from gym.wrappers.time_limit import TimeLimit +from gym.wrappers.filter_observation import FilterObservation +from gym.wrappers.atari_preprocessing import AtariPreprocessing +from gym.wrappers.time_aware_observation import TimeAwareObservation +from gym.wrappers.rescale_action import RescaleAction +from 
gym.wrappers.flatten_observation import FlattenObservation +from gym.wrappers.gray_scale_observation import GrayScaleObservation +from gym.wrappers.frame_stack import LazyFrames +from gym.wrappers.frame_stack import FrameStack +from gym.wrappers.transform_observation import TransformObservation +from gym.wrappers.transform_reward import TransformReward +from gym.wrappers.resize_observation import ResizeObservation +from gym.wrappers.clip_action import ClipAction +from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics +from gym.wrappers.normalize import NormalizeObservation, NormalizeReward +from gym.wrappers.record_video import RecordVideo, capped_cubic_video_schedule diff --git a/gym-0.21.0/gym/wrappers/__pycache__/filter_observation.cpython-38.pyc b/gym-0.21.0/gym/wrappers/__pycache__/filter_observation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b771eda2f6fb5aeb27e3d681ef4cecf00b6667c Binary files /dev/null and b/gym-0.21.0/gym/wrappers/__pycache__/filter_observation.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/wrappers/__pycache__/frame_stack.cpython-38.pyc b/gym-0.21.0/gym/wrappers/__pycache__/frame_stack.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5193e280e8c464b3d49b29d884d0318aa56b17f Binary files /dev/null and b/gym-0.21.0/gym/wrappers/__pycache__/frame_stack.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/wrappers/__pycache__/time_aware_observation.cpython-38.pyc b/gym-0.21.0/gym/wrappers/__pycache__/time_aware_observation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c185924d31ff24fe5e2b8626ed943b89a577574e Binary files /dev/null and b/gym-0.21.0/gym/wrappers/__pycache__/time_aware_observation.cpython-38.pyc differ diff --git a/gym-0.21.0/gym/wrappers/filter_observation.py b/gym-0.21.0/gym/wrappers/filter_observation.py new file mode 100644 index 
0000000000000000000000000000000000000000..0f52c04492ba2aba6c34d734491e4ad37917978d --- /dev/null +++ b/gym-0.21.0/gym/wrappers/filter_observation.py @@ -0,0 +1,72 @@ +import copy +from gym import spaces +from gym import ObservationWrapper + + +class FilterObservation(ObservationWrapper): + """Filter dictionary observations by their keys. + + Args: + env: The environment to wrap. + filter_keys: List of keys to be included in the observations. + + Raises: + ValueError: If observation keys in not instance of None or + iterable. + ValueError: If any of the `filter_keys` are not included in + the original `env`'s observation space + + """ + + def __init__(self, env, filter_keys=None): + super(FilterObservation, self).__init__(env) + + wrapped_observation_space = env.observation_space + assert isinstance( + wrapped_observation_space, spaces.Dict + ), "FilterObservationWrapper is only usable with dict observations." + + observation_keys = wrapped_observation_space.spaces.keys() + + if filter_keys is None: + filter_keys = tuple(observation_keys) + + missing_keys = set(key for key in filter_keys if key not in observation_keys) + + if missing_keys: + raise ValueError( + "All the filter_keys must be included in the " + "original obsrevation space.\n" + "Filter keys: {filter_keys}\n" + "Observation keys: {observation_keys}\n" + "Missing keys: {missing_keys}".format( + filter_keys=filter_keys, + observation_keys=observation_keys, + missing_keys=missing_keys, + ) + ) + + self.observation_space = type(wrapped_observation_space)( + [ + (name, copy.deepcopy(space)) + for name, space in wrapped_observation_space.spaces.items() + if name in filter_keys + ] + ) + + self._env = env + self._filter_keys = tuple(filter_keys) + + def observation(self, observation): + filter_observation = self._filter_observation(observation) + return filter_observation + + def _filter_observation(self, observation): + observation = type(observation)( + [ + (name, value) + for name, value in 
observation.items() + if name in self._filter_keys + ] + ) + return observation diff --git a/gym-0.21.0/gym/wrappers/flatten_observation.py b/gym-0.21.0/gym/wrappers/flatten_observation.py new file mode 100644 index 0000000000000000000000000000000000000000..9dca4da256a398e53407533850b620a13fdaf1f0 --- /dev/null +++ b/gym-0.21.0/gym/wrappers/flatten_observation.py @@ -0,0 +1,13 @@ +import gym.spaces as spaces +from gym import ObservationWrapper + + +class FlattenObservation(ObservationWrapper): + r"""Observation wrapper that flattens the observation.""" + + def __init__(self, env): + super(FlattenObservation, self).__init__(env) + self.observation_space = spaces.flatten_space(env.observation_space) + + def observation(self, observation): + return spaces.flatten(self.env.observation_space, observation) diff --git a/gym-0.21.0/gym/wrappers/monitor.py b/gym-0.21.0/gym/wrappers/monitor.py new file mode 100644 index 0000000000000000000000000000000000000000..fceeb538d7b59a11f30df086c083a0484e69e1cd --- /dev/null +++ b/gym-0.21.0/gym/wrappers/monitor.py @@ -0,0 +1,487 @@ +import json +import os + +import numpy as np + +import gym +import warnings +from gym import Wrapper +from gym import error, version, logger +from gym.wrappers.monitoring import stats_recorder, video_recorder +from gym.utils import atomic_write, closer +from gym.utils.json_utils import json_encode_np + +FILE_PREFIX = "openaigym" +MANIFEST_PREFIX = FILE_PREFIX + ".manifest" + + +class Monitor(Wrapper): + def __init__( + self, + env, + directory, + video_callable=None, + force=False, + resume=False, + write_upon_reset=False, + uid=None, + mode=None, + ): + super(Monitor, self).__init__(env) + warnings.warn( + "The Monitor wrapper is being deprecated in favor of gym.wrappers.RecordVideo and gym.wrappers.RecordEpisodeStatistics (see https://github.com/openai/gym/issues/2297)" + ) + + self.videos = [] + + self.stats_recorder = None + self.video_recorder = None + self.enabled = False + self.episode_id = 0 + 
def step(self, action):
    """Step the wrapped env while recording stats (and possibly video).

    Returns the usual ``(observation, reward, done, info)`` tuple, with
    ``done`` passed back through the monitor's after-step hook.
    """
    self._before_step(action)
    obs, reward, done, info = self.env.step(action)
    done = self._after_step(obs, reward, done, info)
    return obs, reward, done, info
This usually means you did not create it via 'gym.make', and is recommended only for advanced users." + ) + env_id = "(unknown)" + else: + env_id = self.env.spec.id + + self.directory = os.path.abspath(directory) + + if not os.path.exists(self.directory): + logger.info("Creating monitor directory %s", self.directory) + os.makedirs(self.directory, exist_ok=True) + + if video_callable is None: + video_callable = capped_cubic_video_schedule + elif video_callable == False: + video_callable = disable_videos + elif not callable(video_callable): + raise error.Error( + "You must provide a function, None, or False for video_callable, not {}: {}".format( + type(video_callable), video_callable + ) + ) + self.video_callable = video_callable + + # Check on whether we need to clear anything + if force: + clear_monitor_files(self.directory) + elif not resume: + training_manifests = detect_training_manifests(self.directory) + if len(training_manifests) > 0: + raise error.Error( + """Trying to write to monitor directory {} with existing monitor files: {}. 
+ You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.""".format( + directory, ", ".join(training_manifests[:5]) + ) + ) + + self._monitor_id = monitor_closer.register(self) + + self.enabled = True + # We use the 'openai-gym' prefix to determine if a file is + # ours + self.file_prefix = FILE_PREFIX + self.file_infix = "{}.{}".format(self._monitor_id, uid if uid else os.getpid()) + + self.stats_recorder = stats_recorder.StatsRecorder( + self.directory, + "{}.episode_batch.{}".format(self.file_prefix, self.file_infix), + autoreset=self.env_semantics_autoreset, + env_id=env_id, + ) + + self.write_upon_reset = write_upon_reset + + if mode is not None: + self._set_mode(mode) + + def _flush(self, force=False): + """Flush all relevant monitor information to disk.""" + if not self.write_upon_reset and not force: + return + + self.stats_recorder.flush() + + # Give it a very distinguished name, since we need to pick it + # up from the filesystem later. + path = os.path.join( + self.directory, + "{}.manifest.{}.manifest.json".format(self.file_prefix, self.file_infix), + ) + logger.debug("Writing training manifest file to %s", path) + with atomic_write.atomic_write(path) as f: + # We need to write relative paths here since people may + # move the training_dir around. It would be cleaner to + # already have the basenames rather than basename'ing + # manually, but this works for now. 
def _set_mode(self, mode):
    """Set the episode type recorded by the stats recorder.

    Args:
        mode: Either ``"evaluation"`` or ``"training"``.

    Raises:
        error.Error: If ``mode`` is neither of the two accepted values.
    """
    if mode == "evaluation":
        mode_type = "e"
    elif mode == "training":
        mode_type = "t"
    else:
        # BUG FIX: the value used to be passed as a second exception
        # argument, leaving the "{}" placeholder unformatted.
        raise error.Error(
            'Invalid mode {}: must be "training" or "evaluation"'.format(mode)
        )
    self.stats_recorder.type = mode_type
def capped_cubic_video_schedule(episode_id):
    """Default video schedule.

    Records on perfect-cube episode ids (0, 1, 8, 27, ...) until episode
    1000, then on every 1000th episode thereafter.
    """
    if episode_id >= 1000:
        return episode_id % 1000 == 0
    cube_root = int(round(episode_id ** (1.0 / 3)))
    return cube_root ** 3 == episode_id
"initial_reset_timestamps": initial_reset_timestamps, + "initial_reset_timestamp": initial_reset_timestamp, + "videos": videos, + } + + +def merge_stats_files(stats_files): + timestamps = [] + episode_lengths = [] + episode_rewards = [] + episode_types = [] + initial_reset_timestamps = [] + data_sources = [] + + for i, path in enumerate(stats_files): + with open(path) as f: + content = json.load(f) + if len(content["timestamps"]) == 0: + continue # so empty file doesn't mess up results, due to null initial_reset_timestamp + data_sources += [i] * len(content["timestamps"]) + timestamps += content["timestamps"] + episode_lengths += content["episode_lengths"] + episode_rewards += content["episode_rewards"] + # Recent addition + episode_types += content.get("episode_types", []) + # Keep track of where each episode came from. + initial_reset_timestamps.append(content["initial_reset_timestamp"]) + + idxs = np.argsort(timestamps) + timestamps = np.array(timestamps)[idxs].tolist() + episode_lengths = np.array(episode_lengths)[idxs].tolist() + episode_rewards = np.array(episode_rewards)[idxs].tolist() + data_sources = np.array(data_sources)[idxs].tolist() + + if episode_types: + episode_types = np.array(episode_types)[idxs].tolist() + else: + episode_types = None + + if len(initial_reset_timestamps) > 0: + initial_reset_timestamp = min(initial_reset_timestamps) + else: + initial_reset_timestamp = 0 + + return ( + data_sources, + initial_reset_timestamps, + timestamps, + episode_lengths, + episode_rewards, + episode_types, + initial_reset_timestamp, + ) + + +# TODO training_dir isn't used except for error messages, clean up the layering +def collapse_env_infos(env_infos, training_dir): + assert len(env_infos) > 0 + + first = env_infos[0] + for other in env_infos[1:]: + if first != other: + raise error.Error( + "Found two unequal env_infos: {} and {}. 
class StatsRecorder(object):
    """Record per-episode statistics (lengths, rewards, timestamps) to JSON.

    Args:
        directory: Directory the stats file is written into.
        file_prefix: Filename prefix; the file is ``<prefix>.stats.json``.
        autoreset: If True, the recorder resets itself automatically when an
            episode finishes (for envs with autoreset semantics).
        env_id: Environment id, used only in error messages.
    """

    def __init__(self, directory, file_prefix, autoreset=False, env_id=None):
        self.autoreset = autoreset
        self.env_id = env_id

        self.initial_reset_timestamp = None
        self.directory = directory
        self.file_prefix = file_prefix
        self.episode_lengths = []
        self.episode_rewards = []
        self.episode_types = []  # experimental addition
        self._type = "t"  # "t" = training, "e" = evaluation
        self.timestamps = []
        self.steps = None  # None until the first reset
        self.total_steps = 0
        self.rewards = None  # running reward sum for the current episode

        self.done = None
        self.closed = False

        filename = "{}.stats.json".format(self.file_prefix)
        self.path = os.path.join(self.directory, filename)

    @property
    def type(self):
        """Episode type applied to subsequent episodes ("t" or "e")."""
        return self._type

    @type.setter
    def type(self, type):
        if type not in ["t", "e"]:
            # BUG FIX: the value used to be passed as a second exception
            # argument, leaving the "{}" placeholder unformatted.
            raise error.Error(
                "Invalid episode type {}: must be t for training or e for evaluation".format(
                    type
                )
            )
        self._type = type

    def before_step(self, action):
        """Validate that stepping is legal right now (reset happened, not done)."""
        assert not self.closed

        if self.done:
            raise error.ResetNeeded(
                "Trying to step environment which is currently done. While the monitor is active for {}, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode.".format(
                    self.env_id
                )
            )
        elif self.steps is None:
            raise error.ResetNeeded(
                "Trying to step an environment before reset. While the monitor is active for {}, you must call 'env.reset()' before taking an initial step.".format(
                    self.env_id
                )
            )

    def after_step(self, observation, reward, done, info):
        """Accumulate step counters and rewards; finalize the episode on done."""
        self.steps += 1
        self.total_steps += 1
        self.rewards += reward
        self.done = done

        if done:
            self.save_complete()

        if done:
            if self.autoreset:
                self.before_reset()
                self.after_reset(observation)

    def before_reset(self):
        """Validate that a reset is legal (episode over or never started)."""
        assert not self.closed

        if self.done is not None and not self.done and self.steps > 0:
            raise error.Error(
                "Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over.".format(
                    self.env_id
                )
            )

        self.done = False
        if self.initial_reset_timestamp is None:
            self.initial_reset_timestamp = time.perf_counter()

    def after_reset(self, observation):
        """Zero the per-episode counters and record the episode type."""
        self.steps = 0
        self.rewards = 0
        # We write the type at the beginning of the episode. If a user
        # changes the type, it's more natural for it to apply next
        # time the user calls reset().
        self.episode_types.append(self._type)

    def save_complete(self):
        """Append the finished episode's length, reward, and timestamp."""
        if self.steps is not None:
            self.episode_lengths.append(self.steps)
            self.episode_rewards.append(float(self.rewards))
            self.timestamps.append(time.perf_counter())

    def close(self):
        """Flush pending stats to disk and mark the recorder closed."""
        self.flush()
        self.closed = True

    def flush(self):
        """Atomically write the accumulated stats JSON (no-op once closed)."""
        if self.closed:
            return

        with atomic_write.atomic_write(self.path) as f:
            json.dump(
                {
                    "initial_reset_timestamp": self.initial_reset_timestamp,
                    "timestamps": self.timestamps,
                    "episode_lengths": self.episode_lengths,
                    "episode_rewards": self.episode_rewards,
                    "episode_types": self.episode_types,
                },
                f,
                default=json_encode_np,
            )
+ enabled (bool): Whether to actually record video, or just no-op (for convenience) + """ + + def __init__(self, env, path=None, metadata=None, enabled=True, base_path=None): + modes = env.metadata.get("render.modes", []) + self._async = env.metadata.get("semantics.async") + self.enabled = enabled + self._closed = False + + # Don't bother setting anything else if not enabled + if not self.enabled: + return + + self.ansi_mode = False + if "rgb_array" not in modes: + if "ansi" in modes: + self.ansi_mode = True + else: + logger.info( + 'Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'.format( + env + ) + ) + # Whoops, turns out we shouldn't be enabled after all + self.enabled = False + return + + if path is not None and base_path is not None: + raise error.Error("You can pass at most one of `path` or `base_path`.") + + self.last_frame = None + self.env = env + + required_ext = ".json" if self.ansi_mode else ".mp4" + if path is None: + if base_path is not None: + # Base path given, append ext + path = base_path + required_ext + else: + # Otherwise, just generate a unique filename + with tempfile.NamedTemporaryFile( + suffix=required_ext, delete=False + ) as f: + path = f.name + self.path = path + + path_base, actual_ext = os.path.splitext(self.path) + + if actual_ext != required_ext: + hint = ( + " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format." + if self.ansi_mode + else "" + ) + raise error.Error( + "Invalid path given: {} -- must have file extension {}.{}".format( + self.path, required_ext, hint + ) + ) + # Touch the file in any case, so we know it's present. (This + # corrects for platform platform differences. Using ffmpeg on + # OS X, the file is precreated, but not on Linux. 
def capture_frame(self):
    """Render the given `env` and add the resulting frame to the video."""
    if not self.functional:
        return
    if self._closed:
        logger.warn(
            "The video recorder has been closed and no frames will be captured anymore."
        )
        return
    logger.debug("Capturing video frame: path=%s", self.path)

    frame = self.env.render(mode="ansi" if self.ansi_mode else "rgb_array")

    if frame is None:
        # Async envs may legitimately have no frame ready yet.
        if self._async:
            return
        # Indicates a bug in the environment: don't want to raise
        # an error here.
        logger.warn(
            "Env returned None on render(). Disabling further rendering for video recorder by marking as disabled: path=%s metadata_path=%s",
            self.path,
            self.metadata_path,
        )
        self.broken = True
        return

    self.last_frame = frame
    if self.ansi_mode:
        self._encode_ansi_frame(frame)
    else:
        self._encode_image_frame(frame)
def _encode_image_frame(self, frame):
    """Encode one RGB frame, creating the ImageEncoder on first use."""
    if self.encoder is None:
        # Lazily start the encoder: it needs the frame shape up front.
        self.encoder = ImageEncoder(
            self.path, frame.shape, self.frames_per_sec, self.output_frames_per_sec
        )
        self.metadata["encoder_version"] = self.encoder.version_info

    try:
        self.encoder.capture_frame(frame)
    except error.InvalidFrame as e:
        logger.warn("Tried to pass invalid video frame, marking as broken: %s", e)
        self.broken = True
    else:
        self.empty = False
def capture_frame(self, frame):
    """Validate a text frame and append its UTF-8 bytes to the frame list."""
    if isinstance(frame, StringIO):
        text = frame.getvalue()
    elif isinstance(frame, str):
        text = frame
    else:
        raise error.InvalidFrame(
            "Wrong type {} for {}: text frame must be a string or StringIO".format(
                type(frame), frame
            )
        )

    encoded = text.encode("utf-8")

    # Frames are later joined with terminal control codes; each must be
    # newline-terminated and free of carriage returns.
    if encoded[-1:] != b"\n":
        raise error.InvalidFrame(
            'Frame must end with a newline: """{}"""'.format(text)
        )
    if b"\r" in encoded:
        raise error.InvalidFrame(
            'Frame contains carriage returns (only newlines are allowed: """{}"""'.format(
                text
            )
        )

    self.frames.append(encoded)
+ height = max([frame.count(b"\n") for frame in self.frames]) + 1 + width = ( + max( + [ + max([len(line) for line in frame.split(b"\n")]) + for frame in self.frames + ] + ) + + 2 + ) + + data = { + "version": 1, + "width": width, + "height": height, + "duration": len(self.frames) * frame_duration, + "command": "-", + "title": "gym VideoRecorder episode", + "env": {}, # could add some env metadata here + "stdout": events, + } + + with open(self.output_path, "w") as f: + json.dump(data, f) + + @property + def version_info(self): + return {"backend": "TextEncoder", "version": 1} + + +class ImageEncoder(object): + def __init__(self, output_path, frame_shape, frames_per_sec, output_frames_per_sec): + self.proc = None + self.output_path = output_path + # Frame shape should be lines-first, so w and h are swapped + h, w, pixfmt = frame_shape + if pixfmt != 3 and pixfmt != 4: + raise error.InvalidFrame( + "Your frame has shape {}, but we require (w,h,3) or (w,h,4), i.e., RGB values for a w-by-h image, with an optional alpha channel.".format( + frame_shape + ) + ) + self.wh = (w, h) + self.includes_alpha = pixfmt == 4 + self.frame_shape = frame_shape + self.frames_per_sec = frames_per_sec + self.output_frames_per_sec = output_frames_per_sec + + if distutils.spawn.find_executable("avconv") is not None: + self.backend = "avconv" + elif distutils.spawn.find_executable("ffmpeg") is not None: + self.backend = "ffmpeg" + else: + raise error.DependencyNotInstalled( + """Found neither the ffmpeg nor avconv executables. On OS X, you can install ffmpeg via `brew install ffmpeg`. On most Ubuntu variants, `sudo apt-get install ffmpeg` should do it. 
def start(self):
    """Launch the ffmpeg/avconv subprocess that consumes raw frames on stdin."""
    size = "{}x{}".format(*self.wh)
    input_pix_fmt = "rgb32" if self.includes_alpha else "rgb24"
    in_rate = "%d" % self.frames_per_sec
    out_rate = "%d" % self.output_frames_per_sec

    # Options shared by both backends: raw RGB frames read from stdin.
    common_head = (
        self.backend,
        "-nostats",
        "-loglevel",
        "error",  # suppress warnings
        "-y",
        # input
        "-f",
        "rawvideo",
        "-s:v",
        size,
        "-pix_fmt",
        input_pix_fmt,
    )

    if self.backend == "ffmpeg":
        self.cmdline = common_head + (
            "-r",
            in_rate,
            "-i",
            "-",  # this used to be /dev/stdin, which is not Windows-friendly
            # output
            "-an",
            "-r",
            in_rate,
            "-vcodec",
            "mpeg4",
            "-pix_fmt",
            "bgr24",
            "-r",
            out_rate,
            self.output_path,
        )
    else:
        self.cmdline = common_head + (
            "-framerate",
            in_rate,
            "-i",
            "-",  # this used to be /dev/stdin, which is not Windows-friendly
            # output
            "-vf",
            "scale=trunc(iw/2)*2:trunc(ih/2)*2",
            "-vcodec",
            "libx264",
            "-pix_fmt",
            "yuv420p",
            "-r",
            out_rate,
            self.output_path,
        )

    logger.debug('Starting %s with "%s"', self.backend, " ".join(self.cmdline))
    if hasattr(os, "setsid"):  # setsid not present on Windows
        self.proc = subprocess.Popen(
            self.cmdline, stdin=subprocess.PIPE, preexec_fn=os.setsid
        )
    else:
        self.proc = subprocess.Popen(self.cmdline, stdin=subprocess.PIPE)
def close(self):
    """Close the encoder's stdin and wait for the subprocess to exit."""
    self.proc.stdin.close()
    status = self.proc.wait()
    if status != 0:
        logger.error("VideoRecorder encoder exited with status {}".format(status))