Upload 254 files

5960497 verified 4 months ago

5.18 kB

	"""
	An interface for asynchronous vectorized environments.
	"""

	import multiprocessing as mp
	import numpy as np
	from .vec_env import VecEnv, CloudpickleWrapper, clear_mpi_env_vars
	import ctypes
	from baselines import logger

	from .util import dict_to_obs, obs_space_info, obs_to_dict

	_NP_TO_CT = {np.float32: ctypes.c_float,
	np.int32: ctypes.c_int32,
	np.int8: ctypes.c_int8,
	np.uint8: ctypes.c_char,
	np.bool: ctypes.c_bool}


	class ShmemVecEnv(VecEnv):
	"""
	Optimized version of SubprocVecEnv that uses shared variables to communicate observations.
	"""

	def __init__(self, env_fns, spaces=None, context='spawn'):
	"""
	If you don't specify observation_space, we'll have to create a dummy
	environment to get it.
	"""
	ctx = mp.get_context(context)
	if spaces:
	observation_space, action_space = spaces
	else:
	logger.log('Creating dummy env object to get spaces')
	with logger.scoped_configure(format_strs=[]):
	dummy = env_fns[0]()
	observation_space, action_space = dummy.observation_space, dummy.action_space
	dummy.close()
	del dummy
	VecEnv.__init__(self, len(env_fns), observation_space, action_space)
	self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
	self.obs_bufs = [
	{k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k]))) for k in self.obs_keys}
	for _ in env_fns]
	self.parent_pipes = []
	self.procs = []
	with clear_mpi_env_vars():
	for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
	wrapped_fn = CloudpickleWrapper(env_fn)
	parent_pipe, child_pipe = ctx.Pipe()
	proc = ctx.Process(target=_subproc_worker,
	args=(child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes, self.obs_dtypes, self.obs_keys))
	proc.daemon = True
	self.procs.append(proc)
	self.parent_pipes.append(parent_pipe)
	proc.start()
	child_pipe.close()
	self.waiting_step = False
	self.viewer = None

	def reset(self):
	if self.waiting_step:
	logger.warn('Called reset() while waiting for the step to complete')
	self.step_wait()
	for pipe in self.parent_pipes:
	pipe.send(('reset', None))
	return self._decode_obses([pipe.recv() for pipe in self.parent_pipes])

	def step_async(self, actions):
	assert len(actions) == len(self.parent_pipes)
	for pipe, act in zip(self.parent_pipes, actions):
	pipe.send(('step', act))
	self.waiting_step = True

	def step_wait(self):
	outs = [pipe.recv() for pipe in self.parent_pipes]
	self.waiting_step = False
	obs, rews, dones, infos = zip(*outs)
	return self._decode_obses(obs), np.array(rews), np.array(dones), infos

	def close_extras(self):
	if self.waiting_step:
	self.step_wait()
	for pipe in self.parent_pipes:
	pipe.send(('close', None))
	for pipe in self.parent_pipes:
	pipe.recv()
	pipe.close()
	for proc in self.procs:
	proc.join()

	def get_images(self, mode='human'):
	for pipe in self.parent_pipes:
	pipe.send(('render', None))
	return [pipe.recv() for pipe in self.parent_pipes]

	def _decode_obses(self, obs):
	result = {}
	for k in self.obs_keys:

	bufs = [b[k] for b in self.obs_bufs]
	o = [np.frombuffer(b.get_obj(), dtype=self.obs_dtypes[k]).reshape(self.obs_shapes[k]) for b in bufs]
	result[k] = np.array(o)
	return dict_to_obs(result)


	def _subproc_worker(pipe, parent_pipe, env_fn_wrapper, obs_bufs, obs_shapes, obs_dtypes, keys):
	"""
	Control a single environment instance using IPC and
	shared memory.
	"""
	def _write_obs(maybe_dict_obs):
	flatdict = obs_to_dict(maybe_dict_obs)
	for k in keys:
	dst = obs_bufs[k].get_obj()
	dst_np = np.frombuffer(dst, dtype=obs_dtypes[k]).reshape(obs_shapes[k]) # pylint: disable=W0212
	np.copyto(dst_np, flatdict[k])

	env = env_fn_wrapper.x()
	parent_pipe.close()
	try:
	while True:
	cmd, data = pipe.recv()
	if cmd == 'reset':
	pipe.send(_write_obs(env.reset()))
	elif cmd == 'step':
	obs, reward, done, info = env.step(data)
	if done:
	obs = env.reset()
	pipe.send((_write_obs(obs), reward, done, info))
	elif cmd == 'render':
	pipe.send(env.render(mode='rgb_array'))
	elif cmd == 'close':
	pipe.send(None)
	break
	else:
	raise RuntimeError('Got unrecognized cmd %s' % cmd)
	except KeyboardInterrupt:
	print('ShmemVecEnv worker: got KeyboardInterrupt')
	finally:
	env.close()