Upload 254 files

5960497 verified 4 months ago

13.1 kB

	import math

	import numpy as np
	import tensorflow as tf
	from baselines.a2c import utils
	from baselines.a2c.utils import conv, fc, conv_to_fc, batch_to_seq, seq_to_batch
	from baselines.common.mpi_running_mean_std import RunningMeanStd
	from keras import layers
	from itertools import combinations

	# Registry of network-builder factories, keyed by the name given to `register`.
	mapping = {}


	def register(name):
	    """
	    Return a decorator that stores the decorated function in `mapping` under `name`.

	    The decorated function is handed back unchanged, so it can still be
	    called directly as well as looked up through the registry. Registering
	    a second function under an existing name overwrites the earlier entry.
	    """

	    def _record(builder):
	        mapping.update({name: builder})
	        return builder

	    return _record


	def nature_cnn(unscaled_images, **conv_kwargs):
	    """
	    CNN from Nature paper.

	    The input is cast to float32 and divided by 255, passed through three
	    ReLU-activated convolutions ('c1', 'c2', 'c3'), flattened with
	    `conv_to_fc`, and finished with a 512-unit ReLU dense layer ('fc1').
	    Extra keyword arguments are forwarded to every `conv` call.
	    """
	    gain = np.sqrt(2)
	    relu = tf.nn.relu
	    # (scope, number of filters, filter size, stride) for each conv layer, in order.
	    conv_specs = (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1))

	    net = tf.cast(unscaled_images, tf.float32) / 255.
	    for scope, nf, rf, stride in conv_specs:
	        net = relu(conv(net, scope, nf=nf, rf=rf, stride=stride, init_scale=gain, **conv_kwargs))

	    flat = conv_to_fc(net)
	    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))


	def build_impala_cnn(unscaled_images, depths=(16, 32, 32), **conv_kwargs):
	    """
	    Model used in the paper "IMPALA: Scalable Distributed Deep-RL with
	    Importance Weighted Actor-Learner Architectures" https://arxiv.org/abs/1802.01561

	    Parameters:
	    ----------

	    unscaled_images: input tensor; it is cast to float32 and divided by 255

	    depths: iterable of int, number of filters of each conv sequence (default: (16, 32, 32))

	    conv_kwargs: accepted for signature compatibility with other builders; not used here

	    Returns:
	    -------

	    output of the final 256-unit ReLU dense layer
	    """

	    layer_num = 0

	    def get_layer_num_str():
	        # Hands out consecutive integers so every layer gets a unique 'layer_<n>' name.
	        nonlocal layer_num
	        num_str = str(layer_num)
	        layer_num += 1
	        return num_str

	    def conv_layer(out, depth):
	        return tf.compat.v1.layers.conv2d(out, depth, 3, padding='same', name='layer_' + get_layer_num_str())

	    def residual_block(inputs):
	        channels = inputs.get_shape()[-1]
	        # The last static dimension may be an object exposing `.value` or
	        # already a plain int; only the missing-attribute case falls back,
	        # so unrelated errors are no longer swallowed by a bare `except`.
	        depth = getattr(channels, 'value', channels)

	        out = tf.nn.relu(inputs)

	        out = conv_layer(out, depth)
	        out = tf.nn.relu(out)
	        out = conv_layer(out, depth)
	        return out + inputs

	    def conv_sequence(inputs, depth):
	        # conv -> max-pool (stride 2) -> two residual blocks
	        out = conv_layer(inputs, depth)
	        out = tf.compat.v1.layers.max_pooling2d(out, pool_size=3, strides=2, padding='same')
	        out = residual_block(out)
	        out = residual_block(out)
	        return out

	    out = tf.cast(unscaled_images, tf.float32) / 255.

	    for depth in depths:
	        out = conv_sequence(out, depth)

	    out = tf.compat.v1.layers.flatten(out)
	    out = tf.nn.relu(out)
	    out = tf.compat.v1.layers.dense(out, 256, activation=tf.nn.relu, name='layer_' + get_layer_num_str())

	    return out


	def build_skill_impala_cnn(unscaled_images, depths=(16, 32, 32), emb_dim=256, num_embeddings=8, seed=0,
	                           **conv_kwargs):
	    """
	    Modified impala cnn model by adding the skill module

	    Parameters:
	    ----------

	    unscaled_images: input tensor; it is cast to float32 and divided by 255

	    depths: iterable of int, number of filters of each conv sequence (default: (16, 32, 32))

	    emb_dim: int, width of the dense feature layer (default: 256)

	    num_embeddings: int, number of codes in the VectorQuantizer codebook (default: 8)

	    seed: seed passed to the VectorQuantizer embedding initializer (default: 0)

	    conv_kwargs: accepted for signature compatibility with other builders; not used here

	    Returns:
	    -------

	    tuple (out, skill_out, pure_out, vq_out, pure_vq_out, embeddings, encoding_indices) where
	    out is pure_out plus the tiled, normalized code index,
	    skill_out is the 2-d projection fed to the quantizer,
	    pure_out is the dense feature layer before the code index is added,
	    vq_out / pure_vq_out / encoding_indices are the three outputs of the VectorQuantizer call,
	    embeddings is the quantizer's codebook variable
	    """

	    layer_num = 0

	    def get_layer_num_str():
	        # Hands out consecutive integers so every layer gets a unique 'layer_<n>' name.
	        nonlocal layer_num
	        num_str = str(layer_num)
	        layer_num += 1
	        return num_str

	    def conv_layer(out, depth):
	        return tf.compat.v1.layers.conv2d(out, depth, 3, padding='same', name='layer_' + get_layer_num_str())

	    def residual_block(inputs):
	        channels = inputs.get_shape()[-1]
	        # Same depth handling as build_impala_cnn: unwrap `.value` when the
	        # static dimension exposes it, otherwise use the entry as-is.
	        depth = getattr(channels, 'value', channels)

	        out = tf.nn.relu(inputs)

	        out = conv_layer(out, depth)
	        out = tf.nn.relu(out)
	        out = conv_layer(out, depth)
	        return out + inputs

	    def conv_sequence(inputs, depth):
	        # conv -> max-pool (stride 2) -> two residual blocks
	        out = conv_layer(inputs, depth)
	        out = tf.compat.v1.layers.max_pooling2d(out, pool_size=3, strides=2, padding='same')
	        out = residual_block(out)
	        out = residual_block(out)
	        return out

	    out = tf.cast(unscaled_images, tf.float32) / 255.

	    for depth in depths:
	        out = conv_sequence(out, depth)

	    out = tf.compat.v1.layers.flatten(out)
	    out = tf.nn.relu(out)
	    pure_out = tf.compat.v1.layers.dense(out, emb_dim, activation=tf.nn.relu, name='layer_' + get_layer_num_str())

	    # skill module: two linear layers project the features down to 2 dimensions,
	    # which are then quantized against the codebook
	    skill_out = tf.compat.v1.layers.dense(pure_out, emb_dim // 2, activation=None, name='layer_' + get_layer_num_str())
	    skill_out = tf.compat.v1.layers.dense(skill_out, 2, activation=None, name='layer_' + get_layer_num_str())
	    vq_layer = VectorQuantizer(num_embeddings, 2, seed=seed, name="vector_quantizer")
	    vq_out, pure_vq_out, encoding_indices = vq_layer(skill_out)

	    # code index divided by the codebook size, repeated emb_dim times along axis 1
	    encoding_indices_ = tf.cast(
	        tf.tile(encoding_indices / vq_layer.num_embeddings, tf.constant([1, emb_dim], tf.int32)), tf.float32)

	    # add the normalized skill indices to features
	    out = tf.math.add(pure_out, encoding_indices_)

	    return out, skill_out, pure_out, vq_out, pure_vq_out, vq_layer.embeddings, encoding_indices


	@register("mlp")
	def mlp(num_layers=2, num_hidden=64, activation=tf.tanh, layer_norm=False):
	    """
	    Stack of fully-connected layers to be used in a policy / q-function approximator

	    Parameters:
	    ----------

	    num_layers: int number of fully-connected layers (default: 2)

	    num_hidden: int size of fully-connected layers (default: 64)

	    activation: activation function (default: tf.tanh)

	    layer_norm: bool if True, layer normalization is applied before each activation (default: False)

	    Returns:
	    -------

	    function that builds fully connected network with a given input tensor / placeholder
	    """

	    def network_fn(X):
	        net = tf.compat.v1.layers.flatten(X)
	        gain = np.sqrt(2)
	        for scope in ('mlp_fc{}'.format(idx) for idx in range(num_layers)):
	            net = fc(net, scope, nh=num_hidden, init_scale=gain)
	            if layer_norm:
	                # NOTE(review): tf.contrib is not shipped with TensorFlow 2.x - confirm
	                # the target TF version before enabling layer_norm.
	                net = tf.contrib.layers.layer_norm(net, center=True, scale=True)
	            net = activation(net)

	        return net

	    return network_fn


	@register("cnn")
	def cnn(**conv_kwargs):
	    """
	    Return a function that builds `nature_cnn` on a given input tensor,
	    forwarding `conv_kwargs` to it.
	    """

	    def network_fn(X):
	        features = nature_cnn(X, **conv_kwargs)
	        return features

	    return network_fn


	@register("impala_cnn")
	def impala_cnn(**conv_kwargs):
	    """
	    Return a function that builds the IMPALA CNN on a given input tensor.

	    Keyword arguments (e.g. `depths`) are forwarded to `build_impala_cnn`
	    instead of being silently discarded.
	    """

	    def network_fn(X):
	        return build_impala_cnn(X, **conv_kwargs)

	    return network_fn


	@register("cnn_small")
	def cnn_small(**conv_kwargs):
	    """
	    Return a function that builds a two-convolution network with a 128-unit dense head.

	    The built network casts its input to float32, divides it by 255, applies
	    the ReLU-activated convolutions 'c1' and 'c2' (which receive `conv_kwargs`),
	    flattens with `conv_to_fc`, and ends with the ReLU dense layer 'fc1'.
	    """

	    def network_fn(X):
	        gain = np.sqrt(2)
	        relu = tf.nn.relu

	        net = tf.cast(X, tf.float32) / 255.
	        net = relu(conv(net, 'c1', nf=8, rf=8, stride=4, init_scale=gain, **conv_kwargs))
	        net = relu(conv(net, 'c2', nf=16, rf=4, stride=2, init_scale=gain, **conv_kwargs))
	        flat = conv_to_fc(net)
	        return relu(fc(flat, 'fc1', nh=128, init_scale=gain))

	    return network_fn


	@register("lstm")
	def lstm(nlstm=128, layer_norm=False):
	    """
	    Builds LSTM (Long-Short Term Memory) network to be used in a policy.

	    The returned builder yields two things: the LSTM output (the hidden state
	    for each step of the sequence) and a dictionary of auxiliary tensors to
	    be set as policy attributes:

	    S is a placeholder to feed current state (LSTM state has to be managed outside policy)
	    M is a placeholder for the mask (used to mask out observations after the end of the episode, but can be used for other purposes too)
	    initial_state is a numpy array of zeros with the shape of S
	    state is the output LSTM state (to be fed into S at the next call)

	    An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example

	    Parameters:
	    ----------

	    nlstm: int LSTM hidden state size

	    layer_norm: bool if True, layer-normalized version of LSTM is used

	    Returns:
	    -------

	    function that builds LSTM with a given input tensor / placeholder
	    """

	    def network_fn(X, nenv=1):
	        nbatch = X.shape[0]
	        nsteps = nbatch // nenv

	        flat = tf.compat.v1.layers.flatten(X)

	        M = tf.compat.v1.placeholder(tf.float32, [nbatch])  # mask (done t-1)
	        S = tf.compat.v1.placeholder(tf.float32, [nenv, 2 * nlstm])  # states

	        xs = batch_to_seq(flat, nenv, nsteps)
	        ms = batch_to_seq(M, nenv, nsteps)

	        # Choose the recurrent implementation and its variable scope together.
	        if layer_norm:
	            recurrent, scope = utils.lnlstm, 'lnlstm'
	        else:
	            recurrent, scope = utils.lstm, 'lstm'
	        seq_out, snew = recurrent(xs, ms, S, scope=scope, nh=nlstm)

	        batch_out = seq_to_batch(seq_out)
	        zero_state = np.zeros(S.shape.as_list(), dtype=float)

	        return batch_out, {'S': S, 'M': M, 'state': snew, 'initial_state': zero_state}

	    return network_fn


	@register("cnn_lstm")
	def cnn_lstm(nlstm=128, layer_norm=False, conv_fn=nature_cnn, **conv_kwargs):
	    """
	    Return a function that builds a convolutional feature extractor followed by an LSTM.

	    Parameters:
	    ----------

	    nlstm: int LSTM hidden state size

	    layer_norm: bool if True, layer-normalized version of LSTM is used

	    conv_fn: function applied to the input (with `conv_kwargs`) to produce the LSTM input (default: nature_cnn)

	    Returns:
	    -------

	    function taking (X, nenv=1) that returns the LSTM output and a dict with
	    the 'S' and 'M' placeholders, the new 'state', and a zero 'initial_state'
	    """

	    def network_fn(X, nenv=1):
	        nbatch = X.shape[0]
	        nsteps = nbatch // nenv

	        features = conv_fn(X, **conv_kwargs)

	        M = tf.compat.v1.placeholder(tf.float32, [nbatch])  # mask (done t-1)
	        S = tf.compat.v1.placeholder(tf.float32, [nenv, 2 * nlstm])  # states

	        xs = batch_to_seq(features, nenv, nsteps)
	        ms = batch_to_seq(M, nenv, nsteps)

	        # Choose the recurrent implementation and its variable scope together.
	        if layer_norm:
	            recurrent, scope = utils.lnlstm, 'lnlstm'
	        else:
	            recurrent, scope = utils.lstm, 'lstm'
	        seq_out, snew = recurrent(xs, ms, S, scope=scope, nh=nlstm)

	        batch_out = seq_to_batch(seq_out)
	        zero_state = np.zeros(S.shape.as_list(), dtype=float)

	        return batch_out, {'S': S, 'M': M, 'state': snew, 'initial_state': zero_state}

	    return network_fn


	@register("impala_cnn_lstm")
	def impala_cnn_lstm():
	    """Return a `cnn_lstm` builder with a 256-unit LSTM on top of `build_impala_cnn`."""
	    network_fn = cnn_lstm(nlstm=256, conv_fn=build_impala_cnn)
	    return network_fn


	@register("cnn_lnlstm")
	def cnn_lnlstm(nlstm=128, **conv_kwargs):
	    """Return a `cnn_lstm` builder with layer normalization switched on."""
	    network_fn = cnn_lstm(nlstm=nlstm, layer_norm=True, **conv_kwargs)
	    return network_fn


	@register("conv_only")
	def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs):
	    '''
	    convolutions-only net

	    Parameters:
	    ----------

	    convs: list of triples (filter_number, filter_size, stride) specifying parameters for each layer.

	    conv_kwargs: extra keyword arguments forwarded to every convolution layer.

	    Returns:

	    function that takes tensorflow tensor as input and returns the output of the last convolutional layer

	    '''

	    def network_fn(X):
	        net = tf.cast(X, tf.float32) / 255.
	        with tf.compat.v1.variable_scope("convnet"):
	            for layer_spec in convs:
	                n_filters, filter_size, step = layer_spec
	                # NOTE(review): tf.contrib is not shipped with TensorFlow 2.x - confirm
	                # the target TF version before relying on this builder.
	                net = tf.contrib.layers.convolution2d(
	                    net,
	                    num_outputs=n_filters,
	                    kernel_size=filter_size,
	                    stride=step,
	                    activation_fn=tf.nn.relu,
	                    **conv_kwargs)

	        return net

	    return network_fn


	def _normalize_clip_observation(x, clip_range=[-5.0, 5.0]):
	    """
	    Standardize `x` with a fresh RunningMeanStd and clip the result.

	    The RunningMeanStd is created for the shape `x.shape[1:]`. The clip
	    bounds are the smallest and largest entries of `clip_range`.

	    Returns the pair (normalized and clipped tensor, RunningMeanStd instance).
	    """
	    rms = RunningMeanStd(shape=x.shape[1:])
	    standardized = (x - rms.mean) / rms.std
	    lower, upper = min(clip_range), max(clip_range)
	    return tf.clip_by_value(standardized, lower, upper), rms


	def get_network_builder(name):
	    """
	    Resolve `name` to a network builder.

	    A callable is returned as-is; otherwise `name` is looked up in the
	    `mapping` registry, and a ValueError is raised when it is not registered.

	    If you want to register your own network outside models.py, you just need:

	    Usage Example:
	    -------------
	    from baselines.common.models import register
	    @register("your_network_name")
	    def your_network_define(**net_kwargs):
	        ...
	        return network_fn

	    """
	    # Callables bypass the registry entirely.
	    if callable(name):
	        return name
	    if name not in mapping:
	        raise ValueError('Unknown network type: {}'.format(name))
	    return mapping[name]


	class VectorQuantizer(layers.Layer):
	    """
	    Layer that snaps each input vector to its nearest codebook entry.

	    The codebook is the trainable variable `embeddings` with shape
	    (embedding_dim, num_embeddings), i.e. one code per column.
	    """

	    def __init__(self, num_embeddings, embedding_dim, seed=0, **kwargs):
	        """
	        Create the codebook variable.

	        num_embeddings: number of codes (columns of the codebook)
	        embedding_dim: length of each code (rows of the codebook)
	        seed: seed given to the uniform initializer (default: 0)
	        kwargs: forwarded to the base Layer constructor
	        """
	        super().__init__(**kwargs)
	        self.embedding_dim = embedding_dim
	        self.num_embeddings = num_embeddings

	        # Codes start uniformly in [-1 / num_embeddings, 1 / num_embeddings].
	        bound = 1 / num_embeddings
	        w_init = tf.compat.v1.random_uniform_initializer(minval=-bound, maxval=bound, seed=seed)
	        codebook_shape = (self.embedding_dim, self.num_embeddings)

	        self.embeddings = tf.compat.v1.get_variable(
	            initializer=w_init(shape=codebook_shape, dtype="float32"),
	            trainable=True,
	            name="embeddings_vqvae",
	        )

	    def call(self, x):
	        """
	        Quantize `x` against the codebook.

	        Returns a triple:
	        the quantized tensor with a straight-through gradient (x + stop_gradient(quantized - x)),
	        the plain quantized tensor reshaped to the shape of `x`,
	        the selected code indices reshaped to [first dimension of x, -1].
	        """
	        # Remember the dynamic shape, then view the input as rows of
	        # `embedding_dim` entries.
	        original_shape = tf.shape(input=x)
	        rows = tf.reshape(x, [-1, self.embedding_dim])

	        # Nearest-code lookup, then one-hot selection of the codebook columns.
	        encoding_indices = self.get_code_indices(rows)
	        encoding_indices = tf.reshape(encoding_indices, [original_shape[0], -1])
	        one_hot_codes = tf.one_hot(encoding_indices, self.num_embeddings)
	        quantized = tf.matmul(one_hot_codes, self.embeddings, transpose_b=True)
	        quantized = tf.reshape(quantized, original_shape)

	        # Straight-through estimator: forward value is `quantized`, gradient flows to `x`.
	        straight_through = x + tf.stop_gradient(quantized - x)

	        return straight_through, quantized, encoding_indices

	    def get_code_indices(self, flattened_inputs):
	        """Return, for each row of `flattened_inputs`, the index of the closest code."""
	        # Squared Euclidean distance between every input row and every code,
	        # expanded as |x|^2 + |e|^2 - 2 * x.e (no normalization is applied).
	        similarity = tf.matmul(flattened_inputs, self.embeddings)
	        input_sq_norms = tf.reduce_sum(input_tensor=flattened_inputs ** 2, axis=1, keepdims=True)
	        code_sq_norms = tf.reduce_sum(input_tensor=self.embeddings ** 2, axis=0)
	        distances = input_sq_norms + code_sq_norms - 2 * similarity

	        # The closest code is the one with the smallest distance in each row.
	        return tf.argmin(input=distances, axis=1)