| | import math |
| |
|
| | import numpy as np |
| | import tensorflow as tf |
| | from baselines.a2c import utils |
| | from baselines.a2c.utils import conv, fc, conv_to_fc, batch_to_seq, seq_to_batch |
| | from baselines.common.mpi_running_mean_std import RunningMeanStd |
| | from keras import layers |
| | from itertools import combinations |
| |
|
# Registry of network-builder factories, keyed by the name passed to ``register``.
mapping = {}


def register(name):
    """
    Decorator factory that records a function in the module-level ``mapping``.

    Parameters:
    ----------

    name: key under which the decorated function is stored in ``mapping``

    Returns:
    -------

    decorator that stores the function under ``name`` and returns it unchanged
    """

    def _decorator(builder):
        # A later registration under the same name overwrites the earlier one.
        mapping[name] = builder
        return builder

    return _decorator
| |
|
| |
|
def nature_cnn(unscaled_images, **conv_kwargs):
    """
    CNN from the Nature paper.

    The input is cast to float32 and divided by 255, passed through three ReLU
    conv layers ('c1', 'c2', 'c3'), flattened with ``conv_to_fc`` and fed to a
    512-unit ReLU fully-connected layer ('fc1'). ``conv_kwargs`` is forwarded to
    every ``conv`` call.
    """
    relu = tf.nn.relu
    gain = np.sqrt(2)
    # (scope, nf, rf, stride) for each conv layer, in application order
    conv_specs = (('c1', 32, 8, 4), ('c2', 64, 4, 2), ('c3', 64, 3, 1))

    features = tf.cast(unscaled_images, tf.float32) / 255.
    for scope, nf, rf, stride in conv_specs:
        features = relu(conv(features, scope, nf=nf, rf=rf, stride=stride, init_scale=gain,
                             **conv_kwargs))
    flat = conv_to_fc(features)
    return relu(fc(flat, 'fc1', nh=512, init_scale=gain))
| |
|
| |
|
def build_impala_cnn(unscaled_images, depths=(16, 32, 32), **conv_kwargs):
    """
    Model used in the paper "IMPALA: Scalable Distributed Deep-RL with
    Importance Weighted Actor-Learner Architectures" https://arxiv.org/abs/1802.01561

    Parameters:
    ----------

    unscaled_images: input image tensor; it is cast to float32 and divided by 255

    depths: iterable of int, one conv sequence is built per entry and the entry is
            passed as the filter count of that sequence (default: (16, 32, 32))

    conv_kwargs: accepted so the signature matches the other builders; not used here

    Returns:
    -------

    output tensor of the final 256-unit ReLU dense layer
    """

    layer_num = 0

    def get_layer_num_str():
        # Hands out consecutive indices so every named layer gets a unique 'layer_<n>' name.
        nonlocal layer_num
        num_str = str(layer_num)
        layer_num += 1
        return num_str

    def conv_layer(out, depth):
        return tf.compat.v1.layers.conv2d(out, depth, 3, padding='same', name='layer_' + get_layer_num_str())

    def residual_block(inputs):
        # Depending on the TensorFlow shape behaviour the last dimension is either an
        # object exposing ``.value`` or already a plain value; getattr covers both
        # without a bare ``except`` that would also swallow unrelated errors.
        last_dim = inputs.get_shape()[-1]
        depth = getattr(last_dim, 'value', last_dim)

        out = tf.nn.relu(inputs)

        out = conv_layer(out, depth)
        out = tf.nn.relu(out)
        out = conv_layer(out, depth)
        # Skip connection: add the block input back onto the conv output.
        return out + inputs

    def conv_sequence(inputs, depth):
        out = conv_layer(inputs, depth)
        out = tf.compat.v1.layers.max_pooling2d(out, pool_size=3, strides=2, padding='same')
        out = residual_block(out)
        out = residual_block(out)
        return out

    out = tf.cast(unscaled_images, tf.float32) / 255.

    for depth in depths:
        out = conv_sequence(out, depth)

    out = tf.compat.v1.layers.flatten(out)
    out = tf.nn.relu(out)
    out = tf.compat.v1.layers.dense(out, 256, activation=tf.nn.relu, name='layer_' + get_layer_num_str())

    return out
| |
|
| |
|
def build_skill_impala_cnn(unscaled_images, depths=(16, 32, 32), emb_dim=256, num_embeddings=8, seed=0,
                           **conv_kwargs):
    """
    Modified impala cnn model by adding the skill module

    Parameters:
    ----------

    unscaled_images: input image tensor; it is cast to float32 and divided by 255

    depths: iterable of int, one conv sequence is built per entry and the entry is
            passed as the filter count of that sequence (default: (16, 32, 32))

    emb_dim: int, width of the dense feature layer (default: 256)

    num_embeddings: int, number of codebook entries of the vector quantizer (default: 8)

    seed: seed passed to the vector quantizer's codebook initializer (default: 0)

    conv_kwargs: accepted so the signature matches the other builders; not used here

    Returns:
    -------

    tuple (out, skill_out, pure_out, vq_out, pure_vq_out, embeddings, encoding_indices) where
        out               is pure_out plus the tiled, normalised code index
        skill_out         is the 2-unit linear projection fed to the quantizer
        pure_out          is the emb_dim-unit ReLU dense feature
        vq_out            is the first output of the quantizer (straight-through quantized value)
        pure_vq_out       is the second output of the quantizer (quantized value)
        embeddings        is the quantizer's codebook variable
        encoding_indices  is the selected code index per input
    """

    layer_num = 0

    def get_layer_num_str():
        # Hands out consecutive indices so every named layer gets a unique 'layer_<n>' name.
        nonlocal layer_num
        num_str = str(layer_num)
        layer_num += 1
        return num_str

    def conv_layer(out, depth):
        return tf.compat.v1.layers.conv2d(out, depth, 3, padding='same', name='layer_' + get_layer_num_str())

    def residual_block(inputs):
        # Depending on the TensorFlow shape behaviour the last dimension is either an
        # object exposing ``.value`` or already a plain value; unwrap it the same way
        # build_impala_cnn does so both builders hand conv2d the same kind of value.
        last_dim = inputs.get_shape()[-1]
        depth = getattr(last_dim, 'value', last_dim)

        out = tf.nn.relu(inputs)

        out = conv_layer(out, depth)
        out = tf.nn.relu(out)
        out = conv_layer(out, depth)
        # Skip connection: add the block input back onto the conv output.
        return out + inputs

    def conv_sequence(inputs, depth):
        out = conv_layer(inputs, depth)
        out = tf.compat.v1.layers.max_pooling2d(out, pool_size=3, strides=2, padding='same')
        out = residual_block(out)
        out = residual_block(out)
        return out

    out = tf.cast(unscaled_images, tf.float32) / 255.

    for depth in depths:
        out = conv_sequence(out, depth)

    out = tf.compat.v1.layers.flatten(out)
    out = tf.nn.relu(out)
    pure_out = tf.compat.v1.layers.dense(out, emb_dim, activation=tf.nn.relu, name='layer_' + get_layer_num_str())

    # Skill head: two linear layers project the feature down to 2 dimensions,
    # which is the embedding size the quantizer below is built with.
    skill_out = tf.compat.v1.layers.dense(pure_out, emb_dim // 2, activation=None, name='layer_' + get_layer_num_str())
    skill_out = tf.compat.v1.layers.dense(skill_out, 2, activation=None, name='layer_' + get_layer_num_str())
    vq_layer = VectorQuantizer(num_embeddings, 2, seed=seed, name="vector_quantizer")
    vq_out, pure_vq_out, encoding_indices = vq_layer(skill_out)

    # Scale the code index by the codebook size and repeat it emb_dim times along
    # axis 1 so it can be added element-wise to pure_out.
    encoding_indices_ = tf.cast(
        tf.tile(encoding_indices / vq_layer.num_embeddings, tf.constant([1, emb_dim], tf.int32)), tf.float32)

    out = tf.math.add(pure_out, encoding_indices_)

    return out, skill_out, pure_out, vq_out, pure_vq_out, vq_layer.embeddings, encoding_indices
| |
|
| |
|
@register("mlp")
def mlp(num_layers=2, num_hidden=64, activation=tf.tanh, layer_norm=False):
    """
    Stack of fully-connected layers to be used in a policy / q-function approximator

    Parameters:
    ----------

    num_layers: int                 number of fully-connected layers (default: 2)

    num_hidden: int                 size of fully-connected layers (default: 64)

    activation:                     activation function (default: tf.tanh)

    layer_norm: bool                if True, layer normalization is applied after each
                                    fully-connected layer, before the activation (default: False)

    Returns:
    -------

    function that builds fully connected network with a given input tensor / placeholder
    """

    def network_fn(X):
        gain = np.sqrt(2)
        out = tf.compat.v1.layers.flatten(X)
        for layer_idx in range(num_layers):
            scope = 'mlp_fc{}'.format(layer_idx)
            out = fc(out, scope, nh=num_hidden, init_scale=gain)
            if layer_norm:
                # NOTE(review): tf.contrib is only shipped with TensorFlow 1.x - confirm the target version.
                out = tf.contrib.layers.layer_norm(out, center=True, scale=True)
            out = activation(out)

        return out

    return network_fn
| |
|
| |
|
@register("cnn")
def cnn(**conv_kwargs):
    """
    Network builder registered as "cnn".

    Returns a function that applies ``nature_cnn`` to its input tensor, forwarding
    ``conv_kwargs`` unchanged.
    """

    def network_fn(X):
        features = nature_cnn(X, **conv_kwargs)
        return features

    return network_fn
| |
|
| |
|
@register("impala_cnn")
def impala_cnn(**conv_kwargs):
    """
    Network builder registered as "impala_cnn".

    Parameters:
    ----------

    conv_kwargs: keyword arguments forwarded to ``build_impala_cnn`` (for example ``depths``)

    Returns:
    -------

    function that builds the IMPALA CNN on a given input tensor / placeholder
    """

    def network_fn(X):
        # Forward the keyword arguments; previously they were accepted but silently
        # dropped, so options such as ``depths`` never reached the builder.
        return build_impala_cnn(X, **conv_kwargs)

    return network_fn
| |
|
| |
|
@register("cnn_small")
def cnn_small(**conv_kwargs):
    """
    Network builder registered as "cnn_small".

    The returned function casts its input to float32, divides by 255, applies two
    ReLU conv layers ('c1' with nf=8, 'c2' with nf=16), flattens with ``conv_to_fc``
    and ends with a 128-unit ReLU fully-connected layer ('fc1'). ``conv_kwargs`` is
    forwarded to both ``conv`` calls.
    """

    def network_fn(X):
        relu = tf.nn.relu
        gain = np.sqrt(2)

        scaled = tf.cast(X, tf.float32) / 255.
        c1 = relu(conv(scaled, 'c1', nf=8, rf=8, stride=4, init_scale=gain, **conv_kwargs))
        c2 = relu(conv(c1, 'c2', nf=16, rf=4, stride=2, init_scale=gain, **conv_kwargs))
        flat = conv_to_fc(c2)
        return relu(fc(flat, 'fc1', nh=128, init_scale=gain))

    return network_fn
| |
|
| |
|
@register("lstm")
def lstm(nlstm=128, layer_norm=False):
    """
    Builds LSTM (Long-Short Term Memory) network to be used in a policy.

    The returned function yields two things: the LSTM output (the hidden state for
    each step of the sequence, converted back to batch form) and a dictionary of
    auxiliary tensors meant to be set as policy attributes:

        S              placeholder of shape [nenv, 2 * nlstm] used to feed the current
                       LSTM state (the state has to be managed outside the policy)
        M              placeholder of shape [nbatch] for the mask (used to mask out
                       observations after the end of the episode, but can be used for
                       other purposes too)
        initial_state  numpy array of zeros with the shape of S
        state          output LSTM state (to be fed into S at the next call)

    An example of usage of lstm-based policy can be found here: common/tests/test_doc_examples.py/test_lstm_example

    Parameters:
    ----------

    nlstm: int          LSTM hidden state size

    layer_norm: bool    if True, layer-normalized version of LSTM is used

    Returns:
    -------

    function that builds LSTM with a given input tensor / placeholder
    """

    def network_fn(X, nenv=1):
        nbatch = X.shape[0]
        nsteps = nbatch // nenv

        flat = tf.compat.v1.layers.flatten(X)

        M = tf.compat.v1.placeholder(tf.float32, [nbatch])
        S = tf.compat.v1.placeholder(tf.float32, [nenv, 2 * nlstm])

        xs = batch_to_seq(flat, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)

        # Select the recurrent implementation together with its variable scope.
        if layer_norm:
            recurrent_fn, scope = utils.lnlstm, 'lnlstm'
        else:
            recurrent_fn, scope = utils.lstm, 'lstm'
        seq_out, snew = recurrent_fn(xs, ms, S, scope=scope, nh=nlstm)

        batch_out = seq_to_batch(seq_out)
        zero_state = np.zeros(S.shape.as_list(), dtype=float)

        aux = {'S': S, 'M': M, 'state': snew, 'initial_state': zero_state}
        return batch_out, aux

    return network_fn
| |
|
| |
|
@register("cnn_lstm")
def cnn_lstm(nlstm=128, layer_norm=False, conv_fn=nature_cnn, **conv_kwargs):
    """
    Builds a convolutional feature extractor followed by an LSTM.

    Parameters:
    ----------

    nlstm: int          LSTM hidden state size

    layer_norm: bool    if True, layer-normalized version of LSTM is used

    conv_fn:            function applied to the input to produce the features fed to
                        the LSTM (default: nature_cnn)

    conv_kwargs:        keyword arguments forwarded to conv_fn

    Returns:
    -------

    function that, given an input tensor / placeholder, returns the LSTM output in
    batch form and a dictionary with the placeholders 'S' and 'M', the new 'state'
    and a zero-filled 'initial_state'
    """

    def network_fn(X, nenv=1):
        nbatch = X.shape[0]
        nsteps = nbatch // nenv

        features = conv_fn(X, **conv_kwargs)

        M = tf.compat.v1.placeholder(tf.float32, [nbatch])
        S = tf.compat.v1.placeholder(tf.float32, [nenv, 2 * nlstm])

        xs = batch_to_seq(features, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)

        # Select the recurrent implementation together with its variable scope.
        if layer_norm:
            recurrent_fn, scope = utils.lnlstm, 'lnlstm'
        else:
            recurrent_fn, scope = utils.lstm, 'lstm'
        seq_out, snew = recurrent_fn(xs, ms, S, scope=scope, nh=nlstm)

        batch_out = seq_to_batch(seq_out)
        zero_state = np.zeros(S.shape.as_list(), dtype=float)

        aux = {'S': S, 'M': M, 'state': snew, 'initial_state': zero_state}
        return batch_out, aux

    return network_fn
| |
|
| |
|
@register("impala_cnn_lstm")
def impala_cnn_lstm():
    """
    Network builder registered as "impala_cnn_lstm": ``cnn_lstm`` with
    ``build_impala_cnn`` as the convolutional part and an LSTM state size of 256.
    """
    network_fn = cnn_lstm(conv_fn=build_impala_cnn, nlstm=256)
    return network_fn
| |
|
| |
|
@register("cnn_lnlstm")
def cnn_lnlstm(nlstm=128, **conv_kwargs):
    """
    Network builder registered as "cnn_lnlstm": ``cnn_lstm`` with layer
    normalization switched on. ``conv_kwargs`` is forwarded to ``cnn_lstm``.
    """
    network_fn = cnn_lstm(nlstm=nlstm, layer_norm=True, **conv_kwargs)
    return network_fn
| |
|
| |
|
@register("conv_only")
def conv_only(convs=((32, 8, 4), (64, 4, 2), (64, 3, 1)), **conv_kwargs):
    '''
    convolutions-only net

    Parameters:
    ----------

    convs: sequence of triples (filter_number, filter_size, stride) specifying parameters for each layer.

    conv_kwargs: extra keyword arguments forwarded to every convolution layer.

    Returns:

    function that takes tensorflow tensor as input and returns the output of the last convolutional layer

    '''

    def network_fn(X):
        # Input is cast to float32 and divided by 255 before the first convolution.
        out = tf.cast(X, tf.float32) / 255.
        with tf.compat.v1.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                # NOTE(review): tf.contrib is only shipped with TensorFlow 1.x - confirm the target version.
                out = tf.contrib.layers.convolution2d(out,
                                                      num_outputs=num_outputs,
                                                      kernel_size=kernel_size,
                                                      stride=stride,
                                                      activation_fn=tf.nn.relu,
                                                      **conv_kwargs)

        return out

    return network_fn
| |
|
| |
|
def _normalize_clip_observation(x, clip_range=(-5.0, 5.0)):
    """
    Normalize ``x`` with a running mean / std and clip the result.

    Parameters:
    ----------

    x: input tensor; a ``RunningMeanStd`` is created with shape ``x.shape[1:]``

    clip_range: pair of numbers; the smaller one is used as the lower clip bound and
                the larger one as the upper clip bound (default: (-5.0, 5.0))

    Returns:
    -------

    tuple (norm_x, rms): the normalized, clipped tensor and the ``RunningMeanStd`` used
    """
    rms = RunningMeanStd(shape=x.shape[1:])
    # min / max make the result independent of the order of the two bounds.
    lower, upper = min(clip_range), max(clip_range)
    norm_x = tf.clip_by_value((x - rms.mean) / rms.std, lower, upper)
    return norm_x, rms
| |
|
| |
|
def get_network_builder(name):
    """
    Resolve a network builder.

    A callable is returned as-is; otherwise ``name`` is looked up in ``mapping``.
    Raises ValueError when ``name`` is neither callable nor a registered name.

    If you want to register your own network outside models.py, you just need:

    Usage Example:
    -------------
    from baselines.common.models import register
    @register("your_network_name")
    def your_network_define(**net_kwargs):
        ...
        return network_fn

    """
    # Callables are treated as ready-made builders and bypass the registry.
    if callable(name):
        return name
    if name not in mapping:
        raise ValueError('Unknown network type: {}'.format(name))
    return mapping[name]
| |
|
| |
|
class VectorQuantizer(layers.Layer):
    """
    Layer that replaces each input vector by its closest entry of a trainable codebook.

    The codebook ``embeddings`` has shape (embedding_dim, num_embeddings) and is
    initialised uniformly in [-1 / num_embeddings, 1 / num_embeddings] with the given seed.
    """

    def __init__(self, num_embeddings, embedding_dim, seed=0, **kwargs):
        super().__init__(**kwargs)
        self.embedding_dim = embedding_dim
        self.num_embeddings = num_embeddings

        # Draw the initial codebook values, then wrap them in a trainable variable.
        bound = 1 / num_embeddings
        codebook_init = tf.compat.v1.random_uniform_initializer(minval=-bound, maxval=bound, seed=seed)
        initial_codebook = codebook_init(
            shape=(self.embedding_dim, self.num_embeddings), dtype="float32"
        )
        self.embeddings = tf.compat.v1.get_variable(
            name="embeddings_vqvae",
            initializer=initial_codebook,
            trainable=True,
        )

    def call(self, x):
        """
        Quantize ``x``.

        Returns a tuple (straight-through quantized tensor, quantized tensor, code
        indices). The first two have the shape of ``x``; the indices are reshaped to
        [first dimension of x, -1].
        """
        # Flatten to rows of length embedding_dim, remembering the original shape.
        original_shape = tf.shape(input=x)
        flat_inputs = tf.reshape(x, [-1, self.embedding_dim])

        # Pick the nearest code per row and look it up through a one-hot matmul.
        nearest = self.get_code_indices(flat_inputs)
        encoding_indices = tf.reshape(nearest, [original_shape[0], -1])
        one_hot_codes = tf.one_hot(encoding_indices, self.num_embeddings)
        looked_up = tf.matmul(one_hot_codes, self.embeddings, transpose_b=True)
        quantized = tf.reshape(looked_up, original_shape)

        # Straight-through estimator: the forward value is ``quantized`` while the
        # gradient with respect to ``x`` passes through unchanged.
        straight_through = x + tf.stop_gradient(quantized - x)

        return straight_through, quantized, encoding_indices

    def get_code_indices(self, flattened_inputs):
        """
        Return, for every row of ``flattened_inputs``, the index of the codebook
        column with the smallest squared L2 distance to that row.
        """
        # ||x - e||^2 = ||x||^2 + ||e||^2 - 2 * x.e, evaluated for all row/code pairs.
        dot_products = tf.matmul(flattened_inputs, self.embeddings)
        input_sq_norms = tf.reduce_sum(input_tensor=flattened_inputs ** 2, axis=1, keepdims=True)
        code_sq_norms = tf.reduce_sum(input_tensor=self.embeddings ** 2, axis=0)
        squared_distances = input_sq_norms + code_sq_norms - 2 * dot_products

        return tf.argmin(input=squared_distances, axis=1)
| |
|