import tensorflow as tf

from baselines.common.models import get_network_builder


class Model(object):
    """Base class for a TF-graph network living under a named variable scope.

    Holds the scope name and a network-builder callable; subclasses create
    variables under ``self.name`` so the collection-based properties below
    can retrieve them by scope.
    """

    def __init__(self, name, network='mlp', **network_kwargs):
        self.name = name
        # get_network_builder returns a factory; calling it with the kwargs
        # yields the function that actually builds the network graph.
        self.network_builder = get_network_builder(network)(**network_kwargs)

    @property
    def vars(self):
        """All global variables created under this model's scope."""
        return tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=self.name)

    @property
    def trainable_vars(self):
        """Trainable variables created under this model's scope."""
        return tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

    @property
    def perturbable_vars(self):
        """Trainable variables excluding layer-norm parameters.

        Layer-norm parameters are filtered out by name so that
        parameter-space perturbations leave normalization statistics alone.
        """
        return [v for v in self.trainable_vars if 'LayerNorm' not in v.name]


class Actor(Model):
    """Deterministic policy network: observation -> tanh-squashed action."""

    def __init__(self, nb_actions, name='actor', network='mlp', **network_kwargs):
        super().__init__(name=name, network=network, **network_kwargs)
        self.nb_actions = nb_actions

    def __call__(self, obs, reuse=False):
        """Build (or reuse) the actor graph for ``obs`` and return actions in [-1, 1].

        NOTE(review): the ``reuse`` argument is accepted for interface
        compatibility but is not consulted — the scope always opens with
        AUTO_REUSE.
        """
        with tf.compat.v1.variable_scope(self.name, reuse=tf.compat.v1.AUTO_REUSE):
            hidden = self.network_builder(obs)
            # Small uniform init on the final layer keeps initial actions
            # near zero.
            final_init = tf.compat.v1.random_uniform_initializer(
                minval=-3e-3, maxval=3e-3)
            pre_activation = tf.compat.v1.layers.dense(
                hidden, self.nb_actions, kernel_initializer=final_init)
            action = tf.nn.tanh(pre_activation)
        return action


class Critic(Model):
    """Q-value network: (observation, action) -> scalar value estimate."""

    def __init__(self, name='critic', network='mlp', **network_kwargs):
        super().__init__(name=name, network=network, **network_kwargs)
        # NOTE(review): set but never read in this file — presumably consumed
        # by the network builder or callers elsewhere; confirm before removing.
        self.layer_norm = True

    def __call__(self, obs, action, reuse=False):
        """Build (or reuse) the critic graph and return the Q-value tensor.

        NOTE(review): as in ``Actor.__call__``, ``reuse`` is ignored and the
        scope always opens with AUTO_REUSE.
        """
        with tf.compat.v1.variable_scope(self.name, reuse=tf.compat.v1.AUTO_REUSE):
            # this assumes observation and action can be concatenated
            joined = tf.concat([obs, action], axis=-1)
            features = self.network_builder(joined)
            final_init = tf.compat.v1.random_uniform_initializer(
                minval=-3e-3, maxval=3e-3)
            # The final layer is named 'output' so output_vars can find it.
            q_value = tf.compat.v1.layers.dense(
                features, 1, kernel_initializer=final_init, name='output')
        return q_value

    @property
    def output_vars(self):
        """Trainable variables belonging to the final 'output' layer."""
        return [v for v in self.trainable_vars if 'output' in v.name]