| import tensorflow as tf | |
def dense(x, size, name, weight_init=None, bias_init=0, weight_loss_dict=None, reuse=None,
          weight_decay_fc=3e-4):
    """Build a fully connected (affine) layer ``x @ w + b`` in variable scope *name*.

    Args:
        x: Input tensor. Its second static dimension (``x.get_shape()[1]``)
            is used as the input width of the weight matrix.
        size: Number of output units (second dimension of ``w``, length of ``b``).
        name: Name of the variable scope that holds the variables ``"w"`` and ``"b"``.
        weight_init: Initializer passed to ``get_variable`` for ``w``
            (``None`` lets TensorFlow pick its default).
        bias_init: Constant value used to initialise ``b``.
        weight_loss_dict: Optional dict. When given, it is mutated in place:
            ``w`` is mapped to ``weight_decay_fc`` and ``b`` to ``0.0``, and an
            L2 penalty on ``w`` is added to the ``"<outer scope>_losses"``
            collection.
        reuse: Forwarded to ``tf.compat.v1.variable_scope``.
        weight_decay_fc: Coefficient multiplying ``tf.nn.l2_loss(w)``. Defaults
            to ``3e-4``, the value that used to be hard-coded here.

    Returns:
        The tensor ``tf.nn.bias_add(tf.matmul(x, w), b)``.

    Raises:
        AssertionError: If the full variable-scope name does not consist of
            exactly two ``/``-separated components.
    """
    with tf.compat.v1.variable_scope(name, reuse=reuse):
        # Read the scope name once; it is needed for the sanity check and for
        # deriving the name of the losses collection.
        scope_parts = tf.compat.v1.get_variable_scope().name.split('/')
        assert len(scope_parts) == 2, (
            "dense() expects a scope name with exactly two '/'-separated parts, "
            "got %r" % '/'.join(scope_parts))

        w = tf.compat.v1.get_variable("w", [x.get_shape()[1], size], initializer=weight_init)
        b = tf.compat.v1.get_variable(
            "b", [size], initializer=tf.compat.v1.constant_initializer(bias_init))

        if weight_loss_dict is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(w), weight_decay_fc, name='weight_decay_loss')
            # Record the per-variable decay coefficients; the bias is not regularised.
            weight_loss_dict[w] = weight_decay_fc
            weight_loss_dict[b] = 0.0
            # The collection is keyed by the outermost scope component.
            tf.compat.v1.add_to_collection(scope_parts[0] + '_' + 'losses', weight_decay)

        return tf.nn.bias_add(tf.matmul(x, w), b)
def kl_div(action_dist1, action_dist2, action_size):
    """Return the per-row divergence between two packed (mean, std) tensors.

    Each input is indexed as a 2-D tensor whose columns are split at
    *action_size*: the first ``action_size`` columns are treated as means and
    the remaining columns as standard deviations.

    For every row the result is the sum over the last axis of::

        ((mean1 - mean2)^2 + std1^2 - std2^2) / (2 * std2^2 + 1e-8)
            + log(std2) - log(std1)

    which, apart from the ``1e-8`` stabiliser in the denominator, is the
    closed-form KL(dist1 || dist2) for diagonal Gaussians.

    Args:
        action_dist1: Tensor holding means followed by standard deviations.
        action_dist2: Tensor with the same layout as *action_dist1*.
        action_size: Column index at which means end and standard deviations begin.

    Returns:
        A tensor with the last axis reduced away (one value per row).
    """
    mu_a = action_dist1[:, :action_size]
    sigma_a = action_dist1[:, action_size:]
    mu_b = action_dist2[:, :action_size]
    sigma_b = action_dist2[:, action_size:]

    squared_mean_gap = tf.square(mu_a - mu_b)
    top = squared_mean_gap + tf.square(sigma_a) - tf.square(sigma_b)
    # Small constant keeps the division finite when sigma_b is zero.
    bottom = 2 * tf.square(sigma_b) + 1e-8

    per_dimension = top / bottom + tf.math.log(sigma_b) - tf.math.log(sigma_a)
    return tf.reduce_sum(input_tensor=per_dimension, axis=-1)