leonepson
/

semantic_rl

Reinforcement Learning

interpretability

Model card Files Files and versions

semantic_rl / baselines /acktr /utils.py

leonepson's picture

Upload 254 files

5960497 verified 4 months ago

history blame contribute delete

1.4 kB

	import tensorflow as tf

	def dense(x, size, name, weight_init=None, bias_init=0, weight_loss_dict=None, reuse=None):
		"""Build a fully connected layer ``matmul(x, w) + b`` inside variable scope ``name``.

		Args:
			x: Input tensor; ``x.get_shape()[1]`` sets the number of rows of ``w``.
			size: Number of output units (columns of ``w``, length of ``b``).
			name: Variable scope in which the variables ``"w"`` and ``"b"`` are created.
			weight_init: Initializer handed to ``get_variable`` for ``w``.
			bias_init: Constant used to initialize ``b``.
			weight_loss_dict: Optional mapping. When it is not ``None`` it receives
				the decay coefficient of each variable (``3e-4`` for ``w``, ``0.0``
				for ``b``), and the scaled L2 loss of ``w`` is added to the
				collection named ``<first scope component>_losses``.
			reuse: Forwarded to ``variable_scope``.

		Returns:
			The tensor ``tf.nn.bias_add(tf.matmul(x, w), b)``.

		Raises:
			AssertionError: If the active variable scope name does not consist of
				exactly two ``/``-separated components.
		"""
		with tf.compat.v1.variable_scope(name, reuse=reuse):
			scope_parts = tf.compat.v1.get_variable_scope().name.split('/')
			# The layer has to be created exactly one level below an outer scope;
			# the outer scope's name is what prefixes the losses collection below.
			assert len(scope_parts) == 2

			in_dim = x.get_shape()[1]
			kernel = tf.compat.v1.get_variable("w", [in_dim, size], initializer=weight_init)
			offset = tf.compat.v1.get_variable(
				"b", [size], initializer=tf.compat.v1.constant_initializer(bias_init))

			if weight_loss_dict is not None:
				decay_coeff = 3e-4
				l2_penalty = tf.multiply(tf.nn.l2_loss(kernel), decay_coeff, name='weight_decay_loss')
				# Record per-variable decay coefficients; the bias is not decayed.
				weight_loss_dict[kernel] = decay_coeff
				weight_loss_dict[offset] = 0.0
				tf.compat.v1.add_to_collection(scope_parts[0] + '_' + 'losses', l2_penalty)

			return tf.nn.bias_add(tf.matmul(x, kernel), offset)

	def kl_div(action_dist1, action_dist2, action_size):
		"""Return the KL divergence KL(dist1 || dist2) between two diagonal Gaussians.

		Each distribution tensor is sliced along its second axis: the first
		``action_size`` columns are used as the means and the remaining columns
		as the standard deviations.

		Args:
			action_dist1: Tensor holding ``[mean | std]`` of the first distribution.
			action_dist2: Tensor holding ``[mean | std]`` of the second distribution.
			action_size: Number of leading columns that hold the means.

		Returns:
			The per-row divergence, summed over the last axis.
		"""
		mu_a = action_dist1[:, :action_size]
		sigma_a = action_dist1[:, action_size:]
		mu_b = action_dist2[:, :action_size]
		sigma_b = action_dist2[:, action_size:]

		# (dmu^2 + sigma_a^2 - sigma_b^2) / (2 * sigma_b^2) is the quadratic part of
		# the closed-form Gaussian KL; the log terms below supply log(sigma_b / sigma_a).
		quad_top = tf.square(mu_a - mu_b) + tf.square(sigma_a) - tf.square(sigma_b)
		# Small constant added to the denominator, presumably to guard against
		# division by zero when sigma_b is 0.
		quad_bottom = 2 * tf.square(sigma_b) + 1e-8

		per_dim = quad_top / quad_bottom + tf.math.log(sigma_b) - tf.math.log(sigma_a)
		return tf.reduce_sum(input_tensor=per_dim, axis=-1)