import os
import numpy as np
import tensorflow as tf
from collections import deque

def sample(logits):
    # Draw one action per row via the Gumbel-max trick: adding Gumbel noise
    # to the logits and taking the argmax samples from the corresponding
    # categorical distribution.
    noise = tf.random.uniform(tf.shape(input=logits))
    return tf.argmax(input=logits - tf.math.log(-tf.math.log(noise)), axis=1)

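# Usage sketch (illustrative, not part of the original module; shapes are
# assumptions):
#
#   logits = tf.constant([[2.0, 0.5, 0.1]])   # [batch, n_actions]
#   action = sample(logits)                   # int64 tensor of shape [batch]
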
def cat_entropy(logits):
    # Entropy of a categorical distribution given unnormalized logits,
    # with the row-wise max subtracted for numerical stability.
    a0 = logits - tf.reduce_max(input_tensor=logits, axis=1, keepdims=True)
    ea0 = tf.exp(a0)
    z0 = tf.reduce_sum(input_tensor=ea0, axis=1, keepdims=True)
    p0 = ea0 / z0
    return tf.reduce_sum(input_tensor=p0 * (tf.math.log(z0) - a0), axis=1)

def cat_entropy_softmax(p0):
    # Entropy from probabilities (already softmaxed); the 1e-6 guards log(0).
    return -tf.reduce_sum(input_tensor=p0 * tf.math.log(p0 + 1e-6), axis=1)

def ortho_init(scale=1.0):
    def _ortho_init(shape, dtype, partition_info=None):
        # Orthogonal initialization: draw a Gaussian matrix, take the
        # orthonormal factor of its SVD, and rescale.
        shape = tuple(shape)
        if len(shape) == 2:
            flat_shape = shape
        elif len(shape) == 4:
            # Convolution kernels: flatten all but the last (output) dimension.
            flat_shape = (np.prod(shape[:-1]), shape[-1])
        else:
            raise NotImplementedError
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        q = u if u.shape == flat_shape else v  # pick the factor with the right shape
        q = q.reshape(shape)
        return (scale * q[:shape[0], :shape[1]]).astype(np.float32)
    return _ortho_init

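# Usage sketch (illustrative): `ortho_init` returns an initializer suitable
# for `tf.compat.v1.get_variable`, e.g.
#
#   w = tf.compat.v1.get_variable("w", [64, 128], initializer=ortho_init(1.0))
#
# The resulting matrix has orthonormal rows or columns (whichever the shape
# permits), scaled by `scale`.
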
def conv(x, scope, *, nf, rf, stride, pad='VALID', init_scale=1.0, data_format='NHWC', one_dim_bias=False):
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride, stride, 1]
        bshape = [1, 1, 1, nf]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride, stride]
        bshape = [1, nf, 1, 1]
    else:
        raise NotImplementedError
    bias_var_shape = [nf] if one_dim_bias else [1, nf, 1, 1]
    # Static dimensions are plain ints in TF2 but Dimension objects in TF1.
    try:
        nin = x.get_shape()[channel_ax].value
    except AttributeError:
        nin = x.get_shape()[channel_ax]
    wshape = [rf, rf, nin, nf]
    with tf.compat.v1.variable_scope(scope):
        w = tf.compat.v1.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.compat.v1.get_variable("b", bias_var_shape, initializer=tf.compat.v1.constant_initializer(0.0))
        if not one_dim_bias and data_format == 'NHWC':
            b = tf.reshape(b, bshape)
        return tf.nn.conv2d(input=x, filters=w, strides=strides, padding=pad, data_format=data_format) + b

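# Usage sketch (illustrative; shapes are assumptions): an Atari-style first
# layer built from `conv`, inside a TF1-style graph:
#
#   x = tf.compat.v1.placeholder(tf.float32, [None, 84, 84, 4])
#   h = tf.nn.relu(conv(x, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2)))
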
def fc(x, scope, nh, *, init_scale=1.0, init_bias=0.0):
    with tf.compat.v1.variable_scope(scope):
        # Static dimensions are plain ints in TF2 but Dimension objects in TF1.
        try:
            nin = x.get_shape()[1].value
        except AttributeError:
            nin = x.get_shape()[1]
        w = tf.compat.v1.get_variable("w", [nin, nh], initializer=ortho_init(init_scale))
        b = tf.compat.v1.get_variable("b", [nh], initializer=tf.compat.v1.constant_initializer(init_bias))
        return tf.matmul(x, w) + b

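# Usage sketch (illustrative): a hidden layer followed by policy logits,
# mirroring how these helpers are typically composed:
#
#   h = tf.nn.relu(fc(flat, 'fc1', nh=512, init_scale=np.sqrt(2)))
#   pi = fc(h, 'pi', nh=n_actions, init_scale=0.01)
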
def batch_to_seq(h, nbatch, nsteps, flat=False):
    # Split a flat batch of size nbatch*nsteps into a list of nsteps tensors
    # of shape [nbatch, ...], one per timestep.
    if flat:
        h = tf.reshape(h, [nbatch, nsteps])
    else:
        h = tf.reshape(h, [nbatch, nsteps, -1])
    return [tf.squeeze(v, [1]) for v in tf.split(axis=1, num_or_size_splits=nsteps, value=h)]

def seq_to_batch(h, flat=False):
    # Inverse of batch_to_seq: merge a list of per-timestep tensors back
    # into one [nbatch*nsteps, ...] batch.
    shape = h[0].get_shape().as_list()
    if not flat:
        assert len(shape) > 1
        nh = shape[-1]
        return tf.reshape(tf.concat(axis=1, values=h), [-1, nh])
    else:
        return tf.reshape(tf.stack(values=h, axis=1), [-1])

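# The two helpers are inverses of each other. Illustrative round trip
# (shapes are assumptions):
#
#   xs = batch_to_seq(h, nbatch=4, nsteps=5)   # list of 5 tensors, each [4, nh]
#   h2 = seq_to_batch(xs)                      # [20, nh] again
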
def lstm(xs, ms, s, scope, nh, init_scale=1.0):
    # Standard LSTM over a list of per-timestep tensors xs, with episode-reset
    # masks ms (1 where a new episode begins) and state s packing [c, h].
    nbatch, nin = xs[0].get_shape().as_list()
    with tf.compat.v1.variable_scope(scope):
        wx = tf.compat.v1.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        wh = tf.compat.v1.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        b = tf.compat.v1.get_variable("b", [nh*4], initializer=tf.compat.v1.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        # Reset the recurrent state at episode boundaries.
        c = c*(1-m)
        h = h*(1-m)
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(c)
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s

def _ln(x, g, b, e=1e-5, axes=[1]):
    # Layer normalization: normalize over `axes`, then apply gain g and bias b.
    u, s = tf.nn.moments(x=x, axes=axes, keepdims=True)
    x = (x-u)/tf.sqrt(s+e)
    x = x*g+b
    return x

def lnlstm(xs, ms, s, scope, nh, init_scale=1.0):
    # LSTM with layer normalization (_ln) applied to the input projection,
    # the recurrent projection, and the cell state before the output gate.
    nbatch, nin = xs[0].get_shape().as_list()
    with tf.compat.v1.variable_scope(scope):
        wx = tf.compat.v1.get_variable("wx", [nin, nh*4], initializer=ortho_init(init_scale))
        gx = tf.compat.v1.get_variable("gx", [nh*4], initializer=tf.compat.v1.constant_initializer(1.0))
        bx = tf.compat.v1.get_variable("bx", [nh*4], initializer=tf.compat.v1.constant_initializer(0.0))

        wh = tf.compat.v1.get_variable("wh", [nh, nh*4], initializer=ortho_init(init_scale))
        gh = tf.compat.v1.get_variable("gh", [nh*4], initializer=tf.compat.v1.constant_initializer(1.0))
        bh = tf.compat.v1.get_variable("bh", [nh*4], initializer=tf.compat.v1.constant_initializer(0.0))

        b = tf.compat.v1.get_variable("b", [nh*4], initializer=tf.compat.v1.constant_initializer(0.0))

        gc = tf.compat.v1.get_variable("gc", [nh], initializer=tf.compat.v1.constant_initializer(1.0))
        bc = tf.compat.v1.get_variable("bc", [nh], initializer=tf.compat.v1.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        # Reset the recurrent state at episode boundaries.
        c = c*(1-m)
        h = h*(1-m)
        z = _ln(tf.matmul(x, wx), gx, bx) + _ln(tf.matmul(h, wh), gh, bh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f*c + i*u
        h = o*tf.tanh(_ln(c, gc, bc))
        xs[idx] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s

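# Usage sketch (illustrative): both `lstm` and `lnlstm` consume per-timestep
# lists produced by `batch_to_seq`; `s` packs [c, h] along axis 1, so its
# width is 2*nh:
#
#   xs = batch_to_seq(x, nbatch, nsteps)
#   ms = batch_to_seq(mask, nbatch, nsteps)
#   hs, snew = lnlstm(xs, ms, state, 'lstm1', nh=256)
#   h = seq_to_batch(hs)
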
def conv_to_fc(x):
    # Flatten convolutional feature maps to [batch, -1].
    # Static dimensions are plain ints in TF2 but Dimension objects in TF1.
    try:
        nh = np.prod([v.value for v in x.get_shape()[1:]])
    except AttributeError:
        nh = np.prod([v for v in x.get_shape()[1:]])
    x = tf.reshape(x, [-1, nh])
    return x

def discount_with_dones(rewards, dones, gamma):
    # Discounted returns computed backwards; a done flag of 1 cuts the
    # bootstrap so returns do not leak across episode boundaries.
    discounted = []
    r = 0
    for reward, done in zip(rewards[::-1], dones[::-1]):
        r = reward + gamma*r*(1.-done)
        discounted.append(r)
    return discounted[::-1]

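# Worked example (illustrative): with gamma = 0.5,
#
#   discount_with_dones([1, 1, 1, 1], [0, 0, 1, 0], 0.5)
#
# yields [1.75, 1.5, 1.0, 1.0]: the done flag at index 2 stops the reward at
# index 3 from leaking into earlier steps.
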
def find_trainable_variables(key):
    return tf.compat.v1.trainable_variables(key)

def make_path(f):
    return os.makedirs(f, exist_ok=True)

def constant(p):
    return 1


def linear(p):
    return 1-p


def middle_drop(p):
    # Linear decay until 25% progress, then a constant floor of 0.075.
    eps = 0.75
    if 1-p < eps:
        return eps*0.1
    return 1-p


def double_linear_con(p):
    # Linear decay at double speed, clipped at a floor of 0.125.
    p *= 2
    eps = 0.125
    if 1-p < eps:
        return eps
    return 1-p


def double_middle_drop(p):
    # Linear decay until 25% progress, then a floor of 0.075, switching to
    # 0.125 once progress passes 75%.
    eps1 = 0.75
    eps2 = 0.25
    if 1-p < eps1:
        if 1-p < eps2:
            return eps2*0.5
        return eps1*0.1
    return 1-p

schedules = {
    'linear': linear,
    'constant': constant,
    'double_linear_con': double_linear_con,
    'middle_drop': middle_drop,
    'double_middle_drop': double_middle_drop
}

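# Each schedule maps progress p = n/nvalues in [0, 1] to a multiplier on the
# initial value. For instance (illustrative):
#
#   linear(0.5)       -> 0.5
#   middle_drop(0.5)  -> 0.075   (dropped, since 1-p < 0.75)
#   constant(0.5)     -> 1
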
class Scheduler(object):

    def __init__(self, v, nvalues, schedule):
        self.n = 0.
        self.v = v
        self.nvalues = nvalues
        self.schedule = schedules[schedule]

    def value(self):
        # Return the current value and advance the internal step counter.
        current_value = self.v*self.schedule(self.n/self.nvalues)
        self.n += 1.
        return current_value

    def value_steps(self, steps):
        # Peek at the value for an arbitrary step without advancing.
        return self.v*self.schedule(steps/self.nvalues)

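# Usage sketch (illustrative): a linearly annealed learning rate over 1000
# updates; each call to value() consumes one step:
#
#   lr = Scheduler(v=7e-4, nvalues=1000, schedule='linear')
#   lr.value()            # 7e-4 * linear(0/1000) = 7e-4
#   lr.value()            # 7e-4 * linear(1/1000)
#   lr.value_steps(500)   # peek: 3.5e-4, without advancing the counter
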
class EpisodeStats:
    def __init__(self, nsteps, nenvs):
        self.episode_rewards = [[] for _ in range(nenvs)]
        self.lenbuffer = deque(maxlen=40)  # rolling buffer of episode lengths
        self.rewbuffer = deque(maxlen=40)  # rolling buffer of episode rewards
        self.nsteps = nsteps
        self.nenvs = nenvs

    def feed(self, rewards, masks):
        rewards = np.reshape(rewards, [self.nenvs, self.nsteps])
        masks = np.reshape(masks, [self.nenvs, self.nsteps])
        for i in range(self.nenvs):
            for j in range(self.nsteps):
                self.episode_rewards[i].append(rewards[i][j])
                if masks[i][j]:
                    # Episode finished: record its length and total reward.
                    l = len(self.episode_rewards[i])
                    s = sum(self.episode_rewards[i])
                    self.lenbuffer.append(l)
                    self.rewbuffer.append(s)
                    self.episode_rewards[i] = []

    def mean_length(self):
        if self.lenbuffer:
            return np.mean(self.lenbuffer)
        else:
            return 0  # no completed episodes yet

    def mean_reward(self):
        if self.rewbuffer:
            return np.mean(self.rewbuffer)
        else:
            return 0  # no completed episodes yet

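# Usage sketch (illustrative): after each rollout, feed the flattened rewards
# and done masks; finished episodes accumulate in the rolling buffers:
#
#   stats = EpisodeStats(nsteps=5, nenvs=4)
#   stats.feed(rewards, masks)     # arrays reshapable to [nenvs, nsteps]
#   stats.mean_reward(), stats.mean_length()
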
def get_by_index(x, idx):
    # Select x[i, idx[i]] for every row i by flattening x and gathering.
    assert len(x.get_shape()) == 2
    assert len(idx.get_shape()) == 1
    idx_flattened = tf.range(0, x.shape[0]) * x.shape[1] + idx
    y = tf.gather(tf.reshape(x, [-1]),
                  idx_flattened)
    return y

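# Worked example (illustrative): with
#
#   x   = tf.constant([[10., 11.], [20., 21.]])
#   idx = tf.constant([1, 0])
#
# get_by_index(x, idx) returns [11., 20.], i.e. element idx[i] of each row i.
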
def check_shape(ts, shapes):
    # Assert that each tensor's static shape matches the expected one.
    for i, (t, shape) in enumerate(zip(ts, shapes)):
        assert t.get_shape().as_list() == shape, \
            "id " + str(i) + " shape " + str(t.get_shape()) + str(shape)

def avg_norm(t):
    # Mean L2 norm over the last axis.
    return tf.reduce_mean(input_tensor=tf.sqrt(tf.reduce_sum(input_tensor=tf.square(t), axis=-1)))

def gradient_add(g1, g2, param):
    print([g1, g2, param.name])
    assert not (g1 is None and g2 is None), param.name
    if g1 is None:
        return g2
    elif g2 is None:
        return g1
    else:
        return g1 + g2

def q_explained_variance(qpred, q):
    # Explained variance of the Q prediction: 1 - Var[q - qpred] / Var[q].
    _, vary = tf.nn.moments(x=q, axes=[0, 1])
    _, varpred = tf.nn.moments(x=q - qpred, axes=[0, 1])
    check_shape([vary, varpred], [[]] * 2)
    return 1.0 - (varpred / vary)
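# Interpretation note: a result of 1 means qpred explains q perfectly, 0 means
# it does no better than predicting the mean of q, and negative values mean it
# is worse than the mean predictor.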