import logging
from collections import OrderedDict

import numpy as np

import mxnet as mx
import mxnet.ndarray as nd
from mxnet.module import Module


def nd_global_norm(t_list):
    """Computes the global norm of multiple tensors.

    Given a tuple or list of tensors t_list, this operation returns the
    global norm of the elements in all tensors in t_list. The global norm
    is computed as:

    ``global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))``

    Any entries in t_list that are None are ignored.

    Parameters
    ----------
    t_list : list or tuple
        The NDArrays whose global norm is to be computed.

    Returns
    -------
    ret : NDArray
        The global norm. The shape of the NDArray will be (1,).

    Examples
    --------
    >>> x = mx.nd.ones((2, 3))
    >>> y = mx.nd.ones((5, 6))
    >>> z = mx.nd.ones((4, 2, 3))
    >>> print(nd_global_norm([x, y, z]).asscalar())
    7.74597
    >>> xnone = None
    >>> ret = nd_global_norm([x, y, z, xnone])
    >>> print(ret.asscalar())
    7.74597
    """
    ret = None
    for arr in t_list:
        if arr is not None:
            if ret is None:
                ret = nd.square(nd.norm(arr))
            else:
                ret += nd.square(nd.norm(arr))
    ret = nd.sqrt(ret)
    return ret


class MyModule(Module):
    """Enhancements to mx.mod.Module: gradient-norm utilities (global norm,
    clipping, debugging), gradient accumulation helpers, a parameter summary,
    and lenient parameter loading.
    """

    def __init__(self, symbol, data_names=('data',), label_names=('softmax_label',),
                 logger=logging, context=mx.context.gpu(), work_load_list=None,
                 fixed_param_names=None, state_names=None, name=None):
        self._name = name
        super(MyModule, self).__init__(symbol=symbol,
                                       data_names=data_names,
                                       label_names=label_names,
                                       logger=logger,
                                       context=context,
                                       work_load_list=work_load_list,
                                       fixed_param_names=fixed_param_names,
                                       state_names=state_names)
        self._tmp_grads = None
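
    # A minimal construction sketch (the symbol `sym`, the shapes, and the
    # optimizer settings below are illustrative assumptions, not part of
    # this module):
    #
    #   net = MyModule(symbol=sym, data_names=('data',),
    #                  label_names=('softmax_label',),
    #                  context=mx.gpu(0), name='my_net')
    #   net.bind(data_shapes=[('data', (32, 100))],
    #            label_shapes=[('softmax_label', (32,))])
    #   net.init_params()
    #   net.init_optimizer(optimizer='sgd')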

    def clip_by_global_norm(self, max_norm=1.0):
        """Clips gradient norm.

        The norm is computed over all gradients together, as if they were
        concatenated into a single vector. Gradients are modified in-place.

        The method was first proposed in
        `[ICML2013] On the difficulty of training recurrent neural networks`.

        Parameters
        ----------
        max_norm : float or int
            The maximum clipping threshold of the gradient norm.

        Returns
        -------
        norm_val : float
            The computed norm of the gradients.

        Examples
        --------
        An example of using clip_by_global_norm to clip the gradient before
        updating the parameters::

            >>> # Get the gradient via back-propagation
            >>> net.forward_backward(data_batch=data_batch)
            >>> norm_val = net.clip_by_global_norm(max_norm=1.0)
            >>> net.update()
        """
        assert self.binded and self.params_initialized and self.optimizer_initialized
        norm_val = self.global_grad_norm()
        if norm_val > max_norm:
            ratio = max_norm / float(norm_val)
            for grads in self._exec_group.grad_arrays:
                for grad in grads:
                    grad *= ratio
        return norm_val

    def global_grad_norm(self):
        """Calculates the global gradient norm.

        The L2 norm is computed over all gradients together, as if they were
        concatenated into a single vector. Can be used to debug the
        optimization process.
        See http://videolectures.net/deeplearning2015_goodfellow_network_optimization/

        Returns
        -------
        norm_val : float
            The computed norm of the gradients.

        Examples
        --------
        An example of using global_grad_norm to calculate the gradient norm
        after back-propagation::

            >>> # Get the gradient via back-propagation
            >>> net.forward_backward(data_batch=data_batch)
            >>> norm_val = net.global_grad_norm()
            >>> print(norm_val)
        """
        assert self.binded and self.params_initialized and self.optimizer_initialized
        norm_val = 0.0
        for exe in self._exec_group.execs:
            norm_val += nd_global_norm(exe.grad_arrays).asscalar()
        norm_val /= float(len(self._exec_group.execs))
        norm_val *= self._optimizer.rescale_grad
        return norm_val

    def debug_norm_all(self, debug_gnorm=True):
        """Logs the L2 norm of each parameter and, optionally, of its gradient.

        Norms are written at DEBUG level.

        Parameters
        ----------
        debug_gnorm : bool, optional
            If True, also log the gradient norms.
        """
        if debug_gnorm:
            for k, v, grad_v in zip(self._param_names, self._exec_group.param_arrays,
                                    self._exec_group.grad_arrays):
                logging.debug("%s: v-norm: %g, g-norm: %g"
                              % (k,
                                 nd.norm(v[0]).asnumpy()[0],
                                 nd.norm(grad_v[0]).asnumpy()[0]))
        else:
            for k, v in zip(self._param_names, self._exec_group.param_arrays):
                logging.debug("%s: v-norm: %g"
                              % (k,
                                 nd.norm(v[0]).asnumpy()[0]))
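
    # A usage sketch for the debug logger (assumes logging has been configured
    # at DEBUG level and a backward pass has already run; `data_batch` is a
    # hypothetical batch):
    #
    #   logging.basicConfig(level=logging.DEBUG)
    #   net.forward_backward(data_batch=data_batch)
    #   net.debug_norm_all()                   # parameter and gradient norms
    #   net.debug_norm_all(debug_gnorm=False)  # parameter norms only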

    def summary(self, level=2):
        """Summarizes the network parameters.

        Parameters
        ----------
        level : int, optional
            Level of the summarization logs to print.
            The log becomes more verbose with a higher summary level.

            - Level = 0
                Print the total parameter number and the auxiliary parameter number.
            - Level = 1
                Print the shapes of all parameters, plus the information in Level 0.
            - Level = 2
                Print the shapes of the data/states, plus the information in Level 1.
        """
        self.logger.info("Summary of %s" % self._name)
        assert self.binded and self.params_initialized
        assert 0 <= level <= 2, \
            "Level must be between 0 and 2, level=%d is not supported" % level

        def _log_var(key, value, typ="param"):
            if typ == "param":
                if key in self._fixed_param_names:
                    self.logger.info("  %s: %s, %d, req = %s, fixed"
                                     % (key,
                                        str(value.shape),
                                        np.prod(value.shape),
                                        self._exec_group.grad_req[key]))
                else:
                    self.logger.info("  %s: %s, %d, req = %s"
                                     % (key,
                                        str(value.shape),
                                        np.prod(value.shape),
                                        self._exec_group.grad_req[key]))
            elif typ in ("data", "aux"):
                self.logger.info("  %s: %s, %d"
                                 % (key,
                                    str(value.shape),
                                    np.prod(value.shape)))

        total_param_num = 0
        total_fixed_param_num = 0
        total_aux_param_num = 0
        if level >= 2:
            if len(self.data_names) == 0:
                self.logger.info("Data: None")
            else:
                self.logger.info("Data:")
                for k, v in zip(self.data_names, self.data_shapes):
                    _log_var(k, v, typ="data")
            if len(self._state_names) == 0:
                self.logger.info("State: None")
            else:
                self.logger.info("State:")
                for k in self._state_names:
                    v = self._exec_group.execs[0].arg_dict[k]
                    _log_var(k, v, typ="data")
        if level >= 1:
            if len(self._param_names) == 0:
                self.logger.info("Param: None")
            else:
                self.logger.info("Params:")
                for k in self._param_names:
                    v = self._arg_params[k]
                    _log_var(k, v)
                    if k in self._fixed_param_names:
                        total_fixed_param_num += np.prod(v.shape)
                    else:
                        total_param_num += np.prod(v.shape)
            if len(self._aux_names) == 0:
                self.logger.info("Aux States: None")
            else:
                self.logger.info("Aux States:")
                for k in self._aux_names:
                    v = self._aux_params[k]
                    _log_var(k, v, typ="aux")
                    total_aux_param_num += np.prod(v.shape)
        else:
            for k in self._param_names:
                v = self._arg_params[k]
                total_param_num += np.prod(v.shape)
            for k in self._aux_names:
                v = self._aux_params[k]
                total_aux_param_num += np.prod(v.shape)
        self.logger.info("Total Param Num (excluding fixed ones): " + str(total_param_num))
        self.logger.info("Total Fixed Param Num: " + str(total_fixed_param_num))
        self.logger.info("Total Aux Param Num: " + str(total_aux_param_num))
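
    # A short usage sketch (assumes `net` is bound and its parameters are
    # initialized):
    #
    #   net.summary(level=0)  # parameter / aux parameter counts only
    #   net.summary(level=2)  # also data, state, and per-parameter shapes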

    def get_output_dict(self):
        """Returns the module outputs as an OrderedDict mapping output names
        to NDArrays."""
        outputs = self.get_outputs()
        return OrderedDict([(k, v) for k, v in zip(self._output_names, outputs)])

    def clear_grad(self):
        """Sets all gradient arrays to zero in-place."""
        assert self.binded and self.params_initialized and self.optimizer_initialized
        for grads in self._exec_group.grad_arrays:
            for grad in grads:
                grad[:] = 0
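
    # Example: if the module was bound with grad_req='add', gradients
    # accumulate across backward passes and must be zeroed explicitly
    # (`data_batch` is a hypothetical batch):
    #
    #   net.clear_grad()
    #   net.forward_backward(data_batch=data_batch)
    #   net.update()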

    def save_tmp_grad(self):
        """Copies the current gradients into an internal buffer.

        The buffer is allocated on first use and reused afterwards.
        """
        if self._tmp_grads is None:
            self._tmp_grads = []
            for grads in self._exec_group.grad_arrays:
                vec = []
                for grad in grads:
                    vec.append(grad.copyto(grad.context))
                self._tmp_grads.append(vec)
        else:
            for i, grads in enumerate(self._exec_group.grad_arrays):
                for j, grad in enumerate(grads):
                    self._tmp_grads[i][j][:] = grad

    def acc_grad_with_tmp(self):
        """Adds the gradients saved by `save_tmp_grad` to the current
        gradients in-place."""
        assert self._tmp_grads is not None
        for i, grads in enumerate(self._exec_group.grad_arrays):
            for j, grad in enumerate(grads):
                grad += self._tmp_grads[i][j]
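
    # A sketch of accumulating gradients over two batches with the helpers
    # above (assumes `net` is a bound, initialized MyModule; `batch1` and
    # `batch2` are hypothetical data batches; grad_req is the default 'write',
    # so each backward pass overwrites the gradient arrays):
    #
    #   net.forward_backward(data_batch=batch1)
    #   net.save_tmp_grad()                      # stash batch1's gradients
    #   net.forward_backward(data_batch=batch2)  # overwrites the gradients
    #   net.acc_grad_with_tmp()                  # grads = batch1 + batch2
    #   net.update()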

    def load_params_allow_missing(self, fname):
        """Loads model parameters from file, ignoring entries in the file
        that do not match a parameter of this module.

        Parameters
        ----------
        fname : str
            Path to the input param file.

        Examples
        --------
        >>> # An example of loading module parameters.
        >>> mod.load_params_allow_missing('myfile')
        """
        logging.info("Load Param From %s" % fname)
        save_dict = mx.nd.load(fname)
        arg_params = {}
        aux_params = {}
        for k, value in save_dict.items():
            arg_type, name = k.split(':', 1)
            if arg_type == 'arg':
                if name in self._param_names:
                    logging.info("set %s" % name)
                    arg_params[name] = value
            elif arg_type == 'aux':
                if name in self._aux_names:
                    logging.info("set %s" % name)
                    aux_params[name] = value
            else:
                raise ValueError("Invalid param file " + fname)
        self.set_params(arg_params, aux_params, allow_missing=True)