| | |
| | |
| |
|
| |
|
| | import math |
| |
|
| | import numpy as np |
| | from caffe2.python import core, schema |
| | from caffe2.python.helpers.arg_scope import get_current_scope |
| | from caffe2.python.layers.layers import ModelLayer |
| | from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin |
| |
|
| |
|
def get_fc_predictor_version(fc_version):
    """Validate the FC precision requested for the predictor net.

    Currently only "fp32" is accepted; any other value trips the assertion.
    Returns the validated version string unchanged.
    """
    supported_versions = ["fp32"]
    assert fc_version in supported_versions, (
        "Only support fp32 for the fully connected layer "
        "in the predictor net, the provided FC precision is {}".format(fc_version)
    )
    return fc_version
| |
|
| |
|
class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
    """Fully connected layer trained on bootstrapped (resampled) inputs.

    Builds ``num_bootstrap`` independent FC parameter pairs and, at train
    time, feeds each one a with-replacement resample of the input batch.

    Blob layout:

        self.bootstrapped_FCs (weights on even indices, biases on odd):
            [w_0, b_0, w_1, b_1, ..., w_{B-1}, b_{B-1}]

        self.output_schema (indices always on even positions):
            Struct(
                bootstrap_iteration_0/indices, bootstrap_iteration_0/preds,
                ...,
                bootstrap_iteration_{B-1}/indices, bootstrap_iteration_{B-1}/preds,
            )
    """

    def __init__(
        self,
        model,
        input_record,
        output_dims,
        num_bootstrap,
        weight_init=None,
        bias_init=None,
        weight_optim=None,
        bias_optim=None,
        name="fc_with_bootstrap",
        weight_reg=None,
        bias_reg=None,
        clip_param=None,
        axis=1,
        **kwargs
    ):
        """
        Args:
            model: layer-model helper this layer registers itself with.
            input_record: schema.Scalar describing the dense input features.
            output_dims: output dimension of each bootstrapped FC.
            num_bootstrap: number of independent bootstrapped FCs to create.
            weight_init: optional caffe2 initializer tuple for weights;
                defaults to UniformFill in [-sqrt(1/input_dims), sqrt(1/input_dims)].
            bias_init: optional caffe2 initializer tuple for biases; same default.
            weight_optim: optimizer for the weight params.
            bias_optim: optimizer for the bias params.
            name: layer name prefix.
            weight_reg: regularizer for the weight params.
            bias_reg: regularizer for the bias params.
            clip_param: optional (clip_min, clip_max) applied to params before
                the FC op; at least one bound must be non-None, and
                clip_min < clip_max when both are given.
            axis: first FC axis; must be >= 1.
        """
        super(FCWithBootstrap, self).__init__(model, name, input_record, **kwargs)
        assert isinstance(
            input_record, schema.Scalar
        ), "Incorrect input type {}".format(input_record)
        assert (
            len(input_record.field_types()[0].shape) > 0
        ), "FC expects limited dimensions of the input tensor"
        assert axis >= 1, "axis {} should >= 1.".format(axis)
        self.axis = axis
        # Flatten all trailing dims from `axis` on into the FC input width.
        input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])

        assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
            input_dims
        )

        # kwargs forwarded to net.Clip in _add_ops; stays None if no clipping.
        self.clip_args = None

        # number of independent bootstrapped FCs
        self.num_bootstrap = num_bootstrap

        self.input_dims = input_dims

        # flat [w_0, b_0, ..., w_{B-1}, b_{B-1}] parameter blob list
        self.bootstrapped_FCs = []

        # Blobs created once (on bootstrap iteration 0) inside
        # _generate_bootstrapped_indices and reused by later iterations.
        self.batch_size = None
        self.output_dim_vec = None
        self.lower_bound = None
        self.upper_bound = None

        if clip_param is not None:
            assert len(clip_param) == 2, (
                "clip_param must be a tuple / list "
                "of length 2 and in the form of (clip_min, clip max)"
            )
            clip_min, clip_max = clip_param
            assert (
                clip_min is not None or clip_max is not None
            ), "clip_min, and clip_max in clip_param cannot both be None"
            assert (
                clip_min is None or clip_max is None
            ) or clip_min < clip_max, (
                "clip_param = [clip_min, clip_max] must have clip_min < clip_max"
            )
            self.clip_args = {}
            if clip_min is not None:
                self.clip_args["min"] = clip_min
            if clip_max is not None:
                self.clip_args["max"] = clip_max

        # Default init: uniform in [-sqrt(1/fan_in), sqrt(1/fan_in)].
        scale = math.sqrt(1.0 / input_dims)
        weight_init = (
            weight_init
            if weight_init
            else ("UniformFill", {"min": -scale, "max": scale})
        )
        bias_init = (
            bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
        )

        # Build one (indices, preds) output pair and one (w, b) parameter
        # pair per bootstrap iteration.  Indices land on the even positions
        # of the output schema; weights on the even positions of
        # self.bootstrapped_FCs (see class docstring).
        output_schema = schema.Struct()
        for i in range(num_bootstrap):
            output_schema += schema.Struct(
                (
                    "bootstrap_iteration_{}/indices".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/indices".format(i)
                    ),
                ),
                (
                    "bootstrap_iteration_{}/preds".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/preds".format(i)
                    ),
                ),
            )
            self.bootstrapped_FCs.extend(
                [
                    self.create_param(
                        param_name="bootstrap_iteration_{}/w".format(i),
                        shape=[output_dims, input_dims],
                        initializer=weight_init,
                        optimizer=weight_optim,
                        regularizer=weight_reg,
                    ),
                    self.create_param(
                        param_name="bootstrap_iteration_{}/b".format(i),
                        shape=[output_dims],
                        initializer=bias_init,
                        optimizer=bias_optim,
                        regularizer=bias_reg,
                    ),
                ]
            )

        self.output_schema = output_schema

    def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
        """
        Insert ops that sample ``batch_size`` row indices uniformly (with
        replacement) in [0, batch_size - 1], writing them into the indices
        blob pre-declared in output_schema for this iteration.

        Args:
            net: the caffe2 net to insert operators into

            copied_cur_layer: blob of the bootstrapped features (make sure this
                blob has a stop_gradient on)

            iteration: the bootstrap iteration to generate for.  Used to
                correctly populate the output_schema

        Return:
            A blob containing the generated indices of shape: (batch_size,)
        """
        with core.NameScope("bootstrap_iteration_{}".format(iteration)):
            if iteration == 0:
                # Derive batch_size and the [0, batch_size - 1] sampling range
                # once; later iterations reuse the cached blobs on self.
                input_shape = net.Shape(copied_cur_layer, "input_shape")
                batch_size_index = net.Const(np.array([0]), "batch_size_index")
                batch_size = net.Gather([input_shape, batch_size_index], "batch_size")
                self.batch_size = batch_size

                lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32)
                offset = net.Const(np.array([1]), "offset", dtype=np.int32)
                int_batch_size = net.Cast(
                    [self.batch_size], "int_batch_size", to=core.DataType.INT32
                )
                upper_bound = net.Sub([int_batch_size, offset], "upper_bound")

                self.lower_bound = lower_bound
                self.upper_bound = upper_bound

            indices = net.UniformIntFill(
                [self.batch_size, self.lower_bound, self.upper_bound],
                self.output_schema[iteration * 2].field_blobs()[0],
                input_as_shape=1,
            )

            return indices

    def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
        """
        This method contains all the bootstrapping logic used to bootstrap
        the features.  Only used by the train_net.

        Args:
            net: the caffe2 net to insert bootstrapping operators

            copied_cur_layer: the blob representing the current features.
                Note, this layer should have a stop_gradient on it.

            indices: blob of sampled row indices (from
                _generate_bootstrapped_indices)

            iteration: bootstrap iteration, used only to name the output blob

        Returns:
            bootstrapped_features: Scalar of the bootstrapped version of
                cur_layer with same dimensions
        """
        # Row-gather the input by the sampled indices: sampling with
        # replacement over the batch dimension.
        bootstrapped_features = net.Gather(
            [copied_cur_layer, indices],
            net.NextScopedBlob("bootstrapped_features_{}".format(iteration)),
        )

        # Re-wrap as a Scalar so downstream code can use field_blobs().
        bootstrapped_features = schema.Scalar(
            (np.float32, self.input_dims), bootstrapped_features
        )

        return bootstrapped_features

    def _insert_fc_ops(self, net, features, params, outputs, version):
        """
        Insert the FC op for one bootstrap iteration.

        Args:
            net: the caffe2 net to insert operator into

            features: Scalar containing blob of the bootstrapped features or
                actual cur_layer features

            params: weight and bias for FC

            outputs: the output blobs

            version: support fp32 for now.

        Raises:
            Exception: if `version` is not a supported FC precision.
        """
        if version == "fp32":
            pred_blob = net.FC(
                features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs
            )
            return pred_blob
        else:
            raise Exception("unsupported FC type version {}".format(version))

    def _add_ops(self, net, features, iteration, params, version):
        """
        Optionally clip the params, then run the FC writing into this
        iteration's preds blob of the output schema.

        Args:
            net: the caffe2 net to insert operators into

            features: feature blobs to predict on.  Can be the actual
                cur_layer or the bootstrapped_feature blobs.

            iteration: bootstrap iteration; selects the preds output blob

            params: the weight and bias, passed by either add_ops or
                add_train_ops function

            version: currently fp32 support only
        """
        if self.clip_args is not None:
            # Clip into fresh blobs so the stored params stay untouched.
            clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params]
            for p, cp in zip(params, clipped_params):
                net.Clip([p], [cp], **self.clip_args)
            params = clipped_params

        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            # preds blobs sit on the odd positions of the output schema.
            self._insert_fc_ops(
                net=net,
                features=features,
                params=params,
                outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]],
                version=version,
            )

    def add_ops(self, net):
        """
        Both the predict net and the eval net will call this function.

        For bootstrapping approach, the goal is to pass the cur_layer feature
        inputs through all the bootstrapped FCs that are stored under
        self.bootstrapped_FCs.  Return the preds in the same output_schema
        with dummy indices (because they are not needed).
        """
        version_info = get_current_scope().get(
            get_fc_predictor_version.__name__, {"fc_version": "fp32"}
        )
        predictor_fc_fp_version = version_info["fc_version"]

        for i in range(self.num_bootstrap):
            # Called for its side effects: inserts the index-sampling ops and
            # fills the indices blob declared in output_schema.  The returned
            # blob itself is not needed here (no feature resampling at
            # predict/eval time).
            self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )

            # (w, b) pair for bootstrap iteration i.
            params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2]

            # Predict on the raw (un-resampled) input features.
            self._add_ops(
                net=net,
                features=self.input_record,
                params=params,
                iteration=i,
                version=predictor_fc_fp_version,
            )

    def add_train_ops(self, net):
        """
        Train-net path: for each bootstrap iteration, sample row indices,
        gather a bootstrapped copy of the input features, and run the FC on
        that resampled batch using the train param blobs.
        """
        for i in range(self.num_bootstrap):
            indices = self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )
            bootstrapped_features = self._bootstrap_ops(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                indices=indices,
                iteration=i,
            )
            self._add_ops(
                net,
                features=bootstrapped_features,
                iteration=i,
                params=self.train_param_blobs[i * 2 : (i * 2) + 2],
                version="fp32",
            )

    def get_fp16_compatible_parameters(self):
        """Return the weight blobs (even indices of bootstrapped_FCs).

        Raises:
            Exception: multi-output (output_dim_vec) mode is unsupported.
        """
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return [
                blob for idx, blob in enumerate(self.bootstrapped_FCs) if idx % 2 == 0
            ]
        else:
            raise Exception(
                "Currently only supports functionality for output_dim_vec == 1"
            )

    @property
    def param_blobs(self):
        """All trainable blobs: [w_0, b_0, ..., w_{B-1}, b_{B-1}].

        Raises:
            Exception: multi-output (output_dim_vec) mode is unsupported.
        """
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return self.bootstrapped_FCs
        else:
            raise Exception("FCWithBootstrap layer only supports output_dim_vec==1")
| |
|