from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
    H is the Heaviside step function,
    x_rand is the input after applying FC with randomized parameters,
    and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, the input and output
    records should each have a 'full' and a 'random' Scalar. The 'random'
    Scalar will be passed as input to process the random features.

    For more information, see the original paper:
    https://arxiv.org/pdf/1702.08882.pdf

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             else if s == 1, will obtain squared semi-random features;
             else if s >= 2, will obtain higher-order semi-random features
        scale_random -- amount to scale the standard deviation
                        (for random parameter initialization when weight_init
                        or bias_init hasn't been specified)
        scale_learned -- amount to scale the standard deviation
                         (for learned parameter initialization when weight_init
                         or bias_init hasn't been specified)

        weight_init_random -- initialization distribution for the random weight
                              parameter (if None, will use Gaussian distribution)
        bias_init_random -- initialization distribution for the random bias
                            parameter (if None, will use Uniform distribution)
        weight_init_learned -- initialization distribution for the learned weight
                               parameter (if None, will use Gaussian distribution)
        bias_init_learned -- initialization distribution for the learned bias
                             parameter (if None, will use Uniform distribution)
        weight_optim -- optimizer for weight params of learned features
        bias_optim -- optimizer for bias param of learned features

        set_weight_as_global_constant -- if True, initialized random parameters
                                         will be constant across all distributed
                                         instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale_random=1.0,
            scale_learned=1.0,
            weight_init_random=None,
            bias_init_random=None,
            weight_init_learned=None,
            bias_init_learned=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):

        if isinstance(input_record, schema.Struct):
            # A Struct input must provide both a 'full' and a 'random' field.
            assert schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            ), "Struct input must contain 'full' and 'random' Scalars"
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random

        elif isinstance(input_record, schema.Scalar):
            # A single Scalar feeds both the random and the learned FC.
            self.input_record_full = input_record
            self.input_record_random = input_record

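        # Hand the *random* half to the parent ArcCosineFeatureMap. Note that
        # weight_optim/bias_optim are deliberately passed as None here, so the
        # randomly initialized FC parameters stay fixed during training; only
        # the learned parameters below receive the optimizers.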
        super(SemiRandomFeatures, self).__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale_random,
            weight_init=weight_init_random,
            bias_init=bias_init_random,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)

        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            ),),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            ),),
        )

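        # The 'random' field exists so that stacked semi-random layers can be
        # chained: a downstream layer consumes it as its own random input,
        # while 'full' carries the actual feature-map output.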
        # Recompute the init stddev for the *learned* parameters; the parent
        # constructor set self.stddev from scale_random for the random ones.
        assert (scale_learned > 0.0), \
            "Expected scale (learned) > 0, got %s" % scale_learned
        self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)

        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init_learned,
            b_init=bias_init_learned,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # x_learned = x W_l^T + b_l
        learned_features = net.FC(self.input_record_full.field_blobs() +
                                  [self.learned_w, self.learned_b],
                                  net.NextScopedBlob('learned_features'))
        # x_rand = x W_r^T + b_r
        random_features = net.FC(self.input_record_random.field_blobs() +
                                 [self.random_w, self.random_b],
                                 net.NextScopedBlob('random_features'))
        # H(x_rand) * x_rand^s, written to the 'random' output field
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
        # H(x_rand) * x_rand^s * x_learned, the 'full' output field
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
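

# ----------------------------------------------------------------------------
# Usage sketch: a minimal, illustrative instantiation, assuming a
# LayerModelHelper `model` and an input record `x` as used elsewhere in
# caffe2.python.layers (`some_optimizer` below is hypothetical, not part of
# this module):
#
#   srf = SemiRandomFeatures(
#       model,
#       x,                  # schema.Scalar, or a Struct with 'full'/'random'
#       output_dims=64,
#       s=1,
#       weight_optim=some_optimizer,   # applied to learned params only
#       bias_optim=some_optimizer,
#   )
#   srf.output_schema.full     # H(x_rand) * x_rand^s * x_learned
#   srf.output_schema.random   # H(x_rand) * x_rand^s, for the next SR layer
# ----------------------------------------------------------------------------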