from caffe2.python import schema
from caffe2.python.layers.arc_cosine_feature_map import ArcCosineFeatureMap
import numpy as np


class SemiRandomFeatures(ArcCosineFeatureMap):
    """
    Implementation of the semi-random kernel feature map.

    Applies H(x_rand) * x_rand^s * x_learned, where
    H is the Heaviside step function,
    x_rand is the input after applying FC with randomized parameters,
    and x_learned is the input after applying FC with learnable parameters.

    If using a multilayer model with semi-random layers, the input and output
    records should each have a 'full' and a 'random' Scalar. The 'random'
    Scalar will be passed as input to process the random features.

    For more information, see the original paper:
    https://arxiv.org/pdf/1702.08882.pdf

    Inputs:
        output_dims -- dimensions of the output vector
        s -- if s == 0, will obtain linear semi-random features;
             else if s == 1, will obtain squared semi-random features;
             else if s >= 2, will obtain higher-order semi-random features
        scale_random -- amount to scale the standard deviation
                        (for random parameter initialization when weight_init
                        or bias_init hasn't been specified)
        scale_learned -- amount to scale the standard deviation
                         (for learned parameter initialization when weight_init
                         or bias_init hasn't been specified)

        weight_init_random -- initialization distribution for the random weight
                              parameter (if None, will use Gaussian distribution)
        bias_init_random -- initialization distribution for the random bias
                            parameter (if None, will use Uniform distribution)
        weight_init_learned -- initialization distribution for the learned weight
                               parameter (if None, will use Gaussian distribution)
        bias_init_learned -- initialization distribution for the learned bias
                             parameter (if None, will use Uniform distribution)
        weight_optim -- optimizer for weight params of learned features
        bias_optim -- optimizer for bias param of learned features

        set_weight_as_global_constant -- if True, initialized random parameters
                                         will be constant across all distributed
                                         instances of the layer
    """
    def __init__(
            self,
            model,
            input_record,
            output_dims,
            s=1,
            scale_random=1.0,
            scale_learned=1.0,
            weight_init_random=None,
            bias_init_random=None,
            weight_init_learned=None,
            bias_init_learned=None,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=False,
            name='semi_random_features',
            **kwargs):

        if isinstance(input_record, schema.Struct):
            # A Struct input must provide both a 'full' and a 'random' field.
            assert schema.is_schema_subset(
                schema.Struct(
                    ('full', schema.Scalar()),
                    ('random', schema.Scalar()),
                ),
                input_record
            ), "Struct input must contain 'full' and 'random' Scalars"
            self.input_record_full = input_record.full
            self.input_record_random = input_record.random

        elif isinstance(input_record, schema.Scalar):
            # A single Scalar feeds both the random and the learned FC.
            self.input_record_full = input_record
            self.input_record_random = input_record

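        # Hand the *random* half to the parent ArcCosineFeatureMap. Note that
        # weight_optim/bias_optim are deliberately passed as None here, so the
        # randomly initialized FC parameters stay fixed during training; only
        # the learned parameters below receive the optimizers.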
        super(SemiRandomFeatures, self).__init__(
            model,
            self.input_record_full,
            output_dims,
            s=s,
            scale=scale_random,
            weight_init=weight_init_random,
            bias_init=bias_init_random,
            weight_optim=None,
            bias_optim=None,
            set_weight_as_global_constant=set_weight_as_global_constant,
            initialize_output_schema=False,
            name=name,
            **kwargs)

        self.output_schema = schema.Struct(
            ('full', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_full_output')
            ),),
            ('random', schema.Scalar(
                (np.float32, output_dims),
                model.net.NextScopedBlob(name + '_random_output')
            ),),
        )

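        # The 'random' field exists so that stacked semi-random layers can be
        # chained: a downstream layer consumes it as its own random input,
        # while 'full' carries the actual feature-map output.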
        # Recompute the init stddev for the *learned* parameters; the parent
        # constructor set self.stddev from scale_random for the random ones.
        assert (scale_learned > 0.0), \
            "Expected scale (learned) > 0, got %s" % scale_learned
        self.stddev = scale_learned * np.sqrt(1.0 / self.input_dims)

        (self.learned_w, self.learned_b) = self._initialize_params(
            'learned_w',
            'learned_b',
            w_init=weight_init_learned,
            b_init=bias_init_learned,
            w_optim=weight_optim,
            b_optim=bias_optim
        )

    def add_ops(self, net):
        # x_learned = x W_l^T + b_l
        learned_features = net.FC(self.input_record_full.field_blobs() +
                                  [self.learned_w, self.learned_b],
                                  net.NextScopedBlob('learned_features'))
        # x_rand = x W_r^T + b_r
        random_features = net.FC(self.input_record_random.field_blobs() +
                                 [self.random_w, self.random_b],
                                 net.NextScopedBlob('random_features'))
        # H(x_rand) * x_rand^s, written to the 'random' output field
        processed_random_features = self._heaviside_with_power(
            net,
            random_features,
            self.output_schema.random.field_blobs(),
            self.s
        )
        # H(x_rand) * x_rand^s * x_learned, the 'full' output field
        net.Mul([processed_random_features, learned_features],
                self.output_schema.full.field_blobs())
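

# ----------------------------------------------------------------------------
# Usage sketch: a minimal, illustrative instantiation, assuming a
# LayerModelHelper `model` and an input record `x` as used elsewhere in
# caffe2.python.layers (`some_optimizer` below is hypothetical, not part of
# this module):
#
#   srf = SemiRandomFeatures(
#       model,
#       x,                  # schema.Scalar, or a Struct with 'full'/'random'
#       output_dims=64,
#       s=1,
#       weight_optim=some_optimizer,   # applied to learned params only
#       bias_optim=some_optimizer,
#   )
#   srf.output_schema.full     # H(x_rand) * x_rand^s * x_learned
#   srf.output_schema.random   # H(x_rand) * x_rand^s, for the next SR layer
# ----------------------------------------------------------------------------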