| | |
| | |
| |
|
| |
|
| | import math |
| |
|
| | import numpy as np |
| | from caffe2.python import core, schema |
| | from caffe2.python.helpers.arg_scope import get_current_scope |
| | from caffe2.python.layers.layers import ModelLayer |
| | from caffe2.python.layers.sampling_trainable_mixin import SamplingTrainableMixin |
| |
|
| |
|
def get_fc_predictor_version(fc_version):
    """Validate the FC precision requested for the predictor net.

    Currently only "fp32" is accepted; any other value trips the assertion.
    Returns the validated version string unchanged.
    """
    supported_versions = ["fp32"]
    assert fc_version in supported_versions, (
        "Only support fp32 for the fully connected layer "
        "in the predictor net, the provided FC precision is {}".format(fc_version)
    )
    return fc_version
| |
|
| |
|
class FCWithBootstrap(SamplingTrainableMixin, ModelLayer):
    """Fully connected layer trained on bootstrapped (resampled) inputs.

    Builds ``num_bootstrap`` independent FC parameter pairs and, at train
    time, feeds each one a with-replacement resample of the input batch.

    Blob layout:

        self.bootstrapped_FCs (weights on even indices, biases on odd):
            [w_0, b_0, w_1, b_1, ..., w_{B-1}, b_{B-1}]

        self.output_schema (indices always on even positions):
            Struct(
                bootstrap_iteration_0/indices, bootstrap_iteration_0/preds,
                ...,
                bootstrap_iteration_{B-1}/indices, bootstrap_iteration_{B-1}/preds,
            )
    """

    def __init__(
        self,
        model,
        input_record,
        output_dims,
        num_bootstrap,
        weight_init=None,
        bias_init=None,
        weight_optim=None,
        bias_optim=None,
        name="fc_with_bootstrap",
        weight_reg=None,
        bias_reg=None,
        clip_param=None,
        axis=1,
        **kwargs
    ):
        """
        Args:
            model: layer-model helper this layer registers itself with.
            input_record: schema.Scalar describing the dense input features.
            output_dims: output dimension of each bootstrapped FC.
            num_bootstrap: number of independent bootstrapped FCs to create.
            weight_init: optional caffe2 initializer tuple for weights;
                defaults to UniformFill in [-sqrt(1/input_dims), sqrt(1/input_dims)].
            bias_init: optional caffe2 initializer tuple for biases; same default.
            weight_optim: optimizer for the weight params.
            bias_optim: optimizer for the bias params.
            name: layer name prefix.
            weight_reg: regularizer for the weight params.
            bias_reg: regularizer for the bias params.
            clip_param: optional (clip_min, clip_max) applied to params before
                the FC op; at least one bound must be non-None, and
                clip_min < clip_max when both are given.
            axis: first FC axis; must be >= 1.
        """
        super(FCWithBootstrap, self).__init__(model, name, input_record, **kwargs)
        assert isinstance(
            input_record, schema.Scalar
        ), "Incorrect input type {}".format(input_record)
        assert (
            len(input_record.field_types()[0].shape) > 0
        ), "FC expects limited dimensions of the input tensor"
        assert axis >= 1, "axis {} should >= 1.".format(axis)
        self.axis = axis
        # Flatten all trailing dims from `axis` on into the FC input width.
        input_dims = np.prod(input_record.field_types()[0].shape[axis - 1 :])

        assert input_dims > 0, "FC expects input dimensions > 0, got {}".format(
            input_dims
        )

        # kwargs forwarded to net.Clip in _add_ops; stays None if no clipping.
        self.clip_args = None

        # number of independent bootstrapped FCs
        self.num_bootstrap = num_bootstrap

        self.input_dims = input_dims

        # flat [w_0, b_0, ..., w_{B-1}, b_{B-1}] parameter blob list
        self.bootstrapped_FCs = []

        # Blobs created once (on bootstrap iteration 0) inside
        # _generate_bootstrapped_indices and reused by later iterations.
        self.batch_size = None
        self.output_dim_vec = None
        self.lower_bound = None
        self.upper_bound = None

        if clip_param is not None:
            assert len(clip_param) == 2, (
                "clip_param must be a tuple / list "
                "of length 2 and in the form of (clip_min, clip max)"
            )
            clip_min, clip_max = clip_param
            assert (
                clip_min is not None or clip_max is not None
            ), "clip_min, and clip_max in clip_param cannot both be None"
            assert (
                clip_min is None or clip_max is None
            ) or clip_min < clip_max, (
                "clip_param = [clip_min, clip_max] must have clip_min < clip_max"
            )
            self.clip_args = {}
            if clip_min is not None:
                self.clip_args["min"] = clip_min
            if clip_max is not None:
                self.clip_args["max"] = clip_max

        # Default init: uniform in [-sqrt(1/fan_in), sqrt(1/fan_in)].
        scale = math.sqrt(1.0 / input_dims)
        weight_init = (
            weight_init
            if weight_init
            else ("UniformFill", {"min": -scale, "max": scale})
        )
        bias_init = (
            bias_init if bias_init else ("UniformFill", {"min": -scale, "max": scale})
        )

        # Build one (indices, preds) output pair and one (w, b) parameter
        # pair per bootstrap iteration.  Indices land on the even positions
        # of the output schema; weights on the even positions of
        # self.bootstrapped_FCs (see class docstring).
        output_schema = schema.Struct()
        for i in range(num_bootstrap):
            output_schema += schema.Struct(
                (
                    "bootstrap_iteration_{}/indices".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/indices".format(i)
                    ),
                ),
                (
                    "bootstrap_iteration_{}/preds".format(i),
                    self.get_next_blob_reference(
                        "bootstrap_iteration_{}/preds".format(i)
                    ),
                ),
            )
            self.bootstrapped_FCs.extend(
                [
                    self.create_param(
                        param_name="bootstrap_iteration_{}/w".format(i),
                        shape=[output_dims, input_dims],
                        initializer=weight_init,
                        optimizer=weight_optim,
                        regularizer=weight_reg,
                    ),
                    self.create_param(
                        param_name="bootstrap_iteration_{}/b".format(i),
                        shape=[output_dims],
                        initializer=bias_init,
                        optimizer=bias_optim,
                        regularizer=bias_reg,
                    ),
                ]
            )

        self.output_schema = output_schema

    def _generate_bootstrapped_indices(self, net, copied_cur_layer, iteration):
        """
        Insert ops that sample ``batch_size`` row indices uniformly (with
        replacement) in [0, batch_size - 1], writing them into the indices
        blob pre-declared in output_schema for this iteration.

        Args:
            net: the caffe2 net to insert operators into

            copied_cur_layer: blob of the bootstrapped features (make sure this
                blob has a stop_gradient on)

            iteration: the bootstrap iteration to generate for.  Used to
                correctly populate the output_schema

        Return:
            A blob containing the generated indices of shape: (batch_size,)
        """
        with core.NameScope("bootstrap_iteration_{}".format(iteration)):
            if iteration == 0:
                # Derive batch_size and the [0, batch_size - 1] sampling range
                # once; later iterations reuse the cached blobs on self.
                input_shape = net.Shape(copied_cur_layer, "input_shape")
                batch_size_index = net.Const(np.array([0]), "batch_size_index")
                batch_size = net.Gather([input_shape, batch_size_index], "batch_size")
                self.batch_size = batch_size

                lower_bound = net.Const(np.array([0]), "lower_bound", dtype=np.int32)
                offset = net.Const(np.array([1]), "offset", dtype=np.int32)
                int_batch_size = net.Cast(
                    [self.batch_size], "int_batch_size", to=core.DataType.INT32
                )
                upper_bound = net.Sub([int_batch_size, offset], "upper_bound")

                self.lower_bound = lower_bound
                self.upper_bound = upper_bound

            indices = net.UniformIntFill(
                [self.batch_size, self.lower_bound, self.upper_bound],
                self.output_schema[iteration * 2].field_blobs()[0],
                input_as_shape=1,
            )

            return indices

    def _bootstrap_ops(self, net, copied_cur_layer, indices, iteration):
        """
        This method contains all the bootstrapping logic used to bootstrap
        the features.  Only used by the train_net.

        Args:
            net: the caffe2 net to insert bootstrapping operators

            copied_cur_layer: the blob representing the current features.
                Note, this layer should have a stop_gradient on it.

            indices: blob of sampled row indices (from
                _generate_bootstrapped_indices)

            iteration: bootstrap iteration, used only to name the output blob

        Returns:
            bootstrapped_features: Scalar of the bootstrapped version of
                cur_layer with same dimensions
        """
        # Row-gather the input by the sampled indices: sampling with
        # replacement over the batch dimension.
        bootstrapped_features = net.Gather(
            [copied_cur_layer, indices],
            net.NextScopedBlob("bootstrapped_features_{}".format(iteration)),
        )

        # Re-wrap as a Scalar so downstream code can use field_blobs().
        bootstrapped_features = schema.Scalar(
            (np.float32, self.input_dims), bootstrapped_features
        )

        return bootstrapped_features

    def _insert_fc_ops(self, net, features, params, outputs, version):
        """
        Insert the FC op for one bootstrap iteration.

        Args:
            net: the caffe2 net to insert operator into

            features: Scalar containing blob of the bootstrapped features or
                actual cur_layer features

            params: weight and bias for FC

            outputs: the output blobs

            version: support fp32 for now.

        Raises:
            Exception: if `version` is not a supported FC precision.
        """
        if version == "fp32":
            pred_blob = net.FC(
                features.field_blobs() + params, outputs, axis=self.axis, **self.kwargs
            )
            return pred_blob
        else:
            raise Exception("unsupported FC type version {}".format(version))

    def _add_ops(self, net, features, iteration, params, version):
        """
        Optionally clip the params, then run the FC writing into this
        iteration's preds blob of the output schema.

        Args:
            net: the caffe2 net to insert operators into

            features: feature blobs to predict on.  Can be the actual
                cur_layer or the bootstrapped_feature blobs.

            iteration: bootstrap iteration; selects the preds output blob

            params: the weight and bias, passed by either add_ops or
                add_train_ops function

            version: currently fp32 support only
        """
        if self.clip_args is not None:
            # Clip into fresh blobs so the stored params stay untouched.
            clipped_params = [net.NextScopedBlob("clipped_%s" % str(p)) for p in params]
            for p, cp in zip(params, clipped_params):
                net.Clip([p], [cp], **self.clip_args)
            params = clipped_params

        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            # preds blobs sit on the odd positions of the output schema.
            self._insert_fc_ops(
                net=net,
                features=features,
                params=params,
                outputs=[self.output_schema.field_blobs()[(iteration * 2) + 1]],
                version=version,
            )

    def add_ops(self, net):
        """
        Both the predict net and the eval net will call this function.

        For bootstrapping approach, the goal is to pass the cur_layer feature
        inputs through all the bootstrapped FCs that are stored under
        self.bootstrapped_FCs.  Return the preds in the same output_schema
        with dummy indices (because they are not needed).
        """
        version_info = get_current_scope().get(
            get_fc_predictor_version.__name__, {"fc_version": "fp32"}
        )
        predictor_fc_fp_version = version_info["fc_version"]

        for i in range(self.num_bootstrap):
            # Called for its side effects: inserts the index-sampling ops and
            # fills the indices blob declared in output_schema.  The returned
            # blob itself is not needed here (no feature resampling at
            # predict/eval time).
            self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )

            # (w, b) pair for bootstrap iteration i.
            params = self.bootstrapped_FCs[i * 2 : (i * 2) + 2]

            # Predict on the raw (un-resampled) input features.
            self._add_ops(
                net=net,
                features=self.input_record,
                params=params,
                iteration=i,
                version=predictor_fc_fp_version,
            )

    def add_train_ops(self, net):
        """
        Train-net path: for each bootstrap iteration, sample row indices,
        gather a bootstrapped copy of the input features, and run the FC on
        that resampled batch using the train param blobs.
        """
        for i in range(self.num_bootstrap):
            indices = self._generate_bootstrapped_indices(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                iteration=i,
            )
            bootstrapped_features = self._bootstrap_ops(
                net=net,
                copied_cur_layer=self.input_record.field_blobs()[0],
                indices=indices,
                iteration=i,
            )
            self._add_ops(
                net,
                features=bootstrapped_features,
                iteration=i,
                params=self.train_param_blobs[i * 2 : (i * 2) + 2],
                version="fp32",
            )

    def get_fp16_compatible_parameters(self):
        """Return the weight blobs (even indices of bootstrapped_FCs).

        Raises:
            Exception: multi-output (output_dim_vec) mode is unsupported.
        """
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return [
                blob for idx, blob in enumerate(self.bootstrapped_FCs) if idx % 2 == 0
            ]
        else:
            raise Exception(
                "Currently only supports functionality for output_dim_vec == 1"
            )

    @property
    def param_blobs(self):
        """All trainable blobs: [w_0, b_0, ..., w_{B-1}, b_{B-1}].

        Raises:
            Exception: multi-output (output_dim_vec) mode is unsupported.
        """
        if self.output_dim_vec is None or len(self.output_dim_vec) == 1:
            return self.bootstrapped_FCs
        else:
            raise Exception("FCWithBootstrap layer only supports output_dim_vec==1")
| |
|