from caffe2.python import core, schema
from caffe2.python.layers.layers import ModelLayer
import numpy as np

class BatchSoftmaxLoss(ModelLayer):
    """Layer that adds a SoftmaxWithLoss op over a prediction/label pair.

    Requires ``input_record`` to contain at least ``prediction`` and
    ``label`` scalar fields; an optional ``weight`` field is used as a
    per-example weight.  Exposes an output schema with ``softmax`` and
    ``loss`` fields.
    """

    def __init__(
        self,
        model,
        input_record,
        name='batch_softmax_loss',
        label_smoothing_matrix=None,
        label_prob=False,
        scale=1.0,
        average_by_batch_size=False,
        **kwargs
    ):
        super().__init__(model, name, input_record, **kwargs)

        # The layer only makes sense when both a label and a prediction
        # are present in the incoming record.
        required = schema.Struct(
            ('label', schema.Scalar()),
            ('prediction', schema.Scalar()),
        )
        assert schema.is_schema_subset(required, input_record)

        self.label_prob = label_prob
        self.scale = scale
        self.average_by_batch_size = average_by_batch_size

        # When a smoothing matrix is supplied, register it (and its
        # dimension) as global constants on the model; this also forces
        # label_prob mode (see initialize_label_smoothing_constants).
        self.label_smoothing_matrix = label_smoothing_matrix
        if self.label_smoothing_matrix is not None:
            self.initialize_label_smoothing_constants()

        self.output_schema = schema.Struct(
            (
                'softmax',
                schema.Scalar(
                    input_record.prediction.field_type(),
                    self.get_next_blob_reference('softmax'),
                ),
            ),
            (
                'loss',
                schema.Scalar(
                    np.float32,
                    self.get_next_blob_reference('loss'),
                ),
            ),
        )

    def initialize_label_smoothing_constants(self):
        """Validate the smoothing matrix and store it as model constants."""
        assert self.label_smoothing_matrix is not None
        matrix = np.array(self.label_smoothing_matrix).astype(np.float32)
        # Smoothing must be a square (num_classes x num_classes) matrix.
        assert len(matrix.shape) == 2
        num_classes = matrix.shape[0]
        assert num_classes == matrix.shape[1]
        self.label_smoothing_matrix = matrix

        self.label_smoothing_matrix = self.model.add_global_constant(
            '%s_label_smoothing_matrix' % self.name,
            array=self.label_smoothing_matrix,
            dtype=np.dtype(np.float32),
        )
        self.label_dim = self.model.add_global_constant(
            '%s_label_dim' % self.name,
            array=num_classes,
            dtype=np.dtype(np.int64),
        )
        # A smoothed label is a probability distribution, so the loss op
        # must treat its label input as probabilities.
        self.label_prob = True

    def compute_smoothed_label(self, net):
        """Turn the integer label into a smoothed distribution blob.

        One-hot encodes the label and multiplies it by the smoothing
        matrix; returns the blob holding the smoothed label.
        """
        assert self.label_smoothing_matrix is not None
        raw_label = self.input_record.label()
        label_type = self.input_record.label.field_type()
        # OneHot requires an int64 index input; cast when necessary.
        if label_type.base != np.int64:
            int64_label = net.NextScopedBlob('int64_label')
            net.Cast([raw_label], [int64_label], to=core.DataType.INT64)
        else:
            int64_label = raw_label
        one_hot_label = net.NextScopedBlob('one_hot_label')
        smoothed_label = net.NextScopedBlob('smoothed_label')
        net.OneHot([int64_label, self.label_dim], [one_hot_label])
        net.MatMul([one_hot_label, self.label_smoothing_matrix], smoothed_label)
        return smoothed_label

    def add_ops(self, net):
        """Append the (optionally weighted) SoftmaxWithLoss op to ``net``."""
        label = self.input_record.label.field_blobs()
        if self.label_smoothing_matrix is not None:
            # Smoothing replaces the raw label with a distribution.
            label = [self.compute_smoothed_label(net)]
        elif (not self.label_prob and
              self.input_record.label.field_types()[0].base != np.int32):
            # Index-style labels must be int32 for SoftmaxWithLoss.
            label = [
                net.Cast(
                    label,
                    net.NextScopedBlob('int32_label'),
                    to=core.DataType.INT32,
                )
            ]

        inputs = self.input_record.prediction.field_blobs() + label

        # Optional per-example weights; cast to float32 when needed.
        if 'weight' in self.input_record:
            weight_blob = self.input_record.weight()
            if self.input_record.weight.field_type().base != np.float32:
                weight_blob = net.Cast(
                    weight_blob,
                    weight_blob + '_float32',
                    to=core.DataType.FLOAT,
                )
            inputs.append(weight_blob)

        net.SoftmaxWithLoss(
            inputs,
            self.output_schema.field_blobs(),
            label_prob=self.label_prob,
            scale=self.scale,
            average_by_batch_size=self.average_by_batch_size,
        )
| |
|