| | |
| | |
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | from caffe2.python import core, schema |
| | from caffe2.python.layers.layers import ModelLayer |
| | import numpy as np |
| | import logging |
| | logger = logging.getLogger(__name__) |
| | ''' |
| | Homotopy Weighting between two weights x, y by doing: |
| | alpha x + beta y |
| | where alpha is a decreasing scalar parameter ranging from [min, max] (default, |
| | [0, 1]), and alpha + beta = max + min, which means that beta is increasing in |
| | the range [min, max]; |
| | |
| | Homotopy methods first solve an "easy" problem (one whose solution is |
| | well known), which is then gradually transformed into the target problem. |
| | ''' |
| |
|
| |
|
class HomotopyWeight(ModelLayer):
    """Blend two input blobs with iteration-dependent weights.

    The layer outputs ``alpha * x + beta * y`` where ``x`` and ``y`` are the
    two field blobs of ``input_record``.  On every run ``alpha`` and ``beta``
    are recomputed from an iteration counter through a ``LearningRate`` op
    using the ``inv`` policy:

        alpha = scale * lr       + min_weight
        beta  = scale * (1 - lr) + min_weight

    with ``scale = max_weight - min_weight``, so that
    ``alpha + beta == min_weight + max_weight``.
    """

    def __init__(
        self,
        model,
        input_record,
        name='homotopy_weight',
        min_weight=0.,
        max_weight=1.,
        half_life=1e6,
        quad_life=3e6,
        atomic_iter=None,
        **kwargs
    ):
        """Set up the output blob, the iteration counter and the schedule.

        Args:
            model: model the layer is attached to (forwarded to ModelLayer).
            input_record: record whose ``field_blobs()`` yields exactly two
                blobs, used as ``x`` and ``y``.
            name: layer name; also used as prefix for the created params.
            min_weight: lower bound added to both weights.
            max_weight: upper bound; must be strictly greater than
                ``min_weight``.
            half_life: see ``solve_inv_lr_params``.
            quad_life: see ``solve_inv_lr_params``.
            atomic_iter: optional externally managed iteration blob.  When it
                is ``None`` the layer creates its own counter (and mutex) and
                increments it in ``update_weight``.
            **kwargs: forwarded to ``ModelLayer.__init__``.

        Raises:
            AssertionError: if the input does not hold exactly two blobs, if
                ``max_weight <= min_weight``, or if the half/quad life values
                are rejected by ``solve_inv_lr_params``.
        """
        super(HomotopyWeight, self).__init__(
            model, name, input_record, **kwargs
        )
        self.output_schema = schema.Scalar(
            np.float32, self.get_next_blob_reference('homotopy_weight')
        )
        data = self.input_record.field_blobs()
        assert len(data) == 2, (
            'HomotopyWeight expects exactly 2 input blobs, got %d' % len(data)
        )
        self.x = data[0]
        self.y = data[1]

        # An externally supplied counter is only read by this layer; an
        # internally created one is also advanced in update_weight().
        self.use_external_iter = (atomic_iter is not None)
        self.atomic_iter = (
            atomic_iter if self.use_external_iter else self.create_atomic_iter()
        )

        assert max_weight > min_weight, (
            'max_weight (%r) must be greater than min_weight (%r)' %
            (max_weight, min_weight)
        )
        self.scale = float(max_weight - min_weight)
        self.offset = self.model.add_global_constant(
            '%s_offset_1dfloat' % self.name, float(min_weight)
        )
        self.gamma, self.power = self.solve_inv_lr_params(half_life, quad_life)

    def solve_inv_lr_params(self, half_life, quad_life):
        """Derive the ``gamma`` and ``power`` arguments of the ``inv`` policy.

        Args:
            half_life: positive number (in units of the iteration counter).
            quad_life: number strictly greater than ``2 * half_life``.

        Returns:
            Tuple ``(gamma, power)`` passed to the ``LearningRate`` op.

        Raises:
            AssertionError: if ``half_life <= 0`` or
                ``quad_life <= 2 * half_life``.
        """
        # half_life is used as a divisor below, so it has to be positive.
        assert half_life > 0, (
            'half_life must be positive, got %r' % (half_life, )
        )
        # Guarantees x > 1 below, so log(x) is positive and power is finite.
        assert quad_life > 2 * half_life, (
            'quad_life (%r) must be greater than 2 * half_life (%r)' %
            (quad_life, half_life)
        )
        t = float(quad_life) / float(half_life)
        x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
        gamma = (x - 1.0) / float(half_life)
        power = np.log(2.0) / np.log(x)
        # NOTE(review): assuming the 'inv' policy evaluates
        # base_lr * (1 + gamma * iter) ** (-power), these values give exactly
        # 0.5 at iter == half_life, but only approximately 0.25 at
        # iter == quad_life -- confirm that the approximation is intended.
        logger.info(
            'homotopy_weighting: found lr param: gamma=%g, power=%g',
            gamma, power
        )
        return gamma, power

    def create_atomic_iter(self):
        """Create the layer-owned mutex and int64 iteration counter params.

        Both params are registered with ``self.model.NoOptim`` as optimizer.
        The counter is a shape ``[1]`` blob filled with 0.

        Returns:
            The created iteration counter (also stored in
            ``self.atomic_iter``).
        """
        self.mutex = self.create_param(
            param_name=('%s_mutex' % self.name),
            shape=None,
            initializer=('CreateMutex', ),
            optimizer=self.model.NoOptim,
        )
        self.atomic_iter = self.create_param(
            param_name=('%s_atomic_iter' % self.name),
            shape=[1],
            initializer=(
                'ConstantFill', {
                    'value': 0,
                    'dtype': core.DataType.INT64
                }
            ),
            optimizer=self.model.NoOptim,
        )
        return self.atomic_iter

    def update_weight(self, net):
        """Add the ops that compute the current ``alpha`` and ``beta``.

        Args:
            net: net the ops are appended to.

        Returns:
            Tuple ``(alpha, beta)`` of blob references.
        """
        alpha = net.NextScopedBlob('alpha')
        beta = net.NextScopedBlob('beta')
        lr = net.NextScopedBlob('lr')
        comp_lr = net.NextScopedBlob('complementary_lr')
        scaled_lr = net.NextScopedBlob('scaled_lr')
        scaled_comp_lr = net.NextScopedBlob('scaled_complementary_lr')
        # Only a counter owned by this layer is advanced here; self.mutex
        # exists only in that case.
        if not self.use_external_iter:
            net.AtomicIter([self.mutex, self.atomic_iter], [self.atomic_iter])
        net.LearningRate(
            [self.atomic_iter],
            [lr],
            policy='inv',
            gamma=self.gamma,
            power=self.power,
            base_lr=1.0,
        )
        # comp_lr = ONE - lr (presumably the global constant 'ONE' holds 1.0
        # -- verify against the model's global constants).
        net.Sub([self.model.global_constants['ONE'], lr], [comp_lr])
        net.Scale([lr], [scaled_lr], scale=self.scale)
        net.Scale([comp_lr], [scaled_comp_lr], scale=self.scale)
        # Shift both scaled terms by min_weight (self.offset).
        net.Add([scaled_lr, self.offset], [alpha])
        net.Add([scaled_comp_lr, self.offset], [beta])
        return alpha, beta

    def add_ops(self, net):
        """Append the weight update and the weighted sum of x and y to ``net``."""
        alpha, beta = self.update_weight(net)
        # output = alpha * x + beta * y
        net.WeightedSum([self.x, alpha, self.y, beta], self.output_schema())
| |
|