| | |
| | |
| |
|
| |
|
| |
|
| |
|
| |
|
| | from caffe2.python import schema, core |
| | from caffe2.python.layers.layers import ( |
| | ModelLayer, |
| | IdList, |
| | IdScoreList, |
| | ) |
| | from caffe2.python.layers.tags import ( |
| | Tags |
| | ) |
| |
|
| | import numpy as np |
| |
|
| |
|
class SparseFeatureHash(ModelLayer):
    """Layer that remaps the ids of an IdList / IdScoreList record.

    Depending on the flags, the ids (``items`` of an IdList, ``keys`` of an
    IdScoreList) are transformed with one of:

    * ``IndexHash(seed, modulo)``            when ``use_hashing`` is true,
    * ``Div(divisor)`` followed by ``Mod``   when ``use_divide_mod`` is true,
    * ``Mod(modulo)``                        otherwise.

    ``lengths`` (and ``values`` for IdScoreList) are copied through unchanged.
    The output id field carries ``categorical_limit == modulo``.
    """

    def __init__(self, model, input_record, seed=0, modulo=None,
                 use_hashing=True, use_divide_mod=False, divisor=None,
                 name='sparse_feature_hash', **kwargs):
        """Build the output schema and record the hashing configuration.

        Args:
            model: layer model helper; supplies ``net`` and ``NoOptim``.
            input_record: record matching the IdList or IdScoreList schema.
            seed: seed forwarded to the ``IndexHash`` op.
            modulo: target id-space size. When falsy, it is derived from the
                id field's metadata via ``extract_hash_size``.
            use_hashing: transform ids with ``IndexHash``.
            use_divide_mod: divide ids by ``divisor`` before applying ``Mod``.
                Mutually exclusive with ``use_hashing``.
            divisor: divisor used when ``use_divide_mod`` is true (must be >= 1).
            name: layer name, also used as the name scope of the output blobs.
            **kwargs: forwarded to ``ModelLayer.__init__``.

        Raises:
            AssertionError: on conflicting flags, an invalid ``divisor`` or
                ``modulo``, an unsupported input schema, or when ``modulo``
                can be neither taken from the argument nor from metadata.
        """
        super(SparseFeatureHash, self).__init__(model, name, input_record, **kwargs)

        assert not (use_hashing and use_divide_mod), \
            "use_hashing and use_divide_mod cannot be set true at the same time."

        if use_divide_mod:
            # Check for None explicitly: on Python 3 `None >= 1` raises a
            # TypeError instead of producing the message below.
            assert divisor is not None and divisor >= 1, \
                'Unexpected divisor: {}'.format(divisor)

            self.divisor = self.create_param(
                param_name='divisor',
                shape=[1],
                initializer=('GivenTensorInt64Fill', {'values': np.array([divisor])}),
                optimizer=model.NoOptim,
            )

        self.seed = seed
        self.use_hashing = use_hashing
        self.use_divide_mod = use_divide_mod

        # The two supported schemas differ only in the name of the id field.
        if schema.equal_schemas(input_record, IdList):
            record_type, id_field = IdList, 'items'
        elif schema.equal_schemas(input_record, IdScoreList):
            record_type, id_field = IdScoreList, 'keys'
        else:
            raise AssertionError("Input type must be one of (IdList, IdScoreList)")

        input_metadata = getattr(input_record, id_field).metadata
        self.modulo = modulo or self.extract_hash_size(input_metadata)

        # Metadata may be absent when `modulo` is given explicitly; guard both
        # schema variants the same way.
        output_metadata = schema.Metadata(
            categorical_limit=self.modulo,
            feature_specs=input_metadata.feature_specs if input_metadata else None,
            expected_value=input_metadata.expected_value if input_metadata else None,
        )
        with core.NameScope(name):
            self.output_schema = schema.NewRecord(model.net, record_type)
        getattr(self.output_schema, id_field).set_metadata(output_metadata)

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
        if input_record.lengths.metadata:
            self.output_schema.lengths.set_metadata(input_record.lengths.metadata)

        # Tag the layer as CPU-only.
        # NOTE(review): presumably because the ops used in add_ops have no GPU
        # implementation -- confirm.
        self.tags.update([Tags.CPU_ONLY])

    def extract_hash_size(self, metadata):
        """Return the id-space size described by ``metadata``.

        Prefers ``feature_specs.desired_hash_size`` when it is set (truthy) and
        falls back to ``categorical_limit``.

        Raises:
            AssertionError: if ``metadata`` is None or carries neither value.
        """
        if metadata is not None:
            specs = metadata.feature_specs
            if specs and specs.desired_hash_size:
                return specs.desired_hash_size
            if metadata.categorical_limit is not None:
                return metadata.categorical_limit
        raise AssertionError("desired_hash_size or categorical_limit must be set")

    def add_ops(self, net):
        """Add the copy and id-transformation operators to ``net``.

        Raises:
            NotImplementedError: if the output schema is neither IdList nor
                IdScoreList.
        """
        net.Copy(
            self.input_record.lengths(),
            self.output_schema.lengths()
        )
        if schema.equal_schemas(self.output_schema, IdList):
            input_blob = self.input_record.items()
            output_blob = self.output_schema.items()
        elif schema.equal_schemas(self.output_schema, IdScoreList):
            input_blob = self.input_record.keys()
            output_blob = self.output_schema.keys()
            # Scores are passed through untouched; only the keys are remapped.
            net.Copy(
                self.input_record.values(),
                self.output_schema.values()
            )
        else:
            raise NotImplementedError()

        if self.use_hashing:
            net.IndexHash(
                input_blob, output_blob, seed=self.seed, modulo=self.modulo
            )
        elif self.use_divide_mod:
            # Divide by the constant `divisor` blob first, then take the
            # remainder with respect to `modulo`.
            quotient = net.Div(
                [input_blob, self.divisor], [net.NextScopedBlob('quotient')]
            )
            net.Mod(
                quotient, output_blob, divisor=self.modulo, sign_follow_divisor=True
            )
        else:
            net.Mod(
                input_blob, output_blob, divisor=self.modulo, sign_follow_divisor=True
            )
| |
|