| | |
| |
|
| | |
| |
|
| | import flatbuffers |
| | from flatbuffers.compat import import_numpy |
| | np = import_numpy() |
| |
|
class FeatureProcessorOptions(object):
    """Read-only accessor over a FeatureProcessorOptions flatbuffers table.

    Each getter resolves its field with ``self._tab.Offset(<vtable offset>)``.
    When that lookup yields 0 the getter returns the literal fallback written
    in the method instead of reading from the buffer.
    """

    __slots__ = ['_tab']

    @classmethod
    def GetRootAsFeatureProcessorOptions(cls, buf, offset):
        """Return an accessor positioned on the root table of ``buf``.

        The uoffset decoded at ``offset`` is added to ``offset`` to obtain
        the position passed to ``Init``.
        """
        root_rel = flatbuffers.encode.Get(flatbuffers.packer.uoffset, buf, offset)
        options = FeatureProcessorOptions()
        options.Init(buf, root_rel + offset)
        return options

    @classmethod
    def FeatureProcessorOptionsBufferHasIdentifier(cls, buf, offset, size_prefixed=False):
        """Delegate to ``flatbuffers.util.BufferHasIdentifier`` with the 4-byte identifier below."""
        file_identifier = b"\x54\x43\x32\x20"
        return flatbuffers.util.BufferHasIdentifier(
            buf, offset, file_identifier, size_prefixed=size_prefixed)

    def Init(self, buf, pos):
        """Bind this accessor to a ``flatbuffers.table.Table`` over ``buf`` at ``pos``."""
        self._tab = flatbuffers.table.Table(buf, pos)

    def NumBuckets(self):
        """Int32 at vtable offset 4; -1 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(4))
        if field_pos == 0:
            return -1
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def EmbeddingSize(self):
        """Int32 at vtable offset 6; -1 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(6))
        if field_pos == 0:
            return -1
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def EmbeddingQuantizationBits(self):
        """Int32 at vtable offset 8; 8 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(8))
        if field_pos == 0:
            return 8
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def ContextSize(self):
        """Int32 at vtable offset 10; -1 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(10))
        if field_pos == 0:
            return -1
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def MaxSelectionSpan(self):
        """Int32 at vtable offset 12; -1 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(12))
        if field_pos == 0:
            return -1
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def ChargramOrders(self, j):
        """Int32 element ``j`` (4-byte stride) of the vector at vtable offset 14; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
        if field_pos == 0:
            return 0
        vector_start = self._tab.Vector(field_pos)
        element_shift = flatbuffers.number_types.UOffsetTFlags.py_type(j * 4)
        return self._tab.Get(flatbuffers.number_types.Int32Flags, vector_start + element_shift)

    def ChargramOrdersAsNumpy(self):
        """Vector at vtable offset 14 via ``GetVectorAsNumpy``; the int 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
        if field_pos == 0:
            return 0
        return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Int32Flags, field_pos)

    def ChargramOrdersLength(self):
        """``VectorLen`` of the vector at vtable offset 14; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def ChargramOrdersIsNone(self):
        """True when the lookup for vtable offset 14 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(14)) == 0

    def MaxWordLength(self):
        """Int32 at vtable offset 16; 20 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(16))
        if field_pos == 0:
            return 20
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def UnicodeAwareFeatures(self):
        """Bool at vtable offset 18; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(18))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def ExtractCaseFeature(self):
        """Bool at vtable offset 20; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(20))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def ExtractSelectionMaskFeature(self):
        """Bool at vtable offset 22; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(22))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def RegexpFeature(self, j):
        """String element ``j`` (4-byte stride) of the vector at vtable offset 24; "" if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(24))
        if field_pos == 0:
            return ""
        vector_start = self._tab.Vector(field_pos)
        element_shift = flatbuffers.number_types.UOffsetTFlags.py_type(j * 4)
        return self._tab.String(vector_start + element_shift)

    def RegexpFeatureLength(self):
        """``VectorLen`` of the vector at vtable offset 24; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(24))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def RegexpFeatureIsNone(self):
        """True when the lookup for vtable offset 24 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(24)) == 0

    def RemapDigits(self):
        """Bool at vtable offset 26; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(26))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def LowercaseTokens(self):
        """Bool at vtable offset 28; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(28))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def SelectionReducedOutputSpace(self):
        """Bool at vtable offset 30; True when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(30))
        if field_pos == 0:
            return True
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def Collections(self, j):
        """String element ``j`` (4-byte stride) of the vector at vtable offset 32; "" if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(32))
        if field_pos == 0:
            return ""
        vector_start = self._tab.Vector(field_pos)
        element_shift = flatbuffers.number_types.UOffsetTFlags.py_type(j * 4)
        return self._tab.String(vector_start + element_shift)

    def CollectionsLength(self):
        """``VectorLen`` of the vector at vtable offset 32; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(32))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def CollectionsIsNone(self):
        """True when the lookup for vtable offset 32 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(32)) == 0

    def DefaultCollection(self):
        """Int32 at vtable offset 34; -1 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(34))
        if field_pos == 0:
            return -1
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def OnlyUseLineWithClick(self):
        """Bool at vtable offset 36; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(36))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def SplitTokensOnSelectionBoundaries(self):
        """Bool at vtable offset 38; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(38))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def TokenizationCodepointConfig(self, j):
        """``TokenizationCodepointRange`` element ``j`` of the vector at vtable offset 40; None if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(40))
        if field_pos == 0:
            return None
        slot_pos = self._tab.Vector(field_pos) + flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
        table_pos = self._tab.Indirect(slot_pos)
        # Imported lazily, only once an element is actually requested.
        from libtextclassifier3.TokenizationCodepointRange import TokenizationCodepointRange
        entry = TokenizationCodepointRange()
        entry.Init(self._tab.Bytes, table_pos)
        return entry

    def TokenizationCodepointConfigLength(self):
        """``VectorLen`` of the vector at vtable offset 40; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(40))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def TokenizationCodepointConfigIsNone(self):
        """True when the lookup for vtable offset 40 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(40)) == 0

    def CenterTokenSelectionMethod(self):
        """Int32 at vtable offset 42; 0 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(42))
        if field_pos == 0:
            return 0
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def SnapLabelSpanBoundariesToContainingTokens(self):
        """Bool at vtable offset 44; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(44))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def SupportedCodepointRanges(self, j):
        """``CodepointRange`` element ``j`` of the vector at vtable offset 46; None if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(46))
        if field_pos == 0:
            return None
        slot_pos = self._tab.Vector(field_pos) + flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
        table_pos = self._tab.Indirect(slot_pos)
        # Imported lazily, only once an element is actually requested.
        from libtextclassifier3.CodepointRange import CodepointRange
        entry = CodepointRange()
        entry.Init(self._tab.Bytes, table_pos)
        return entry

    def SupportedCodepointRangesLength(self):
        """``VectorLen`` of the vector at vtable offset 46; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(46))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def SupportedCodepointRangesIsNone(self):
        """True when the lookup for vtable offset 46 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(46)) == 0

    def InternalTokenizerCodepointRanges(self, j):
        """``CodepointRange`` element ``j`` of the vector at vtable offset 48; None if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(48))
        if field_pos == 0:
            return None
        slot_pos = self._tab.Vector(field_pos) + flatbuffers.number_types.UOffsetTFlags.py_type(j) * 4
        table_pos = self._tab.Indirect(slot_pos)
        # Imported lazily, only once an element is actually requested.
        from libtextclassifier3.CodepointRange import CodepointRange
        entry = CodepointRange()
        entry.Init(self._tab.Bytes, table_pos)
        return entry

    def InternalTokenizerCodepointRangesLength(self):
        """``VectorLen`` of the vector at vtable offset 48; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(48))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def InternalTokenizerCodepointRangesIsNone(self):
        """True when the lookup for vtable offset 48 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(48)) == 0

    def MinSupportedCodepointRatio(self):
        """Float32 at vtable offset 50; 0.0 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(50))
        if field_pos == 0:
            return 0.0
        return self._tab.Get(flatbuffers.number_types.Float32Flags, field_pos + self._tab.Pos)

    def FeatureVersion(self):
        """Int32 at vtable offset 52; 0 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(52))
        if field_pos == 0:
            return 0
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def TokenizationType(self):
        """Int32 at vtable offset 54; 1 when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(54))
        if field_pos == 0:
            return 1
        return self._tab.Get(flatbuffers.number_types.Int32Flags, field_pos + self._tab.Pos)

    def IcuPreserveWhitespaceTokens(self):
        """Bool at vtable offset 56; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(56))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def IgnoredSpanBoundaryCodepoints(self, j):
        """Int32 element ``j`` (4-byte stride) of the vector at vtable offset 58; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(58))
        if field_pos == 0:
            return 0
        vector_start = self._tab.Vector(field_pos)
        element_shift = flatbuffers.number_types.UOffsetTFlags.py_type(j * 4)
        return self._tab.Get(flatbuffers.number_types.Int32Flags, vector_start + element_shift)

    def IgnoredSpanBoundaryCodepointsAsNumpy(self):
        """Vector at vtable offset 58 via ``GetVectorAsNumpy``; the int 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(58))
        if field_pos == 0:
            return 0
        return self._tab.GetVectorAsNumpy(flatbuffers.number_types.Int32Flags, field_pos)

    def IgnoredSpanBoundaryCodepointsLength(self):
        """``VectorLen`` of the vector at vtable offset 58; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(58))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def IgnoredSpanBoundaryCodepointsIsNone(self):
        """True when the lookup for vtable offset 58 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(58)) == 0

    def BoundsSensitiveFeatures(self):
        """Nested ``BoundsSensitiveFeatures`` table at vtable offset 60; None when the lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(60))
        if field_pos == 0:
            return None
        table_pos = self._tab.Indirect(field_pos + self._tab.Pos)
        # Imported lazily, only once the nested table is actually requested.
        from libtextclassifier3.FeatureProcessorOptions_.BoundsSensitiveFeatures import BoundsSensitiveFeatures
        nested = BoundsSensitiveFeatures()
        nested.Init(self._tab.Bytes, table_pos)
        return nested

    def AllowedChargrams(self, j):
        """String element ``j`` (4-byte stride) of the vector at vtable offset 62; "" if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(62))
        if field_pos == 0:
            return ""
        vector_start = self._tab.Vector(field_pos)
        element_shift = flatbuffers.number_types.UOffsetTFlags.py_type(j * 4)
        return self._tab.String(vector_start + element_shift)

    def AllowedChargramsLength(self):
        """``VectorLen`` of the vector at vtable offset 62; 0 if no vector."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(62))
        if field_pos == 0:
            return 0
        return self._tab.VectorLen(field_pos)

    def AllowedChargramsIsNone(self):
        """True when the lookup for vtable offset 62 yields 0."""
        return flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(62)) == 0

    def TokenizeOnScriptChange(self):
        """Bool at vtable offset 64; False when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(64))
        if field_pos == 0:
            return False
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)

    def UsePipeCharacterForNewline(self):
        """Bool at vtable offset 66; True when the offset lookup yields 0."""
        field_pos = flatbuffers.number_types.UOffsetTFlags.py_type(self._tab.Offset(66))
        if field_pos == 0:
            return True
        stored = self._tab.Get(flatbuffers.number_types.BoolFlags, field_pos + self._tab.Pos)
        return bool(stored)
| |
|
def FeatureProcessorOptionsStart(builder):
    """Open a new table on ``builder`` by calling ``StartObject(32)``."""
    builder.StartObject(32)
def FeatureProcessorOptionsAddNumBuckets(builder, numBuckets):
    """Prepend ``numBuckets`` into int32 slot 0, passing -1 as the slot default."""
    builder.PrependInt32Slot(0, numBuckets, -1)
def FeatureProcessorOptionsAddEmbeddingSize(builder, embeddingSize):
    """Prepend ``embeddingSize`` into int32 slot 1, passing -1 as the slot default."""
    builder.PrependInt32Slot(1, embeddingSize, -1)
def FeatureProcessorOptionsAddEmbeddingQuantizationBits(builder, embeddingQuantizationBits):
    """Prepend ``embeddingQuantizationBits`` into int32 slot 2, passing 8 as the slot default."""
    builder.PrependInt32Slot(2, embeddingQuantizationBits, 8)
def FeatureProcessorOptionsAddContextSize(builder, contextSize):
    """Prepend ``contextSize`` into int32 slot 3, passing -1 as the slot default."""
    builder.PrependInt32Slot(3, contextSize, -1)
def FeatureProcessorOptionsAddMaxSelectionSpan(builder, maxSelectionSpan):
    """Prepend ``maxSelectionSpan`` into int32 slot 4, passing -1 as the slot default."""
    builder.PrependInt32Slot(4, maxSelectionSpan, -1)
def FeatureProcessorOptionsAddChargramOrders(builder, chargramOrders):
    """Prepend the offset ``chargramOrders`` into relative-offset slot 5 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(chargramOrders)
    builder.PrependUOffsetTRelativeSlot(5, vector_offset, 0)
def FeatureProcessorOptionsStartChargramOrdersVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the chargram-orders vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddMaxWordLength(builder, maxWordLength):
    """Prepend ``maxWordLength`` into int32 slot 6, passing 20 as the slot default."""
    builder.PrependInt32Slot(6, maxWordLength, 20)
def FeatureProcessorOptionsAddUnicodeAwareFeatures(builder, unicodeAwareFeatures):
    """Prepend ``unicodeAwareFeatures`` into bool slot 7, passing 0 as the slot default."""
    builder.PrependBoolSlot(7, unicodeAwareFeatures, 0)
def FeatureProcessorOptionsAddExtractCaseFeature(builder, extractCaseFeature):
    """Prepend ``extractCaseFeature`` into bool slot 8, passing 0 as the slot default."""
    builder.PrependBoolSlot(8, extractCaseFeature, 0)
def FeatureProcessorOptionsAddExtractSelectionMaskFeature(builder, extractSelectionMaskFeature):
    """Prepend ``extractSelectionMaskFeature`` into bool slot 9, passing 0 as the slot default."""
    builder.PrependBoolSlot(9, extractSelectionMaskFeature, 0)
def FeatureProcessorOptionsAddRegexpFeature(builder, regexpFeature):
    """Prepend the offset ``regexpFeature`` into relative-offset slot 10 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(regexpFeature)
    builder.PrependUOffsetTRelativeSlot(10, vector_offset, 0)
def FeatureProcessorOptionsStartRegexpFeatureVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the regexp-feature vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddRemapDigits(builder, remapDigits):
    """Prepend ``remapDigits`` into bool slot 11, passing 0 as the slot default."""
    builder.PrependBoolSlot(11, remapDigits, 0)
def FeatureProcessorOptionsAddLowercaseTokens(builder, lowercaseTokens):
    """Prepend ``lowercaseTokens`` into bool slot 12, passing 0 as the slot default."""
    builder.PrependBoolSlot(12, lowercaseTokens, 0)
def FeatureProcessorOptionsAddSelectionReducedOutputSpace(builder, selectionReducedOutputSpace):
    """Prepend ``selectionReducedOutputSpace`` into bool slot 13, passing 1 as the slot default."""
    builder.PrependBoolSlot(13, selectionReducedOutputSpace, 1)
def FeatureProcessorOptionsAddCollections(builder, collections):
    """Prepend the offset ``collections`` into relative-offset slot 14 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(collections)
    builder.PrependUOffsetTRelativeSlot(14, vector_offset, 0)
def FeatureProcessorOptionsStartCollectionsVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the collections vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddDefaultCollection(builder, defaultCollection):
    """Prepend ``defaultCollection`` into int32 slot 15, passing -1 as the slot default."""
    builder.PrependInt32Slot(15, defaultCollection, -1)
def FeatureProcessorOptionsAddOnlyUseLineWithClick(builder, onlyUseLineWithClick):
    """Prepend ``onlyUseLineWithClick`` into bool slot 16, passing 0 as the slot default."""
    builder.PrependBoolSlot(16, onlyUseLineWithClick, 0)
def FeatureProcessorOptionsAddSplitTokensOnSelectionBoundaries(builder, splitTokensOnSelectionBoundaries):
    """Prepend ``splitTokensOnSelectionBoundaries`` into bool slot 17, passing 0 as the slot default."""
    builder.PrependBoolSlot(17, splitTokensOnSelectionBoundaries, 0)
def FeatureProcessorOptionsAddTokenizationCodepointConfig(builder, tokenizationCodepointConfig):
    """Prepend the offset ``tokenizationCodepointConfig`` into relative-offset slot 18 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(tokenizationCodepointConfig)
    builder.PrependUOffsetTRelativeSlot(18, vector_offset, 0)
def FeatureProcessorOptionsStartTokenizationCodepointConfigVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the tokenization-codepoint-config vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddCenterTokenSelectionMethod(builder, centerTokenSelectionMethod):
    """Prepend ``centerTokenSelectionMethod`` into int32 slot 19, passing 0 as the slot default."""
    builder.PrependInt32Slot(19, centerTokenSelectionMethod, 0)
def FeatureProcessorOptionsAddSnapLabelSpanBoundariesToContainingTokens(builder, snapLabelSpanBoundariesToContainingTokens):
    """Prepend ``snapLabelSpanBoundariesToContainingTokens`` into bool slot 20, passing 0 as the slot default."""
    builder.PrependBoolSlot(20, snapLabelSpanBoundariesToContainingTokens, 0)
def FeatureProcessorOptionsAddSupportedCodepointRanges(builder, supportedCodepointRanges):
    """Prepend the offset ``supportedCodepointRanges`` into relative-offset slot 21 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(supportedCodepointRanges)
    builder.PrependUOffsetTRelativeSlot(21, vector_offset, 0)
def FeatureProcessorOptionsStartSupportedCodepointRangesVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the supported-codepoint-ranges vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddInternalTokenizerCodepointRanges(builder, internalTokenizerCodepointRanges):
    """Prepend the offset ``internalTokenizerCodepointRanges`` into relative-offset slot 22 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(internalTokenizerCodepointRanges)
    builder.PrependUOffsetTRelativeSlot(22, vector_offset, 0)
def FeatureProcessorOptionsStartInternalTokenizerCodepointRangesVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the internal-tokenizer-codepoint-ranges vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddMinSupportedCodepointRatio(builder, minSupportedCodepointRatio):
    """Prepend ``minSupportedCodepointRatio`` into float32 slot 23, passing 0.0 as the slot default."""
    builder.PrependFloat32Slot(23, minSupportedCodepointRatio, 0.0)
def FeatureProcessorOptionsAddFeatureVersion(builder, featureVersion):
    """Prepend ``featureVersion`` into int32 slot 24, passing 0 as the slot default."""
    builder.PrependInt32Slot(24, featureVersion, 0)
def FeatureProcessorOptionsAddTokenizationType(builder, tokenizationType):
    """Prepend ``tokenizationType`` into int32 slot 25, passing 1 as the slot default."""
    builder.PrependInt32Slot(25, tokenizationType, 1)
def FeatureProcessorOptionsAddIcuPreserveWhitespaceTokens(builder, icuPreserveWhitespaceTokens):
    """Prepend ``icuPreserveWhitespaceTokens`` into bool slot 26, passing 0 as the slot default."""
    builder.PrependBoolSlot(26, icuPreserveWhitespaceTokens, 0)
def FeatureProcessorOptionsAddIgnoredSpanBoundaryCodepoints(builder, ignoredSpanBoundaryCodepoints):
    """Prepend the offset ``ignoredSpanBoundaryCodepoints`` into relative-offset slot 27 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(ignoredSpanBoundaryCodepoints)
    builder.PrependUOffsetTRelativeSlot(27, vector_offset, 0)
def FeatureProcessorOptionsStartIgnoredSpanBoundaryCodepointsVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the ignored-span-boundary-codepoints vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddBoundsSensitiveFeatures(builder, boundsSensitiveFeatures):
    """Prepend the offset ``boundsSensitiveFeatures`` into relative-offset slot 28 (default 0)."""
    table_offset = flatbuffers.number_types.UOffsetTFlags.py_type(boundsSensitiveFeatures)
    builder.PrependUOffsetTRelativeSlot(28, table_offset, 0)
def FeatureProcessorOptionsAddAllowedChargrams(builder, allowedChargrams):
    """Prepend the offset ``allowedChargrams`` into relative-offset slot 29 (default 0)."""
    vector_offset = flatbuffers.number_types.UOffsetTFlags.py_type(allowedChargrams)
    builder.PrependUOffsetTRelativeSlot(29, vector_offset, 0)
def FeatureProcessorOptionsStartAllowedChargramsVector(builder, numElems):
    """Return ``builder.StartVector(4, numElems, 4)`` for the allowed-chargrams vector."""
    started = builder.StartVector(4, numElems, 4)
    return started
def FeatureProcessorOptionsAddTokenizeOnScriptChange(builder, tokenizeOnScriptChange):
    """Prepend ``tokenizeOnScriptChange`` into bool slot 30, passing 0 as the slot default."""
    builder.PrependBoolSlot(30, tokenizeOnScriptChange, 0)
def FeatureProcessorOptionsAddUsePipeCharacterForNewline(builder, usePipeCharacterForNewline):
    """Prepend ``usePipeCharacterForNewline`` into bool slot 31, passing 1 as the slot default."""
    builder.PrependBoolSlot(31, usePipeCharacterForNewline, 1)
def FeatureProcessorOptionsEnd(builder):
    """Close the table under construction and return the result of ``builder.EndObject()``."""
    finished = builder.EndObject()
    return finished
| |
|