Spaces:
Runtime error
Runtime error
| # Generated by the protocol buffer compiler. DO NOT EDIT! | |
| # source: sentencepiece_model.proto | |
| # Copyright 2022 The HuggingFace Team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| from google.protobuf import descriptor as _descriptor | |
| from google.protobuf import message as _message | |
| from google.protobuf import reflection as _reflection | |
| from google.protobuf import symbol_database as _symbol_database | |
| # @@protoc_insertion_point(imports) | |
| _sym_db = _symbol_database.Default() | |
| DESCRIPTOR = _descriptor.FileDescriptor( | |
| name="sentencepiece_model.proto", | |
| package="sentencepiece", | |
| syntax="proto2", | |
| serialized_options=b"H\003", | |
| create_key=_descriptor._internal_create_key, | |
| serialized_pb=( | |
| b'\n\x19sentencepiece_model.proto\x12\rsentencepiece"\xa1\n\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01' | |
| b" \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02" | |
| b" \x01(\t\x12\x41\n\nmodel_type\x18\x03" | |
| b" \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04" | |
| b" \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12" | |
| b' \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12"\n\x12\x63haracter_coverage\x18\n' | |
| b" \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b" | |
| b" \x01(\x04:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12" | |
| b' \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12"\n\x16training_sentence_size\x18\r' | |
| b" \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e" | |
| b" \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f" | |
| b" \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12" | |
| b" \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10" | |
| b" \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11" | |
| b" \x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14" | |
| b" \x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15" | |
| b" \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17" | |
| b" \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16" | |
| b" \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18" | |
| b" \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0csplit_digits\x18\x19" | |
| b" \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e" | |
| b" \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x16\n\x0erequired_chars\x18$" | |
| b" \x01(\t\x12\x1c\n\rbyte_fallback\x18# \x01(\x08:\x05\x66\x61lse\x12+\n\x1dvocabulary_output_piece_score\x18" | |
| b' \x01(\x08:\x04true\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18"' | |
| b" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18)" | |
| b" \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+" | |
| b" \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05<unk>\x12\x16\n\tbos_piece\x18." | |
| b" \x01(\t:\x03<s>\x12\x17\n\teos_piece\x18/ \x01(\t:\x04</s>\x12\x18\n\tpad_piece\x18\x30" | |
| b" \x01(\t:\x05<pad>\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87" | |
| b" \x12+\n\x1ctrain_extremely_large_corpus\x18\x31" | |
| b' \x01(\x08:\x05\x66\x61lse"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01' | |
| b" \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03" | |
| b" \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12" | |
| b" \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06" | |
| b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01' | |
| b' \x03(\x0b\x32".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01' | |
| b" \x01(\t\x12\x10\n\x08\x65xpected\x18\x02" | |
| b' \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02"\xfe\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01' | |
| b" \x03(\x0b\x32'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02" | |
| b" \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03" | |
| b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04" | |
| b" \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x12\x38\n\x11\x64\x65normalizer_spec\x18\x05" | |
| b" \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x1a\xd2\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01" | |
| b" \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03" | |
| b' \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL"T\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\x08\n\x04\x42YTE\x10\x06\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03' | |
| ), | |
| ) | |
| _TRAINERSPEC_MODELTYPE = _descriptor.EnumDescriptor( | |
| name="ModelType", | |
| full_name="sentencepiece.TrainerSpec.ModelType", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| values=[ | |
| _descriptor.EnumValueDescriptor( | |
| name="UNIGRAM", | |
| index=0, | |
| number=1, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="BPE", | |
| index=1, | |
| number=2, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="WORD", | |
| index=2, | |
| number=3, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="CHAR", | |
| index=3, | |
| number=4, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| containing_type=None, | |
| serialized_options=None, | |
| serialized_start=1294, | |
| serialized_end=1347, | |
| ) | |
| _sym_db.RegisterEnumDescriptor(_TRAINERSPEC_MODELTYPE) | |
| _MODELPROTO_SENTENCEPIECE_TYPE = _descriptor.EnumDescriptor( | |
| name="Type", | |
| full_name="sentencepiece.ModelProto.SentencePiece.Type", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| values=[ | |
| _descriptor.EnumValueDescriptor( | |
| name="NORMAL", | |
| index=0, | |
| number=1, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="UNKNOWN", | |
| index=1, | |
| number=2, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="CONTROL", | |
| index=2, | |
| number=3, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="USER_DEFINED", | |
| index=3, | |
| number=4, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="BYTE", | |
| index=4, | |
| number=6, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.EnumValueDescriptor( | |
| name="UNUSED", | |
| index=5, | |
| number=5, | |
| serialized_options=None, | |
| type=None, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| containing_type=None, | |
| serialized_options=None, | |
| serialized_start=2100, | |
| serialized_end=2184, | |
| ) | |
| _sym_db.RegisterEnumDescriptor(_MODELPROTO_SENTENCEPIECE_TYPE) | |
| _TRAINERSPEC = _descriptor.Descriptor( | |
| name="TrainerSpec", | |
| full_name="sentencepiece.TrainerSpec", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| containing_type=None, | |
| create_key=_descriptor._internal_create_key, | |
| fields=[ | |
| _descriptor.FieldDescriptor( | |
| name="input", | |
| full_name="sentencepiece.TrainerSpec.input", | |
| index=0, | |
| number=1, | |
| type=9, | |
| cpp_type=9, | |
| label=3, | |
| has_default_value=False, | |
| default_value=[], | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="input_format", | |
| full_name="sentencepiece.TrainerSpec.input_format", | |
| index=1, | |
| number=7, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="model_prefix", | |
| full_name="sentencepiece.TrainerSpec.model_prefix", | |
| index=2, | |
| number=2, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="model_type", | |
| full_name="sentencepiece.TrainerSpec.model_type", | |
| index=3, | |
| number=3, | |
| type=14, | |
| cpp_type=8, | |
| label=1, | |
| has_default_value=True, | |
| default_value=1, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="vocab_size", | |
| full_name="sentencepiece.TrainerSpec.vocab_size", | |
| index=4, | |
| number=4, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=8000, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="accept_language", | |
| full_name="sentencepiece.TrainerSpec.accept_language", | |
| index=5, | |
| number=5, | |
| type=9, | |
| cpp_type=9, | |
| label=3, | |
| has_default_value=False, | |
| default_value=[], | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="self_test_sample_size", | |
| full_name="sentencepiece.TrainerSpec.self_test_sample_size", | |
| index=6, | |
| number=6, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=0, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="character_coverage", | |
| full_name="sentencepiece.TrainerSpec.character_coverage", | |
| index=7, | |
| number=10, | |
| type=2, | |
| cpp_type=6, | |
| label=1, | |
| has_default_value=True, | |
| default_value=float(0.9995), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="input_sentence_size", | |
| full_name="sentencepiece.TrainerSpec.input_sentence_size", | |
| index=8, | |
| number=11, | |
| type=4, | |
| cpp_type=4, | |
| label=1, | |
| has_default_value=True, | |
| default_value=0, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="shuffle_input_sentence", | |
| full_name="sentencepiece.TrainerSpec.shuffle_input_sentence", | |
| index=9, | |
| number=19, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="mining_sentence_size", | |
| full_name="sentencepiece.TrainerSpec.mining_sentence_size", | |
| index=10, | |
| number=12, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=False, | |
| default_value=0, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=b"\030\001", | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="training_sentence_size", | |
| full_name="sentencepiece.TrainerSpec.training_sentence_size", | |
| index=11, | |
| number=13, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=False, | |
| default_value=0, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=b"\030\001", | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="seed_sentencepiece_size", | |
| full_name="sentencepiece.TrainerSpec.seed_sentencepiece_size", | |
| index=12, | |
| number=14, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=1000000, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="shrinking_factor", | |
| full_name="sentencepiece.TrainerSpec.shrinking_factor", | |
| index=13, | |
| number=15, | |
| type=2, | |
| cpp_type=6, | |
| label=1, | |
| has_default_value=True, | |
| default_value=float(0.75), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="max_sentence_length", | |
| full_name="sentencepiece.TrainerSpec.max_sentence_length", | |
| index=14, | |
| number=18, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=4192, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="num_threads", | |
| full_name="sentencepiece.TrainerSpec.num_threads", | |
| index=15, | |
| number=16, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=16, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="num_sub_iterations", | |
| full_name="sentencepiece.TrainerSpec.num_sub_iterations", | |
| index=16, | |
| number=17, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=2, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="max_sentencepiece_length", | |
| full_name="sentencepiece.TrainerSpec.max_sentencepiece_length", | |
| index=17, | |
| number=20, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=16, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="split_by_unicode_script", | |
| full_name="sentencepiece.TrainerSpec.split_by_unicode_script", | |
| index=18, | |
| number=21, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="split_by_number", | |
| full_name="sentencepiece.TrainerSpec.split_by_number", | |
| index=19, | |
| number=23, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="split_by_whitespace", | |
| full_name="sentencepiece.TrainerSpec.split_by_whitespace", | |
| index=20, | |
| number=22, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="treat_whitespace_as_suffix", | |
| full_name="sentencepiece.TrainerSpec.treat_whitespace_as_suffix", | |
| index=21, | |
| number=24, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=False, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="split_digits", | |
| full_name="sentencepiece.TrainerSpec.split_digits", | |
| index=22, | |
| number=25, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=False, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="control_symbols", | |
| full_name="sentencepiece.TrainerSpec.control_symbols", | |
| index=23, | |
| number=30, | |
| type=9, | |
| cpp_type=9, | |
| label=3, | |
| has_default_value=False, | |
| default_value=[], | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="user_defined_symbols", | |
| full_name="sentencepiece.TrainerSpec.user_defined_symbols", | |
| index=24, | |
| number=31, | |
| type=9, | |
| cpp_type=9, | |
| label=3, | |
| has_default_value=False, | |
| default_value=[], | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="required_chars", | |
| full_name="sentencepiece.TrainerSpec.required_chars", | |
| index=25, | |
| number=36, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="byte_fallback", | |
| full_name="sentencepiece.TrainerSpec.byte_fallback", | |
| index=26, | |
| number=35, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=False, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="vocabulary_output_piece_score", | |
| full_name="sentencepiece.TrainerSpec.vocabulary_output_piece_score", | |
| index=27, | |
| number=32, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="hard_vocab_limit", | |
| full_name="sentencepiece.TrainerSpec.hard_vocab_limit", | |
| index=28, | |
| number=33, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="use_all_vocab", | |
| full_name="sentencepiece.TrainerSpec.use_all_vocab", | |
| index=29, | |
| number=34, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=False, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="unk_id", | |
| full_name="sentencepiece.TrainerSpec.unk_id", | |
| index=30, | |
| number=40, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=0, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="bos_id", | |
| full_name="sentencepiece.TrainerSpec.bos_id", | |
| index=31, | |
| number=41, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=1, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="eos_id", | |
| full_name="sentencepiece.TrainerSpec.eos_id", | |
| index=32, | |
| number=42, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=2, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="pad_id", | |
| full_name="sentencepiece.TrainerSpec.pad_id", | |
| index=33, | |
| number=43, | |
| type=5, | |
| cpp_type=1, | |
| label=1, | |
| has_default_value=True, | |
| default_value=-1, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="unk_piece", | |
| full_name="sentencepiece.TrainerSpec.unk_piece", | |
| index=34, | |
| number=45, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=True, | |
| default_value=b"<unk>".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="bos_piece", | |
| full_name="sentencepiece.TrainerSpec.bos_piece", | |
| index=35, | |
| number=46, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=True, | |
| default_value=b"<s>".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="eos_piece", | |
| full_name="sentencepiece.TrainerSpec.eos_piece", | |
| index=36, | |
| number=47, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=True, | |
| default_value=b"</s>".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="pad_piece", | |
| full_name="sentencepiece.TrainerSpec.pad_piece", | |
| index=37, | |
| number=48, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=True, | |
| default_value=b"<pad>".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="unk_surface", | |
| full_name="sentencepiece.TrainerSpec.unk_surface", | |
| index=38, | |
| number=44, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=True, | |
| default_value=b" \342\201\207 ".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="train_extremely_large_corpus", | |
| full_name="sentencepiece.TrainerSpec.train_extremely_large_corpus", | |
| index=39, | |
| number=49, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=False, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| extensions=[], | |
| nested_types=[], | |
| enum_types=[ | |
| _TRAINERSPEC_MODELTYPE, | |
| ], | |
| serialized_options=None, | |
| is_extendable=True, | |
| syntax="proto2", | |
| extension_ranges=[ | |
| (200, 536870912), | |
| ], | |
| oneofs=[], | |
| serialized_start=45, | |
| serialized_end=1358, | |
| ) | |
| _NORMALIZERSPEC = _descriptor.Descriptor( | |
| name="NormalizerSpec", | |
| full_name="sentencepiece.NormalizerSpec", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| containing_type=None, | |
| create_key=_descriptor._internal_create_key, | |
| fields=[ | |
| _descriptor.FieldDescriptor( | |
| name="name", | |
| full_name="sentencepiece.NormalizerSpec.name", | |
| index=0, | |
| number=1, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="precompiled_charsmap", | |
| full_name="sentencepiece.NormalizerSpec.precompiled_charsmap", | |
| index=1, | |
| number=2, | |
| type=12, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"", | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="add_dummy_prefix", | |
| full_name="sentencepiece.NormalizerSpec.add_dummy_prefix", | |
| index=2, | |
| number=3, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="remove_extra_whitespaces", | |
| full_name="sentencepiece.NormalizerSpec.remove_extra_whitespaces", | |
| index=3, | |
| number=4, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="escape_whitespaces", | |
| full_name="sentencepiece.NormalizerSpec.escape_whitespaces", | |
| index=4, | |
| number=5, | |
| type=8, | |
| cpp_type=7, | |
| label=1, | |
| has_default_value=True, | |
| default_value=True, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="normalization_rule_tsv", | |
| full_name="sentencepiece.NormalizerSpec.normalization_rule_tsv", | |
| index=5, | |
| number=6, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| extensions=[], | |
| nested_types=[], | |
| enum_types=[], | |
| serialized_options=None, | |
| is_extendable=True, | |
| syntax="proto2", | |
| extension_ranges=[ | |
| (200, 536870912), | |
| ], | |
| oneofs=[], | |
| serialized_start=1361, | |
| serialized_end=1570, | |
| ) | |
| _SELFTESTDATA_SAMPLE = _descriptor.Descriptor( | |
| name="Sample", | |
| full_name="sentencepiece.SelfTestData.Sample", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| containing_type=None, | |
| create_key=_descriptor._internal_create_key, | |
| fields=[ | |
| _descriptor.FieldDescriptor( | |
| name="input", | |
| full_name="sentencepiece.SelfTestData.Sample.input", | |
| index=0, | |
| number=1, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="expected", | |
| full_name="sentencepiece.SelfTestData.Sample.expected", | |
| index=1, | |
| number=2, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| extensions=[], | |
| nested_types=[], | |
| enum_types=[], | |
| serialized_options=None, | |
| is_extendable=False, | |
| syntax="proto2", | |
| extension_ranges=[], | |
| oneofs=[], | |
| serialized_start=1641, | |
| serialized_end=1682, | |
| ) | |
| _SELFTESTDATA = _descriptor.Descriptor( | |
| name="SelfTestData", | |
| full_name="sentencepiece.SelfTestData", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| containing_type=None, | |
| create_key=_descriptor._internal_create_key, | |
| fields=[ | |
| _descriptor.FieldDescriptor( | |
| name="samples", | |
| full_name="sentencepiece.SelfTestData.samples", | |
| index=0, | |
| number=1, | |
| type=11, | |
| cpp_type=10, | |
| label=3, | |
| has_default_value=False, | |
| default_value=[], | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| extensions=[], | |
| nested_types=[ | |
| _SELFTESTDATA_SAMPLE, | |
| ], | |
| enum_types=[], | |
| serialized_options=None, | |
| is_extendable=True, | |
| syntax="proto2", | |
| extension_ranges=[ | |
| (200, 536870912), | |
| ], | |
| oneofs=[], | |
| serialized_start=1572, | |
| serialized_end=1693, | |
| ) | |
| _MODELPROTO_SENTENCEPIECE = _descriptor.Descriptor( | |
| name="SentencePiece", | |
| full_name="sentencepiece.ModelProto.SentencePiece", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| containing_type=None, | |
| create_key=_descriptor._internal_create_key, | |
| fields=[ | |
| _descriptor.FieldDescriptor( | |
| name="piece", | |
| full_name="sentencepiece.ModelProto.SentencePiece.piece", | |
| index=0, | |
| number=1, | |
| type=9, | |
| cpp_type=9, | |
| label=1, | |
| has_default_value=False, | |
| default_value=b"".decode("utf-8"), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="score", | |
| full_name="sentencepiece.ModelProto.SentencePiece.score", | |
| index=1, | |
| number=2, | |
| type=2, | |
| cpp_type=6, | |
| label=1, | |
| has_default_value=False, | |
| default_value=float(0), | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="type", | |
| full_name="sentencepiece.ModelProto.SentencePiece.type", | |
| index=2, | |
| number=3, | |
| type=14, | |
| cpp_type=8, | |
| label=1, | |
| has_default_value=True, | |
| default_value=1, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| extensions=[], | |
| nested_types=[], | |
| enum_types=[ | |
| _MODELPROTO_SENTENCEPIECE_TYPE, | |
| ], | |
| serialized_options=None, | |
| is_extendable=True, | |
| syntax="proto2", | |
| extension_ranges=[ | |
| (200, 536870912), | |
| ], | |
| oneofs=[], | |
| serialized_start=1985, | |
| serialized_end=2195, | |
| ) | |
| _MODELPROTO = _descriptor.Descriptor( | |
| name="ModelProto", | |
| full_name="sentencepiece.ModelProto", | |
| filename=None, | |
| file=DESCRIPTOR, | |
| containing_type=None, | |
| create_key=_descriptor._internal_create_key, | |
| fields=[ | |
| _descriptor.FieldDescriptor( | |
| name="pieces", | |
| full_name="sentencepiece.ModelProto.pieces", | |
| index=0, | |
| number=1, | |
| type=11, | |
| cpp_type=10, | |
| label=3, | |
| has_default_value=False, | |
| default_value=[], | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="trainer_spec", | |
| full_name="sentencepiece.ModelProto.trainer_spec", | |
| index=1, | |
| number=2, | |
| type=11, | |
| cpp_type=10, | |
| label=1, | |
| has_default_value=False, | |
| default_value=None, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="normalizer_spec", | |
| full_name="sentencepiece.ModelProto.normalizer_spec", | |
| index=2, | |
| number=3, | |
| type=11, | |
| cpp_type=10, | |
| label=1, | |
| has_default_value=False, | |
| default_value=None, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="self_test_data", | |
| full_name="sentencepiece.ModelProto.self_test_data", | |
| index=3, | |
| number=4, | |
| type=11, | |
| cpp_type=10, | |
| label=1, | |
| has_default_value=False, | |
| default_value=None, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| _descriptor.FieldDescriptor( | |
| name="denormalizer_spec", | |
| full_name="sentencepiece.ModelProto.denormalizer_spec", | |
| index=4, | |
| number=5, | |
| type=11, | |
| cpp_type=10, | |
| label=1, | |
| has_default_value=False, | |
| default_value=None, | |
| message_type=None, | |
| enum_type=None, | |
| containing_type=None, | |
| is_extension=False, | |
| extension_scope=None, | |
| serialized_options=None, | |
| file=DESCRIPTOR, | |
| create_key=_descriptor._internal_create_key, | |
| ), | |
| ], | |
| extensions=[], | |
| nested_types=[ | |
| _MODELPROTO_SENTENCEPIECE, | |
| ], | |
| enum_types=[], | |
| serialized_options=None, | |
| is_extendable=True, | |
| syntax="proto2", | |
| extension_ranges=[ | |
| (200, 536870912), | |
| ], | |
| oneofs=[], | |
| serialized_start=1696, | |
| serialized_end=2206, | |
| ) | |
| _TRAINERSPEC.fields_by_name["model_type"].enum_type = _TRAINERSPEC_MODELTYPE | |
| _TRAINERSPEC_MODELTYPE.containing_type = _TRAINERSPEC | |
| _SELFTESTDATA_SAMPLE.containing_type = _SELFTESTDATA | |
| _SELFTESTDATA.fields_by_name["samples"].message_type = _SELFTESTDATA_SAMPLE | |
| _MODELPROTO_SENTENCEPIECE.fields_by_name["type"].enum_type = _MODELPROTO_SENTENCEPIECE_TYPE | |
| _MODELPROTO_SENTENCEPIECE.containing_type = _MODELPROTO | |
| _MODELPROTO_SENTENCEPIECE_TYPE.containing_type = _MODELPROTO_SENTENCEPIECE | |
| _MODELPROTO.fields_by_name["pieces"].message_type = _MODELPROTO_SENTENCEPIECE | |
| _MODELPROTO.fields_by_name["trainer_spec"].message_type = _TRAINERSPEC | |
| _MODELPROTO.fields_by_name["normalizer_spec"].message_type = _NORMALIZERSPEC | |
| _MODELPROTO.fields_by_name["self_test_data"].message_type = _SELFTESTDATA | |
| _MODELPROTO.fields_by_name["denormalizer_spec"].message_type = _NORMALIZERSPEC | |
| DESCRIPTOR.message_types_by_name["TrainerSpec"] = _TRAINERSPEC | |
| DESCRIPTOR.message_types_by_name["NormalizerSpec"] = _NORMALIZERSPEC | |
| DESCRIPTOR.message_types_by_name["SelfTestData"] = _SELFTESTDATA | |
| DESCRIPTOR.message_types_by_name["ModelProto"] = _MODELPROTO | |
| _sym_db.RegisterFileDescriptor(DESCRIPTOR) | |
| TrainerSpec = _reflection.GeneratedProtocolMessageType( | |
| "TrainerSpec", | |
| (_message.Message,), | |
| { | |
| "DESCRIPTOR": _TRAINERSPEC, | |
| "__module__": "sentencepiece_model_pb2" | |
| # @@protoc_insertion_point(class_scope:sentencepiece.TrainerSpec) | |
| }, | |
| ) | |
| _sym_db.RegisterMessage(TrainerSpec) | |
| NormalizerSpec = _reflection.GeneratedProtocolMessageType( | |
| "NormalizerSpec", | |
| (_message.Message,), | |
| { | |
| "DESCRIPTOR": _NORMALIZERSPEC, | |
| "__module__": "sentencepiece_model_pb2" | |
| # @@protoc_insertion_point(class_scope:sentencepiece.NormalizerSpec) | |
| }, | |
| ) | |
| _sym_db.RegisterMessage(NormalizerSpec) | |
| SelfTestData = _reflection.GeneratedProtocolMessageType( | |
| "SelfTestData", | |
| (_message.Message,), | |
| { | |
| "Sample": _reflection.GeneratedProtocolMessageType( | |
| "Sample", | |
| (_message.Message,), | |
| { | |
| "DESCRIPTOR": _SELFTESTDATA_SAMPLE, | |
| "__module__": "sentencepiece_model_pb2" | |
| # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData.Sample) | |
| }, | |
| ), | |
| "DESCRIPTOR": _SELFTESTDATA, | |
| "__module__": "sentencepiece_model_pb2" | |
| # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData) | |
| }, | |
| ) | |
| _sym_db.RegisterMessage(SelfTestData) | |
| _sym_db.RegisterMessage(SelfTestData.Sample) | |
| ModelProto = _reflection.GeneratedProtocolMessageType( | |
| "ModelProto", | |
| (_message.Message,), | |
| { | |
| "SentencePiece": _reflection.GeneratedProtocolMessageType( | |
| "SentencePiece", | |
| (_message.Message,), | |
| { | |
| "DESCRIPTOR": _MODELPROTO_SENTENCEPIECE, | |
| "__module__": "sentencepiece_model_pb2" | |
| # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto.SentencePiece) | |
| }, | |
| ), | |
| "DESCRIPTOR": _MODELPROTO, | |
| "__module__": "sentencepiece_model_pb2" | |
| # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto) | |
| }, | |
| ) | |
| _sym_db.RegisterMessage(ModelProto) | |
| _sym_db.RegisterMessage(ModelProto.SentencePiece) | |
| DESCRIPTOR._options = None | |
| _TRAINERSPEC.fields_by_name["mining_sentence_size"]._options = None | |
| _TRAINERSPEC.fields_by_name["training_sentence_size"]._options = None | |
| # @@protoc_insertion_point(module_scope) | |