apinge commited on
Commit
311b542
·
verified ·
1 Parent(s): 770cb93

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. ov_models/bert-base-multilingual-uncased/README.md +6 -0
  3. ov_models/bert-base-multilingual-uncased/create_tokenizer_for_subword_i64.py +127 -0
  4. ov_models/bert-base-multilingual-uncased/openvino_detokenizer.bin +3 -0
  5. ov_models/bert-base-multilingual-uncased/openvino_detokenizer.xml +141 -0
  6. ov_models/bert-base-multilingual-uncased/openvino_tokenizer.bin +3 -0
  7. ov_models/bert-base-multilingual-uncased/openvino_tokenizer.xml +997 -0
  8. ov_models/bert-base-multilingual-uncased/test_openvino_tokenizer.py +33 -0
  9. ov_models/bert-base-uncased/openvino_detokenizer.bin +3 -0
  10. ov_models/bert-base-uncased/openvino_detokenizer.xml +141 -0
  11. ov_models/bert-base-uncased/openvino_tokenizer.bin +3 -0
  12. ov_models/bert-base-uncased/openvino_tokenizer.xml +997 -0
  13. ov_models/bert_EN_int8.bin +3 -0
  14. ov_models/bert_EN_int8.xml +0 -0
  15. ov_models/bert_ZH_int8.bin +3 -0
  16. ov_models/bert_ZH_int8.xml +0 -0
  17. ov_models/bert_ZH_static_int8.xml +0 -0
  18. ov_models/cmudict_cache.txt +0 -0
  19. ov_models/cppinyin/cpp_pinyin.raw +3 -0
  20. ov_models/cppjieba/dict/README.md +31 -0
  21. ov_models/cppjieba/dict/hmm_model.utf8 +0 -0
  22. ov_models/cppjieba/dict/idf.utf8 +0 -0
  23. ov_models/cppjieba/dict/jieba.dict.utf8 +0 -0
  24. ov_models/cppjieba/dict/pos_dict/char_state_tab.utf8 +0 -0
  25. ov_models/cppjieba/dict/pos_dict/prob_emit.utf8 +0 -0
  26. ov_models/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  27. ov_models/cppjieba/dict/pos_dict/prob_trans.utf8 +0 -0
  28. ov_models/cppjieba/dict/stop_words.utf8 +1534 -0
  29. ov_models/cppjieba/dict/user.dict.utf8 +4 -0
  30. ov_models/deepfilternet3/df_dec.bin +3 -0
  31. ov_models/deepfilternet3/df_dec.xml +0 -0
  32. ov_models/deepfilternet3/enc.bin +3 -0
  33. ov_models/deepfilternet3/enc.xml +0 -0
  34. ov_models/deepfilternet3/erb_dec.bin +3 -0
  35. ov_models/deepfilternet3/erb_dec.xml +0 -0
  36. ov_models/mini-bart-g2p-no_cache/config.json +46 -0
  37. ov_models/mini-bart-g2p-no_cache/generation_config.json +9 -0
  38. ov_models/mini-bart-g2p-no_cache/openvino_decoder_model.bin +3 -0
  39. ov_models/mini-bart-g2p-no_cache/openvino_decoder_model.xml +0 -0
  40. ov_models/mini-bart-g2p-no_cache/openvino_encoder_model.bin +3 -0
  41. ov_models/mini-bart-g2p-no_cache/openvino_encoder_model.xml +0 -0
  42. ov_models/mini-bart-g2p-no_cache/special_tokens_map.json +51 -0
  43. ov_models/mini-bart-g2p-no_cache/tokenizer.json +191 -0
  44. ov_models/mini-bart-g2p-no_cache/tokenizer_config.json +61 -0
  45. ov_models/mini-bart-g2p-no_cache/vocab.json +1 -0
  46. ov_models/opencpop-strict.txt +429 -0
  47. ov_models/punc.dic +0 -0
  48. ov_models/s2t_map.bin +3 -0
  49. ov_models/t2s_map.bin +3 -0
  50. ov_models/tts_en.bin +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ov_models/cppinyin/cpp_pinyin.raw filter=lfs diff=lfs merge=lfs -text
ov_models/bert-base-multilingual-uncased/README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## How to get openvino tokenizer
2
+ ```bash
3
+ pip install transformers
4
+ pip install openvino_tokenizers
5
+ convert_tokenizer google-bert/bert-base-multilingual-uncased -o bert-base-multilingual-uncased --skip-special-tokens --trust-remote-code --utf8_replace_mode replace
6
+ ```
ov_models/bert-base-multilingual-uncased/create_tokenizer_for_subword_i64.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer
2
+ from openvino import compile_model, save_model
3
+ from openvino.runtime import Model, PartialShape, Type, op
4
+ from openvino_tokenizers import _get_factory
5
+ from openvino_tokenizers.constants import TOKEN_IDS_INPUT_NAME, ATTENTION_MASK_INPUT_NAME, TOKEN_TYPE_IDS_INPUT_NAME, \
6
+ TOKENIZER_NAME, DETOKENIZER_NAME
7
+ from openvino_tokenizers.hf_parser import TransformersTokenizerPipelineParser
8
+ from openvino_tokenizers.tokenizer_pipeline import SpecialTokensSplit, BasePipelineStep
9
+ from openvino_tokenizers.utils import TokenzierConversionParams, change_outputs_type
10
+ from openvino.preprocess import PrePostProcessor
11
+
12
def get_tokenizer_detokenizer(hf_tokenizer, params):
    """Build an OpenVINO tokenizer/detokenizer model pair from a HF tokenizer.

    The tokenizer model exposes the usual integer outputs (converted to i64)
    plus an extra "regex_string_output" string output taken from the
    pre-tokenization (regex split) stage.  The detokenizer decodes i64 token
    ids back to strings; a preprocessing transpose lets callers feed ids
    shaped [number_of_tokens, 1].

    Returns a (tokenizer_model, detokenizer_model) tuple.
    """
    pipeline = TransformersTokenizerPipelineParser(hf_tokenizer, params).parse()
    pipeline.finalize()

    string_inputs = [
        op.Parameter(Type.string, PartialShape(["?"]))
        for _ in range(pipeline.number_of_inputs)
    ]

    processing_outputs = []
    for node in string_inputs:
        node = _get_factory().create("StringTensorUnpack", node.outputs()).outputs()
        ragged = []
        regex_split_outputs = []
        # Special-token splitting (when present) is always the first pipeline
        # step and introduces the ragged dimension itself.
        if isinstance(pipeline.steps[0], SpecialTokensSplit):
            node = pipeline.add_ragged_dimension(node)
            node = pipeline.steps[0].get_ov_subgraph(node)
            ragged, node = node[:2], node[2:]

        for step in pipeline.normalization_steps:
            node = step.get_ov_subgraph(node)

        # Re-attach (or create) the ragged dimension before pre-tokenization.
        node = pipeline.add_ragged_dimension(node) if not ragged else ragged + node

        for step in pipeline.pre_tokenization_steps:
            node = step.get_ov_subgraph(node)

        # Keep the begins/ends/chars triple of the regex-split result so it
        # can be exposed as an extra string output of the tokenizer model.
        regex_split_outputs = node[2:5]
        for step in pipeline.tokenization_steps:
            node = step.get_ov_subgraph(node[:-1])

        processing_outputs.extend(node)

    for step in pipeline.post_tokenization_steps:
        processing_outputs = step.get_ov_subgraph(processing_outputs)

    ov_tokenizer = Model(processing_outputs, string_inputs, name=TOKENIZER_NAME)
    change_outputs_type(ov_tokenizer, Type.i64)
    output_names = hf_tokenizer.model_input_names

    ov_tokenizer_output_names = [TOKEN_IDS_INPUT_NAME, ATTENTION_MASK_INPUT_NAME]
    if len(output_names) == 3 and len(ov_tokenizer.outputs) == 3:
        ov_tokenizer_output_names.insert(1, TOKEN_TYPE_IDS_INPUT_NAME)

    # Keep only the outputs the HF tokenizer actually declares, naming any
    # model output that does not carry the expected name yet.
    filtered_outputs = []
    for i, output_name in enumerate(ov_tokenizer_output_names):
        current_output = next(
            (output for output in ov_tokenizer.outputs if output.any_name == output_name),
            False,
        )
        if current_output:
            filtered_outputs.append(current_output)
        elif output_name in output_names:
            ov_tokenizer.output(i).tensor.add_names({output_name})
            filtered_outputs.append(ov_tokenizer.output(i))

    regex_split_outputs = _get_factory().create("StringTensorPack", regex_split_outputs).outputs()
    regex_split_outputs[0].tensor.add_names({"regex_string_output"})

    tokenizer_model = Model(filtered_outputs + regex_split_outputs, ov_tokenizer.get_parameters(), TOKENIZER_NAME)

    # --- detokenizer ---
    vocab = pipeline.tokenization_steps[-1].vocab
    detokenizer_input = op.Parameter(Type.i64, PartialShape(["?", "?"]))
    decoder_outputs = (
        _get_factory()
        .create(
            "VocabDecoder",
            [*detokenizer_input.outputs(), *BasePipelineStep.create_string_constant_node(vocab).outputs()],
        )
        .outputs()
    )[2:5]
    decoder_outputs = _get_factory().create("StringTensorPack", decoder_outputs).outputs()
    detokenizer_model = Model(decoder_outputs, [detokenizer_input], DETOKENIZER_NAME)
    # Transpose [number_of_tokens, 1] input to [1, number_of_tokens]:
    # refer to https://github.com/openvinotoolkit/openvino/issues/16331
    # refer to https://community.intel.com/t5/Intel-Distribution-of-OpenVINO/Make-inference-faster-via-pre-process-of-data/m-p/1397729
    ppp = PrePostProcessor(detokenizer_model)
    ppp.input().preprocess().convert_layout([1, 0])
    detokenizer_model = ppp.build()
    return tokenizer_model, detokenizer_model
98
+
99
+
100
# Convert the multilingual BERT tokenizer and run a small smoke test.
hf_tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-uncased")
# NOTE: "TokenzierConversionParams" (sic) is the actual openvino_tokenizers API name.
ov_tokenizer, ov_detokenizer = get_tokenizer_detokenizer(hf_tokenizer, TokenzierConversionParams())

bert_subword_tokenizer_path = "bert_subword_tokenizer.xml"
bert_subword_detokenizer_path = "bert_subword_detokenizer.xml"

print("Save bert tokenizer: ", bert_subword_tokenizer_path)
print("Save bert detokenizer: ", bert_subword_detokenizer_path)

save_model(ov_tokenizer, bert_subword_tokenizer_path)
save_model(ov_detokenizer, bert_subword_detokenizer_path)

compiled_tokenizer = compile_model(ov_tokenizer)
compiled_detokenizer = compile_model(ov_detokenizer)

text_input = ["i have installed a fortran compiler"]
print("text_input: ", text_input)

# Reference subword split from the HF tokenizer, for eyeball comparison.
hf_output = hf_tokenizer.tokenize(text_input[0])
print("hf_output: ", hf_output)

ov_output = compiled_tokenizer(text_input)
print("ov_output: ", ov_output["regex_string_output"])

# input_ids come out shaped [number_of_tokens, 1]; the detokenizer's
# preprocessing transposes them so individual tokens get detokenized.
print(compiled_detokenizer(ov_output["input_ids"]))
ov_models/bert-base-multilingual-uncased/openvino_detokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:087c9fb7c107545d56a345a282ab8e7d8b98047b05f0520cf8ccfa7ea592bc0f
3
+ size 1189552
ov_models/bert-base-multilingual-uncased/openvino_detokenizer.xml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="detokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_133" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="Parameter_133">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Constant_141" type="Const" version="opset1">
14
+ <data element_type="u64" shape="2" offset="0" size="16" />
15
+ <output>
16
+ <port id="0" precision="U64">
17
+ <dim>2</dim>
18
+ </port>
19
+ </output>
20
+ </layer>
21
+ <layer id="2" name="Transpose_142" type="Transpose" version="opset1">
22
+ <input>
23
+ <port id="0" precision="I64">
24
+ <dim>-1</dim>
25
+ <dim>-1</dim>
26
+ </port>
27
+ <port id="1" precision="U64">
28
+ <dim>2</dim>
29
+ </port>
30
+ </input>
31
+ <output>
32
+ <port id="2" precision="I64">
33
+ <dim>-1</dim>
34
+ <dim>-1</dim>
35
+ </port>
36
+ </output>
37
+ </layer>
38
+ <layer id="3" name="Constant_135" type="Const" version="opset1">
39
+ <data element_type="u8" shape="1189536" offset="16" size="1189536" />
40
+ <output>
41
+ <port id="0" precision="U8">
42
+ <dim>1189536</dim>
43
+ </port>
44
+ </output>
45
+ </layer>
46
+ <layer id="4" name="StringTensorUnpack_136" type="StringTensorUnpack" version="extension">
47
+ <data mode="begins_ends" />
48
+ <input>
49
+ <port id="0" precision="U8">
50
+ <dim>1189536</dim>
51
+ </port>
52
+ </input>
53
+ <output>
54
+ <port id="1" precision="I32">
55
+ <dim>-1</dim>
56
+ </port>
57
+ <port id="2" precision="I32">
58
+ <dim>-1</dim>
59
+ </port>
60
+ <port id="3" precision="U8">
61
+ <dim>-1</dim>
62
+ </port>
63
+ </output>
64
+ </layer>
65
+ <layer id="5" name="VocabDecoder_137" type="VocabDecoder" version="extension">
66
+ <data skip_tokens="" />
67
+ <input>
68
+ <port id="0" precision="I64">
69
+ <dim>-1</dim>
70
+ <dim>-1</dim>
71
+ </port>
72
+ <port id="1" precision="I32">
73
+ <dim>-1</dim>
74
+ </port>
75
+ <port id="2" precision="I32">
76
+ <dim>-1</dim>
77
+ </port>
78
+ <port id="3" precision="U8">
79
+ <dim>-1</dim>
80
+ </port>
81
+ </input>
82
+ <output>
83
+ <port id="4" precision="I32">
84
+ <dim>-1</dim>
85
+ </port>
86
+ <port id="5" precision="I32">
87
+ <dim>-1</dim>
88
+ </port>
89
+ <port id="6" precision="I32">
90
+ <dim>-1</dim>
91
+ </port>
92
+ <port id="7" precision="I32">
93
+ <dim>-1</dim>
94
+ </port>
95
+ <port id="8" precision="U8">
96
+ <dim>-1</dim>
97
+ </port>
98
+ </output>
99
+ </layer>
100
+ <layer id="6" name="StringTensorPack_138" type="StringTensorPack" version="extension">
101
+ <data mode="begins_ends" />
102
+ <input>
103
+ <port id="0" precision="I32">
104
+ <dim>-1</dim>
105
+ </port>
106
+ <port id="1" precision="I32">
107
+ <dim>-1</dim>
108
+ </port>
109
+ <port id="2" precision="U8">
110
+ <dim>-1</dim>
111
+ </port>
112
+ </input>
113
+ <output>
114
+ <port id="3" precision="STRING">
115
+ <dim>-1</dim>
116
+ </port>
117
+ </output>
118
+ </layer>
119
+ <layer id="7" name="Result_139" type="Result" version="opset1">
120
+ <input>
121
+ <port id="0" precision="STRING">
122
+ <dim>-1</dim>
123
+ </port>
124
+ </input>
125
+ </layer>
126
+ </layers>
127
+ <edges>
128
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
129
+ <edge from-layer="1" from-port="0" to-layer="2" to-port="1" />
130
+ <edge from-layer="2" from-port="2" to-layer="5" to-port="0" />
131
+ <edge from-layer="3" from-port="0" to-layer="4" to-port="0" />
132
+ <edge from-layer="4" from-port="1" to-layer="5" to-port="1" />
133
+ <edge from-layer="4" from-port="2" to-layer="5" to-port="2" />
134
+ <edge from-layer="4" from-port="3" to-layer="5" to-port="3" />
135
+ <edge from-layer="5" from-port="6" to-layer="6" to-port="0" />
136
+ <edge from-layer="5" from-port="7" to-layer="6" to-port="1" />
137
+ <edge from-layer="5" from-port="8" to-layer="6" to-port="2" />
138
+ <edge from-layer="6" from-port="3" to-layer="7" to-port="0" />
139
+ </edges>
140
+ <rt_info />
141
+ </net>
ov_models/bert-base-multilingual-uncased/openvino_tokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84631968723cc740023c6cd0cf27221bd9f7ab4d28837058b342c06d86c35aa5
3
+ size 1189893
ov_models/bert-base-multilingual-uncased/openvino_tokenizer.xml ADDED
@@ -0,0 +1,997 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="tokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_1" type="Parameter" version="opset1">
5
+ <data shape="?" element_type="string" />
6
+ <output>
7
+ <port id="0" precision="STRING" names="Parameter_1">
8
+ <dim>-1</dim>
9
+ </port>
10
+ </output>
11
+ </layer>
12
+ <layer id="1" name="Constant_7" type="Const" version="opset1">
13
+ <data element_type="i64" shape="" offset="0" size="8" />
14
+ <output>
15
+ <port id="0" precision="I64" />
16
+ </output>
17
+ </layer>
18
+ <layer id="2" name="StringTensorUnpack_2" type="StringTensorUnpack" version="extension">
19
+ <data mode="begins_ends" />
20
+ <input>
21
+ <port id="0" precision="STRING">
22
+ <dim>-1</dim>
23
+ </port>
24
+ </input>
25
+ <output>
26
+ <port id="1" precision="I32">
27
+ <dim>-1</dim>
28
+ </port>
29
+ <port id="2" precision="I32">
30
+ <dim>-1</dim>
31
+ </port>
32
+ <port id="3" precision="U8">
33
+ <dim>-1</dim>
34
+ </port>
35
+ </output>
36
+ </layer>
37
+ <layer id="3" name="ShapeOf_3" type="ShapeOf" version="opset3">
38
+ <data output_type="i64" />
39
+ <input>
40
+ <port id="0" precision="I32">
41
+ <dim>-1</dim>
42
+ </port>
43
+ </input>
44
+ <output>
45
+ <port id="1" precision="I64">
46
+ <dim>1</dim>
47
+ </port>
48
+ </output>
49
+ </layer>
50
+ <layer id="4" name="Constant_4" type="Const" version="opset1">
51
+ <data element_type="i64" shape="" offset="0" size="8" />
52
+ <output>
53
+ <port id="0" precision="I64" />
54
+ </output>
55
+ </layer>
56
+ <layer id="5" name="Constant_5" type="Const" version="opset1">
57
+ <data element_type="i64" shape="" offset="0" size="8" />
58
+ <output>
59
+ <port id="0" precision="I64" />
60
+ </output>
61
+ </layer>
62
+ <layer id="6" name="Gather_6" type="Gather" version="opset8">
63
+ <data batch_dims="0" />
64
+ <input>
65
+ <port id="0" precision="I64">
66
+ <dim>1</dim>
67
+ </port>
68
+ <port id="1" precision="I64" />
69
+ <port id="2" precision="I64" />
70
+ </input>
71
+ <output>
72
+ <port id="3" precision="I64" />
73
+ </output>
74
+ </layer>
75
+ <layer id="7" name="Constant_8" type="Const" version="opset1">
76
+ <data element_type="i64" shape="" offset="8" size="8" />
77
+ <output>
78
+ <port id="0" precision="I64" />
79
+ </output>
80
+ </layer>
81
+ <layer id="8" name="Range_9" type="Range" version="opset4">
82
+ <data output_type="i32" />
83
+ <input>
84
+ <port id="0" precision="I64" />
85
+ <port id="1" precision="I64" />
86
+ <port id="2" precision="I64" />
87
+ </input>
88
+ <output>
89
+ <port id="3" precision="I32">
90
+ <dim>-1</dim>
91
+ </port>
92
+ </output>
93
+ </layer>
94
+ <layer id="9" name="Constant_10" type="Const" version="opset1">
95
+ <data element_type="i64" shape="" offset="8" size="8" />
96
+ <output>
97
+ <port id="0" precision="I64" />
98
+ </output>
99
+ </layer>
100
+ <layer id="10" name="Constant_11" type="Const" version="opset1">
101
+ <data element_type="i64" shape="" offset="8" size="8" />
102
+ <output>
103
+ <port id="0" precision="I64" />
104
+ </output>
105
+ </layer>
106
+ <layer id="11" name="Add_12" type="Add" version="opset1">
107
+ <data auto_broadcast="numpy" />
108
+ <input>
109
+ <port id="0" precision="I64" />
110
+ <port id="1" precision="I64" />
111
+ </input>
112
+ <output>
113
+ <port id="2" precision="I64" />
114
+ </output>
115
+ </layer>
116
+ <layer id="12" name="Constant_13" type="Const" version="opset1">
117
+ <data element_type="i64" shape="" offset="8" size="8" />
118
+ <output>
119
+ <port id="0" precision="I64" />
120
+ </output>
121
+ </layer>
122
+ <layer id="13" name="Range_14" type="Range" version="opset4">
123
+ <data output_type="i32" />
124
+ <input>
125
+ <port id="0" precision="I64" />
126
+ <port id="1" precision="I64" />
127
+ <port id="2" precision="I64" />
128
+ </input>
129
+ <output>
130
+ <port id="3" precision="I32">
131
+ <dim>-1</dim>
132
+ </port>
133
+ </output>
134
+ </layer>
135
+ <layer id="14" name="Constant_76" type="Const" version="opset1">
136
+ <data element_type="u8" shape="50" offset="16" size="50" />
137
+ <output>
138
+ <port id="0" precision="U8">
139
+ <dim>50</dim>
140
+ </port>
141
+ </output>
142
+ </layer>
143
+ <layer id="15" name="SpecialTokensSplit_77" type="SpecialTokensSplit" version="extension">
144
+ <input>
145
+ <port id="0" precision="I32">
146
+ <dim>-1</dim>
147
+ </port>
148
+ <port id="1" precision="I32">
149
+ <dim>-1</dim>
150
+ </port>
151
+ <port id="2" precision="I32">
152
+ <dim>-1</dim>
153
+ </port>
154
+ <port id="3" precision="I32">
155
+ <dim>-1</dim>
156
+ </port>
157
+ <port id="4" precision="U8">
158
+ <dim>-1</dim>
159
+ </port>
160
+ <port id="5" precision="U8">
161
+ <dim>50</dim>
162
+ </port>
163
+ </input>
164
+ <output>
165
+ <port id="6" precision="I32">
166
+ <dim>-1</dim>
167
+ </port>
168
+ <port id="7" precision="I32">
169
+ <dim>-1</dim>
170
+ </port>
171
+ <port id="8" precision="I32">
172
+ <dim>-1</dim>
173
+ </port>
174
+ <port id="9" precision="I32">
175
+ <dim>-1</dim>
176
+ </port>
177
+ <port id="10" precision="U8">
178
+ <dim>-1</dim>
179
+ </port>
180
+ <port id="11" precision="BOOL">
181
+ <dim>-1</dim>
182
+ </port>
183
+ </output>
184
+ </layer>
185
+ <layer id="16" name="Constant_79" type="Const" version="opset1">
186
+ <data element_type="u8" shape="39" offset="66" size="39" />
187
+ <output>
188
+ <port id="0" precision="U8">
189
+ <dim>39</dim>
190
+ </port>
191
+ </output>
192
+ </layer>
193
+ <layer id="17" name="Constant_81" type="Const" version="opset1">
194
+ <data element_type="u8" shape="0" offset="105" size="1" />
195
+ <output>
196
+ <port id="0" precision="U8">
197
+ <dim>0</dim>
198
+ </port>
199
+ </output>
200
+ </layer>
201
+ <layer id="18" name="RegexNormalization_82" type="RegexNormalization" version="extension">
202
+ <data global_replace="true" />
203
+ <input>
204
+ <port id="0" precision="I32">
205
+ <dim>-1</dim>
206
+ </port>
207
+ <port id="1" precision="I32">
208
+ <dim>-1</dim>
209
+ </port>
210
+ <port id="2" precision="U8">
211
+ <dim>-1</dim>
212
+ </port>
213
+ <port id="3" precision="BOOL">
214
+ <dim>-1</dim>
215
+ </port>
216
+ <port id="4" precision="U8">
217
+ <dim>39</dim>
218
+ </port>
219
+ <port id="5" precision="U8">
220
+ <dim>0</dim>
221
+ </port>
222
+ </input>
223
+ <output>
224
+ <port id="6" precision="I32">
225
+ <dim>-1</dim>
226
+ </port>
227
+ <port id="7" precision="I32">
228
+ <dim>-1</dim>
229
+ </port>
230
+ <port id="8" precision="U8">
231
+ <dim>-1</dim>
232
+ </port>
233
+ <port id="9" precision="BOOL">
234
+ <dim>-1</dim>
235
+ </port>
236
+ </output>
237
+ </layer>
238
+ <layer id="19" name="NormalizeUnicode_83" type="NormalizeUnicode" version="extension">
239
+ <data normalization_form="NFD" />
240
+ <input>
241
+ <port id="0" precision="I32">
242
+ <dim>-1</dim>
243
+ </port>
244
+ <port id="1" precision="I32">
245
+ <dim>-1</dim>
246
+ </port>
247
+ <port id="2" precision="U8">
248
+ <dim>-1</dim>
249
+ </port>
250
+ <port id="3" precision="BOOL">
251
+ <dim>-1</dim>
252
+ </port>
253
+ </input>
254
+ <output>
255
+ <port id="4" precision="I32">
256
+ <dim>-1</dim>
257
+ </port>
258
+ <port id="5" precision="I32">
259
+ <dim>-1</dim>
260
+ </port>
261
+ <port id="6" precision="U8">
262
+ <dim>-1</dim>
263
+ </port>
264
+ <port id="7" precision="BOOL">
265
+ <dim>-1</dim>
266
+ </port>
267
+ </output>
268
+ </layer>
269
+ <layer id="20" name="Constant_85" type="Const" version="opset1">
270
+ <data element_type="u8" shape="6" offset="106" size="6" />
271
+ <output>
272
+ <port id="0" precision="U8">
273
+ <dim>6</dim>
274
+ </port>
275
+ </output>
276
+ </layer>
277
+ <layer id="21" name="Constant_87" type="Const" version="opset1">
278
+ <data element_type="u8" shape="0" offset="105" size="1" />
279
+ <output>
280
+ <port id="0" precision="U8">
281
+ <dim>0</dim>
282
+ </port>
283
+ </output>
284
+ </layer>
285
+ <layer id="22" name="RegexNormalization_88" type="RegexNormalization" version="extension">
286
+ <data global_replace="true" />
287
+ <input>
288
+ <port id="0" precision="I32">
289
+ <dim>-1</dim>
290
+ </port>
291
+ <port id="1" precision="I32">
292
+ <dim>-1</dim>
293
+ </port>
294
+ <port id="2" precision="U8">
295
+ <dim>-1</dim>
296
+ </port>
297
+ <port id="3" precision="BOOL">
298
+ <dim>-1</dim>
299
+ </port>
300
+ <port id="4" precision="U8">
301
+ <dim>6</dim>
302
+ </port>
303
+ <port id="5" precision="U8">
304
+ <dim>0</dim>
305
+ </port>
306
+ </input>
307
+ <output>
308
+ <port id="6" precision="I32">
309
+ <dim>-1</dim>
310
+ </port>
311
+ <port id="7" precision="I32">
312
+ <dim>-1</dim>
313
+ </port>
314
+ <port id="8" precision="U8">
315
+ <dim>-1</dim>
316
+ </port>
317
+ <port id="9" precision="BOOL">
318
+ <dim>-1</dim>
319
+ </port>
320
+ </output>
321
+ </layer>
322
+ <layer id="23" name="CaseFold_89" type="CaseFold" version="extension">
323
+ <data encoding="utf-8" />
324
+ <input>
325
+ <port id="0" precision="I32">
326
+ <dim>-1</dim>
327
+ </port>
328
+ <port id="1" precision="I32">
329
+ <dim>-1</dim>
330
+ </port>
331
+ <port id="2" precision="U8">
332
+ <dim>-1</dim>
333
+ </port>
334
+ <port id="3" precision="BOOL">
335
+ <dim>-1</dim>
336
+ </port>
337
+ </input>
338
+ <output>
339
+ <port id="4" precision="I32">
340
+ <dim>-1</dim>
341
+ </port>
342
+ <port id="5" precision="I32">
343
+ <dim>-1</dim>
344
+ </port>
345
+ <port id="6" precision="U8">
346
+ <dim>-1</dim>
347
+ </port>
348
+ <port id="7" precision="BOOL">
349
+ <dim>-1</dim>
350
+ </port>
351
+ </output>
352
+ </layer>
353
+ <layer id="24" name="Constant_91" type="Const" version="opset1">
354
+ <data element_type="u8" shape="3" offset="112" size="3" />
355
+ <output>
356
+ <port id="0" precision="U8">
357
+ <dim>3</dim>
358
+ </port>
359
+ </output>
360
+ </layer>
361
+ <layer id="25" name="RegexSplit_92" type="RegexSplit" version="extension">
362
+ <data behaviour="remove" invert="false" max_splits="-1" />
363
+ <input>
364
+ <port id="0" precision="I32">
365
+ <dim>-1</dim>
366
+ </port>
367
+ <port id="1" precision="I32">
368
+ <dim>-1</dim>
369
+ </port>
370
+ <port id="2" precision="I32">
371
+ <dim>-1</dim>
372
+ </port>
373
+ <port id="3" precision="I32">
374
+ <dim>-1</dim>
375
+ </port>
376
+ <port id="4" precision="U8">
377
+ <dim>-1</dim>
378
+ </port>
379
+ <port id="5" precision="BOOL">
380
+ <dim>-1</dim>
381
+ </port>
382
+ <port id="6" precision="U8">
383
+ <dim>3</dim>
384
+ </port>
385
+ </input>
386
+ <output>
387
+ <port id="7" precision="I32">
388
+ <dim>-1</dim>
389
+ </port>
390
+ <port id="8" precision="I32">
391
+ <dim>-1</dim>
392
+ </port>
393
+ <port id="9" precision="I32">
394
+ <dim>-1</dim>
395
+ </port>
396
+ <port id="10" precision="I32">
397
+ <dim>-1</dim>
398
+ </port>
399
+ <port id="11" precision="U8">
400
+ <dim>-1</dim>
401
+ </port>
402
+ <port id="12" precision="BOOL">
403
+ <dim>-1</dim>
404
+ </port>
405
+ </output>
406
+ </layer>
407
+ <layer id="26" name="Constant_94" type="Const" version="opset1">
408
+ <data element_type="u8" shape="202" offset="115" size="202" />
409
+ <output>
410
+ <port id="0" precision="U8">
411
+ <dim>202</dim>
412
+ </port>
413
+ </output>
414
+ </layer>
415
+ <layer id="27" name="RegexSplit_95" type="RegexSplit" version="extension">
416
+ <data behaviour="isolate" invert="false" max_splits="-1" />
417
+ <input>
418
+ <port id="0" precision="I32">
419
+ <dim>-1</dim>
420
+ </port>
421
+ <port id="1" precision="I32">
422
+ <dim>-1</dim>
423
+ </port>
424
+ <port id="2" precision="I32">
425
+ <dim>-1</dim>
426
+ </port>
427
+ <port id="3" precision="I32">
428
+ <dim>-1</dim>
429
+ </port>
430
+ <port id="4" precision="U8">
431
+ <dim>-1</dim>
432
+ </port>
433
+ <port id="5" precision="BOOL">
434
+ <dim>-1</dim>
435
+ </port>
436
+ <port id="6" precision="U8">
437
+ <dim>202</dim>
438
+ </port>
439
+ </input>
440
+ <output>
441
+ <port id="7" precision="I32">
442
+ <dim>-1</dim>
443
+ </port>
444
+ <port id="8" precision="I32">
445
+ <dim>-1</dim>
446
+ </port>
447
+ <port id="9" precision="I32">
448
+ <dim>-1</dim>
449
+ </port>
450
+ <port id="10" precision="I32">
451
+ <dim>-1</dim>
452
+ </port>
453
+ <port id="11" precision="U8">
454
+ <dim>-1</dim>
455
+ </port>
456
+ <port id="12" precision="BOOL">
457
+ <dim>-1</dim>
458
+ </port>
459
+ </output>
460
+ </layer>
461
+ <layer id="28" name="StringTensorPack_131" type="StringTensorPack" version="extension">
462
+ <data mode="begins_ends" />
463
+ <input>
464
+ <port id="0" precision="I32">
465
+ <dim>-1</dim>
466
+ </port>
467
+ <port id="1" precision="I32">
468
+ <dim>-1</dim>
469
+ </port>
470
+ <port id="2" precision="U8">
471
+ <dim>-1</dim>
472
+ </port>
473
+ </input>
474
+ <output>
475
+ <port id="3" precision="STRING" names="regex_string_output">
476
+ <dim>-1</dim>
477
+ </port>
478
+ </output>
479
+ </layer>
480
+ <layer id="30" name="Constant_105" type="Const" version="opset1">
481
+ <data element_type="i32" shape="" offset="317" size="4" />
482
+ <output>
483
+ <port id="0" precision="I32" />
484
+ </output>
485
+ </layer>
486
+ <layer id="31" name="Constant_106" type="Const" version="opset1">
487
+ <data element_type="i32" shape="" offset="321" size="4" />
488
+ <output>
489
+ <port id="0" precision="I32" />
490
+ </output>
491
+ </layer>
492
+ <layer id="32" name="Constant_107" type="Const" version="opset1">
493
+ <data element_type="i32" shape="1" offset="325" size="4" />
494
+ <output>
495
+ <port id="0" precision="I32">
496
+ <dim>1</dim>
497
+ </port>
498
+ </output>
499
+ </layer>
500
+ <layer id="33" name="Constant_97" type="Const" version="opset1">
501
+ <data element_type="u8" shape="1189536" offset="329" size="1189536" />
502
+ <output>
503
+ <port id="0" precision="U8">
504
+ <dim>1189536</dim>
505
+ </port>
506
+ </output>
507
+ </layer>
508
+ <layer id="34" name="StringTensorUnpack_98" type="StringTensorUnpack" version="extension">
509
+ <data mode="begins_ends" />
510
+ <input>
511
+ <port id="0" precision="U8">
512
+ <dim>1189536</dim>
513
+ </port>
514
+ </input>
515
+ <output>
516
+ <port id="1" precision="I32">
517
+ <dim>-1</dim>
518
+ </port>
519
+ <port id="2" precision="I32">
520
+ <dim>-1</dim>
521
+ </port>
522
+ <port id="3" precision="U8">
523
+ <dim>-1</dim>
524
+ </port>
525
+ </output>
526
+ </layer>
527
+ <layer id="35" name="Constant_99" type="Const" version="opset1">
528
+ <data element_type="i64" shape="" offset="1189865" size="8" />
529
+ <output>
530
+ <port id="0" precision="I64" />
531
+ </output>
532
+ </layer>
533
+ <layer id="36" name="WordpieceTokenizer_100" type="WordpieceTokenizer" version="extension">
534
+ <data suffix_indicator="##" max_bytes_per_word="100" />
535
+ <input>
536
+ <port id="0" precision="I32">
537
+ <dim>-1</dim>
538
+ </port>
539
+ <port id="1" precision="I32">
540
+ <dim>-1</dim>
541
+ </port>
542
+ <port id="2" precision="I32">
543
+ <dim>-1</dim>
544
+ </port>
545
+ <port id="3" precision="I32">
546
+ <dim>-1</dim>
547
+ </port>
548
+ <port id="4" precision="U8">
549
+ <dim>-1</dim>
550
+ </port>
551
+ <port id="5" precision="I32">
552
+ <dim>-1</dim>
553
+ </port>
554
+ <port id="6" precision="I32">
555
+ <dim>-1</dim>
556
+ </port>
557
+ <port id="7" precision="U8">
558
+ <dim>-1</dim>
559
+ </port>
560
+ <port id="8" precision="I64" />
561
+ </input>
562
+ <output>
563
+ <port id="9" precision="I32">
564
+ <dim>-1</dim>
565
+ </port>
566
+ <port id="10" precision="I32">
567
+ <dim>-1</dim>
568
+ </port>
569
+ <port id="11" precision="I32">
570
+ <dim>-1</dim>
571
+ </port>
572
+ </output>
573
+ </layer>
574
+ <layer id="37" name="Subtract_101" type="Subtract" version="opset1">
575
+ <data auto_broadcast="numpy" />
576
+ <input>
577
+ <port id="0" precision="I32">
578
+ <dim>-1</dim>
579
+ </port>
580
+ <port id="1" precision="I32">
581
+ <dim>-1</dim>
582
+ </port>
583
+ </input>
584
+ <output>
585
+ <port id="2" precision="I32">
586
+ <dim>-1</dim>
587
+ </port>
588
+ </output>
589
+ </layer>
590
+ <layer id="38" name="Constant_102" type="Const" version="opset1">
591
+ <data element_type="i32" shape="" offset="1189873" size="4" />
592
+ <output>
593
+ <port id="0" precision="I32" />
594
+ </output>
595
+ </layer>
596
+ <layer id="39" name="Minimum_103" type="Minimum" version="opset1">
597
+ <data auto_broadcast="numpy" />
598
+ <input>
599
+ <port id="0" precision="I32">
600
+ <dim>-1</dim>
601
+ </port>
602
+ <port id="1" precision="I32" />
603
+ </input>
604
+ <output>
605
+ <port id="2" precision="I32">
606
+ <dim>-1</dim>
607
+ </port>
608
+ </output>
609
+ </layer>
610
+ <layer id="40" name="Add_104" type="Add" version="opset1">
611
+ <data auto_broadcast="numpy" />
612
+ <input>
613
+ <port id="0" precision="I32">
614
+ <dim>-1</dim>
615
+ </port>
616
+ <port id="1" precision="I32">
617
+ <dim>-1</dim>
618
+ </port>
619
+ </input>
620
+ <output>
621
+ <port id="2" precision="I32">
622
+ <dim>-1</dim>
623
+ </port>
624
+ </output>
625
+ </layer>
626
+ <layer id="41" name="Constant_108" type="Const" version="opset1">
627
+ <data element_type="i32" shape="" offset="317" size="4" />
628
+ <output>
629
+ <port id="0" precision="I32" />
630
+ </output>
631
+ </layer>
632
+ <layer id="42" name="Constant_109" type="Const" version="opset1">
633
+ <data element_type="i32" shape="" offset="321" size="4" />
634
+ <output>
635
+ <port id="0" precision="I32" />
636
+ </output>
637
+ </layer>
638
+ <layer id="43" name="Constant_110" type="Const" version="opset1">
639
+ <data element_type="i32" shape="1" offset="1189877" size="4" />
640
+ <output>
641
+ <port id="0" precision="I32">
642
+ <dim>1</dim>
643
+ </port>
644
+ </output>
645
+ </layer>
646
+ <layer id="44" name="Constant_111" type="Const" version="opset1">
647
+ <data element_type="i32" shape="3" offset="1189881" size="12" />
648
+ <output>
649
+ <port id="0" precision="I32">
650
+ <dim>3</dim>
651
+ </port>
652
+ </output>
653
+ </layer>
654
+ <layer id="45" name="CombineSegments_112" type="CombineSegments" version="extension">
655
+ <input>
656
+ <port id="0" precision="I32" />
657
+ <port id="1" precision="I32" />
658
+ <port id="2" precision="I32">
659
+ <dim>1</dim>
660
+ </port>
661
+ <port id="3" precision="I32">
662
+ <dim>-1</dim>
663
+ </port>
664
+ <port id="4" precision="I32">
665
+ <dim>-1</dim>
666
+ </port>
667
+ <port id="5" precision="I32">
668
+ <dim>-1</dim>
669
+ </port>
670
+ <port id="6" precision="I32" />
671
+ <port id="7" precision="I32" />
672
+ <port id="8" precision="I32">
673
+ <dim>1</dim>
674
+ </port>
675
+ <port id="9" precision="I32">
676
+ <dim>3</dim>
677
+ </port>
678
+ </input>
679
+ <output>
680
+ <port id="10" precision="I32">
681
+ <dim>-1</dim>
682
+ </port>
683
+ <port id="11" precision="I32">
684
+ <dim>-1</dim>
685
+ </port>
686
+ <port id="12" precision="I32">
687
+ <dim>-1</dim>
688
+ </port>
689
+ <port id="13" precision="I32">
690
+ <dim>-1</dim>
691
+ </port>
692
+ <port id="14" precision="I32">
693
+ <dim>-1</dim>
694
+ </port>
695
+ <port id="15" precision="I32">
696
+ <dim>-1</dim>
697
+ </port>
698
+ </output>
699
+ </layer>
700
+ <layer id="46" name="Subtract_113" type="Subtract" version="opset1">
701
+ <data auto_broadcast="numpy" />
702
+ <input>
703
+ <port id="0" precision="I32">
704
+ <dim>-1</dim>
705
+ </port>
706
+ <port id="1" precision="I32">
707
+ <dim>-1</dim>
708
+ </port>
709
+ </input>
710
+ <output>
711
+ <port id="2" precision="I32">
712
+ <dim>-1</dim>
713
+ </port>
714
+ </output>
715
+ </layer>
716
+ <layer id="47" name="Constant_114" type="Const" version="opset1">
717
+ <data element_type="i32" shape="" offset="317" size="4" />
718
+ <output>
719
+ <port id="0" precision="I32" />
720
+ </output>
721
+ </layer>
722
+ <layer id="48" name="ReduceMax_115" type="ReduceMax" version="opset1">
723
+ <data keep_dims="false" />
724
+ <input>
725
+ <port id="0" precision="I32">
726
+ <dim>-1</dim>
727
+ </port>
728
+ <port id="1" precision="I32" />
729
+ </input>
730
+ <output>
731
+ <port id="2" precision="I32" />
732
+ </output>
733
+ </layer>
734
+ <layer id="49" name="Constant_116" type="Const" version="opset1">
735
+ <data element_type="i32" shape="" offset="317" size="4" />
736
+ <output>
737
+ <port id="0" precision="I32" />
738
+ </output>
739
+ </layer>
740
+ <layer id="50" name="RaggedToDense_117" type="RaggedToDense" version="extension">
741
+ <data pad_right="true" />
742
+ <input>
743
+ <port id="0" precision="I32">
744
+ <dim>-1</dim>
745
+ </port>
746
+ <port id="1" precision="I32">
747
+ <dim>-1</dim>
748
+ </port>
749
+ <port id="2" precision="I32">
750
+ <dim>-1</dim>
751
+ </port>
752
+ <port id="3" precision="I32" />
753
+ <port id="4" precision="I32" />
754
+ </input>
755
+ <output>
756
+ <port id="5" precision="I32">
757
+ <dim>-1</dim>
758
+ <dim>-1</dim>
759
+ </port>
760
+ <port id="6" precision="BOOL">
761
+ <dim>-1</dim>
762
+ <dim>-1</dim>
763
+ </port>
764
+ </output>
765
+ </layer>
766
+ <layer id="51" name="Convert_118" type="Convert" version="opset1">
767
+ <data destination_type="i32" />
768
+ <input>
769
+ <port id="0" precision="BOOL">
770
+ <dim>-1</dim>
771
+ <dim>-1</dim>
772
+ </port>
773
+ </input>
774
+ <output>
775
+ <port id="1" precision="I32">
776
+ <dim>-1</dim>
777
+ <dim>-1</dim>
778
+ </port>
779
+ </output>
780
+ </layer>
781
+ <layer id="52" name="Convert_118" type="Convert" version="opset1">
782
+ <data destination_type="i64" />
783
+ <input>
784
+ <port id="0" precision="I32">
785
+ <dim>-1</dim>
786
+ <dim>-1</dim>
787
+ </port>
788
+ </input>
789
+ <output>
790
+ <port id="1" precision="I64" names="attention_mask">
791
+ <dim>-1</dim>
792
+ <dim>-1</dim>
793
+ </port>
794
+ </output>
795
+ </layer>
796
+ <layer id="54" name="Constant_119" type="Const" version="opset1">
797
+ <data element_type="i32" shape="" offset="317" size="4" />
798
+ <output>
799
+ <port id="0" precision="I32" />
800
+ </output>
801
+ </layer>
802
+ <layer id="55" name="RaggedToDense_120" type="RaggedToDense" version="extension">
803
+ <data pad_right="true" />
804
+ <input>
805
+ <port id="0" precision="I32">
806
+ <dim>-1</dim>
807
+ </port>
808
+ <port id="1" precision="I32">
809
+ <dim>-1</dim>
810
+ </port>
811
+ <port id="2" precision="I32">
812
+ <dim>-1</dim>
813
+ </port>
814
+ <port id="3" precision="I32" />
815
+ <port id="4" precision="I32" />
816
+ </input>
817
+ <output>
818
+ <port id="5" precision="I32">
819
+ <dim>-1</dim>
820
+ <dim>-1</dim>
821
+ </port>
822
+ <port id="6" precision="BOOL">
823
+ <dim>-1</dim>
824
+ <dim>-1</dim>
825
+ </port>
826
+ </output>
827
+ </layer>
828
+ <layer id="56" name="RaggedToDense_120.0" type="Convert" version="opset1">
829
+ <data destination_type="i64" />
830
+ <input>
831
+ <port id="0" precision="I32">
832
+ <dim>-1</dim>
833
+ <dim>-1</dim>
834
+ </port>
835
+ </input>
836
+ <output>
837
+ <port id="1" precision="I64" names="token_type_ids">
838
+ <dim>-1</dim>
839
+ <dim>-1</dim>
840
+ </port>
841
+ </output>
842
+ </layer>
843
+ <layer id="58" name="RaggedToDense_117.0" type="Convert" version="opset1">
844
+ <data destination_type="i64" />
845
+ <input>
846
+ <port id="0" precision="I32">
847
+ <dim>-1</dim>
848
+ <dim>-1</dim>
849
+ </port>
850
+ </input>
851
+ <output>
852
+ <port id="1" precision="I64" names="input_ids">
853
+ <dim>-1</dim>
854
+ <dim>-1</dim>
855
+ </port>
856
+ </output>
857
+ </layer>
858
+ <layer id="59" name="Result_121" type="Result" version="opset1">
859
+ <input>
860
+ <port id="0" precision="I64">
861
+ <dim>-1</dim>
862
+ <dim>-1</dim>
863
+ </port>
864
+ </input>
865
+ </layer>
866
+ <layer id="57" name="Result_122" type="Result" version="opset1">
867
+ <input>
868
+ <port id="0" precision="I64">
869
+ <dim>-1</dim>
870
+ <dim>-1</dim>
871
+ </port>
872
+ </input>
873
+ </layer>
874
+ <layer id="53" name="Result_123" type="Result" version="opset1">
875
+ <input>
876
+ <port id="0" precision="I64">
877
+ <dim>-1</dim>
878
+ <dim>-1</dim>
879
+ </port>
880
+ </input>
881
+ </layer>
882
+ <layer id="29" name="Result_132" type="Result" version="opset1">
883
+ <input>
884
+ <port id="0" precision="STRING">
885
+ <dim>-1</dim>
886
+ </port>
887
+ </input>
888
+ </layer>
889
+ </layers>
890
+ <edges>
891
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
892
+ <edge from-layer="1" from-port="0" to-layer="8" to-port="0" />
893
+ <edge from-layer="2" from-port="1" to-layer="3" to-port="0" />
894
+ <edge from-layer="2" from-port="3" to-layer="15" to-port="4" />
895
+ <edge from-layer="2" from-port="2" to-layer="15" to-port="3" />
896
+ <edge from-layer="2" from-port="1" to-layer="15" to-port="2" />
897
+ <edge from-layer="3" from-port="1" to-layer="6" to-port="0" />
898
+ <edge from-layer="4" from-port="0" to-layer="6" to-port="1" />
899
+ <edge from-layer="5" from-port="0" to-layer="6" to-port="2" />
900
+ <edge from-layer="6" from-port="3" to-layer="11" to-port="0" />
901
+ <edge from-layer="6" from-port="3" to-layer="8" to-port="1" />
902
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
903
+ <edge from-layer="8" from-port="3" to-layer="15" to-port="0" />
904
+ <edge from-layer="9" from-port="0" to-layer="13" to-port="0" />
905
+ <edge from-layer="10" from-port="0" to-layer="11" to-port="1" />
906
+ <edge from-layer="11" from-port="2" to-layer="13" to-port="1" />
907
+ <edge from-layer="12" from-port="0" to-layer="13" to-port="2" />
908
+ <edge from-layer="13" from-port="3" to-layer="15" to-port="1" />
909
+ <edge from-layer="14" from-port="0" to-layer="15" to-port="5" />
910
+ <edge from-layer="15" from-port="6" to-layer="25" to-port="0" />
911
+ <edge from-layer="15" from-port="7" to-layer="25" to-port="1" />
912
+ <edge from-layer="15" from-port="11" to-layer="18" to-port="3" />
913
+ <edge from-layer="15" from-port="10" to-layer="18" to-port="2" />
914
+ <edge from-layer="15" from-port="9" to-layer="18" to-port="1" />
915
+ <edge from-layer="15" from-port="8" to-layer="18" to-port="0" />
916
+ <edge from-layer="16" from-port="0" to-layer="18" to-port="4" />
917
+ <edge from-layer="17" from-port="0" to-layer="18" to-port="5" />
918
+ <edge from-layer="18" from-port="6" to-layer="19" to-port="0" />
919
+ <edge from-layer="18" from-port="7" to-layer="19" to-port="1" />
920
+ <edge from-layer="18" from-port="8" to-layer="19" to-port="2" />
921
+ <edge from-layer="18" from-port="9" to-layer="19" to-port="3" />
922
+ <edge from-layer="19" from-port="4" to-layer="22" to-port="0" />
923
+ <edge from-layer="19" from-port="6" to-layer="22" to-port="2" />
924
+ <edge from-layer="19" from-port="5" to-layer="22" to-port="1" />
925
+ <edge from-layer="19" from-port="7" to-layer="22" to-port="3" />
926
+ <edge from-layer="20" from-port="0" to-layer="22" to-port="4" />
927
+ <edge from-layer="21" from-port="0" to-layer="22" to-port="5" />
928
+ <edge from-layer="22" from-port="6" to-layer="23" to-port="0" />
929
+ <edge from-layer="22" from-port="7" to-layer="23" to-port="1" />
930
+ <edge from-layer="22" from-port="8" to-layer="23" to-port="2" />
931
+ <edge from-layer="22" from-port="9" to-layer="23" to-port="3" />
932
+ <edge from-layer="23" from-port="5" to-layer="25" to-port="3" />
933
+ <edge from-layer="23" from-port="7" to-layer="25" to-port="5" />
934
+ <edge from-layer="23" from-port="6" to-layer="25" to-port="4" />
935
+ <edge from-layer="23" from-port="4" to-layer="25" to-port="2" />
936
+ <edge from-layer="24" from-port="0" to-layer="25" to-port="6" />
937
+ <edge from-layer="25" from-port="7" to-layer="27" to-port="0" />
938
+ <edge from-layer="25" from-port="8" to-layer="27" to-port="1" />
939
+ <edge from-layer="25" from-port="9" to-layer="27" to-port="2" />
940
+ <edge from-layer="25" from-port="10" to-layer="27" to-port="3" />
941
+ <edge from-layer="25" from-port="11" to-layer="27" to-port="4" />
942
+ <edge from-layer="25" from-port="12" to-layer="27" to-port="5" />
943
+ <edge from-layer="26" from-port="0" to-layer="27" to-port="6" />
944
+ <edge from-layer="27" from-port="10" to-layer="28" to-port="1" />
945
+ <edge from-layer="27" from-port="9" to-layer="28" to-port="0" />
946
+ <edge from-layer="27" from-port="11" to-layer="36" to-port="4" />
947
+ <edge from-layer="27" from-port="10" to-layer="36" to-port="3" />
948
+ <edge from-layer="27" from-port="9" to-layer="36" to-port="2" />
949
+ <edge from-layer="27" from-port="8" to-layer="36" to-port="1" />
950
+ <edge from-layer="27" from-port="7" to-layer="36" to-port="0" />
951
+ <edge from-layer="27" from-port="11" to-layer="28" to-port="2" />
952
+ <edge from-layer="28" from-port="3" to-layer="29" to-port="0" />
953
+ <edge from-layer="30" from-port="0" to-layer="45" to-port="0" />
954
+ <edge from-layer="31" from-port="0" to-layer="45" to-port="1" />
955
+ <edge from-layer="32" from-port="0" to-layer="45" to-port="2" />
956
+ <edge from-layer="33" from-port="0" to-layer="34" to-port="0" />
957
+ <edge from-layer="34" from-port="1" to-layer="36" to-port="5" />
958
+ <edge from-layer="34" from-port="2" to-layer="36" to-port="6" />
959
+ <edge from-layer="34" from-port="3" to-layer="36" to-port="7" />
960
+ <edge from-layer="35" from-port="0" to-layer="36" to-port="8" />
961
+ <edge from-layer="36" from-port="11" to-layer="45" to-port="5" />
962
+ <edge from-layer="36" from-port="9" to-layer="45" to-port="3" />
963
+ <edge from-layer="36" from-port="9" to-layer="40" to-port="0" />
964
+ <edge from-layer="36" from-port="9" to-layer="37" to-port="1" />
965
+ <edge from-layer="36" from-port="10" to-layer="37" to-port="0" />
966
+ <edge from-layer="37" from-port="2" to-layer="39" to-port="0" />
967
+ <edge from-layer="38" from-port="0" to-layer="39" to-port="1" />
968
+ <edge from-layer="39" from-port="2" to-layer="40" to-port="1" />
969
+ <edge from-layer="40" from-port="2" to-layer="45" to-port="4" />
970
+ <edge from-layer="41" from-port="0" to-layer="45" to-port="6" />
971
+ <edge from-layer="42" from-port="0" to-layer="45" to-port="7" />
972
+ <edge from-layer="43" from-port="0" to-layer="45" to-port="8" />
973
+ <edge from-layer="44" from-port="0" to-layer="45" to-port="9" />
974
+ <edge from-layer="45" from-port="11" to-layer="50" to-port="1" />
975
+ <edge from-layer="45" from-port="15" to-layer="55" to-port="2" />
976
+ <edge from-layer="45" from-port="14" to-layer="55" to-port="1" />
977
+ <edge from-layer="45" from-port="13" to-layer="55" to-port="0" />
978
+ <edge from-layer="45" from-port="12" to-layer="50" to-port="2" />
979
+ <edge from-layer="45" from-port="10" to-layer="50" to-port="0" />
980
+ <edge from-layer="45" from-port="10" to-layer="46" to-port="1" />
981
+ <edge from-layer="45" from-port="11" to-layer="46" to-port="0" />
982
+ <edge from-layer="46" from-port="2" to-layer="48" to-port="0" />
983
+ <edge from-layer="47" from-port="0" to-layer="48" to-port="1" />
984
+ <edge from-layer="48" from-port="2" to-layer="50" to-port="3" />
985
+ <edge from-layer="48" from-port="2" to-layer="55" to-port="3" />
986
+ <edge from-layer="49" from-port="0" to-layer="50" to-port="4" />
987
+ <edge from-layer="50" from-port="6" to-layer="51" to-port="0" />
988
+ <edge from-layer="50" from-port="5" to-layer="58" to-port="0" />
989
+ <edge from-layer="51" from-port="1" to-layer="52" to-port="0" />
990
+ <edge from-layer="52" from-port="1" to-layer="53" to-port="0" />
991
+ <edge from-layer="54" from-port="0" to-layer="55" to-port="4" />
992
+ <edge from-layer="55" from-port="5" to-layer="56" to-port="0" />
993
+ <edge from-layer="56" from-port="1" to-layer="57" to-port="0" />
994
+ <edge from-layer="58" from-port="1" to-layer="59" to-port="0" />
995
+ </edges>
996
+ <rt_info />
997
+ </net>
ov_models/bert-base-multilingual-uncased/test_openvino_tokenizer.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer
2
+ from openvino import compile_model
3
+ import openvino_tokenizers
4
+ import os
5
+ import time
6
+ print(f"Process ID: {os.getpid()}")
7
+
8
+ hf_tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-uncased")
9
+ """
10
+ convert_tokenizer google-bert/bert-base-multilingual-uncased -o bert-base-multilingual-uncased --skip-special-tokens --trust-remote-code --utf8_replace_mode replace
11
+ """
12
+ ov_tokenizer = "openvino_tokenizer.xml"
13
+ compiled_tokenzier = compile_model(ov_tokenizer)
14
+
15
+
16
+ text_input = ["I am developing a clang-based c++ compiler"]
17
+ #print("text_input: ", text_input)
18
+
19
+ hf_output = hf_tokenizer(text_input[0])
20
+ print("hf_output: ", hf_output["input_ids"])
21
+ # Existing test
22
+ ov_output = compiled_tokenzier(text_input)
23
+ print("ov_output: ", ov_output["input_ids"])
24
+
25
+ # Measure time for compiled_tokenizer
26
+ start_time = time.time()
27
+ ov_output = compiled_tokenzier(text_input)
28
+ end_time = time.time()
29
+ print(f"Time taken for compiled_tokenizer: {(end_time - start_time) * 1000:.2f} ms")
30
+
31
+ # Additional tests
32
+
33
+
ov_models/bert-base-uncased/openvino_detokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65aecf58b4b16cc28a300a4f3f4da651ce6c77e78b742352ebd43f8e19b61711
3
+ size 323098
ov_models/bert-base-uncased/openvino_detokenizer.xml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="detokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_133" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="Parameter_133">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Constant_141" type="Const" version="opset1">
14
+ <data element_type="u64" shape="2" offset="0" size="16" />
15
+ <output>
16
+ <port id="0" precision="U64">
17
+ <dim>2</dim>
18
+ </port>
19
+ </output>
20
+ </layer>
21
+ <layer id="2" name="Transpose_142" type="Transpose" version="opset1">
22
+ <input>
23
+ <port id="0" precision="I64">
24
+ <dim>-1</dim>
25
+ <dim>-1</dim>
26
+ </port>
27
+ <port id="1" precision="U64">
28
+ <dim>2</dim>
29
+ </port>
30
+ </input>
31
+ <output>
32
+ <port id="2" precision="I64">
33
+ <dim>-1</dim>
34
+ <dim>-1</dim>
35
+ </port>
36
+ </output>
37
+ </layer>
38
+ <layer id="3" name="Constant_135" type="Const" version="opset1">
39
+ <data element_type="u8" shape="323082" offset="16" size="323082" />
40
+ <output>
41
+ <port id="0" precision="U8">
42
+ <dim>323082</dim>
43
+ </port>
44
+ </output>
45
+ </layer>
46
+ <layer id="4" name="StringTensorUnpack_136" type="StringTensorUnpack" version="extension">
47
+ <data mode="begins_ends" />
48
+ <input>
49
+ <port id="0" precision="U8">
50
+ <dim>323082</dim>
51
+ </port>
52
+ </input>
53
+ <output>
54
+ <port id="1" precision="I32">
55
+ <dim>-1</dim>
56
+ </port>
57
+ <port id="2" precision="I32">
58
+ <dim>-1</dim>
59
+ </port>
60
+ <port id="3" precision="U8">
61
+ <dim>-1</dim>
62
+ </port>
63
+ </output>
64
+ </layer>
65
+ <layer id="5" name="VocabDecoder_137" type="VocabDecoder" version="extension">
66
+ <data skip_tokens="" />
67
+ <input>
68
+ <port id="0" precision="I64">
69
+ <dim>-1</dim>
70
+ <dim>-1</dim>
71
+ </port>
72
+ <port id="1" precision="I32">
73
+ <dim>-1</dim>
74
+ </port>
75
+ <port id="2" precision="I32">
76
+ <dim>-1</dim>
77
+ </port>
78
+ <port id="3" precision="U8">
79
+ <dim>-1</dim>
80
+ </port>
81
+ </input>
82
+ <output>
83
+ <port id="4" precision="I32">
84
+ <dim>-1</dim>
85
+ </port>
86
+ <port id="5" precision="I32">
87
+ <dim>-1</dim>
88
+ </port>
89
+ <port id="6" precision="I32">
90
+ <dim>-1</dim>
91
+ </port>
92
+ <port id="7" precision="I32">
93
+ <dim>-1</dim>
94
+ </port>
95
+ <port id="8" precision="U8">
96
+ <dim>-1</dim>
97
+ </port>
98
+ </output>
99
+ </layer>
100
+ <layer id="6" name="StringTensorPack_138" type="StringTensorPack" version="extension">
101
+ <data mode="begins_ends" />
102
+ <input>
103
+ <port id="0" precision="I32">
104
+ <dim>-1</dim>
105
+ </port>
106
+ <port id="1" precision="I32">
107
+ <dim>-1</dim>
108
+ </port>
109
+ <port id="2" precision="U8">
110
+ <dim>-1</dim>
111
+ </port>
112
+ </input>
113
+ <output>
114
+ <port id="3" precision="STRING">
115
+ <dim>-1</dim>
116
+ </port>
117
+ </output>
118
+ </layer>
119
+ <layer id="7" name="Result_139" type="Result" version="opset1">
120
+ <input>
121
+ <port id="0" precision="STRING">
122
+ <dim>-1</dim>
123
+ </port>
124
+ </input>
125
+ </layer>
126
+ </layers>
127
+ <edges>
128
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
129
+ <edge from-layer="1" from-port="0" to-layer="2" to-port="1" />
130
+ <edge from-layer="2" from-port="2" to-layer="5" to-port="0" />
131
+ <edge from-layer="3" from-port="0" to-layer="4" to-port="0" />
132
+ <edge from-layer="4" from-port="1" to-layer="5" to-port="1" />
133
+ <edge from-layer="4" from-port="2" to-layer="5" to-port="2" />
134
+ <edge from-layer="4" from-port="3" to-layer="5" to-port="3" />
135
+ <edge from-layer="5" from-port="6" to-layer="6" to-port="0" />
136
+ <edge from-layer="5" from-port="7" to-layer="6" to-port="1" />
137
+ <edge from-layer="5" from-port="8" to-layer="6" to-port="2" />
138
+ <edge from-layer="6" from-port="3" to-layer="7" to-port="0" />
139
+ </edges>
140
+ <rt_info />
141
+ </net>
ov_models/bert-base-uncased/openvino_tokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ce42f4dd6b8b2d87432fb6a71276087ab7dfd0adfe1a70601bba66b600bbc5
3
+ size 323439
ov_models/bert-base-uncased/openvino_tokenizer.xml ADDED
@@ -0,0 +1,997 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="tokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_1" type="Parameter" version="opset1">
5
+ <data shape="?" element_type="string" />
6
+ <output>
7
+ <port id="0" precision="STRING" names="Parameter_1">
8
+ <dim>-1</dim>
9
+ </port>
10
+ </output>
11
+ </layer>
12
+ <layer id="1" name="Constant_7" type="Const" version="opset1">
13
+ <data element_type="i64" shape="" offset="0" size="8" />
14
+ <output>
15
+ <port id="0" precision="I64" />
16
+ </output>
17
+ </layer>
18
+ <layer id="2" name="StringTensorUnpack_2" type="StringTensorUnpack" version="extension">
19
+ <data mode="begins_ends" />
20
+ <input>
21
+ <port id="0" precision="STRING">
22
+ <dim>-1</dim>
23
+ </port>
24
+ </input>
25
+ <output>
26
+ <port id="1" precision="I32">
27
+ <dim>-1</dim>
28
+ </port>
29
+ <port id="2" precision="I32">
30
+ <dim>-1</dim>
31
+ </port>
32
+ <port id="3" precision="U8">
33
+ <dim>-1</dim>
34
+ </port>
35
+ </output>
36
+ </layer>
37
+ <layer id="3" name="ShapeOf_3" type="ShapeOf" version="opset3">
38
+ <data output_type="i64" />
39
+ <input>
40
+ <port id="0" precision="I32">
41
+ <dim>-1</dim>
42
+ </port>
43
+ </input>
44
+ <output>
45
+ <port id="1" precision="I64">
46
+ <dim>1</dim>
47
+ </port>
48
+ </output>
49
+ </layer>
50
+ <layer id="4" name="Constant_4" type="Const" version="opset1">
51
+ <data element_type="i64" shape="" offset="0" size="8" />
52
+ <output>
53
+ <port id="0" precision="I64" />
54
+ </output>
55
+ </layer>
56
+ <layer id="5" name="Constant_5" type="Const" version="opset1">
57
+ <data element_type="i64" shape="" offset="0" size="8" />
58
+ <output>
59
+ <port id="0" precision="I64" />
60
+ </output>
61
+ </layer>
62
+ <layer id="6" name="Gather_6" type="Gather" version="opset8">
63
+ <data batch_dims="0" />
64
+ <input>
65
+ <port id="0" precision="I64">
66
+ <dim>1</dim>
67
+ </port>
68
+ <port id="1" precision="I64" />
69
+ <port id="2" precision="I64" />
70
+ </input>
71
+ <output>
72
+ <port id="3" precision="I64" />
73
+ </output>
74
+ </layer>
75
+ <layer id="7" name="Constant_8" type="Const" version="opset1">
76
+ <data element_type="i64" shape="" offset="8" size="8" />
77
+ <output>
78
+ <port id="0" precision="I64" />
79
+ </output>
80
+ </layer>
81
+ <layer id="8" name="Range_9" type="Range" version="opset4">
82
+ <data output_type="i32" />
83
+ <input>
84
+ <port id="0" precision="I64" />
85
+ <port id="1" precision="I64" />
86
+ <port id="2" precision="I64" />
87
+ </input>
88
+ <output>
89
+ <port id="3" precision="I32">
90
+ <dim>-1</dim>
91
+ </port>
92
+ </output>
93
+ </layer>
94
+ <layer id="9" name="Constant_10" type="Const" version="opset1">
95
+ <data element_type="i64" shape="" offset="8" size="8" />
96
+ <output>
97
+ <port id="0" precision="I64" />
98
+ </output>
99
+ </layer>
100
+ <layer id="10" name="Constant_11" type="Const" version="opset1">
101
+ <data element_type="i64" shape="" offset="8" size="8" />
102
+ <output>
103
+ <port id="0" precision="I64" />
104
+ </output>
105
+ </layer>
106
+ <layer id="11" name="Add_12" type="Add" version="opset1">
107
+ <data auto_broadcast="numpy" />
108
+ <input>
109
+ <port id="0" precision="I64" />
110
+ <port id="1" precision="I64" />
111
+ </input>
112
+ <output>
113
+ <port id="2" precision="I64" />
114
+ </output>
115
+ </layer>
116
+ <layer id="12" name="Constant_13" type="Const" version="opset1">
117
+ <data element_type="i64" shape="" offset="8" size="8" />
118
+ <output>
119
+ <port id="0" precision="I64" />
120
+ </output>
121
+ </layer>
122
+ <layer id="13" name="Range_14" type="Range" version="opset4">
123
+ <data output_type="i32" />
124
+ <input>
125
+ <port id="0" precision="I64" />
126
+ <port id="1" precision="I64" />
127
+ <port id="2" precision="I64" />
128
+ </input>
129
+ <output>
130
+ <port id="3" precision="I32">
131
+ <dim>-1</dim>
132
+ </port>
133
+ </output>
134
+ </layer>
135
+ <layer id="14" name="Constant_76" type="Const" version="opset1">
136
+ <data element_type="u8" shape="50" offset="16" size="50" />
137
+ <output>
138
+ <port id="0" precision="U8">
139
+ <dim>50</dim>
140
+ </port>
141
+ </output>
142
+ </layer>
143
+ <layer id="15" name="SpecialTokensSplit_77" type="SpecialTokensSplit" version="extension">
144
+ <input>
145
+ <port id="0" precision="I32">
146
+ <dim>-1</dim>
147
+ </port>
148
+ <port id="1" precision="I32">
149
+ <dim>-1</dim>
150
+ </port>
151
+ <port id="2" precision="I32">
152
+ <dim>-1</dim>
153
+ </port>
154
+ <port id="3" precision="I32">
155
+ <dim>-1</dim>
156
+ </port>
157
+ <port id="4" precision="U8">
158
+ <dim>-1</dim>
159
+ </port>
160
+ <port id="5" precision="U8">
161
+ <dim>50</dim>
162
+ </port>
163
+ </input>
164
+ <output>
165
+ <port id="6" precision="I32">
166
+ <dim>-1</dim>
167
+ </port>
168
+ <port id="7" precision="I32">
169
+ <dim>-1</dim>
170
+ </port>
171
+ <port id="8" precision="I32">
172
+ <dim>-1</dim>
173
+ </port>
174
+ <port id="9" precision="I32">
175
+ <dim>-1</dim>
176
+ </port>
177
+ <port id="10" precision="U8">
178
+ <dim>-1</dim>
179
+ </port>
180
+ <port id="11" precision="BOOL">
181
+ <dim>-1</dim>
182
+ </port>
183
+ </output>
184
+ </layer>
185
+ <layer id="16" name="Constant_79" type="Const" version="opset1">
186
+ <data element_type="u8" shape="39" offset="66" size="39" />
187
+ <output>
188
+ <port id="0" precision="U8">
189
+ <dim>39</dim>
190
+ </port>
191
+ </output>
192
+ </layer>
193
+ <layer id="17" name="Constant_81" type="Const" version="opset1">
194
+ <data element_type="u8" shape="0" offset="105" size="1" />
195
+ <output>
196
+ <port id="0" precision="U8">
197
+ <dim>0</dim>
198
+ </port>
199
+ </output>
200
+ </layer>
201
+ <layer id="18" name="RegexNormalization_82" type="RegexNormalization" version="extension">
202
+ <data global_replace="true" />
203
+ <input>
204
+ <port id="0" precision="I32">
205
+ <dim>-1</dim>
206
+ </port>
207
+ <port id="1" precision="I32">
208
+ <dim>-1</dim>
209
+ </port>
210
+ <port id="2" precision="U8">
211
+ <dim>-1</dim>
212
+ </port>
213
+ <port id="3" precision="BOOL">
214
+ <dim>-1</dim>
215
+ </port>
216
+ <port id="4" precision="U8">
217
+ <dim>39</dim>
218
+ </port>
219
+ <port id="5" precision="U8">
220
+ <dim>0</dim>
221
+ </port>
222
+ </input>
223
+ <output>
224
+ <port id="6" precision="I32">
225
+ <dim>-1</dim>
226
+ </port>
227
+ <port id="7" precision="I32">
228
+ <dim>-1</dim>
229
+ </port>
230
+ <port id="8" precision="U8">
231
+ <dim>-1</dim>
232
+ </port>
233
+ <port id="9" precision="BOOL">
234
+ <dim>-1</dim>
235
+ </port>
236
+ </output>
237
+ </layer>
238
+ <layer id="19" name="NormalizeUnicode_83" type="NormalizeUnicode" version="extension">
239
+ <data normalization_form="NFD" />
240
+ <input>
241
+ <port id="0" precision="I32">
242
+ <dim>-1</dim>
243
+ </port>
244
+ <port id="1" precision="I32">
245
+ <dim>-1</dim>
246
+ </port>
247
+ <port id="2" precision="U8">
248
+ <dim>-1</dim>
249
+ </port>
250
+ <port id="3" precision="BOOL">
251
+ <dim>-1</dim>
252
+ </port>
253
+ </input>
254
+ <output>
255
+ <port id="4" precision="I32">
256
+ <dim>-1</dim>
257
+ </port>
258
+ <port id="5" precision="I32">
259
+ <dim>-1</dim>
260
+ </port>
261
+ <port id="6" precision="U8">
262
+ <dim>-1</dim>
263
+ </port>
264
+ <port id="7" precision="BOOL">
265
+ <dim>-1</dim>
266
+ </port>
267
+ </output>
268
+ </layer>
269
+ <layer id="20" name="Constant_85" type="Const" version="opset1">
270
+ <data element_type="u8" shape="6" offset="106" size="6" />
271
+ <output>
272
+ <port id="0" precision="U8">
273
+ <dim>6</dim>
274
+ </port>
275
+ </output>
276
+ </layer>
277
+ <layer id="21" name="Constant_87" type="Const" version="opset1">
278
+ <data element_type="u8" shape="0" offset="105" size="1" />
279
+ <output>
280
+ <port id="0" precision="U8">
281
+ <dim>0</dim>
282
+ </port>
283
+ </output>
284
+ </layer>
285
+ <layer id="22" name="RegexNormalization_88" type="RegexNormalization" version="extension">
286
+ <data global_replace="true" />
287
+ <input>
288
+ <port id="0" precision="I32">
289
+ <dim>-1</dim>
290
+ </port>
291
+ <port id="1" precision="I32">
292
+ <dim>-1</dim>
293
+ </port>
294
+ <port id="2" precision="U8">
295
+ <dim>-1</dim>
296
+ </port>
297
+ <port id="3" precision="BOOL">
298
+ <dim>-1</dim>
299
+ </port>
300
+ <port id="4" precision="U8">
301
+ <dim>6</dim>
302
+ </port>
303
+ <port id="5" precision="U8">
304
+ <dim>0</dim>
305
+ </port>
306
+ </input>
307
+ <output>
308
+ <port id="6" precision="I32">
309
+ <dim>-1</dim>
310
+ </port>
311
+ <port id="7" precision="I32">
312
+ <dim>-1</dim>
313
+ </port>
314
+ <port id="8" precision="U8">
315
+ <dim>-1</dim>
316
+ </port>
317
+ <port id="9" precision="BOOL">
318
+ <dim>-1</dim>
319
+ </port>
320
+ </output>
321
+ </layer>
322
+ <layer id="23" name="CaseFold_89" type="CaseFold" version="extension">
323
+ <data encoding="utf-8" />
324
+ <input>
325
+ <port id="0" precision="I32">
326
+ <dim>-1</dim>
327
+ </port>
328
+ <port id="1" precision="I32">
329
+ <dim>-1</dim>
330
+ </port>
331
+ <port id="2" precision="U8">
332
+ <dim>-1</dim>
333
+ </port>
334
+ <port id="3" precision="BOOL">
335
+ <dim>-1</dim>
336
+ </port>
337
+ </input>
338
+ <output>
339
+ <port id="4" precision="I32">
340
+ <dim>-1</dim>
341
+ </port>
342
+ <port id="5" precision="I32">
343
+ <dim>-1</dim>
344
+ </port>
345
+ <port id="6" precision="U8">
346
+ <dim>-1</dim>
347
+ </port>
348
+ <port id="7" precision="BOOL">
349
+ <dim>-1</dim>
350
+ </port>
351
+ </output>
352
+ </layer>
353
+ <layer id="24" name="Constant_91" type="Const" version="opset1">
354
+ <data element_type="u8" shape="3" offset="112" size="3" />
355
+ <output>
356
+ <port id="0" precision="U8">
357
+ <dim>3</dim>
358
+ </port>
359
+ </output>
360
+ </layer>
361
+ <layer id="25" name="RegexSplit_92" type="RegexSplit" version="extension">
362
+ <data behaviour="remove" invert="false" max_splits="-1" />
363
+ <input>
364
+ <port id="0" precision="I32">
365
+ <dim>-1</dim>
366
+ </port>
367
+ <port id="1" precision="I32">
368
+ <dim>-1</dim>
369
+ </port>
370
+ <port id="2" precision="I32">
371
+ <dim>-1</dim>
372
+ </port>
373
+ <port id="3" precision="I32">
374
+ <dim>-1</dim>
375
+ </port>
376
+ <port id="4" precision="U8">
377
+ <dim>-1</dim>
378
+ </port>
379
+ <port id="5" precision="BOOL">
380
+ <dim>-1</dim>
381
+ </port>
382
+ <port id="6" precision="U8">
383
+ <dim>3</dim>
384
+ </port>
385
+ </input>
386
+ <output>
387
+ <port id="7" precision="I32">
388
+ <dim>-1</dim>
389
+ </port>
390
+ <port id="8" precision="I32">
391
+ <dim>-1</dim>
392
+ </port>
393
+ <port id="9" precision="I32">
394
+ <dim>-1</dim>
395
+ </port>
396
+ <port id="10" precision="I32">
397
+ <dim>-1</dim>
398
+ </port>
399
+ <port id="11" precision="U8">
400
+ <dim>-1</dim>
401
+ </port>
402
+ <port id="12" precision="BOOL">
403
+ <dim>-1</dim>
404
+ </port>
405
+ </output>
406
+ </layer>
407
+ <layer id="26" name="Constant_94" type="Const" version="opset1">
408
+ <data element_type="u8" shape="202" offset="115" size="202" />
409
+ <output>
410
+ <port id="0" precision="U8">
411
+ <dim>202</dim>
412
+ </port>
413
+ </output>
414
+ </layer>
415
+ <layer id="27" name="RegexSplit_95" type="RegexSplit" version="extension">
416
+ <data behaviour="isolate" invert="false" max_splits="-1" />
417
+ <input>
418
+ <port id="0" precision="I32">
419
+ <dim>-1</dim>
420
+ </port>
421
+ <port id="1" precision="I32">
422
+ <dim>-1</dim>
423
+ </port>
424
+ <port id="2" precision="I32">
425
+ <dim>-1</dim>
426
+ </port>
427
+ <port id="3" precision="I32">
428
+ <dim>-1</dim>
429
+ </port>
430
+ <port id="4" precision="U8">
431
+ <dim>-1</dim>
432
+ </port>
433
+ <port id="5" precision="BOOL">
434
+ <dim>-1</dim>
435
+ </port>
436
+ <port id="6" precision="U8">
437
+ <dim>202</dim>
438
+ </port>
439
+ </input>
440
+ <output>
441
+ <port id="7" precision="I32">
442
+ <dim>-1</dim>
443
+ </port>
444
+ <port id="8" precision="I32">
445
+ <dim>-1</dim>
446
+ </port>
447
+ <port id="9" precision="I32">
448
+ <dim>-1</dim>
449
+ </port>
450
+ <port id="10" precision="I32">
451
+ <dim>-1</dim>
452
+ </port>
453
+ <port id="11" precision="U8">
454
+ <dim>-1</dim>
455
+ </port>
456
+ <port id="12" precision="BOOL">
457
+ <dim>-1</dim>
458
+ </port>
459
+ </output>
460
+ </layer>
461
+ <layer id="28" name="StringTensorPack_131" type="StringTensorPack" version="extension">
462
+ <data mode="begins_ends" />
463
+ <input>
464
+ <port id="0" precision="I32">
465
+ <dim>-1</dim>
466
+ </port>
467
+ <port id="1" precision="I32">
468
+ <dim>-1</dim>
469
+ </port>
470
+ <port id="2" precision="U8">
471
+ <dim>-1</dim>
472
+ </port>
473
+ </input>
474
+ <output>
475
+ <port id="3" precision="STRING" names="regex_string_output">
476
+ <dim>-1</dim>
477
+ </port>
478
+ </output>
479
+ </layer>
480
+ <layer id="30" name="Constant_105" type="Const" version="opset1">
481
+ <data element_type="i32" shape="" offset="317" size="4" />
482
+ <output>
483
+ <port id="0" precision="I32" />
484
+ </output>
485
+ </layer>
486
+ <layer id="31" name="Constant_106" type="Const" version="opset1">
487
+ <data element_type="i32" shape="" offset="321" size="4" />
488
+ <output>
489
+ <port id="0" precision="I32" />
490
+ </output>
491
+ </layer>
492
+ <layer id="32" name="Constant_107" type="Const" version="opset1">
493
+ <data element_type="i32" shape="1" offset="325" size="4" />
494
+ <output>
495
+ <port id="0" precision="I32">
496
+ <dim>1</dim>
497
+ </port>
498
+ </output>
499
+ </layer>
500
+ <layer id="33" name="Constant_97" type="Const" version="opset1">
501
+ <data element_type="u8" shape="323082" offset="329" size="323082" />
502
+ <output>
503
+ <port id="0" precision="U8">
504
+ <dim>323082</dim>
505
+ </port>
506
+ </output>
507
+ </layer>
508
+ <layer id="34" name="StringTensorUnpack_98" type="StringTensorUnpack" version="extension">
509
+ <data mode="begins_ends" />
510
+ <input>
511
+ <port id="0" precision="U8">
512
+ <dim>323082</dim>
513
+ </port>
514
+ </input>
515
+ <output>
516
+ <port id="1" precision="I32">
517
+ <dim>-1</dim>
518
+ </port>
519
+ <port id="2" precision="I32">
520
+ <dim>-1</dim>
521
+ </port>
522
+ <port id="3" precision="U8">
523
+ <dim>-1</dim>
524
+ </port>
525
+ </output>
526
+ </layer>
527
+ <layer id="35" name="Constant_99" type="Const" version="opset1">
528
+ <data element_type="i64" shape="" offset="323411" size="8" />
529
+ <output>
530
+ <port id="0" precision="I64" />
531
+ </output>
532
+ </layer>
533
+ <layer id="36" name="WordpieceTokenizer_100" type="WordpieceTokenizer" version="extension">
534
+ <data suffix_indicator="##" max_bytes_per_word="100" />
535
+ <input>
536
+ <port id="0" precision="I32">
537
+ <dim>-1</dim>
538
+ </port>
539
+ <port id="1" precision="I32">
540
+ <dim>-1</dim>
541
+ </port>
542
+ <port id="2" precision="I32">
543
+ <dim>-1</dim>
544
+ </port>
545
+ <port id="3" precision="I32">
546
+ <dim>-1</dim>
547
+ </port>
548
+ <port id="4" precision="U8">
549
+ <dim>-1</dim>
550
+ </port>
551
+ <port id="5" precision="I32">
552
+ <dim>-1</dim>
553
+ </port>
554
+ <port id="6" precision="I32">
555
+ <dim>-1</dim>
556
+ </port>
557
+ <port id="7" precision="U8">
558
+ <dim>-1</dim>
559
+ </port>
560
+ <port id="8" precision="I64" />
561
+ </input>
562
+ <output>
563
+ <port id="9" precision="I32">
564
+ <dim>-1</dim>
565
+ </port>
566
+ <port id="10" precision="I32">
567
+ <dim>-1</dim>
568
+ </port>
569
+ <port id="11" precision="I32">
570
+ <dim>-1</dim>
571
+ </port>
572
+ </output>
573
+ </layer>
574
+ <layer id="37" name="Subtract_101" type="Subtract" version="opset1">
575
+ <data auto_broadcast="numpy" />
576
+ <input>
577
+ <port id="0" precision="I32">
578
+ <dim>-1</dim>
579
+ </port>
580
+ <port id="1" precision="I32">
581
+ <dim>-1</dim>
582
+ </port>
583
+ </input>
584
+ <output>
585
+ <port id="2" precision="I32">
586
+ <dim>-1</dim>
587
+ </port>
588
+ </output>
589
+ </layer>
590
+ <layer id="38" name="Constant_102" type="Const" version="opset1">
591
+ <data element_type="i32" shape="" offset="323419" size="4" />
592
+ <output>
593
+ <port id="0" precision="I32" />
594
+ </output>
595
+ </layer>
596
+ <layer id="39" name="Minimum_103" type="Minimum" version="opset1">
597
+ <data auto_broadcast="numpy" />
598
+ <input>
599
+ <port id="0" precision="I32">
600
+ <dim>-1</dim>
601
+ </port>
602
+ <port id="1" precision="I32" />
603
+ </input>
604
+ <output>
605
+ <port id="2" precision="I32">
606
+ <dim>-1</dim>
607
+ </port>
608
+ </output>
609
+ </layer>
610
+ <layer id="40" name="Add_104" type="Add" version="opset1">
611
+ <data auto_broadcast="numpy" />
612
+ <input>
613
+ <port id="0" precision="I32">
614
+ <dim>-1</dim>
615
+ </port>
616
+ <port id="1" precision="I32">
617
+ <dim>-1</dim>
618
+ </port>
619
+ </input>
620
+ <output>
621
+ <port id="2" precision="I32">
622
+ <dim>-1</dim>
623
+ </port>
624
+ </output>
625
+ </layer>
626
+ <layer id="41" name="Constant_108" type="Const" version="opset1">
627
+ <data element_type="i32" shape="" offset="317" size="4" />
628
+ <output>
629
+ <port id="0" precision="I32" />
630
+ </output>
631
+ </layer>
632
+ <layer id="42" name="Constant_109" type="Const" version="opset1">
633
+ <data element_type="i32" shape="" offset="321" size="4" />
634
+ <output>
635
+ <port id="0" precision="I32" />
636
+ </output>
637
+ </layer>
638
+ <layer id="43" name="Constant_110" type="Const" version="opset1">
639
+ <data element_type="i32" shape="1" offset="323423" size="4" />
640
+ <output>
641
+ <port id="0" precision="I32">
642
+ <dim>1</dim>
643
+ </port>
644
+ </output>
645
+ </layer>
646
+ <layer id="44" name="Constant_111" type="Const" version="opset1">
647
+ <data element_type="i32" shape="3" offset="323427" size="12" />
648
+ <output>
649
+ <port id="0" precision="I32">
650
+ <dim>3</dim>
651
+ </port>
652
+ </output>
653
+ </layer>
654
+ <layer id="45" name="CombineSegments_112" type="CombineSegments" version="extension">
655
+ <input>
656
+ <port id="0" precision="I32" />
657
+ <port id="1" precision="I32" />
658
+ <port id="2" precision="I32">
659
+ <dim>1</dim>
660
+ </port>
661
+ <port id="3" precision="I32">
662
+ <dim>-1</dim>
663
+ </port>
664
+ <port id="4" precision="I32">
665
+ <dim>-1</dim>
666
+ </port>
667
+ <port id="5" precision="I32">
668
+ <dim>-1</dim>
669
+ </port>
670
+ <port id="6" precision="I32" />
671
+ <port id="7" precision="I32" />
672
+ <port id="8" precision="I32">
673
+ <dim>1</dim>
674
+ </port>
675
+ <port id="9" precision="I32">
676
+ <dim>3</dim>
677
+ </port>
678
+ </input>
679
+ <output>
680
+ <port id="10" precision="I32">
681
+ <dim>-1</dim>
682
+ </port>
683
+ <port id="11" precision="I32">
684
+ <dim>-1</dim>
685
+ </port>
686
+ <port id="12" precision="I32">
687
+ <dim>-1</dim>
688
+ </port>
689
+ <port id="13" precision="I32">
690
+ <dim>-1</dim>
691
+ </port>
692
+ <port id="14" precision="I32">
693
+ <dim>-1</dim>
694
+ </port>
695
+ <port id="15" precision="I32">
696
+ <dim>-1</dim>
697
+ </port>
698
+ </output>
699
+ </layer>
700
+ <layer id="46" name="Subtract_113" type="Subtract" version="opset1">
701
+ <data auto_broadcast="numpy" />
702
+ <input>
703
+ <port id="0" precision="I32">
704
+ <dim>-1</dim>
705
+ </port>
706
+ <port id="1" precision="I32">
707
+ <dim>-1</dim>
708
+ </port>
709
+ </input>
710
+ <output>
711
+ <port id="2" precision="I32">
712
+ <dim>-1</dim>
713
+ </port>
714
+ </output>
715
+ </layer>
716
+ <layer id="47" name="Constant_114" type="Const" version="opset1">
717
+ <data element_type="i32" shape="" offset="317" size="4" />
718
+ <output>
719
+ <port id="0" precision="I32" />
720
+ </output>
721
+ </layer>
722
+ <layer id="48" name="ReduceMax_115" type="ReduceMax" version="opset1">
723
+ <data keep_dims="false" />
724
+ <input>
725
+ <port id="0" precision="I32">
726
+ <dim>-1</dim>
727
+ </port>
728
+ <port id="1" precision="I32" />
729
+ </input>
730
+ <output>
731
+ <port id="2" precision="I32" />
732
+ </output>
733
+ </layer>
734
+ <layer id="49" name="Constant_116" type="Const" version="opset1">
735
+ <data element_type="i32" shape="" offset="317" size="4" />
736
+ <output>
737
+ <port id="0" precision="I32" />
738
+ </output>
739
+ </layer>
740
+ <layer id="50" name="RaggedToDense_117" type="RaggedToDense" version="extension">
741
+ <data pad_right="true" />
742
+ <input>
743
+ <port id="0" precision="I32">
744
+ <dim>-1</dim>
745
+ </port>
746
+ <port id="1" precision="I32">
747
+ <dim>-1</dim>
748
+ </port>
749
+ <port id="2" precision="I32">
750
+ <dim>-1</dim>
751
+ </port>
752
+ <port id="3" precision="I32" />
753
+ <port id="4" precision="I32" />
754
+ </input>
755
+ <output>
756
+ <port id="5" precision="I32">
757
+ <dim>-1</dim>
758
+ <dim>-1</dim>
759
+ </port>
760
+ <port id="6" precision="BOOL">
761
+ <dim>-1</dim>
762
+ <dim>-1</dim>
763
+ </port>
764
+ </output>
765
+ </layer>
766
+ <layer id="51" name="Convert_118" type="Convert" version="opset1">
767
+ <data destination_type="i32" />
768
+ <input>
769
+ <port id="0" precision="BOOL">
770
+ <dim>-1</dim>
771
+ <dim>-1</dim>
772
+ </port>
773
+ </input>
774
+ <output>
775
+ <port id="1" precision="I32">
776
+ <dim>-1</dim>
777
+ <dim>-1</dim>
778
+ </port>
779
+ </output>
780
+ </layer>
781
+ <layer id="52" name="Convert_118" type="Convert" version="opset1">
782
+ <data destination_type="i64" />
783
+ <input>
784
+ <port id="0" precision="I32">
785
+ <dim>-1</dim>
786
+ <dim>-1</dim>
787
+ </port>
788
+ </input>
789
+ <output>
790
+ <port id="1" precision="I64" names="attention_mask">
791
+ <dim>-1</dim>
792
+ <dim>-1</dim>
793
+ </port>
794
+ </output>
795
+ </layer>
796
+ <layer id="54" name="Constant_119" type="Const" version="opset1">
797
+ <data element_type="i32" shape="" offset="317" size="4" />
798
+ <output>
799
+ <port id="0" precision="I32" />
800
+ </output>
801
+ </layer>
802
+ <layer id="55" name="RaggedToDense_120" type="RaggedToDense" version="extension">
803
+ <data pad_right="true" />
804
+ <input>
805
+ <port id="0" precision="I32">
806
+ <dim>-1</dim>
807
+ </port>
808
+ <port id="1" precision="I32">
809
+ <dim>-1</dim>
810
+ </port>
811
+ <port id="2" precision="I32">
812
+ <dim>-1</dim>
813
+ </port>
814
+ <port id="3" precision="I32" />
815
+ <port id="4" precision="I32" />
816
+ </input>
817
+ <output>
818
+ <port id="5" precision="I32">
819
+ <dim>-1</dim>
820
+ <dim>-1</dim>
821
+ </port>
822
+ <port id="6" precision="BOOL">
823
+ <dim>-1</dim>
824
+ <dim>-1</dim>
825
+ </port>
826
+ </output>
827
+ </layer>
828
+ <layer id="56" name="RaggedToDense_120.0" type="Convert" version="opset1">
829
+ <data destination_type="i64" />
830
+ <input>
831
+ <port id="0" precision="I32">
832
+ <dim>-1</dim>
833
+ <dim>-1</dim>
834
+ </port>
835
+ </input>
836
+ <output>
837
+ <port id="1" precision="I64" names="token_type_ids">
838
+ <dim>-1</dim>
839
+ <dim>-1</dim>
840
+ </port>
841
+ </output>
842
+ </layer>
843
+ <layer id="58" name="RaggedToDense_117.0" type="Convert" version="opset1">
844
+ <data destination_type="i64" />
845
+ <input>
846
+ <port id="0" precision="I32">
847
+ <dim>-1</dim>
848
+ <dim>-1</dim>
849
+ </port>
850
+ </input>
851
+ <output>
852
+ <port id="1" precision="I64" names="input_ids">
853
+ <dim>-1</dim>
854
+ <dim>-1</dim>
855
+ </port>
856
+ </output>
857
+ </layer>
858
+ <layer id="59" name="Result_121" type="Result" version="opset1">
859
+ <input>
860
+ <port id="0" precision="I64">
861
+ <dim>-1</dim>
862
+ <dim>-1</dim>
863
+ </port>
864
+ </input>
865
+ </layer>
866
+ <layer id="57" name="Result_122" type="Result" version="opset1">
867
+ <input>
868
+ <port id="0" precision="I64">
869
+ <dim>-1</dim>
870
+ <dim>-1</dim>
871
+ </port>
872
+ </input>
873
+ </layer>
874
+ <layer id="53" name="Result_123" type="Result" version="opset1">
875
+ <input>
876
+ <port id="0" precision="I64">
877
+ <dim>-1</dim>
878
+ <dim>-1</dim>
879
+ </port>
880
+ </input>
881
+ </layer>
882
+ <layer id="29" name="Result_132" type="Result" version="opset1">
883
+ <input>
884
+ <port id="0" precision="STRING">
885
+ <dim>-1</dim>
886
+ </port>
887
+ </input>
888
+ </layer>
889
+ </layers>
890
+ <edges>
891
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
892
+ <edge from-layer="1" from-port="0" to-layer="8" to-port="0" />
893
+ <edge from-layer="2" from-port="1" to-layer="3" to-port="0" />
894
+ <edge from-layer="2" from-port="3" to-layer="15" to-port="4" />
895
+ <edge from-layer="2" from-port="2" to-layer="15" to-port="3" />
896
+ <edge from-layer="2" from-port="1" to-layer="15" to-port="2" />
897
+ <edge from-layer="3" from-port="1" to-layer="6" to-port="0" />
898
+ <edge from-layer="4" from-port="0" to-layer="6" to-port="1" />
899
+ <edge from-layer="5" from-port="0" to-layer="6" to-port="2" />
900
+ <edge from-layer="6" from-port="3" to-layer="11" to-port="0" />
901
+ <edge from-layer="6" from-port="3" to-layer="8" to-port="1" />
902
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
903
+ <edge from-layer="8" from-port="3" to-layer="15" to-port="0" />
904
+ <edge from-layer="9" from-port="0" to-layer="13" to-port="0" />
905
+ <edge from-layer="10" from-port="0" to-layer="11" to-port="1" />
906
+ <edge from-layer="11" from-port="2" to-layer="13" to-port="1" />
907
+ <edge from-layer="12" from-port="0" to-layer="13" to-port="2" />
908
+ <edge from-layer="13" from-port="3" to-layer="15" to-port="1" />
909
+ <edge from-layer="14" from-port="0" to-layer="15" to-port="5" />
910
+ <edge from-layer="15" from-port="6" to-layer="25" to-port="0" />
911
+ <edge from-layer="15" from-port="7" to-layer="25" to-port="1" />
912
+ <edge from-layer="15" from-port="11" to-layer="18" to-port="3" />
913
+ <edge from-layer="15" from-port="10" to-layer="18" to-port="2" />
914
+ <edge from-layer="15" from-port="9" to-layer="18" to-port="1" />
915
+ <edge from-layer="15" from-port="8" to-layer="18" to-port="0" />
916
+ <edge from-layer="16" from-port="0" to-layer="18" to-port="4" />
917
+ <edge from-layer="17" from-port="0" to-layer="18" to-port="5" />
918
+ <edge from-layer="18" from-port="6" to-layer="19" to-port="0" />
919
+ <edge from-layer="18" from-port="7" to-layer="19" to-port="1" />
920
+ <edge from-layer="18" from-port="8" to-layer="19" to-port="2" />
921
+ <edge from-layer="18" from-port="9" to-layer="19" to-port="3" />
922
+ <edge from-layer="19" from-port="4" to-layer="22" to-port="0" />
923
+ <edge from-layer="19" from-port="6" to-layer="22" to-port="2" />
924
+ <edge from-layer="19" from-port="5" to-layer="22" to-port="1" />
925
+ <edge from-layer="19" from-port="7" to-layer="22" to-port="3" />
926
+ <edge from-layer="20" from-port="0" to-layer="22" to-port="4" />
927
+ <edge from-layer="21" from-port="0" to-layer="22" to-port="5" />
928
+ <edge from-layer="22" from-port="6" to-layer="23" to-port="0" />
929
+ <edge from-layer="22" from-port="7" to-layer="23" to-port="1" />
930
+ <edge from-layer="22" from-port="8" to-layer="23" to-port="2" />
931
+ <edge from-layer="22" from-port="9" to-layer="23" to-port="3" />
932
+ <edge from-layer="23" from-port="5" to-layer="25" to-port="3" />
933
+ <edge from-layer="23" from-port="7" to-layer="25" to-port="5" />
934
+ <edge from-layer="23" from-port="6" to-layer="25" to-port="4" />
935
+ <edge from-layer="23" from-port="4" to-layer="25" to-port="2" />
936
+ <edge from-layer="24" from-port="0" to-layer="25" to-port="6" />
937
+ <edge from-layer="25" from-port="7" to-layer="27" to-port="0" />
938
+ <edge from-layer="25" from-port="8" to-layer="27" to-port="1" />
939
+ <edge from-layer="25" from-port="9" to-layer="27" to-port="2" />
940
+ <edge from-layer="25" from-port="10" to-layer="27" to-port="3" />
941
+ <edge from-layer="25" from-port="11" to-layer="27" to-port="4" />
942
+ <edge from-layer="25" from-port="12" to-layer="27" to-port="5" />
943
+ <edge from-layer="26" from-port="0" to-layer="27" to-port="6" />
944
+ <edge from-layer="27" from-port="10" to-layer="28" to-port="1" />
945
+ <edge from-layer="27" from-port="9" to-layer="28" to-port="0" />
946
+ <edge from-layer="27" from-port="11" to-layer="36" to-port="4" />
947
+ <edge from-layer="27" from-port="10" to-layer="36" to-port="3" />
948
+ <edge from-layer="27" from-port="9" to-layer="36" to-port="2" />
949
+ <edge from-layer="27" from-port="8" to-layer="36" to-port="1" />
950
+ <edge from-layer="27" from-port="7" to-layer="36" to-port="0" />
951
+ <edge from-layer="27" from-port="11" to-layer="28" to-port="2" />
952
+ <edge from-layer="28" from-port="3" to-layer="29" to-port="0" />
953
+ <edge from-layer="30" from-port="0" to-layer="45" to-port="0" />
954
+ <edge from-layer="31" from-port="0" to-layer="45" to-port="1" />
955
+ <edge from-layer="32" from-port="0" to-layer="45" to-port="2" />
956
+ <edge from-layer="33" from-port="0" to-layer="34" to-port="0" />
957
+ <edge from-layer="34" from-port="1" to-layer="36" to-port="5" />
958
+ <edge from-layer="34" from-port="2" to-layer="36" to-port="6" />
959
+ <edge from-layer="34" from-port="3" to-layer="36" to-port="7" />
960
+ <edge from-layer="35" from-port="0" to-layer="36" to-port="8" />
961
+ <edge from-layer="36" from-port="11" to-layer="45" to-port="5" />
962
+ <edge from-layer="36" from-port="9" to-layer="45" to-port="3" />
963
+ <edge from-layer="36" from-port="9" to-layer="40" to-port="0" />
964
+ <edge from-layer="36" from-port="9" to-layer="37" to-port="1" />
965
+ <edge from-layer="36" from-port="10" to-layer="37" to-port="0" />
966
+ <edge from-layer="37" from-port="2" to-layer="39" to-port="0" />
967
+ <edge from-layer="38" from-port="0" to-layer="39" to-port="1" />
968
+ <edge from-layer="39" from-port="2" to-layer="40" to-port="1" />
969
+ <edge from-layer="40" from-port="2" to-layer="45" to-port="4" />
970
+ <edge from-layer="41" from-port="0" to-layer="45" to-port="6" />
971
+ <edge from-layer="42" from-port="0" to-layer="45" to-port="7" />
972
+ <edge from-layer="43" from-port="0" to-layer="45" to-port="8" />
973
+ <edge from-layer="44" from-port="0" to-layer="45" to-port="9" />
974
+ <edge from-layer="45" from-port="11" to-layer="50" to-port="1" />
975
+ <edge from-layer="45" from-port="15" to-layer="55" to-port="2" />
976
+ <edge from-layer="45" from-port="14" to-layer="55" to-port="1" />
977
+ <edge from-layer="45" from-port="13" to-layer="55" to-port="0" />
978
+ <edge from-layer="45" from-port="12" to-layer="50" to-port="2" />
979
+ <edge from-layer="45" from-port="10" to-layer="50" to-port="0" />
980
+ <edge from-layer="45" from-port="10" to-layer="46" to-port="1" />
981
+ <edge from-layer="45" from-port="11" to-layer="46" to-port="0" />
982
+ <edge from-layer="46" from-port="2" to-layer="48" to-port="0" />
983
+ <edge from-layer="47" from-port="0" to-layer="48" to-port="1" />
984
+ <edge from-layer="48" from-port="2" to-layer="50" to-port="3" />
985
+ <edge from-layer="48" from-port="2" to-layer="55" to-port="3" />
986
+ <edge from-layer="49" from-port="0" to-layer="50" to-port="4" />
987
+ <edge from-layer="50" from-port="6" to-layer="51" to-port="0" />
988
+ <edge from-layer="50" from-port="5" to-layer="58" to-port="0" />
989
+ <edge from-layer="51" from-port="1" to-layer="52" to-port="0" />
990
+ <edge from-layer="52" from-port="1" to-layer="53" to-port="0" />
991
+ <edge from-layer="54" from-port="0" to-layer="55" to-port="4" />
992
+ <edge from-layer="55" from-port="5" to-layer="56" to-port="0" />
993
+ <edge from-layer="56" from-port="1" to-layer="57" to-port="0" />
994
+ <edge from-layer="58" from-port="1" to-layer="59" to-port="0" />
995
+ </edges>
996
+ <rt_info />
997
+ </net>
ov_models/bert_EN_int8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:545ac52bb6573735f9dc40b5f0887c5ced4aad683afef31e6f1dfe2a2a7e2acc
3
+ size 95640404
ov_models/bert_EN_int8.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/bert_ZH_int8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b1ae79abf2c43bfa4aaa39e099364e7ec441d38ee8f405c348a115d4c15c310
3
+ size 153816008
ov_models/bert_ZH_int8.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/bert_ZH_static_int8.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cmudict_cache.txt ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cppinyin/cpp_pinyin.raw ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ae3c466ec74d09572a8dfcb3dec228e5931c4493f26b38ab4b5a5280acfb9b
3
+ size 12756812
ov_models/cppjieba/dict/README.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CppJieba字典
2
+
3
+ 文件后缀名代表的是词典的编码方式。
4
+ 比如filename.utf8 是 utf8编码,filename.gbk 是 gbk编码方式。
5
+
6
+
7
+ ## 分词
8
+
9
+ ### jieba.dict.utf8/gbk
10
+
11
+ 作为最大概率法(MPSegment: Max Probability)分词所使用的词典。
12
+
13
+ ### hmm_model.utf8/gbk
14
+
15
+ 作为隐式马尔科夫模型(HMMSegment: Hidden Markov Model)分词所使用的词典。
16
+
17
+ __对于MixSegment(混合MPSegment和HMMSegment两者)则同时使用以上两个词典__
18
+
19
+
20
+ ## 关键词抽取
21
+
22
+ ### idf.utf8
23
+
24
+ IDF(Inverse Document Frequency)
25
+ 在KeywordExtractor中,使用的是经典的TF-IDF算法,所以需要这么一个词典提供IDF信息。
26
+
27
+ ### stop_words.utf8
28
+
29
+ 停用词词典
30
+
31
+
ov_models/cppjieba/dict/hmm_model.utf8 ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cppjieba/dict/idf.utf8 ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cppjieba/dict/jieba.dict.utf8 ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cppjieba/dict/pos_dict/char_state_tab.utf8 ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cppjieba/dict/pos_dict/prob_emit.utf8 ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cppjieba/dict/pos_dict/prob_start.utf8 ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #初始状态的概率
2
+ #格式
3
+ #状态:概率
4
+ B,a:-4.7623052146
5
+ B,ad:-6.68006603678
6
+ B,ag:-3.14e+100
7
+ B,an:-8.69708322302
8
+ B,b:-5.01837436211
9
+ B,bg:-3.14e+100
10
+ B,c:-3.42388018495
11
+ B,d:-3.97504752976
12
+ B,df:-8.88897423083
13
+ B,dg:-3.14e+100
14
+ B,e:-8.56355183039
15
+ B,en:-3.14e+100
16
+ B,f:-5.49163041848
17
+ B,g:-3.14e+100
18
+ B,h:-13.53336513
19
+ B,i:-6.11578472756
20
+ B,in:-3.14e+100
21
+ B,j:-5.05761912847
22
+ B,jn:-3.14e+100
23
+ B,k:-3.14e+100
24
+ B,l:-4.90588358466
25
+ B,ln:-3.14e+100
26
+ B,m:-3.6524299819
27
+ B,mg:-3.14e+100
28
+ B,mq:-6.7869530014
29
+ B,n:-1.69662577975
30
+ B,ng:-3.14e+100
31
+ B,nr:-2.23104959138
32
+ B,nrfg:-5.87372217541
33
+ B,nrt:-4.98564273352
34
+ B,ns:-2.8228438315
35
+ B,nt:-4.84609166818
36
+ B,nz:-3.94698846058
37
+ B,o:-8.43349870215
38
+ B,p:-4.20098413209
39
+ B,q:-6.99812385896
40
+ B,qe:-3.14e+100
41
+ B,qg:-3.14e+100
42
+ B,r:-3.40981877908
43
+ B,rg:-3.14e+100
44
+ B,rr:-12.4347528413
45
+ B,rz:-7.94611647157
46
+ B,s:-5.52267359084
47
+ B,t:-3.36474790945
48
+ B,tg:-3.14e+100
49
+ B,u:-9.1639172775
50
+ B,ud:-3.14e+100
51
+ B,ug:-3.14e+100
52
+ B,uj:-3.14e+100
53
+ B,ul:-3.14e+100
54
+ B,uv:-3.14e+100
55
+ B,uz:-3.14e+100
56
+ B,v:-2.67405848743
57
+ B,vd:-9.04472876024
58
+ B,vg:-3.14e+100
59
+ B,vi:-12.4347528413
60
+ B,vn:-4.33156108902
61
+ B,vq:-12.1470707689
62
+ B,w:-3.14e+100
63
+ B,x:-3.14e+100
64
+ B,y:-9.84448567586
65
+ B,yg:-3.14e+100
66
+ B,z:-7.04568111149
67
+ B,zg:-3.14e+100
68
+ E,a:-3.14e+100
69
+ E,ad:-3.14e+100
70
+ E,ag:-3.14e+100
71
+ E,an:-3.14e+100
72
+ E,b:-3.14e+100
73
+ E,bg:-3.14e+100
74
+ E,c:-3.14e+100
75
+ E,d:-3.14e+100
76
+ E,df:-3.14e+100
77
+ E,dg:-3.14e+100
78
+ E,e:-3.14e+100
79
+ E,en:-3.14e+100
80
+ E,f:-3.14e+100
81
+ E,g:-3.14e+100
82
+ E,h:-3.14e+100
83
+ E,i:-3.14e+100
84
+ E,in:-3.14e+100
85
+ E,j:-3.14e+100
86
+ E,jn:-3.14e+100
87
+ E,k:-3.14e+100
88
+ E,l:-3.14e+100
89
+ E,ln:-3.14e+100
90
+ E,m:-3.14e+100
91
+ E,mg:-3.14e+100
92
+ E,mq:-3.14e+100
93
+ E,n:-3.14e+100
94
+ E,ng:-3.14e+100
95
+ E,nr:-3.14e+100
96
+ E,nrfg:-3.14e+100
97
+ E,nrt:-3.14e+100
98
+ E,ns:-3.14e+100
99
+ E,nt:-3.14e+100
100
+ E,nz:-3.14e+100
101
+ E,o:-3.14e+100
102
+ E,p:-3.14e+100
103
+ E,q:-3.14e+100
104
+ E,qe:-3.14e+100
105
+ E,qg:-3.14e+100
106
+ E,r:-3.14e+100
107
+ E,rg:-3.14e+100
108
+ E,rr:-3.14e+100
109
+ E,rz:-3.14e+100
110
+ E,s:-3.14e+100
111
+ E,t:-3.14e+100
112
+ E,tg:-3.14e+100
113
+ E,u:-3.14e+100
114
+ E,ud:-3.14e+100
115
+ E,ug:-3.14e+100
116
+ E,uj:-3.14e+100
117
+ E,ul:-3.14e+100
118
+ E,uv:-3.14e+100
119
+ E,uz:-3.14e+100
120
+ E,v:-3.14e+100
121
+ E,vd:-3.14e+100
122
+ E,vg:-3.14e+100
123
+ E,vi:-3.14e+100
124
+ E,vn:-3.14e+100
125
+ E,vq:-3.14e+100
126
+ E,w:-3.14e+100
127
+ E,x:-3.14e+100
128
+ E,y:-3.14e+100
129
+ E,yg:-3.14e+100
130
+ E,z:-3.14e+100
131
+ E,zg:-3.14e+100
132
+ M,a:-3.14e+100
133
+ M,ad:-3.14e+100
134
+ M,ag:-3.14e+100
135
+ M,an:-3.14e+100
136
+ M,b:-3.14e+100
137
+ M,bg:-3.14e+100
138
+ M,c:-3.14e+100
139
+ M,d:-3.14e+100
140
+ M,df:-3.14e+100
141
+ M,dg:-3.14e+100
142
+ M,e:-3.14e+100
143
+ M,en:-3.14e+100
144
+ M,f:-3.14e+100
145
+ M,g:-3.14e+100
146
+ M,h:-3.14e+100
147
+ M,i:-3.14e+100
148
+ M,in:-3.14e+100
149
+ M,j:-3.14e+100
150
+ M,jn:-3.14e+100
151
+ M,k:-3.14e+100
152
+ M,l:-3.14e+100
153
+ M,ln:-3.14e+100
154
+ M,m:-3.14e+100
155
+ M,mg:-3.14e+100
156
+ M,mq:-3.14e+100
157
+ M,n:-3.14e+100
158
+ M,ng:-3.14e+100
159
+ M,nr:-3.14e+100
160
+ M,nrfg:-3.14e+100
161
+ M,nrt:-3.14e+100
162
+ M,ns:-3.14e+100
163
+ M,nt:-3.14e+100
164
+ M,nz:-3.14e+100
165
+ M,o:-3.14e+100
166
+ M,p:-3.14e+100
167
+ M,q:-3.14e+100
168
+ M,qe:-3.14e+100
169
+ M,qg:-3.14e+100
170
+ M,r:-3.14e+100
171
+ M,rg:-3.14e+100
172
+ M,rr:-3.14e+100
173
+ M,rz:-3.14e+100
174
+ M,s:-3.14e+100
175
+ M,t:-3.14e+100
176
+ M,tg:-3.14e+100
177
+ M,u:-3.14e+100
178
+ M,ud:-3.14e+100
179
+ M,ug:-3.14e+100
180
+ M,uj:-3.14e+100
181
+ M,ul:-3.14e+100
182
+ M,uv:-3.14e+100
183
+ M,uz:-3.14e+100
184
+ M,v:-3.14e+100
185
+ M,vd:-3.14e+100
186
+ M,vg:-3.14e+100
187
+ M,vi:-3.14e+100
188
+ M,vn:-3.14e+100
189
+ M,vq:-3.14e+100
190
+ M,w:-3.14e+100
191
+ M,x:-3.14e+100
192
+ M,y:-3.14e+100
193
+ M,yg:-3.14e+100
194
+ M,z:-3.14e+100
195
+ M,zg:-3.14e+100
196
+ S,a:-3.90253968313
197
+ S,ad:-11.0484584802
198
+ S,ag:-6.95411391796
199
+ S,an:-12.8402179494
200
+ S,b:-6.47288876397
201
+ S,bg:-3.14e+100
202
+ S,c:-4.78696679586
203
+ S,d:-3.90391976418
204
+ S,df:-3.14e+100
205
+ S,dg:-8.9483976513
206
+ S,e:-5.94251300628
207
+ S,en:-3.14e+100
208
+ S,f:-5.19482024998
209
+ S,g:-6.50782681533
210
+ S,h:-8.65056320738
211
+ S,i:-3.14e+100
212
+ S,in:-3.14e+100
213
+ S,j:-4.91199211964
214
+ S,jn:-3.14e+100
215
+ S,k:-6.94032059583
216
+ S,l:-3.14e+100
217
+ S,ln:-3.14e+100
218
+ S,m:-3.26920065212
219
+ S,mg:-10.8253149289
220
+ S,mq:-3.14e+100
221
+ S,n:-3.85514838976
222
+ S,ng:-4.9134348611
223
+ S,nr:-4.48366310396
224
+ S,nrfg:-3.14e+100
225
+ S,nrt:-3.14e+100
226
+ S,ns:-3.14e+100
227
+ S,nt:-12.1470707689
228
+ S,nz:-3.14e+100
229
+ S,o:-8.46446092775
230
+ S,p:-2.98684018136
231
+ S,q:-4.88865861826
232
+ S,qe:-3.14e+100
233
+ S,qg:-3.14e+100
234
+ S,r:-2.76353367841
235
+ S,rg:-10.2752685919
236
+ S,rr:-3.14e+100
237
+ S,rz:-3.14e+100
238
+ S,s:-3.14e+100
239
+ S,t:-3.14e+100
240
+ S,tg:-6.27284253188
241
+ S,u:-6.94032059583
242
+ S,ud:-7.72823016105
243
+ S,ug:-7.53940370266
244
+ S,uj:-6.85251045118
245
+ S,ul:-8.41537131755
246
+ S,uv:-8.15808672229
247
+ S,uz:-9.29925862537
248
+ S,v:-3.05329230341
249
+ S,vd:-3.14e+100
250
+ S,vg:-5.94301818437
251
+ S,vi:-3.14e+100
252
+ S,vn:-11.4539235883
253
+ S,vq:-3.14e+100
254
+ S,w:-3.14e+100
255
+ S,x:-8.42741965607
256
+ S,y:-6.19707946995
257
+ S,yg:-13.53336513
258
+ S,z:-3.14e+100
259
+ S,zg:-3.14e+100
ov_models/cppjieba/dict/pos_dict/prob_trans.utf8 ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/cppjieba/dict/stop_words.utf8 ADDED
@@ -0,0 +1,1534 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "
2
+ .
3
+
4
+ ,
5
+
6
+
7
+
8
+
9
+
10
+ `
11
+
12
+
13
+
14
+ ^
15
+
16
+
17
+
18
+
19
+
20
+
21
+
22
+ ~
23
+ \
24
+
25
+ |
26
+ ¦
27
+
28
+ — 
29
+ (
30
+ )
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+ »
44
+ «
45
+
46
+
47
+
48
+
49
+
50
+
51
+ }
52
+ {
53
+ ]
54
+ [
55
+
56
+ ¸
57
+
58
+
59
+
60
+ ;
61
+
62
+ ¡
63
+
64
+ ¿
65
+
66
+
67
+
68
+
69
+
70
+ ´
71
+ ˊ
72
+ ˋ
73
+ -
74
+
75
+
76
+ @
77
+
78
+
79
+ _
80
+ ¯
81
+ _
82
+
83
+
84
+ +
85
+
86
+ =
87
+
88
+
89
+ <
90
+ ­
91
+ ˜
92
+ ~
93
+
94
+ #
95
+
96
+ $
97
+
98
+ &
99
+
100
+ %
101
+
102
+ *
103
+
104
+ \
105
+
106
+
107
+
108
+
109
+ ˇ
110
+
111
+
112
+
113
+
114
+
115
+ ︿
116
+
117
+
118
+
119
+
120
+ _
121
+ ˉ
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+ the
131
+ a
132
+ an
133
+ that
134
+ those
135
+ this
136
+ that
137
+ $
138
+ 0
139
+ 1
140
+ 2
141
+ 3
142
+ 4
143
+ 5
144
+ 6
145
+ 7
146
+ 8
147
+ 9
148
+ ?
149
+ _
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+ 一些
158
+ 一何
159
+ 一切
160
+ 一则
161
+ 一方面
162
+ 一旦
163
+ 一来
164
+ 一样
165
+ 一般
166
+ 一转眼
167
+ 万一
168
+
169
+ 上下
170
+
171
+
172
+ 不仅
173
+ 不但
174
+ 不光
175
+ 不单
176
+ 不只
177
+ 不外乎
178
+ 不如
179
+ 不妨
180
+ 不尽
181
+ 不尽然
182
+ 不得
183
+ 不怕
184
+ 不惟
185
+ 不成
186
+ 不拘
187
+ 不料
188
+ 不是
189
+ 不比
190
+ 不然
191
+ 不特
192
+ 不独
193
+ 不管
194
+ 不至于
195
+ 不若
196
+ 不论
197
+ 不过
198
+ 不问
199
+
200
+ 与其
201
+ 与其说
202
+ 与否
203
+ 与此同时
204
+
205
+ 且不说
206
+ 且说
207
+ 两者
208
+
209
+ 个别
210
+
211
+
212
+ 为了
213
+ 为什么
214
+ 为何
215
+ 为止
216
+ 为此
217
+ 为着
218
+
219
+ 乃至
220
+ 乃至于
221
+
222
+
223
+ 之一
224
+ 之所以
225
+ 之类
226
+ 乌乎
227
+
228
+
229
+
230
+ 也好
231
+ 也罢
232
+
233
+ 二来
234
+
235
+ 于是
236
+ 于是乎
237
+ 云云
238
+ 云尔
239
+
240
+
241
+
242
+ 人们
243
+ 人家
244
+ 什么
245
+ 什么样
246
+
247
+ 介于
248
+
249
+ 仍旧
250
+
251
+ 从此
252
+ 从而
253
+
254
+ 他人
255
+ 他们
256
+
257
+ 以上
258
+ 以为
259
+ 以便
260
+ 以免
261
+ 以及
262
+ 以故
263
+ 以期
264
+ 以来
265
+ 以至
266
+ 以至于
267
+ 以致
268
+
269
+
270
+ 任何
271
+ 任凭
272
+ 似的
273
+
274
+ 但凡
275
+ 但是
276
+
277
+ 何以
278
+ 何况
279
+ 何处
280
+ 何时
281
+ 余外
282
+ 作为
283
+
284
+ 你们
285
+ 使
286
+ 使得
287
+ 例如
288
+
289
+ 依据
290
+ 依照
291
+ 便于
292
+
293
+ 俺们
294
+
295
+ 倘使
296
+ 倘或
297
+ 倘然
298
+ 倘若
299
+
300
+ 假使
301
+ 假如
302
+ 假若
303
+ 傥然
304
+
305
+
306
+ 先不先
307
+ 光是
308
+ 全体
309
+ 全部
310
+
311
+ 关于
312
+
313
+ 其一
314
+ 其中
315
+ 其二
316
+ 其他
317
+ 其余
318
+ 其它
319
+ 其次
320
+ 具体地说
321
+ 具体说来
322
+ 兼之
323
+
324
+
325
+ 再其次
326
+ 再则
327
+ 再有
328
+ 再者
329
+ 再者说
330
+ 再说
331
+
332
+
333
+ 况且
334
+
335
+ 几时
336
+
337
+ 凡是
338
+
339
+ 凭借
340
+ 出于
341
+ 出来
342
+ 分别
343
+
344
+ 则甚
345
+
346
+ 别人
347
+ 别处
348
+ 别是
349
+ 别的
350
+ 别管
351
+ 别说
352
+
353
+ 前后
354
+ 前此
355
+ 前者
356
+ 加之
357
+ 加以
358
+
359
+ 即令
360
+ 即使
361
+ 即便
362
+ 即如
363
+ 即或
364
+ 即若
365
+
366
+
367
+
368
+ 又及
369
+
370
+ 及其
371
+ 及至
372
+ 反之
373
+ 反而
374
+ 反过来
375
+ 反过来说
376
+ 受到
377
+
378
+ 另一方面
379
+ 另外
380
+ 另悉
381
+
382
+ 只当
383
+ 只怕
384
+ 只是
385
+ 只有
386
+ 只消
387
+ 只要
388
+ 只限
389
+
390
+ 叮咚
391
+
392
+ 可以
393
+ 可是
394
+ 可见
395
+
396
+ 各个
397
+ 各位
398
+ 各种
399
+ 各自
400
+
401
+ 同时
402
+
403
+ 后者
404
+
405
+ 向使
406
+ 向着
407
+
408
+
409
+ 否则
410
+
411
+ 吧哒
412
+
413
+
414
+
415
+
416
+
417
+
418
+ 呜呼
419
+
420
+
421
+ 呵呵
422
+
423
+ 呼哧
424
+
425
+
426
+
427
+
428
+
429
+
430
+ 咱们
431
+
432
+
433
+
434
+ 哈哈
435
+
436
+
437
+ 哎呀
438
+ 哎哟
439
+
440
+
441
+
442
+
443
+
444
+ 哪个
445
+ 哪些
446
+ 哪儿
447
+ 哪天
448
+ 哪年
449
+ 哪怕
450
+ 哪样
451
+ 哪边
452
+ 哪里
453
+
454
+ 哼唷
455
+
456
+ 唯有
457
+
458
+
459
+
460
+
461
+ 啪达
462
+ 啷当
463
+
464
+
465
+ 喔唷
466
+
467
+
468
+ 嗡嗡
469
+
470
+
471
+
472
+
473
+ 嘎登
474
+
475
+
476
+
477
+
478
+ 嘿嘿
479
+
480
+ 因为
481
+ 因了
482
+ 因此
483
+ 因着
484
+ 因而
485
+ 固然
486
+
487
+ 在下
488
+ 在于
489
+
490
+ 基于
491
+ 处在
492
+
493
+ 多么
494
+ 多少
495
+
496
+ 大家
497
+
498
+ 她们
499
+
500
+
501
+ 如上
502
+ 如上所述
503
+ 如下
504
+ 如何
505
+ 如其
506
+ 如同
507
+ 如是
508
+ 如果
509
+ 如此
510
+ 如若
511
+ 始而
512
+ 孰料
513
+ 孰知
514
+
515
+ 宁可
516
+ 宁愿
517
+ 宁肯
518
+
519
+ 它们
520
+
521
+ 对于
522
+ 对待
523
+ 对方
524
+ 对比
525
+
526
+
527
+
528
+ 尔后
529
+ 尔尔
530
+ 尚且
531
+
532
+ 就是
533
+ 就是了
534
+ 就是说
535
+ 就算
536
+ 就要
537
+
538
+ 尽管
539
+ 尽管如此
540
+ 岂但
541
+
542
+
543
+ 已矣
544
+
545
+ 巴巴
546
+
547
+ 并且
548
+ 并非
549
+ 庶乎
550
+ 庶几
551
+ 开外
552
+ 开始
553
+
554
+ 归齐
555
+
556
+ 当地
557
+ 当然
558
+ 当着
559
+
560
+ 彼时
561
+ 彼此
562
+
563
+
564
+
565
+
566
+ 得了
567
+
568
+ 怎么
569
+ 怎么办
570
+ 怎么样
571
+ 怎奈
572
+ 怎样
573
+ 总之
574
+ 总的来看
575
+ 总的来说
576
+ 总的说来
577
+ 总而言之
578
+ 恰恰相反
579
+
580
+ 惟其
581
+ 慢说
582
+
583
+ 我们
584
+
585
+ 或则
586
+ 或是
587
+ 或曰
588
+ 或者
589
+ 截至
590
+
591
+ 所以
592
+ 所在
593
+ 所幸
594
+ 所有
595
+
596
+ 才能
597
+
598
+ 打从
599
+
600
+ 抑或
601
+
602
+
603
+ 按照
604
+ 换句话说
605
+ 换言之
606
+
607
+ 据此
608
+ 接着
609
+
610
+ 故此
611
+ 故而
612
+ 旁人
613
+
614
+ 无宁
615
+ 无论
616
+
617
+ 既往
618
+ 既是
619
+ 既然
620
+ 时候
621
+
622
+ 是以
623
+ 是的
624
+
625
+
626
+ 替代
627
+
628
+
629
+ 有些
630
+ 有关
631
+ 有及
632
+ 有时
633
+ 有的
634
+
635
+
636
+ 朝着
637
+
638
+ 本人
639
+ 本地
640
+ 本着
641
+ 本身
642
+
643
+ 来着
644
+ 来自
645
+ 来说
646
+ 极了
647
+ 果然
648
+ 果真
649
+
650
+ 某个
651
+ 某些
652
+ 某某
653
+ 根据
654
+
655
+ 正值
656
+ 正如
657
+ 正巧
658
+ 正是
659
+
660
+ 此地
661
+ 此处
662
+ 此外
663
+ 此时
664
+ 此次
665
+ 此间
666
+ 毋宁
667
+
668
+ 每当
669
+
670
+ 比及
671
+ 比如
672
+ 比方
673
+ 没奈何
674
+ 沿
675
+ 沿着
676
+ 漫说
677
+
678
+ 然则
679
+ 然后
680
+ 然而
681
+
682
+ 照着
683
+ 犹且
684
+ 犹自
685
+ 甚且
686
+ 甚么
687
+ 甚或
688
+ 甚而
689
+ 甚至
690
+ 甚至于
691
+
692
+ 用来
693
+
694
+ 由于
695
+ 由是
696
+ 由此
697
+ 由此可见
698
+
699
+ 的确
700
+ 的话
701
+ 直到
702
+ 相对而言
703
+ 省得
704
+
705
+ 眨眼
706
+
707
+ 着呢
708
+
709
+ 矣乎
710
+ 矣哉
711
+
712
+ 竟而
713
+
714
+
715
+ 等到
716
+ 等等
717
+ 简言之
718
+
719
+ 类如
720
+ 紧接着
721
+
722
+ 纵令
723
+ 纵使
724
+ 纵然
725
+
726
+ 经过
727
+ 结果
728
+
729
+ 继之
730
+ 继后
731
+ 继而
732
+ 综上所述
733
+ 罢了
734
+
735
+
736
+ 而且
737
+ 而况
738
+ 而后
739
+ 而外
740
+ 而已
741
+ 而是
742
+ 而言
743
+
744
+ 能否
745
+
746
+
747
+ 自个儿
748
+ 自从
749
+ 自各儿
750
+ 自后
751
+ 自家
752
+ 自己
753
+ 自打
754
+ 自身
755
+
756
+ 至于
757
+ 至今
758
+ 至若
759
+
760
+ 般的
761
+
762
+ 若夫
763
+ 若是
764
+ 若果
765
+ 若非
766
+ 莫不然
767
+ 莫如
768
+ 莫若
769
+
770
+ 虽则
771
+ 虽然
772
+ 虽说
773
+
774
+
775
+ 要不
776
+ 要不是
777
+ 要不然
778
+ 要么
779
+ 要是
780
+ 譬喻
781
+ 譬如
782
+
783
+ 许多
784
+
785
+ 设使
786
+ 设或
787
+ 设若
788
+ 诚如
789
+ 诚然
790
+
791
+ 说来
792
+
793
+ 诸位
794
+ 诸如
795
+
796
+ 谁人
797
+ 谁料
798
+ 谁知
799
+ 贼死
800
+ 赖以
801
+
802
+
803
+ 起见
804
+
805
+ 趁着
806
+ 越是
807
+
808
+
809
+
810
+ 较之
811
+
812
+
813
+
814
+ 还是
815
+ 还有
816
+ 还要
817
+
818
+ 这一来
819
+ 这个
820
+ 这么
821
+ 这么些
822
+ 这么样
823
+ 这么点儿
824
+ 这些
825
+ 这会儿
826
+ 这儿
827
+ 这就是说
828
+ 这时
829
+ 这样
830
+ 这次
831
+ 这般
832
+ 这边
833
+ 这里
834
+ 进而
835
+
836
+ 连同
837
+ 逐步
838
+ 通过
839
+ 遵循
840
+ 遵照
841
+
842
+ 那个
843
+ 那么
844
+ 那么些
845
+ 那么样
846
+ 那些
847
+ 那会儿
848
+ 那儿
849
+ 那时
850
+ 那样
851
+ 那般
852
+ 那边
853
+ 那里
854
+
855
+ 鄙人
856
+ 鉴于
857
+ 针对
858
+
859
+
860
+ 除了
861
+ 除外
862
+ 除开
863
+ 除此之外
864
+ 除非
865
+
866
+ 随后
867
+ 随时
868
+ 随着
869
+ 难道说
870
+ 非但
871
+ 非徒
872
+ 非特
873
+ 非独
874
+
875
+
876
+ 顺着
877
+ 首先
878
+
879
+
880
+
881
+
882
+
883
+ to
884
+ can
885
+ could
886
+ dare
887
+ do
888
+ did
889
+ does
890
+ may
891
+ might
892
+ would
893
+ should
894
+ must
895
+ will
896
+ ought
897
+ shall
898
+ need
899
+ is
900
+ a
901
+ am
902
+ are
903
+ about
904
+ according
905
+ after
906
+ against
907
+ all
908
+ almost
909
+ also
910
+ although
911
+ among
912
+ an
913
+ and
914
+ another
915
+ any
916
+ anything
917
+ approximately
918
+ as
919
+ asked
920
+ at
921
+ back
922
+ because
923
+ before
924
+ besides
925
+ between
926
+ both
927
+ but
928
+ by
929
+ call
930
+ called
931
+ currently
932
+ despite
933
+ did
934
+ do
935
+ dr
936
+ during
937
+ each
938
+ earlier
939
+ eight
940
+ even
941
+ eventually
942
+ every
943
+ everything
944
+ five
945
+ for
946
+ four
947
+ from
948
+ he
949
+ her
950
+ here
951
+ his
952
+ how
953
+ however
954
+ i
955
+ if
956
+ in
957
+ indeed
958
+ instead
959
+ it
960
+ its
961
+ just
962
+ last
963
+ like
964
+ major
965
+ many
966
+ may
967
+ maybe
968
+ meanwhile
969
+ more
970
+ moreover
971
+ most
972
+ mr
973
+ mrs
974
+ ms
975
+ much
976
+ my
977
+ neither
978
+ net
979
+ never
980
+ nevertheless
981
+ nine
982
+ no
983
+ none
984
+ not
985
+ nothing
986
+ now
987
+ of
988
+ on
989
+ once
990
+ one
991
+ only
992
+ or
993
+ other
994
+ our
995
+ over
996
+ partly
997
+ perhaps
998
+ prior
999
+ regarding
1000
+ separately
1001
+ seven
1002
+ several
1003
+ she
1004
+ should
1005
+ similarly
1006
+ since
1007
+ six
1008
+ so
1009
+ some
1010
+ somehow
1011
+ still
1012
+ such
1013
+ ten
1014
+ that
1015
+ the
1016
+ their
1017
+ then
1018
+ there
1019
+ therefore
1020
+ these
1021
+ they
1022
+ this
1023
+ those
1024
+ though
1025
+ three
1026
+ to
1027
+ two
1028
+ under
1029
+ unless
1030
+ unlike
1031
+ until
1032
+ volume
1033
+ we
1034
+ what
1035
+ whatever
1036
+ whats
1037
+ when
1038
+ where
1039
+ which
1040
+ while
1041
+ why
1042
+ with
1043
+ without
1044
+ yesterday
1045
+ yet
1046
+ you
1047
+ your
1048
+ aboard
1049
+ about
1050
+ above
1051
+ according to
1052
+ across
1053
+ afore
1054
+ after
1055
+ against
1056
+ agin
1057
+ along
1058
+ alongside
1059
+ amid
1060
+ amidst
1061
+ among
1062
+ amongst
1063
+ anent
1064
+ around
1065
+ as
1066
+ aslant
1067
+ astride
1068
+ at
1069
+ athwart
1070
+ bar
1071
+ because of
1072
+ before
1073
+ behind
1074
+ below
1075
+ beneath
1076
+ beside
1077
+ besides
1078
+ between
1079
+ betwixt
1080
+ beyond
1081
+ but
1082
+ by
1083
+ circa
1084
+ despite
1085
+ down
1086
+ during
1087
+ due to
1088
+ ere
1089
+ except
1090
+ for
1091
+ from
1092
+ in
1093
+ inside
1094
+ into
1095
+ less
1096
+ like
1097
+ mid
1098
+ midst
1099
+ minus
1100
+ near
1101
+ next
1102
+ nigh
1103
+ nigher
1104
+ nighest
1105
+ notwithstanding
1106
+ of
1107
+ off
1108
+ on
1109
+ on to
1110
+ onto
1111
+ out
1112
+ out of
1113
+ outside
1114
+ over
1115
+ past
1116
+ pending
1117
+ per
1118
+ plus
1119
+ qua
1120
+ re
1121
+ round
1122
+ sans
1123
+ save
1124
+ since
1125
+ through
1126
+ throughout
1127
+ thru
1128
+ till
1129
+ to
1130
+ toward
1131
+ towards
1132
+ under
1133
+ underneath
1134
+ unlike
1135
+ until
1136
+ unto
1137
+ up
1138
+ upon
1139
+ versus
1140
+ via
1141
+ vice
1142
+ with
1143
+ within
1144
+ without
1145
+ he
1146
+ her
1147
+ herself
1148
+ hers
1149
+ him
1150
+ himself
1151
+ his
1152
+ I
1153
+ it
1154
+ its
1155
+ itself
1156
+ me
1157
+ mine
1158
+ my
1159
+ myself
1160
+ ours
1161
+ she
1162
+ their
1163
+ theirs
1164
+ them
1165
+ themselves
1166
+ they
1167
+ us
1168
+ we
1169
+ our
1170
+ ourselves
1171
+ you
1172
+ your
1173
+ yours
1174
+ yourselves
1175
+ yourself
1176
+ this
1177
+ that
1178
+ these
1179
+ those
1180
+ "
1181
+ '
1182
+ ''
1183
+ (
1184
+ )
1185
+ *LRB*
1186
+ *RRB*
1187
+ <dquote>
1188
+ <ldquo>
1189
+ <lsquo>
1190
+ <rdquo>
1191
+ <rsquo>
1192
+ @
1193
+ &
1194
+ [
1195
+ ]
1196
+ `
1197
+ ``
1198
+ e.g.,
1199
+ {
1200
+ }
1201
+ &quot;
1202
+ &ldquo;
1203
+ &rdquo;
1204
+ -RRB-
1205
+ -LRB-
1206
+ --
1207
+ a
1208
+ about
1209
+ above
1210
+ across
1211
+ after
1212
+ afterwards
1213
+ again
1214
+ against
1215
+ all
1216
+ almost
1217
+ alone
1218
+ along
1219
+ already
1220
+ also
1221
+ although
1222
+ always
1223
+ am
1224
+ among
1225
+ amongst
1226
+ amoungst
1227
+ amount
1228
+ an
1229
+ and
1230
+ another
1231
+ any
1232
+ anyhow
1233
+ anyone
1234
+ anything
1235
+ anyway
1236
+ anywhere
1237
+ are
1238
+ around
1239
+ as
1240
+ at
1241
+ back
1242
+ be
1243
+ became
1244
+ because
1245
+ become
1246
+ becomes
1247
+ becoming
1248
+ been
1249
+ before
1250
+ beforehand
1251
+ behind
1252
+ being
1253
+ below
1254
+ beside
1255
+ besides
1256
+ between
1257
+ beyond
1258
+ bill
1259
+ both
1260
+ bottom
1261
+ but
1262
+ by
1263
+ call
1264
+ can
1265
+ cannot
1266
+ cant
1267
+ co
1268
+ computer
1269
+ con
1270
+ could
1271
+ couldnt
1272
+ cry
1273
+ de
1274
+ describe
1275
+ detail
1276
+ do
1277
+ done
1278
+ down
1279
+ due
1280
+ during
1281
+ each
1282
+ eg
1283
+ eight
1284
+ either
1285
+ eleven
1286
+ else
1287
+ elsewhere
1288
+ empty
1289
+ enough
1290
+ etc
1291
+ even
1292
+ ever
1293
+ every
1294
+ everyone
1295
+ everything
1296
+ everywhere
1297
+ except
1298
+ few
1299
+ fifteen
1300
+ fify
1301
+ fill
1302
+ find
1303
+ fire
1304
+ first
1305
+ five
1306
+ for
1307
+ former
1308
+ formerly
1309
+ forty
1310
+ found
1311
+ four
1312
+ from
1313
+ front
1314
+ full
1315
+ further
1316
+ get
1317
+ give
1318
+ go
1319
+ had
1320
+ has
1321
+ hasnt
1322
+ have
1323
+ he
1324
+ hence
1325
+ her
1326
+ here
1327
+ hereafter
1328
+ hereby
1329
+ herein
1330
+ hereupon
1331
+ hers
1332
+ herself
1333
+ him
1334
+ himself
1335
+ his
1336
+ how
1337
+ however
1338
+ hundred
1339
+ i
1340
+ ie
1341
+ if
1342
+ in
1343
+ inc
1344
+ indeed
1345
+ interest
1346
+ into
1347
+ is
1348
+ it
1349
+ its
1350
+ itself
1351
+ keep
1352
+ last
1353
+ latter
1354
+ latterly
1355
+ least
1356
+ less
1357
+ ltd
1358
+ made
1359
+ many
1360
+ may
1361
+ me
1362
+ meanwhile
1363
+ might
1364
+ mill
1365
+ mine
1366
+ more
1367
+ moreover
1368
+ most
1369
+ mostly
1370
+ move
1371
+ much
1372
+ must
1373
+ my
1374
+ myself
1375
+ name
1376
+ namely
1377
+ neither
1378
+ never
1379
+ nevertheless
1380
+ next
1381
+ nine
1382
+ no
1383
+ nobody
1384
+ none
1385
+ noone
1386
+ nor
1387
+ not
1388
+ nothing
1389
+ now
1390
+ nowhere
1391
+ of
1392
+ off
1393
+ often
1394
+ on
1395
+ once
1396
+ one
1397
+ only
1398
+ onto
1399
+ or
1400
+ other
1401
+ others
1402
+ otherwise
1403
+ our
1404
+ ours
1405
+ ourselves
1406
+ out
1407
+ over
1408
+ own
1409
+ part
1410
+ per
1411
+ perhaps
1412
+ please
1413
+ put
1414
+ rather
1415
+ re
1416
+ same
1417
+ see
1418
+ seem
1419
+ seemed
1420
+ seeming
1421
+ seems
1422
+ serious
1423
+ several
1424
+ she
1425
+ should
1426
+ show
1427
+ side
1428
+ since
1429
+ sincere
1430
+ six
1431
+ sixty
1432
+ so
1433
+ some
1434
+ somehow
1435
+ someone
1436
+ something
1437
+ sometime
1438
+ sometimes
1439
+ somewhere
1440
+ still
1441
+ such
1442
+ system
1443
+ take
1444
+ ten
1445
+ than
1446
+ that
1447
+ the
1448
+ their
1449
+ them
1450
+ themselves
1451
+ then
1452
+ thence
1453
+ there
1454
+ thereafter
1455
+ thereby
1456
+ therefore
1457
+ therein
1458
+ thereupon
1459
+ these
1460
+ they
1461
+ thick
1462
+ thin
1463
+ third
1464
+ this
1465
+ those
1466
+ though
1467
+ three
1468
+ through
1469
+ throughout
1470
+ thru
1471
+ thus
1472
+ to
1473
+ together
1474
+ too
1475
+ top
1476
+ toward
1477
+ towards
1478
+ twelve
1479
+ twenty
1480
+ two
1481
+ un
1482
+ under
1483
+ until
1484
+ up
1485
+ upon
1486
+ us
1487
+ very
1488
+ via
1489
+ was
1490
+ we
1491
+ well
1492
+ were
1493
+ what
1494
+ whatever
1495
+ when
1496
+ whence
1497
+ whenever
1498
+ where
1499
+ whereafter
1500
+ whereas
1501
+ whereby
1502
+ wherein
1503
+ whereupon
1504
+ wherever
1505
+ whether
1506
+ which
1507
+ while
1508
+ whither
1509
+ who
1510
+ whoever
1511
+ whole
1512
+ whom
1513
+ whose
1514
+ why
1515
+ will
1516
+ with
1517
+ within
1518
+ without
1519
+ would
1520
+ yet
1521
+ you
1522
+ your
1523
+ yours
1524
+ yourself
1525
+ yourselves
1526
+
1527
+
1528
+ :
1529
+ /
1530
+
1531
+ >
1532
+
1533
+ <
1534
+ !
ov_models/cppjieba/dict/user.dict.utf8 ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ 云计算
2
+ 韩玉鉴赏
3
+ 蓝翔 nz
4
+ 区块链 10 nz
ov_models/deepfilternet3/df_dec.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bd50b92ae1c6175be9ddb3e8cf7f81b65b95223957bcd558cb2c6ef281aa435
3
+ size 3325940
ov_models/deepfilternet3/df_dec.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/deepfilternet3/enc.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa1ed5feb4dd234e051188a2cda6c6f6efabefd87404cbae20fe6c9913dc9838
3
+ size 1934676
ov_models/deepfilternet3/enc.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/deepfilternet3/erb_dec.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e9dc11c071f72b343d5af5d344a504a903347d455952e8d4027da3ab47e792
3
+ size 3278844
ov_models/deepfilternet3/erb_dec.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/mini-bart-g2p-no_cache/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cisco-ai/mini-bart-g2p",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "gelu",
5
+ "add_cross_attention": true,
6
+ "architectures": [
7
+ "BartForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.1,
10
+ "bos_token_id": 0,
11
+ "classifier_dropout": 0.0,
12
+ "cross_attention_hidden_size": 256,
13
+ "d_model": 256,
14
+ "decoder_attention_heads": 4,
15
+ "decoder_ffn_dim": 512,
16
+ "decoder_layerdrop": 0.1,
17
+ "decoder_layers": 3,
18
+ "decoder_start_token_id": 2,
19
+ "dropout": 0.3,
20
+ "encoder_attention_heads": 4,
21
+ "encoder_ffn_dim": 512,
22
+ "encoder_layerdrop": 0.1,
23
+ "encoder_layers": 3,
24
+ "eos_token_id": 2,
25
+ "forced_eos_token_id": 2,
26
+ "id2label": {
27
+ "0": "LABEL_0",
28
+ "1": "LABEL_1",
29
+ "2": "LABEL_2"
30
+ },
31
+ "init_std": 0.02,
32
+ "is_encoder_decoder": true,
33
+ "label2id": {
34
+ "LABEL_0": 0,
35
+ "LABEL_1": 1,
36
+ "LABEL_2": 2
37
+ },
38
+ "max_position_embeddings": 128,
39
+ "model_type": "bart",
40
+ "num_hidden_layers": 3,
41
+ "pad_token_id": 1,
42
+ "scale_embedding": true,
43
+ "transformers_version": "4.46.3",
44
+ "use_cache": true,
45
+ "vocab_size": 103
46
+ }
ov_models/mini-bart-g2p-no_cache/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "forced_eos_token_id": 2,
6
+ "max_length": 128,
7
+ "pad_token_id": 1,
8
+ "transformers_version": "4.46.3"
9
+ }
ov_models/mini-bart-g2p-no_cache/openvino_decoder_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a802fdd3d8b0392f99fb13325f909bcc7f7bd5f44e3af56ae78fd74746d6fce4
3
+ size 9730212
ov_models/mini-bart-g2p-no_cache/openvino_decoder_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/mini-bart-g2p-no_cache/openvino_encoder_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57e3af93d552add522ff8853ae97f601e72ea7c2d841677a2f8f11b357a7e4c
3
+ size 6566064
ov_models/mini-bart-g2p-no_cache/openvino_encoder_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
ov_models/mini-bart-g2p-no_cache/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
ov_models/mini-bart-g2p-no_cache/tokenizer.json ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": null,
10
+ "added_tokens": [
11
+ {
12
+ "id": 0,
13
+ "content": "<s>",
14
+ "single_word": false,
15
+ "lstrip": false,
16
+ "rstrip": false,
17
+ "normalized": false,
18
+ "special": true
19
+ },
20
+ {
21
+ "id": 1,
22
+ "content": "<pad>",
23
+ "single_word": false,
24
+ "lstrip": false,
25
+ "rstrip": false,
26
+ "normalized": false,
27
+ "special": true
28
+ },
29
+ {
30
+ "id": 2,
31
+ "content": "</s>",
32
+ "single_word": false,
33
+ "lstrip": false,
34
+ "rstrip": false,
35
+ "normalized": false,
36
+ "special": true
37
+ },
38
+ {
39
+ "id": 3,
40
+ "content": "<unk>",
41
+ "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
+ },
47
+ {
48
+ "id": 4,
49
+ "content": "<mask>",
50
+ "single_word": false,
51
+ "lstrip": true,
52
+ "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
+ }
56
+ ],
57
+ "normalizer": {
58
+ "type": "Lowercase"
59
+ },
60
+ "pre_tokenizer": {
61
+ "type": "Split",
62
+ "pattern": {
63
+ "String": ""
64
+ },
65
+ "behavior": "Removed",
66
+ "invert": false
67
+ },
68
+ "post_processor": {
69
+ "type": "RobertaProcessing",
70
+ "sep": [
71
+ "</s>",
72
+ 2
73
+ ],
74
+ "cls": [
75
+ "<s>",
76
+ 0
77
+ ],
78
+ "trim_offsets": true,
79
+ "add_prefix_space": false
80
+ },
81
+ "decoder": null,
82
+ "model": {
83
+ "type": "WordLevel",
84
+ "vocab": {
85
+ "<s>": 0,
86
+ "<pad>": 1,
87
+ "</s>": 2,
88
+ "<unk>": 3,
89
+ "<mask>": 4,
90
+ "e": 5,
91
+ "a": 6,
92
+ "s": 7,
93
+ "i": 8,
94
+ "r": 9,
95
+ "n": 10,
96
+ "AH0": 11,
97
+ "o": 12,
98
+ "N": 13,
99
+ "t": 14,
100
+ "l": 15,
101
+ "S": 16,
102
+ "L": 17,
103
+ "T": 18,
104
+ "R": 19,
105
+ "K": 20,
106
+ "c": 21,
107
+ "d": 22,
108
+ "D": 23,
109
+ "u": 24,
110
+ "IH0": 25,
111
+ "m": 26,
112
+ "M": 27,
113
+ "Z": 28,
114
+ "h": 29,
115
+ "g": 30,
116
+ "p": 31,
117
+ "ER0": 32,
118
+ "IY0": 33,
119
+ "b": 34,
120
+ "B": 35,
121
+ "P": 36,
122
+ "EH1": 37,
123
+ "AE1": 38,
124
+ "AA1": 39,
125
+ "y": 40,
126
+ "k": 41,
127
+ "IH1": 42,
128
+ "F": 43,
129
+ "f": 44,
130
+ "G": 45,
131
+ "w": 46,
132
+ "V": 47,
133
+ "v": 48,
134
+ "NG": 49,
135
+ "'": 50,
136
+ "IY1": 51,
137
+ "EY1": 52,
138
+ "HH": 53,
139
+ "W": 54,
140
+ "SH": 55,
141
+ "OW1": 56,
142
+ "AO1": 57,
143
+ "OW0": 58,
144
+ "AH1": 59,
145
+ "UW1": 60,
146
+ "AY1": 61,
147
+ "JH": 62,
148
+ "z": 63,
149
+ "CH": 64,
150
+ "Y": 65,
151
+ "AA0": 66,
152
+ "ER1": 67,
153
+ "EH2": 68,
154
+ "IH2": 69,
155
+ "TH": 70,
156
+ "AY2": 71,
157
+ "AE2": 72,
158
+ "EY2": 73,
159
+ "AA2": 74,
160
+ "EH0": 75,
161
+ "j": 76,
162
+ "AW1": 77,
163
+ "OW2": 78,
164
+ "x": 79,
165
+ "IY2": 80,
166
+ "UW0": 81,
167
+ "AO2": 82,
168
+ "UH1": 83,
169
+ "AE0": 84,
170
+ "q": 85,
171
+ "AO0": 86,
172
+ "AH2": 87,
173
+ "UW2": 88,
174
+ "AY0": 89,
175
+ "OY1": 90,
176
+ "-": 91,
177
+ "EY0": 92,
178
+ "DH": 93,
179
+ "AW2": 94,
180
+ "ER2": 95,
181
+ "ZH": 96,
182
+ "UH2": 97,
183
+ "AW0": 98,
184
+ "UH0": 99,
185
+ "OY2": 100,
186
+ "OY0": 101,
187
+ ".": 102
188
+ },
189
+ "unk_token": "<unk>"
190
+ }
191
+ }
ov_models/mini-bart-g2p-no_cache/tokenizer_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "max_length": 128,
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "stride": 0,
56
+ "tokenizer_class": "BartTokenizer",
57
+ "trim_offsets": true,
58
+ "truncation_side": "right",
59
+ "truncation_strategy": "longest_first",
60
+ "unk_token": "<unk>"
61
+ }
ov_models/mini-bart-g2p-no_cache/vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>":0,"<pad>":1,"</s>":2,"<unk>":3,"<mask>":4,"e":5,"a":6,"s":7,"i":8,"r":9,"n":10,"AH0":11,"o":12,"N":13,"t":14,"l":15,"S":16,"L":17,"T":18,"R":19,"K":20,"c":21,"d":22,"D":23,"u":24,"IH0":25,"m":26,"M":27,"Z":28,"h":29,"g":30,"p":31,"ER0":32,"IY0":33,"b":34,"B":35,"P":36,"EH1":37,"AE1":38,"AA1":39,"y":40,"k":41,"IH1":42,"F":43,"f":44,"G":45,"w":46,"V":47,"v":48,"NG":49,"'":50,"IY1":51,"EY1":52,"HH":53,"W":54,"SH":55,"OW1":56,"AO1":57,"OW0":58,"AH1":59,"UW1":60,"AY1":61,"JH":62,"z":63,"CH":64,"Y":65,"AA0":66,"ER1":67,"EH2":68,"IH2":69,"TH":70,"AY2":71,"AE2":72,"EY2":73,"AA2":74,"EH0":75,"j":76,"AW1":77,"OW2":78,"x":79,"IY2":80,"UW0":81,"AO2":82,"UH1":83,"AE0":84,"q":85,"AO0":86,"AH2":87,"UW2":88,"AY0":89,"OY1":90,"-":91,"EY0":92,"DH":93,"AW2":94,"ER2":95,"ZH":96,"UH2":97,"AW0":98,"UH0":99,"OY2":100,"OY0":101,".":102}
ov_models/opencpop-strict.txt ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a AA a
2
+ ai AA ai
3
+ an AA an
4
+ ang AA ang
5
+ ao AA ao
6
+ ba b a
7
+ bai b ai
8
+ ban b an
9
+ bang b ang
10
+ bao b ao
11
+ bei b ei
12
+ ben b en
13
+ beng b eng
14
+ bi b i
15
+ bian b ian
16
+ biao b iao
17
+ bie b ie
18
+ bin b in
19
+ bing b ing
20
+ bo b o
21
+ bu b u
22
+ ca c a
23
+ cai c ai
24
+ can c an
25
+ cang c ang
26
+ cao c ao
27
+ ce c e
28
+ cei c ei
29
+ cen c en
30
+ ceng c eng
31
+ cha ch a
32
+ chai ch ai
33
+ chan ch an
34
+ chang ch ang
35
+ chao ch ao
36
+ che ch e
37
+ chen ch en
38
+ cheng ch eng
39
+ chi ch ir
40
+ chong ch ong
41
+ chou ch ou
42
+ chu ch u
43
+ chua ch ua
44
+ chuai ch uai
45
+ chuan ch uan
46
+ chuang ch uang
47
+ chui ch ui
48
+ chun ch un
49
+ chuo ch uo
50
+ ci c i0
51
+ cong c ong
52
+ cou c ou
53
+ cu c u
54
+ cuan c uan
55
+ cui c ui
56
+ cun c un
57
+ cuo c uo
58
+ da d a
59
+ dai d ai
60
+ dan d an
61
+ dang d ang
62
+ dao d ao
63
+ de d e
64
+ dei d ei
65
+ den d en
66
+ deng d eng
67
+ di d i
68
+ dia d ia
69
+ dian d ian
70
+ diao d iao
71
+ die d ie
72
+ ding d ing
73
+ diu d iu
74
+ dong d ong
75
+ dou d ou
76
+ du d u
77
+ duan d uan
78
+ dui d ui
79
+ dun d un
80
+ duo d uo
81
+ e EE e
82
+ ei EE ei
83
+ en EE en
84
+ eng EE eng
85
+ er EE er
86
+ fa f a
87
+ fan f an
88
+ fang f ang
89
+ fei f ei
90
+ fen f en
91
+ feng f eng
92
+ fo f o
93
+ fou f ou
94
+ fu f u
95
+ ga g a
96
+ gai g ai
97
+ gan g an
98
+ gang g ang
99
+ gao g ao
100
+ ge g e
101
+ gei g ei
102
+ gen g en
103
+ geng g eng
104
+ gong g ong
105
+ gou g ou
106
+ gu g u
107
+ gua g ua
108
+ guai g uai
109
+ guan g uan
110
+ guang g uang
111
+ gui g ui
112
+ gun g un
113
+ guo g uo
114
+ ha h a
115
+ hai h ai
116
+ han h an
117
+ hang h ang
118
+ hao h ao
119
+ he h e
120
+ hei h ei
121
+ hen h en
122
+ heng h eng
123
+ hong h ong
124
+ hou h ou
125
+ hu h u
126
+ hua h ua
127
+ huai h uai
128
+ huan h uan
129
+ huang h uang
130
+ hui h ui
131
+ hun h un
132
+ huo h uo
133
+ ji j i
134
+ jia j ia
135
+ jian j ian
136
+ jiang j iang
137
+ jiao j iao
138
+ jie j ie
139
+ jin j in
140
+ jing j ing
141
+ jiong j iong
142
+ jiu j iu
143
+ ju j v
144
+ jv j v
145
+ juan j van
146
+ jvan j van
147
+ jue j ve
148
+ jve j ve
149
+ jun j vn
150
+ jvn j vn
151
+ ka k a
152
+ kai k ai
153
+ kan k an
154
+ kang k ang
155
+ kao k ao
156
+ ke k e
157
+ kei k ei
158
+ ken k en
159
+ keng k eng
160
+ kong k ong
161
+ kou k ou
162
+ ku k u
163
+ kua k ua
164
+ kuai k uai
165
+ kuan k uan
166
+ kuang k uang
167
+ kui k ui
168
+ kun k un
169
+ kuo k uo
170
+ la l a
171
+ lai l ai
172
+ lan l an
173
+ lang l ang
174
+ lao l ao
175
+ le l e
176
+ lei l ei
177
+ leng l eng
178
+ li l i
179
+ lia l ia
180
+ lian l ian
181
+ liang l iang
182
+ liao l iao
183
+ lie l ie
184
+ lin l in
185
+ ling l ing
186
+ liu l iu
187
+ lo l o
188
+ long l ong
189
+ lou l ou
190
+ lu l u
191
+ luan l uan
192
+ lun l un
193
+ luo l uo
194
+ lv l v
195
+ lve l ve
196
+ ma m a
197
+ mai m ai
198
+ man m an
199
+ mang m ang
200
+ mao m ao
201
+ me m e
202
+ mei m ei
203
+ men m en
204
+ meng m eng
205
+ mi m i
206
+ mian m ian
207
+ miao m iao
208
+ mie m ie
209
+ min m in
210
+ ming m ing
211
+ miu m iu
212
+ mo m o
213
+ mou m ou
214
+ mu m u
215
+ na n a
216
+ nai n ai
217
+ nan n an
218
+ nang n ang
219
+ nao n ao
220
+ ne n e
221
+ nei n ei
222
+ nen n en
223
+ neng n eng
224
+ ni n i
225
+ nian n ian
226
+ niang n iang
227
+ niao n iao
228
+ nie n ie
229
+ nin n in
230
+ ning n ing
231
+ niu n iu
232
+ nong n ong
233
+ nou n ou
234
+ nu n u
235
+ nuan n uan
236
+ nun n un
237
+ nuo n uo
238
+ nv n v
239
+ nve n ve
240
+ o OO o
241
+ ou OO ou
242
+ pa p a
243
+ pai p ai
244
+ pan p an
245
+ pang p ang
246
+ pao p ao
247
+ pei p ei
248
+ pen p en
249
+ peng p eng
250
+ pi p i
251
+ pian p ian
252
+ piao p iao
253
+ pie p ie
254
+ pin p in
255
+ ping p ing
256
+ po p o
257
+ pou p ou
258
+ pu p u
259
+ qi q i
260
+ qia q ia
261
+ qian q ian
262
+ qiang q iang
263
+ qiao q iao
264
+ qie q ie
265
+ qin q in
266
+ qing q ing
267
+ qiong q iong
268
+ qiu q iu
269
+ qu q v
270
+ qv q v
271
+ quan q van
272
+ qvan q van
273
+ que q ve
274
+ qve q ve
275
+ qun q vn
276
+ qvn q vn
277
+ ran r an
278
+ rang r ang
279
+ rao r ao
280
+ re r e
281
+ ren r en
282
+ reng r eng
283
+ ri r ir
284
+ rong r ong
285
+ rou r ou
286
+ ru r u
287
+ rua r ua
288
+ ruan r uan
289
+ rui r ui
290
+ run r un
291
+ ruo r uo
292
+ sa s a
293
+ sai s ai
294
+ san s an
295
+ sang s ang
296
+ sao s ao
297
+ se s e
298
+ sen s en
299
+ seng s eng
300
+ sha sh a
301
+ shai sh ai
302
+ shan sh an
303
+ shang sh ang
304
+ shao sh ao
305
+ she sh e
306
+ shei sh ei
307
+ shen sh en
308
+ sheng sh eng
309
+ shi sh ir
310
+ shou sh ou
311
+ shu sh u
312
+ shua sh ua
313
+ shuai sh uai
314
+ shuan sh uan
315
+ shuang sh uang
316
+ shui sh ui
317
+ shun sh un
318
+ shuo sh uo
319
+ si s i0
320
+ song s ong
321
+ sou s ou
322
+ su s u
323
+ suan s uan
324
+ sui s ui
325
+ sun s un
326
+ suo s uo
327
+ ta t a
328
+ tai t ai
329
+ tan t an
330
+ tang t ang
331
+ tao t ao
332
+ te t e
333
+ tei t ei
334
+ teng t eng
335
+ ti t i
336
+ tian t ian
337
+ tiao t iao
338
+ tie t ie
339
+ ting t ing
340
+ tong t ong
341
+ tou t ou
342
+ tu t u
343
+ tuan t uan
344
+ tui t ui
345
+ tun t un
346
+ tuo t uo
347
+ wa w a
348
+ wai w ai
349
+ wan w an
350
+ wang w ang
351
+ wei w ei
352
+ wen w en
353
+ weng w eng
354
+ wo w o
355
+ wu w u
356
+ xi x i
357
+ xia x ia
358
+ xian x ian
359
+ xiang x iang
360
+ xiao x iao
361
+ xie x ie
362
+ xin x in
363
+ xing x ing
364
+ xiong x iong
365
+ xiu x iu
366
+ xu x v
367
+ xv x v
368
+ xuan x van
369
+ xvan x van
370
+ xue x ve
371
+ xve x ve
372
+ xun x vn
373
+ xvn x vn
374
+ ya y a
375
+ yan y En
376
+ yang y ang
377
+ yao y ao
378
+ ye y E
379
+ yi y i
380
+ yin y in
381
+ ying y ing
382
+ yo y o
383
+ yong y ong
384
+ you y ou
385
+ yu y v
386
+ yv y v
387
+ yuan y van
388
+ yvan y van
389
+ yue y ve
390
+ yve y ve
391
+ yun y vn
392
+ yvn y vn
393
+ za z a
394
+ zai z ai
395
+ zan z an
396
+ zang z ang
397
+ zao z ao
398
+ ze z e
399
+ zei z ei
400
+ zen z en
401
+ zeng z eng
402
+ zha zh a
403
+ zhai zh ai
404
+ zhan zh an
405
+ zhang zh ang
406
+ zhao zh ao
407
+ zhe zh e
408
+ zhei zh ei
409
+ zhen zh en
410
+ zheng zh eng
411
+ zhi zh ir
412
+ zhong zh ong
413
+ zhou zh ou
414
+ zhu zh u
415
+ zhua zh ua
416
+ zhuai zh uai
417
+ zhuan zh uan
418
+ zhuang zh uang
419
+ zhui zh ui
420
+ zhun zh un
421
+ zhuo zh uo
422
+ zi z i0
423
+ zong z ong
424
+ zou z ou
425
+ zu z u
426
+ zuan z uan
427
+ zui z ui
428
+ zun z un
429
+ zuo z uo
ov_models/punc.dic ADDED
Binary file (1.02 kB). View file
 
ov_models/s2t_map.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae8632fbf4fc583ec7b9e4381d411ba413d158694c839b4aa01fe04ff8a7d929
3
+ size 41296
ov_models/t2s_map.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10bff52975415c211e738a1c76a20ba2da1129f10be0fed22184adbb273907d
3
+ size 42820
ov_models/tts_en.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8acb2fa8b528bd5e90e823bf539ef4c9e82abf0fa6a1d9b1f1545a1e63f7a073
3
+ size 84361968