kenkwon committed on
Commit
cdafa72
·
1 Parent(s): b1bf095

Update spaCy pipeline

Browse files
Files changed (12) hide show
  1. .gitattributes +3 -0
  2. README.md +30 -0
  3. config.cfg +155 -0
  4. meta.json +42 -0
  5. ner/cfg +13 -0
  6. ner/moves +1 -0
  7. tokenizer/cfg +1 -0
  8. transformer/cfg +3 -0
  9. vocab/key2row +1 -0
  10. vocab/strings.json +736 -0
  11. vocab/vectors +0 -0
  12. vocab/vectors.cfg +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ner/model filter=lfs diff=lfs merge=lfs -text
37
+ transformer/model filter=lfs diff=lfs merge=lfs -text
38
+ vi_ner_task-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - token-classification
5
+ language:
6
+ - vi
7
+ ---
8
+ | Feature | Description |
9
+ | --- | --- |
10
+ | **Name** | `vi_ner_task` |
11
+ | **Version** | `0.0.0` |
12
+ | **spaCy** | `>=3.7.5,<3.8.0` |
13
+ | **Default Pipeline** | `transformer`, `ner` |
14
+ | **Components** | `transformer`, `ner` |
15
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
16
+ | **Sources** | n/a |
17
+ | **License** | n/a |
18
+ | **Author** | n/a |
19
+
20
+ ### Label Scheme
21
+
22
+ <details>
23
+
24
+ <summary>View label scheme (3 labels for 1 component)</summary>
25
+
26
+ | Component | Labels |
27
+ | --- | --- |
28
+ | **`ner`** | `DATE`, `TASK`, `TIME` |
29
+
30
+ </details>
config.cfg ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = null
3
+ dev = null
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ seed = 0
9
+ gpu_allocator = null
10
+
11
+ [nlp]
12
+ lang = "vi"
13
+ pipeline = ["transformer","ner"]
14
+ disabled = []
15
+ before_creation = null
16
+ after_creation = null
17
+ after_pipeline_creation = null
18
+ batch_size = 1000
19
+ vectors = {"@vectors":"spacy.Vectors.v1"}
20
+
21
+ [nlp.tokenizer]
22
+ @tokenizers = "spacy.vi.VietnameseTokenizer"
23
+ use_pyvi = true
24
+
25
+ [components]
26
+
27
+ [components.ner]
28
+ factory = "ner"
29
+ incorrect_spans_key = null
30
+ moves = null
31
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
32
+ update_with_oracle_cut_size = 100
33
+
34
+ [components.ner.model]
35
+ @architectures = "spacy.TransitionBasedParser.v2"
36
+ state_type = "ner"
37
+ extra_state_tokens = false
38
+ hidden_width = 64
39
+ maxout_pieces = 2
40
+ use_upper = true
41
+ nO = null
42
+
43
+ [components.ner.model.tok2vec]
44
+ @architectures = "spacy.HashEmbedCNN.v2"
45
+ pretrained_vectors = null
46
+ width = 96
47
+ depth = 4
48
+ embed_size = 2000
49
+ window_size = 1
50
+ maxout_pieces = 3
51
+ subword_features = true
52
+
53
+ [components.transformer]
54
+ factory = "transformer"
55
+ max_batch_items = 4096
56
+ set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
57
+
58
+ [components.transformer.model]
59
+ @architectures = "spacy-transformers.TransformerModel.v3"
60
+ name = "vinai/phobert-base"
61
+ mixed_precision = false
62
+
63
+ [components.transformer.model.get_spans]
64
+ @span_getters = "spacy-transformers.strided_spans.v1"
65
+ window = 128
66
+ stride = 96
67
+
68
+ [components.transformer.model.grad_scaler_config]
69
+
70
+ [components.transformer.model.tokenizer_config]
71
+ use_fast = true
72
+
73
+ [components.transformer.model.transformer_config]
74
+
75
+ [corpora]
76
+
77
+ [corpora.dev]
78
+ @readers = "spacy.Corpus.v1"
79
+ path = ${paths.dev}
80
+ gold_preproc = false
81
+ max_length = 0
82
+ limit = 0
83
+ augmenter = null
84
+
85
+ [corpora.train]
86
+ @readers = "spacy.Corpus.v1"
87
+ path = ${paths.train}
88
+ gold_preproc = false
89
+ max_length = 0
90
+ limit = 0
91
+ augmenter = null
92
+
93
+ [training]
94
+ seed = ${system.seed}
95
+ gpu_allocator = ${system.gpu_allocator}
96
+ dropout = 0.1
97
+ accumulate_gradient = 1
98
+ patience = 1600
99
+ max_epochs = 0
100
+ max_steps = 20000
101
+ eval_frequency = 200
102
+ frozen_components = []
103
+ annotating_components = []
104
+ dev_corpus = "corpora.dev"
105
+ train_corpus = "corpora.train"
106
+ before_to_disk = null
107
+ before_update = null
108
+
109
+ [training.batcher]
110
+ @batchers = "spacy.batch_by_words.v1"
111
+ discard_oversize = false
112
+ tolerance = 0.2
113
+ get_length = null
114
+
115
+ [training.batcher.size]
116
+ @schedules = "compounding.v1"
117
+ start = 100
118
+ stop = 1000
119
+ compound = 1.001
120
+ t = 0.0
121
+
122
+ [training.logger]
123
+ @loggers = "spacy.ConsoleLogger.v1"
124
+ progress_bar = false
125
+
126
+ [training.optimizer]
127
+ @optimizers = "Adam.v1"
128
+ beta1 = 0.9
129
+ beta2 = 0.999
130
+ L2_is_weight_decay = true
131
+ L2 = 0.01
132
+ grad_clip = 1.0
133
+ use_averages = false
134
+ eps = 0.00000001
135
+ learn_rate = 0.001
136
+
137
+ [training.score_weights]
138
+ ents_f = 1.0
139
+ ents_p = 0.0
140
+ ents_r = 0.0
141
+ ents_per_type = null
142
+
143
+ [pretraining]
144
+
145
+ [initialize]
146
+ vectors = ${paths.vectors}
147
+ init_tok2vec = ${paths.init_tok2vec}
148
+ vocab_data = null
149
+ lookups = null
150
+ before_init = null
151
+ after_init = null
152
+
153
+ [initialize.components]
154
+
155
+ [initialize.tokenizer]
meta.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"vi",
3
+ "name":"ner_task",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.7.5,<3.8.0",
11
+ "spacy_git_version":"a6d0fc360",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "transformer":[
20
+
21
+ ],
22
+ "ner":[
23
+ "DATE",
24
+ "TASK",
25
+ "TIME"
26
+ ]
27
+ },
28
+ "pipeline":[
29
+ "transformer",
30
+ "ner"
31
+ ],
32
+ "components":[
33
+ "transformer",
34
+ "ner"
35
+ ],
36
+ "disabled":[
37
+
38
+ ],
39
+ "requirements":[
40
+ "spacy-transformers>=1.3.9,<1.4.0"
41
+ ]
42
+ }
ner/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
ner/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves٨{"0":{},"1":{"DATE":-1,"TIME":-2,"TASK":-3},"2":{"DATE":-1,"TIME":-2,"TASK":-3},"3":{"DATE":-1,"TIME":-2,"TASK":-3},"4":{"":1,"DATE":-1,"TIME":-2,"TASK":-3},"5":{"":1}}�cfg��neg_key�
tokenizer/cfg ADDED
@@ -0,0 +1 @@
 
 
1
+ {"use_pyvi":true}
transformer/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "max_batch_items":4096
3
+ }
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/strings.json ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ " Ba",
3
+ " T\u01b0",
4
+ " ba",
5
+ " t\u01b0",
6
+ "/10",
7
+ "/11",
8
+ "/12",
9
+ "0/1",
10
+ "0/9",
11
+ "1",
12
+ "1/10",
13
+ "1/12",
14
+ "10",
15
+ "10/10",
16
+ "10/11",
17
+ "10h",
18
+ "11",
19
+ "11h30",
20
+ "12/10",
21
+ "14",
22
+ "14h",
23
+ "15",
24
+ "15/10",
25
+ "15/11",
26
+ "17",
27
+ "17/10",
28
+ "18h",
29
+ "2",
30
+ "2/1",
31
+ "2/10",
32
+ "20/1",
33
+ "20/10",
34
+ "20/11",
35
+ "20/12",
36
+ "20h",
37
+ "23/12",
38
+ "23h59",
39
+ "25/10",
40
+ "25/11",
41
+ "3",
42
+ "3/10",
43
+ "30",
44
+ "30/10",
45
+ "30/11",
46
+ "30/12",
47
+ "30/9",
48
+ "4",
49
+ "4h",
50
+ "5",
51
+ "5h",
52
+ "6",
53
+ "7",
54
+ "7h",
55
+ "8",
56
+ "8h",
57
+ "9",
58
+ "9h",
59
+ "A",
60
+ "AI",
61
+ "An",
62
+ "Anh",
63
+ "B",
64
+ "Ba",
65
+ "Backend",
66
+ "B\u00e1ch",
67
+ "B\u00ecnh",
68
+ "B\u1ea3y",
69
+ "B\u1ec7nh",
70
+ "C",
71
+ "Chi\u1ec1u",
72
+ "Ch\u00fang",
73
+ "Ch\u01a1i",
74
+ "Ch\u1ecb",
75
+ "Ch\u1ee7",
76
+ "Ch\u1ee7 Nh\u1eadt",
77
+ "Ch\u1ee7 nh\u1eadt",
78
+ "Cu\u1ed1i",
79
+ "C\u01b0\u1eddng",
80
+ "C\u1ea3",
81
+ "D",
82
+ "Data",
83
+ "Design",
84
+ "Dev",
85
+ "DevOps",
86
+ "Dung",
87
+ "D\u0169ng",
88
+ "D\u1ecdn",
89
+ "F",
90
+ "FPT",
91
+ "Frontend",
92
+ "G",
93
+ "G\u00f2n",
94
+ "H",
95
+ "Hai",
96
+ "Hoa",
97
+ "Ho\u00e0n",
98
+ "Ho\u00e0ng",
99
+ "Ho\u00e1",
100
+ "Huy",
101
+ "H\u00e0",
102
+ "H\u00f3a",
103
+ "H\u00f4m",
104
+ "H\u00f9ng",
105
+ "H\u01b0ng",
106
+ "H\u1ea1nh",
107
+ "H\u1ea3i",
108
+ "H\u1ecdc",
109
+ "K",
110
+ "Khoa",
111
+ "Kh\u1ea3i",
112
+ "L",
113
+ "Lan",
114
+ "Linh",
115
+ "L\u00e0m",
116
+ "L\u00e2m",
117
+ "L\u1ea1t",
118
+ "L\u1ecbch",
119
+ "M",
120
+ "Mai",
121
+ "Marketing",
122
+ "Minh",
123
+ "Mobile",
124
+ "M\u00f9ng",
125
+ "M\u1ed1t",
126
+ "N",
127
+ "Nam",
128
+ "Nghe",
129
+ "Ng\u00e0y",
130
+ "Ng\u1ecdc",
131
+ "Ng\u1eef",
132
+ "Nh\u00f3m",
133
+ "Nh\u1eadt",
134
+ "N\u0103m",
135
+ "N\u1ea5u",
136
+ "N\u1eb5ng",
137
+ "N\u1ed9i",
138
+ "Ops",
139
+ "P",
140
+ "Ph\u00e1p",
141
+ "Ph\u00f2ng",
142
+ "Ph\u00fac",
143
+ "Ph\u01b0\u01a1ng",
144
+ "Product",
145
+ "Q",
146
+ "QA",
147
+ "Qu\u00e2n",
148
+ "ROOT",
149
+ "S",
150
+ "Sinh",
151
+ "S\u00e0i",
152
+ "S\u00e1ng",
153
+ "S\u00e1u",
154
+ "S\u1eafp",
155
+ "S\u1ebfp",
156
+ "T",
157
+ "TASK",
158
+ "Team",
159
+ "Th\u00e1ng",
160
+ "Th\u1ea3o",
161
+ "Th\u1eafng",
162
+ "Th\u1ee9",
163
+ "Th\u1ee9 3",
164
+ "Th\u1ee9 Ba",
165
+ "Th\u1ee9 B\u1ea3y",
166
+ "Th\u1ee9 Hai",
167
+ "Th\u1ee9 N\u0103m",
168
+ "Th\u1ee9 S\u00e1u",
169
+ "Th\u1ee9 T\u01b0",
170
+ "Th\u1ee9 ba",
171
+ "Th\u1ee9 b\u1ea3y",
172
+ "Th\u1ee9 hai",
173
+ "Th\u1ee9 n\u0103m",
174
+ "Th\u1ee9 s\u00e1u",
175
+ "Th\u1ee9 t\u01b0",
176
+ "Tin",
177
+ "Ti\u1ebfng",
178
+ "To\u00e1n",
179
+ "Tr\u00e2m",
180
+ "Tr\u01b0a",
181
+ "Tu\u1ea5n",
182
+ "Tu\u1ea7n",
183
+ "T\u00e0u",
184
+ "T\u00f4i",
185
+ "T\u01b0",
186
+ "T\u1eadp",
187
+ "T\u1ed1i",
188
+ "V",
189
+ "Vi\u1ebft",
190
+ "Vi\u1ec7t",
191
+ "V\u0103n",
192
+ "V\u0169ng",
193
+ "V\u1eadt",
194
+ "XX",
195
+ "XXX",
196
+ "Xem",
197
+ "Xx",
198
+ "Xxx",
199
+ "Xxx Xx",
200
+ "Xxx Xxx",
201
+ "Xxx Xxxx",
202
+ "Xxx d",
203
+ "Xxx xx",
204
+ "Xxx xxx",
205
+ "Xxx xxxx",
206
+ "XxxXxx",
207
+ "Xxxx",
208
+ "Xxxxx",
209
+ "a",
210
+ "ai",
211
+ "ail",
212
+ "all",
213
+ "ame",
214
+ "an",
215
+ "anh",
216
+ "ase",
217
+ "ask",
218
+ "assignment",
219
+ "ata",
220
+ "auth",
221
+ "b",
222
+ "ba",
223
+ "backend",
224
+ "banh",
225
+ "bug",
226
+ "bu\u1ed5i",
227
+ "b\u00e0i",
228
+ "b\u00e1ch",
229
+ "b\u00e1o",
230
+ "b\u00e8",
231
+ "b\u00ecnh",
232
+ "b\u00f3ng",
233
+ "b\u1ea1n",
234
+ "b\u1ea3n",
235
+ "b\u1ea3y",
236
+ "b\u1eaft",
237
+ "b\u1ec7nh",
238
+ "b\u1ecb",
239
+ "b\u1ed9",
240
+ "c",
241
+ "call",
242
+ "cao",
243
+ "chi\u1ec1u",
244
+ "cho",
245
+ "chuy\u1ec1n",
246
+ "chu\u1ea9n",
247
+ "ch\u00ednh",
248
+ "ch\u00fang",
249
+ "ch\u01a1i",
250
+ "ch\u1ea1y",
251
+ "ch\u1ecb",
252
+ "ch\u1ee3",
253
+ "ch\u1ee7",
254
+ "ch\u1ee7 nh\u1eadt",
255
+ "ch\u1ee9c",
256
+ "code",
257
+ "con",
258
+ "concert",
259
+ "cu\u1ed1i",
260
+ "c\u00e0",
261
+ "c\u00e1",
262
+ "c\u00e1o",
263
+ "c\u00f3",
264
+ "c\u00f4",
265
+ "c\u00f4ng",
266
+ "c\u00f9ng",
267
+ "c\u0169",
268
+ "c\u01a1m",
269
+ "c\u01b0\u1edbi",
270
+ "c\u01b0\u1eddng",
271
+ "c\u1ea3",
272
+ "c\u1ea7n",
273
+ "c\u1eadp",
274
+ "d",
275
+ "d/d",
276
+ "d/dd",
277
+ "data",
278
+ "database",
279
+ "dd",
280
+ "dd/d",
281
+ "dd/dd",
282
+ "ddx",
283
+ "ddxdd",
284
+ "deo",
285
+ "deploy",
286
+ "design",
287
+ "dev",
288
+ "devops",
289
+ "du",
290
+ "dung",
291
+ "dx",
292
+ "d\u0169ng",
293
+ "d\u1ea1o",
294
+ "d\u1eabn",
295
+ "d\u1eb9p",
296
+ "d\u1ecdn",
297
+ "d\u1ee5c",
298
+ "d\u1ee5ng",
299
+ "d\u1ef1",
300
+ "e",
301
+ "eam",
302
+ "email",
303
+ "end",
304
+ "ent",
305
+ "ert",
306
+ "est",
307
+ "f",
308
+ "fix",
309
+ "fpt",
310
+ "frontend",
311
+ "g",
312
+ "game",
313
+ "ghe",
314
+ "gia",
315
+ "gi\u00e1o",
316
+ "gi\u00e3n",
317
+ "gi\u1ea3ng",
318
+ "gi\u1edd",
319
+ "guitar",
320
+ "gym",
321
+ "g\u00e0y",
322
+ "g\u00f2n",
323
+ "g\u1eb7p",
324
+ "g\u1ecdc",
325
+ "g\u1ecdi",
326
+ "g\u1eedi",
327
+ "h",
328
+ "h30",
329
+ "h59",
330
+ "hai",
331
+ "ham",
332
+ "him",
333
+ "hoa",
334
+ "hop",
335
+ "ho\u00e0n",
336
+ "ho\u00e0ng",
337
+ "ho\u00e1",
338
+ "ho\u1ea1ch",
339
+ "ho\u1ea1t",
340
+ "huy",
341
+ "h\u00e0",
342
+ "h\u00e0ng",
343
+ "h\u00e1m",
344
+ "h\u00e1p",
345
+ "h\u00e2n",
346
+ "h\u00eam",
347
+ "h\u00ecnh",
348
+ "h\u00f3a",
349
+ "h\u00f3m",
350
+ "h\u00f4m",
351
+ "h\u00f9ng",
352
+ "h\u00fac",
353
+ "h\u00fat",
354
+ "h\u01a1i",
355
+ "h\u01b0ng",
356
+ "h\u01b0\u1edbng",
357
+ "h\u1ea1c",
358
+ "h\u1ea1nh",
359
+ "h\u1ea1y",
360
+ "h\u1ea3i",
361
+ "h\u1ea3o",
362
+ "h\u1ea7u",
363
+ "h\u1ea9m",
364
+ "h\u1eadt",
365
+ "h\u1eafn",
366
+ "h\u1ebft",
367
+ "h\u1ec7",
368
+ "h\u1ecdc",
369
+ "h\u1ecdp",
370
+ "h\u1ecfe",
371
+ "h\u1ed3",
372
+ "h\u1ee9c",
373
+ "h\u1ef1c",
374
+ "ide",
375
+ "iew",
376
+ "ign",
377
+ "ile",
378
+ "ine",
379
+ "ing",
380
+ "inh",
381
+ "int",
382
+ "ive",
383
+ "i\u00e1o",
384
+ "i\u00e3n",
385
+ "i\u00ean",
386
+ "i\u00eau",
387
+ "i\u1ebft",
388
+ "i\u1ec1n",
389
+ "i\u1ec1u",
390
+ "i\u1ec3m",
391
+ "i\u1ec3n",
392
+ "i\u1ec3u",
393
+ "i\u1ec7c",
394
+ "i\u1ec7m",
395
+ "i\u1ec7n",
396
+ "i\u1ec7p",
397
+ "i\u1ec7t",
398
+ "i\u1ec7u",
399
+ "k",
400
+ "khai",
401
+ "khi",
402
+ "khoa",
403
+ "kh\u00e1ch",
404
+ "kh\u00e1m",
405
+ "kh\u1ea3i",
406
+ "kh\u1ea3o",
407
+ "kh\u1ecfe",
408
+ "kia",
409
+ "ki\u1ec3m",
410
+ "k\u00fd",
411
+ "k\u1ebf",
412
+ "k\u1ef9",
413
+ "l",
414
+ "lan",
415
+ "linh",
416
+ "live",
417
+ "li\u1ec7u",
418
+ "loy",
419
+ "luy\u1ec7n",
420
+ "lu\u1eadn",
421
+ "l\u00e0m",
422
+ "l\u00e2m",
423
+ "l\u00fac",
424
+ "l\u00fd",
425
+ "l\u1ea1i",
426
+ "l\u1ea1t",
427
+ "l\u1eadp",
428
+ "l\u1ecbch",
429
+ "l\u1ed7i",
430
+ "m",
431
+ "mai",
432
+ "mail",
433
+ "marketing",
434
+ "minh",
435
+ "mobile",
436
+ "module",
437
+ "mua",
438
+ "mu\u1ed1n",
439
+ "m\u00f4n",
440
+ "m\u00f9ng",
441
+ "m\u1eb9",
442
+ "m\u1ec1m",
443
+ "m\u1ed1t",
444
+ "m\u1ed7i",
445
+ "m\u1edbi",
446
+ "n",
447
+ "nam",
448
+ "nay",
449
+ "nghe",
450
+ "nghi\u1ec7m",
451
+ "nghi\u1ec7p",
452
+ "ngo\u00e0i",
453
+ "ng\u00e0y",
454
+ "ng\u1ecdc",
455
+ "ng\u1ee7",
456
+ "ng\u1eef",
457
+ "nh\u00e0",
458
+ "nh\u00e2n",
459
+ "nh\u00f3m",
460
+ "nh\u1ea1c",
461
+ "nh\u1eadt",
462
+ "nh\u1eafn",
463
+ "nh\u1edb",
464
+ "nit",
465
+ "n\u00e0y",
466
+ "n\u00e2ng",
467
+ "n\u0103m",
468
+ "n\u0103ng",
469
+ "n\u1ea5u",
470
+ "n\u1eb5ng",
471
+ "n\u1ed9i",
472
+ "n\u1ed9p",
473
+ "n\u1eefa",
474
+ "o",
475
+ "ode",
476
+ "oga",
477
+ "ong",
478
+ "online",
479
+ "o\u00e0i",
480
+ "o\u00e0n",
481
+ "o\u00e1n",
482
+ "o\u1ea1i",
483
+ "o\u1ea1n",
484
+ "o\u1ea1t",
485
+ "p",
486
+ "pha",
487
+ "phim",
488
+ "phi\u00ean",
489
+ "ph\u00e1p",
490
+ "ph\u00ea",
491
+ "ph\u00f2ng",
492
+ "ph\u00fac",
493
+ "ph\u00fat",
494
+ "ph\u01b0\u01a1ng",
495
+ "ph\u1ea3i",
496
+ "ph\u1ea9m",
497
+ "ph\u1ecfng",
498
+ "product",
499
+ "q",
500
+ "qa",
501
+ "qu\u00e2n",
502
+ "qu\u00fd",
503
+ "qu\u1ed1c",
504
+ "r",
505
+ "rau",
506
+ "release",
507
+ "review",
508
+ "r\u00e2m",
509
+ "r\u0103ng",
510
+ "r\u01b0a",
511
+ "r\u1ed5",
512
+ "r\u1eeda",
513
+ "s",
514
+ "sau",
515
+ "server",
516
+ "sinh",
517
+ "si\u00eau",
518
+ "slide",
519
+ "so\u1ea1n",
520
+ "sprint",
521
+ "stream",
522
+ "s\u00e0i",
523
+ "s\u00e1ch",
524
+ "s\u00e1ng",
525
+ "s\u00e1u",
526
+ "s\u01a1",
527
+ "s\u1eafp",
528
+ "s\u1ebd",
529
+ "s\u1ebfp",
530
+ "s\u1ee9c",
531
+ "s\u1eed",
532
+ "s\u1eeda",
533
+ "t",
534
+ "tar",
535
+ "task",
536
+ "team",
537
+ "test",
538
+ "tham",
539
+ "thi",
540
+ "tho\u1ea1i",
541
+ "thuy\u1ebft",
542
+ "th\u00e0nh",
543
+ "th\u00e1ng",
544
+ "th\u00eam",
545
+ "th\u00ed",
546
+ "th\u01b0",
547
+ "th\u1ea3o",
548
+ "th\u1ea7u",
549
+ "th\u1eafng",
550
+ "th\u1ec3",
551
+ "th\u1ecb",
552
+ "th\u1ed1ng",
553
+ "th\u1ee9",
554
+ "th\u1ee9 2",
555
+ "th\u1ee9 3",
556
+ "th\u1ee9 4",
557
+ "th\u1ee9 5",
558
+ "th\u1ee9 6",
559
+ "th\u1ee9 7",
560
+ "th\u1ee9 Ba",
561
+ "th\u1ee9 B\u1ea3y",
562
+ "th\u1ee9 Hai",
563
+ "th\u1ee9 N\u0103m",
564
+ "th\u1ee9 S\u00e1u",
565
+ "th\u1ee9 T\u01b0",
566
+ "th\u1ee9 ba",
567
+ "th\u1ee9 b\u1ea3y",
568
+ "th\u1ee9 hai",
569
+ "th\u1ee9 n\u0103m",
570
+ "th\u1ee9 s\u00e1u",
571
+ "th\u1ee9 t\u01b0",
572
+ "th\u1ef1c",
573
+ "tin",
574
+ "ti\u1ebfng",
575
+ "ti\u1ec1n",
576
+ "ti\u1ec3u",
577
+ "ti\u1ec7c",
578
+ "to\u00e1n",
579
+ "tra",
580
+ "training",
581
+ "tri\u1ec3n",
582
+ "trong",
583
+ "trung",
584
+ "tr\u00e2m",
585
+ "tr\u00ecnh",
586
+ "tr\u01b0a",
587
+ "tr\u01b0\u1edbc",
588
+ "tu\u1ea5n",
589
+ "tu\u1ea7n",
590
+ "ty",
591
+ "t\u00e0i",
592
+ "t\u00e0u",
593
+ "t\u00e1c",
594
+ "t\u00e2m",
595
+ "t\u00f4i",
596
+ "t\u01b0",
597
+ "t\u1ea1i",
598
+ "t\u1eadp",
599
+ "t\u1ebf",
600
+ "t\u1ed1i",
601
+ "t\u1ed5",
602
+ "t\u1edbi",
603
+ "t\u1ee5",
604
+ "t\u1eeb",
605
+ "u",
606
+ "uct",
607
+ "ule",
608
+ "ung",
609
+ "unit",
610
+ "uth",
611
+ "u\u00e2n",
612
+ "u\u1ea5n",
613
+ "u\u1ea7n",
614
+ "u\u1ea9n",
615
+ "u\u1eadn",
616
+ "u\u1ed1c",
617
+ "u\u1ed1i",
618
+ "u\u1ed1n",
619
+ "u\u1ed1ng",
620
+ "u\u1ed5i",
621
+ "v",
622
+ "ver",
623
+ "vi",
624
+ "video",
625
+ "vi\u00ean",
626
+ "vi\u1ebft",
627
+ "vi\u1ec7c",
628
+ "vi\u1ec7n",
629
+ "vi\u1ec7t",
630
+ "v\u00e0",
631
+ "v\u00e0o",
632
+ "v\u0103n",
633
+ "v\u0169ng",
634
+ "v\u1ea5n",
635
+ "v\u1eadt",
636
+ "v\u1ec1",
637
+ "v\u1edbi",
638
+ "w",
639
+ "workshop",
640
+ "x",
641
+ "xe",
642
+ "xem",
643
+ "xin",
644
+ "xx",
645
+ "xxx",
646
+ "xxx Xx",
647
+ "xxx Xxx",
648
+ "xxx d",
649
+ "xxx xxx",
650
+ "xxx xxxx",
651
+ "xxxx",
652
+ "x\u1ebfp",
653
+ "y",
654
+ "yoga",
655
+ "y\u1ebft",
656
+ "y\u1ec1n",
657
+ "y\u1ec7n",
658
+ "z",
659
+ "\u00a0",
660
+ "\u00d4",
661
+ "\u00d4n",
662
+ "\u00e0ng",
663
+ "\u00e0nh",
664
+ "\u00e1",
665
+ "\u00e1ch",
666
+ "\u00e1n",
667
+ "\u00e1ng",
668
+ "\u00e2ng",
669
+ "\u00ecnh",
670
+ "\u00ednh",
671
+ "\u00f2ng",
672
+ "\u00f3ng",
673
+ "\u00f4",
674
+ "\u00f4n",
675
+ "\u00f4ng",
676
+ "\u00f9ng",
677
+ "\u00fang",
678
+ "\u0103",
679
+ "\u0103n",
680
+ "\u0103ng",
681
+ "\u0110",
682
+ "\u0110i",
683
+ "\u0110\u00e0",
684
+ "\u0110\u1ecba",
685
+ "\u0110\u1ecdc",
686
+ "\u0110\u1ee9c",
687
+ "\u0111",
688
+ "\u0111a",
689
+ "\u0111i",
690
+ "\u0111i\u1ec7n",
691
+ "\u0111\u00e0",
692
+ "\u0111\u00e1",
693
+ "\u0111\u00ecnh",
694
+ "\u0111\u00f3n",
695
+ "\u0111\u00f3ng",
696
+ "\u0111\u01b0a",
697
+ "\u0111\u1ea7u",
698
+ "\u0111\u1eb7t",
699
+ "\u0111\u1ebfn",
700
+ "\u0111\u1ec1",
701
+ "\u0111\u1ecba",
702
+ "\u0111\u1ecdc",
703
+ "\u0111\u1ed1i",
704
+ "\u0111\u1ed3",
705
+ "\u0111\u1ed3ng",
706
+ "\u0111\u1ed9i",
707
+ "\u0111\u1ee9c",
708
+ "\u0169ng",
709
+ "\u01a1ng",
710
+ "\u01b0ng",
711
+ "\u01b0\u1edbc",
712
+ "\u01b0\u1edbi",
713
+ "\u1ea1ch",
714
+ "\u1ea1nh",
715
+ "\u1ea3ng",
716
+ "\u1eafng",
717
+ "\u1eb5ng",
718
+ "\u1ebfng",
719
+ "\u1ec7nh",
720
+ "\u1ecbch",
721
+ "\u1ecfng",
722
+ "\u1ed1ng",
723
+ "\u1ed3ng",
724
+ "\u1edbng",
725
+ "\u1eddng",
726
+ "\u1edf",
727
+ "\u1ee5ng",
728
+ "\u1ee9",
729
+ "\u1ee9 2",
730
+ "\u1ee9 3",
731
+ "\u1ee9 4",
732
+ "\u1ee9 5",
733
+ "\u1ee9 6",
734
+ "\u1ee9 7",
735
+ "\u1ee9ng"
736
+ ]
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }