PuppetLover commited on
Commit
0a05cfd
·
verified ·
1 Parent(s): 03fc1c7

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +3 -342
  2. special_tokens_map.json +51 -121
  3. tokenizer_config.json +55 -3449
added_tokens.json CHANGED
@@ -1,342 +1,3 @@
1
- {
2
- "17": 36161,
3
- "2013": 36284,
4
- "300": 36114,
5
- "8,5": 36192,
6
- "80": 36189,
7
- "<extra_id_0>": 36095,
8
- "<extra_id_10>": 36085,
9
- "<extra_id_11>": 36084,
10
- "<extra_id_12>": 36083,
11
- "<extra_id_13>": 36082,
12
- "<extra_id_14>": 36081,
13
- "<extra_id_15>": 36080,
14
- "<extra_id_16>": 36079,
15
- "<extra_id_17>": 36078,
16
- "<extra_id_18>": 36077,
17
- "<extra_id_19>": 36076,
18
- "<extra_id_1>": 36094,
19
- "<extra_id_20>": 36075,
20
- "<extra_id_21>": 36074,
21
- "<extra_id_22>": 36073,
22
- "<extra_id_23>": 36072,
23
- "<extra_id_24>": 36071,
24
- "<extra_id_25>": 36070,
25
- "<extra_id_26>": 36069,
26
- "<extra_id_27>": 36068,
27
- "<extra_id_28>": 36067,
28
- "<extra_id_29>": 36066,
29
- "<extra_id_2>": 36093,
30
- "<extra_id_30>": 36065,
31
- "<extra_id_31>": 36064,
32
- "<extra_id_32>": 36063,
33
- "<extra_id_33>": 36062,
34
- "<extra_id_34>": 36061,
35
- "<extra_id_35>": 36060,
36
- "<extra_id_36>": 36059,
37
- "<extra_id_37>": 36058,
38
- "<extra_id_38>": 36057,
39
- "<extra_id_39>": 36056,
40
- "<extra_id_3>": 36092,
41
- "<extra_id_40>": 36055,
42
- "<extra_id_41>": 36054,
43
- "<extra_id_42>": 36053,
44
- "<extra_id_43>": 36052,
45
- "<extra_id_44>": 36051,
46
- "<extra_id_45>": 36050,
47
- "<extra_id_46>": 36049,
48
- "<extra_id_47>": 36048,
49
- "<extra_id_48>": 36047,
50
- "<extra_id_49>": 36046,
51
- "<extra_id_4>": 36091,
52
- "<extra_id_50>": 36045,
53
- "<extra_id_51>": 36044,
54
- "<extra_id_52>": 36043,
55
- "<extra_id_53>": 36042,
56
- "<extra_id_54>": 36041,
57
- "<extra_id_55>": 36040,
58
- "<extra_id_56>": 36039,
59
- "<extra_id_57>": 36038,
60
- "<extra_id_58>": 36037,
61
- "<extra_id_59>": 36036,
62
- "<extra_id_5>": 36090,
63
- "<extra_id_60>": 36035,
64
- "<extra_id_61>": 36034,
65
- "<extra_id_62>": 36033,
66
- "<extra_id_63>": 36032,
67
- "<extra_id_64>": 36031,
68
- "<extra_id_65>": 36030,
69
- "<extra_id_66>": 36029,
70
- "<extra_id_67>": 36028,
71
- "<extra_id_68>": 36027,
72
- "<extra_id_69>": 36026,
73
- "<extra_id_6>": 36089,
74
- "<extra_id_70>": 36025,
75
- "<extra_id_71>": 36024,
76
- "<extra_id_72>": 36023,
77
- "<extra_id_73>": 36022,
78
- "<extra_id_74>": 36021,
79
- "<extra_id_75>": 36020,
80
- "<extra_id_76>": 36019,
81
- "<extra_id_77>": 36018,
82
- "<extra_id_78>": 36017,
83
- "<extra_id_79>": 36016,
84
- "<extra_id_7>": 36088,
85
- "<extra_id_80>": 36015,
86
- "<extra_id_81>": 36014,
87
- "<extra_id_82>": 36013,
88
- "<extra_id_83>": 36012,
89
- "<extra_id_84>": 36011,
90
- "<extra_id_85>": 36010,
91
- "<extra_id_86>": 36009,
92
- "<extra_id_87>": 36008,
93
- "<extra_id_88>": 36007,
94
- "<extra_id_89>": 36006,
95
- "<extra_id_8>": 36087,
96
- "<extra_id_90>": 36005,
97
- "<extra_id_91>": 36004,
98
- "<extra_id_92>": 36003,
99
- "<extra_id_93>": 36002,
100
- "<extra_id_94>": 36001,
101
- "<extra_id_95>": 36000,
102
- "<extra_id_9>": 36086,
103
- "A_Bì": 36294,
104
- "A_h": 36291,
105
- "Biê_Hòa": 36133,
106
- "Bắc_ô": 36169,
107
- "Chí_h": 36322,
108
- "Chùa": 36096,
109
- "Cây": 36187,
110
- "Di_sả": 36121,
111
- "Dip": 36141,
112
- "Dù": 36264,
113
- "Dầu_ái": 36112,
114
- "Dầu_ái_ại": 36285,
115
- "Hiệ_ay": 36166,
116
- "Hiệp_Hòa": 36132,
117
- "Huệ_Tâm": 36179,
118
- "Hòa_g": 36097,
119
- "Hội": 36286,
120
- "Lâm_Tế_sá": 36164,
121
- "Môi_ườ": 36288,
122
- "Mỗi": 36301,
123
- "Nai": 36103,
124
- "Nam_Bộ": 36175,
125
- "Ngoài": 36220,
126
- "Nguyễ": 36292,
127
- "Nhiều": 36334,
128
- "Ni": 36176,
129
- "Năm": 36283,
130
- "Nằm": 36127,
131
- "Phậ_ử": 36271,
132
- "Phố": 36212,
133
- "Phố_bê": 36099,
134
- "Roxb": 36143,
135
- "T_ải": 36213,
136
- "Theo": 36201,
137
- "Thiê_hiê": 36287,
138
- "Thích_Nữ": 36178,
139
- "Từ": 36254,
140
- "Việ_Nam": 36145,
141
- "Với": 36228,
142
- "a_g": 36277,
143
- "a_h": 36296,
144
- "bao_hă": 36313,
145
- "bao_đổi": 36248,
146
- "biế": 36181,
147
- "biểu_ượ": 36217,
148
- "bà": 36252,
149
- "báu_vậ": 36118,
150
- "bảo_vệ": 36263,
151
- "bậ": 36230,
152
- "bậc": 36156,
153
- "bằ": 36174,
154
- "cao": 36188,
155
- "cao_iê": 36204,
156
- "che_bó": 36195,
157
- "che_chở": 36336,
158
- "chia_sẻ": 36282,
159
- "chiêm_bái": 36200,
160
- "chiều": 36229,
161
- "chùa": 36125,
162
- "chỉ": 36105,
163
- "chố": 36106,
164
- "chứ": 36184,
165
- "co_gười": 36321,
166
- "cây": 36111,
167
- "cây_cổ": 36333,
168
- "cò": 36109,
169
- "cò_ma": 36222,
170
- "cù_g": 36231,
171
- "cù_lao": 36211,
172
- "cả": 36267,
173
- "cảm_hấy": 36297,
174
- "cảm_hậ": 36310,
175
- "cầu": 36331,
176
- "cầu_bì": 36337,
177
- "cổ": 36126,
178
- "diệ": 36206,
179
- "du_khách": 36198,
180
- "dâ": 36136,
181
- "dò": 36100,
182
- "dưới": 36307,
183
- "dấu": 36258,
184
- "dấu_ấ": 36318,
185
- "dẫ_gia_đì": 36305,
186
- "dặ_dò": 36270,
187
- "dịp": 36325,
188
- "dự_g": 36159,
189
- "e_oca": 36142,
190
- "ghi": 36290,
191
- "ghiêm": 36278,
192
- "ghé": 36303,
193
- "ghĩa": 36223,
194
- "giao_hòa": 36319,
195
- "già": 36335,
196
- "giá": 36221,
197
- "giúp": 36236,
198
- "giữa": 36320,
199
- "guyệ": 36332,
200
- "gì": 36275,
201
- "gôi": 36152,
202
- "gười": 36135,
203
- "gắ": 36123,
204
- "gắ_liề": 36146,
205
- "gọi_là": 36128,
206
- "gốc": 36308,
207
- "gụ": 36293,
208
- "h_a": 36338,
209
- "h_hái": 36226,
210
- "h_hồ": 36245,
211
- "h_đế": 36306,
212
- "h_ố": 36266,
213
- "hiê": 36122,
214
- "hiê_g": 36312,
215
- "hiề": 36154,
216
- "hiề_sư": 36162,
217
- "hiều": 36328,
218
- "hiệ": 36205,
219
- "hoa": 36209,
220
- "huộc": 36130,
221
- "huộc_hệ": 36167,
222
- "huộc_phái": 36163,
223
- "hà_chùa": 36251,
224
- "hà_g": 36183,
225
- "hà_h": 36150,
226
- "hâ": 36191,
227
- "hãy": 36273,
228
- "hì_h": 36149,
229
- "hòa": 36238,
230
- "hóa": 36261,
231
- "hô": 36295,
232
- "hơ": 36113,
233
- "hư": 36115,
234
- "hấ_Đồ": 36157,
235
- "hầ": 36218,
236
- "hậ": 36144,
237
- "hập": 36242,
238
- "hắc_hở": 36260,
239
- "hế": 36315,
240
- "hế_kỷ": 36160,
241
- "hời_gia": 36259,
242
- "hời_kỳ": 36207,
243
- "hụ": 36246,
244
- "hữ": 36153,
245
- "iế_g": 36137,
246
- "khai": 36208,
247
- "khi": 36272,
248
- "khoa_học": 36227,
249
- "khách": 36241,
250
- "khí": 36239,
251
- "khô": 36215,
252
- "khô_g": 36104,
253
- "kiế": 36170,
254
- "kí_h": 36155,
255
- "kể": 36203,
256
- "kỷ_sừ": 36316,
257
- "liề": 36124,
258
- "luô": 36269,
259
- "luô_giữ": 36274,
260
- "lá": 36234,
261
- "lưu_giữ": 36110,
262
- "lầ": 36302,
263
- "lập": 36165,
264
- "lễ": 36326,
265
- "lịch_sử": 36214,
266
- "lớ": 36327,
267
- "lời": 36202,
268
- "mà": 36108,
269
- "má": 36196,
270
- "mì": 36300,
271
- "mắ": 36339,
272
- "mặ": 36225,
273
- "mỗi": 36324,
274
- "mộ": 36116,
275
- "phá": 36151,
276
- "phái": 36168,
277
- "phươ": 36243,
278
- "phườ": 36131,
279
- "phải": 36262,
280
- "phầ_li": 36244,
281
- "phậ": 36197,
282
- "phậ_ử": 36329,
283
- "phổi": 36235,
284
- "quá": 36147,
285
- "quý_giá": 36120,
286
- "sâ": 36268,
287
- "sô": 36101,
288
- "sạch_sẽ": 36279,
289
- "sẹo": 36256,
290
- "số": 36247,
291
- "sữ": 36317,
292
- "sự": 36276,
293
- "vai": 36233,
294
- "viế": 36330,
295
- "vì": 36299,
296
- "vì_vậy": 36323,
297
- "vò": 36190,
298
- "vô": 36298,
299
- "vù": 36172,
300
- "vẫ": 36265,
301
- "vế": 36255,
302
- "vị": 36280,
303
- "xa_h": 36138,
304
- "xem": 36253,
305
- "xum_xuê": 36194,
306
- "xây": 36158,
307
- "Â_Cổ_Tự": 36129,
308
- "á_lá": 36193,
309
- "âm_li": 36107,
310
- "âm_sự": 36314,
311
- "ê_cù_lao": 36098,
312
- "ê_hâ": 36257,
313
- "ê_khoa_học": 36140,
314
- "ê_vù": 36210,
315
- "ì_h": 36148,
316
- "Điều": 36289,
317
- "Đó": 36139,
318
- "Đồ": 36102,
319
- "điều": 36237,
320
- "điểm": 36249,
321
- "đó": 36232,
322
- "đấ": 36186,
323
- "đặc_biệ": 36224,
324
- "đặc_ư": 36171,
325
- "đế": 36199,
326
- "đều": 36304,
327
- "để": 36309,
328
- "địa_phươ": 36219,
329
- "địa_điểm": 36134,
330
- "đồ": 36173,
331
- "đổi": 36185,
332
- "ưở_g": 36177,
333
- "ạo_bó": 36216,
334
- "ị_h": 36240,
335
- "ị_li": 36311,
336
- "ồ_ại": 36182,
337
- "ụ_ì": 36180,
338
- "ụ_ì_chùa": 36281,
339
- "ự_hào": 36250,
340
- "“": 36117,
341
- "”": 36119
342
- }
 
1
+ {
2
+ "<mask>": 64000
3
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
special_tokens_map.json CHANGED
@@ -1,121 +1,51 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>"
99
- ],
100
- "eos_token": {
101
- "content": "</s>",
102
- "lstrip": false,
103
- "normalized": false,
104
- "rstrip": false,
105
- "single_word": false
106
- },
107
- "pad_token": {
108
- "content": "<pad>",
109
- "lstrip": false,
110
- "normalized": false,
111
- "rstrip": false,
112
- "single_word": false
113
- },
114
- "unk_token": {
115
- "content": "<unk>",
116
- "lstrip": false,
117
- "normalized": false,
118
- "rstrip": false,
119
- "single_word": false
120
- }
121
- }
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -1,3449 +1,55 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<pad>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "</s>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "<unk>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "46": {
29
- "content": "ườ",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": false
35
- },
36
- "65": {
37
- "content": "iề",
38
- "lstrip": false,
39
- "normalized": true,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": false
43
- },
44
- "66": {
45
- "content": "ày",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": false
51
- },
52
- "97": {
53
- "content": "iể",
54
- "lstrip": false,
55
- "normalized": true,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": false
59
- },
60
- "103": {
61
- "content": "ăm",
62
- "lstrip": false,
63
- "normalized": true,
64
- "rstrip": false,
65
- "single_word": false,
66
- "special": false
67
- },
68
- "137": {
69
- "content": "ải",
70
- "lstrip": false,
71
- "normalized": true,
72
- "rstrip": false,
73
- "single_word": false,
74
- "special": false
75
- },
76
- "201": {
77
- "content": "ổi",
78
- "lstrip": false,
79
- "normalized": true,
80
- "rstrip": false,
81
- "single_word": false,
82
- "special": false
83
- },
84
- "224": {
85
- "content": "ạo",
86
- "lstrip": false,
87
- "normalized": true,
88
- "rstrip": false,
89
- "single_word": false,
90
- "special": false
91
- },
92
- "232": {
93
- "content": "ôi",
94
- "lstrip": false,
95
- "normalized": true,
96
- "rstrip": false,
97
- "single_word": false,
98
- "special": false
99
- },
100
- "279": {
101
- "content": "ấp",
102
- "lstrip": false,
103
- "normalized": true,
104
- "rstrip": false,
105
- "single_word": false,
106
- "special": false
107
- },
108
- "330": {
109
- "content": "úc",
110
- "lstrip": false,
111
- "normalized": true,
112
- "rstrip": false,
113
- "single_word": false,
114
- "special": false
115
- },
116
- "389": {
117
- "content": "us",
118
- "lstrip": false,
119
- "normalized": true,
120
- "rstrip": false,
121
- "single_word": false,
122
- "special": false
123
- },
124
- "654": {
125
- "content": "ầm",
126
- "lstrip": false,
127
- "normalized": true,
128
- "rstrip": false,
129
- "single_word": false,
130
- "special": false
131
- },
132
- "672": {
133
- "content": "ằm",
134
- "lstrip": false,
135
- "normalized": true,
136
- "rstrip": false,
137
- "single_word": false,
138
- "special": false
139
- },
140
- "1235": {
141
- "content": "uổi",
142
- "lstrip": false,
143
- "normalized": true,
144
- "rstrip": false,
145
- "single_word": false,
146
- "special": false
147
- },
148
- "1512": {
149
- "content": "ựa",
150
- "lstrip": false,
151
- "normalized": true,
152
- "rstrip": false,
153
- "single_word": false,
154
- "special": false
155
- },
156
- "1843": {
157
- "content": "iềm",
158
- "lstrip": false,
159
- "normalized": true,
160
- "rstrip": false,
161
- "single_word": false,
162
- "special": false
163
- },
164
- "2353": {
165
- "content": "ma",
166
- "lstrip": false,
167
- "normalized": true,
168
- "rstrip": false,
169
- "single_word": false,
170
- "special": false
171
- },
172
- "2416": {
173
- "content": "ba",
174
- "lstrip": false,
175
- "normalized": true,
176
- "rstrip": false,
177
- "single_word": false,
178
- "special": false
179
- },
180
- "2657": {
181
- "content": "ala",
182
- "lstrip": false,
183
- "normalized": true,
184
- "rstrip": false,
185
- "single_word": false,
186
- "special": false
187
- },
188
- "3689": {
189
- "content": "ha",
190
- "lstrip": false,
191
- "normalized": true,
192
- "rstrip": false,
193
- "single_word": false,
194
- "special": false
195
- },
196
- "3752": {
197
- "content": "si",
198
- "lstrip": false,
199
- "normalized": true,
200
- "rstrip": false,
201
- "single_word": false,
202
- "special": false
203
- },
204
- "3888": {
205
- "content": "que",
206
- "lstrip": false,
207
- "normalized": true,
208
- "rstrip": false,
209
- "single_word": false,
210
- "special": false
211
- },
212
- "4104": {
213
- "content": "do",
214
- "lstrip": false,
215
- "normalized": true,
216
- "rstrip": false,
217
- "single_word": false,
218
- "special": false
219
- },
220
- "4358": {
221
- "content": "che",
222
- "lstrip": false,
223
- "normalized": true,
224
- "rstrip": false,
225
- "single_word": false,
226
- "special": false
227
- },
228
- "4717": {
229
- "content": "TP",
230
- "lstrip": false,
231
- "normalized": true,
232
- "rstrip": false,
233
- "single_word": false,
234
- "special": false
235
- },
236
- "10134": {
237
- "content": "cho",
238
- "lstrip": false,
239
- "normalized": true,
240
- "rstrip": false,
241
- "single_word": false,
242
- "special": false
243
- },
244
- "10817": {
245
- "content": "pus",
246
- "lstrip": false,
247
- "normalized": true,
248
- "rstrip": false,
249
- "single_word": false,
250
- "special": false
251
- },
252
- "11751": {
253
- "content": "gia",
254
- "lstrip": false,
255
- "normalized": true,
256
- "rstrip": false,
257
- "single_word": false,
258
- "special": false
259
- },
260
- "13011": {
261
- "content": "qua",
262
- "lstrip": false,
263
- "normalized": true,
264
- "rstrip": false,
265
- "single_word": false,
266
- "special": false
267
- },
268
- "14804": {
269
- "content": "xa",
270
- "lstrip": false,
271
- "normalized": true,
272
- "rstrip": false,
273
- "single_word": false,
274
- "special": false
275
- },
276
- "16084": {
277
- "content": "và",
278
- "lstrip": false,
279
- "normalized": true,
280
- "rstrip": false,
281
- "single_word": false,
282
- "special": false
283
- },
284
- "17503": {
285
- "content": "cô",
286
- "lstrip": false,
287
- "normalized": true,
288
- "rstrip": false,
289
- "single_word": false,
290
- "special": false
291
- },
292
- "18454": {
293
- "content": "bao",
294
- "lstrip": false,
295
- "normalized": true,
296
- "rstrip": false,
297
- "single_word": false,
298
- "special": false
299
- },
300
- "19739": {
301
- "content": "đã",
302
- "lstrip": false,
303
- "normalized": true,
304
- "rstrip": false,
305
- "single_word": false,
306
- "special": false
307
- },
308
- "22107": {
309
- "content": "là",
310
- "lstrip": false,
311
- "normalized": true,
312
- "rstrip": false,
313
- "single_word": false,
314
- "special": false
315
- },
316
- "23749": {
317
- "content": "có",
318
- "lstrip": false,
319
- "normalized": true,
320
- "rstrip": false,
321
- "single_word": false,
322
- "special": false
323
- },
324
- "23751": {
325
- "content": "của",
326
- "lstrip": false,
327
- "normalized": true,
328
- "rstrip": false,
329
- "single_word": false,
330
- "special": false
331
- },
332
- "24161": {
333
- "content": "được",
334
- "lstrip": false,
335
- "normalized": true,
336
- "rstrip": false,
337
- "single_word": false,
338
- "special": false
339
- },
340
- "28311": {
341
- "content": "các",
342
- "lstrip": false,
343
- "normalized": true,
344
- "rstrip": false,
345
- "single_word": false,
346
- "special": false
347
- },
348
- "28996": {
349
- "content": "hay",
350
- "lstrip": false,
351
- "normalized": true,
352
- "rstrip": false,
353
- "single_word": false,
354
- "special": false
355
- },
356
- "30092": {
357
- "content": "về",
358
- "lstrip": false,
359
- "normalized": true,
360
- "rstrip": false,
361
- "single_word": false,
362
- "special": false
363
- },
364
- "30860": {
365
- "content": "may",
366
- "lstrip": false,
367
- "normalized": true,
368
- "rstrip": false,
369
- "single_word": false,
370
- "special": false
371
- },
372
- "33032": {
373
- "content": "Tôi",
374
- "lstrip": false,
375
- "normalized": true,
376
- "rstrip": false,
377
- "single_word": false,
378
- "special": false
379
- },
380
- "33768": {
381
- "content": "với",
382
- "lstrip": false,
383
- "normalized": true,
384
- "rstrip": false,
385
- "single_word": false,
386
- "special": false
387
- },
388
- "35785": {
389
- "content": "h",
390
- "lstrip": false,
391
- "normalized": true,
392
- "rstrip": false,
393
- "single_word": false,
394
- "special": false
395
- },
396
- "35786": {
397
- "content": "i",
398
- "lstrip": false,
399
- "normalized": true,
400
- "rstrip": false,
401
- "single_word": false,
402
- "special": false
403
- },
404
- "35789": {
405
- "content": "g",
406
- "lstrip": false,
407
- "normalized": true,
408
- "rstrip": false,
409
- "single_word": false,
410
- "special": false
411
- },
412
- "35790": {
413
- "content": ",",
414
- "lstrip": false,
415
- "normalized": true,
416
- "rstrip": false,
417
- "single_word": false,
418
- "special": false
419
- },
420
- "35792": {
421
- "content": ".",
422
- "lstrip": false,
423
- "normalized": true,
424
- "rstrip": false,
425
- "single_word": false,
426
- "special": false
427
- },
428
- "35795": {
429
- "content": "o",
430
- "lstrip": false,
431
- "normalized": true,
432
- "rstrip": false,
433
- "single_word": false,
434
- "special": false
435
- },
436
- "35797": {
437
- "content": "m",
438
- "lstrip": false,
439
- "normalized": true,
440
- "rstrip": false,
441
- "single_word": false,
442
- "special": false
443
- },
444
- "35814": {
445
- "content": "T",
446
- "lstrip": false,
447
- "normalized": true,
448
- "rstrip": false,
449
- "single_word": false,
450
- "special": false
451
- },
452
- "35815": {
453
- "content": "ê",
454
- "lstrip": false,
455
- "normalized": true,
456
- "rstrip": false,
457
- "single_word": false,
458
- "special": false
459
- },
460
- "35820": {
461
- "content": "ó",
462
- "lstrip": false,
463
- "normalized": true,
464
- "rstrip": false,
465
- "single_word": false,
466
- "special": false
467
- },
468
- "35822": {
469
- "content": "â",
470
- "lstrip": false,
471
- "normalized": true,
472
- "rstrip": false,
473
- "single_word": false,
474
- "special": false
475
- },
476
- "35824": {
477
- "content": "ị",
478
- "lstrip": false,
479
- "normalized": true,
480
- "rstrip": false,
481
- "single_word": false,
482
- "special": false
483
- },
484
- "35831": {
485
- "content": ")",
486
- "lstrip": false,
487
- "normalized": true,
488
- "rstrip": false,
489
- "single_word": false,
490
- "special": false
491
- },
492
- "35832": {
493
- "content": "(",
494
- "lstrip": false,
495
- "normalized": true,
496
- "rstrip": false,
497
- "single_word": false,
498
- "special": false
499
- },
500
- "35833": {
501
- "content": "ấ",
502
- "lstrip": false,
503
- "normalized": true,
504
- "rstrip": false,
505
- "single_word": false,
506
- "special": false
507
- },
508
- "35843": {
509
- "content": "ă",
510
- "lstrip": false,
511
- "normalized": true,
512
- "rstrip": false,
513
- "single_word": false,
514
- "special": false
515
- },
516
- "35845": {
517
- "content": "ự",
518
- "lstrip": false,
519
- "normalized": true,
520
- "rstrip": false,
521
- "single_word": false,
522
- "special": false
523
- },
524
- "35848": {
525
- "content": "ồ",
526
- "lstrip": false,
527
- "normalized": true,
528
- "rstrip": false,
529
- "single_word": false,
530
- "special": false
531
- },
532
- "35851": {
533
- "content": "ở",
534
- "lstrip": false,
535
- "normalized": true,
536
- "rstrip": false,
537
- "single_word": false,
538
- "special": false
539
- },
540
- "35854": {
541
- "content": "ọ",
542
- "lstrip": false,
543
- "normalized": true,
544
- "rstrip": false,
545
- "single_word": false,
546
- "special": false
547
- },
548
- "35862": {
549
- "content": ":",
550
- "lstrip": false,
551
- "normalized": true,
552
- "rstrip": false,
553
- "single_word": false,
554
- "special": false
555
- },
556
- "35866": {
557
- "content": "ò",
558
- "lstrip": false,
559
- "normalized": true,
560
- "rstrip": false,
561
- "single_word": false,
562
- "special": false
563
- },
564
- "35868": {
565
- "content": "ừ",
566
- "lstrip": false,
567
- "normalized": true,
568
- "rstrip": false,
569
- "single_word": false,
570
- "special": false
571
- },
572
- "35872": {
573
- "content": "ử",
574
- "lstrip": false,
575
- "normalized": true,
576
- "rstrip": false,
577
- "single_word": false,
578
- "special": false
579
- },
580
- "35877": {
581
- "content": "ỉ",
582
- "lstrip": false,
583
- "normalized": true,
584
- "rstrip": false,
585
- "single_word": false,
586
- "special": false
587
- },
588
- "35879": {
589
- "content": "3",
590
- "lstrip": false,
591
- "normalized": true,
592
- "rstrip": false,
593
- "single_word": false,
594
- "special": false
595
- },
596
- "35886": {
597
- "content": "ý",
598
- "lstrip": false,
599
- "normalized": true,
600
- "rstrip": false,
601
- "single_word": false,
602
- "special": false
603
- },
604
- "35887": {
605
- "content": "é",
606
- "lstrip": false,
607
- "normalized": true,
608
- "rstrip": false,
609
- "single_word": false,
610
- "special": false
611
- },
612
- "35916": {
613
- "content": "Â",
614
- "lstrip": false,
615
- "normalized": true,
616
- "rstrip": false,
617
- "single_word": false,
618
- "special": false
619
- },
620
- "36000": {
621
- "content": "<extra_id_95>",
622
- "lstrip": false,
623
- "normalized": false,
624
- "rstrip": false,
625
- "single_word": false,
626
- "special": true
627
- },
628
- "36001": {
629
- "content": "<extra_id_94>",
630
- "lstrip": false,
631
- "normalized": false,
632
- "rstrip": false,
633
- "single_word": false,
634
- "special": true
635
- },
636
- "36002": {
637
- "content": "<extra_id_93>",
638
- "lstrip": false,
639
- "normalized": false,
640
- "rstrip": false,
641
- "single_word": false,
642
- "special": true
643
- },
644
- "36003": {
645
- "content": "<extra_id_92>",
646
- "lstrip": false,
647
- "normalized": false,
648
- "rstrip": false,
649
- "single_word": false,
650
- "special": true
651
- },
652
- "36004": {
653
- "content": "<extra_id_91>",
654
- "lstrip": false,
655
- "normalized": false,
656
- "rstrip": false,
657
- "single_word": false,
658
- "special": true
659
- },
660
- "36005": {
661
- "content": "<extra_id_90>",
662
- "lstrip": false,
663
- "normalized": false,
664
- "rstrip": false,
665
- "single_word": false,
666
- "special": true
667
- },
668
- "36006": {
669
- "content": "<extra_id_89>",
670
- "lstrip": false,
671
- "normalized": false,
672
- "rstrip": false,
673
- "single_word": false,
674
- "special": true
675
- },
676
- "36007": {
677
- "content": "<extra_id_88>",
678
- "lstrip": false,
679
- "normalized": false,
680
- "rstrip": false,
681
- "single_word": false,
682
- "special": true
683
- },
684
- "36008": {
685
- "content": "<extra_id_87>",
686
- "lstrip": false,
687
- "normalized": false,
688
- "rstrip": false,
689
- "single_word": false,
690
- "special": true
691
- },
692
- "36009": {
693
- "content": "<extra_id_86>",
694
- "lstrip": false,
695
- "normalized": false,
696
- "rstrip": false,
697
- "single_word": false,
698
- "special": true
699
- },
700
- "36010": {
701
- "content": "<extra_id_85>",
702
- "lstrip": false,
703
- "normalized": false,
704
- "rstrip": false,
705
- "single_word": false,
706
- "special": true
707
- },
708
- "36011": {
709
- "content": "<extra_id_84>",
710
- "lstrip": false,
711
- "normalized": false,
712
- "rstrip": false,
713
- "single_word": false,
714
- "special": true
715
- },
716
- "36012": {
717
- "content": "<extra_id_83>",
718
- "lstrip": false,
719
- "normalized": false,
720
- "rstrip": false,
721
- "single_word": false,
722
- "special": true
723
- },
724
- "36013": {
725
- "content": "<extra_id_82>",
726
- "lstrip": false,
727
- "normalized": false,
728
- "rstrip": false,
729
- "single_word": false,
730
- "special": true
731
- },
732
- "36014": {
733
- "content": "<extra_id_81>",
734
- "lstrip": false,
735
- "normalized": false,
736
- "rstrip": false,
737
- "single_word": false,
738
- "special": true
739
- },
740
- "36015": {
741
- "content": "<extra_id_80>",
742
- "lstrip": false,
743
- "normalized": false,
744
- "rstrip": false,
745
- "single_word": false,
746
- "special": true
747
- },
748
- "36016": {
749
- "content": "<extra_id_79>",
750
- "lstrip": false,
751
- "normalized": false,
752
- "rstrip": false,
753
- "single_word": false,
754
- "special": true
755
- },
756
- "36017": {
757
- "content": "<extra_id_78>",
758
- "lstrip": false,
759
- "normalized": false,
760
- "rstrip": false,
761
- "single_word": false,
762
- "special": true
763
- },
764
- "36018": {
765
- "content": "<extra_id_77>",
766
- "lstrip": false,
767
- "normalized": false,
768
- "rstrip": false,
769
- "single_word": false,
770
- "special": true
771
- },
772
- "36019": {
773
- "content": "<extra_id_76>",
774
- "lstrip": false,
775
- "normalized": false,
776
- "rstrip": false,
777
- "single_word": false,
778
- "special": true
779
- },
780
- "36020": {
781
- "content": "<extra_id_75>",
782
- "lstrip": false,
783
- "normalized": false,
784
- "rstrip": false,
785
- "single_word": false,
786
- "special": true
787
- },
788
- "36021": {
789
- "content": "<extra_id_74>",
790
- "lstrip": false,
791
- "normalized": false,
792
- "rstrip": false,
793
- "single_word": false,
794
- "special": true
795
- },
796
- "36022": {
797
- "content": "<extra_id_73>",
798
- "lstrip": false,
799
- "normalized": false,
800
- "rstrip": false,
801
- "single_word": false,
802
- "special": true
803
- },
804
- "36023": {
805
- "content": "<extra_id_72>",
806
- "lstrip": false,
807
- "normalized": false,
808
- "rstrip": false,
809
- "single_word": false,
810
- "special": true
811
- },
812
- "36024": {
813
- "content": "<extra_id_71>",
814
- "lstrip": false,
815
- "normalized": false,
816
- "rstrip": false,
817
- "single_word": false,
818
- "special": true
819
- },
820
- "36025": {
821
- "content": "<extra_id_70>",
822
- "lstrip": false,
823
- "normalized": false,
824
- "rstrip": false,
825
- "single_word": false,
826
- "special": true
827
- },
828
- "36026": {
829
- "content": "<extra_id_69>",
830
- "lstrip": false,
831
- "normalized": false,
832
- "rstrip": false,
833
- "single_word": false,
834
- "special": true
835
- },
836
- "36027": {
837
- "content": "<extra_id_68>",
838
- "lstrip": false,
839
- "normalized": false,
840
- "rstrip": false,
841
- "single_word": false,
842
- "special": true
843
- },
844
- "36028": {
845
- "content": "<extra_id_67>",
846
- "lstrip": false,
847
- "normalized": false,
848
- "rstrip": false,
849
- "single_word": false,
850
- "special": true
851
- },
852
- "36029": {
853
- "content": "<extra_id_66>",
854
- "lstrip": false,
855
- "normalized": false,
856
- "rstrip": false,
857
- "single_word": false,
858
- "special": true
859
- },
860
- "36030": {
861
- "content": "<extra_id_65>",
862
- "lstrip": false,
863
- "normalized": false,
864
- "rstrip": false,
865
- "single_word": false,
866
- "special": true
867
- },
868
- "36031": {
869
- "content": "<extra_id_64>",
870
- "lstrip": false,
871
- "normalized": false,
872
- "rstrip": false,
873
- "single_word": false,
874
- "special": true
875
- },
876
- "36032": {
877
- "content": "<extra_id_63>",
878
- "lstrip": false,
879
- "normalized": false,
880
- "rstrip": false,
881
- "single_word": false,
882
- "special": true
883
- },
884
- "36033": {
885
- "content": "<extra_id_62>",
886
- "lstrip": false,
887
- "normalized": false,
888
- "rstrip": false,
889
- "single_word": false,
890
- "special": true
891
- },
892
- "36034": {
893
- "content": "<extra_id_61>",
894
- "lstrip": false,
895
- "normalized": false,
896
- "rstrip": false,
897
- "single_word": false,
898
- "special": true
899
- },
900
- "36035": {
901
- "content": "<extra_id_60>",
902
- "lstrip": false,
903
- "normalized": false,
904
- "rstrip": false,
905
- "single_word": false,
906
- "special": true
907
- },
908
- "36036": {
909
- "content": "<extra_id_59>",
910
- "lstrip": false,
911
- "normalized": false,
912
- "rstrip": false,
913
- "single_word": false,
914
- "special": true
915
- },
916
- "36037": {
917
- "content": "<extra_id_58>",
918
- "lstrip": false,
919
- "normalized": false,
920
- "rstrip": false,
921
- "single_word": false,
922
- "special": true
923
- },
924
- "36038": {
925
- "content": "<extra_id_57>",
926
- "lstrip": false,
927
- "normalized": false,
928
- "rstrip": false,
929
- "single_word": false,
930
- "special": true
931
- },
932
- "36039": {
933
- "content": "<extra_id_56>",
934
- "lstrip": false,
935
- "normalized": false,
936
- "rstrip": false,
937
- "single_word": false,
938
- "special": true
939
- },
940
- "36040": {
941
- "content": "<extra_id_55>",
942
- "lstrip": false,
943
- "normalized": false,
944
- "rstrip": false,
945
- "single_word": false,
946
- "special": true
947
- },
948
- "36041": {
949
- "content": "<extra_id_54>",
950
- "lstrip": false,
951
- "normalized": false,
952
- "rstrip": false,
953
- "single_word": false,
954
- "special": true
955
- },
956
- "36042": {
957
- "content": "<extra_id_53>",
958
- "lstrip": false,
959
- "normalized": false,
960
- "rstrip": false,
961
- "single_word": false,
962
- "special": true
963
- },
964
- "36043": {
965
- "content": "<extra_id_52>",
966
- "lstrip": false,
967
- "normalized": false,
968
- "rstrip": false,
969
- "single_word": false,
970
- "special": true
971
- },
972
- "36044": {
973
- "content": "<extra_id_51>",
974
- "lstrip": false,
975
- "normalized": false,
976
- "rstrip": false,
977
- "single_word": false,
978
- "special": true
979
- },
980
- "36045": {
981
- "content": "<extra_id_50>",
982
- "lstrip": false,
983
- "normalized": false,
984
- "rstrip": false,
985
- "single_word": false,
986
- "special": true
987
- },
988
- "36046": {
989
- "content": "<extra_id_49>",
990
- "lstrip": false,
991
- "normalized": false,
992
- "rstrip": false,
993
- "single_word": false,
994
- "special": true
995
- },
996
- "36047": {
997
- "content": "<extra_id_48>",
998
- "lstrip": false,
999
- "normalized": false,
1000
- "rstrip": false,
1001
- "single_word": false,
1002
- "special": true
1003
- },
1004
- "36048": {
1005
- "content": "<extra_id_47>",
1006
- "lstrip": false,
1007
- "normalized": false,
1008
- "rstrip": false,
1009
- "single_word": false,
1010
- "special": true
1011
- },
1012
- "36049": {
1013
- "content": "<extra_id_46>",
1014
- "lstrip": false,
1015
- "normalized": false,
1016
- "rstrip": false,
1017
- "single_word": false,
1018
- "special": true
1019
- },
1020
- "36050": {
1021
- "content": "<extra_id_45>",
1022
- "lstrip": false,
1023
- "normalized": false,
1024
- "rstrip": false,
1025
- "single_word": false,
1026
- "special": true
1027
- },
1028
- "36051": {
1029
- "content": "<extra_id_44>",
1030
- "lstrip": false,
1031
- "normalized": false,
1032
- "rstrip": false,
1033
- "single_word": false,
1034
- "special": true
1035
- },
1036
- "36052": {
1037
- "content": "<extra_id_43>",
1038
- "lstrip": false,
1039
- "normalized": false,
1040
- "rstrip": false,
1041
- "single_word": false,
1042
- "special": true
1043
- },
1044
- "36053": {
1045
- "content": "<extra_id_42>",
1046
- "lstrip": false,
1047
- "normalized": false,
1048
- "rstrip": false,
1049
- "single_word": false,
1050
- "special": true
1051
- },
1052
- "36054": {
1053
- "content": "<extra_id_41>",
1054
- "lstrip": false,
1055
- "normalized": false,
1056
- "rstrip": false,
1057
- "single_word": false,
1058
- "special": true
1059
- },
1060
- "36055": {
1061
- "content": "<extra_id_40>",
1062
- "lstrip": false,
1063
- "normalized": false,
1064
- "rstrip": false,
1065
- "single_word": false,
1066
- "special": true
1067
- },
1068
- "36056": {
1069
- "content": "<extra_id_39>",
1070
- "lstrip": false,
1071
- "normalized": false,
1072
- "rstrip": false,
1073
- "single_word": false,
1074
- "special": true
1075
- },
1076
- "36057": {
1077
- "content": "<extra_id_38>",
1078
- "lstrip": false,
1079
- "normalized": false,
1080
- "rstrip": false,
1081
- "single_word": false,
1082
- "special": true
1083
- },
1084
- "36058": {
1085
- "content": "<extra_id_37>",
1086
- "lstrip": false,
1087
- "normalized": false,
1088
- "rstrip": false,
1089
- "single_word": false,
1090
- "special": true
1091
- },
1092
- "36059": {
1093
- "content": "<extra_id_36>",
1094
- "lstrip": false,
1095
- "normalized": false,
1096
- "rstrip": false,
1097
- "single_word": false,
1098
- "special": true
1099
- },
1100
- "36060": {
1101
- "content": "<extra_id_35>",
1102
- "lstrip": false,
1103
- "normalized": false,
1104
- "rstrip": false,
1105
- "single_word": false,
1106
- "special": true
1107
- },
1108
- "36061": {
1109
- "content": "<extra_id_34>",
1110
- "lstrip": false,
1111
- "normalized": false,
1112
- "rstrip": false,
1113
- "single_word": false,
1114
- "special": true
1115
- },
1116
- "36062": {
1117
- "content": "<extra_id_33>",
1118
- "lstrip": false,
1119
- "normalized": false,
1120
- "rstrip": false,
1121
- "single_word": false,
1122
- "special": true
1123
- },
1124
- "36063": {
1125
- "content": "<extra_id_32>",
1126
- "lstrip": false,
1127
- "normalized": false,
1128
- "rstrip": false,
1129
- "single_word": false,
1130
- "special": true
1131
- },
1132
- "36064": {
1133
- "content": "<extra_id_31>",
1134
- "lstrip": false,
1135
- "normalized": false,
1136
- "rstrip": false,
1137
- "single_word": false,
1138
- "special": true
1139
- },
1140
- "36065": {
1141
- "content": "<extra_id_30>",
1142
- "lstrip": false,
1143
- "normalized": false,
1144
- "rstrip": false,
1145
- "single_word": false,
1146
- "special": true
1147
- },
1148
- "36066": {
1149
- "content": "<extra_id_29>",
1150
- "lstrip": false,
1151
- "normalized": false,
1152
- "rstrip": false,
1153
- "single_word": false,
1154
- "special": true
1155
- },
1156
- "36067": {
1157
- "content": "<extra_id_28>",
1158
- "lstrip": false,
1159
- "normalized": false,
1160
- "rstrip": false,
1161
- "single_word": false,
1162
- "special": true
1163
- },
1164
- "36068": {
1165
- "content": "<extra_id_27>",
1166
- "lstrip": false,
1167
- "normalized": false,
1168
- "rstrip": false,
1169
- "single_word": false,
1170
- "special": true
1171
- },
1172
- "36069": {
1173
- "content": "<extra_id_26>",
1174
- "lstrip": false,
1175
- "normalized": false,
1176
- "rstrip": false,
1177
- "single_word": false,
1178
- "special": true
1179
- },
1180
- "36070": {
1181
- "content": "<extra_id_25>",
1182
- "lstrip": false,
1183
- "normalized": false,
1184
- "rstrip": false,
1185
- "single_word": false,
1186
- "special": true
1187
- },
1188
- "36071": {
1189
- "content": "<extra_id_24>",
1190
- "lstrip": false,
1191
- "normalized": false,
1192
- "rstrip": false,
1193
- "single_word": false,
1194
- "special": true
1195
- },
1196
- "36072": {
1197
- "content": "<extra_id_23>",
1198
- "lstrip": false,
1199
- "normalized": false,
1200
- "rstrip": false,
1201
- "single_word": false,
1202
- "special": true
1203
- },
1204
- "36073": {
1205
- "content": "<extra_id_22>",
1206
- "lstrip": false,
1207
- "normalized": false,
1208
- "rstrip": false,
1209
- "single_word": false,
1210
- "special": true
1211
- },
1212
- "36074": {
1213
- "content": "<extra_id_21>",
1214
- "lstrip": false,
1215
- "normalized": false,
1216
- "rstrip": false,
1217
- "single_word": false,
1218
- "special": true
1219
- },
1220
- "36075": {
1221
- "content": "<extra_id_20>",
1222
- "lstrip": false,
1223
- "normalized": false,
1224
- "rstrip": false,
1225
- "single_word": false,
1226
- "special": true
1227
- },
1228
- "36076": {
1229
- "content": "<extra_id_19>",
1230
- "lstrip": false,
1231
- "normalized": false,
1232
- "rstrip": false,
1233
- "single_word": false,
1234
- "special": true
1235
- },
1236
- "36077": {
1237
- "content": "<extra_id_18>",
1238
- "lstrip": false,
1239
- "normalized": false,
1240
- "rstrip": false,
1241
- "single_word": false,
1242
- "special": true
1243
- },
1244
- "36078": {
1245
- "content": "<extra_id_17>",
1246
- "lstrip": false,
1247
- "normalized": false,
1248
- "rstrip": false,
1249
- "single_word": false,
1250
- "special": true
1251
- },
1252
- "36079": {
1253
- "content": "<extra_id_16>",
1254
- "lstrip": false,
1255
- "normalized": false,
1256
- "rstrip": false,
1257
- "single_word": false,
1258
- "special": true
1259
- },
1260
- "36080": {
1261
- "content": "<extra_id_15>",
1262
- "lstrip": false,
1263
- "normalized": false,
1264
- "rstrip": false,
1265
- "single_word": false,
1266
- "special": true
1267
- },
1268
- "36081": {
1269
- "content": "<extra_id_14>",
1270
- "lstrip": false,
1271
- "normalized": false,
1272
- "rstrip": false,
1273
- "single_word": false,
1274
- "special": true
1275
- },
1276
- "36082": {
1277
- "content": "<extra_id_13>",
1278
- "lstrip": false,
1279
- "normalized": false,
1280
- "rstrip": false,
1281
- "single_word": false,
1282
- "special": true
1283
- },
1284
- "36083": {
1285
- "content": "<extra_id_12>",
1286
- "lstrip": false,
1287
- "normalized": false,
1288
- "rstrip": false,
1289
- "single_word": false,
1290
- "special": true
1291
- },
1292
- "36084": {
1293
- "content": "<extra_id_11>",
1294
- "lstrip": false,
1295
- "normalized": false,
1296
- "rstrip": false,
1297
- "single_word": false,
1298
- "special": true
1299
- },
1300
- "36085": {
1301
- "content": "<extra_id_10>",
1302
- "lstrip": false,
1303
- "normalized": false,
1304
- "rstrip": false,
1305
- "single_word": false,
1306
- "special": true
1307
- },
1308
- "36086": {
1309
- "content": "<extra_id_9>",
1310
- "lstrip": false,
1311
- "normalized": false,
1312
- "rstrip": false,
1313
- "single_word": false,
1314
- "special": true
1315
- },
1316
- "36087": {
1317
- "content": "<extra_id_8>",
1318
- "lstrip": false,
1319
- "normalized": false,
1320
- "rstrip": false,
1321
- "single_word": false,
1322
- "special": true
1323
- },
1324
- "36088": {
1325
- "content": "<extra_id_7>",
1326
- "lstrip": false,
1327
- "normalized": false,
1328
- "rstrip": false,
1329
- "single_word": false,
1330
- "special": true
1331
- },
1332
- "36089": {
1333
- "content": "<extra_id_6>",
1334
- "lstrip": false,
1335
- "normalized": false,
1336
- "rstrip": false,
1337
- "single_word": false,
1338
- "special": true
1339
- },
1340
- "36090": {
1341
- "content": "<extra_id_5>",
1342
- "lstrip": false,
1343
- "normalized": false,
1344
- "rstrip": false,
1345
- "single_word": false,
1346
- "special": true
1347
- },
1348
- "36091": {
1349
- "content": "<extra_id_4>",
1350
- "lstrip": false,
1351
- "normalized": false,
1352
- "rstrip": false,
1353
- "single_word": false,
1354
- "special": true
1355
- },
1356
- "36092": {
1357
- "content": "<extra_id_3>",
1358
- "lstrip": false,
1359
- "normalized": false,
1360
- "rstrip": false,
1361
- "single_word": false,
1362
- "special": true
1363
- },
1364
- "36093": {
1365
- "content": "<extra_id_2>",
1366
- "lstrip": false,
1367
- "normalized": false,
1368
- "rstrip": false,
1369
- "single_word": false,
1370
- "special": true
1371
- },
1372
- "36094": {
1373
- "content": "<extra_id_1>",
1374
- "lstrip": false,
1375
- "normalized": false,
1376
- "rstrip": false,
1377
- "single_word": false,
1378
- "special": true
1379
- },
1380
- "36095": {
1381
- "content": "<extra_id_0>",
1382
- "lstrip": false,
1383
- "normalized": false,
1384
- "rstrip": false,
1385
- "single_word": false,
1386
- "special": true
1387
- },
1388
- "36096": {
1389
- "content": "Chùa",
1390
- "lstrip": false,
1391
- "normalized": true,
1392
- "rstrip": false,
1393
- "single_word": false,
1394
- "special": false
1395
- },
1396
- "36097": {
1397
- "content": "Hòa_g",
1398
- "lstrip": false,
1399
- "normalized": true,
1400
- "rstrip": false,
1401
- "single_word": false,
1402
- "special": false
1403
- },
1404
- "36098": {
1405
- "content": "ê_cù_lao",
1406
- "lstrip": false,
1407
- "normalized": true,
1408
- "rstrip": false,
1409
- "single_word": false,
1410
- "special": false
1411
- },
1412
- "36099": {
1413
- "content": "Phố_bê",
1414
- "lstrip": false,
1415
- "normalized": true,
1416
- "rstrip": false,
1417
- "single_word": false,
1418
- "special": false
1419
- },
1420
- "36100": {
1421
- "content": "dò",
1422
- "lstrip": false,
1423
- "normalized": true,
1424
- "rstrip": false,
1425
- "single_word": false,
1426
- "special": false
1427
- },
1428
- "36101": {
1429
- "content": "sô",
1430
- "lstrip": false,
1431
- "normalized": true,
1432
- "rstrip": false,
1433
- "single_word": false,
1434
- "special": false
1435
- },
1436
- "36102": {
1437
- "content": "Đồ",
1438
- "lstrip": false,
1439
- "normalized": true,
1440
- "rstrip": false,
1441
- "single_word": false,
1442
- "special": false
1443
- },
1444
- "36103": {
1445
- "content": "Nai",
1446
- "lstrip": false,
1447
- "normalized": true,
1448
- "rstrip": false,
1449
- "single_word": false,
1450
- "special": false
1451
- },
1452
- "36104": {
1453
- "content": "khô_g",
1454
- "lstrip": false,
1455
- "normalized": true,
1456
- "rstrip": false,
1457
- "single_word": false,
1458
- "special": false
1459
- },
1460
- "36105": {
1461
- "content": "chỉ",
1462
- "lstrip": false,
1463
- "normalized": true,
1464
- "rstrip": false,
1465
- "single_word": false,
1466
- "special": false
1467
- },
1468
- "36106": {
1469
- "content": "chố",
1470
- "lstrip": false,
1471
- "normalized": true,
1472
- "rstrip": false,
1473
- "single_word": false,
1474
- "special": false
1475
- },
1476
- "36107": {
1477
- "content": "âm_li",
1478
- "lstrip": false,
1479
- "normalized": true,
1480
- "rstrip": false,
1481
- "single_word": false,
1482
- "special": false
1483
- },
1484
- "36108": {
1485
- "content": "mà",
1486
- "lstrip": false,
1487
- "normalized": true,
1488
- "rstrip": false,
1489
- "single_word": false,
1490
- "special": false
1491
- },
1492
- "36109": {
1493
- "content": "cò",
1494
- "lstrip": false,
1495
- "normalized": true,
1496
- "rstrip": false,
1497
- "single_word": false,
1498
- "special": false
1499
- },
1500
- "36110": {
1501
- "content": "lưu_giữ",
1502
- "lstrip": false,
1503
- "normalized": true,
1504
- "rstrip": false,
1505
- "single_word": false,
1506
- "special": false
1507
- },
1508
- "36111": {
1509
- "content": "cây",
1510
- "lstrip": false,
1511
- "normalized": true,
1512
- "rstrip": false,
1513
- "single_word": false,
1514
- "special": false
1515
- },
1516
- "36112": {
1517
- "content": "Dầu_ái",
1518
- "lstrip": false,
1519
- "normalized": true,
1520
- "rstrip": false,
1521
- "single_word": false,
1522
- "special": false
1523
- },
1524
- "36113": {
1525
- "content": "hơ",
1526
- "lstrip": false,
1527
- "normalized": true,
1528
- "rstrip": false,
1529
- "single_word": false,
1530
- "special": false
1531
- },
1532
- "36114": {
1533
- "content": "300",
1534
- "lstrip": false,
1535
- "normalized": true,
1536
- "rstrip": false,
1537
- "single_word": false,
1538
- "special": false
1539
- },
1540
- "36115": {
1541
- "content": "hư",
1542
- "lstrip": false,
1543
- "normalized": true,
1544
- "rstrip": false,
1545
- "single_word": false,
1546
- "special": false
1547
- },
1548
- "36116": {
1549
- "content": "mộ",
1550
- "lstrip": false,
1551
- "normalized": true,
1552
- "rstrip": false,
1553
- "single_word": false,
1554
- "special": false
1555
- },
1556
- "36117": {
1557
- "content": "“",
1558
- "lstrip": false,
1559
- "normalized": true,
1560
- "rstrip": false,
1561
- "single_word": false,
1562
- "special": false
1563
- },
1564
- "36118": {
1565
- "content": "báu_vậ",
1566
- "lstrip": false,
1567
- "normalized": true,
1568
- "rstrip": false,
1569
- "single_word": false,
1570
- "special": false
1571
- },
1572
- "36119": {
1573
- "content": "”",
1574
- "lstrip": false,
1575
- "normalized": true,
1576
- "rstrip": false,
1577
- "single_word": false,
1578
- "special": false
1579
- },
1580
- "36120": {
1581
- "content": "quý_giá",
1582
- "lstrip": false,
1583
- "normalized": true,
1584
- "rstrip": false,
1585
- "single_word": false,
1586
- "special": false
1587
- },
1588
- "36121": {
1589
- "content": "Di_sả",
1590
- "lstrip": false,
1591
- "normalized": true,
1592
- "rstrip": false,
1593
- "single_word": false,
1594
- "special": false
1595
- },
1596
- "36122": {
1597
- "content": "hiê",
1598
- "lstrip": false,
1599
- "normalized": true,
1600
- "rstrip": false,
1601
- "single_word": false,
1602
- "special": false
1603
- },
1604
- "36123": {
1605
- "content": "gắ",
1606
- "lstrip": false,
1607
- "normalized": true,
1608
- "rstrip": false,
1609
- "single_word": false,
1610
- "special": false
1611
- },
1612
- "36124": {
1613
- "content": "liề",
1614
- "lstrip": false,
1615
- "normalized": true,
1616
- "rstrip": false,
1617
- "single_word": false,
1618
- "special": false
1619
- },
1620
- "36125": {
1621
- "content": "chùa",
1622
- "lstrip": false,
1623
- "normalized": true,
1624
- "rstrip": false,
1625
- "single_word": false,
1626
- "special": false
1627
- },
1628
- "36126": {
1629
- "content": "cổ",
1630
- "lstrip": false,
1631
- "normalized": true,
1632
- "rstrip": false,
1633
- "single_word": false,
1634
- "special": false
1635
- },
1636
- "36127": {
1637
- "content": "Nằm",
1638
- "lstrip": false,
1639
- "normalized": true,
1640
- "rstrip": false,
1641
- "single_word": false,
1642
- "special": false
1643
- },
1644
- "36128": {
1645
- "content": "gọi_là",
1646
- "lstrip": false,
1647
- "normalized": true,
1648
- "rstrip": false,
1649
- "single_word": false,
1650
- "special": false
1651
- },
1652
- "36129": {
1653
- "content": "Â_Cổ_Tự",
1654
- "lstrip": false,
1655
- "normalized": true,
1656
- "rstrip": false,
1657
- "single_word": false,
1658
- "special": false
1659
- },
1660
- "36130": {
1661
- "content": "huộc",
1662
- "lstrip": false,
1663
- "normalized": true,
1664
- "rstrip": false,
1665
- "single_word": false,
1666
- "special": false
1667
- },
1668
- "36131": {
1669
- "content": "phườ",
1670
- "lstrip": false,
1671
- "normalized": true,
1672
- "rstrip": false,
1673
- "single_word": false,
1674
- "special": false
1675
- },
1676
- "36132": {
1677
- "content": "Hiệp_Hòa",
1678
- "lstrip": false,
1679
- "normalized": true,
1680
- "rstrip": false,
1681
- "single_word": false,
1682
- "special": false
1683
- },
1684
- "36133": {
1685
- "content": "Biê_Hòa",
1686
- "lstrip": false,
1687
- "normalized": true,
1688
- "rstrip": false,
1689
- "single_word": false,
1690
- "special": false
1691
- },
1692
- "36134": {
1693
- "content": "địa_điểm",
1694
- "lstrip": false,
1695
- "normalized": true,
1696
- "rstrip": false,
1697
- "single_word": false,
1698
- "special": false
1699
- },
1700
- "36135": {
1701
- "content": "gười",
1702
- "lstrip": false,
1703
- "normalized": true,
1704
- "rstrip": false,
1705
- "single_word": false,
1706
- "special": false
1707
- },
1708
- "36136": {
1709
- "content": "dâ",
1710
- "lstrip": false,
1711
- "normalized": true,
1712
- "rstrip": false,
1713
- "single_word": false,
1714
- "special": false
1715
- },
1716
- "36137": {
1717
- "content": "iế_g",
1718
- "lstrip": false,
1719
- "normalized": true,
1720
- "rstrip": false,
1721
- "single_word": false,
1722
- "special": false
1723
- },
1724
- "36138": {
1725
- "content": "xa_h",
1726
- "lstrip": false,
1727
- "normalized": true,
1728
- "rstrip": false,
1729
- "single_word": false,
1730
- "special": false
1731
- },
1732
- "36139": {
1733
- "content": "Đó",
1734
- "lstrip": false,
1735
- "normalized": true,
1736
- "rstrip": false,
1737
- "single_word": false,
1738
- "special": false
1739
- },
1740
- "36140": {
1741
- "content": "ê_khoa_học",
1742
- "lstrip": false,
1743
- "normalized": true,
1744
- "rstrip": false,
1745
- "single_word": false,
1746
- "special": false
1747
- },
1748
- "36141": {
1749
- "content": "Dip",
1750
- "lstrip": false,
1751
- "normalized": true,
1752
- "rstrip": false,
1753
- "single_word": false,
1754
- "special": false
1755
- },
1756
- "36142": {
1757
- "content": "e_oca",
1758
- "lstrip": false,
1759
- "normalized": true,
1760
- "rstrip": false,
1761
- "single_word": false,
1762
- "special": false
1763
- },
1764
- "36143": {
1765
- "content": "Roxb",
1766
- "lstrip": false,
1767
- "normalized": true,
1768
- "rstrip": false,
1769
- "single_word": false,
1770
- "special": false
1771
- },
1772
- "36144": {
1773
- "content": "hậ",
1774
- "lstrip": false,
1775
- "normalized": true,
1776
- "rstrip": false,
1777
- "single_word": false,
1778
- "special": false
1779
- },
1780
- "36145": {
1781
- "content": "Việ_Nam",
1782
- "lstrip": false,
1783
- "normalized": true,
1784
- "rstrip": false,
1785
- "single_word": false,
1786
- "special": false
1787
- },
1788
- "36146": {
1789
- "content": "gắ_liề",
1790
- "lstrip": false,
1791
- "normalized": true,
1792
- "rstrip": false,
1793
- "single_word": false,
1794
- "special": false
1795
- },
1796
- "36147": {
1797
- "content": "quá",
1798
- "lstrip": false,
1799
- "normalized": true,
1800
- "rstrip": false,
1801
- "single_word": false,
1802
- "special": false
1803
- },
1804
- "36148": {
1805
- "content": "ì_h",
1806
- "lstrip": false,
1807
- "normalized": true,
1808
- "rstrip": false,
1809
- "single_word": false,
1810
- "special": false
1811
- },
1812
- "36149": {
1813
- "content": "hì_h",
1814
- "lstrip": false,
1815
- "normalized": true,
1816
- "rstrip": false,
1817
- "single_word": false,
1818
- "special": false
1819
- },
1820
- "36150": {
1821
- "content": "hà_h",
1822
- "lstrip": false,
1823
- "normalized": true,
1824
- "rstrip": false,
1825
- "single_word": false,
1826
- "special": false
1827
- },
1828
- "36151": {
1829
- "content": "phá",
1830
- "lstrip": false,
1831
- "normalized": true,
1832
- "rstrip": false,
1833
- "single_word": false,
1834
- "special": false
1835
- },
1836
- "36152": {
1837
- "content": "gôi",
1838
- "lstrip": false,
1839
- "normalized": true,
1840
- "rstrip": false,
1841
- "single_word": false,
1842
- "special": false
1843
- },
1844
- "36153": {
1845
- "content": "hữ",
1846
- "lstrip": false,
1847
- "normalized": true,
1848
- "rstrip": false,
1849
- "single_word": false,
1850
- "special": false
1851
- },
1852
- "36154": {
1853
- "content": "hiề",
1854
- "lstrip": false,
1855
- "normalized": true,
1856
- "rstrip": false,
1857
- "single_word": false,
1858
- "special": false
1859
- },
1860
- "36155": {
1861
- "content": "kí_h",
1862
- "lstrip": false,
1863
- "normalized": true,
1864
- "rstrip": false,
1865
- "single_word": false,
1866
- "special": false
1867
- },
1868
- "36156": {
1869
- "content": "bậc",
1870
- "lstrip": false,
1871
- "normalized": true,
1872
- "rstrip": false,
1873
- "single_word": false,
1874
- "special": false
1875
- },
1876
- "36157": {
1877
- "content": "hấ_Đồ",
1878
- "lstrip": false,
1879
- "normalized": true,
1880
- "rstrip": false,
1881
- "single_word": false,
1882
- "special": false
1883
- },
1884
- "36158": {
1885
- "content": "xây",
1886
- "lstrip": false,
1887
- "normalized": true,
1888
- "rstrip": false,
1889
- "single_word": false,
1890
- "special": false
1891
- },
1892
- "36159": {
1893
- "content": "dự_g",
1894
- "lstrip": false,
1895
- "normalized": true,
1896
- "rstrip": false,
1897
- "single_word": false,
1898
- "special": false
1899
- },
1900
- "36160": {
1901
- "content": "hế_kỷ",
1902
- "lstrip": false,
1903
- "normalized": true,
1904
- "rstrip": false,
1905
- "single_word": false,
1906
- "special": false
1907
- },
1908
- "36161": {
1909
- "content": "17",
1910
- "lstrip": false,
1911
- "normalized": true,
1912
- "rstrip": false,
1913
- "single_word": false,
1914
- "special": false
1915
- },
1916
- "36162": {
1917
- "content": "hiề_sư",
1918
- "lstrip": false,
1919
- "normalized": true,
1920
- "rstrip": false,
1921
- "single_word": false,
1922
- "special": false
1923
- },
1924
- "36163": {
1925
- "content": "huộc_phái",
1926
- "lstrip": false,
1927
- "normalized": true,
1928
- "rstrip": false,
1929
- "single_word": false,
1930
- "special": false
1931
- },
1932
- "36164": {
1933
- "content": "Lâm_Tế_sá",
1934
- "lstrip": false,
1935
- "normalized": true,
1936
- "rstrip": false,
1937
- "single_word": false,
1938
- "special": false
1939
- },
1940
- "36165": {
1941
- "content": "lập",
1942
- "lstrip": false,
1943
- "normalized": true,
1944
- "rstrip": false,
1945
- "single_word": false,
1946
- "special": false
1947
- },
1948
- "36166": {
1949
- "content": "Hiệ_ay",
1950
- "lstrip": false,
1951
- "normalized": true,
1952
- "rstrip": false,
1953
- "single_word": false,
1954
- "special": false
1955
- },
1956
- "36167": {
1957
- "content": "huộc_hệ",
1958
- "lstrip": false,
1959
- "normalized": true,
1960
- "rstrip": false,
1961
- "single_word": false,
1962
- "special": false
1963
- },
1964
- "36168": {
1965
- "content": "phái",
1966
- "lstrip": false,
1967
- "normalized": true,
1968
- "rstrip": false,
1969
- "single_word": false,
1970
- "special": false
1971
- },
1972
- "36169": {
1973
- "content": "Bắc_ô",
1974
- "lstrip": false,
1975
- "normalized": true,
1976
- "rstrip": false,
1977
- "single_word": false,
1978
- "special": false
1979
- },
1980
- "36170": {
1981
- "content": "kiế",
1982
- "lstrip": false,
1983
- "normalized": true,
1984
- "rstrip": false,
1985
- "single_word": false,
1986
- "special": false
1987
- },
1988
- "36171": {
1989
- "content": "đặc_ư",
1990
- "lstrip": false,
1991
- "normalized": true,
1992
- "rstrip": false,
1993
- "single_word": false,
1994
- "special": false
1995
- },
1996
- "36172": {
1997
- "content": "vù",
1998
- "lstrip": false,
1999
- "normalized": true,
2000
- "rstrip": false,
2001
- "single_word": false,
2002
- "special": false
2003
- },
2004
- "36173": {
2005
- "content": "đồ",
2006
- "lstrip": false,
2007
- "normalized": true,
2008
- "rstrip": false,
2009
- "single_word": false,
2010
- "special": false
2011
- },
2012
- "36174": {
2013
- "content": "bằ",
2014
- "lstrip": false,
2015
- "normalized": true,
2016
- "rstrip": false,
2017
- "single_word": false,
2018
- "special": false
2019
- },
2020
- "36175": {
2021
- "content": "Nam_Bộ",
2022
- "lstrip": false,
2023
- "normalized": true,
2024
- "rstrip": false,
2025
- "single_word": false,
2026
- "special": false
2027
- },
2028
- "36176": {
2029
- "content": "Ni",
2030
- "lstrip": false,
2031
- "normalized": true,
2032
- "rstrip": false,
2033
- "single_word": false,
2034
- "special": false
2035
- },
2036
- "36177": {
2037
- "content": "ưở_g",
2038
- "lstrip": false,
2039
- "normalized": true,
2040
- "rstrip": false,
2041
- "single_word": false,
2042
- "special": false
2043
- },
2044
- "36178": {
2045
- "content": "Thích_Nữ",
2046
- "lstrip": false,
2047
- "normalized": true,
2048
- "rstrip": false,
2049
- "single_word": false,
2050
- "special": false
2051
- },
2052
- "36179": {
2053
- "content": "Huệ_Tâm",
2054
- "lstrip": false,
2055
- "normalized": true,
2056
- "rstrip": false,
2057
- "single_word": false,
2058
- "special": false
2059
- },
2060
- "36180": {
2061
- "content": "ụ_ì",
2062
- "lstrip": false,
2063
- "normalized": true,
2064
- "rstrip": false,
2065
- "single_word": false,
2066
- "special": false
2067
- },
2068
- "36181": {
2069
- "content": "biế",
2070
- "lstrip": false,
2071
- "normalized": true,
2072
- "rstrip": false,
2073
- "single_word": false,
2074
- "special": false
2075
- },
2076
- "36182": {
2077
- "content": "ồ_ại",
2078
- "lstrip": false,
2079
- "normalized": true,
2080
- "rstrip": false,
2081
- "single_word": false,
2082
- "special": false
2083
- },
2084
- "36183": {
2085
- "content": "hà_g",
2086
- "lstrip": false,
2087
- "normalized": true,
2088
- "rstrip": false,
2089
- "single_word": false,
2090
- "special": false
2091
- },
2092
- "36184": {
2093
- "content": "chứ",
2094
- "lstrip": false,
2095
- "normalized": true,
2096
- "rstrip": false,
2097
- "single_word": false,
2098
- "special": false
2099
- },
2100
- "36185": {
2101
- "content": "đổi",
2102
- "lstrip": false,
2103
- "normalized": true,
2104
- "rstrip": false,
2105
- "single_word": false,
2106
- "special": false
2107
- },
2108
- "36186": {
2109
- "content": "đấ",
2110
- "lstrip": false,
2111
- "normalized": true,
2112
- "rstrip": false,
2113
- "single_word": false,
2114
- "special": false
2115
- },
2116
- "36187": {
2117
- "content": "Cây",
2118
- "lstrip": false,
2119
- "normalized": true,
2120
- "rstrip": false,
2121
- "single_word": false,
2122
- "special": false
2123
- },
2124
- "36188": {
2125
- "content": "cao",
2126
- "lstrip": false,
2127
- "normalized": true,
2128
- "rstrip": false,
2129
- "single_word": false,
2130
- "special": false
2131
- },
2132
- "36189": {
2133
- "content": "80",
2134
- "lstrip": false,
2135
- "normalized": true,
2136
- "rstrip": false,
2137
- "single_word": false,
2138
- "special": false
2139
- },
2140
- "36190": {
2141
- "content": "vò",
2142
- "lstrip": false,
2143
- "normalized": true,
2144
- "rstrip": false,
2145
- "single_word": false,
2146
- "special": false
2147
- },
2148
- "36191": {
2149
- "content": "hâ",
2150
- "lstrip": false,
2151
- "normalized": true,
2152
- "rstrip": false,
2153
- "single_word": false,
2154
- "special": false
2155
- },
2156
- "36192": {
2157
- "content": "8,5",
2158
- "lstrip": false,
2159
- "normalized": true,
2160
- "rstrip": false,
2161
- "single_word": false,
2162
- "special": false
2163
- },
2164
- "36193": {
2165
- "content": "á_lá",
2166
- "lstrip": false,
2167
- "normalized": true,
2168
- "rstrip": false,
2169
- "single_word": false,
2170
- "special": false
2171
- },
2172
- "36194": {
2173
- "content": "xum_xuê",
2174
- "lstrip": false,
2175
- "normalized": true,
2176
- "rstrip": false,
2177
- "single_word": false,
2178
- "special": false
2179
- },
2180
- "36195": {
2181
- "content": "che_bó",
2182
- "lstrip": false,
2183
- "normalized": true,
2184
- "rstrip": false,
2185
- "single_word": false,
2186
- "special": false
2187
- },
2188
- "36196": {
2189
- "content": "má",
2190
- "lstrip": false,
2191
- "normalized": true,
2192
- "rstrip": false,
2193
- "single_word": false,
2194
- "special": false
2195
- },
2196
- "36197": {
2197
- "content": "phậ",
2198
- "lstrip": false,
2199
- "normalized": true,
2200
- "rstrip": false,
2201
- "single_word": false,
2202
- "special": false
2203
- },
2204
- "36198": {
2205
- "content": "du_khách",
2206
- "lstrip": false,
2207
- "normalized": true,
2208
- "rstrip": false,
2209
- "single_word": false,
2210
- "special": false
2211
- },
2212
- "36199": {
2213
- "content": "đế",
2214
- "lstrip": false,
2215
- "normalized": true,
2216
- "rstrip": false,
2217
- "single_word": false,
2218
- "special": false
2219
- },
2220
- "36200": {
2221
- "content": "chiêm_bái",
2222
- "lstrip": false,
2223
- "normalized": true,
2224
- "rstrip": false,
2225
- "single_word": false,
2226
- "special": false
2227
- },
2228
- "36201": {
2229
- "content": "Theo",
2230
- "lstrip": false,
2231
- "normalized": true,
2232
- "rstrip": false,
2233
- "single_word": false,
2234
- "special": false
2235
- },
2236
- "36202": {
2237
- "content": "lời",
2238
- "lstrip": false,
2239
- "normalized": true,
2240
- "rstrip": false,
2241
- "single_word": false,
2242
- "special": false
2243
- },
2244
- "36203": {
2245
- "content": "kể",
2246
- "lstrip": false,
2247
- "normalized": true,
2248
- "rstrip": false,
2249
- "single_word": false,
2250
- "special": false
2251
- },
2252
- "36204": {
2253
- "content": "cao_iê",
2254
- "lstrip": false,
2255
- "normalized": true,
2256
- "rstrip": false,
2257
- "single_word": false,
2258
- "special": false
2259
- },
2260
- "36205": {
2261
- "content": "hiệ",
2262
- "lstrip": false,
2263
- "normalized": true,
2264
- "rstrip": false,
2265
- "single_word": false,
2266
- "special": false
2267
- },
2268
- "36206": {
2269
- "content": "diệ",
2270
- "lstrip": false,
2271
- "normalized": true,
2272
- "rstrip": false,
2273
- "single_word": false,
2274
- "special": false
2275
- },
2276
- "36207": {
2277
- "content": "hời_kỳ",
2278
- "lstrip": false,
2279
- "normalized": true,
2280
- "rstrip": false,
2281
- "single_word": false,
2282
- "special": false
2283
- },
2284
- "36208": {
2285
- "content": "khai",
2286
- "lstrip": false,
2287
- "normalized": true,
2288
- "rstrip": false,
2289
- "single_word": false,
2290
- "special": false
2291
- },
2292
- "36209": {
2293
- "content": "hoa",
2294
- "lstrip": false,
2295
- "normalized": true,
2296
- "rstrip": false,
2297
- "single_word": false,
2298
- "special": false
2299
- },
2300
- "36210": {
2301
- "content": "ê_vù",
2302
- "lstrip": false,
2303
- "normalized": true,
2304
- "rstrip": false,
2305
- "single_word": false,
2306
- "special": false
2307
- },
2308
- "36211": {
2309
- "content": "cù_lao",
2310
- "lstrip": false,
2311
- "normalized": true,
2312
- "rstrip": false,
2313
- "single_word": false,
2314
- "special": false
2315
- },
2316
- "36212": {
2317
- "content": "Phố",
2318
- "lstrip": false,
2319
- "normalized": true,
2320
- "rstrip": false,
2321
- "single_word": false,
2322
- "special": false
2323
- },
2324
- "36213": {
2325
- "content": "T_ải",
2326
- "lstrip": false,
2327
- "normalized": true,
2328
- "rstrip": false,
2329
- "single_word": false,
2330
- "special": false
2331
- },
2332
- "36214": {
2333
- "content": "lịch_sử",
2334
- "lstrip": false,
2335
- "normalized": true,
2336
- "rstrip": false,
2337
- "single_word": false,
2338
- "special": false
2339
- },
2340
- "36215": {
2341
- "content": "khô",
2342
- "lstrip": false,
2343
- "normalized": true,
2344
- "rstrip": false,
2345
- "single_word": false,
2346
- "special": false
2347
- },
2348
- "36216": {
2349
- "content": "ạo_bó",
2350
- "lstrip": false,
2351
- "normalized": true,
2352
- "rstrip": false,
2353
- "single_word": false,
2354
- "special": false
2355
- },
2356
- "36217": {
2357
- "content": "biểu_ượ",
2358
- "lstrip": false,
2359
- "normalized": true,
2360
- "rstrip": false,
2361
- "single_word": false,
2362
- "special": false
2363
- },
2364
- "36218": {
2365
- "content": "hầ",
2366
- "lstrip": false,
2367
- "normalized": true,
2368
- "rstrip": false,
2369
- "single_word": false,
2370
- "special": false
2371
- },
2372
- "36219": {
2373
- "content": "địa_phươ",
2374
- "lstrip": false,
2375
- "normalized": true,
2376
- "rstrip": false,
2377
- "single_word": false,
2378
- "special": false
2379
- },
2380
- "36220": {
2381
- "content": "Ngoài",
2382
- "lstrip": false,
2383
- "normalized": true,
2384
- "rstrip": false,
2385
- "single_word": false,
2386
- "special": false
2387
- },
2388
- "36221": {
2389
- "content": "giá",
2390
- "lstrip": false,
2391
- "normalized": true,
2392
- "rstrip": false,
2393
- "single_word": false,
2394
- "special": false
2395
- },
2396
- "36222": {
2397
- "content": "cò_ma",
2398
- "lstrip": false,
2399
- "normalized": true,
2400
- "rstrip": false,
2401
- "single_word": false,
2402
- "special": false
2403
- },
2404
- "36223": {
2405
- "content": "ghĩa",
2406
- "lstrip": false,
2407
- "normalized": true,
2408
- "rstrip": false,
2409
- "single_word": false,
2410
- "special": false
2411
- },
2412
- "36224": {
2413
- "content": "đặc_biệ",
2414
- "lstrip": false,
2415
- "normalized": true,
2416
- "rstrip": false,
2417
- "single_word": false,
2418
- "special": false
2419
- },
2420
- "36225": {
2421
- "content": "mặ",
2422
- "lstrip": false,
2423
- "normalized": true,
2424
- "rstrip": false,
2425
- "single_word": false,
2426
- "special": false
2427
- },
2428
- "36226": {
2429
- "content": "h_hái",
2430
- "lstrip": false,
2431
- "normalized": true,
2432
- "rstrip": false,
2433
- "single_word": false,
2434
- "special": false
2435
- },
2436
- "36227": {
2437
- "content": "khoa_học",
2438
- "lstrip": false,
2439
- "normalized": true,
2440
- "rstrip": false,
2441
- "single_word": false,
2442
- "special": false
2443
- },
2444
- "36228": {
2445
- "content": "Với",
2446
- "lstrip": false,
2447
- "normalized": true,
2448
- "rstrip": false,
2449
- "single_word": false,
2450
- "special": false
2451
- },
2452
- "36229": {
2453
- "content": "chiều",
2454
- "lstrip": false,
2455
- "normalized": true,
2456
- "rstrip": false,
2457
- "single_word": false,
2458
- "special": false
2459
- },
2460
- "36230": {
2461
- "content": "bậ",
2462
- "lstrip": false,
2463
- "normalized": true,
2464
- "rstrip": false,
2465
- "single_word": false,
2466
- "special": false
2467
- },
2468
- "36231": {
2469
- "content": "cù_g",
2470
- "lstrip": false,
2471
- "normalized": true,
2472
- "rstrip": false,
2473
- "single_word": false,
2474
- "special": false
2475
- },
2476
- "36232": {
2477
- "content": "đó",
2478
- "lstrip": false,
2479
- "normalized": true,
2480
- "rstrip": false,
2481
- "single_word": false,
2482
- "special": false
2483
- },
2484
- "36233": {
2485
- "content": "vai",
2486
- "lstrip": false,
2487
- "normalized": true,
2488
- "rstrip": false,
2489
- "single_word": false,
2490
- "special": false
2491
- },
2492
- "36234": {
2493
- "content": "lá",
2494
- "lstrip": false,
2495
- "normalized": true,
2496
- "rstrip": false,
2497
- "single_word": false,
2498
- "special": false
2499
- },
2500
- "36235": {
2501
- "content": "phổi",
2502
- "lstrip": false,
2503
- "normalized": true,
2504
- "rstrip": false,
2505
- "single_word": false,
2506
- "special": false
2507
- },
2508
- "36236": {
2509
- "content": "giúp",
2510
- "lstrip": false,
2511
- "normalized": true,
2512
- "rstrip": false,
2513
- "single_word": false,
2514
- "special": false
2515
- },
2516
- "36237": {
2517
- "content": "điều",
2518
- "lstrip": false,
2519
- "normalized": true,
2520
- "rstrip": false,
2521
- "single_word": false,
2522
- "special": false
2523
- },
2524
- "36238": {
2525
- "content": "hòa",
2526
- "lstrip": false,
2527
- "normalized": true,
2528
- "rstrip": false,
2529
- "single_word": false,
2530
- "special": false
2531
- },
2532
- "36239": {
2533
- "content": "khí",
2534
- "lstrip": false,
2535
- "normalized": true,
2536
- "rstrip": false,
2537
- "single_word": false,
2538
- "special": false
2539
- },
2540
- "36240": {
2541
- "content": "ị_h",
2542
- "lstrip": false,
2543
- "normalized": true,
2544
- "rstrip": false,
2545
- "single_word": false,
2546
- "special": false
2547
- },
2548
- "36241": {
2549
- "content": "khách",
2550
- "lstrip": false,
2551
- "normalized": true,
2552
- "rstrip": false,
2553
- "single_word": false,
2554
- "special": false
2555
- },
2556
- "36242": {
2557
- "content": "hập",
2558
- "lstrip": false,
2559
- "normalized": true,
2560
- "rstrip": false,
2561
- "single_word": false,
2562
- "special": false
2563
- },
2564
- "36243": {
2565
- "content": "phươ",
2566
- "lstrip": false,
2567
- "normalized": true,
2568
- "rstrip": false,
2569
- "single_word": false,
2570
- "special": false
2571
- },
2572
- "36244": {
2573
- "content": "phầ_li",
2574
- "lstrip": false,
2575
- "normalized": true,
2576
- "rstrip": false,
2577
- "single_word": false,
2578
- "special": false
2579
- },
2580
- "36245": {
2581
- "content": "h_hồ",
2582
- "lstrip": false,
2583
- "normalized": true,
2584
- "rstrip": false,
2585
- "single_word": false,
2586
- "special": false
2587
- },
2588
- "36246": {
2589
- "content": "hụ",
2590
- "lstrip": false,
2591
- "normalized": true,
2592
- "rstrip": false,
2593
- "single_word": false,
2594
- "special": false
2595
- },
2596
- "36247": {
2597
- "content": "số",
2598
- "lstrip": false,
2599
- "normalized": true,
2600
- "rstrip": false,
2601
- "single_word": false,
2602
- "special": false
2603
- },
2604
- "36248": {
2605
- "content": "bao_đổi",
2606
- "lstrip": false,
2607
- "normalized": true,
2608
- "rstrip": false,
2609
- "single_word": false,
2610
- "special": false
2611
- },
2612
- "36249": {
2613
- "content": "điểm",
2614
- "lstrip": false,
2615
- "normalized": true,
2616
- "rstrip": false,
2617
- "single_word": false,
2618
- "special": false
2619
- },
2620
- "36250": {
2621
- "content": "ự_hào",
2622
- "lstrip": false,
2623
- "normalized": true,
2624
- "rstrip": false,
2625
- "single_word": false,
2626
- "special": false
2627
- },
2628
- "36251": {
2629
- "content": "hà_chùa",
2630
- "lstrip": false,
2631
- "normalized": true,
2632
- "rstrip": false,
2633
- "single_word": false,
2634
- "special": false
2635
- },
2636
- "36252": {
2637
- "content": "bà",
2638
- "lstrip": false,
2639
- "normalized": true,
2640
- "rstrip": false,
2641
- "single_word": false,
2642
- "special": false
2643
- },
2644
- "36253": {
2645
- "content": "xem",
2646
- "lstrip": false,
2647
- "normalized": true,
2648
- "rstrip": false,
2649
- "single_word": false,
2650
- "special": false
2651
- },
2652
- "36254": {
2653
- "content": "Từ",
2654
- "lstrip": false,
2655
- "normalized": true,
2656
- "rstrip": false,
2657
- "single_word": false,
2658
- "special": false
2659
- },
2660
- "36255": {
2661
- "content": "vế",
2662
- "lstrip": false,
2663
- "normalized": true,
2664
- "rstrip": false,
2665
- "single_word": false,
2666
- "special": false
2667
- },
2668
- "36256": {
2669
- "content": "sẹo",
2670
- "lstrip": false,
2671
- "normalized": true,
2672
- "rstrip": false,
2673
- "single_word": false,
2674
- "special": false
2675
- },
2676
- "36257": {
2677
- "content": "ê_hâ",
2678
- "lstrip": false,
2679
- "normalized": true,
2680
- "rstrip": false,
2681
- "single_word": false,
2682
- "special": false
2683
- },
2684
- "36258": {
2685
- "content": "dấu",
2686
- "lstrip": false,
2687
- "normalized": true,
2688
- "rstrip": false,
2689
- "single_word": false,
2690
- "special": false
2691
- },
2692
- "36259": {
2693
- "content": "hời_gia",
2694
- "lstrip": false,
2695
- "normalized": true,
2696
- "rstrip": false,
2697
- "single_word": false,
2698
- "special": false
2699
- },
2700
- "36260": {
2701
- "content": "hắc_hở",
2702
- "lstrip": false,
2703
- "normalized": true,
2704
- "rstrip": false,
2705
- "single_word": false,
2706
- "special": false
2707
- },
2708
- "36261": {
2709
- "content": "hóa",
2710
- "lstrip": false,
2711
- "normalized": true,
2712
- "rstrip": false,
2713
- "single_word": false,
2714
- "special": false
2715
- },
2716
- "36262": {
2717
- "content": "phải",
2718
- "lstrip": false,
2719
- "normalized": true,
2720
- "rstrip": false,
2721
- "single_word": false,
2722
- "special": false
2723
- },
2724
- "36263": {
2725
- "content": "bảo_vệ",
2726
- "lstrip": false,
2727
- "normalized": true,
2728
- "rstrip": false,
2729
- "single_word": false,
2730
- "special": false
2731
- },
2732
- "36264": {
2733
- "content": "Dù",
2734
- "lstrip": false,
2735
- "normalized": true,
2736
- "rstrip": false,
2737
- "single_word": false,
2738
- "special": false
2739
- },
2740
- "36265": {
2741
- "content": "vẫ",
2742
- "lstrip": false,
2743
- "normalized": true,
2744
- "rstrip": false,
2745
- "single_word": false,
2746
- "special": false
2747
- },
2748
- "36266": {
2749
- "content": "h_ố",
2750
- "lstrip": false,
2751
- "normalized": true,
2752
- "rstrip": false,
2753
- "single_word": false,
2754
- "special": false
2755
- },
2756
- "36267": {
2757
- "content": "cả",
2758
- "lstrip": false,
2759
- "normalized": true,
2760
- "rstrip": false,
2761
- "single_word": false,
2762
- "special": false
2763
- },
2764
- "36268": {
2765
- "content": "sâ",
2766
- "lstrip": false,
2767
- "normalized": true,
2768
- "rstrip": false,
2769
- "single_word": false,
2770
- "special": false
2771
- },
2772
- "36269": {
2773
- "content": "luô",
2774
- "lstrip": false,
2775
- "normalized": true,
2776
- "rstrip": false,
2777
- "single_word": false,
2778
- "special": false
2779
- },
2780
- "36270": {
2781
- "content": "dặ_dò",
2782
- "lstrip": false,
2783
- "normalized": true,
2784
- "rstrip": false,
2785
- "single_word": false,
2786
- "special": false
2787
- },
2788
- "36271": {
2789
- "content": "Phậ_ử",
2790
- "lstrip": false,
2791
- "normalized": true,
2792
- "rstrip": false,
2793
- "single_word": false,
2794
- "special": false
2795
- },
2796
- "36272": {
2797
- "content": "khi",
2798
- "lstrip": false,
2799
- "normalized": true,
2800
- "rstrip": false,
2801
- "single_word": false,
2802
- "special": false
2803
- },
2804
- "36273": {
2805
- "content": "hãy",
2806
- "lstrip": false,
2807
- "normalized": true,
2808
- "rstrip": false,
2809
- "single_word": false,
2810
- "special": false
2811
- },
2812
- "36274": {
2813
- "content": "luô_giữ",
2814
- "lstrip": false,
2815
- "normalized": true,
2816
- "rstrip": false,
2817
- "single_word": false,
2818
- "special": false
2819
- },
2820
- "36275": {
2821
- "content": "gì",
2822
- "lstrip": false,
2823
- "normalized": true,
2824
- "rstrip": false,
2825
- "single_word": false,
2826
- "special": false
2827
- },
2828
- "36276": {
2829
- "content": "sự",
2830
- "lstrip": false,
2831
- "normalized": true,
2832
- "rstrip": false,
2833
- "single_word": false,
2834
- "special": false
2835
- },
2836
- "36277": {
2837
- "content": "a_g",
2838
- "lstrip": false,
2839
- "normalized": true,
2840
- "rstrip": false,
2841
- "single_word": false,
2842
- "special": false
2843
- },
2844
- "36278": {
2845
- "content": "ghiêm",
2846
- "lstrip": false,
2847
- "normalized": true,
2848
- "rstrip": false,
2849
- "single_word": false,
2850
- "special": false
2851
- },
2852
- "36279": {
2853
- "content": "sạch_sẽ",
2854
- "lstrip": false,
2855
- "normalized": true,
2856
- "rstrip": false,
2857
- "single_word": false,
2858
- "special": false
2859
- },
2860
- "36280": {
2861
- "content": "vị",
2862
- "lstrip": false,
2863
- "normalized": true,
2864
- "rstrip": false,
2865
- "single_word": false,
2866
- "special": false
2867
- },
2868
- "36281": {
2869
- "content": "ụ_ì_chùa",
2870
- "lstrip": false,
2871
- "normalized": true,
2872
- "rstrip": false,
2873
- "single_word": false,
2874
- "special": false
2875
- },
2876
- "36282": {
2877
- "content": "chia_sẻ",
2878
- "lstrip": false,
2879
- "normalized": true,
2880
- "rstrip": false,
2881
- "single_word": false,
2882
- "special": false
2883
- },
2884
- "36283": {
2885
- "content": "Năm",
2886
- "lstrip": false,
2887
- "normalized": true,
2888
- "rstrip": false,
2889
- "single_word": false,
2890
- "special": false
2891
- },
2892
- "36284": {
2893
- "content": "2013",
2894
- "lstrip": false,
2895
- "normalized": true,
2896
- "rstrip": false,
2897
- "single_word": false,
2898
- "special": false
2899
- },
2900
- "36285": {
2901
- "content": "Dầu_ái_ại",
2902
- "lstrip": false,
2903
- "normalized": true,
2904
- "rstrip": false,
2905
- "single_word": false,
2906
- "special": false
2907
- },
2908
- "36286": {
2909
- "content": "Hội",
2910
- "lstrip": false,
2911
- "normalized": true,
2912
- "rstrip": false,
2913
- "single_word": false,
2914
- "special": false
2915
- },
2916
- "36287": {
2917
- "content": "Thiê_hiê",
2918
- "lstrip": false,
2919
- "normalized": true,
2920
- "rstrip": false,
2921
- "single_word": false,
2922
- "special": false
2923
- },
2924
- "36288": {
2925
- "content": "Môi_ườ",
2926
- "lstrip": false,
2927
- "normalized": true,
2928
- "rstrip": false,
2929
- "single_word": false,
2930
- "special": false
2931
- },
2932
- "36289": {
2933
- "content": "Điều",
2934
- "lstrip": false,
2935
- "normalized": true,
2936
- "rstrip": false,
2937
- "single_word": false,
2938
- "special": false
2939
- },
2940
- "36290": {
2941
- "content": "ghi",
2942
- "lstrip": false,
2943
- "normalized": true,
2944
- "rstrip": false,
2945
- "single_word": false,
2946
- "special": false
2947
- },
2948
- "36291": {
2949
- "content": "A_h",
2950
- "lstrip": false,
2951
- "normalized": true,
2952
- "rstrip": false,
2953
- "single_word": false,
2954
- "special": false
2955
- },
2956
- "36292": {
2957
- "content": "Nguyễ",
2958
- "lstrip": false,
2959
- "normalized": true,
2960
- "rstrip": false,
2961
- "single_word": false,
2962
- "special": false
2963
- },
2964
- "36293": {
2965
- "content": "gụ",
2966
- "lstrip": false,
2967
- "normalized": true,
2968
- "rstrip": false,
2969
- "single_word": false,
2970
- "special": false
2971
- },
2972
- "36294": {
2973
- "content": "A_Bì",
2974
- "lstrip": false,
2975
- "normalized": true,
2976
- "rstrip": false,
2977
- "single_word": false,
2978
- "special": false
2979
- },
2980
- "36295": {
2981
- "content": "hô",
2982
- "lstrip": false,
2983
- "normalized": true,
2984
- "rstrip": false,
2985
- "single_word": false,
2986
- "special": false
2987
- },
2988
- "36296": {
2989
- "content": "a_h",
2990
- "lstrip": false,
2991
- "normalized": true,
2992
- "rstrip": false,
2993
- "single_word": false,
2994
- "special": false
2995
- },
2996
- "36297": {
2997
- "content": "cảm_hấy",
2998
- "lstrip": false,
2999
- "normalized": true,
3000
- "rstrip": false,
3001
- "single_word": false,
3002
- "special": false
3003
- },
3004
- "36298": {
3005
- "content": "vô",
3006
- "lstrip": false,
3007
- "normalized": true,
3008
- "rstrip": false,
3009
- "single_word": false,
3010
- "special": false
3011
- },
3012
- "36299": {
3013
- "content": "vì",
3014
- "lstrip": false,
3015
- "normalized": true,
3016
- "rstrip": false,
3017
- "single_word": false,
3018
- "special": false
3019
- },
3020
- "36300": {
3021
- "content": "mì",
3022
- "lstrip": false,
3023
- "normalized": true,
3024
- "rstrip": false,
3025
- "single_word": false,
3026
- "special": false
3027
- },
3028
- "36301": {
3029
- "content": "Mỗi",
3030
- "lstrip": false,
3031
- "normalized": true,
3032
- "rstrip": false,
3033
- "single_word": false,
3034
- "special": false
3035
- },
3036
- "36302": {
3037
- "content": "lầ",
3038
- "lstrip": false,
3039
- "normalized": true,
3040
- "rstrip": false,
3041
- "single_word": false,
3042
- "special": false
3043
- },
3044
- "36303": {
3045
- "content": "ghé",
3046
- "lstrip": false,
3047
- "normalized": true,
3048
- "rstrip": false,
3049
- "single_word": false,
3050
- "special": false
3051
- },
3052
- "36304": {
3053
- "content": "đều",
3054
- "lstrip": false,
3055
- "normalized": true,
3056
- "rstrip": false,
3057
- "single_word": false,
3058
- "special": false
3059
- },
3060
- "36305": {
3061
- "content": "dẫ_gia_đì",
3062
- "lstrip": false,
3063
- "normalized": true,
3064
- "rstrip": false,
3065
- "single_word": false,
3066
- "special": false
3067
- },
3068
- "36306": {
3069
- "content": "h_đế",
3070
- "lstrip": false,
3071
- "normalized": true,
3072
- "rstrip": false,
3073
- "single_word": false,
3074
- "special": false
3075
- },
3076
- "36307": {
3077
- "content": "dưới",
3078
- "lstrip": false,
3079
- "normalized": true,
3080
- "rstrip": false,
3081
- "single_word": false,
3082
- "special": false
3083
- },
3084
- "36308": {
3085
- "content": "gốc",
3086
- "lstrip": false,
3087
- "normalized": true,
3088
- "rstrip": false,
3089
- "single_word": false,
3090
- "special": false
3091
- },
3092
- "36309": {
3093
- "content": "để",
3094
- "lstrip": false,
3095
- "normalized": true,
3096
- "rstrip": false,
3097
- "single_word": false,
3098
- "special": false
3099
- },
3100
- "36310": {
3101
- "content": "cảm_hậ",
3102
- "lstrip": false,
3103
- "normalized": true,
3104
- "rstrip": false,
3105
- "single_word": false,
3106
- "special": false
3107
- },
3108
- "36311": {
3109
- "content": "ị_li",
3110
- "lstrip": false,
3111
- "normalized": true,
3112
- "rstrip": false,
3113
- "single_word": false,
3114
- "special": false
3115
- },
3116
- "36312": {
3117
- "content": "hiê_g",
3118
- "lstrip": false,
3119
- "normalized": true,
3120
- "rstrip": false,
3121
- "single_word": false,
3122
- "special": false
3123
- },
3124
- "36313": {
3125
- "content": "bao_hă",
3126
- "lstrip": false,
3127
- "normalized": true,
3128
- "rstrip": false,
3129
- "single_word": false,
3130
- "special": false
3131
- },
3132
- "36314": {
3133
- "content": "âm_sự",
3134
- "lstrip": false,
3135
- "normalized": true,
3136
- "rstrip": false,
3137
- "single_word": false,
3138
- "special": false
3139
- },
3140
- "36315": {
3141
- "content": "hế",
3142
- "lstrip": false,
3143
- "normalized": true,
3144
- "rstrip": false,
3145
- "single_word": false,
3146
- "special": false
3147
- },
3148
- "36316": {
3149
- "content": "kỷ_sừ",
3150
- "lstrip": false,
3151
- "normalized": true,
3152
- "rstrip": false,
3153
- "single_word": false,
3154
- "special": false
3155
- },
3156
- "36317": {
3157
- "content": "sữ",
3158
- "lstrip": false,
3159
- "normalized": true,
3160
- "rstrip": false,
3161
- "single_word": false,
3162
- "special": false
3163
- },
3164
- "36318": {
3165
- "content": "dấu_ấ",
3166
- "lstrip": false,
3167
- "normalized": true,
3168
- "rstrip": false,
3169
- "single_word": false,
3170
- "special": false
3171
- },
3172
- "36319": {
3173
- "content": "giao_hòa",
3174
- "lstrip": false,
3175
- "normalized": true,
3176
- "rstrip": false,
3177
- "single_word": false,
3178
- "special": false
3179
- },
3180
- "36320": {
3181
- "content": "giữa",
3182
- "lstrip": false,
3183
- "normalized": true,
3184
- "rstrip": false,
3185
- "single_word": false,
3186
- "special": false
3187
- },
3188
- "36321": {
3189
- "content": "co_gười",
3190
- "lstrip": false,
3191
- "normalized": true,
3192
- "rstrip": false,
3193
- "single_word": false,
3194
- "special": false
3195
- },
3196
- "36322": {
3197
- "content": "Chí_h",
3198
- "lstrip": false,
3199
- "normalized": true,
3200
- "rstrip": false,
3201
- "single_word": false,
3202
- "special": false
3203
- },
3204
- "36323": {
3205
- "content": "vì_vậy",
3206
- "lstrip": false,
3207
- "normalized": true,
3208
- "rstrip": false,
3209
- "single_word": false,
3210
- "special": false
3211
- },
3212
- "36324": {
3213
- "content": "mỗi",
3214
- "lstrip": false,
3215
- "normalized": true,
3216
- "rstrip": false,
3217
- "single_word": false,
3218
- "special": false
3219
- },
3220
- "36325": {
3221
- "content": "dịp",
3222
- "lstrip": false,
3223
- "normalized": true,
3224
- "rstrip": false,
3225
- "single_word": false,
3226
- "special": false
3227
- },
3228
- "36326": {
3229
- "content": "lễ",
3230
- "lstrip": false,
3231
- "normalized": true,
3232
- "rstrip": false,
3233
- "single_word": false,
3234
- "special": false
3235
- },
3236
- "36327": {
3237
- "content": "lớ",
3238
- "lstrip": false,
3239
- "normalized": true,
3240
- "rstrip": false,
3241
- "single_word": false,
3242
- "special": false
3243
- },
3244
- "36328": {
3245
- "content": "hiều",
3246
- "lstrip": false,
3247
- "normalized": true,
3248
- "rstrip": false,
3249
- "single_word": false,
3250
- "special": false
3251
- },
3252
- "36329": {
3253
- "content": "phậ_ử",
3254
- "lstrip": false,
3255
- "normalized": true,
3256
- "rstrip": false,
3257
- "single_word": false,
3258
- "special": false
3259
- },
3260
- "36330": {
3261
- "content": "viế",
3262
- "lstrip": false,
3263
- "normalized": true,
3264
- "rstrip": false,
3265
- "single_word": false,
3266
- "special": false
3267
- },
3268
- "36331": {
3269
- "content": "cầu",
3270
- "lstrip": false,
3271
- "normalized": true,
3272
- "rstrip": false,
3273
- "single_word": false,
3274
- "special": false
3275
- },
3276
- "36332": {
3277
- "content": "guyệ",
3278
- "lstrip": false,
3279
- "normalized": true,
3280
- "rstrip": false,
3281
- "single_word": false,
3282
- "special": false
3283
- },
3284
- "36333": {
3285
- "content": "cây_cổ",
3286
- "lstrip": false,
3287
- "normalized": true,
3288
- "rstrip": false,
3289
- "single_word": false,
3290
- "special": false
3291
- },
3292
- "36334": {
3293
- "content": "Nhiều",
3294
- "lstrip": false,
3295
- "normalized": true,
3296
- "rstrip": false,
3297
- "single_word": false,
3298
- "special": false
3299
- },
3300
- "36335": {
3301
- "content": "già",
3302
- "lstrip": false,
3303
- "normalized": true,
3304
- "rstrip": false,
3305
- "single_word": false,
3306
- "special": false
3307
- },
3308
- "36336": {
3309
- "content": "che_chở",
3310
- "lstrip": false,
3311
- "normalized": true,
3312
- "rstrip": false,
3313
- "single_word": false,
3314
- "special": false
3315
- },
3316
- "36337": {
3317
- "content": "cầu_bì",
3318
- "lstrip": false,
3319
- "normalized": true,
3320
- "rstrip": false,
3321
- "single_word": false,
3322
- "special": false
3323
- },
3324
- "36338": {
3325
- "content": "h_a",
3326
- "lstrip": false,
3327
- "normalized": true,
3328
- "rstrip": false,
3329
- "single_word": false,
3330
- "special": false
3331
- },
3332
- "36339": {
3333
- "content": "mắ",
3334
- "lstrip": false,
3335
- "normalized": true,
3336
- "rstrip": false,
3337
- "single_word": false,
3338
- "special": false
3339
- }
3340
- },
3341
- "additional_special_tokens": [
3342
- "<extra_id_0>",
3343
- "<extra_id_1>",
3344
- "<extra_id_2>",
3345
- "<extra_id_3>",
3346
- "<extra_id_4>",
3347
- "<extra_id_5>",
3348
- "<extra_id_6>",
3349
- "<extra_id_7>",
3350
- "<extra_id_8>",
3351
- "<extra_id_9>",
3352
- "<extra_id_10>",
3353
- "<extra_id_11>",
3354
- "<extra_id_12>",
3355
- "<extra_id_13>",
3356
- "<extra_id_14>",
3357
- "<extra_id_15>",
3358
- "<extra_id_16>",
3359
- "<extra_id_17>",
3360
- "<extra_id_18>",
3361
- "<extra_id_19>",
3362
- "<extra_id_20>",
3363
- "<extra_id_21>",
3364
- "<extra_id_22>",
3365
- "<extra_id_23>",
3366
- "<extra_id_24>",
3367
- "<extra_id_25>",
3368
- "<extra_id_26>",
3369
- "<extra_id_27>",
3370
- "<extra_id_28>",
3371
- "<extra_id_29>",
3372
- "<extra_id_30>",
3373
- "<extra_id_31>",
3374
- "<extra_id_32>",
3375
- "<extra_id_33>",
3376
- "<extra_id_34>",
3377
- "<extra_id_35>",
3378
- "<extra_id_36>",
3379
- "<extra_id_37>",
3380
- "<extra_id_38>",
3381
- "<extra_id_39>",
3382
- "<extra_id_40>",
3383
- "<extra_id_41>",
3384
- "<extra_id_42>",
3385
- "<extra_id_43>",
3386
- "<extra_id_44>",
3387
- "<extra_id_45>",
3388
- "<extra_id_46>",
3389
- "<extra_id_47>",
3390
- "<extra_id_48>",
3391
- "<extra_id_49>",
3392
- "<extra_id_50>",
3393
- "<extra_id_51>",
3394
- "<extra_id_52>",
3395
- "<extra_id_53>",
3396
- "<extra_id_54>",
3397
- "<extra_id_55>",
3398
- "<extra_id_56>",
3399
- "<extra_id_57>",
3400
- "<extra_id_58>",
3401
- "<extra_id_59>",
3402
- "<extra_id_60>",
3403
- "<extra_id_61>",
3404
- "<extra_id_62>",
3405
- "<extra_id_63>",
3406
- "<extra_id_64>",
3407
- "<extra_id_65>",
3408
- "<extra_id_66>",
3409
- "<extra_id_67>",
3410
- "<extra_id_68>",
3411
- "<extra_id_69>",
3412
- "<extra_id_70>",
3413
- "<extra_id_71>",
3414
- "<extra_id_72>",
3415
- "<extra_id_73>",
3416
- "<extra_id_74>",
3417
- "<extra_id_75>",
3418
- "<extra_id_76>",
3419
- "<extra_id_77>",
3420
- "<extra_id_78>",
3421
- "<extra_id_79>",
3422
- "<extra_id_80>",
3423
- "<extra_id_81>",
3424
- "<extra_id_82>",
3425
- "<extra_id_83>",
3426
- "<extra_id_84>",
3427
- "<extra_id_85>",
3428
- "<extra_id_86>",
3429
- "<extra_id_87>",
3430
- "<extra_id_88>",
3431
- "<extra_id_89>",
3432
- "<extra_id_90>",
3433
- "<extra_id_91>",
3434
- "<extra_id_92>",
3435
- "<extra_id_93>",
3436
- "<extra_id_94>",
3437
- "<extra_id_95>"
3438
- ],
3439
- "clean_up_tokenization_spaces": true,
3440
- "eos_token": "</s>",
3441
- "extra_ids": 96,
3442
- "extra_special_tokens": {},
3443
- "legacy": true,
3444
- "model_max_length": 1000000000000000019884624838656,
3445
- "pad_token": "<pad>",
3446
- "sp_model_kwargs": {},
3447
- "tokenizer_class": "T5Tokenizer",
3448
- "unk_token": "<unk>"
3449
- }
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "64000": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "PhobertTokenizer",
54
+ "unk_token": "<unk>"
55
+ }