Upload tokenizer
Browse files- added_tokens.json +0 -36
- tokenizer_config.json +0 -664
added_tokens.json
CHANGED
|
@@ -20,13 +20,10 @@
|
|
| 20 |
"Hai_bê": 64139,
|
| 21 |
"Hi_oo": 64051,
|
| 22 |
"Hiệ": 64179,
|
| 23 |
-
"Huy_h": 64285,
|
| 24 |
"Hòa": 64225,
|
| 25 |
-
"Hội_gười": 64304,
|
| 26 |
"I_o": 64097,
|
| 27 |
"Iijima_Isao": 64046,
|
| 28 |
"Ishiba": 64007,
|
| 29 |
-
"Jea_e": 64284,
|
| 30 |
"Kaiha_a": 64054,
|
| 31 |
"Ke": 64055,
|
| 32 |
"Koji": 64053,
|
|
@@ -39,8 +36,6 @@
|
|
| 39 |
"Nam_Tow": 64264,
|
| 40 |
"Nam_chố": 64159,
|
| 41 |
"Nam_hực": 64128,
|
| 42 |
-
"Nam_vẫ": 64282,
|
| 43 |
-
"Nam_ă": 64311,
|
| 44 |
"Nam_ại": 64031,
|
| 45 |
"Nam_ỉ": 64243,
|
| 46 |
"Nam_ừ": 64016,
|
|
@@ -54,7 +49,6 @@
|
|
| 54 |
"Phạm_Mi_h": 64021,
|
| 55 |
"Phạm_Qua": 64032,
|
| 56 |
"Phầ_lớ": 64249,
|
| 57 |
-
"Phố_Việ": 64281,
|
| 58 |
"S_isuk": 64271,
|
| 59 |
"Shige_u": 64005,
|
| 60 |
"TPHCM.": 64183,
|
|
@@ -84,9 +78,7 @@
|
|
| 84 |
"Xuâ_Hòa": 64152,
|
| 85 |
"Yoshi_o": 64052,
|
| 86 |
"Yoshiko": 64008,
|
| 87 |
-
"a_g": 64296,
|
| 88 |
"a_h": 64278,
|
| 89 |
-
"a_hà_h": 64309,
|
| 90 |
"a_o": 64056,
|
| 91 |
"aka_Mao": 64049,
|
| 92 |
"am_Tow": 64266,
|
|
@@ -102,7 +94,6 @@
|
|
| 102 |
"bắ": 64012,
|
| 103 |
"bắ_đầu": 64274,
|
| 104 |
"bằ": 64251,
|
| 105 |
-
"bổ_g": 64316,
|
| 106 |
"chiế": 64226,
|
| 107 |
"chiế_lược": 64067,
|
| 108 |
"chuyê_cơ": 64001,
|
|
@@ -112,18 +103,15 @@
|
|
| 112 |
"chuyể_đổi": 64089,
|
| 113 |
"chí_h": 64084,
|
| 114 |
"chấ": 64077,
|
| 115 |
-
"chức_ă": 64283,
|
| 116 |
"co_gười": 64254,
|
| 117 |
"co_phố": 64267,
|
| 118 |
"co_si": 64250,
|
| 119 |
-
"co_số": 64291,
|
| 120 |
"co_xa": 64155,
|
| 121 |
"cà_g": 64261,
|
| 122 |
"cá_hâ": 64117,
|
| 123 |
"cù_g": 64009,
|
| 124 |
"cơ_sở_hạ": 64131,
|
| 125 |
"cườ": 64102,
|
| 126 |
-
"cải_hiệ": 64313,
|
| 127 |
"cậy_chí": 64081,
|
| 128 |
"diễ_a": 64199,
|
| 129 |
"diệ": 64069,
|
|
@@ -132,25 +120,20 @@
|
|
| 132 |
"dâ": 64258,
|
| 133 |
"dự_g": 64164,
|
| 134 |
"dự_á": 64095,
|
| 135 |
-
"eu_o": 64315,
|
| 136 |
"ghiệp": 64134,
|
| 137 |
"ghiệp_hố": 64236,
|
| 138 |
-
"ghĩ": 64299,
|
| 139 |
"ghĩa": 64060,
|
| 140 |
"ghị": 64198,
|
| 141 |
"giai_đọa": 64196,
|
| 142 |
"goại": 64111,
|
| 143 |
-
"guyệ": 64306,
|
| 144 |
"guồ": 64132,
|
| 145 |
"guồ_lực": 64200,
|
| 146 |
"gày": 64017,
|
| 147 |
-
"gôi_hà": 64292,
|
| 148 |
"gười": 64154,
|
| 149 |
"gầ": 64201,
|
| 150 |
"gắ": 64268,
|
| 151 |
"h_Vũ": 64030,
|
| 152 |
"h_chí": 64270,
|
| 153 |
-
"h_cư": 64287,
|
| 154 |
"h_cảm": 64212,
|
| 155 |
"h_hổ": 64181,
|
| 156 |
"h_hức": 64014,
|
|
@@ -158,7 +141,6 @@
|
|
| 158 |
"h_vực": 64080,
|
| 159 |
"h_đạo": 64115,
|
| 160 |
"hau": 64276,
|
| 161 |
-
"hiêu": 64301,
|
| 162 |
"hiế": 64227,
|
| 163 |
"hiế_hực": 64191,
|
| 164 |
"hiều": 64094,
|
|
@@ -166,7 +148,6 @@
|
|
| 166 |
"hiều_ý": 64203,
|
| 167 |
"hiệ": 64129,
|
| 168 |
"hiệm": 64170,
|
| 169 |
-
"hà_cửa": 64294,
|
| 170 |
"hà_g": 64110,
|
| 171 |
"hà_h": 64189,
|
| 172 |
"hà_lã": 64118,
|
|
@@ -177,7 +158,6 @@
|
|
| 177 |
"hâ_lực": 64133,
|
| 178 |
"hì_h": 64150,
|
| 179 |
"hòa": 64070,
|
| 180 |
-
"hòa_òa": 64302,
|
| 181 |
"hóa": 64135,
|
| 182 |
"hăm_TPHCM": 64222,
|
| 183 |
"hăm_chí": 64013,
|
|
@@ -188,7 +168,6 @@
|
|
| 188 |
"hươ": 64137,
|
| 189 |
"hướ": 64210,
|
| 190 |
"hấ": 64138,
|
| 191 |
-
"hấy": 64290,
|
| 192 |
"hậ": 64202,
|
| 193 |
"hằm": 64101,
|
| 194 |
"hế": 64061,
|
|
@@ -200,7 +179,6 @@
|
|
| 200 |
"hỏ": 64235,
|
| 201 |
"hỗ_ợ": 64127,
|
| 202 |
"hợp_ác": 64082,
|
| 203 |
-
"hứ": 64297,
|
| 204 |
"hữ": 64108,
|
| 205 |
"hữu_ghị": 64259,
|
| 206 |
"i_Masafumi": 64045,
|
|
@@ -214,9 +192,6 @@
|
|
| 214 |
"iể": 64093,
|
| 215 |
"iể_hà": 64165,
|
| 216 |
"iể_hực": 64076,
|
| 217 |
-
"kha_g": 64295,
|
| 218 |
-
"khô_g": 64289,
|
| 219 |
-
"khă": 64314,
|
| 220 |
"khả_ă": 64143,
|
| 221 |
"khẳ": 64119,
|
| 222 |
"khỏa": 64180,
|
|
@@ -240,7 +215,6 @@
|
|
| 240 |
"mặ_hà": 64144,
|
| 241 |
"mừ": 64157,
|
| 242 |
"o_Naoki": 64058,
|
| 243 |
-
"o_lớ": 64300,
|
| 244 |
"phươ": 64121,
|
| 245 |
"phấ_đấu": 64140,
|
| 246 |
"phầ": 64177,
|
|
@@ -249,7 +223,6 @@
|
|
| 249 |
"quốc_phò": 64123,
|
| 250 |
"quốc_ế": 64125,
|
| 251 |
"si_h": 64173,
|
| 252 |
-
"suấ": 64312,
|
| 253 |
"sâ": 64024,
|
| 254 |
"sả_xuấ": 64194,
|
| 255 |
"u_g": 64105,
|
|
@@ -261,15 +234,11 @@
|
|
| 261 |
"vậ_lực": 64234,
|
| 262 |
"vữ": 64186,
|
| 263 |
"xa_h": 64090,
|
| 264 |
-
"xuố": 64288,
|
| 265 |
-
"xúc_độ": 64303,
|
| 266 |
"xươ": 64228,
|
| 267 |
"Ô_g": 64213,
|
| 268 |
"ác_o": 64088,
|
| 269 |
-
"âm_sự": 64286,
|
| 270 |
"âm_ă": 64106,
|
| 271 |
"âm_đặc": 64175,
|
| 272 |
-
"âm_ới": 64307,
|
| 273 |
"ê_hế": 64263,
|
| 274 |
"ê_hế_giới": 64074,
|
| 275 |
"ê_ấ": 64078,
|
|
@@ -277,7 +246,6 @@
|
|
| 277 |
"í_h": 64246,
|
| 278 |
"í_hức": 64206,
|
| 279 |
"òa": 64068,
|
| 280 |
-
"ô_g": 64310,
|
| 281 |
"ă_mó": 64277,
|
| 282 |
"Đô_g": 64103,
|
| 283 |
"Đả_g": 64241,
|
|
@@ -286,7 +254,6 @@
|
|
| 286 |
"đà_phá": 64075,
|
| 287 |
"đòa": 64010,
|
| 288 |
"đú": 64223,
|
| 289 |
-
"đơ_sơ": 64293,
|
| 290 |
"đạ": 64188,
|
| 291 |
"đại_hóa": 64136,
|
| 292 |
"đấ_ước": 64168,
|
|
@@ -295,7 +262,6 @@
|
|
| 295 |
"đề_liê": 64209,
|
| 296 |
"đị": 64120,
|
| 297 |
"đối_ác": 64087,
|
| 298 |
-
"đứ": 64308,
|
| 299 |
"ơ_Chí_h": 64242,
|
| 300 |
"ưu_iê": 64109,
|
| 301 |
"ươ": 64217,
|
|
@@ -307,10 +273,8 @@
|
|
| 307 |
"ườ": 64142,
|
| 308 |
"ưở": 64027,
|
| 309 |
"ưở_g": 64025,
|
| 310 |
-
"ấ_hiều": 64298,
|
| 311 |
"ấ_đô": 64253,
|
| 312 |
"ẩm_hực": 64275,
|
| 313 |
-
"ập_hợp": 64305,
|
| 314 |
"ập_u": 64126,
|
| 315 |
"ụ_cộ": 64083,
|
| 316 |
"Ủy": 64171,
|
|
|
|
| 20 |
"Hai_bê": 64139,
|
| 21 |
"Hi_oo": 64051,
|
| 22 |
"Hiệ": 64179,
|
|
|
|
| 23 |
"Hòa": 64225,
|
|
|
|
| 24 |
"I_o": 64097,
|
| 25 |
"Iijima_Isao": 64046,
|
| 26 |
"Ishiba": 64007,
|
|
|
|
| 27 |
"Kaiha_a": 64054,
|
| 28 |
"Ke": 64055,
|
| 29 |
"Koji": 64053,
|
|
|
|
| 36 |
"Nam_Tow": 64264,
|
| 37 |
"Nam_chố": 64159,
|
| 38 |
"Nam_hực": 64128,
|
|
|
|
|
|
|
| 39 |
"Nam_ại": 64031,
|
| 40 |
"Nam_ỉ": 64243,
|
| 41 |
"Nam_ừ": 64016,
|
|
|
|
| 49 |
"Phạm_Mi_h": 64021,
|
| 50 |
"Phạm_Qua": 64032,
|
| 51 |
"Phầ_lớ": 64249,
|
|
|
|
| 52 |
"S_isuk": 64271,
|
| 53 |
"Shige_u": 64005,
|
| 54 |
"TPHCM.": 64183,
|
|
|
|
| 78 |
"Xuâ_Hòa": 64152,
|
| 79 |
"Yoshi_o": 64052,
|
| 80 |
"Yoshiko": 64008,
|
|
|
|
| 81 |
"a_h": 64278,
|
|
|
|
| 82 |
"a_o": 64056,
|
| 83 |
"aka_Mao": 64049,
|
| 84 |
"am_Tow": 64266,
|
|
|
|
| 94 |
"bắ": 64012,
|
| 95 |
"bắ_đầu": 64274,
|
| 96 |
"bằ": 64251,
|
|
|
|
| 97 |
"chiế": 64226,
|
| 98 |
"chiế_lược": 64067,
|
| 99 |
"chuyê_cơ": 64001,
|
|
|
|
| 103 |
"chuyể_đổi": 64089,
|
| 104 |
"chí_h": 64084,
|
| 105 |
"chấ": 64077,
|
|
|
|
| 106 |
"co_gười": 64254,
|
| 107 |
"co_phố": 64267,
|
| 108 |
"co_si": 64250,
|
|
|
|
| 109 |
"co_xa": 64155,
|
| 110 |
"cà_g": 64261,
|
| 111 |
"cá_hâ": 64117,
|
| 112 |
"cù_g": 64009,
|
| 113 |
"cơ_sở_hạ": 64131,
|
| 114 |
"cườ": 64102,
|
|
|
|
| 115 |
"cậy_chí": 64081,
|
| 116 |
"diễ_a": 64199,
|
| 117 |
"diệ": 64069,
|
|
|
|
| 120 |
"dâ": 64258,
|
| 121 |
"dự_g": 64164,
|
| 122 |
"dự_á": 64095,
|
|
|
|
| 123 |
"ghiệp": 64134,
|
| 124 |
"ghiệp_hố": 64236,
|
|
|
|
| 125 |
"ghĩa": 64060,
|
| 126 |
"ghị": 64198,
|
| 127 |
"giai_đọa": 64196,
|
| 128 |
"goại": 64111,
|
|
|
|
| 129 |
"guồ": 64132,
|
| 130 |
"guồ_lực": 64200,
|
| 131 |
"gày": 64017,
|
|
|
|
| 132 |
"gười": 64154,
|
| 133 |
"gầ": 64201,
|
| 134 |
"gắ": 64268,
|
| 135 |
"h_Vũ": 64030,
|
| 136 |
"h_chí": 64270,
|
|
|
|
| 137 |
"h_cảm": 64212,
|
| 138 |
"h_hổ": 64181,
|
| 139 |
"h_hức": 64014,
|
|
|
|
| 141 |
"h_vực": 64080,
|
| 142 |
"h_đạo": 64115,
|
| 143 |
"hau": 64276,
|
|
|
|
| 144 |
"hiế": 64227,
|
| 145 |
"hiế_hực": 64191,
|
| 146 |
"hiều": 64094,
|
|
|
|
| 148 |
"hiều_ý": 64203,
|
| 149 |
"hiệ": 64129,
|
| 150 |
"hiệm": 64170,
|
|
|
|
| 151 |
"hà_g": 64110,
|
| 152 |
"hà_h": 64189,
|
| 153 |
"hà_lã": 64118,
|
|
|
|
| 158 |
"hâ_lực": 64133,
|
| 159 |
"hì_h": 64150,
|
| 160 |
"hòa": 64070,
|
|
|
|
| 161 |
"hóa": 64135,
|
| 162 |
"hăm_TPHCM": 64222,
|
| 163 |
"hăm_chí": 64013,
|
|
|
|
| 168 |
"hươ": 64137,
|
| 169 |
"hướ": 64210,
|
| 170 |
"hấ": 64138,
|
|
|
|
| 171 |
"hậ": 64202,
|
| 172 |
"hằm": 64101,
|
| 173 |
"hế": 64061,
|
|
|
|
| 179 |
"hỏ": 64235,
|
| 180 |
"hỗ_ợ": 64127,
|
| 181 |
"hợp_ác": 64082,
|
|
|
|
| 182 |
"hữ": 64108,
|
| 183 |
"hữu_ghị": 64259,
|
| 184 |
"i_Masafumi": 64045,
|
|
|
|
| 192 |
"iể": 64093,
|
| 193 |
"iể_hà": 64165,
|
| 194 |
"iể_hực": 64076,
|
|
|
|
|
|
|
|
|
|
| 195 |
"khả_ă": 64143,
|
| 196 |
"khẳ": 64119,
|
| 197 |
"khỏa": 64180,
|
|
|
|
| 215 |
"mặ_hà": 64144,
|
| 216 |
"mừ": 64157,
|
| 217 |
"o_Naoki": 64058,
|
|
|
|
| 218 |
"phươ": 64121,
|
| 219 |
"phấ_đấu": 64140,
|
| 220 |
"phầ": 64177,
|
|
|
|
| 223 |
"quốc_phò": 64123,
|
| 224 |
"quốc_ế": 64125,
|
| 225 |
"si_h": 64173,
|
|
|
|
| 226 |
"sâ": 64024,
|
| 227 |
"sả_xuấ": 64194,
|
| 228 |
"u_g": 64105,
|
|
|
|
| 234 |
"vậ_lực": 64234,
|
| 235 |
"vữ": 64186,
|
| 236 |
"xa_h": 64090,
|
|
|
|
|
|
|
| 237 |
"xươ": 64228,
|
| 238 |
"Ô_g": 64213,
|
| 239 |
"ác_o": 64088,
|
|
|
|
| 240 |
"âm_ă": 64106,
|
| 241 |
"âm_đặc": 64175,
|
|
|
|
| 242 |
"ê_hế": 64263,
|
| 243 |
"ê_hế_giới": 64074,
|
| 244 |
"ê_ấ": 64078,
|
|
|
|
| 246 |
"í_h": 64246,
|
| 247 |
"í_hức": 64206,
|
| 248 |
"òa": 64068,
|
|
|
|
| 249 |
"ă_mó": 64277,
|
| 250 |
"Đô_g": 64103,
|
| 251 |
"Đả_g": 64241,
|
|
|
|
| 254 |
"đà_phá": 64075,
|
| 255 |
"đòa": 64010,
|
| 256 |
"đú": 64223,
|
|
|
|
| 257 |
"đạ": 64188,
|
| 258 |
"đại_hóa": 64136,
|
| 259 |
"đấ_ước": 64168,
|
|
|
|
| 262 |
"đề_liê": 64209,
|
| 263 |
"đị": 64120,
|
| 264 |
"đối_ác": 64087,
|
|
|
|
| 265 |
"ơ_Chí_h": 64242,
|
| 266 |
"ưu_iê": 64109,
|
| 267 |
"ươ": 64217,
|
|
|
|
| 273 |
"ườ": 64142,
|
| 274 |
"ưở": 64027,
|
| 275 |
"ưở_g": 64025,
|
|
|
|
| 276 |
"ấ_đô": 64253,
|
| 277 |
"ẩm_hực": 64275,
|
|
|
|
| 278 |
"ập_u": 64126,
|
| 279 |
"ụ_cộ": 64083,
|
| 280 |
"Ủy": 64171,
|
tokenizer_config.json
CHANGED
|
@@ -360,14 +360,6 @@
|
|
| 360 |
"single_word": false,
|
| 361 |
"special": false
|
| 362 |
},
|
| 363 |
-
"99": {
|
| 364 |
-
"content": "1",
|
| 365 |
-
"lstrip": false,
|
| 366 |
-
"normalized": true,
|
| 367 |
-
"rstrip": false,
|
| 368 |
-
"single_word": false,
|
| 369 |
-
"special": false
|
| 370 |
-
},
|
| 371 |
"100": {
|
| 372 |
"content": "số",
|
| 373 |
"lstrip": false,
|
|
@@ -400,14 +392,6 @@
|
|
| 400 |
"single_word": false,
|
| 401 |
"special": false
|
| 402 |
},
|
| 403 |
-
"118": {
|
| 404 |
-
"content": "hay",
|
| 405 |
-
"lstrip": false,
|
| 406 |
-
"normalized": true,
|
| 407 |
-
"rstrip": false,
|
| 408 |
-
"single_word": false,
|
| 409 |
-
"special": false
|
| 410 |
-
},
|
| 411 |
"120": {
|
| 412 |
"content": "Hà_Nội",
|
| 413 |
"lstrip": false,
|
|
@@ -432,14 +416,6 @@
|
|
| 432 |
"single_word": false,
|
| 433 |
"special": false
|
| 434 |
},
|
| 435 |
-
"133": {
|
| 436 |
-
"content": "giá",
|
| 437 |
-
"lstrip": false,
|
| 438 |
-
"normalized": true,
|
| 439 |
-
"rstrip": false,
|
| 440 |
-
"single_word": false,
|
| 441 |
-
"special": false
|
| 442 |
-
},
|
| 443 |
"135": {
|
| 444 |
"content": "...",
|
| 445 |
"lstrip": false,
|
|
@@ -472,14 +448,6 @@
|
|
| 472 |
"single_word": false,
|
| 473 |
"special": false
|
| 474 |
},
|
| 475 |
-
"162": {
|
| 476 |
-
"content": "Sau",
|
| 477 |
-
"lstrip": false,
|
| 478 |
-
"normalized": true,
|
| 479 |
-
"rstrip": false,
|
| 480 |
-
"single_word": false,
|
| 481 |
-
"special": false
|
| 482 |
-
},
|
| 483 |
"163": {
|
| 484 |
"content": "4",
|
| 485 |
"lstrip": false,
|
|
@@ -504,14 +472,6 @@
|
|
| 504 |
"single_word": false,
|
| 505 |
"special": false
|
| 506 |
},
|
| 507 |
-
"173": {
|
| 508 |
-
"content": "5",
|
| 509 |
-
"lstrip": false,
|
| 510 |
-
"normalized": true,
|
| 511 |
-
"rstrip": false,
|
| 512 |
-
"single_word": false,
|
| 513 |
-
"special": false
|
| 514 |
-
},
|
| 515 |
"181": {
|
| 516 |
"content": "cấp",
|
| 517 |
"lstrip": false,
|
|
@@ -528,38 +488,6 @@
|
|
| 528 |
"single_word": false,
|
| 529 |
"special": false
|
| 530 |
},
|
| 531 |
-
"193": {
|
| 532 |
-
"content": "em",
|
| 533 |
-
"lstrip": false,
|
| 534 |
-
"normalized": true,
|
| 535 |
-
"rstrip": false,
|
| 536 |
-
"single_word": false,
|
| 537 |
-
"special": false
|
| 538 |
-
},
|
| 539 |
-
"207": {
|
| 540 |
-
"content": "mọi",
|
| 541 |
-
"lstrip": false,
|
| 542 |
-
"normalized": true,
|
| 543 |
-
"rstrip": false,
|
| 544 |
-
"single_word": false,
|
| 545 |
-
"special": false
|
| 546 |
-
},
|
| 547 |
-
"218": {
|
| 548 |
-
"content": "Tôi",
|
| 549 |
-
"lstrip": false,
|
| 550 |
-
"normalized": true,
|
| 551 |
-
"rstrip": false,
|
| 552 |
-
"single_word": false,
|
| 553 |
-
"special": false
|
| 554 |
-
},
|
| 555 |
-
"219": {
|
| 556 |
-
"content": "lúc",
|
| 557 |
-
"lstrip": false,
|
| 558 |
-
"normalized": true,
|
| 559 |
-
"rstrip": false,
|
| 560 |
-
"single_word": false,
|
| 561 |
-
"special": false
|
| 562 |
-
},
|
| 563 |
"222": {
|
| 564 |
"content": "học",
|
| 565 |
"lstrip": false,
|
|
@@ -640,14 +568,6 @@
|
|
| 640 |
"single_word": false,
|
| 641 |
"special": false
|
| 642 |
},
|
| 643 |
-
"267": {
|
| 644 |
-
"content": "xã_hội",
|
| 645 |
-
"lstrip": false,
|
| 646 |
-
"normalized": true,
|
| 647 |
-
"rstrip": false,
|
| 648 |
-
"single_word": false,
|
| 649 |
-
"special": false
|
| 650 |
-
},
|
| 651 |
"268": {
|
| 652 |
"content": "Phó",
|
| 653 |
"lstrip": false,
|
|
@@ -744,14 +664,6 @@
|
|
| 744 |
"single_word": false,
|
| 745 |
"special": false
|
| 746 |
},
|
| 747 |
-
"359": {
|
| 748 |
-
"content": "khó",
|
| 749 |
-
"lstrip": false,
|
| 750 |
-
"normalized": true,
|
| 751 |
-
"rstrip": false,
|
| 752 |
-
"single_word": false,
|
| 753 |
-
"special": false
|
| 754 |
-
},
|
| 755 |
"378": {
|
| 756 |
"content": "độ",
|
| 757 |
"lstrip": false,
|
|
@@ -776,22 +688,6 @@
|
|
| 776 |
"single_word": false,
|
| 777 |
"special": false
|
| 778 |
},
|
| 779 |
-
"426": {
|
| 780 |
-
"content": "9",
|
| 781 |
-
"lstrip": false,
|
| 782 |
-
"normalized": true,
|
| 783 |
-
"rstrip": false,
|
| 784 |
-
"single_word": false,
|
| 785 |
-
"special": false
|
| 786 |
-
},
|
| 787 |
-
"434": {
|
| 788 |
-
"content": "Năm",
|
| 789 |
-
"lstrip": false,
|
| 790 |
-
"normalized": true,
|
| 791 |
-
"rstrip": false,
|
| 792 |
-
"single_word": false,
|
| 793 |
-
"special": false
|
| 794 |
-
},
|
| 795 |
"447": {
|
| 796 |
"content": "hỏi",
|
| 797 |
"lstrip": false,
|
|
@@ -800,14 +696,6 @@
|
|
| 800 |
"single_word": false,
|
| 801 |
"special": false
|
| 802 |
},
|
| 803 |
-
"450": {
|
| 804 |
-
"content": "Pháp",
|
| 805 |
-
"lstrip": false,
|
| 806 |
-
"normalized": true,
|
| 807 |
-
"rstrip": false,
|
| 808 |
-
"single_word": false,
|
| 809 |
-
"special": false
|
| 810 |
-
},
|
| 811 |
"471": {
|
| 812 |
"content": "sớm",
|
| 813 |
"lstrip": false,
|
|
@@ -832,14 +720,6 @@
|
|
| 832 |
"single_word": false,
|
| 833 |
"special": false
|
| 834 |
},
|
| 835 |
-
"499": {
|
| 836 |
-
"content": "bước",
|
| 837 |
-
"lstrip": false,
|
| 838 |
-
"normalized": true,
|
| 839 |
-
"rstrip": false,
|
| 840 |
-
"single_word": false,
|
| 841 |
-
"special": false
|
| 842 |
-
},
|
| 843 |
"506": {
|
| 844 |
"content": "hộ",
|
| 845 |
"lstrip": false,
|
|
@@ -960,14 +840,6 @@
|
|
| 960 |
"single_word": false,
|
| 961 |
"special": false
|
| 962 |
},
|
| 963 |
-
"654": {
|
| 964 |
-
"content": "hôm",
|
| 965 |
-
"lstrip": false,
|
| 966 |
-
"normalized": true,
|
| 967 |
-
"rstrip": false,
|
| 968 |
-
"single_word": false,
|
| 969 |
-
"special": false
|
| 970 |
-
},
|
| 971 |
"669": {
|
| 972 |
"content": "Đó",
|
| 973 |
"lstrip": false,
|
|
@@ -984,14 +856,6 @@
|
|
| 984 |
"single_word": false,
|
| 985 |
"special": false
|
| 986 |
},
|
| 987 |
-
"680": {
|
| 988 |
-
"content": "Bà",
|
| 989 |
-
"lstrip": false,
|
| 990 |
-
"normalized": true,
|
| 991 |
-
"rstrip": false,
|
| 992 |
-
"single_word": false,
|
| 993 |
-
"special": false
|
| 994 |
-
},
|
| 995 |
"681": {
|
| 996 |
"content": "giới",
|
| 997 |
"lstrip": false,
|
|
@@ -1000,14 +864,6 @@
|
|
| 1000 |
"single_word": false,
|
| 1001 |
"special": false
|
| 1002 |
},
|
| 1003 |
-
"696": {
|
| 1004 |
-
"content": "100",
|
| 1005 |
-
"lstrip": false,
|
| 1006 |
-
"normalized": true,
|
| 1007 |
-
"rstrip": false,
|
| 1008 |
-
"single_word": false,
|
| 1009 |
-
"special": false
|
| 1010 |
-
},
|
| 1011 |
"711": {
|
| 1012 |
"content": "bức",
|
| 1013 |
"lstrip": false,
|
|
@@ -1016,14 +872,6 @@
|
|
| 1016 |
"single_word": false,
|
| 1017 |
"special": false
|
| 1018 |
},
|
| 1019 |
-
"733": {
|
| 1020 |
-
"content": "đời",
|
| 1021 |
-
"lstrip": false,
|
| 1022 |
-
"normalized": true,
|
| 1023 |
-
"rstrip": false,
|
| 1024 |
-
"single_word": false,
|
| 1025 |
-
"special": false
|
| 1026 |
-
},
|
| 1027 |
"740": {
|
| 1028 |
"content": "lập",
|
| 1029 |
"lstrip": false,
|
|
@@ -1080,14 +928,6 @@
|
|
| 1080 |
"single_word": false,
|
| 1081 |
"special": false
|
| 1082 |
},
|
| 1083 |
-
"825": {
|
| 1084 |
-
"content": "chủ_yếu",
|
| 1085 |
-
"lstrip": false,
|
| 1086 |
-
"normalized": true,
|
| 1087 |
-
"rstrip": false,
|
| 1088 |
-
"single_word": false,
|
| 1089 |
-
"special": false
|
| 1090 |
-
},
|
| 1091 |
"835": {
|
| 1092 |
"content": "phố",
|
| 1093 |
"lstrip": false,
|
|
@@ -1152,14 +992,6 @@
|
|
| 1152 |
"single_word": false,
|
| 1153 |
"special": false
|
| 1154 |
},
|
| 1155 |
-
"956": {
|
| 1156 |
-
"content": "đại_học",
|
| 1157 |
-
"lstrip": false,
|
| 1158 |
-
"normalized": true,
|
| 1159 |
-
"rstrip": false,
|
| 1160 |
-
"single_word": false,
|
| 1161 |
-
"special": false
|
| 1162 |
-
},
|
| 1163 |
"988": {
|
| 1164 |
"content": "chở",
|
| 1165 |
"lstrip": false,
|
|
@@ -1192,22 +1024,6 @@
|
|
| 1192 |
"single_word": false,
|
| 1193 |
"special": false
|
| 1194 |
},
|
| 1195 |
-
"1093": {
|
| 1196 |
-
"content": "hội",
|
| 1197 |
-
"lstrip": false,
|
| 1198 |
-
"normalized": true,
|
| 1199 |
-
"rstrip": false,
|
| 1200 |
-
"single_word": false,
|
| 1201 |
-
"special": false
|
| 1202 |
-
},
|
| 1203 |
-
"1106": {
|
| 1204 |
-
"content": "Mỗi",
|
| 1205 |
-
"lstrip": false,
|
| 1206 |
-
"normalized": true,
|
| 1207 |
-
"rstrip": false,
|
| 1208 |
-
"single_word": false,
|
| 1209 |
-
"special": false
|
| 1210 |
-
},
|
| 1211 |
"1111": {
|
| 1212 |
"content": "i",
|
| 1213 |
"lstrip": false,
|
|
@@ -1248,14 +1064,6 @@
|
|
| 1248 |
"single_word": false,
|
| 1249 |
"special": false
|
| 1250 |
},
|
| 1251 |
-
"1287": {
|
| 1252 |
-
"content": "cầm",
|
| 1253 |
-
"lstrip": false,
|
| 1254 |
-
"normalized": true,
|
| 1255 |
-
"rstrip": false,
|
| 1256 |
-
"single_word": false,
|
| 1257 |
-
"special": false
|
| 1258 |
-
},
|
| 1259 |
"1292": {
|
| 1260 |
"content": "châu_Á",
|
| 1261 |
"lstrip": false,
|
|
@@ -1272,14 +1080,6 @@
|
|
| 1272 |
"single_word": false,
|
| 1273 |
"special": false
|
| 1274 |
},
|
| 1275 |
-
"1338": {
|
| 1276 |
-
"content": "bây_giờ",
|
| 1277 |
-
"lstrip": false,
|
| 1278 |
-
"normalized": true,
|
| 1279 |
-
"rstrip": false,
|
| 1280 |
-
"single_word": false,
|
| 1281 |
-
"special": false
|
| 1282 |
-
},
|
| 1283 |
"1351": {
|
| 1284 |
"content": "Cả",
|
| 1285 |
"lstrip": false,
|
|
@@ -1328,14 +1128,6 @@
|
|
| 1328 |
"single_word": false,
|
| 1329 |
"special": false
|
| 1330 |
},
|
| 1331 |
-
"1464": {
|
| 1332 |
-
"content": "bao",
|
| 1333 |
-
"lstrip": false,
|
| 1334 |
-
"normalized": true,
|
| 1335 |
-
"rstrip": false,
|
| 1336 |
-
"single_word": false,
|
| 1337 |
-
"special": false
|
| 1338 |
-
},
|
| 1339 |
"1517": {
|
| 1340 |
"content": "a",
|
| 1341 |
"lstrip": false,
|
|
@@ -1368,14 +1160,6 @@
|
|
| 1368 |
"single_word": false,
|
| 1369 |
"special": false
|
| 1370 |
},
|
| 1371 |
-
"1606": {
|
| 1372 |
-
"content": "lúa",
|
| 1373 |
-
"lstrip": false,
|
| 1374 |
-
"normalized": true,
|
| 1375 |
-
"rstrip": false,
|
| 1376 |
-
"single_word": false,
|
| 1377 |
-
"special": false
|
| 1378 |
-
},
|
| 1379 |
"1615": {
|
| 1380 |
"content": "Chiều",
|
| 1381 |
"lstrip": false,
|
|
@@ -1384,14 +1168,6 @@
|
|
| 1384 |
"single_word": false,
|
| 1385 |
"special": false
|
| 1386 |
},
|
| 1387 |
-
"1663": {
|
| 1388 |
-
"content": "70",
|
| 1389 |
-
"lstrip": false,
|
| 1390 |
-
"normalized": true,
|
| 1391 |
-
"rstrip": false,
|
| 1392 |
-
"single_word": false,
|
| 1393 |
-
"special": false
|
| 1394 |
-
},
|
| 1395 |
"1664": {
|
| 1396 |
"content": "h",
|
| 1397 |
"lstrip": false,
|
|
@@ -1408,14 +1184,6 @@
|
|
| 1408 |
"single_word": false,
|
| 1409 |
"special": false
|
| 1410 |
},
|
| 1411 |
-
"1750": {
|
| 1412 |
-
"content": "lao",
|
| 1413 |
-
"lstrip": false,
|
| 1414 |
-
"normalized": true,
|
| 1415 |
-
"rstrip": false,
|
| 1416 |
-
"single_word": false,
|
| 1417 |
-
"special": false
|
| 1418 |
-
},
|
| 1419 |
"1775": {
|
| 1420 |
"content": "đám",
|
| 1421 |
"lstrip": false,
|
|
@@ -1440,14 +1208,6 @@
|
|
| 1440 |
"single_word": false,
|
| 1441 |
"special": false
|
| 1442 |
},
|
| 1443 |
-
"1912": {
|
| 1444 |
-
"content": "độc_lập",
|
| 1445 |
-
"lstrip": false,
|
| 1446 |
-
"normalized": true,
|
| 1447 |
-
"rstrip": false,
|
| 1448 |
-
"single_word": false,
|
| 1449 |
-
"special": false
|
| 1450 |
-
},
|
| 1451 |
"2089": {
|
| 1452 |
"content": "pháp_lý",
|
| 1453 |
"lstrip": false,
|
|
@@ -1472,14 +1232,6 @@
|
|
| 1472 |
"single_word": false,
|
| 1473 |
"special": false
|
| 1474 |
},
|
| 1475 |
-
"2153": {
|
| 1476 |
-
"content": "sở",
|
| 1477 |
-
"lstrip": false,
|
| 1478 |
-
"normalized": true,
|
| 1479 |
-
"rstrip": false,
|
| 1480 |
-
"single_word": false,
|
| 1481 |
-
"special": false
|
| 1482 |
-
},
|
| 1483 |
"2185": {
|
| 1484 |
"content": "phó",
|
| 1485 |
"lstrip": false,
|
|
@@ -1504,14 +1256,6 @@
|
|
| 1504 |
"single_word": false,
|
| 1505 |
"special": false
|
| 1506 |
},
|
| 1507 |
-
"2209": {
|
| 1508 |
-
"content": "cơ",
|
| 1509 |
-
"lstrip": false,
|
| 1510 |
-
"normalized": true,
|
| 1511 |
-
"rstrip": false,
|
| 1512 |
-
"single_word": false,
|
| 1513 |
-
"special": false
|
| 1514 |
-
},
|
| 1515 |
"2217": {
|
| 1516 |
"content": "đậm",
|
| 1517 |
"lstrip": false,
|
|
@@ -1568,14 +1312,6 @@
|
|
| 1568 |
"single_word": false,
|
| 1569 |
"special": false
|
| 1570 |
},
|
| 1571 |
-
"2469": {
|
| 1572 |
-
"content": "hưu",
|
| 1573 |
-
"lstrip": false,
|
| 1574 |
-
"normalized": true,
|
| 1575 |
-
"rstrip": false,
|
| 1576 |
-
"single_word": false,
|
| 1577 |
-
"special": false
|
| 1578 |
-
},
|
| 1579 |
"2557": {
|
| 1580 |
"content": "chức_vụ",
|
| 1581 |
"lstrip": false,
|
|
@@ -1624,14 +1360,6 @@
|
|
| 1624 |
"single_word": false,
|
| 1625 |
"special": false
|
| 1626 |
},
|
| 1627 |
-
"2913": {
|
| 1628 |
-
"content": "lò",
|
| 1629 |
-
"lstrip": false,
|
| 1630 |
-
"normalized": true,
|
| 1631 |
-
"rstrip": false,
|
| 1632 |
-
"single_word": false,
|
| 1633 |
-
"special": false
|
| 1634 |
-
},
|
| 1635 |
"2991": {
|
| 1636 |
"content": "ô",
|
| 1637 |
"lstrip": false,
|
|
@@ -1688,14 +1416,6 @@
|
|
| 1688 |
"single_word": false,
|
| 1689 |
"special": false
|
| 1690 |
},
|
| 1691 |
-
"3810": {
|
| 1692 |
-
"content": "kỹ_sư",
|
| 1693 |
-
"lstrip": false,
|
| 1694 |
-
"normalized": true,
|
| 1695 |
-
"rstrip": false,
|
| 1696 |
-
"single_word": false,
|
| 1697 |
-
"special": false
|
| 1698 |
-
},
|
| 1699 |
"3988": {
|
| 1700 |
"content": "ổ",
|
| 1701 |
"lstrip": false,
|
|
@@ -1728,22 +1448,6 @@
|
|
| 1728 |
"single_word": false,
|
| 1729 |
"special": false
|
| 1730 |
},
|
| 1731 |
-
"4216": {
|
| 1732 |
-
"content": "1992",
|
| 1733 |
-
"lstrip": false,
|
| 1734 |
-
"normalized": true,
|
| 1735 |
-
"rstrip": false,
|
| 1736 |
-
"single_word": false,
|
| 1737 |
-
"special": false
|
| 1738 |
-
},
|
| 1739 |
-
"4278": {
|
| 1740 |
-
"content": "1994",
|
| 1741 |
-
"lstrip": false,
|
| 1742 |
-
"normalized": true,
|
| 1743 |
-
"rstrip": false,
|
| 1744 |
-
"single_word": false,
|
| 1745 |
-
"special": false
|
| 1746 |
-
},
|
| 1747 |
"4318": {
|
| 1748 |
"content": "MTTQ",
|
| 1749 |
"lstrip": false,
|
|
@@ -1856,14 +1560,6 @@
|
|
| 1856 |
"single_word": false,
|
| 1857 |
"special": false
|
| 1858 |
},
|
| 1859 |
-
"5854": {
|
| 1860 |
-
"content": "Nội_Bài",
|
| 1861 |
-
"lstrip": false,
|
| 1862 |
-
"normalized": true,
|
| 1863 |
-
"rstrip": false,
|
| 1864 |
-
"single_word": false,
|
| 1865 |
-
"special": false
|
| 1866 |
-
},
|
| 1867 |
"6007": {
|
| 1868 |
"content": "hư",
|
| 1869 |
"lstrip": false,
|
|
@@ -1992,14 +1688,6 @@
|
|
| 1992 |
"single_word": false,
|
| 1993 |
"special": false
|
| 1994 |
},
|
| 1995 |
-
"8915": {
|
| 1996 |
-
"content": "ời",
|
| 1997 |
-
"lstrip": false,
|
| 1998 |
-
"normalized": true,
|
| 1999 |
-
"rstrip": false,
|
| 2000 |
-
"single_word": false,
|
| 2001 |
-
"special": false
|
| 2002 |
-
},
|
| 2003 |
"8942": {
|
| 2004 |
"content": "lê",
|
| 2005 |
"lstrip": false,
|
|
@@ -2032,14 +1720,6 @@
|
|
| 2032 |
"single_word": false,
|
| 2033 |
"special": false
|
| 2034 |
},
|
| 2035 |
-
"9412": {
|
| 2036 |
-
"content": "ài",
|
| 2037 |
-
"lstrip": false,
|
| 2038 |
-
"normalized": true,
|
| 2039 |
-
"rstrip": false,
|
| 2040 |
-
"single_word": false,
|
| 2041 |
-
"special": false
|
| 2042 |
-
},
|
| 2043 |
"9456": {
|
| 2044 |
"content": "ề",
|
| 2045 |
"lstrip": false,
|
|
@@ -2168,14 +1848,6 @@
|
|
| 2168 |
"single_word": false,
|
| 2169 |
"special": false
|
| 2170 |
},
|
| 2171 |
-
"12416": {
|
| 2172 |
-
"content": "ợ",
|
| 2173 |
-
"lstrip": false,
|
| 2174 |
-
"normalized": true,
|
| 2175 |
-
"rstrip": false,
|
| 2176 |
-
"single_word": false,
|
| 2177 |
-
"special": false
|
| 2178 |
-
},
|
| 2179 |
"13291": {
|
| 2180 |
"content": "ộc",
|
| 2181 |
"lstrip": false,
|
|
@@ -2216,14 +1888,6 @@
|
|
| 2216 |
"single_word": false,
|
| 2217 |
"special": false
|
| 2218 |
},
|
| 2219 |
-
"15002": {
|
| 2220 |
-
"content": "Cò",
|
| 2221 |
-
"lstrip": false,
|
| 2222 |
-
"normalized": true,
|
| 2223 |
-
"rstrip": false,
|
| 2224 |
-
"single_word": false,
|
| 2225 |
-
"special": false
|
| 2226 |
-
},
|
| 2227 |
"15145": {
|
| 2228 |
"content": "úc",
|
| 2229 |
"lstrip": false,
|
|
@@ -2264,14 +1928,6 @@
|
|
| 2264 |
"single_word": false,
|
| 2265 |
"special": false
|
| 2266 |
},
|
| 2267 |
-
"16788": {
|
| 2268 |
-
"content": "iểu",
|
| 2269 |
-
"lstrip": false,
|
| 2270 |
-
"normalized": true,
|
| 2271 |
-
"rstrip": false,
|
| 2272 |
-
"single_word": false,
|
| 2273 |
-
"special": false
|
| 2274 |
-
},
|
| 2275 |
"17341": {
|
| 2276 |
"content": "iếp",
|
| 2277 |
"lstrip": false,
|
|
@@ -2296,14 +1952,6 @@
|
|
| 2296 |
"single_word": false,
|
| 2297 |
"special": false
|
| 2298 |
},
|
| 2299 |
-
"20017": {
|
| 2300 |
-
"content": "đỡ_đầu",
|
| 2301 |
-
"lstrip": false,
|
| 2302 |
-
"normalized": true,
|
| 2303 |
-
"rstrip": false,
|
| 2304 |
-
"single_word": false,
|
| 2305 |
-
"special": false
|
| 2306 |
-
},
|
| 2307 |
"20463": {
|
| 2308 |
"content": "kiều_hối",
|
| 2309 |
"lstrip": false,
|
|
@@ -2352,14 +2000,6 @@
|
|
| 2352 |
"single_word": false,
|
| 2353 |
"special": false
|
| 2354 |
},
|
| 2355 |
-
"24096": {
|
| 2356 |
-
"content": "há",
|
| 2357 |
-
"lstrip": false,
|
| 2358 |
-
"normalized": true,
|
| 2359 |
-
"rstrip": false,
|
| 2360 |
-
"single_word": false,
|
| 2361 |
-
"special": false
|
| 2362 |
-
},
|
| 2363 |
"30251": {
|
| 2364 |
"content": "ă",
|
| 2365 |
"lstrip": false,
|
|
@@ -2400,14 +2040,6 @@
|
|
| 2400 |
"single_word": false,
|
| 2401 |
"special": false
|
| 2402 |
},
|
| 2403 |
-
"35477": {
|
| 2404 |
-
"content": "hy",
|
| 2405 |
-
"lstrip": false,
|
| 2406 |
-
"normalized": true,
|
| 2407 |
-
"rstrip": false,
|
| 2408 |
-
"single_word": false,
|
| 2409 |
-
"special": false
|
| 2410 |
-
},
|
| 2411 |
"37463": {
|
| 2412 |
"content": "ổ_chức",
|
| 2413 |
"lstrip": false,
|
|
@@ -2456,14 +2088,6 @@
|
|
| 2456 |
"single_word": false,
|
| 2457 |
"special": false
|
| 2458 |
},
|
| 2459 |
-
"46921": {
|
| 2460 |
-
"content": "a_đời",
|
| 2461 |
-
"lstrip": false,
|
| 2462 |
-
"normalized": true,
|
| 2463 |
-
"rstrip": false,
|
| 2464 |
-
"single_word": false,
|
| 2465 |
-
"special": false
|
| 2466 |
-
},
|
| 2467 |
"48978": {
|
| 2468 |
"content": "â",
|
| 2469 |
"lstrip": false,
|
|
@@ -4743,294 +4367,6 @@
|
|
| 4743 |
"rstrip": false,
|
| 4744 |
"single_word": false,
|
| 4745 |
"special": false
|
| 4746 |
-
},
|
| 4747 |
-
"64281": {
|
| 4748 |
-
"content": "Phố_Việ",
|
| 4749 |
-
"lstrip": false,
|
| 4750 |
-
"normalized": true,
|
| 4751 |
-
"rstrip": false,
|
| 4752 |
-
"single_word": false,
|
| 4753 |
-
"special": false
|
| 4754 |
-
},
|
| 4755 |
-
"64282": {
|
| 4756 |
-
"content": "Nam_vẫ",
|
| 4757 |
-
"lstrip": false,
|
| 4758 |
-
"normalized": true,
|
| 4759 |
-
"rstrip": false,
|
| 4760 |
-
"single_word": false,
|
| 4761 |
-
"special": false
|
| 4762 |
-
},
|
| 4763 |
-
"64283": {
|
| 4764 |
-
"content": "chức_ă",
|
| 4765 |
-
"lstrip": false,
|
| 4766 |
-
"normalized": true,
|
| 4767 |
-
"rstrip": false,
|
| 4768 |
-
"single_word": false,
|
| 4769 |
-
"special": false
|
| 4770 |
-
},
|
| 4771 |
-
"64284": {
|
| 4772 |
-
"content": "Jea_e",
|
| 4773 |
-
"lstrip": false,
|
| 4774 |
-
"normalized": true,
|
| 4775 |
-
"rstrip": false,
|
| 4776 |
-
"single_word": false,
|
| 4777 |
-
"special": false
|
| 4778 |
-
},
|
| 4779 |
-
"64285": {
|
| 4780 |
-
"content": "Huy_h",
|
| 4781 |
-
"lstrip": false,
|
| 4782 |
-
"normalized": true,
|
| 4783 |
-
"rstrip": false,
|
| 4784 |
-
"single_word": false,
|
| 4785 |
-
"special": false
|
| 4786 |
-
},
|
| 4787 |
-
"64286": {
|
| 4788 |
-
"content": "âm_sự",
|
| 4789 |
-
"lstrip": false,
|
| 4790 |
-
"normalized": true,
|
| 4791 |
-
"rstrip": false,
|
| 4792 |
-
"single_word": false,
|
| 4793 |
-
"special": false
|
| 4794 |
-
},
|
| 4795 |
-
"64287": {
|
| 4796 |
-
"content": "h_cư",
|
| 4797 |
-
"lstrip": false,
|
| 4798 |
-
"normalized": true,
|
| 4799 |
-
"rstrip": false,
|
| 4800 |
-
"single_word": false,
|
| 4801 |
-
"special": false
|
| 4802 |
-
},
|
| 4803 |
-
"64288": {
|
| 4804 |
-
"content": "xuố",
|
| 4805 |
-
"lstrip": false,
|
| 4806 |
-
"normalized": true,
|
| 4807 |
-
"rstrip": false,
|
| 4808 |
-
"single_word": false,
|
| 4809 |
-
"special": false
|
| 4810 |
-
},
|
| 4811 |
-
"64289": {
|
| 4812 |
-
"content": "khô_g",
|
| 4813 |
-
"lstrip": false,
|
| 4814 |
-
"normalized": true,
|
| 4815 |
-
"rstrip": false,
|
| 4816 |
-
"single_word": false,
|
| 4817 |
-
"special": false
|
| 4818 |
-
},
|
| 4819 |
-
"64290": {
|
| 4820 |
-
"content": "hấy",
|
| 4821 |
-
"lstrip": false,
|
| 4822 |
-
"normalized": true,
|
| 4823 |
-
"rstrip": false,
|
| 4824 |
-
"single_word": false,
|
| 4825 |
-
"special": false
|
| 4826 |
-
},
|
| 4827 |
-
"64291": {
|
| 4828 |
-
"content": "co_số",
|
| 4829 |
-
"lstrip": false,
|
| 4830 |
-
"normalized": true,
|
| 4831 |
-
"rstrip": false,
|
| 4832 |
-
"single_word": false,
|
| 4833 |
-
"special": false
|
| 4834 |
-
},
|
| 4835 |
-
"64292": {
|
| 4836 |
-
"content": "gôi_hà",
|
| 4837 |
-
"lstrip": false,
|
| 4838 |
-
"normalized": true,
|
| 4839 |
-
"rstrip": false,
|
| 4840 |
-
"single_word": false,
|
| 4841 |
-
"special": false
|
| 4842 |
-
},
|
| 4843 |
-
"64293": {
|
| 4844 |
-
"content": "đơ_sơ",
|
| 4845 |
-
"lstrip": false,
|
| 4846 |
-
"normalized": true,
|
| 4847 |
-
"rstrip": false,
|
| 4848 |
-
"single_word": false,
|
| 4849 |
-
"special": false
|
| 4850 |
-
},
|
| 4851 |
-
"64294": {
|
| 4852 |
-
"content": "hà_cửa",
|
| 4853 |
-
"lstrip": false,
|
| 4854 |
-
"normalized": true,
|
| 4855 |
-
"rstrip": false,
|
| 4856 |
-
"single_word": false,
|
| 4857 |
-
"special": false
|
| 4858 |
-
},
|
| 4859 |
-
"64295": {
|
| 4860 |
-
"content": "kha_g",
|
| 4861 |
-
"lstrip": false,
|
| 4862 |
-
"normalized": true,
|
| 4863 |
-
"rstrip": false,
|
| 4864 |
-
"single_word": false,
|
| 4865 |
-
"special": false
|
| 4866 |
-
},
|
| 4867 |
-
"64296": {
|
| 4868 |
-
"content": "a_g",
|
| 4869 |
-
"lstrip": false,
|
| 4870 |
-
"normalized": true,
|
| 4871 |
-
"rstrip": false,
|
| 4872 |
-
"single_word": false,
|
| 4873 |
-
"special": false
|
| 4874 |
-
},
|
| 4875 |
-
"64297": {
|
| 4876 |
-
"content": "hứ",
|
| 4877 |
-
"lstrip": false,
|
| 4878 |
-
"normalized": true,
|
| 4879 |
-
"rstrip": false,
|
| 4880 |
-
"single_word": false,
|
| 4881 |
-
"special": false
|
| 4882 |
-
},
|
| 4883 |
-
"64298": {
|
| 4884 |
-
"content": "ấ_hiều",
|
| 4885 |
-
"lstrip": false,
|
| 4886 |
-
"normalized": true,
|
| 4887 |
-
"rstrip": false,
|
| 4888 |
-
"single_word": false,
|
| 4889 |
-
"special": false
|
| 4890 |
-
},
|
| 4891 |
-
"64299": {
|
| 4892 |
-
"content": "ghĩ",
|
| 4893 |
-
"lstrip": false,
|
| 4894 |
-
"normalized": true,
|
| 4895 |
-
"rstrip": false,
|
| 4896 |
-
"single_word": false,
|
| 4897 |
-
"special": false
|
| 4898 |
-
},
|
| 4899 |
-
"64300": {
|
| 4900 |
-
"content": "o_lớ",
|
| 4901 |
-
"lstrip": false,
|
| 4902 |
-
"normalized": true,
|
| 4903 |
-
"rstrip": false,
|
| 4904 |
-
"single_word": false,
|
| 4905 |
-
"special": false
|
| 4906 |
-
},
|
| 4907 |
-
"64301": {
|
| 4908 |
-
"content": "hiêu",
|
| 4909 |
-
"lstrip": false,
|
| 4910 |
-
"normalized": true,
|
| 4911 |
-
"rstrip": false,
|
| 4912 |
-
"single_word": false,
|
| 4913 |
-
"special": false
|
| 4914 |
-
},
|
| 4915 |
-
"64302": {
|
| 4916 |
-
"content": "hòa_òa",
|
| 4917 |
-
"lstrip": false,
|
| 4918 |
-
"normalized": true,
|
| 4919 |
-
"rstrip": false,
|
| 4920 |
-
"single_word": false,
|
| 4921 |
-
"special": false
|
| 4922 |
-
},
|
| 4923 |
-
"64303": {
|
| 4924 |
-
"content": "xúc_độ",
|
| 4925 |
-
"lstrip": false,
|
| 4926 |
-
"normalized": true,
|
| 4927 |
-
"rstrip": false,
|
| 4928 |
-
"single_word": false,
|
| 4929 |
-
"special": false
|
| 4930 |
-
},
|
| 4931 |
-
"64304": {
|
| 4932 |
-
"content": "Hội_gười",
|
| 4933 |
-
"lstrip": false,
|
| 4934 |
-
"normalized": true,
|
| 4935 |
-
"rstrip": false,
|
| 4936 |
-
"single_word": false,
|
| 4937 |
-
"special": false
|
| 4938 |
-
},
|
| 4939 |
-
"64305": {
|
| 4940 |
-
"content": "ập_hợp",
|
| 4941 |
-
"lstrip": false,
|
| 4942 |
-
"normalized": true,
|
| 4943 |
-
"rstrip": false,
|
| 4944 |
-
"single_word": false,
|
| 4945 |
-
"special": false
|
| 4946 |
-
},
|
| 4947 |
-
"64306": {
|
| 4948 |
-
"content": "guyệ",
|
| 4949 |
-
"lstrip": false,
|
| 4950 |
-
"normalized": true,
|
| 4951 |
-
"rstrip": false,
|
| 4952 |
-
"single_word": false,
|
| 4953 |
-
"special": false
|
| 4954 |
-
},
|
| 4955 |
-
"64307": {
|
| 4956 |
-
"content": "âm_ới",
|
| 4957 |
-
"lstrip": false,
|
| 4958 |
-
"normalized": true,
|
| 4959 |
-
"rstrip": false,
|
| 4960 |
-
"single_word": false,
|
| 4961 |
-
"special": false
|
| 4962 |
-
},
|
| 4963 |
-
"64308": {
|
| 4964 |
-
"content": "đứ",
|
| 4965 |
-
"lstrip": false,
|
| 4966 |
-
"normalized": true,
|
| 4967 |
-
"rstrip": false,
|
| 4968 |
-
"single_word": false,
|
| 4969 |
-
"special": false
|
| 4970 |
-
},
|
| 4971 |
-
"64309": {
|
| 4972 |
-
"content": "a_hà_h",
|
| 4973 |
-
"lstrip": false,
|
| 4974 |
-
"normalized": true,
|
| 4975 |
-
"rstrip": false,
|
| 4976 |
-
"single_word": false,
|
| 4977 |
-
"special": false
|
| 4978 |
-
},
|
| 4979 |
-
"64310": {
|
| 4980 |
-
"content": "ô_g",
|
| 4981 |
-
"lstrip": false,
|
| 4982 |
-
"normalized": true,
|
| 4983 |
-
"rstrip": false,
|
| 4984 |
-
"single_word": false,
|
| 4985 |
-
"special": false
|
| 4986 |
-
},
|
| 4987 |
-
"64311": {
|
| 4988 |
-
"content": "Nam_ă",
|
| 4989 |
-
"lstrip": false,
|
| 4990 |
-
"normalized": true,
|
| 4991 |
-
"rstrip": false,
|
| 4992 |
-
"single_word": false,
|
| 4993 |
-
"special": false
|
| 4994 |
-
},
|
| 4995 |
-
"64312": {
|
| 4996 |
-
"content": "suấ",
|
| 4997 |
-
"lstrip": false,
|
| 4998 |
-
"normalized": true,
|
| 4999 |
-
"rstrip": false,
|
| 5000 |
-
"single_word": false,
|
| 5001 |
-
"special": false
|
| 5002 |
-
},
|
| 5003 |
-
"64313": {
|
| 5004 |
-
"content": "cải_hiệ",
|
| 5005 |
-
"lstrip": false,
|
| 5006 |
-
"normalized": true,
|
| 5007 |
-
"rstrip": false,
|
| 5008 |
-
"single_word": false,
|
| 5009 |
-
"special": false
|
| 5010 |
-
},
|
| 5011 |
-
"64314": {
|
| 5012 |
-
"content": "khă",
|
| 5013 |
-
"lstrip": false,
|
| 5014 |
-
"normalized": true,
|
| 5015 |
-
"rstrip": false,
|
| 5016 |
-
"single_word": false,
|
| 5017 |
-
"special": false
|
| 5018 |
-
},
|
| 5019 |
-
"64315": {
|
| 5020 |
-
"content": "eu_o",
|
| 5021 |
-
"lstrip": false,
|
| 5022 |
-
"normalized": true,
|
| 5023 |
-
"rstrip": false,
|
| 5024 |
-
"single_word": false,
|
| 5025 |
-
"special": false
|
| 5026 |
-
},
|
| 5027 |
-
"64316": {
|
| 5028 |
-
"content": "bổ_g",
|
| 5029 |
-
"lstrip": false,
|
| 5030 |
-
"normalized": true,
|
| 5031 |
-
"rstrip": false,
|
| 5032 |
-
"single_word": false,
|
| 5033 |
-
"special": false
|
| 5034 |
}
|
| 5035 |
},
|
| 5036 |
"bos_token": "<s>",
|
|
|
|
| 360 |
"single_word": false,
|
| 361 |
"special": false
|
| 362 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
"100": {
|
| 364 |
"content": "số",
|
| 365 |
"lstrip": false,
|
|
|
|
| 392 |
"single_word": false,
|
| 393 |
"special": false
|
| 394 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
"120": {
|
| 396 |
"content": "Hà_Nội",
|
| 397 |
"lstrip": false,
|
|
|
|
| 416 |
"single_word": false,
|
| 417 |
"special": false
|
| 418 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
"135": {
|
| 420 |
"content": "...",
|
| 421 |
"lstrip": false,
|
|
|
|
| 448 |
"single_word": false,
|
| 449 |
"special": false
|
| 450 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
"163": {
|
| 452 |
"content": "4",
|
| 453 |
"lstrip": false,
|
|
|
|
| 472 |
"single_word": false,
|
| 473 |
"special": false
|
| 474 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
"181": {
|
| 476 |
"content": "cấp",
|
| 477 |
"lstrip": false,
|
|
|
|
| 488 |
"single_word": false,
|
| 489 |
"special": false
|
| 490 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
"222": {
|
| 492 |
"content": "học",
|
| 493 |
"lstrip": false,
|
|
|
|
| 568 |
"single_word": false,
|
| 569 |
"special": false
|
| 570 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
"268": {
|
| 572 |
"content": "Phó",
|
| 573 |
"lstrip": false,
|
|
|
|
| 664 |
"single_word": false,
|
| 665 |
"special": false
|
| 666 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
"378": {
|
| 668 |
"content": "độ",
|
| 669 |
"lstrip": false,
|
|
|
|
| 688 |
"single_word": false,
|
| 689 |
"special": false
|
| 690 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
"447": {
|
| 692 |
"content": "hỏi",
|
| 693 |
"lstrip": false,
|
|
|
|
| 696 |
"single_word": false,
|
| 697 |
"special": false
|
| 698 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
"471": {
|
| 700 |
"content": "sớm",
|
| 701 |
"lstrip": false,
|
|
|
|
| 720 |
"single_word": false,
|
| 721 |
"special": false
|
| 722 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 723 |
"506": {
|
| 724 |
"content": "hộ",
|
| 725 |
"lstrip": false,
|
|
|
|
| 840 |
"single_word": false,
|
| 841 |
"special": false
|
| 842 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
"669": {
|
| 844 |
"content": "Đó",
|
| 845 |
"lstrip": false,
|
|
|
|
| 856 |
"single_word": false,
|
| 857 |
"special": false
|
| 858 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
"681": {
|
| 860 |
"content": "giới",
|
| 861 |
"lstrip": false,
|
|
|
|
| 864 |
"single_word": false,
|
| 865 |
"special": false
|
| 866 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
"711": {
|
| 868 |
"content": "bức",
|
| 869 |
"lstrip": false,
|
|
|
|
| 872 |
"single_word": false,
|
| 873 |
"special": false
|
| 874 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 875 |
"740": {
|
| 876 |
"content": "lập",
|
| 877 |
"lstrip": false,
|
|
|
|
| 928 |
"single_word": false,
|
| 929 |
"special": false
|
| 930 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
"835": {
|
| 932 |
"content": "phố",
|
| 933 |
"lstrip": false,
|
|
|
|
| 992 |
"single_word": false,
|
| 993 |
"special": false
|
| 994 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
"988": {
|
| 996 |
"content": "chở",
|
| 997 |
"lstrip": false,
|
|
|
|
| 1024 |
"single_word": false,
|
| 1025 |
"special": false
|
| 1026 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1027 |
"1111": {
|
| 1028 |
"content": "i",
|
| 1029 |
"lstrip": false,
|
|
|
|
| 1064 |
"single_word": false,
|
| 1065 |
"special": false
|
| 1066 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1067 |
"1292": {
|
| 1068 |
"content": "châu_Á",
|
| 1069 |
"lstrip": false,
|
|
|
|
| 1080 |
"single_word": false,
|
| 1081 |
"special": false
|
| 1082 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1083 |
"1351": {
|
| 1084 |
"content": "Cả",
|
| 1085 |
"lstrip": false,
|
|
|
|
| 1128 |
"single_word": false,
|
| 1129 |
"special": false
|
| 1130 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1131 |
"1517": {
|
| 1132 |
"content": "a",
|
| 1133 |
"lstrip": false,
|
|
|
|
| 1160 |
"single_word": false,
|
| 1161 |
"special": false
|
| 1162 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1163 |
"1615": {
|
| 1164 |
"content": "Chiều",
|
| 1165 |
"lstrip": false,
|
|
|
|
| 1168 |
"single_word": false,
|
| 1169 |
"special": false
|
| 1170 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1171 |
"1664": {
|
| 1172 |
"content": "h",
|
| 1173 |
"lstrip": false,
|
|
|
|
| 1184 |
"single_word": false,
|
| 1185 |
"special": false
|
| 1186 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1187 |
"1775": {
|
| 1188 |
"content": "đám",
|
| 1189 |
"lstrip": false,
|
|
|
|
| 1208 |
"single_word": false,
|
| 1209 |
"special": false
|
| 1210 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
"2089": {
|
| 1212 |
"content": "pháp_lý",
|
| 1213 |
"lstrip": false,
|
|
|
|
| 1232 |
"single_word": false,
|
| 1233 |
"special": false
|
| 1234 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1235 |
"2185": {
|
| 1236 |
"content": "phó",
|
| 1237 |
"lstrip": false,
|
|
|
|
| 1256 |
"single_word": false,
|
| 1257 |
"special": false
|
| 1258 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1259 |
"2217": {
|
| 1260 |
"content": "đậm",
|
| 1261 |
"lstrip": false,
|
|
|
|
| 1312 |
"single_word": false,
|
| 1313 |
"special": false
|
| 1314 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1315 |
"2557": {
|
| 1316 |
"content": "chức_vụ",
|
| 1317 |
"lstrip": false,
|
|
|
|
| 1360 |
"single_word": false,
|
| 1361 |
"special": false
|
| 1362 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1363 |
"2991": {
|
| 1364 |
"content": "ô",
|
| 1365 |
"lstrip": false,
|
|
|
|
| 1416 |
"single_word": false,
|
| 1417 |
"special": false
|
| 1418 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1419 |
"3988": {
|
| 1420 |
"content": "ổ",
|
| 1421 |
"lstrip": false,
|
|
|
|
| 1448 |
"single_word": false,
|
| 1449 |
"special": false
|
| 1450 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1451 |
"4318": {
|
| 1452 |
"content": "MTTQ",
|
| 1453 |
"lstrip": false,
|
|
|
|
| 1560 |
"single_word": false,
|
| 1561 |
"special": false
|
| 1562 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1563 |
"6007": {
|
| 1564 |
"content": "hư",
|
| 1565 |
"lstrip": false,
|
|
|
|
| 1688 |
"single_word": false,
|
| 1689 |
"special": false
|
| 1690 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1691 |
"8942": {
|
| 1692 |
"content": "lê",
|
| 1693 |
"lstrip": false,
|
|
|
|
| 1720 |
"single_word": false,
|
| 1721 |
"special": false
|
| 1722 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1723 |
"9456": {
|
| 1724 |
"content": "ề",
|
| 1725 |
"lstrip": false,
|
|
|
|
| 1848 |
"single_word": false,
|
| 1849 |
"special": false
|
| 1850 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1851 |
"13291": {
|
| 1852 |
"content": "ộc",
|
| 1853 |
"lstrip": false,
|
|
|
|
| 1888 |
"single_word": false,
|
| 1889 |
"special": false
|
| 1890 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1891 |
"15145": {
|
| 1892 |
"content": "úc",
|
| 1893 |
"lstrip": false,
|
|
|
|
| 1928 |
"single_word": false,
|
| 1929 |
"special": false
|
| 1930 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1931 |
"17341": {
|
| 1932 |
"content": "iếp",
|
| 1933 |
"lstrip": false,
|
|
|
|
| 1952 |
"single_word": false,
|
| 1953 |
"special": false
|
| 1954 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1955 |
"20463": {
|
| 1956 |
"content": "kiều_hối",
|
| 1957 |
"lstrip": false,
|
|
|
|
| 2000 |
"single_word": false,
|
| 2001 |
"special": false
|
| 2002 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2003 |
"30251": {
|
| 2004 |
"content": "ă",
|
| 2005 |
"lstrip": false,
|
|
|
|
| 2040 |
"single_word": false,
|
| 2041 |
"special": false
|
| 2042 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2043 |
"37463": {
|
| 2044 |
"content": "ổ_chức",
|
| 2045 |
"lstrip": false,
|
|
|
|
| 2088 |
"single_word": false,
|
| 2089 |
"special": false
|
| 2090 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2091 |
"48978": {
|
| 2092 |
"content": "â",
|
| 2093 |
"lstrip": false,
|
|
|
|
| 4367 |
"rstrip": false,
|
| 4368 |
"single_word": false,
|
| 4369 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4370 |
}
|
| 4371 |
},
|
| 4372 |
"bos_token": "<s>",
|