longdnk commited on
Commit
307e2ab
·
verified ·
1 Parent(s): ac5d04d

Upload vocab.json

Browse files
Files changed (1) hide show
  1. vocab.json +1 -708
vocab.json CHANGED
@@ -1,708 +1 @@
1
- {
2
- "<pad>": 0,
3
- "<s>": 1,
4
- "</s>": 2,
5
- "<unk>": 3,
6
- "|": 4,
7
- "a": 5,
8
- "á": 6,
9
- "à": 7,
10
- "ả": 8,
11
- "ã": 9,
12
- "ạ": 10,
13
- "e": 11,
14
- "é": 12,
15
- "è": 13,
16
- "ẻ": 14,
17
- "ẽ": 15,
18
- "ẹ": 16,
19
- "ê": 17,
20
- "ế": 18,
21
- "ề": 19,
22
- "ể": 20,
23
- "ễ": 21,
24
- "ệ": 22,
25
- "i": 23,
26
- "í": 24,
27
- "ì": 25,
28
- "ỉ": 26,
29
- "ĩ": 27,
30
- "ị": 28,
31
- "o": 29,
32
- "ó": 30,
33
- "ò": 31,
34
- "ỏ": 32,
35
- "õ": 33,
36
- "ọ": 34,
37
- "ơ": 35,
38
- "ớ": 36,
39
- "ờ": 37,
40
- "ở": 38,
41
- "ỡ": 39,
42
- "ợ": 40,
43
- "ô": 41,
44
- "ố": 42,
45
- "ồ": 43,
46
- "ổ": 44,
47
- "ỗ": 45,
48
- "ộ": 46,
49
- "u": 47,
50
- "ú": 48,
51
- "ù": 49,
52
- "ủ": 50,
53
- "ũ": 51,
54
- "ụ": 52,
55
- "ư": 53,
56
- "ứ": 54,
57
- "ừ": 55,
58
- "ử": 56,
59
- "ữ": 57,
60
- "ự": 58,
61
- "y": 59,
62
- "ỳ": 60,
63
- "ý": 61,
64
- "ỷ": 62,
65
- "ỹ": 63,
66
- "ỵ": 64,
67
- "ă": 65,
68
- "ắ": 66,
69
- "ằ": 67,
70
- "ẳ": 68,
71
- "ẵ": 69,
72
- "ặ": 70,
73
- "â": 71,
74
- "ấ": 72,
75
- "ầ": 73,
76
- "ẩ": 74,
77
- "ẫ": 75,
78
- "ậ": 76,
79
- "đ": 77,
80
- "q": 78,
81
- "w": 79,
82
- "r": 80,
83
- "t": 81,
84
- "p": 82,
85
- "s": 83,
86
- "d": 84,
87
- "f": 85,
88
- "g": 86,
89
- "h": 87,
90
- "j": 88,
91
- "k": 89,
92
- "l": 90,
93
- "z": 91,
94
- "x": 92,
95
- "c": 93,
96
- "v": 94,
97
- "b": 95,
98
- "n": 96,
99
- "m": 97,
100
- "th": 98,
101
- "ch": 99,
102
- "kh": 100,
103
- "ph": 101,
104
- "nh": 102,
105
- "gh": 103,
106
- "qu": 104,
107
- "ng": 105,
108
- "ngh": 106,
109
- "tr": 107,
110
- "ác": 108,
111
- "ạc": 109,
112
- "ai": 110,
113
- "ái": 111,
114
- "ài": 112,
115
- "ải": 113,
116
- "ãi": 114,
117
- "ại": 115,
118
- "am": 116,
119
- "ám": 117,
120
- "àm": 118,
121
- "ảm": 119,
122
- "ãm": 120,
123
- "ạm": 121,
124
- "an": 122,
125
- "án": 123,
126
- "àn": 124,
127
- "ản": 125,
128
- "ãn": 126,
129
- "ạn": 127,
130
- "ao": 128,
131
- "áo": 129,
132
- "ào": 130,
133
- "ảo": 131,
134
- "ão": 132,
135
- "ạo": 133,
136
- "au": 134,
137
- "áu": 135,
138
- "àu": 136,
139
- "ảu": 137,
140
- "ãu": 138,
141
- "ạu": 139,
142
- "áp": 140,
143
- "ạp": 141,
144
- "át": 142,
145
- "ạt": 143,
146
- "ay": 144,
147
- "áy": 145,
148
- "ày": 146,
149
- "ảy": 147,
150
- "ãy": 148,
151
- "ạy": 149,
152
- "ắc": 150,
153
- "ặc": 151,
154
- "ăm": 152,
155
- "ằm": 153,
156
- "ắm": 154,
157
- "ẳm": 155,
158
- "ẵm": 156,
159
- "ặm": 157,
160
- "ăn": 158,
161
- "ắn": 159,
162
- "ằn": 160,
163
- "ẳn": 161,
164
- "ẵn": 162,
165
- "ặn": 163,
166
- "ắp": 164,
167
- "ặp": 165,
168
- "ắt": 166,
169
- "ặt": 167,
170
- "ấc": 168,
171
- "ậc": 169,
172
- "âm": 170,
173
- "ấm": 171,
174
- "ầm": 172,
175
- "ẩm": 173,
176
- "ẫm": 174,
177
- "ậm": 175,
178
- "ân": 176,
179
- "ấn": 177,
180
- "ần": 178,
181
- "ẩn": 179,
182
- "ẫn": 180,
183
- "ận": 181,
184
- "ấp": 182,
185
- "ập": 183,
186
- "ất": 184,
187
- "ật": 185,
188
- "âu": 186,
189
- "ấu": 187,
190
- "ầu": 188,
191
- "ẩu": 189,
192
- "ẫu": 190,
193
- "ậu": 191,
194
- "ây": 192,
195
- "ấy": 193,
196
- "ầy": 194,
197
- "ẩy": 195,
198
- "ẫy": 196,
199
- "ậy": 197,
200
- "éc": 198,
201
- "ẹc": 199,
202
- "em": 200,
203
- "ém": 201,
204
- "èm": 202,
205
- "ẻm": 203,
206
- "ẽm": 204,
207
- "ẹm": 205,
208
- "en": 206,
209
- "én": 207,
210
- "èn": 208,
211
- "ẻn": 209,
212
- "ẽn": 210,
213
- "ẹn": 211,
214
- "eo": 212,
215
- "éo": 213,
216
- "èo": 214,
217
- "ẻo": 215,
218
- "ẽo": 216,
219
- "ẹo": 217,
220
- "ép": 218,
221
- "ẹp": 219,
222
- "ét": 220,
223
- "ẹt": 221,
224
- "êm": 222,
225
- "ếm": 223,
226
- "ềm": 224,
227
- "ễm": 225,
228
- "ệm": 226,
229
- "ên": 227,
230
- "ến": 228,
231
- "ền": 229,
232
- "ển": 230,
233
- "ện": 231,
234
- "ếp": 232,
235
- "ệp": 233,
236
- "ết": 234,
237
- "ệt": 235,
238
- "êu": 236,
239
- "ếu": 237,
240
- "ều": 238,
241
- "ểu": 239,
242
- "ễu": 240,
243
- "ệu": 241,
244
- "ia": 242,
245
- "ía": 243,
246
- "ìa": 244,
247
- "ỉa": 245,
248
- "ĩa": 246,
249
- "ịa": 247,
250
- "im": 248,
251
- "ím": 249,
252
- "ìm": 250,
253
- "ỉm": 251,
254
- "ĩm": 252,
255
- "ịm": 253,
256
- "in": 254,
257
- "ín": 255,
258
- "ìn": 256,
259
- "ỉn": 257,
260
- "ịn": 258,
261
- "íp": 259,
262
- "ịp": 260,
263
- "ít": 261,
264
- "ịt": 262,
265
- "iu": 263,
266
- "íu": 264,
267
- "ìu": 265,
268
- "ỉu": 266,
269
- "ĩu": 267,
270
- "ịu": 268,
271
- "oa": 269,
272
- "óa": 270,
273
- "òa": 271,
274
- "ỏa": 272,
275
- "õa": 273,
276
- "ọa": 274,
277
- "oà": 275,
278
- "óc": 276,
279
- "ọc": 277,
280
- "oe": 278,
281
- "óe": 279,
282
- "òe": 280,
283
- "ỏe": 281,
284
- "ọe": 282,
285
- "oẹ": 283,
286
- "oi": 284,
287
- "ói": 285,
288
- "òi": 286,
289
- "ỏi": 287,
290
- "õi": 288,
291
- "ọi": 289,
292
- "om": 290,
293
- "óm": 291,
294
- "òm": 292,
295
- "ỏm": 293,
296
- "õm": 294,
297
- "ọm": 295,
298
- "on": 296,
299
- "ón": 297,
300
- "òn": 298,
301
- "ỏn": 299,
302
- "õn": 300,
303
- "ọn": 301,
304
- "óp": 302,
305
- "ọp": 303,
306
- "ót": 304,
307
- "ọt": 305,
308
- "ốc": 306,
309
- "ộc": 307,
310
- "ôi": 308,
311
- "ối": 309,
312
- "ồi": 310,
313
- "ổi": 311,
314
- "ỗi": 312,
315
- "ội": 313,
316
- "ôm": 314,
317
- "ốm": 315,
318
- "ồm": 316,
319
- "ổm": 317,
320
- "ỗm": 318,
321
- "ộm": 319,
322
- "ôn": 320,
323
- "ốn": 321,
324
- "ồn": 322,
325
- "ổn": 323,
326
- "ỗn": 324,
327
- "ộn": 325,
328
- "ốp": 326,
329
- "ộp": 327,
330
- "ốt": 328,
331
- "ột": 329,
332
- "ơi": 330,
333
- "ới": 331,
334
- "ời": 332,
335
- "ởi": 333,
336
- "ỡi": 334,
337
- "ợi": 335,
338
- "ơm": 336,
339
- "ớm": 337,
340
- "ờm": 338,
341
- "ởm": 339,
342
- "ỡm": 340,
343
- "ợm": 341,
344
- "ơn": 342,
345
- "ớn": 343,
346
- "ờn": 344,
347
- "ởn": 345,
348
- "ỡn": 346,
349
- "ợn": 347,
350
- "ớp": 348,
351
- "ợp": 349,
352
- "ớt": 350,
353
- "ợt": 351,
354
- "ua": 352,
355
- "úa": 353,
356
- "ùa": 354,
357
- "ủa": 355,
358
- "ũa": 356,
359
- "ụa": 357,
360
- "úc": 358,
361
- "ục": 359,
362
- "uê": 360,
363
- "uế": 361,
364
- "uề": 362,
365
- "uể": 363,
366
- "uệ": 364,
367
- "ui": 365,
368
- "úi": 366,
369
- "ùi": 367,
370
- "ủi": 368,
371
- "ũi": 369,
372
- "ụi": 370,
373
- "um": 371,
374
- "úm": 372,
375
- "ùm": 373,
376
- "ủm": 374,
377
- "ũm": 375,
378
- "ụm": 376,
379
- "un": 377,
380
- "ún": 378,
381
- "ùn": 379,
382
- "ủn": 380,
383
- "ũn": 381,
384
- "ụn": 382,
385
- "úp": 383,
386
- "ụp": 384,
387
- "út": 385,
388
- "ụt": 386,
389
- "uy": 387,
390
- "úy": 388,
391
- "ùy": 389,
392
- "ủy": 390,
393
- "ũy": 391,
394
- "ụy": 392,
395
- "ưa": 393,
396
- "ứa": 394,
397
- "ừa": 395,
398
- "ửa": 396,
399
- "ữa": 397,
400
- "ựa": 398,
401
- "ức": 399,
402
- "ực": 400,
403
- "ửi": 401,
404
- "ừm": 402,
405
- "uơ": 403,
406
- "uở": 404,
407
- "ứt": 405,
408
- "ựt": 406,
409
- "ưu": 407,
410
- "ứu": 408,
411
- "ừu": 409,
412
- "ửu": 410,
413
- "ữu": 411,
414
- "ựu": 412,
415
- "sh": 413,
416
- "aw": 414,
417
- "ee": 415,
418
- "ea": 416,
419
- "ei": 417,
420
- "ew": 418,
421
- "eu": 419,
422
- "ie": 420,
423
- "oo": 421,
424
- "ou": 422,
425
- "ow": 423,
426
- "oy": 424,
427
- "ue": 425,
428
- "io": 426,
429
- "ách": 427,
430
- "ạch": 428,
431
- "ang": 429,
432
- "áng": 430,
433
- "àng": 431,
434
- "ảng": 432,
435
- "ãng": 433,
436
- "ạng": 434,
437
- "anh": 435,
438
- "ánh": 436,
439
- "ành": 437,
440
- "ảnh": 438,
441
- "ãnh": 439,
442
- "ạnh": 440,
443
- "ăng": 441,
444
- "ắng": 442,
445
- "ằng": 443,
446
- "ẳng": 444,
447
- "ẵng": 445,
448
- "ặng": 446,
449
- "âng": 447,
450
- "ấng": 448,
451
- "ầng": 449,
452
- "ẩng": 450,
453
- "ẫng": 451,
454
- "ậng": 452,
455
- "eng": 453,
456
- "éng": 454,
457
- "èng": 455,
458
- "ẻng": 456,
459
- "ếch": 457,
460
- "ệch": 458,
461
- "ênh": 459,
462
- "ếnh": 460,
463
- "ềnh": 461,
464
- "ểnh": 462,
465
- "ễnh": 463,
466
- "ệnh": 464,
467
- "ích": 465,
468
- "ịch": 466,
469
- "iếc": 467,
470
- "iệc": 468,
471
- "iêm": 469,
472
- "iếm": 470,
473
- "iềm": 471,
474
- "iểm": 472,
475
- "iễm": 473,
476
- "iệm": 474,
477
- "iên": 475,
478
- "iến": 476,
479
- "iền": 477,
480
- "iển": 478,
481
- "iễn": 479,
482
- "iện": 480,
483
- "iếp": 481,
484
- "iệp": 482,
485
- "iết": 483,
486
- "iệt": 484,
487
- "iêu": 485,
488
- "iếu": 486,
489
- "iều": 487,
490
- "iểu": 488,
491
- "iễu": 489,
492
- "iệu": 490,
493
- "inh": 491,
494
- "ính": 492,
495
- "ình": 493,
496
- "ỉnh": 494,
497
- "ĩnh": 495,
498
- "ịnh": 496,
499
- "oác": 497,
500
- "oạc": 498,
501
- "oai": 499,
502
- "oái": 500,
503
- "oài": 501,
504
- "oải": 502,
505
- "oãi": 503,
506
- "oại": 504,
507
- "oàm": 505,
508
- "oan": 506,
509
- "oán": 507,
510
- "oàn": 508,
511
- "oản": 509,
512
- "oãn": 510,
513
- "oạn": 511,
514
- "oao": 512,
515
- "oáo": 513,
516
- "oáp": 514,
517
- "oạp": 515,
518
- "oát": 516,
519
- "oạt": 517,
520
- "oay": 518,
521
- "oáy": 519,
522
- "oảy": 520,
523
- "oắc": 521,
524
- "oặc": 522,
525
- "oăm": 523,
526
- "oăn": 524,
527
- "oẳn": 525,
528
- "oắn": 526,
529
- "oằn": 527,
530
- "oắt": 528,
531
- "oặt": 529,
532
- "oen": 530,
533
- "oẻn": 531,
534
- "oeo": 532,
535
- "oéo": 533,
536
- "oèo": 534,
537
- "oẻo": 535,
538
- "oét": 536,
539
- "oẹt": 537,
540
- "ong": 538,
541
- "óng": 539,
542
- "òng": 540,
543
- "ỏng": 541,
544
- "õng": 542,
545
- "ọng": 543,
546
- "oóc": 544,
547
- "oọc": 545,
548
- "ông": 546,
549
- "ống": 547,
550
- "ồng": 548,
551
- "ổng": 549,
552
- "ỗng": 550,
553
- "ộng": 551,
554
- "uân": 552,
555
- "uấn": 553,
556
- "uần": 554,
557
- "uẩn": 555,
558
- "uẫn": 556,
559
- "uận": 557,
560
- "uất": 558,
561
- "uật": 559,
562
- "uây": 560,
563
- "uấy": 561,
564
- "uầy": 562,
565
- "ung": 563,
566
- "úng": 564,
567
- "ùng": 565,
568
- "ủng": 566,
569
- "ũng": 567,
570
- "ụng": 568,
571
- "uốc": 569,
572
- "uộc": 570,
573
- "uôi": 571,
574
- "uối": 572,
575
- "uồi": 573,
576
- "uổi": 574,
577
- "uỗi": 575,
578
- "uội": 576,
579
- "uôm": 577,
580
- "uốm": 578,
581
- "uồm": 579,
582
- "uỗm": 580,
583
- "uộm": 581,
584
- "uôn": 582,
585
- "uốn": 583,
586
- "uồn": 584,
587
- "uỗn": 585,
588
- "uộn": 586,
589
- "uốt": 587,
590
- "uột": 588,
591
- "uýt": 589,
592
- "uỵt": 590,
593
- "uya": 591,
594
- "uỷu": 592,
595
- "ưng": 593,
596
- "ứng": 594,
597
- "ừng": 595,
598
- "ửng": 596,
599
- "ững": 597,
600
- "ựng": 598,
601
- "ước": 599,
602
- "ược": 600,
603
- "ươi": 601,
604
- "ưới": 602,
605
- "ười": 603,
606
- "ưởi": 604,
607
- "ưỡi": 605,
608
- "ượi": 606,
609
- "ươm": 607,
610
- "ướm": 608,
611
- "ườm": 609,
612
- "ượm": 610,
613
- "ươn": 611,
614
- "ướn": 612,
615
- "ườn": 613,
616
- "ưỡn": 614,
617
- "ượn": 615,
618
- "ướp": 616,
619
- "ượp": 617,
620
- "ướt": 618,
621
- "ượt": 619,
622
- "ươu": 620,
623
- "ướu": 621,
624
- "ượu": 622,
625
- "yêm": 623,
626
- "yếm": 624,
627
- "yểm": 625,
628
- "yên": 626,
629
- "yến": 627,
630
- "yêu": 628,
631
- "yếu": 629,
632
- "yểu": 630,
633
- "yết": 631,
634
- "iêng": 632,
635
- "iếng": 633,
636
- "iềng": 634,
637
- "iểng": 635,
638
- "iễng": 636,
639
- "iệng": 637,
640
- "oách": 638,
641
- "oạch": 639,
642
- "oang": 640,
643
- "oáng": 641,
644
- "oàng": 642,
645
- "oảng": 643,
646
- "oãng": 644,
647
- "oạng": 645,
648
- "oanh": 646,
649
- "oánh": 647,
650
- "oành": 648,
651
- "oạnh": 649,
652
- "oảnh": 650,
653
- "oăng": 651,
654
- "oắng": 652,
655
- "oằng": 653,
656
- "oẳng": 654,
657
- "oong": 655,
658
- "uếch": 656,
659
- "uênh": 657,
660
- "uông": 658,
661
- "uống": 659,
662
- "uồng": 660,
663
- "uổng": 661,
664
- "uỗng": 662,
665
- "uộng": 663,
666
- "uých": 664,
667
- "uỵch": 665,
668
- "uyên": 666,
669
- "uyến": 667,
670
- "uyền": 668,
671
- "uyển": 669,
672
- "uyễn": 670,
673
- "uyện": 671,
674
- "uyết": 672,
675
- "uyệt": 673,
676
- "uynh": 674,
677
- "uỳnh": 675,
678
- "uýnh": 676,
679
- "uỷnh": 677,
680
- "ương": 678,
681
- "ướng": 679,
682
- "ường": 680,
683
- "ưởng": 681,
684
- "ưỡng": 682,
685
- "ượng": 683,
686
- "op": 684,
687
- "ot": 685,
688
- "gi": 686,
689
- "ap": 687,
690
- "at": 688,
691
- "ac": 689,
692
- "it": 690,
693
- "ip": 691,
694
- "ic": 692,
695
- "ep": 693,
696
- "et": 694,
697
- "ec": 695,
698
- "1": 696,
699
- "2": 697,
700
- "3": 698,
701
- "4": 699,
702
- "5": 700,
703
- "6": 701,
704
- "7": 702,
705
- "8": 703,
706
- "9": 704,
707
- "0": 705
708
- }
 
1
+ {"ẻ": 0, "6": 1, "ụ": 2, "í": 3, "3": 4, "ỹ": 5, "ý": 6, "ẩ": 7, "ở": 8, "ề": 9, "õ": 10, "7": 11, "ê": 12, "ứ": 13, "ỏ": 14, "v": 15, "ỷ": 16, "a": 17, "l": 18, "ự": 19, "q": 20, "ờ": 21, "j": 22, "ố": 23, "à": 24, "ỗ": 25, "n": 26, "é": 27, "ủ": 28, "у": 29, "ô": 30, "u": 31, "y": 32, "ằ": 33, "4": 34, "w": 35, "b": 36, "ệ": 37, "ễ": 38, "s": 39, "ì": 40, "ầ": 41, "ỵ": 42, "8": 43, "d": 44, "ể": 45, "r": 47, "ũ": 48, "c": 49, "ạ": 50, "9": 51, "ế": 52, "ù": 53, "ỡ": 54, "2": 55, "t": 56, "i": 57, "g": 58, "́": 59, "ử": 60, "̀": 61, "á": 62, "0": 63, "ậ": 64, "e": 65, "ộ": 66, "m": 67, "ẳ": 68, "ợ": 69, "ĩ": 70, "h": 71, "â": 72, "ú": 73, "ọ": 74, "ồ": 75, "ặ": 76, "f": 77, "ữ": 78, "ắ": 79, "ỳ": 80, "x": 81, "ó": 82, "ã": 83, "ổ": 84, "ị": 85, "̣": 86, "z": 87, "ả": 88, "đ": 89, "è": 90, "ừ": 91, "ò": 92, "ẵ": 93, "1": 94, "ơ": 95, "k": 96, "ẫ": 97, "p": 98, "ấ": 99, "ẽ": 100, "ỉ": 101, "ớ": 102, "ẹ": 103, "ă": 104, "o": 105, "ư": 106, "5": 107, "|": 46, "<unk>": 108, "<pad>": 109}