Commit
·
0f0ba4f
1
Parent(s):
c11d84b
add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"
|
|
|
|
| 1 |
+
{"c": 0, "ề": 1, "ữ": 2, "b": 3, "õ": 4, "ẽ": 5, "ĩ": 6, "w": 7, "ự": 8, "é": 9, "'": 10, "ổ": 11, "í": 12, "ớ": 13, "ắ": 14, "ó": 15, "ị": 16, "đ": 17, "z": 18, "y": 19, "p": 20, "ũ": 21, "a": 22, "m": 23, "ặ": 24, "ý": 25, "l": 26, "f": 27, "ỉ": 28, "h": 29, "ả": 30, "ằ": 31, "ô": 32, "ấ": 33, "ẩ": 34, "ử": 35, "ẻ": 36, "ứ": 37, "t": 38, "ì": 39, "ệ": 40, "ỏ": 41, "ụ": 42, "ờ": 43, "u": 44, "ể": 45, "o": 47, "ẫ": 48, "â": 49, "n": 50, "s": 51, "ố": 52, "ỡ": 53, "ầ": 54, "ế": 55, "à": 56, "ậ": 57, "ẵ": 58, "ú": 59, "ợ": 60, "v": 61, "ỵ": 62, "i": 63, "ẹ": 64, "x": 65, "è": 66, "ă": 67, "ã": 68, "ỗ": 69, "ù": 70, "ạ": 71, "ỷ": 72, "ồ": 73, "d": 74, "á": 75, "ở": 76, "ọ": 77, "ư": 78, "ê": 79, "ủ": 80, "ễ": 81, "ỹ": 82, "ơ": 83, "ò": 84, "ẳ": 85, "ừ": 86, "g": 87, "r": 88, "ỳ": 89, "q": 90, "e": 91, "ộ": 92, "k": 93, "j": 94, "|": 46, "[UNK]": 95, "[PAD]": 96}
|