add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"
|
|
|
|
| 1 |
+
{"ọ": 0, "v": 1, "ẩ": 2, "a": 3, "ũ": 4, "e": 5, "u": 6, "ố": 7, "ù": 8, "4": 9, "ĩ": 10, "ẻ": 11, "ằ": 12, "ồ": 13, "m": 14, "d": 15, "í": 16, "ầ": 17, "ứ": 18, "ê": 19, "ợ": 20, "ỹ": 21, "é": 22, "ẽ": 23, "ý": 24, "ề": 25, "ử": 26, "ổ": 27, "l": 28, "ơ": 29, "ế": 30, "s": 31, "i": 32, "ò": 33, "â": 34, "ẫ": 35, "ệ": 36, "ú": 37, "y": 38, "õ": 39, "ộ": 40, "ẹ": 41, "ụ": 42, "ó": 43, "p": 44, "ủ": 45, "ấ": 46, "ỡ": 47, "đ": 48, "ậ": 49, "ặ": 50, "à": 51, "n": 52, "ỷ": 53, "ả": 54, "ỳ": 55, "ă": 56, "ờ": 57, "ữ": 59, "x": 60, "q": 61, "ể": 62, "ớ": 63, "á": 64, "ẵ": 65, "ị": 66, "è": 67, "ạ": 68, "c": 69, "ẳ": 70, "ắ": 71, "r": 72, "ỵ": 73, "ì": 74, "t": 75, "ở": 76, "ự": 77, "ỏ": 78, "ư": 79, "g": 80, "ễ": 81, "ỉ": 82, "ã": 83, "ỗ": 84, "ô": 85, "o": 86, "k": 87, "ừ": 88, "b": 89, "h": 90, "|": 58, "[UNK]": 91, "[PAD]": 92}
|