tclong committed on
Commit
984e2f8
·
1 Parent(s): 8af0afd

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ô": 0, "": 1, "l": 2, "é": 3, "p": 4, "": 5, "": 6, "n": 7, "": 8, "ơ": 9, "e": 10, "": 11, "ă": 12, "â": 13, "": 14, "": 15, "": 16, "": 17, "": 18, "": 19, "ý": 20, "": 21, "à": 22, "g": 23, "ế": 24, "": 25, "": 26, "": 27, "a": 28, "": 29, "è": 30, "b": 31, "k": 32, "r": 33, "o": 34, "v": 35, "": 36, "": 37, "q": 38, "": 39, "": 40, "ũ": 41, "á": 42, "": 43, "": 44, "": 45, "ó": 46, "ĩ": 47, "c": 48, "m": 49, "": 50, "": 52, "": 53, "ù": 54, "ê": 55, "x": 56, "": 57, "": 58, "ự": 59, "": 60, "": 61, "s": 62, "d": 63, "": 64, "": 65, "í": 66, "": 67, "ì": 68, "": 69, "": 70, "": 71, "h": 72, "u": 73, "ò": 74, "": 75, "ú": 76, "i": 77, "": 78, "õ": 79, "t": 80, "": 81, "ã": 82, "4": 83, "": 84, "đ": 85, "y": 86, "ư": 87, "": 88, "": 89, "": 90, "|": 51, "[UNK]": 91, "[PAD]": 92}
 
1
+ {"": 0, "v": 1, "": 2, "a": 3, "ũ": 4, "e": 5, "u": 6, "": 7, "ù": 8, "4": 9, "ĩ": 10, "": 11, "": 12, "": 13, "m": 14, "d": 15, "í": 16, "": 17, "": 18, "ê": 19, "": 20, "": 21, "é": 22, "": 23, "ý": 24, "": 25, "": 26, "": 27, "l": 28, "ơ": 29, "ế": 30, "s": 31, "i": 32, "ò": 33, "â": 34, "": 35, "": 36, "ú": 37, "y": 38, "õ": 39, "": 40, "": 41, "": 42, "ó": 43, "p": 44, "": 45, "": 46, "": 47, "đ": 48, "": 49, "": 50, "à": 51, "n": 52, "": 53, "": 54, "": 55, "ă": 56, "": 57, "": 59, "x": 60, "q": 61, "": 62, "": 63, "á": 64, "": 65, "": 66, "è": 67, "": 68, "c": 69, "": 70, "": 71, "r": 72, "": 73, "ì": 74, "t": 75, "": 76, "": 77, "": 78, "ư": 79, "g": 80, "": 81, "": 82, "ã": 83, "": 84, "ô": 85, "o": 86, "k": 87, "": 88, "b": 89, "h": 90, "|": 58, "[UNK]": 91, "[PAD]": 92}