DangHuuTrang committed on
Commit
0f0ba4f
·
1 Parent(s): c11d84b

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"i": 0, "b": 1, "": 2, "u": 3, "": 4, "v": 5, "": 6, "r": 7, "â": 8, "": 9, "o": 10, "í": 11, "h": 12, "": 13, "": 14, "s": 15, "õ": 16, "": 17, "": 18, "ù": 19, "y": 20, "ĩ": 21, "": 22, "ô": 23, "": 24, "t": 25, "ì": 26, "": 27, "è": 28, "à": 29, "m": 31, "f": 32, "ó": 33, "ũ": 34, "": 35, "": 36, "ư": 37, "": 38, "é": 39, "": 40, "": 41, "q": 42, "": 43, "ế": 44, "'": 45, "": 46, "a": 47, "": 48, "g": 49, "": 50, "ă": 51, "": 52, "n": 53, "á": 54, "j": 55, "": 56, "d": 57, "e": 58, "": 59, "": 60, "ý": 61, "đ": 62, "ò": 63, "z": 64, "l": 65, "w": 66, "ơ": 67, "": 68, "ê": 69, "": 70, "ạ": 71, "": 72, "": 73, "": 74, "": 75, "p": 76, "": 77, "": 78, "": 79, "": 80, "": 81, "": 82, "c": 83, "": 84, "x": 85, "": 86, "k": 87, "": 88, "": 89, "": 90, "ú": 91, "": 92, "": 93, "ã": 94, "|": 30, "[UNK]": 95, "[PAD]": 96}
 
1
+ {"c": 0, "": 1, "": 2, "b": 3, "õ": 4, "": 5, "ĩ": 6, "w": 7, "": 8, "é": 9, "'": 10, "": 11, "í": 12, "": 13, "": 14, "ó": 15, "": 16, "đ": 17, "z": 18, "y": 19, "p": 20, "ũ": 21, "a": 22, "m": 23, "": 24, "ý": 25, "l": 26, "f": 27, "": 28, "h": 29, "": 30, "ằ": 31, "ô": 32, "": 33, "": 34, "": 35, "": 36, "": 37, "t": 38, "ì": 39, "": 40, "": 41, "": 42, "": 43, "u": 44, "": 45, "o": 47, "": 48, "â": 49, "n": 50, "s": 51, "": 52, "": 53, "": 54, "ế": 55, "à": 56, "": 57, "": 58, "ú": 59, "": 60, "v": 61, "": 62, "i": 63, "": 64, "x": 65, "è": 66, "ă": 67, "ã": 68, "": 69, "ù": 70, "ạ": 71, "": 72, "": 73, "d": 74, "á": 75, "": 76, "": 77, "ư": 78, "ê": 79, "": 80, "": 81, "": 82, "ơ": 83, "ò": 84, "": 85, "": 86, "g": 87, "r": 88, "": 89, "q": 90, "e": 91, "": 92, "k": 93, "j": 94, "|": 46, "[UNK]": 95, "[PAD]": 96}