LanPham committed on
Commit
bbeb3b8
·
1 Parent(s): f6f16c0

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ĩ": 0, "ở": 1, "ả": 2, "đ": 3, "à": 4, "a": 5, "y": 6, "ũ": 7, "ữ": 8, "ồ": 9, "n": 10, "i": 11, "r": 12, "ừ": 13, "ỏ": 14, "ủ": 15, "ẹ": 16, "g": 17, "ế": 18, "ỳ": 19, "ỡ": 20, "ặ": 21, "m": 22, "x": 23, "ỗ": 24, "ố": 25, "ẽ": 26, "d": 27, "ễ": 28, "ò": 29, "s": 30, "ề": 31, "ô": 32, "ú": 33, "è": 34, "ụ": 35, "ẩ": 36, "ă": 37, "ỷ": 38, "ằ": 39, "ỹ": 40, "ờ": 41, "o": 42, "ù": 43, "õ": 44, "ó": 45, "ọ": 46, "ư": 47, "ậ": 48, "k": 49, "h": 50, "ẫ": 51, "ì": 52, "ấ": 53, "ã": 54, "ể": 55, "t": 56, "ự": 57, "ẻ": 58, "ẵ": 59, "e": 60, "ê": 61, "í": 62, "ộ": 63, "á": 64, "b": 65, "ạ": 66, "ợ": 67, "c": 68, "ứ": 69, "â": 70, "ầ": 71, "v": 72, "ơ": 73, "é": 74, "ử": 75, "ị": 76, "ổ": 77, "ý": 78, "l": 79, "ắ": 80, "ệ": 81, "ỉ": 82, "u": 84, "q": 85, "p": 86, "ớ": 87, "|": 83, "[UNK]": 88, "[PAD]": 89}