add tokenizer
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +1 -1
- tokenizer_config.json +1 -1
- vocab.json +1 -1
added_tokens.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"<s>": 93, "</s>": 94}
|
special_tokens_map.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
|
|
|
| 1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"
|
|
|
|
| 1 |
+
{"ẩ": 0, "ứ": 1, "ủ": 2, "n": 3, "ầ": 4, "ũ": 5, "ỗ": 6, "ở": 7, "ồ": 8, "ớ": 9, "ư": 10, "à": 11, "ỹ": 12, "ĩ": 13, "r": 14, "ệ": 15, "ó": 16, "d": 17, "ợ": 18, "ý": 19, "ờ": 20, "õ": 21, "u": 22, "ặ": 23, "ỳ": 24, "a": 25, "ễ": 26, "ỷ": 27, "ự": 28, "ữ": 29, "ằ": 30, "y": 31, "ẫ": 32, "ơ": 33, "t": 34, "è": 35, "ậ": 36, "đ": 37, "x": 38, "ổ": 39, "é": 40, "ố": 41, "ù": 43, "ử": 44, "ẳ": 45, "ả": 46, "p": 47, "ọ": 48, "â": 49, "ị": 50, "ụ": 51, "ì": 52, "c": 53, "q": 54, "ỡ": 55, "l": 56, "ề": 57, "ắ": 58, "ừ": 59, "4": 60, "ò": 61, "á": 62, "e": 63, "í": 64, "v": 65, "ú": 66, "ă": 67, "ê": 68, "ấ": 69, "ỏ": 70, "ẻ": 71, "m": 72, "h": 73, "b": 74, "ỵ": 75, "ỉ": 76, "ế": 77, "o": 78, "ẽ": 79, "s": 80, "g": 81, "ẵ": 82, "ẹ": 83, "ã": 84, "i": 85, "k": 86, "ể": 87, "ạ": 88, "ộ": 89, "ô": 90, "|": 42, "[UNK]": 91, "[PAD]": 92}
|