Commit
·
5d1e9a8
1
Parent(s):
3d4889b
add tokenizer
Browse files- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"[PAD]": 0, "[UNK]": 1, "|": 2, "A": 3, "B": 4, "C": 5, "D": 6, "E": 7, "F": 8, "G": 9, "H": 10, "I": 11, "J": 12, "K": 13, "L": 14, "M": 15, "N": 16, "O": 17, "P": 18, "Q": 19, "R": 20, "S": 21, "T": 22, "U": 23, "V": 24, "W": 25, "X": 26, "Y": 27, "Z": 28, "Å": 29, "Ä": 30, "Ö": 31}
|