{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 3, "content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 0, "content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "\\s+" }, "content": "|" } ] }, "pre_tokenizer": { "type": "Split", "pattern": { "String": "\\S" }, "behavior": "Isolated", "invert": false }, "post_processor": null, "decoder": { "type": "CTC", "pad_token": "<pad>", "word_delimiter_token": "|", "cleanup": true }, "model": { "type": "WordLevel", "vocab": { "<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "a": 4, "b": 5, "d": 6, "e": 7, "f": 8, "h": 9, "i": 10, "j": 11, "k": 12, "l": 13, "m": 14, "n": 15, "o": 16, "p": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "z": 23, "æ": 24, "ð": 25, "ŋ": 26, "ɑ": 27, "ɔ": 28, "ə": 29, "ɛ": 30, "ɡ": 31, "ɪ": 32, "ɹ": 33, "ɾ": 34, "ʃ": 35, "ʊ": 36, "ʌ": 37, "ʒ": 38, "ʔ": 39, "θ": 40 }, "unk_token": "<unk>" } }