{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "\t", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 1, "content": "\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 2, "content": " ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 3, "content": "(", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 4, "content": ")", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 5, "content": "*", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 6, "content": ",", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 7, "content": "-", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 8, "content": ".", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 9, "content": "<", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 10, "content": ">", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 11, "content": "[", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 12, "content": "]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 13, "content": "_", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 14, "content": "a", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 15, "content": "b", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 16, "content": "c", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 17, "content": "d", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 18, "content": "d͡z", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 19, "content": "d͡ʑ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 20, "content": "d͡ʒ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 21, "content": "e", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 22, "content": "e̞", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 23, "content": "f", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 24, "content": "g", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 25, "content": "h", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 26, "content": "i", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 27, "content": "j", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 28, "content": "k", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 29, "content": "l", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 30, "content": "m", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 31, "content": "m̥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 32, "content": "n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 33, "content": "n̥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 34, "content": "o", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 35, "content": "o̞", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 36, "content": "p", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 37, "content": "q", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 38, "content": "r", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 39, "content": "s", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 40, "content": "t", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 41, "content": "t͡s", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 42, "content": "t͡ɕ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 43, "content": "t͡ʃ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 44, "content": "u", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 45, "content": "v", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 46, "content": "x", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 47, "content": "y", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 48, "content": "z", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 49, "content": "|", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 50, "content": "«", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 51, "content": "»", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 52, "content": "æ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 53, "content": "ç", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 54, "content": "ð", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 55, "content": "ø", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 56, "content": "ø̞", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 57, "content": "ħ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 58, "content": "ŋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 59, "content": "ŋ̊", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 60, "content": "œ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 61, "content": "ɐ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 62, "content": "ɑ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 63, "content": "ɒ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 64, "content": "ɔ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 65, "content": "ɕ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 66, "content": "ɖ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 67, "content": "ɘ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 68, "content": "ə", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 69, "content": "ɚ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 70, "content": "ɛ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 71, "content": "ɜ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 72, "content": "ɜː", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 73, "content": "ɞ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 74, "content": "ɟ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 75, "content": "ɡ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 76, "content": "ɢ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 77, "content": "ɣ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 78, "content": "ɦ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 79, "content": "ɨ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 80, "content": "ɪ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 81, "content": "ɫ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 82, "content": "ɭ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 83, "content": "ɯ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 84, "content": "ɰ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 85, "content": "ɱ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 86, "content": "ɲ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 87, "content": "ɳ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 88, "content": "ɴ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 89, "content": "ɵ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 90, "content": "ɸ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 91, "content": "ɹ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 92, "content": "ɻ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 93, "content": "ɽ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 94, "content": "ɾ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 95, "content": "ʀ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 96, "content": "ʁ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 97, "content": "ʂ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 98, "content": "ʃ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 99, "content": "ʈ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 100, "content": "ʉ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 101, "content": "ʊ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 102, "content": "ʋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 103, "content": "ʎ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 104, "content": "ʏ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 105, "content": "ʐ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 106, "content": "ʒ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 107, "content": "ʔ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 108, "content": "ʕ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 109, "content": "ʙ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 110, "content": "ʝ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 111, "content": "ʟ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 112, "content": "ʣ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 113, "content": "ʥ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 114, "content": "ʦ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 115, "content": "ʨ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 116, "content": "ʰ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 117, "content": "ʲ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 118, "content": "ʷ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 119, "content": "ˈ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 120, "content": "ˌ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 121, "content": "ː", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 122, "content": "ˑ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 123, "content": "ˠ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 124, "content": "ˤ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 125, "content": "˥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 126, "content": "˥˩", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 127, "content": "˦", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 128, "content": "˧", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 129, "content": "˧˥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 130, "content": "˧˩", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 131, "content": "˨", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 132, "content": "˩", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 133, "content": "˩˥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 134, "content": "̃", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 135, "content": "̆", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 136, "content": "̈", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 137, "content": "̜", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 138, "content": "̟", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 139, "content": "̠", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 140, "content": "̥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 141, "content": "̩", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 142, "content": "̬", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 143, "content": "̯", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 144, "content": "̹", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 145, "content": "̽", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 146, "content": "β", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 147, "content": "θ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 148, "content": "χ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 149, "content": "ᵊ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 150, "content": "‖", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 151, "content": "↗", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 152, "content": "↘", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 153, "content": "ⱱ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 154, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 155, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 156, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 157, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Metaspace", "replacement": "▁", "prepend_scheme": "first", "split": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": {} }, "decoder": { "type": "Sequence", "decoders": [ { "type": "Replace", "pattern": { "String": "▁" }, "content": " " }, { "type": "ByteFallback" }, { "type": "Fuse" }, { "type": "Strip", "content": " ", "start": 1, "stop": 0 } ] }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": true, "byte_fallback": true, "ignore_merges": false, "vocab": {}, "merges": [] } }