{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Strip", "strip_left": false, "strip_right": true }, { "type": "Replace", "pattern": { "String": " {2,}" }, "content": "▁" } ] }, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "WhitespaceSplit" }, { "type": "Metaspace", "replacement": "▁", "prepend_scheme": "always", "split": true } ] }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "<s>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "</s>", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "<s>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "</s>", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 0 } }, { "SpecialToken": { "id": "</s>", "type_id": 0 } } ], "special_tokens": { "</s>": { "id": "</s>", "ids": [ 2 ], "tokens": [ "</s>" ] }, "<s>": { "id": "<s>", "ids": [ 0 ], "tokens": [ "<s>" ] } } }, "decoder": { "type": "Metaspace", "replacement": "▁", "prepend_scheme": "always", "split": 
true }, "model": { "type": "Unigram", "unk_id": 3, "vocab": [ [ "<s>", 0.0 ], [ "[PAD]", 0.0 ], [ "</s>", 0.0 ], [ "[UNK]", 0.0 ], [ "[MASK]", 0.0 ] ], "byte_fallback": false } }