{
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "__type": "AddedToken",
    "content": "[CLS]",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "do_basic_tokenize": true,
  "do_lower_case": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "__type": "AddedToken",
    "content": "[MASK]",
    "lstrip": true,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "name_or_path": "tokenizer/vocab.txt",
  "never_split": null,
  "pad_token": {
    "__type": "AddedToken",
    "content": "[PAD]",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "__type": "AddedToken",
    "content": "[SEP]",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "MPNetTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "[UNK]",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}