{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {"type": "Lowercase"},
      {"type": "StripAccents"}
    ]
  },
  "pre_tokenizer": {
    "type": "WhitespaceSplit"
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": "[CLS] $A [SEP]",
    "pair": "[CLS] $A [SEP] $B:1 [SEP]:1"
  },
  "decoder": {
    "type": "WordPiece",
    "unknown": "[UNK]",
    "prefix": "##"
  },
  "model": {
    "type": "BPE",
    "vocab_size": 30522,
    "merges": []
  }
}