{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Strip",
"strip_left": false,
"strip_right": true
},
{
"type": "Replace",
"pattern": {
"Regex": " {2,}"
},
"content": "▁"
}
]
},
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "WhitespaceSplit"
},
{
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
}
]
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"special_tokens": {
"</s>": {
"id": "</s>",
"ids": [
2
],
"tokens": [
"</s>"
]
},
"<s>": {
"id": "<s>",
"ids": [
0
],
"tokens": [
"<s>"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"prepend_scheme": "always",
"split": true
},
"model": {
"type": "Unigram",
"unk_id": 3,
"vocab": [
[
"<s>",
0.0
],
[
"[PAD]",
0.0
],
[
"</s>",
0.0
],
[
"[UNK]",
0.0
],
[
"[MASK]",
0.0
]
],
"byte_fallback": false
}
}