Davar-IvriNet / tokenizer_config.json
Nitzanbanin's picture
Save tokenizer and model configuration files (re-attempt)
b2bcfa3 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
  {
    "id": 0,
    "content": "[UNK]",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": false,
    "special": true
  },
  {
    "id": 1,
    "content": "CLS",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": false,
    "special": true
  },
  {
    "id": 2,
    "content": "SEP",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": false,
    "special": true
  }
],
"normalizer": {
  "type": "Sequence",
  "normalizers": [
    { "type": "NFD" },
    { "type": "StripAccents" },
    { "type": "Lowercase" }
  ]
},
"pre_tokenizer": { "type": "Whitespace" },
"post_processor": {
  "type": "TemplateProcessing",
  "single": [
    { "SpecialToken": { "id": "CLS", "type_id": 0 } },
    { "Sequence": { "id": "A", "type_id": 0 } },
    { "SpecialToken": { "id": "SEP", "type_id": 0 } }
  ],
  "pair": [
    { "SpecialToken": { "id": "CLS", "type_id": 0 } },
    { "Sequence": { "id": "A", "type_id": 0 } },
    { "SpecialToken": { "id": "SEP", "type_id": 0 } },
    { "Sequence": { "id": "B", "type_id": 0 } },
    { "SpecialToken": { "id": "SEP", "type_id": 0 } }
  ],
  "special_tokens": {
    "CLS": {
      "id": "CLS",
      "ids": [1],
      "tokens": ["CLS"]
    },
    "SEP": {
      "id": "SEP",
      "ids": [2],
      "tokens": ["SEP"]
    }
  }
},
"decoder": null,
"model": {
  "type": "BPE",
  "dropout": null,
  "unk_token": "[UNK]",
  "continuing_subword_prefix": null,
  "end_of_word_suffix": null,
  "fuse_unk": false,
  "byte_fallback": false,
  "ignore_merges": false,
  "vocab": {},
  "merges": []
}
}