{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "NFC" }, "pre_tokenizer": { "type": "Split", "pattern": { "String": "" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": {} }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "[BOS]": 5, "[EOS]": 6, "\n": 7, " ": 8, "!": 9, "\"": 10, "'": 11, "-": 12, ".": 13, "«": 14, "»": 15, "؁": 16, "،": 17, "؎": 18, "ؐ": 19, "ؑ": 20, "ؒ": 21, "ؓ": 22, "ؔ": 23, "؛": 24, "؟": 25, "ؠ": 26, "ء": 27, "آ": 28, "أ": 29, "ؤ": 30, "إ": 31, "ئ": 32, "ا": 33, "ب": 34, "ت": 35, "ث": 36, "ج": 37, "ح": 38, "خ": 39, "د": 40, "ذ": 41, "ر": 42, "ز": 43, "س": 44, "ش": 45, "ص": 46, "ض": 47, "ط": 48, "ظ": 49, "ع": 50, "غ": 51, "ف": 52, "ق": 53, "ك": 54, "ل": 55, "م": 56, "ن": 57, "ه": 58, "و": 59, "ً": 60, "ٍ": 61, "َ": 62, "ُ": 63, "ِ": 64, "ّ": 65, "ْ": 66, "ٓ": 67, "ٔ": 68, "ٕ": 69, "ٖ": 70, "ٗ": 71, "٘": 72, "ٚ": 73, "ٛ": 74, "ٟ": 75, "٠": 76, "١": 77, "٢": 78, "٣": 79, "٤": 80, "٥": 81, "٦": 82, "٧": 83, "٨": 84, "٩": 85, "٪": 86, "٭": 87, "ٮ": 88, "ٮ۪": 89, "ٰ": 90, "ٲ": 91, "ٳ": 92, "ٹ": 93, "پ": 94, "ٿ": 95, "ڀ": 96, "چ": 97, "ڈ": 98, "ڑ": 99, "ژ": 100, "ڙ": 101, "ک": 102, "ڪ": 103, "ڮ": 104, "گ": 105, "ں": 106, "ھ": 107, "ہ": 108, "ۂ": 109, "ۃ": 110, "ۄ": 111, "ۅ": 112, "ۆ": 113, "ی": 114, "ۍ": 115, "ے": 116, "ۓ": 117, "۔": 118, "۪": 119, "ۭ": 120, "۰": 121, "۱": 122, "۲": 123, "۳": 124, "۴": 125, "۵": 126, "۶": 127, "۷": 128, "۸": 129, "۹": 130, "﴾": 131, "﴿": 132 }, "unk_token": "[UNK]" } }