{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": true }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "a": 5, "b": 6, "c": 7, "e": 8, "f": 9, "i": 10, "k": 11, "l": 12, "m": 13, "n": 14, "p": 15, "r": 16, "s": 17, "u": 18, "ı": 19, "##c": 20, "##ı": 21, "##n": 22, "##l": 23, "##m": 24, "##a": 25, "##i": 26, "##k": 27, "##r": 28, "##u": 29, "##b": 30, "##e": 31, "##p": 32, "##s": 33, "ac": 34, "##li": 35, "acı": 36, "el": 37, "fi": 38, "##ıl": 39, "##nıl": 40, "##ma": 41, "##ir": 42, "##is": 43, "##kir": 44, "##ub": 45, "##ps": 46, "acub": 47, "##lik": 48, "##lips": 49, "##lilik": 50, "acık": 51, "acınıl": 52, "ellips": 53, "fikir": 54, "acube": 55, "acınılma": 56, "ellipsis": 57, "fikirlilik": 58 } } }