{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\|\\/|:|~|@|\\?|>|\\*|\\$|\\%[0-9]{2}|[0-9])" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "<s>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "</s>", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "<s>": { "id": "<s>", "ids": [ 1 ], "tokens": [ "<s>" ] }, "</s>": { "id": "</s>", "ids": [ 2 ], "tokens": [ "</s>" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "<unk>": 0, "<s>": 1, "</s>": 2, "[PAD]": 3, "C": 4, "c": 5, "(": 6, ")": 7, "1": 8, "O": 9, "N": 10, "2": 11, "=": 12, "n": 13, "[C@H]": 14, "[C@@H]": 15, "3": 16, "F": 17, "o": 18, "[nH]": 19, "s": 20, "S": 21, "#": 22, "Cl": 23, "4": 24, "-": 25, "/": 26, "[C@]": 27, "[C@@]": 28, "[O-]": 29, "[N+]": 30, "\\": 31, "Br": 32, "[S@+]": 33, "5": 34, "[S@@+]": 35, "[n+]": 36, "I": 37, "[S+]": 38, "[Si]": 39, "[S@]": 40, "6": 41, "[S@@]": 42, "P": 43, "B": 44, "[CH]": 45, "7": 46, "[C]": 47, "[N-]": 48, "[CH2]": 49, "[O]": 50, "8": 51, "[P@]": 52, "[C-]": 53, "[P@@]": 54, "[N@+]": 55, "9": 56, "[N@@+]": 57, "[cH-]": 58, "[CH-]": 59, "[Sn]": 60, "[s+]": 61, "[B-]": 62, "%10": 63, ".": 64, "[NH+]": 65, 
"[P@@H]": 66, "[P+]": 67, "[o+]": 68, "[OH+]": 69, "[Sn+2]": 70, "[Sn+]": 71, "[SH]": 72, "[NH2+]": 73, "[B@-]": 74, "[B@@-]": 75, "[BH3-]": 76, "[IH2]": 77, "[NH-]": 78, "[O-2]": 79, "[OH-]": 80, "[P@+]": 81, "[BH-]": 82, "[F-]": 83, "[N]": 84, "[P@H]": 85, "p": 86, "[Br+]": 87, "[NH]": 88, "[O+]": 89, "[P@@+]": 90, "[PH]": 91, "[Sn+3]": 92, "[nH+]": 93 }, "unk_token": "<unk>" } }