{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true },
    { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true },
    { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true },
    { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true },
    { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": "(\\[[^\\]]+]|C\\(=N\\)N|CCC\\(C\\)|\\(CCCN\\)|NC\\(=O\\)|C\\(C\\)=O|=C\\(N\\)N|N=C\\(N\\)|NC\\(=N\\)|C\\(=O\\)C|CS\\(=O\\)|OC\\(=O\\)|C\\(=O\\)c|c\\(=O\\)n|C\\(=O\\)O|C\\(N\\)=O|cc\\(Br\\)|CC\\(=O\\)|C\\(=O\\)N|ccc\\(C\\)|ccc\\(F\\)|c\\(=O\\)|C\\(=N\\)|c\\(O\\)c|NC\\(C\\)|n\\(C\\)c|CC\\(O\\)|cc\\(N\\)|CC\\(C\\)|cc\\(C\\)|C\\(=O\\)|cc\\(O\\)|c\\(N\\)c|c\\(Cl\\)|C\\(N\\)N|N\\(C\\)C|NC\\(N\\)|=C\\(N\\)|C\\(O\\)C|c\\(OC\\)|\\(C#N\\)|C\\(C\\)C|CC\\(N\\)|C\\(C\\)N|c\\(CO\\)|c\\(Br\\)|\\(CCO\\)|C\\(CC\\)|S\\(=O\\)|c\\(C\\)c|\\(=N\\)|c\\(O\\)|\\(Br\\)|\\(CS\\)|c\\(C\\)|\\(CC\\)|c\\(I\\)|C\\(C\\)|N\\(C\\)|C\\(O\\)|C\\(I\\)|C\\(F\\)|\\(Cl\\)|n\\(C\\)|\\(OC\\)|\\(=O\\)|c\\(F\\)|CCCN\\)|\\(=S\\)|c\\(N\\)|\\(CO\\)|C\\(N\\)|\\(C\\)|ccccc|\\(S\\)|\\(F\\)|\\(O\\)|C#N\\)|CCO\\)|\\(N\\)|C\\(=N|\\(I\\)|CSSC|=N\\)|CC=O|CCCO|Cl\\)|CCNO|=O\\)|CCSC|\\(=N|CO\\)|CCNC|CCCC|=S\\)|CN=C|CCCS|cccc|CCCN|Br\\)|cccn|CS\\)|C=CC|OC\\)|CC=C|cnn|=NC|COC|OCC|\\(O|CCS|CNc|#Cc|=CC|ccn|C=C|CSc|ccc|NCc|CCO|N=C|cnc|I\\)|CCc|OCc|CCl|ccs|COc|CCn|CSC|SCC|NCC|CCN|CNC|C#C|C=O|CNO|CCC|SSC|C#N|O=C|NOC|S\\)|csc|ncc|C\\)|N\\)|\\(C|ncn|F\\)|O\\)|N#C|nnc|CSS|Cl|NC|nc|co|CS|CO|no|cc|CN|cn|SS|OC|\\)|SN|nn|CC|#C|NO|=S|NS|cs|=C|Oc|=O|oc|Nc|Cc|=N|NN|C=|C#|\\(|SC|sc|Br|N#|#N|p|O|I|N|C|s|=|c|B|S|F|n|P|#|o)"
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      { "SpecialToken": { "id": "[CLS]", "type_id": 0 } },
      { "Sequence": { "id": "A", "type_id": 0 } },
      { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }
    ],
    "pair": [
      { "SpecialToken": { "id": "[CLS]", "type_id": 0 } },
      { "Sequence": { "id": "A", "type_id": 0 } },
      { "SpecialToken": { "id": "[SEP]", "type_id": 0 } },
      { "Sequence": { "id": "B", "type_id": 1 } },
      { "SpecialToken": { "id": "[SEP]", "type_id": 1 } }
    ],
    "special_tokens": {
      "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] },
      "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] }
    }
  },
  "decoder": null,
  "model": {
    "type": "WordPiece",
    "unk_token": "[UNK]",
    "continuing_subword_prefix": "##",
    "max_input_chars_per_word": 100,
    "vocab": {
      "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4,
      ":": 5, "%11": 6, "-": 7, "[As]": 8, "[pH]": 9,
      "[Po]": 10, "[Ra]": 11, "[3H]": 12, "[S-]": 13, "8": 14,
      "%21": 15, "[CH-]": 16, "[IH]": 17, "P": 18, "[SeH]": 19,
      "[O]": 20, "4": 21, "/": 22, "[N-]": 23, "[129Xe]": 24,
      "[Cl+3]": 25, "3": 26, "[C@@]": 27, "[11CH3]": 28, "[13C]": 29,
      "[Sn+]": 30, "[P@@]": 31, "[Ge]": 32, "[BH3-]": 33, "[123I]": 34,
      "[14CH2]": 35, "[Al-]": 36, "[Si]": 37, "[S@]": 38, "[W]": 39,
      "=": 40, "%19": 41, "Cl": 42, "[Cl+2]": 43, "%14": 44,
      "[Al]": 45, "9": 46, "[B-]": 47, "[Cl+]": 48, "[TlH2]": 49,
      "[NH2+]": 50, "[11CH]": 51, "[SnH]": 52, "[SiH3]": 53, "[Sn]": 54,
      "[11C]": 55, "S": 56, "[SiH2]": 57, "%18": 58, "[BH-]": 59,
      "[Ru]": 60, "%10": 61, "[V]": 62, "[o+]": 63, "[O+]": 64,
      "c": 65, "[I-]": 66, "[C@@H]": 67, "n": 68, "2": 69,
      "[Se-]": 70, "[N+]": 71, "N": 72, "s": 73, "[PH+]": 74,
      "[C@]": 75, "[N@]": 76, "[C+]": 77, "[s+]": 78, "[N@+]": 79,
      "[125I]": 80, "[cH-]": 81, "[Th]": 82, "C": 83, "[Sb]": 84,
      "5": 85, "[c-]": 86, "#": 87, "[Ca]": 88, "%16": 89,
      "[Tl]": 90, "[18F]": 91, "[223Ra]": 92, "[BH2-]": 93, "[O-]": 94,
      "[Bi]": 95, "[te]": 96, "Br": 97, "[Cr]": 98, "[N@@]": 99,
      "[Hg]": 100, "[S@+]": 101, "\\": 102, "[n+]": 103, "%15": 104,
      "[123Te]": 105, "[C-]": 106, "1": 107, "[NH+]": 108, "[I+]": 109,
      "[CH]": 110, "%13": 111, "[Pb]": 112, "[14C]": 113, "[2H]": 114,
      "[P@]": 115, "[OH+]": 116, ")": 117, "[Tc]": 118, "[se+]": 119,
      "[NH-]": 120, "[nH]": 121, "B": 122, "[CH2]": 123, "[P+]": 124,
      "[se]": 125, "[In]": 126, "[Te]": 127, "[Se+]": 128, "%12": 129,
      "[S+]": 130, "o": 131, "[C]": 132, "[N@@+]": 133, "[n-]": 134,
      "6": 135, "[S@@]": 136, "[nH+]": 137, "[Si+]": 138, "[PH]": 139,
      "[Hg+]": 140, "[C@H]": 141, "[Ga]": 142, "[S@@+]": 143, "[NH3+]": 144,
      "[SiH]": 145, "[11c]": 146, "%20": 147, "%17": 148, "(": 149,
      "O": 150, "[IH2]": 151, "[As+]": 152, "F": 153, "[CH2-]": 154,
      "[Se]": 155, "[c+]": 156, "%23": 157, "[SH]": 158, "I": 159,
      "7": 160, "%22": 161, "[Os]": 162, "[OH]": 163, "p": 164,
      "[P@+]": 165, "[Ag+]": 166, "[Ag-4]": 167, "[Ag]": 168, "[Al-3]": 169,
      "[AsH3]": 170, "[AsH]": 171, "[At]": 172, "[B@-]": 173, "[B@@-]": 174,
      "[B]": 175, "[Ba]": 176, "[Br+2]": 177, "[BrH]": 178, "[Br]": 179,
      "[CH3]": 180, "[CaH2]": 181, "[Cs]": 182, "[FH]": 183, "[F]": 184,
      "[H]": 185, "[He]": 186, "[I+2]": 187, "[I+3]": 188, "[I]": 189,
      "[K]": 190, "[Kr]": 191, "[Li+]": 192, "[LiH]": 193, "[MgH2]": 194,
      "[Mg]": 195, "[NH3]": 196, "[N]": 197, "[Na]": 198, "[OH2]": 199,
      "[P@@+]": 200, "[PH2]": 201, "[P]": 202, "[Rb]": 203, "[SH+]": 204,
      "[SH2]": 205, "[S]": 206, "[Se-2]": 207, "[SeH2]": 208, "[Si@]": 209,
      "[SrH2]": 210, "[TeH]": 211, "[Xe]": 212, "[Zn+2]": 213, "[Zn-2]": 214,
      "[Zn]": 215, "[n]": 216, "[te+]": 217, "=O": 218, "CC": 219,
      "NC": 220, "CO": 221, "cc": 222, "CCC": 223, "CCCC": 224,
      "ccc": 225, "CCN": 226, "CCCN": 227, "CN": 228, "CNC": 229,
      "cccc": 230, "ccccc": 231, "N)": 232, "(N)": 233, "=O)": 234,
      "(=O)": 235, "C(=O)": 236, "C(=O)N": 237, "O)": 238, "(C": 239,
      "(C)": 240, "C(C)": 241, "C(C)C": 242, "CC(=O)": 243, "C(=O)O": 244,
      "C(=O)C": 245, "C(N)": 246, "CC(N)": 247, "C(N)=O": 248, "CO)": 249,
      "(CO)": 250, "CC(C)": 251, "CS": 252, "=N": 253, "CCNC": 254,
      "NC(=O)": 255, "=N)": 256, "(=N)": 257, "C(=N)": 258, "CC=O": 259,
      "CCCN)": 260, "(CCCN)": 261, "NC(=N)": 262, "Br)": 263, "(Br)": 264,
      "F)": 265, "(F)": 266, "S)": 267, "(S)": 268, "C)": 269,
      "(O)": 270, "CCS": 271, "CCCS": 272, "CCSC": 273, "cn": 274,
      "ccn": 275, "cccn": 276, "CSC": 277, "=C": 278, "CCO": 279,
      "(O": 280, "(=N": 281, "C(=N": 282, "c(O)": 283, "OC": 284,
      "SCC": 285, "ccc(F)": 286, "S(=O)": 287, "O=C": 288, "CCc": 289,
      "OC(=O)": 290, "C#": 291, "Cc": 292, "C=C": 293, "C=": 294,
      "#N": 295, "C#N": 296, "ccs": 297, "NO": 298, "C(O)": 299,
      "csc": 300, "ccc(C)": 301, "cc(Br)": 302, "ncn": 303, "CCNO": 304,
      "CCCO": 305, "CSS": 306, "CSSC": 307, "=CC": 308, "I)": 309,
      "(I)": 310, "CNO": 311, "N(C)": 312, "N(C)C": 313, "C(N)N": 314,
      "NOC": 315, "C(C)=O": 316, "#C": 317, "cco": 318, "NS": 319,
      "SN": 320, "c(=O)n": 321, "=S)": 322, "(=S)": 323, "c(N)c": 324,
      "N=C": 325, "SC": 326, "SSC": 327, "CCC(C)": 328, "c(=O)": 329,
      "C#N)": 330, "(C#N)": 331, "SS": 332, "=S": 333, "oc": 334,
      "co": 335, "no": 336, "N#": 337, "N#C": 338, "nc": 339,
      "sc": 340, "C(=N)N": 341, "C=O": 342, "c(F)": 343, "C(F)": 344,
      "c(I)": 345, "C(I)": 346, "cnn": 347, "cc(N)": 348, "NC(N)": 349,
      "OC)": 350, "(OC)": 351, "c(OC)": 352, "c(Br)": 353, "c(N)": 354,
      "cc(O)": 355, "CS)": 356, "(CS)": 357, "Oc": 358, "cnc": 359,
      "Cl)": 360, "(Cl)": 361, "c(Cl)": 362, "c(O)c": 363, "NCC": 364,
      "COC": 365, "OCC": 366, "Nc": 367, "ncc": 368, "cc(C)": 369,
      "nn": 370, "cs": 371, "c(C)c": 372, "COc": 373, "C(=O)c": 374,
      "c(C)": 375, "(CC)": 376, "NCc": 377, "nnc": 378, "C(O)C": 379,
      "=C(N)": 380, "C=CC": 381, "=C(N)N": 382, "N=C(N)": 383, "OCc": 384,
      "CC=C": 385, "CCl": 386, "CCn": 387, "CNc": 388, "CC(O)": 389,
      "NN": 390, "CSc": 391, "NC(C)": 392, "CS(=O)": 393, "C(CC)": 394,
      "C#C": 395, "C(C)N": 396, "CCO)": 397, "(CCO)": 398, "CN=C": 399,
      "n(C)": 400, "n(C)c": 401, "c(CO)": 402, "#Cc": 403, "=NC": 404
    }
  }
}