| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "[PAD]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "[UNK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "[CLS]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "[SEP]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "[MASK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": null, | |
| "pre_tokenizer": { | |
| "type": "Split", | |
| "pattern": { | |
| "Regex": "(\\[[^\\]]+]|C\\(=N\\)N|CCC\\(C\\)|\\(CCCN\\)|NC\\(=O\\)|C\\(C\\)=O|=C\\(N\\)N|N=C\\(N\\)|NC\\(=N\\)|C\\(=O\\)C|CS\\(=O\\)|OC\\(=O\\)|C\\(=O\\)c|c\\(=O\\)n|C\\(=O\\)O|C\\(N\\)=O|cc\\(Br\\)|CC\\(=O\\)|C\\(=O\\)N|ccc\\(C\\)|ccc\\(F\\)|c\\(=O\\)|C\\(=N\\)|c\\(O\\)c|NC\\(C\\)|n\\(C\\)c|CC\\(O\\)|cc\\(N\\)|CC\\(C\\)|cc\\(C\\)|C\\(=O\\)|cc\\(O\\)|c\\(N\\)c|c\\(Cl\\)|C\\(N\\)N|N\\(C\\)C|NC\\(N\\)|=C\\(N\\)|C\\(O\\)C|c\\(OC\\)|\\(C#N\\)|C\\(C\\)C|CC\\(N\\)|C\\(C\\)N|c\\(CO\\)|c\\(Br\\)|\\(CCO\\)|C\\(CC\\)|S\\(=O\\)|c\\(C\\)c|\\(=N\\)|c\\(O\\)|\\(Br\\)|\\(CS\\)|c\\(C\\)|\\(CC\\)|c\\(I\\)|C\\(C\\)|N\\(C\\)|C\\(O\\)|C\\(I\\)|C\\(F\\)|\\(Cl\\)|n\\(C\\)|\\(OC\\)|\\(=O\\)|c\\(F\\)|CCCN\\)|\\(=S\\)|c\\(N\\)|\\(CO\\)|C\\(N\\)|\\(C\\)|ccccc|\\(S\\)|\\(F\\)|\\(O\\)|C#N\\)|CCO\\)|\\(N\\)|C\\(=N|\\(I\\)|CSSC|=N\\)|CC=O|CCCO|Cl\\)|CCNO|=O\\)|CCSC|\\(=N|CO\\)|CCNC|CCCC|=S\\)|CN=C|CCCS|cccc|CCCN|Br\\)|cccn|CS\\)|C=CC|OC\\)|CC=C|cnn|=NC|COC|OCC|\\(O|CCS|CNc|#Cc|=CC|ccn|C=C|CSc|ccc|NCc|CCO|N=C|cnc|I\\)|CCc|OCc|CCl|ccs|COc|CCn|CSC|SCC|NCC|CCN|CNC|C#C|C=O|CNO|CCC|SSC|C#N|O=C|NOC|S\\)|csc|ncc|C\\)|N\\)|\\(C|ncn|F\\)|O\\)|N#C|nnc|CSS|cco|Cl|NC|nc|co|CS|CO|no|cc|CN|cn|SS|OC|\\)|SN|nn|CC|#C|NO|=S|NS|cs|=C|Oc|=O|oc|Nc|Cc|=N|NN|C=|C#|\\(|SC|sc|Br|N#|#N|p|O|I|N|C|s|=|c|B|S|F|n|P|#|o)" | |
| }, | |
| "behavior": "Isolated", | |
| "invert": false | |
| }, | |
| "post_processor": { | |
| "type": "TemplateProcessing", | |
| "single": [ | |
| { | |
| "SpecialToken": { | |
| "id": "[CLS]", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "[SEP]", | |
| "type_id": 0 | |
| } | |
| } | |
| ], | |
| "pair": [ | |
| { | |
| "SpecialToken": { | |
| "id": "[CLS]", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "[SEP]", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "B", | |
| "type_id": 1 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "[SEP]", | |
| "type_id": 1 | |
| } | |
| } | |
| ], | |
| "special_tokens": { | |
| "[CLS]": { | |
| "id": "[CLS]", | |
| "ids": [ | |
| 2 | |
| ], | |
| "tokens": [ | |
| "[CLS]" | |
| ] | |
| }, | |
| "[SEP]": { | |
| "id": "[SEP]", | |
| "ids": [ | |
| 3 | |
| ], | |
| "tokens": [ | |
| "[SEP]" | |
| ] | |
| } | |
| } | |
| }, | |
| "decoder": null, | |
| "model": { | |
| "type": "WordPiece", | |
| "unk_token": "[UNK]", | |
| "continuing_subword_prefix": "##", | |
| "max_input_chars_per_word": 100, | |
| "vocab": { | |
| "[PAD]": 0, | |
| "[UNK]": 1, | |
| "[CLS]": 2, | |
| "[SEP]": 3, | |
| "[MASK]": 4, | |
| ":": 5, | |
| "%11": 6, | |
| "-": 7, | |
| "[As]": 8, | |
| "[pH]": 9, | |
| "[Po]": 10, | |
| "[Ra]": 11, | |
| "[3H]": 12, | |
| "[S-]": 13, | |
| "8": 14, | |
| "%21": 15, | |
| "[CH-]": 16, | |
| "[IH]": 17, | |
| "P": 18, | |
| "[SeH]": 19, | |
| "[O]": 20, | |
| "4": 21, | |
| "/": 22, | |
| "[N-]": 23, | |
| "[129Xe]": 24, | |
| "[Cl+3]": 25, | |
| "3": 26, | |
| "[C@@]": 27, | |
| "[11CH3]": 28, | |
| "[13C]": 29, | |
| "[Sn+]": 30, | |
| "[P@@]": 31, | |
| "[Ge]": 32, | |
| "[BH3-]": 33, | |
| "[123I]": 34, | |
| "[14CH2]": 35, | |
| "[Al-]": 36, | |
| "[Si]": 37, | |
| "[S@]": 38, | |
| "[W]": 39, | |
| "=": 40, | |
| "%19": 41, | |
| "Cl": 42, | |
| "[Cl+2]": 43, | |
| "%14": 44, | |
| "[Al]": 45, | |
| "9": 46, | |
| "[B-]": 47, | |
| "[Cl+]": 48, | |
| "[TlH2]": 49, | |
| "[NH2+]": 50, | |
| "[11CH]": 51, | |
| "[SnH]": 52, | |
| "[SiH3]": 53, | |
| "[Sn]": 54, | |
| "[11C]": 55, | |
| "S": 56, | |
| "[SiH2]": 57, | |
| "%18": 58, | |
| "[BH-]": 59, | |
| "[Ru]": 60, | |
| "%10": 61, | |
| "[V]": 62, | |
| "[o+]": 63, | |
| "[O+]": 64, | |
| "c": 65, | |
| "[I-]": 66, | |
| "[C@@H]": 67, | |
| "n": 68, | |
| "2": 69, | |
| "[Se-]": 70, | |
| "[N+]": 71, | |
| "N": 72, | |
| "s": 73, | |
| "[PH+]": 74, | |
| "[C@]": 75, | |
| "[N@]": 76, | |
| "[C+]": 77, | |
| "[s+]": 78, | |
| "[N@+]": 79, | |
| "[125I]": 80, | |
| "[cH-]": 81, | |
| "[Th]": 82, | |
| "C": 83, | |
| "[Sb]": 84, | |
| "5": 85, | |
| "[c-]": 86, | |
| "#": 87, | |
| "[Ca]": 88, | |
| "%16": 89, | |
| "[Tl]": 90, | |
| "[18F]": 91, | |
| "[223Ra]": 92, | |
| "[BH2-]": 93, | |
| "[O-]": 94, | |
| "[Bi]": 95, | |
| "[te]": 96, | |
| "Br": 97, | |
| "[Cr]": 98, | |
| "[N@@]": 99, | |
| "[Hg]": 100, | |
| "[S@+]": 101, | |
| "\\": 102, | |
| "[n+]": 103, | |
| "%15": 104, | |
| "[123Te]": 105, | |
| "[C-]": 106, | |
| "1": 107, | |
| "[NH+]": 108, | |
| "[I+]": 109, | |
| "[CH]": 110, | |
| "%13": 111, | |
| "[Pb]": 112, | |
| "[14C]": 113, | |
| "[2H]": 114, | |
| "[P@]": 115, | |
| "[OH+]": 116, | |
| ")": 117, | |
| "[Tc]": 118, | |
| "[se+]": 119, | |
| "[NH-]": 120, | |
| "[nH]": 121, | |
| "B": 122, | |
| "[CH2]": 123, | |
| "[P+]": 124, | |
| "[se]": 125, | |
| "[In]": 126, | |
| "[Te]": 127, | |
| "[Se+]": 128, | |
| "%12": 129, | |
| "[S+]": 130, | |
| "o": 131, | |
| "[C]": 132, | |
| "[N@@+]": 133, | |
| "[n-]": 134, | |
| "6": 135, | |
| "[S@@]": 136, | |
| "[nH+]": 137, | |
| "[Si+]": 138, | |
| "[PH]": 139, | |
| "[Hg+]": 140, | |
| "[C@H]": 141, | |
| "[Ga]": 142, | |
| "[S@@+]": 143, | |
| "[NH3+]": 144, | |
| "[SiH]": 145, | |
| "[11c]": 146, | |
| "%20": 147, | |
| "%17": 148, | |
| "(": 149, | |
| "O": 150, | |
| "[IH2]": 151, | |
| "[As+]": 152, | |
| "F": 153, | |
| "[CH2-]": 154, | |
| "[Se]": 155, | |
| "[c+]": 156, | |
| "%23": 157, | |
| "[SH]": 158, | |
| "I": 159, | |
| "7": 160, | |
| "%22": 161, | |
| "[Os]": 162, | |
| "[OH]": 163, | |
| "p": 164, | |
| "[P@+]": 165, | |
| "[Ag+]": 166, | |
| "[Ag-4]": 167, | |
| "[Ag]": 168, | |
| "[Al-3]": 169, | |
| "[AsH3]": 170, | |
| "[AsH]": 171, | |
| "[At]": 172, | |
| "[B@-]": 173, | |
| "[B@@-]": 174, | |
| "[B]": 175, | |
| "[Ba]": 176, | |
| "[Br+2]": 177, | |
| "[BrH]": 178, | |
| "[Br]": 179, | |
| "[CH3]": 180, | |
| "[CaH2]": 181, | |
| "[Cs]": 182, | |
| "[FH]": 183, | |
| "[F]": 184, | |
| "[H]": 185, | |
| "[He]": 186, | |
| "[I+2]": 187, | |
| "[I+3]": 188, | |
| "[I]": 189, | |
| "[K]": 190, | |
| "[Kr]": 191, | |
| "[Li+]": 192, | |
| "[LiH]": 193, | |
| "[MgH2]": 194, | |
| "[Mg]": 195, | |
| "[NH3]": 196, | |
| "[N]": 197, | |
| "[Na]": 198, | |
| "[OH2]": 199, | |
| "[P@@+]": 200, | |
| "[PH2]": 201, | |
| "[P]": 202, | |
| "[Rb]": 203, | |
| "[SH+]": 204, | |
| "[SH2]": 205, | |
| "[S]": 206, | |
| "[Se-2]": 207, | |
| "[SeH2]": 208, | |
| "[Si@]": 209, | |
| "[SrH2]": 210, | |
| "[TeH]": 211, | |
| "[Xe]": 212, | |
| "[Zn+2]": 213, | |
| "[Zn-2]": 214, | |
| "[Zn]": 215, | |
| "[n]": 216, | |
| "[te+]": 217, | |
| "=O": 218, | |
| "CC": 219, | |
| "NC": 220, | |
| "CO": 221, | |
| "cc": 222, | |
| "CCC": 223, | |
| "CCCC": 224, | |
| "ccc": 225, | |
| "CCN": 226, | |
| "CCCN": 227, | |
| "CN": 228, | |
| "CNC": 229, | |
| "cccc": 230, | |
| "ccccc": 231, | |
| "N)": 232, | |
| "(N)": 233, | |
| "=O)": 234, | |
| "(=O)": 235, | |
| "C(=O)": 236, | |
| "C(=O)N": 237, | |
| "O)": 238, | |
| "(C": 239, | |
| "(C)": 240, | |
| "C(C)": 241, | |
| "C(C)C": 242, | |
| "CC(=O)": 243, | |
| "C(=O)O": 244, | |
| "C(=O)C": 245, | |
| "C(N)": 246, | |
| "CC(N)": 247, | |
| "C(N)=O": 248, | |
| "CO)": 249, | |
| "(CO)": 250, | |
| "CC(C)": 251, | |
| "CS": 252, | |
| "=N": 253, | |
| "CCNC": 254, | |
| "NC(=O)": 255, | |
| "=N)": 256, | |
| "(=N)": 257, | |
| "C(=N)": 258, | |
| "CC=O": 259, | |
| "CCCN)": 260, | |
| "(CCCN)": 261, | |
| "NC(=N)": 262, | |
| "Br)": 263, | |
| "(Br)": 264, | |
| "F)": 265, | |
| "(F)": 266, | |
| "S)": 267, | |
| "(S)": 268, | |
| "C)": 269, | |
| "(O)": 270, | |
| "CCS": 271, | |
| "CCCS": 272, | |
| "CCSC": 273, | |
| "cn": 274, | |
| "ccn": 275, | |
| "cccn": 276, | |
| "CSC": 277, | |
| "=C": 278, | |
| "CCO": 279, | |
| "(O": 280, | |
| "(=N": 281, | |
| "C(=N": 282, | |
| "c(O)": 283, | |
| "OC": 284, | |
| "SCC": 285, | |
| "ccc(F)": 286, | |
| "S(=O)": 287, | |
| "O=C": 288, | |
| "CCc": 289, | |
| "OC(=O)": 290, | |
| "C#": 291, | |
| "Cc": 292, | |
| "C=C": 293, | |
| "C=": 294, | |
| "#N": 295, | |
| "C#N": 296, | |
| "ccs": 297, | |
| "NO": 298, | |
| "C(O)": 299, | |
| "csc": 300, | |
| "ccc(C)": 301, | |
| "cc(Br)": 302, | |
| "ncn": 303, | |
| "CCNO": 304, | |
| "CCCO": 305, | |
| "CSS": 306, | |
| "CSSC": 307, | |
| "=CC": 308, | |
| "I)": 309, | |
| "(I)": 310, | |
| "CNO": 311, | |
| "N(C)": 312, | |
| "N(C)C": 313, | |
| "C(N)N": 314, | |
| "NOC": 315, | |
| "C(C)=O": 316, | |
| "#C": 317, | |
| "cco": 318, | |
| "NS": 319, | |
| "SN": 320, | |
| "c(=O)n": 321, | |
| "=S)": 322, | |
| "(=S)": 323, | |
| "c(N)c": 324, | |
| "N=C": 325, | |
| "SC": 326, | |
| "SSC": 327, | |
| "CCC(C)": 328, | |
| "c(=O)": 329, | |
| "C#N)": 330, | |
| "(C#N)": 331, | |
| "SS": 332, | |
| "=S": 333, | |
| "oc": 334, | |
| "co": 335, | |
| "no": 336, | |
| "N#": 337, | |
| "N#C": 338, | |
| "nc": 339, | |
| "sc": 340, | |
| "C(=N)N": 341, | |
| "C=O": 342, | |
| "c(F)": 343, | |
| "C(F)": 344, | |
| "c(I)": 345, | |
| "C(I)": 346, | |
| "cnn": 347, | |
| "cc(N)": 348, | |
| "NC(N)": 349, | |
| "OC)": 350, | |
| "(OC)": 351, | |
| "c(OC)": 352, | |
| "c(Br)": 353, | |
| "c(N)": 354, | |
| "cc(O)": 355, | |
| "CS)": 356, | |
| "(CS)": 357, | |
| "Oc": 358, | |
| "cnc": 359, | |
| "Cl)": 360, | |
| "(Cl)": 361, | |
| "c(Cl)": 362, | |
| "c(O)c": 363, | |
| "NCC": 364, | |
| "COC": 365, | |
| "OCC": 366, | |
| "Nc": 367, | |
| "ncc": 368, | |
| "cc(C)": 369, | |
| "nn": 370, | |
| "cs": 371, | |
| "c(C)c": 372, | |
| "COc": 373, | |
| "C(=O)c": 374, | |
| "c(C)": 375, | |
| "(CC)": 376, | |
| "NCc": 377, | |
| "nnc": 378, | |
| "C(O)C": 379, | |
| "=C(N)": 380, | |
| "C=CC": 381, | |
| "=C(N)N": 382, | |
| "N=C(N)": 383, | |
| "OCc": 384, | |
| "CC=C": 385, | |
| "CCl": 386, | |
| "CCn": 387, | |
| "CNc": 388, | |
| "CC(O)": 389, | |
| "NN": 390, | |
| "CSc": 391, | |
| "NC(C)": 392, | |
| "CS(=O)": 393, | |
| "C(CC)": 394, | |
| "C#C": 395, | |
| "C(C)N": 396, | |
| "CCO)": 397, | |
| "(CCO)": 398, | |
| "CN=C": 399, | |
| "n(C)": 400, | |
| "n(C)c": 401, | |
| "c(CO)": 402, | |
| "#Cc": 403, | |
| "=NC": 404 | |
| } | |
| } | |
| } |