{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[cls]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 255, "content": "[bos]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 256, "content": "[pad]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 257, "content": "[eos]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 258, "content": "[sep]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 259, "content": "[unk]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "WhitespaceSplit" }, { "type": "Split", "pattern": { "Regex": "(\\[[a-z]+]|\\[[A-Z][a-z]?|Br?|Cl?|N|O|S|P|F|I|H[2-4]?|\\[|\\]|,|;|\\(|\\)|\\.|=|\\#|-|\\+|\\\\|/|:|~|@|\\?|>|\\*|\\$|%|(?<=%)[0-9]{2}|(?<=,)[0-9]{2}(?=\\))|(?<=\\()[0-9]{2}(?=,)|(?<=:)[0-9]{2}(?=\\])|[0-9]|\\|)" }, "behavior": "Isolated", "invert": false } ] }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[cls]": 0, "[H": 1, "[He": 2, "[Li": 3, "[Be": 4, "[B": 5, "[C": 6, "[N": 7, "[O": 8, "[F": 9, "[Ne": 10, "[Na": 11, "[Mg": 12, "[Al": 13, "[Si": 14, "[P": 15, "[S": 16, "[Cl": 17, "[Ar": 18, "[K": 19, "[Ca": 20, "[Sc": 21, "[Ti": 22, "[V": 23, "[Cr": 24, "[Mn": 25, "[Fe": 26, "[Co": 27, "[Ni": 28, "[Cu": 29, "[Zn": 30, "[Ga": 31, "[Ge": 32, "[As": 33, "[Se": 34, "[Br": 35, "[Kr": 36, "[Rb": 37, "[Sr": 38, "[Y": 39, "[Zr": 40, "[Nb": 41, "[Mo": 42, "[Tc": 43, "[Ru": 44, "[Rh": 45, "[Pd": 46, "[Ag": 47, "[Cd": 48, "[In": 49, "[Sn": 50, "[Sb": 51, "[Te": 52, "[I": 53, "[Xe": 54, "[Cs": 55, "[Ba": 56, "[La": 57, "[Ce": 58, "[Pr": 59, "[Nd": 60, "[Pm": 61, "[Sm": 62, "[Eu": 63, "[Gd": 64, "[Tb": 65, "[Dy": 66, "[Ho": 67, "[Er": 68, "[Tm": 69, "[Yb": 70, "[Lu": 71, "[Hf": 72, "[Ta": 73, "[W": 74, "[Re": 75, "[Os": 76, "[Ir": 77, "[Pt": 78, "[Au": 79, "[Hg": 80, "[Tl": 81, "[Pb": 82, "[Bi": 83, "[Po": 84, "[At": 85, "[Rn": 86, "[Fr": 87, "[Ra": 88, "[Ac": 89, "[Th": 90, "[Pa": 91, "[U": 92, "[Np": 93, "[Pu": 94, "[Am": 95, "[Cm": 96, "[Bk": 97, "[Cf": 98, "[Es": 99, "[Fm": 100, "[Md": 101, "[No": 102, "[Lr": 103, "[Rf": 104, "[Db": 105, "[Sg": 106, "[Bh": 107, "[Hs": 108, "[Mt": 109, "[Ds": 110, "[Rg": 111, "[Cn": 112, "[Nh": 113, "[Fl": 114, "[Mc": 115, "[Lv": 116, "[Ts": 117, "[Og": 118, "C": 119, "Cl": 120, "B": 121, "Br": 122, "N": 123, "O": 124, "S": 125, "P": 126, "F": 127, "I": 128, "H": 129, "H2": 130, "H3": 131, "H4": 132, "[": 133, "]": 134, ":": 135, "=": 136, "#": 137, "$": 138, "\\": 139, "/": 140, "(": 141, ")": 142, ".": 143, "[prod]": 144, "[reac]": 145, "[mech]": 146, "+": 147, "-": 148, "@": 149, "*": 150, "0": 151, "1": 152, "2": 153, "3": 154, "4": 155, "5": 156, "6": 157, "7": 158, "8": 159, "9": 160, "10": 161, "11": 162, "12": 163, "13": 164, "14": 165, "15": 166, "16": 167, "17": 168, "18": 169, "19": 170, "20": 171, "21": 172, "22": 173, "23": 174, "24": 175, "25": 176, "26": 177, "27": 178, "28": 179, "29": 180, "30": 181, "31": 182, "32": 183, "33": 184, "34": 185, "35": 186, "36": 187, "37": 188, "38": 189, "39": 190, "40": 191, "41": 192, "42": 193, "43": 194, "44": 195, "45": 196, "46": 197, "47": 198, "48": 199, "49": 200, "50": 201, "51": 202, "52": 203, "53": 204, "54": 205, "55": 206, "56": 207, "57": 208, "58": 209, "59": 210, "60": 211, "61": 212, "62": 213, "63": 214, "64": 215, "65": 216, "66": 217, "67": 218, "68": 219, "69": 220, "70": 221, "71": 222, "72": 223, "73": 224, "74": 225, "75": 226, "76": 227, "77": 228, "78": 229, "79": 230, "80": 231, "81": 232, "82": 233, "83": 234, "84": 235, "85": 236, "86": 237, "87": 238, "88": 239, "89": 240, "90": 241, "91": 242, "92": 243, "93": 244, "94": 245, "95": 246, "96": 247, "97": 248, "98": 249, "99": 250, "%": 251, ",": 252, ";": 253, "|": 254, "[bos]": 255, "[pad]": 256, "[eos]": 257, "[sep]": 258, "[unk]": 259 }, "unk_token": "[unk]" } }