{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 159,
      "content": "[BOS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 160,
      "content": "[EOS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 161,
      "content": "[SEP]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 162,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 163,
      "content": "[CLS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 164,
      "content": "[MASK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Replace",
        "pattern": {
          "String": "++"
        },
        "content": "+2"
      },
      {
        "type": "Replace",
        "pattern": {
          "String": "--"
        },
        "content": "-2"
      },
      {
        "type": "Strip",
        "strip_left": true,
        "strip_right": true
      }
    ]
  },
  "pre_tokenizer": {
    "outer": "Br?|Cl?|F|I|N|O|P|S|b|c|n|o|p|s|\\*|[\\.\\-=\\#\\$:/\\\\]|\\d|%|\\(|\\)|\\[.*?]",
    "inner": "(\\d+)?(A[cglmrstu]|B[aehikr]?|C[adeflmnorsu]?|D[bsy]|E[rsu]|F[elmr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airuv]|M[cdgnot]|N[abdehiop]?|O[gs]?|P[abdmortu]?|R[abefghnu]|S[bcegimnr]?|T[abcehilms]|U|V|W|Xe|Yb?|Z[nr]|as|b|c|n|o|p|se?|\\*)(?:(@(?:@|AL|OH|SP|T[BH])?)(\\d{1,2})?)?(?:(H)(\\d)?)?(?:([+-]{1,2})(\\d{0,2}))?(?:(:)(\\d+))?"
  },
  "post_processor": null,
  "decoder": {
    "type": "Fuse"
  },
  "model": {
    "type": "WordLevel",
    "vocab": {
      "[UNK]": 0,
      "#": 1,
      "$": 2,
      "%": 3,
      "(": 4,
      ")": 5,
      "*": 6,
      "+": 7,
      "-": 8,
      ".": 9,
      "/": 10,
      "0": 11,
      "1": 12,
      "2": 13,
      "3": 14,
      "4": 15,
      "5": 16,
      "6": 17,
      "7": 18,
      "8": 19,
      "9": 20,
      ":": 21,
      "=": 22,
      "@": 23,
      "@@": 24,
      "@AL": 25,
      "@OH": 26,
      "@SP": 27,
      "@TB": 28,
      "@TH": 29,
      "Ac": 30,
      "Ag": 31,
      "Al": 32,
      "Am": 33,
      "Ar": 34,
      "As": 35,
      "At": 36,
      "Au": 37,
      "B": 38,
      "Ba": 39,
      "Be": 40,
      "Bh": 41,
      "Bi": 42,
      "Bk": 43,
      "Br": 44,
      "C": 45,
      "Ca": 46,
      "Cd": 47,
      "Ce": 48,
      "Cf": 49,
      "Cl": 50,
      "Cm": 51,
      "Cn": 52,
      "Co": 53,
      "Cr": 54,
      "Cs": 55,
      "Cu": 56,
      "Db": 57,
      "Ds": 58,
      "Dy": 59,
      "Er": 60,
      "Es": 61,
      "Eu": 62,
      "F": 63,
      "Fe": 64,
      "Fl": 65,
      "Fm": 66,
      "Fr": 67,
      "Ga": 68,
      "Gd": 69,
      "Ge": 70,
      "H": 71,
      "He": 72,
      "Hf": 73,
      "Hg": 74,
      "Ho": 75,
      "Hs": 76,
      "I": 77,
      "In": 78,
      "Ir": 79,
      "K": 80,
      "Kr": 81,
      "La": 82,
      "Li": 83,
      "Lr": 84,
      "Lu": 85,
      "Lv": 86,
      "Mc": 87,
      "Md": 88,
      "Mg": 89,
      "Mn": 90,
      "Mo": 91,
      "Mt": 92,
      "N": 93,
      "Na": 94,
      "Nb": 95,
      "Nd": 96,
      "Ne": 97,
      "Nh": 98,
      "Ni": 99,
      "No": 100,
      "Np": 101,
      "O": 102,
      "Og": 103,
      "Os": 104,
      "P": 105,
      "Pa": 106,
      "Pb": 107,
      "Pd": 108,
      "Pm": 109,
      "Po": 110,
      "Pr": 111,
      "Pt": 112,
      "Pu": 113,
      "Ra": 114,
      "Rb": 115,
      "Re": 116,
      "Rf": 117,
      "Rg": 118,
      "Rh": 119,
      "Rn": 120,
      "Ru": 121,
      "S": 122,
      "Sb": 123,
      "Sc": 124,
      "Se": 125,
      "Sg": 126,
      "Si": 127,
      "Sm": 128,
      "Sn": 129,
      "Sr": 130,
      "Ta": 131,
      "Tb": 132,
      "Tc": 133,
      "Te": 134,
      "Th": 135,
      "Ti": 136,
      "Tl": 137,
      "Tm": 138,
      "Ts": 139,
      "U": 140,
      "V": 141,
      "W": 142,
      "Xe": 143,
      "Y": 144,
      "Yb": 145,
      "Zn": 146,
      "Zr": 147,
      "[": 148,
      "\\": 149,
      "]": 150,
      "as": 151,
      "b": 152,
      "c": 153,
      "n": 154,
      "o": 155,
      "p": 156,
      "s": 157,
      "se": 158
    },
    "unk_token": "[UNK]"
  }
}