{
  "version": "1.0",
  "truncation": null,
  "padding": {
    "strategy": "BatchLongest",
    "direction": "Right",
    "pad_to_multiple_of": null,
    "pad_id": 0,
    "pad_type_id": 0,
    "pad_token": "[PAD]"
  },
  "added_tokens": [
    {
      "id": 0,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "[BOS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "[EOS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "[MASK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Sequence",
    "pretokenizers": [
      {
        "type": "Split",
        "pattern": {
          "Regex": "\\[[^\\[\\]]+\\]|@TH|@AL|@SP|@TB|@OH|Cl|Br|se|as|\\-|\\#|\\$|\\\\|\\.|\\+|\\-|@@|\\(|\\)|\\*|He|Li|Be|Ne|Na|Mg|Al|Si|Cl|Ar|Ca|Ti|Cr|Mn|Fe|Ni|Cu|Zn|Ga|Ge|As|Se|Br|Kr|Rb|Sr|Zr|Mo|Tc|Ru|Rh|Pd|Ag|Cd|Te|Xe|Ba|La|Hf|Ta|Re|Ir|Pt|Au|Hg|Tl|Bi|At|Rn|Fr|Ra|Ac|Rf|Db|Sg|Bh|Mt|Ds|Rg|Nh|Fl|Mc|Lv|Ts|Og|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Er|Tm|Lu|Th|Pa|Pu|Am|Cm|Bk|Cf|Es|Fm|Md|Lr|te|si|B|C|N|O|P|S|F|I|b|c|n|o|p|s|=|:|/|@|0|1|2|3|4|5|6|7|8|9|%|H|B|C|N|O|F|P|S|K|V|Y|I|W|U"
        },
        "behavior": "Isolated",
        "invert": false
      },
      {
        "type": "Split",
        "pattern": {
          "Regex": "@TH|@AL|@SP|@TB|@OH|He|Li|Be|Ne|Na|Mg|Al|Si|Cl|Ar|Ca|Sc|Ti|Cr|Mn|Fe|Co|Ni|Cu|Zn|Ga|Ge|As|Se|Br|Kr|Rb|Sr|Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|In|Sn|Sb|Te|Xe|Cs|Ba|La|Hf|Ta|Re|Os|Ir|Pt|Au|Hg|Tl|Pb|Bi|Po|At|Rn|Fr|Ra|Ac|Rf|Db|Sg|Bh|Hs|Mt|Ds|Rg|Cn|Nh|Fl|Mc|Lv|Ts|Og|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Ho|Er|Tm|Yb|Lu|Th|Pa|Np|Pu|Am|Cm|Bk|Cf|Es|Fm|Md|No|Lr|te|si|se|as|\\-|\\#|\\$|\\\\|\\.|\\+|\\-|@@|\\[|\\]|H|B|C|N|O|F|P|S|K|V|Y|I|W|U|b|c|n|o|p|s|=|:|/|@|0|1|2|3|4|5|6|7|8|9|%|%"
        },
        "behavior": "Isolated",
        "invert": false
      }
    ]
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      {
        "SpecialToken": {
          "id": "[BOS]",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "[EOS]",
          "type_id": 0
        }
      }
    ],
    "pair": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "B",
          "type_id": 1
        }
      }
    ],
    "special_tokens": {
      "[BOS]": {
        "id": "[BOS]",
        "ids": [
          1
        ],
        "tokens": [
          "[BOS]"
        ]
      },
      "[EOS]": {
        "id": "[EOS]",
        "ids": [
          2
        ],
        "tokens": [
          "[EOS]"
        ]
      }
    }
  },
  "decoder": {
    "type": "WordPiece",
    "prefix": "",
    "cleanup": true
  },
  "model": {
    "type": "WordLevel",
    "vocab": {
      "[PAD]": 0,
      "[BOS]": 1,
      "[EOS]": 2,
      "[MASK]": 3,
      "[UNK]": 4,
      "H": 5,
      "He": 6,
      "Li": 7,
      "Be": 8,
      "B": 9,
      "C": 10,
      "N": 11,
      "O": 12,
      "F": 13,
      "Ne": 14,
      "Na": 15,
      "Mg": 16,
      "Al": 17,
      "Si": 18,
      "P": 19,
      "S": 20,
      "Cl": 21,
      "Ar": 22,
      "K": 23,
      "Ca": 24,
      "Sc": 25,
      "Ti": 26,
      "V": 27,
      "Cr": 28,
      "Mn": 29,
      "Fe": 30,
      "Co": 31,
      "Ni": 32,
      "Cu": 33,
      "Zn": 34,
      "Ga": 35,
      "Ge": 36,
      "As": 37,
      "Se": 38,
      "Br": 39,
      "Kr": 40,
      "Rb": 41,
      "Sr": 42,
      "Y": 43,
      "Zr": 44,
      "Nb": 45,
      "Mo": 46,
      "Tc": 47,
      "Ru": 48,
      "Rh": 49,
      "Pd": 50,
      "Ag": 51,
      "Cd": 52,
      "In": 53,
      "Sn": 54,
      "Sb": 55,
      "Te": 56,
      "I": 57,
      "Xe": 58,
      "Cs": 59,
      "Ba": 60,
      "La": 61,
      "Hf": 62,
      "Ta": 63,
      "W": 64,
      "Re": 65,
      "Os": 66,
      "Ir": 67,
      "Pt": 68,
      "Au": 69,
      "Hg": 70,
      "Tl": 71,
      "Pb": 72,
      "Bi": 73,
      "Po": 74,
      "At": 75,
      "Rn": 76,
      "Fr": 77,
      "Ra": 78,
      "Ac": 79,
      "Rf": 80,
      "Db": 81,
      "Sg": 82,
      "Bh": 83,
      "Hs": 84,
      "Mt": 85,
      "Ds": 86,
      "Rg": 87,
      "Cn": 88,
      "Nh": 89,
      "Fl": 90,
      "Mc": 91,
      "Lv": 92,
      "Ts": 93,
      "Og": 94,
      "Ce": 95,
      "Pr": 96,
      "Nd": 97,
      "Pm": 98,
      "Sm": 99,
      "Eu": 100,
      "Gd": 101,
      "Tb": 102,
      "Dy": 103,
      "Ho": 104,
      "Er": 105,
      "Tm": 106,
      "Yb": 107,
      "Lu": 108,
      "Th": 109,
      "Pa": 110,
      "U": 111,
      "Np": 112,
      "Pu": 113,
      "Am": 114,
      "Cm": 115,
      "Bk": 116,
      "Cf": 117,
      "Es": 118,
      "Fm": 119,
      "Md": 120,
      "No": 121,
      "Lr": 122,
      "te": 123,
      "si": 124,
      "b": 125,
      "c": 126,
      "n": 127,
      "o": 128,
      "p": 129,
      "s": 130,
      "se": 131,
      "as": 132,
      "-": 133,
      "=": 134,
      "#": 135,
      "$": 136,
      ":": 137,
      "/": 138,
      "\\": 139,
      ".": 140,
      "+": 141,
      "@": 142,
      "@@": 143,
      "@TH": 144,
      "@AL": 145,
      "@SP": 146,
      "@TB": 147,
      "@OH": 148,
      "(": 149,
      ")": 150,
      "*": 151,
      "0": 152,
      "1": 153,
      "2": 154,
      "3": 155,
      "4": 156,
      "5": 157,
      "6": 158,
      "7": 159,
      "8": 160,
      "9": 161,
      "%": 162,
      "[": 163,
      "]": 164
    },
    "unk_token": "[UNK]"
  }
}