{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 2, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [ { "id": 0, "content": "[START]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "Split", "pattern": { "Regex": "\\[START\\]|\\[STOP\\]|\\[MASK\\]|\\[PAD\\]|@TH|@AL|@SP|@TB|@OH|He|Li|Be|Ne|Na|Mg|Al|Si|Cl|Ar|Ca|Sc|Ti|Cr|Mn|Fe|Co|Ni|Cu|Zn|Ga|Ge|As|Se|Br|Kr|Rb|Sr|Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|In|Sn|Sb|Te|Xe|Cs|Ba|La|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Ho|Er|Tm|Yb|Lu|Hf|Ta|Re|Os|Ir|Pt|Au|Hg|Tl|Pb|Bi|Po|At|Rn|Fr|Ra|Ac|Th|Pa|Np|Pu|Am|Cm|Bk|Cf|Es|Fm|Md|No|Lr|Rf|Db|Sg|Bh|Hs|Mt|Ds|Rg|Cn|Nh|Fl|Mc|Lv|Ts|Og|\\-|\\+|\\#|\\$|\\\\|@@|\\(|\\)|\\[|\\]|\\.|\\*|H|B|C|N|O|F|P|S|K|V|Y|I|W|U|=|:|/|@|0|1|2|3|4|5|6|7|8|9|%" }, "behavior": "Isolated", "invert": false } ] }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[START]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[STOP]", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "[START]": { "id": "[START]", "ids": [ 0 ], "tokens": [ "[START]" ] }, "[STOP]": { "id": "[STOP]", "ids": [ 1 ], "tokens": [ "[STOP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "", "cleanup": true }, "model": { "type": "WordLevel", "vocab": { "[START]": 0, "[STOP]": 1, "[PAD]": 2, "[MASK]": 3, "H": 4, "He": 5, "Li": 6, "Be": 7, "B": 8, "C": 9, "N": 10, "O": 11, "F": 12, "Ne": 13, "Na": 14, "Mg": 15, "Al": 16, "Si": 17, "P": 18, "S": 19, "Cl": 20, "Ar": 21, "K": 22, "Ca": 23, "Sc": 24, "Ti": 25, "V": 26, "Cr": 27, "Mn": 28, "Fe": 29, "Co": 30, "Ni": 31, "Cu": 32, "Zn": 33, "Ga": 34, "Ge": 35, "As": 36, "Se": 37, "Br": 38, "Kr": 39, "Rb": 40, "Sr": 41, "Y": 42, "Zr": 43, "Nb": 44, "Mo": 45, "Tc": 46, "Ru": 47, "Rh": 48, "Pd": 49, "Ag": 50, "Cd": 51, "In": 52, "Sn": 53, "Sb": 54, "Te": 55, "I": 56, "Xe": 57, "Cs": 58, "Ba": 59, "La": 60, "Ce": 61, "Pr": 62, "Nd": 63, "Pm": 64, "Sm": 65, "Eu": 66, "Gd": 67, "Tb": 68, "Dy": 69, "Ho": 70, "Er": 71, "Tm": 72, "Yb": 73, "Lu": 74, "Hf": 75, "Ta": 76, "W": 77, "Re": 78, "Os": 79, "Ir": 80, "Pt": 81, "Au": 82, "Hg": 83, "Tl": 84, "Pb": 85, "Bi": 86, "Po": 87, "At": 88, "Rn": 89, "Fr": 90, "Ra": 91, "Ac": 92, "Th": 93, "Pa": 94, "U": 95, "Np": 96, "Pu": 97, "Am": 98, "Cm": 99, "Bk": 100, "Cf": 101, "Es": 102, "Fm": 103, "Md": 104, "No": 105, "Lr": 106, "Rf": 107, "Db": 108, "Sg": 109, "Bh": 110, "Hs": 111, "Mt": 112, "Ds": 113, "Rg": 114, "Cn": 115, "Nh": 116, "Fl": 117, "Mc": 118, "Lv": 119, "Ts": 120, "Og": 121, "-": 122, "+": 123, "=": 124, "#": 125, "$": 126, ":": 127, "/": 128, "\\": 129, "@": 130, "@@": 131, "@TH": 132, "@AL": 133, "@SP": 134, "@TB": 135, "@OH": 136, "0": 137, "1": 138, "2": 139, "3": 140, "4": 141, "5": 142, "6": 143, "7": 144, "8": 145, "9": 146, "(": 147, ")": 148, "[": 149, "]": 150, ".": 151, "*": 152, "%": 153 }, "unk_token": "[UNK]" } }