{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 128, "strategy": "LongestFirst", "stride": 0 }, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "RobertaProcessing", "sep": [ "", 2 ], "cls": [ "", 1 ], "trim_offsets": true, "add_prefix_space": false }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "0": 5, "1": 6, "2": 7, "3": 8, "4": 9, "5": 10, "6": 11, "7": 12, "8": 13, "9": 14, "A": 15, "Ac": 16, "Ag": 17, "Al": 18, "Am": 19, "Ar": 20, "As": 21, "At": 22, "Au": 23, "B": 24, "Ba": 25, "Be": 26, "Bi": 27, "Bk": 28, "Br": 29, "C": 30, "Ca": 31, "Cd": 32, "Ce": 33, "Cf": 34, "Cl": 35, "Cm": 36, "Co": 37, "Cr": 38, "Cs": 39, "Cu": 40, "D": 41, "Dy": 42, "E": 43, "Er": 44, "Es": 45, "Eu": 46, "F": 47, "Fe": 48, "Fm": 49, "Fr": 50, "G": 51, "Ga": 52, "Gd": 53, "Ge": 54, "H": 55, "He": 56, "Hf": 57, "Hg": 58, "Ho": 59, "I": 60, "In": 61, "Ir": 62, "K": 63, "Kr": 64, "L": 65, "La": 66, "Li": 67, "Lr": 68, "Lu": 69, "M": 70, "Md": 71, "Mg": 72, "Mn": 73, "Mo": 74, "N": 75, "Na": 76, "Nb": 77, "Nd": 78, "Ne": 79, "Ni": 80, "No": 81, "Np": 82, "O": 83, "Os": 84, "P": 85, "Pa": 86, "Pb": 87, "Pd": 88, "Pm": 89, "Po": 90, "Pr": 91, "Pt": 92, "Pu": 93, "R": 94, "Ra": 95, "Rb": 96, "Re": 97, "Rh": 98, "Rn": 99, "Ru": 100, "S": 101, "Sb": 102, "Sc": 103, "Se": 104, "Si": 105, "Sm": 106, "Sn": 107, "Sr": 108, "T": 109, "Ta": 110, "Tb": 111, "Tc": 112, "Te": 113, "Th": 114, "Ti": 115, "Tl": 116, "Tm": 117, "U": 118, "V": 119, "W": 120, "X": 121, "Xe": 122, "Y": 123, "Yb": 124, "Z": 125, "Zn": 126, "Zr": 127, "a": 128, "b": 129, "c": 130, "d": 131, "e": 132, "f": 133, "g": 134, "h": 135, "i": 136, "k": 137, "l": 138, "m": 139, "n": 140, "o": 141, "p": 142, "r": 143, "s": 144, "t": 145, "u": 146, "y": 147 }, "merges": [ [ "H", "e" ], [ "L", "i" ], [ "B", "e" ], [ "N", "e" ], [ "N", "a" ], [ "M", "g" ], [ "A", "l" ], [ "S", "i" ], [ "C", "l" ], [ "A", "r" ], [ "C", "a" ], [ "S", "c" ], [ "T", "i" ], [ "C", "r" ], [ "M", "n" ], [ "F", "e" ], [ "C", "o" ], [ "N", "i" ], [ "C", "u" ], [ "Z", "n" ], [ "G", "a" ], [ "G", "e" ], [ "A", "s" ], [ "S", "e" ], [ "B", "r" ], [ "K", "r" ], [ "R", "b" ], [ "S", "r" ], [ "Z", "r" ], [ "N", "b" ], [ "M", "o" ], [ "T", "c" ], [ "R", "u" ], [ "R", "h" ], [ "P", "d" ], [ "A", "g" ], [ "C", "d" ], [ "I", "n" ], [ "S", "n" ], [ "S", "b" ], [ "T", "e" ], [ "X", "e" ], [ "C", "s" ], [ "B", "a" ], [ "L", "a" ], [ "C", "e" ], [ "P", "r" ], [ "N", "d" ], [ "P", "m" ], [ "S", "m" ], [ "E", "u" ], [ "G", "d" ], [ "T", "b" ], [ "D", "y" ], [ "H", "o" ], [ "E", "r" ], [ "T", "m" ], [ "Y", "b" ], [ "L", "u" ], [ "H", "f" ], [ "T", "a" ], [ "R", "e" ], [ "O", "s" ], [ "I", "r" ], [ "P", "t" ], [ "A", "u" ], [ "H", "g" ], [ "T", "l" ], [ "P", "b" ], [ "B", "i" ], [ "P", "o" ], [ "A", "t" ], [ "R", "n" ], [ "F", "r" ], [ "R", "a" ], [ "A", "c" ], [ "T", "h" ], [ "P", "a" ], [ "N", "p" ], [ "P", "u" ], [ "A", "m" ], [ "C", "m" ], [ "B", "k" ], [ "C", "f" ], [ "E", "s" ], [ "F", "m" ], [ "M", "d" ], [ "N", "o" ], [ "L", "r" ] ] } }