{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "special": true }, { "id": 15, "content": "+", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 20, "content": "0", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 21, "content": "1", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 22, "content": "2", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 23, "content": "3", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 24, "content": "4", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 25, "content": "5", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 26, "content": "6", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 27, "content": "7", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 28, "content": "8", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 29, "content": "9", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 38, "content": "B", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 39, "content": "C", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 42, "content": "F", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 44, "content": "H", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 45, "content": "I", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 50, "content": "N", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 51, "content": "O", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 52, "content": "P", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 55, "content": "S", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 274, "content": "Cl", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 284, "content": "Br", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 417, "content": "Si", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 512, "content": "Se", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 513, "content": "As", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 514, "content": "Sn", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "RobertaProcessing", "sep": [ "", 2 ], "cls": [ "", 0 ], "trim_offsets": true, "add_prefix_space": false }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "!": 5, "\"": 6, "#": 7, "$": 8, "%": 9, "&": 10, "'": 11, "(": 12, ")": 13, "*": 14, "+": 15, ",": 16, "-": 17, ".": 18, "/": 19, "0": 20, "1": 21, "2": 22, "3": 23, "4": 24, "5": 25, "6": 26, "7": 27, "8": 28, "9": 29, ":": 30, ";": 31, "<": 32, "=": 33, ">": 34, "?": 35, "@": 36, "A": 37, "B": 38, "C": 39, "D": 40, "E": 41, "F": 42, "G": 43, "H": 44, "I": 45, "J": 46, "K": 47, "L": 48, "M": 49, "N": 50, "O": 51, "P": 52, "Q": 53, "R": 54, "S": 55, "T": 56, "U": 57, "V": 58, "W": 59, "X": 60, "Y": 61, "Z": 62, "[": 63, "\\": 64, "]": 65, "^": 66, "_": 67, "`": 68, "a": 69, "b": 70, "c": 71, "d": 72, "e": 73, "f": 74, "g": 75, "h": 76, "i": 77, "j": 78, "k": 79, "l": 80, "m": 81, "n": 82, "o": 83, "p": 84, "q": 85, "r": 86, "s": 87, "t": 88, "u": 89, "v": 90, "w": 91, "x": 92, "y": 93, "z": 94, "{": 95, "|": 96, "}": 97, "~": 98, "¡": 99, "¢": 100, "£": 101, "¤": 102, "¥": 103, "¦": 104, "§": 105, "¨": 106, "©": 107, "ª": 108, "«": 109, "¬": 110, "®": 111, "¯": 112, "°": 113, "±": 114, "²": 115, "³": 116, "´": 117, "µ": 118, "¶": 119, "·": 120, "¸": 121, "¹": 122, "º": 123, "»": 124, "¼": 125, "½": 126, "¾": 127, "¿": 128, "À": 129, "Á": 130, "Â": 131, "Ã": 132, "Ä": 133, "Å": 134, "Æ": 135, "Ç": 136, "È": 137, "É": 138, "Ê": 139, "Ë": 140, "Ì": 141, "Í": 142, "Î": 143, "Ï": 144, "Ð": 145, "Ñ": 146, "Ò": 147, "Ó": 148, "Ô": 149, "Õ": 150, "Ö": 151, "×": 152, "Ø": 153, "Ù": 154, "Ú": 155, "Û": 156, "Ü": 157, "Ý": 158, "Þ": 159, "ß": 160, "à": 161, "á": 162, "â": 163, "ã": 164, "ä": 165, "å": 166, "æ": 167, "ç": 168, "è": 169, "é": 170, "ê": 171, "ë": 172, "ì": 173, "í": 174, "î": 175, "ï": 176, "ð": 177, "ñ": 178, "ò": 179, "ó": 180, "ô": 181, "õ": 182, "ö": 183, "÷": 184, "ø": 185, "ù": 186, "ú": 187, "û": 188, "ü": 189, "ý": 190, "þ": 191, "ÿ": 192, "Ā": 193, "ā": 194, "Ă": 195, "ă": 196, "Ą": 197, "ą": 198, "Ć": 199, "ć": 200, "Ĉ": 201, "ĉ": 202, "Ċ": 203, "ċ": 204, "Č": 205, "č": 206, "Ď": 207, "ď": 208, "Đ": 209, "đ": 210, "Ē": 211, "ē": 212, "Ĕ": 213, "ĕ": 214, "Ė": 215, "ė": 216, "Ę": 217, "ę": 218, "Ě": 219, "ě": 220, "Ĝ": 221, "ĝ": 222, "Ğ": 223, "ğ": 224, "Ġ": 225, "ġ": 226, "Ģ": 227, "ģ": 228, "Ĥ": 229, "ĥ": 230, "Ħ": 231, "ħ": 232, "Ĩ": 233, "ĩ": 234, "Ī": 235, "ī": 236, "Ĭ": 237, "ĭ": 238, "Į": 239, "į": 240, "İ": 241, "ı": 242, "IJ": 243, "ij": 244, "Ĵ": 245, "ĵ": 246, "Ķ": 247, "ķ": 248, "ĸ": 249, "Ĺ": 250, "ĺ": 251, "Ļ": 252, "ļ": 253, "Ľ": 254, "ľ": 255, "Ŀ": 256, "ŀ": 257, "Ł": 258, "ł": 259, "Ń": 260, "cc": 261, "CC": 262, "(=": 263, "ccc": 264, "OC": 265, "CCCC": 266, "CCC": 267, "ccccc": 268, "NC": 269, "CO": 270, ")(": 271, "Cc": 272, "nc": 273, "Cl": 274, "Nc": 275, "nH": 276, "12": 277, "cccc": 278, "COc": 279, "OCC": 280, "(-": 281, ")=": 282, "CCCCCCCC": 283, "Br": 284, "COC": 285, "+]": 286, ")[": 287, "CCCCC": 288, "-]": 289, "Oc": 290, "oc": 291, "CCN": 292, "CN": 293, ")(=": 294, "-])": 295, "21": 296, "([": 297, "CCc": 298, "+](=": 299, "cn": 300, "CS": 301, "23": 302, "CCCN": 303, "CCCCCC": 304, "ncc": 305, "CCOC": 306, "cnc": 307, "NCC": 308, "sc": 309, "CNC": 310, "CCCCN": 311, "nn": 312, "NCc": 313, "CCCCCCCCC": 314, "Cn": 315, "COP": 316, "OP": 317, "CCCCCCC": 318, "NCCCC": 319, "cccnc": 320, "OCc": 321, "ncnc": 322, "CCNCC": 323, "+]([": 324, "NS": 325, "CCOCC": 326, "+](": 327, "CCl": 328, "32": 329, "nnc": 330, "OCO": 331, "Clc": 332, "34": 333, "CCO": 334, "43": 335, "CCNC": 336, "=[": 337, "NCCCCC": 338, "SC": 339, "CCOc": 340, "ccnc": 341, "CSc": 342, "ccncc": 343, "Sc": 344, "ccccn": 345, "CCCCCCCCCCCCCCCC": 346, "Fc": 347, "NNC": 348, "NN": 349, "CCCc": 350, "NCCc": 351, "cccs": 352, "13": 353, "SCC": 354, "ncn": 355, "Brc": 356, "CSCCC": 357, "csc": 358, "CNc": 359, "coc": 360, "cs": 361, "CCn": 362, "NCCC": 363, "ccoc": 364, "ccco": 365, ")([": 366, "ncccc": 367, "ccn": 368, "OCCN": 369, "cnn": 370, "CCCCCCCCCCCCCC": 371, "CCCCCn": 372, "nccc": 373, "FC": 374, "CCOP": 375, "CCCNC": 376, "CCCCCCCCCCCCCCC": 377, "OCCc": 378, "NO": 379, "no": 380, "NCCO": 381, "NCCN": 382, "OCCOCC": 383, "45": 384, "CCS": 385, "CCCl": 386, "CCCCCCCCCCCC": 387, "CCNc": 388, "CSC": 389, "CCCCCCCCCC": 390, "cncc": 391, "NNc": 392, "OCCO": 393, "OS": 394, "CNCc": 395, "CCCCCCCCCCCCCCCCC": 396, "CCCCc": 397, "CCCCCCCCCCC": 398, "31": 399, "SCc": 400, "CCCO": 401, "COCC": 402, "CNCC": 403, "CCCn": 404, "occc": 405, "noc": 406, "cncn": 407, "CBr": 408, "CCCCCF": 409, "OCCC": 410, ")-": 411, "CCCCn": 412, "nccs": 413, "56": 414, "ccsc": 415, "CCCCCCCCCCCCC": 416, "Si": 417, "sccc": 418, "CCSc": 419, "CNS": 420, "NCCS": 421, "ON": 422, "occ": 423, "CCSC": 424, "](": 425, "ncccn": 426, "OCCCC": 427, "CCNCc": 428, "CCCCCCCCCCCCCCCCCC": 429, "CCNS": 430, "cnccc": 431, "54": 432, "Nn": 433, "CCCOc": 434, "nccn": 435, "CP": 436, "SCCNC": 437, "CCCCNC": 438, "nccnc": 439, "NCCCN": 440, "nonc": 441, "14": 442, "on": 443, "cnccn": 444, "CSCC": 445, "cccn": 446, "24": 447, "ns": 448, "CCCOC": 449, "CNCCc": 450, "On": 451, "OCCCN": 452, "CCCCOC": 453, "CCCCCO": 454, "CCCNCC": 455, "OO": 456, "CCCCCc": 457, "CCCCCN": 458, "nnnn": 459, "35": 460, "CCCCOCC": 461, "CCCS": 462, "CON": 463, "ClCc": 464, "nnn": 465, "onc": 466, "CCCCCCCCc": 467, "NCCNC": 468, "Ic": 469, "NOCC": 470, "CCCCCNC": 471, "CNCCC": 472, "nsnc": 473, "65": 474, "ccnn": 475, "CNCCN": 476, "cnnc": 477, "CCCNS": 478, "CCCCCCCCCCCCCCCCOCC": 479, "CCSCC": 480, "CCCCCCc": 481, "OCCOc": 482, "COCc": 483, "nnnc": 484, "OCCOCCOCCOCC": 485, "NCCCCCC": 486, "scc": 487, "CCCCOc": 488, "cncnc": 489, "46": 490, "NCCCCN": 491, "NCCCc": 492, "SSc": 493, "CCCCNc": 494, "CCCNc": 495, "-][": 496, "67": 497, "SCCC": 498, "SSC": 499, "OCCOC": 500, "341": 501, "SP": 502, "CCP": 503, "OCOC": 504, "COCCOCC": 505, "ncncc": 506, "CCCCCCO": 507, "nncs": 508, "NCCCn": 509, "NOS": 510, "10": 511 }, "merges": [ "c c", "C C", "( =", "cc c", "O C", "CC CC", "CC C", "cc ccc", "N C", "C O", ") (", "C c", "n c", "C l", "N c", "n H", "1 2", "cc cc", "CO c", "O CC", "( -", ") =", "CCCC CCCC", "B r", "C OC", "+ ]", ") [", "CCCC C", "- ]", "O c", "o c", "CC N", "C N", ") (=", "-] )", "2 1", "( [", "CC c", "+] (=", "c n", "C S", "2 3", "CCC N", "CCCC CC", "n cc", "CC OC", "c nc", "N CC", "s c", "C NC", "CCCC N", "n n", "NC c", "CCCCCCCC C", "C n", "CO P", "O P", "CCCC CCC", "N CCCC", "ccc nc", "OC c", "nc nc", "CCN CC", "+] ([", "N S", "CC OCC", "+] (", "CC l", "3 2", "n nc", "OC O", "Cl c", "3 4", "CC O", "4 3", "CC NC", "= [", "N CCCCC", "S C", "CC Oc", "cc nc", "CS c", "cc ncc", "S c", "cccc n", "CCCCCCCC CCCCCCCC", "F c", "N NC", "N N", "CCC c", "N CCc", "ccc s", "1 3", "S CC", "nc n", "Br c", "CS CCC", "c sc", "C Nc", "c oc", "c s", "CC n", "N CCC", "cc oc", "ccc o", ")( [", "n cccc", "cc n", "OCC N", "cn n", "CCCCCCCC CCCCCC", "CCCCC n", "n ccc", "F C", "CC OP", "CCC NC", "CCCCCCCC CCCCCCC", "OCC c", "N O", "n o", "NCC O", "N CCN", "OCC OCC", "4 5", "CC S", "CCC l", "CCCCCCCC CCCC", "CC Nc", "CS C", "CCCCCCCC CC", "cn cc", "N Nc", "OCC O", "O S", "CNC c", "CCCCCCCC CCCCCCCCC", "CCCC c", "CCCCCCCC CCC", "3 1", "S Cc", "CCC O", "CO CC", "CN CC", "CCC n", "o ccc", "n oc", "cnc n", "C Br", "CCCCC F", "O CCC", ") -", "CCCC n", "ncc s", "5 6", "cc sc", "CCCCCCCC CCCCC", "S i", "s ccc", "CC Sc", "CN S", "NCC S", "O N", "o cc", "CC SC", "] (", "nccc n", "O CCCC", "CC NCc", "CCCCCCCCCCCCCCCC CC", "CCN S", "cn ccc", "5 4", "N n", "CCC Oc", "ncc n", "C P", "S CCNC", "CCCC NC", "ncc nc", "N CCCN", "no nc", "1 4", "o n", "cn ccn", "CS CC", "ccc n", "2 4", "n s", "CCC OC", "CN CCc", "O n", "O CCCN", "CCCC OC", "CCCC CO", "CCCN CC", "O O", "CCCC Cc", "CCCCC N", "nn nn", "3 5", "CCCC OCC", "CCC S", "CO N", "Cl Cc", "nn n", "o nc", "CCCCCCCC c", "NCC NC", "I c", "N OCC", "CCCCC NC", "CN CCC", "ns nc", "6 5", "cc nn", "CN CCN", "cn nc", "CCCN S", "CCCCCCCCCCCCCCCC OCC", "CC SCC", "CCCC CCc", "OCC Oc", "COC c", "nn nc", "OCCOCC OCCOCC", "N CCCCCC", "s cc", "CCCC Oc", "cnc nc", "4 6", "N CCCCN", "N CCCc", "S Sc", "CCCC Nc", "CCC Nc", "-] [", "6 7", "S CCC", "S SC", "OCC OC", "34 1", "S P", "CC P", "OC OC", "CO CCOCC", "nc ncc", "CCCCCC O", "nnc s", "NCCC n", "NO S", "1 0" ] } }