{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<mask>",
"single_word": false,
"lstrip": true,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "RobertaProcessing",
"sep": [
"",
2
],
"cls": [
"",
0
],
"trim_offsets": true,
"add_prefix_space": false
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<s>": 0,
"<pad>": 1,
"</s>": 2,
"<unk>": 3,
"<mask>": 4,
"!": 5,
"\"": 6,
"#": 7,
"$": 8,
"%": 9,
"&": 10,
"'": 11,
"(": 12,
")": 13,
"*": 14,
"+": 15,
",": 16,
"-": 17,
".": 18,
"/": 19,
"0": 20,
"1": 21,
"2": 22,
"3": 23,
"4": 24,
"5": 25,
"6": 26,
"7": 27,
"8": 28,
"9": 29,
":": 30,
";": 31,
"<": 32,
"=": 33,
">": 34,
"?": 35,
"@": 36,
"A": 37,
"B": 38,
"C": 39,
"D": 40,
"E": 41,
"F": 42,
"G": 43,
"H": 44,
"I": 45,
"J": 46,
"K": 47,
"L": 48,
"M": 49,
"N": 50,
"O": 51,
"P": 52,
"Q": 53,
"R": 54,
"S": 55,
"T": 56,
"U": 57,
"V": 58,
"W": 59,
"X": 60,
"Y": 61,
"Z": 62,
"[": 63,
"\\": 64,
"]": 65,
"^": 66,
"_": 67,
"`": 68,
"a": 69,
"b": 70,
"c": 71,
"d": 72,
"e": 73,
"f": 74,
"g": 75,
"h": 76,
"i": 77,
"j": 78,
"k": 79,
"l": 80,
"m": 81,
"n": 82,
"o": 83,
"p": 84,
"q": 85,
"r": 86,
"s": 87,
"t": 88,
"u": 89,
"v": 90,
"w": 91,
"x": 92,
"y": 93,
"z": 94,
"{": 95,
"|": 96,
"}": 97,
"~": 98,
"¡": 99,
"¢": 100,
"£": 101,
"¤": 102,
"¥": 103,
"¦": 104,
"§": 105,
"¨": 106,
"©": 107,
"ª": 108,
"«": 109,
"¬": 110,
"®": 111,
"¯": 112,
"°": 113,
"±": 114,
"²": 115,
"³": 116,
"´": 117,
"µ": 118,
"¶": 119,
"·": 120,
"¸": 121,
"¹": 122,
"º": 123,
"»": 124,
"¼": 125,
"½": 126,
"¾": 127,
"¿": 128,
"À": 129,
"Á": 130,
"Â": 131,
"Ã": 132,
"Ä": 133,
"Å": 134,
"Æ": 135,
"Ç": 136,
"È": 137,
"É": 138,
"Ê": 139,
"Ë": 140,
"Ì": 141,
"Í": 142,
"Î": 143,
"Ï": 144,
"Ð": 145,
"Ñ": 146,
"Ò": 147,
"Ó": 148,
"Ô": 149,
"Õ": 150,
"Ö": 151,
"×": 152,
"Ø": 153,
"Ù": 154,
"Ú": 155,
"Û": 156,
"Ü": 157,
"Ý": 158,
"Þ": 159,
"ß": 160,
"à": 161,
"á": 162,
"â": 163,
"ã": 164,
"ä": 165,
"å": 166,
"æ": 167,
"ç": 168,
"è": 169,
"é": 170,
"ê": 171,
"ë": 172,
"ì": 173,
"í": 174,
"î": 175,
"ï": 176,
"ð": 177,
"ñ": 178,
"ò": 179,
"ó": 180,
"ô": 181,
"õ": 182,
"ö": 183,
"÷": 184,
"ø": 185,
"ù": 186,
"ú": 187,
"û": 188,
"ü": 189,
"ý": 190,
"þ": 191,
"ÿ": 192,
"Ā": 193,
"ā": 194,
"Ă": 195,
"ă": 196,
"Ą": 197,
"ą": 198,
"Ć": 199,
"ć": 200,
"Ĉ": 201,
"ĉ": 202,
"Ċ": 203,
"ċ": 204,
"Č": 205,
"č": 206,
"Ď": 207,
"ď": 208,
"Đ": 209,
"đ": 210,
"Ē": 211,
"ē": 212,
"Ĕ": 213,
"ĕ": 214,
"Ė": 215,
"ė": 216,
"Ę": 217,
"ę": 218,
"Ě": 219,
"ě": 220,
"Ĝ": 221,
"ĝ": 222,
"Ğ": 223,
"ğ": 224,
"Ġ": 225,
"ġ": 226,
"Ģ": 227,
"ģ": 228,
"Ĥ": 229,
"ĥ": 230,
"Ħ": 231,
"ħ": 232,
"Ĩ": 233,
"ĩ": 234,
"Ī": 235,
"ī": 236,
"Ĭ": 237,
"ĭ": 238,
"Į": 239,
"į": 240,
"İ": 241,
"ı": 242,
"IJ": 243,
"ij": 244,
"Ĵ": 245,
"ĵ": 246,
"Ķ": 247,
"ķ": 248,
"ĸ": 249,
"Ĺ": 250,
"ĺ": 251,
"Ļ": 252,
"ļ": 253,
"Ľ": 254,
"ľ": 255,
"Ŀ": 256,
"ŀ": 257,
"Ł": 258,
"ł": 259,
"Ń": 260,
"))": 261,
"cc": 262,
"CC": 263,
"))))": 264,
"cccc": 265,
"CCCC": 266,
"CO": 267,
"))))))))": 268,
"CCC": 269,
"cccccc": 270,
"ccc": 271,
")))": 272,
"NC": 273,
"OC": 274,
"10": 275,
"nc": 276,
"CCO": 277,
"Cl": 278,
")=": 279,
"NCC": 280,
"))))))": 281,
"CCCCC": 282,
")))))))": 283,
"nH": 284,
"ccccc": 285,
"CCCCCCCC": 286,
"OCC": 287,
"CN": 288,
"COC": 289,
"cn": 290,
"Br": 291,
")))))))))": 292,
"Ccccc": 293,
")[": 294,
")))))": 295,
"COcccc": 296,
"))))))))))": 297,
"CNC": 298,
"COCCO": 299,
"CCCCCC": 300,
"+]": 301,
"-]": 302,
"cC": 303,
"cnc": 304,
"ccO": 305,
"oc": 306,
"14": 307,
"CCOC": 308,
"ccC": 309,
")))))))))))": 310,
"COccc": 311,
"ccccO": 312,
"106": 313,
"Cccc": 314,
"cO": 315,
"CCN": 316,
"Ccccccc": 317,
"+]=": 318,
"Ncccc": 319,
"COCC": 320,
"69": 321,
"cOC": 322,
"96": 323,
"NCCCC": 324,
"NCCC": 325,
"CCNC": 326,
"ncc": 327,
"ccccC": 328,
"-]))": 329,
"CS": 330,
"CCCCCCC": 331,
"CF": 332,
"CNCC": 333,
"Ccc": 334,
"Ncccccc": 335,
"CCCO": 336,
"cccO": 337,
"ccccCl": 338,
"13": 339,
"COcccccc": 340,
"NCCO": 341,
"))))))))))))": 342,
"CCCCCCCCC": 343,
"sc": 344,
"17": 345,
"95": 346,
"nn": 347,
"CCNCC": 348,
"COccccc": 349,
"OCCO": 350,
"cCl": 351,
"CCl": 352,
"Cccccc": 353,
"ccOC": 354,
"Ccn": 355,
"cccC": 356,
"nccC": 357,
"COccccC": 358,
"COP": 359,
"OCOCCO": 360,
"CCCN": 361,
"OCO": 362,
")))))))))))))": 363,
"ccccF": 364,
"11": 365,
"SC": 366,
"COCOCCO": 367,
"ccccOC": 368,
"cccCl": 369,
"-])": 370,
"ccn": 371,
"cN": 372,
"))))))))))))))))": 373,
"ncnc": 374,
"ccccBr": 375,
"ccccnc": 376,
"OCCC": 377,
"NS": 378,
"CCcccccc": 379,
"CccccO": 380,
"15": 381,
"CCCNC": 382,
"ccccN": 383,
")))))))))))))))": 384,
"CCCCCCCCCCCCCCCC": 385,
"nC": 386,
"co": 387,
"cccOC": 388,
"COcccC": 389,
"ccCl": 390,
"CCCCN": 391,
"ncC": 392,
"CccccC": 393,
"ncN": 394,
"CcccC": 395,
"cccccO": 396,
"COcccO": 397,
"cs": 398,
"CCCCO": 399,
"Ccnc": 400,
"cccBr": 401,
"NCCCCC": 402,
"CCS": 403,
"NCCcc": 404,
"cccccccccc": 405,
"ccnc": 406,
"NCCNCC": 407,
"12": 408,
"ncccccc": 409,
"NCCcccccc": 410,
"Cnc": 411,
"ccccNC": 412,
"cccccn": 413,
"Ncn": 414,
"NCcccccc": 415,
"cCC": 416,
"cF": 417,
"occcO": 418,
"CCOCOCCO": 419,
"cccccCl": 420,
"NccccCl": 421,
"FCF": 422,
"NN": 423,
"cccF": 424,
"cncccccc": 425,
"CCOP": 426,
"CccccNC": 427,
"OCCCC": 428,
"nnc": 429,
"CCOCC": 430,
"cccccC": 431,
"OCCOC": 432,
"NCCCCN": 433,
"Ncnc": 434,
"cnn": 435,
"))=": 436,
"CCCCCnccC": 437,
"CCCOC": 438,
"16": 439,
"Cc": 440,
"))))))))))))))": 441,
"cccncc": 442,
"CccccS": 443,
"ccBr": 444,
"CCCNCC": 445,
"NCCS": 446,
"cccccccc": 447,
"18": 448,
"NccccC": 449,
"OCOCC": 450,
"COcc": 451,
"NCCCCCC": 452,
")))))))))))))))))": 453,
"NCCOCC": 454,
"Occcccc": 455,
"cBr": 456,
"ccN": 457,
"COccO": 458,
"CCCcccccc": 459,
"CCCCCCCCCCCCCC": 460,
"+][": 461,
"NO": 462,
"CNCCCC": 463,
"CNCCC": 464,
"COCOCC": 465,
"COcccOC": 466,
"OCCCOP": 467,
"CCCCNC": 468,
"Clcccc": 469,
"Occcc": 470,
"ccF": 471,
"ncCl": 472,
"ccCC": 473,
"CCccccO": 474,
"CCOCCO": 475,
"OCcccccc": 476,
"CCCCCCCCCCCCCCC": 477,
"SCC": 478,
"NCCccccO": 479,
"CccccCl": 480,
"CccO": 481,
"csc": 482,
"ncNC": 483,
"cncCl": 484,
"nccc": 485,
"CCCCCCO": 486,
"20": 487,
"SN": 488,
"coc": 489,
"))))))))))))))))))": 490,
"NCCNC": 491,
"CCCCCCCCCC": 492,
"NNC": 493,
"ccccs": 494,
"CcccO": 495,
"CccC": 496,
"Fcccc": 497,
"cccccBr": 498,
"136": 499,
"cncnc": 500,
"Nccc": 501,
"CCcc": 502,
"ccccccccc": 503,
"CCCl": 504,
"CcccccC": 505,
"22": 506,
"CCCCCO": 507,
"cccoc": 508,
"ncn": 509,
"NCCN": 510,
"ccCF": 511
},
"merges": [
") )",
"c c",
"C C",
")) ))",
"cc cc",
"CC CC",
"C O",
")))) ))))",
"CC C",
"cccc cc",
"cc c",
")) )",
"N C",
"O C",
"1 0",
"n c",
"CC O",
"C l",
") =",
"N CC",
")))) ))",
"CCCC C",
")))) )))",
"n H",
"cccc c",
"CCCC CCCC",
"O CC",
"C N",
"CO C",
"c n",
"B r",
")))))))) )",
"C cccc",
") [",
")))) )",
"CO cccc",
")))))))) ))",
"C NC",
"CO CCO",
"CCCC CC",
"+ ]",
"- ]",
"c C",
"c nc",
"cc O",
"o c",
"1 4",
"CC OC",
"cc C",
")))))))) )))",
"CO ccc",
"cccc O",
"10 6",
"C ccc",
"c O",
"CC N",
"C cccccc",
"+] =",
"N cccc",
"CO CC",
"6 9",
"c OC",
"9 6",
"N CCCC",
"N CCC",
"CC NC",
"n cc",
"cccc C",
"-] ))",
"C S",
"CCCC CCC",
"C F",
"C NCC",
"C cc",
"N cccccc",
"CC CO",
"ccc O",
"cccc Cl",
"1 3",
"CO cccccc",
"N CCO",
")))))))) ))))",
"CCCC CCCCC",
"s c",
"1 7",
"9 5",
"n n",
"CC NCC",
"CO ccccc",
"O CCO",
"c Cl",
"CC l",
"C ccccc",
"cc OC",
"C cn",
"ccc C",
"n ccC",
"COcccc C",
"CO P",
"O COCCO",
"CCC N",
"O CO",
")))))))) )))))",
"cccc F",
"1 1",
"S C",
"CO COCCO",
"cccc OC",
"ccc Cl",
"-] )",
"cc n",
"c N",
")))))))) ))))))))",
"nc nc",
"cccc Br",
"cccc nc",
"O CCC",
"N S",
"CC cccccc",
"Ccccc O",
"1 5",
"CCC NC",
"cccc N",
")))))))) )))))))",
"CCCCCCCC CCCCCCCC",
"n C",
"c o",
"ccc OC",
"COccc C",
"cc Cl",
"CCCC N",
"nc C",
"Ccccc C",
"nc N",
"Cccc C",
"ccccc O",
"COccc O",
"c s",
"CCCC O",
"C cnc",
"ccc Br",
"N CCCCC",
"CC S",
"NCC cc",
"cccc cccccc",
"cc nc",
"NCC NCC",
"1 2",
"n cccccc",
"NCC cccccc",
"C nc",
"cccc NC",
"ccccc n",
"N cn",
"NC cccccc",
"c CC",
"c F",
"o cccO",
"CCO COCCO",
"ccccc Cl",
"Ncccc Cl",
"F CF",
"N N",
"ccc F",
"cn cccccc",
"CCO P",
"Ccccc NC",
"O CCCC",
"n nc",
"CCO CC",
"ccccc C",
"OCC OC",
"NCCCC N",
"N cnc",
"cn n",
")) =",
"CCCCC nccC",
"CC COC",
"1 6",
"C c",
")))))))) ))))))",
"ccc ncc",
"Ccccc S",
"cc Br",
"CCC NCC",
"NCC S",
"cccc cccc",
"1 8",
"Ncccc C",
"O COCC",
"CO cc",
"N CCCCCC",
")))))))) )))))))))",
"NCCO CC",
"O cccccc",
"c Br",
"cc N",
"CO ccO",
"CCC cccccc",
"CCCCCCCC CCCCCC",
"+] [",
"N O",
"CN CCCC",
"CN CCC",
"CO COCC",
"COccc OC",
"OCC COP",
"CCCC NC",
"Cl cccc",
"O cccc",
"cc F",
"nc Cl",
"cc CC",
"CC ccccO",
"CCO CCO",
"OC cccccc",
"CCCCCCCC CCCCCCC",
"S CC",
"NCC ccccO",
"Ccccc Cl",
"C ccO",
"c sc",
"nc NC",
"cnc Cl",
"n ccc",
"CCCC CCO",
"2 0",
"S N",
"c oc",
")))))))) ))))))))))",
"NCC NC",
"CCCCCCCC CC",
"N NC",
"cccc s",
"Cccc O",
"C ccC",
"F cccc",
"ccccc Br",
"13 6",
"cnc nc",
"N ccc",
"CC cc",
"cccc ccccc",
"CCC l",
"Cccccc C",
"2 2",
"CCCC CO",
"ccc oc",
"nc n",
"NCC N",
"ccC F"
]
}
}