Transformers
SMILES_tokenizer / tokenizer.json
Volowan's picture
Upload tokenizer
53cb5c5 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[cls]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 254,
"content": "[bos]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 255,
"content": "[pad]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 256,
"content": "[eos]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 257,
"content": "[sep]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 258,
"content": "[mask]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 259,
"content": "[unk]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "WhitespaceSplit"
},
{
"type": "Split",
"pattern": {
"Regex": "(\\[[A-Z][a-z]?|\\]|Br?|Cl?|N|O|S|P|F|I|H|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\|/|:|~|@|\\?|>|\\*|\\$|\\%[0-9]{2}|[0-9]|\\[)"
},
"behavior": "Isolated",
"invert": true
}
]
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"[cls]": 0,
"[H": 1,
"[He": 2,
"[Li": 3,
"[Be": 4,
"[B": 5,
"[C": 6,
"[N": 7,
"[O": 8,
"[F": 9,
"[Ne": 10,
"[Na": 11,
"[Mg": 12,
"[Al": 13,
"[Si": 14,
"[P": 15,
"[S": 16,
"[Cl": 17,
"[Ar": 18,
"[K": 19,
"[Ca": 20,
"[Sc": 21,
"[Ti": 22,
"[V": 23,
"[Cr": 24,
"[Mn": 25,
"[Fe": 26,
"[Co": 27,
"[Ni": 28,
"[Cu": 29,
"[Zn": 30,
"[Ga": 31,
"[Ge": 32,
"[As": 33,
"[Se": 34,
"[Br": 35,
"[Kr": 36,
"[Rb": 37,
"[Sr": 38,
"[Y": 39,
"[Zr": 40,
"[Nb": 41,
"[Mo": 42,
"[Tc": 43,
"[Ru": 44,
"[Rh": 45,
"[Pd": 46,
"[Ag": 47,
"[Cd": 48,
"[In": 49,
"[Sn": 50,
"[Sb": 51,
"[Te": 52,
"[I": 53,
"[Xe": 54,
"[Cs": 55,
"[Ba": 56,
"[La": 57,
"[Ce": 58,
"[Pr": 59,
"[Nd": 60,
"[Pm": 61,
"[Sm": 62,
"[Eu": 63,
"[Gd": 64,
"[Tb": 65,
"[Dy": 66,
"[Ho": 67,
"[Er": 68,
"[Tm": 69,
"[Yb": 70,
"[Lu": 71,
"[Hf": 72,
"[Ta": 73,
"[W": 74,
"[Re": 75,
"[Os": 76,
"[Ir": 77,
"[Pt": 78,
"[Au": 79,
"[Hg": 80,
"[Tl": 81,
"[Pb": 82,
"[Bi": 83,
"[Po": 84,
"[At": 85,
"[Rn": 86,
"[Fr": 87,
"[Ra": 88,
"[Ac": 89,
"[Th": 90,
"[Pa": 91,
"[U": 92,
"[Np": 93,
"[Pu": 94,
"[Am": 95,
"[Cm": 96,
"[Bk": 97,
"[Cf": 98,
"[Es": 99,
"[Fm": 100,
"[Md": 101,
"[No": 102,
"[Lr": 103,
"[Rf": 104,
"[Db": 105,
"[Sg": 106,
"[Bh": 107,
"[Hs": 108,
"[Mt": 109,
"[Ds": 110,
"[Rg": 111,
"[Cn": 112,
"[Nh": 113,
"[Fl": 114,
"[Mc": 115,
"[Lv": 116,
"[Ts": 117,
"[Og": 118,
"C": 119,
"Cl": 120,
"B": 121,
"Br": 122,
"N": 123,
"O": 124,
"S": 125,
"P": 126,
"F": 127,
"I": 128,
"H": 129,
"b": 130,
"c": 131,
"n": 132,
"o": 133,
"s": 134,
"p": 135,
"~": 136,
"?": 137,
">": 138,
"[": 139,
"]": 140,
":": 141,
"=": 142,
"#": 143,
"$": 144,
"\\": 145,
"/": 146,
"(": 147,
")": 148,
".": 149,
"+": 150,
"-": 151,
"@": 152,
"*": 153,
"0": 154,
"1": 155,
"2": 156,
"3": 157,
"4": 158,
"5": 159,
"6": 160,
"7": 161,
"8": 162,
"9": 163,
"%10": 164,
"%11": 165,
"%12": 166,
"%13": 167,
"%14": 168,
"%15": 169,
"%16": 170,
"%17": 171,
"%18": 172,
"%19": 173,
"%20": 174,
"%21": 175,
"%22": 176,
"%23": 177,
"%24": 178,
"%25": 179,
"%26": 180,
"%27": 181,
"%28": 182,
"%29": 183,
"%30": 184,
"%31": 185,
"%32": 186,
"%33": 187,
"%34": 188,
"%35": 189,
"%36": 190,
"%37": 191,
"%38": 192,
"%39": 193,
"%40": 194,
"%41": 195,
"%42": 196,
"%43": 197,
"%44": 198,
"%45": 199,
"%46": 200,
"%47": 201,
"%48": 202,
"%49": 203,
"%50": 204,
"%51": 205,
"%52": 206,
"%53": 207,
"%54": 208,
"%55": 209,
"%56": 210,
"%57": 211,
"%58": 212,
"%59": 213,
"%60": 214,
"%61": 215,
"%62": 216,
"%63": 217,
"%64": 218,
"%65": 219,
"%66": 220,
"%67": 221,
"%68": 222,
"%69": 223,
"%70": 224,
"%71": 225,
"%72": 226,
"%73": 227,
"%74": 228,
"%75": 229,
"%76": 230,
"%77": 231,
"%78": 232,
"%79": 233,
"%80": 234,
"%81": 235,
"%82": 236,
"%83": 237,
"%84": 238,
"%85": 239,
"%86": 240,
"%87": 241,
"%88": 242,
"%89": 243,
"%90": 244,
"%91": 245,
"%92": 246,
"%93": 247,
"%94": 248,
"%95": 249,
"%96": 250,
"%97": 251,
"%98": 252,
"%99": 253,
"[bos]": 254,
"[pad]": 255,
"[eos]": 256,
"[sep]": 257,
"[mask]": 258,
"[unk]": 259
},
"unk_token": "[unk]"
}
}