code-tokenizer-0526 / tokenizer.json
Heimrih's picture
Upload tokenizer
cb6daee verified
raw
history blame
4.32 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<unk>": 0,
"<pad>": 1,
"(": 2,
")": 3,
"+": 4,
",": 5,
"0": 6,
"1": 7,
":": 8,
"T": 9,
"_": 10,
"a": 11,
"b": 12,
"c": 13,
"d": 14,
"e": 15,
"f": 16,
"g": 17,
"i": 18,
"k": 19,
"l": 20,
"m": 21,
"n": 22,
"o": 23,
"p": 24,
"r": 25,
"s": 26,
"t": 27,
"u": 28,
"z": 29,
"):": 30,
"in": 31,
"__": 32,
"as": 33,
"de": 34,
"ass": 35,
"def": 36,
"10": 37,
"To": 38,
"_s": 39,
"an": 40,
"cl": 41,
"co": 42,
"el": 43,
"en": 44,
"er": 45,
"et": 46,
"e_s": 47,
"fo": 48,
"ge": 49,
"it": 50,
"iz": 51,
"ken": 52,
"mp": 53,
"pr": 54,
"pass": 55,
"rn": 56,
"ran": 57,
"ret": 58,
"sel": 59,
"te_s": 60,
"um": 61,
"urn": 62,
"ute_s": 63,
"int": 64,
"init": 65,
"__init": 66,
"Token": 67,
"class": 68,
"comp": 69,
"for": 70,
"izer": 71,
"print": 72,
"range": 73,
"return": 74,
"self": 75,
"ute_sum": 76,
"__init__": 77,
"Tokenizer": 78,
"compute_sum": 79
},
"merges": [
[
")",
":"
],
[
"i",
"n"
],
[
"_",
"_"
],
[
"a",
"s"
],
[
"d",
"e"
],
[
"as",
"s"
],
[
"de",
"f"
],
[
"1",
"0"
],
[
"T",
"o"
],
[
"_",
"s"
],
[
"a",
"n"
],
[
"c",
"l"
],
[
"c",
"o"
],
[
"e",
"l"
],
[
"e",
"n"
],
[
"e",
"r"
],
[
"e",
"t"
],
[
"e",
"_s"
],
[
"f",
"o"
],
[
"g",
"e"
],
[
"i",
"t"
],
[
"i",
"z"
],
[
"k",
"en"
],
[
"m",
"p"
],
[
"p",
"r"
],
[
"p",
"ass"
],
[
"r",
"n"
],
[
"r",
"an"
],
[
"r",
"et"
],
[
"s",
"el"
],
[
"t",
"e_s"
],
[
"u",
"m"
],
[
"u",
"rn"
],
[
"u",
"te_s"
],
[
"in",
"t"
],
[
"in",
"it"
],
[
"__",
"init"
],
[
"To",
"ken"
],
[
"cl",
"ass"
],
[
"co",
"mp"
],
[
"fo",
"r"
],
[
"iz",
"er"
],
[
"pr",
"int"
],
[
"ran",
"ge"
],
[
"ret",
"urn"
],
[
"sel",
"f"
],
[
"ute_s",
"um"
],
[
"__init",
"__"
],
[
"Token",
"izer"
],
[
"comp",
"ute_sum"
]
]
}
}