mindi-backup / final_model /tokenizer.json
Mindigenous
Sync latest workspace state: data/scripts updates and archive cleanup
5ae3e12
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<PAD>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<BOS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<EOS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<NL>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "<INDENT>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "<DEDENT>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "<PROMPT>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "<CODE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 9,
"content": "<PYTHON>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 10,
"content": "<JAVASCRIPT>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "NFKC"
},
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "Split",
"pattern": {
"Regex": "(==|!=|<=|>=|:=|->|=>|\\+\\+|--|\\+=|-=|\\*=|/=|//=|%=|\\*\\*|&&|\\|\\||<<|>>)"
},
"behavior": "Isolated",
"invert": false
},
{
"type": "Split",
"pattern": {
"Regex": "([()\\[\\]{}.,:;])"
},
"behavior": "Isolated",
"invert": false
},
{
"type": "Metaspace",
"replacement": "_",
"prepend_scheme": "always",
"split": true
}
]
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<BOS>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<EOS>",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<BOS>": {
"id": "<BOS>",
"ids": [
2
],
"tokens": [
"<BOS>"
]
},
"<EOS>": {
"id": "<EOS>",
"ids": [
3
],
"tokens": [
"<EOS>"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "_",
"prepend_scheme": "always",
"split": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<UNK>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<PAD>": 0,
"<UNK>": 1,
"<BOS>": 2,
"<EOS>": 3,
"<NL>": 4,
"<INDENT>": 5,
"<DEDENT>": 6,
"<PROMPT>": 7,
"<CODE>": 8,
"<PYTHON>": 9,
"<JAVASCRIPT>": 10,
"(": 11,
")": 12,
"+": 13,
",": 14,
".": 15,
"0": 16,
"4": 17,
"5": 18,
":": 19,
";": 20,
"<": 21,
"=": 22,
">": 23,
"A": 24,
"C": 25,
"D": 26,
"E": 27,
"F": 28,
"H": 29,
"I": 30,
"J": 31,
"L": 32,
"M": 33,
"N": 34,
"O": 35,
"P": 36,
"R": 37,
"S": 38,
"T": 39,
"V": 40,
"W": 41,
"Y": 42,
"_": 43,
"a": 44,
"b": 45,
"c": 46,
"d": 47,
"e": 48,
"f": 49,
"g": 50,
"h": 51,
"i": 52,
"l": 53,
"m": 54,
"n": 55,
"o": 56,
"p": 57,
"r": 58,
"s": 59,
"t": 60,
"u": 61,
"v": 62,
"w": 63,
"x": 64,
"y": 65,
"{": 66,
"}": 67,
"_<": 68,
"DE": 69,
"T>": 70,
"_a": 71,
"L>": 72,
"NL>": 73,
"_<NL>": 74,
"NT>": 75,
"_t": 76,
"DENT>": 77,
"_i": 78,
"PT>": 79,
"_(": 80,
"_)": 81,
"on": 82,
"_<P": 83,
"_f": 84,
"_l": 85,
"re": 86,
"ri": 87,
"CO": 88,
"IN": 89,
"MPT>": 90,
"OMPT>": 91,
"ROMPT>": 92,
"_;": 93,
"_b": 94,
"at": 95,
"_<DE": 96,
"_<CO": 97,
"_<IN": 98,
"DE>": 99,
"_to": 100,
"_<PROMPT>": 101,
"_lo": 102,
"_<DEDENT>": 103,
"_<CODE>": 104,
"_<INDENT>": 105,
"_+": 106,
"_0": 107,
"_re": 108,
"ct": 109,
"dd": 110,
"ion": 111,
"nct": 112,
"rn": 113,
"tu": 114,
"unct": 115,
"va": 116,
"_add": 117,
"_th": 118,
"_funct": 119,
"_retu": 120,
"_function": 121,
"_return": 122,
"AS": 123,
"AV": 124,
"CR": 125,
"Cre": 126,
"HO": 127,
"IPT>": 128,
"Ja": 129,
"JAV": 130,
"N>": 131,
"Py": 132,
"Sc": 133,
"THO": 134,
"YTHO": 135,
"_,": 136,
"_4": 137,
"_5": 138,
"_:": 139,
"_p": 140,
"_{": 141,
"_}": 142,
"_Cre": 143,
"_Ja": 144,
"_Py": 145,
"hon": 146,
"nt": 147,
"op": 148,
"or": 149,
"pt": 150,
"thon": 151,
"_<JAV": 152,
"_<PYTHO": 153,
"_for": 154,
"rint": 155,
"ript": 156,
"ate": 157,
"_log": 158,
"_loop": 159,
"vaSc": 160,
"_that": 161,
"ASCR": 162,
"_print": 163,
"_Create": 164,
"_JavaSc": 165,
"_Python": 166,
"_<JAVASCR": 167,
"_<PYTHON>": 168,
"_JavaScript": 169,
"_<JAVASCRIPT>": 170
},
"merges": [
[
"_",
"<"
],
[
"D",
"E"
],
[
"T",
">"
],
[
"_",
"a"
],
[
"L",
">"
],
[
"N",
"L>"
],
[
"_<",
"NL>"
],
[
"N",
"T>"
],
[
"_",
"t"
],
[
"DE",
"NT>"
],
[
"_",
"i"
],
[
"P",
"T>"
],
[
"_",
"("
],
[
"_",
")"
],
[
"o",
"n"
],
[
"_<",
"P"
],
[
"_",
"f"
],
[
"_",
"l"
],
[
"r",
"e"
],
[
"r",
"i"
],
[
"C",
"O"
],
[
"I",
"N"
],
[
"M",
"PT>"
],
[
"O",
"MPT>"
],
[
"R",
"OMPT>"
],
[
"_",
";"
],
[
"_",
"b"
],
[
"a",
"t"
],
[
"_<",
"DE"
],
[
"_<",
"CO"
],
[
"_<",
"IN"
],
[
"DE",
">"
],
[
"_t",
"o"
],
[
"_<P",
"ROMPT>"
],
[
"_l",
"o"
],
[
"_<DE",
"DENT>"
],
[
"_<CO",
"DE>"
],
[
"_<IN",
"DENT>"
],
[
"_",
"+"
],
[
"_",
"0"
],
[
"_",
"re"
],
[
"c",
"t"
],
[
"d",
"d"
],
[
"i",
"on"
],
[
"n",
"ct"
],
[
"r",
"n"
],
[
"t",
"u"
],
[
"u",
"nct"
],
[
"v",
"a"
],
[
"_a",
"dd"
],
[
"_t",
"h"
],
[
"_f",
"unct"
],
[
"_re",
"tu"
],
[
"_funct",
"ion"
],
[
"_retu",
"rn"
],
[
"A",
"S"
],
[
"A",
"V"
],
[
"C",
"R"
],
[
"C",
"re"
],
[
"H",
"O"
],
[
"I",
"PT>"
],
[
"J",
"a"
],
[
"J",
"AV"
],
[
"N",
">"
],
[
"P",
"y"
],
[
"S",
"c"
],
[
"T",
"HO"
],
[
"Y",
"THO"
],
[
"_",
","
],
[
"_",
"4"
],
[
"_",
"5"
],
[
"_",
":"
],
[
"_",
"p"
],
[
"_",
"{"
],
[
"_",
"}"
],
[
"_",
"Cre"
],
[
"_",
"Ja"
],
[
"_",
"Py"
],
[
"h",
"on"
],
[
"n",
"t"
],
[
"o",
"p"
],
[
"o",
"r"
],
[
"p",
"t"
],
[
"t",
"hon"
],
[
"_<",
"JAV"
],
[
"_<P",
"YTHO"
],
[
"_f",
"or"
],
[
"ri",
"nt"
],
[
"ri",
"pt"
],
[
"at",
"e"
],
[
"_lo",
"g"
],
[
"_lo",
"op"
],
[
"va",
"Sc"
],
[
"_th",
"at"
],
[
"AS",
"CR"
],
[
"_p",
"rint"
],
[
"_Cre",
"ate"
],
[
"_Ja",
"vaSc"
],
[
"_Py",
"thon"
],
[
"_<JAV",
"ASCR"
],
[
"_<PYTHO",
"N>"
],
[
"_JavaSc",
"ript"
],
[
"_<JAVASCR",
"IPT>"
]
]
}
}