{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 8, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 9, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 10, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFKC" } ] }, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "Split", "pattern": { "Regex": "(==|!=|<=|>=|:=|->|=>|\\+\\+|--|\\+=|-=|\\*=|/=|//=|%=|\\*\\*|&&|\\|\\||<<|>>)" }, "behavior": "Isolated", "invert": false }, { "type": "Split", "pattern": { "Regex": "([()\\[\\]{}.,:;])" }, "behavior": "Isolated", "invert": false }, { "type": "Metaspace", "replacement": "_", "prepend_scheme": "always", "split": true } ] }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 2 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 3 ], "tokens": [ "" ] } } }, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "": 5, "": 6, "": 7, "": 8, "": 9, "": 10, "(": 11, ")": 12, "+": 13, ",": 14, ".": 15, "0": 16, "4": 17, "5": 18, ":": 19, ";": 20, "<": 21, "=": 22, ">": 23, "A": 24, "C": 25, "D": 26, "E": 27, "F": 28, "H": 29, "I": 30, "J": 31, "L": 32, "M": 33, "N": 34, "O": 35, "P": 36, "R": 37, "S": 38, "T": 39, "V": 40, "W": 41, "Y": 42, "_": 43, "a": 44, "b": 45, "c": 46, "d": 47, "e": 48, "f": 49, "g": 50, "h": 51, "i": 52, "l": 53, "m": 54, "n": 55, "o": 56, "p": 57, "r": 58, "s": 59, "t": 60, "u": 61, "v": 62, "w": 63, "x": 64, "y": 65, "{": 66, "}": 67, "_<": 68, "DE": 69, "T>": 70, "_a": 71, "L>": 72, "NL>": 73, "_": 74, "NT>": 75, "_t": 76, "DENT>": 77, "_i": 78, "PT>": 79, "_(": 80, "_)": 81, "on": 82, "_": 90, "OMPT>": 91, "ROMPT>": 92, "_;": 93, "_b": 94, "at": 95, "_": 99, "_to": 100, "_": 101, "_lo": 102, "_": 103, "_": 104, "_": 105, "_+": 106, "_0": 107, "_re": 108, "ct": 109, "dd": 110, "ion": 111, "nct": 112, "rn": 113, "tu": 114, "unct": 115, "va": 116, "_add": 117, "_th": 118, "_funct": 119, "_retu": 120, "_function": 121, "_return": 122, "AS": 123, "AV": 124, "CR": 125, "Cre": 126, "HO": 127, "IPT>": 128, "Ja": 129, "JAV": 130, "N>": 131, "Py": 132, "Sc": 133, "THO": 134, "YTHO": 135, "_,": 136, "_4": 137, "_5": 138, "_:": 139, "_p": 140, "_{": 141, "_}": 142, "_Cre": 143, "_Ja": 144, "_Py": 145, "hon": 146, "nt": 147, "op": 148, "or": 149, "pt": 150, "thon": 151, "_": 168, "_JavaScript": 169, "_": 170 }, "merges": [ [ "_", "<" ], [ "D", "E" ], [ "T", ">" ], [ "_", "a" ], [ "L", ">" ], [ "N", "L>" ], [ "_<", "NL>" ], [ "N", "T>" ], [ "_", "t" ], [ "DE", "NT>" ], [ "_", "i" ], [ "P", "T>" ], [ "_", "(" ], [ "_", ")" ], [ "o", "n" ], [ "_<", "P" ], [ "_", "f" ], [ "_", "l" ], [ "r", "e" ], [ "r", "i" ], [ "C", "O" ], [ "I", "N" ], [ "M", "PT>" ], [ "O", "MPT>" ], [ "R", "OMPT>" ], [ "_", ";" ], [ "_", "b" ], [ "a", "t" ], [ "_<", "DE" ], [ "_<", "CO" ], [ "_<", "IN" ], [ "DE", ">" ], [ "_t", "o" ], [ "_" ], [ "_l", "o" ], [ "_" ], [ "_" ], [ "_" ], [ "_", "+" ], [ "_", "0" ], [ "_", "re" ], [ "c", "t" ], [ "d", "d" ], [ "i", "on" ], [ "n", "ct" ], [ "r", "n" ], [ "t", "u" ], [ "u", "nct" ], [ "v", "a" ], [ "_a", "dd" ], [ "_t", "h" ], [ "_f", "unct" ], [ "_re", "tu" ], [ "_funct", "ion" ], [ "_retu", "rn" ], [ "A", "S" ], [ "A", "V" ], [ "C", "R" ], [ "C", "re" ], [ "H", "O" ], [ "I", "PT>" ], [ "J", "a" ], [ "J", "AV" ], [ "N", ">" ], [ "P", "y" ], [ "S", "c" ], [ "T", "HO" ], [ "Y", "THO" ], [ "_", "," ], [ "_", "4" ], [ "_", "5" ], [ "_", ":" ], [ "_", "p" ], [ "_", "{" ], [ "_", "}" ], [ "_", "Cre" ], [ "_", "Ja" ], [ "_", "Py" ], [ "h", "on" ], [ "n", "t" ], [ "o", "p" ], [ "o", "r" ], [ "p", "t" ], [ "t", "hon" ], [ "_<", "JAV" ], [ "_" ], [ "_JavaSc", "ript" ], [ "_" ] ] } }