{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFC" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": null, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "[UNK]": 0, "[PAD]": 1, "[BOS]": 2, "[EOS]": 3, ".": 4, "0": 5, "1": 6, "2": 7, "3": 8, "4": 9, "5": 10, "6": 11, "7": 12, "8": 13, "9": 14, "a": 15, "c": 16, "d": 17, "e": 18, "f": 19, "g": 20, "h": 21, "i": 22, "j": 23, "k": 24, "l": 25, "m": 26, "n": 27, "o": 28, "p": 29, "r": 30, "s": 31, "t": 32, "u": 33, "v": 34, "x": 35, "y": 36, "z": 37, "Ġ": 38, "Ġt": 39, "Ġf": 40, "in": 41, "en": 42, "is": 43, "er": 44, "Ġto": 45, "Ġa": 46, "ain": 47, "iz": 48, "ken": 49, "le": 50, "rain": 51, "Ġth": 52, "Ġtrain": 53, "Ġtoken": 54, "izer": 55, "Ġtokenizer": 56, "or": 57, "du": 58, "ile": 59, "mm": 60, "om": 61, "rom": 62, "Ġis": 63, "Ġdu": 64, "Ġfor": 65, "Ġfile": 66, "Ġfrom": 67, "ing": 68, "Ġthis": 69, "Ġtraining": 70, "mmy": 71, "Ġdummy": 72, "on": 73, "Ġs": 74, "ce": 75, "ten": 76, "enten": 77, "Ġsenten": 78, "Ġsentence": 79, "cs": 80, "js": 81, "lin": 82, "Ġcs": 83, "Ġjs": 84, "Ġlin": 85, "onl": 86, "Ġcsv": 87, "Ġjsonl": 88, "Ġline": 89, "Ġ1": 90, "Ġ2": 91, "Ġ3": 92, "Ġ4": 93, "her": 94, "no": 95, "ther": 96, "Ġon": 97, "nother": 98, "Ġone": 99, "another": 100, "ex": 101, "Ġanother": 102, "am": 103, "et": 104, "ple": 105, "yet": 106, "Ġex": 107, "Ġthe": 108, "ample": 109, "Ġexample": 110, "Ġ0": 111, "Ġ5": 112, "Ġ6": 113, "Ġ7": 114, "Ġ8": 115, "Ġ9": 116, "Ġ10": 117, "Ġ11": 118, "Ġ12": 119, "Ġ13": 120, "Ġ14": 121, "Ġ15": 122, "Ġ16": 123, "Ġ17": 124, "Ġ18": 125, "Ġ19": 126, "Ġ20": 127, "Ġ21": 128, "Ġ22": 129, "Ġ23": 130, "Ġ24": 131, "Ġ25": 132, "Ġ26": 133, "Ġ27": 134, "Ġ28": 135, "Ġ29": 136, "Ġ30": 137, "Ġ31": 138, "Ġ32": 139, "Ġ33": 140, "Ġ34": 141, "Ġ35": 142, "Ġ36": 143, "Ġ37": 144, "Ġ38": 145, "Ġ39": 146, "Ġ40": 147, "Ġ41": 148, "Ġ42": 149, "Ġ43": 150, "Ġ44": 151, "Ġ45": 152, "Ġ46": 153, "Ġ47": 154, "Ġ48": 155, "Ġ49": 156, "hor": 157, "Ġyet": 158, "Ġtex": 159, "Ġshor": 160, "Ġtext": 161, "Ġshort": 162 }, "merges": [ [ "Ġ", "t" ], [ "Ġ", "f" ], [ "i", "n" ], [ "e", "n" ], [ "i", "s" ], [ "e", "r" ], [ "Ġt", "o" ], [ "Ġ", "a" ], [ "a", "in" ], [ "i", "z" ], [ "k", "en" ], [ "l", "e" ], [ "r", "ain" ], [ "Ġt", "h" ], [ "Ġt", "rain" ], [ "Ġto", "ken" ], [ "iz", "er" ], [ "Ġtoken", "izer" ], [ "o", "r" ], [ "d", "u" ], [ "i", "le" ], [ "m", "m" ], [ "o", "m" ], [ "r", "om" ], [ "Ġ", "is" ], [ "Ġ", "du" ], [ "Ġf", "or" ], [ "Ġf", "ile" ], [ "Ġf", "rom" ], [ "in", "g" ], [ "Ġth", "is" ], [ "Ġtrain", "ing" ], [ "mm", "y" ], [ "Ġdu", "mmy" ], [ "o", "n" ], [ "Ġ", "s" ], [ "c", "e" ], [ "t", "en" ], [ "en", "ten" ], [ "Ġs", "enten" ], [ "Ġsenten", "ce" ], [ "c", "s" ], [ "j", "s" ], [ "l", "in" ], [ "Ġ", "cs" ], [ "Ġ", "js" ], [ "Ġ", "lin" ], [ "on", "l" ], [ "Ġcs", "v" ], [ "Ġjs", "onl" ], [ "Ġlin", "e" ], [ "Ġ", "1" ], [ "Ġ", "2" ], [ "Ġ", "3" ], [ "Ġ", "4" ], [ "h", "er" ], [ "n", "o" ], [ "t", "her" ], [ "Ġ", "on" ], [ "no", "ther" ], [ "Ġon", "e" ], [ "a", "nother" ], [ "e", "x" ], [ "Ġa", "nother" ], [ "a", "m" ], [ "e", "t" ], [ "p", "le" ], [ "y", "et" ], [ "Ġ", "ex" ], [ "Ġth", "e" ], [ "am", "ple" ], [ "Ġex", "ample" ], [ "Ġ", "0" ], [ "Ġ", "5" ], [ "Ġ", "6" ], [ "Ġ", "7" ], [ "Ġ", "8" ], [ "Ġ", "9" ], [ "Ġ1", "0" ], [ "Ġ1", "1" ], [ "Ġ1", "2" ], [ "Ġ1", "3" ], [ "Ġ1", "4" ], [ "Ġ1", "5" ], [ "Ġ1", "6" ], [ "Ġ1", "7" ], [ "Ġ1", "8" ], [ "Ġ1", "9" ], [ "Ġ2", "0" ], [ "Ġ2", "1" ], [ "Ġ2", "2" ], [ "Ġ2", "3" ], [ "Ġ2", "4" ], [ "Ġ2", "5" ], [ "Ġ2", "6" ], [ "Ġ2", "7" ], [ "Ġ2", "8" ], [ "Ġ2", "9" ], [ "Ġ3", "0" ], [ "Ġ3", "1" ], [ "Ġ3", "2" ], [ "Ġ3", "3" ], [ "Ġ3", "4" ], [ "Ġ3", "5" ], [ "Ġ3", "6" ], [ "Ġ3", "7" ], [ "Ġ3", "8" ], [ "Ġ3", "9" ], [ "Ġ4", "0" ], [ "Ġ4", "1" ], [ "Ġ4", "2" ], [ "Ġ4", "3" ], [ "Ġ4", "4" ], [ "Ġ4", "5" ], [ "Ġ4", "6" ], [ "Ġ4", "7" ], [ "Ġ4", "8" ], [ "Ġ4", "9" ], [ "h", "or" ], [ "Ġ", "yet" ], [ "Ġt", "ex" ], [ "Ġs", "hor" ], [ "Ġtex", "t" ], [ "Ġshor", "t" ] ] } }