{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<|im_start|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<|im_end|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "<|image|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "<|audio|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "<|tts|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": null, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "<|im_start|>": 0, "<|im_end|>": 1, "<|endoftext|>": 2, "<|image|>": 3, "<|audio|>": 4, "<|tts|>": 5, "-": 6, ".": 7, "0": 8, "1": 9, "2": 10, "3": 11, "4": 12, "5": 13, "6": 14, "7": 15, "8": 16, "9": 17, "C": 18, "F": 19, "H": 20, "I": 21, "M": 22, "O": 23, "P": 24, "T": 25, "a": 26, "b": 27, "c": 28, "d": 29, "e": 30, "f": 31, "g": 32, "h": 33, "i": 34, "j": 35, "k": 36, "l": 37, "m": 38, "n": 39, "o": 40, "p": 41, "q": 42, "r": 43, "s": 44, "t": 45, "u": 46, "v": 47, "w": 48, "x": 49, "y": 50, "z": 51, "Ġ": 52, "in": 53, "is": 54, "el": 55, "mo": 56, "Ġt": 57, "Ġis": 58, "mod": 59, "Th": 60, "fo": 61, "mu": 62, "ti": 63, "Ġ2": 64, "Ġa": 65, "Ġmod": 66, "Ġfo": 67, "ing": 68, "Ġmodel": 69, "CP": 70, "Fa": 71, "Hu": 72, "In": 73, "It": 74, "Min": 75, "Op": 76, "al": 77, "az": 78, "br": 79, "ce": 80, "ck": 81, "do": 82, "ed": 83, "er": 84, "es": 85, "gg": 86, "he": 87, "iCP": 88, "ick": 89, "ju": 90, "lti": 91, "laz": 92, "mp": 93, "ov": 94, "ow": 95, "qu": 96, "sed": 97, "tel": 98, "ting": 99, "used": 100, "Ġ1": 101, "Ġ3": 102, "Ġ4": 103, "Ġ5": 104, "Ġ6": 105, "Ġ7": 106, "Ġ8": 107, "Ġ9": 108, "Ġmu": 109, "ĠIn": 110, "ĠOp": 111, "Ġbr": 112, "Ġdo": 113, "Ġju": 114, "Ġlaz": 115, "Ġov": 116, "Ġqu": 117, "Ġused": 118, "iny": 119, "Ġtes": 120, "Ġthe": 121, "Ġtiny": 122, "modal": 123, "The": 124, "This": 125, "mum": 126, "timum": 127, "Ġfor": 128, "Ġfox": 129, "ingFa": 130, "Hugg": 131, "MiniCP": 132, "ltimodal": 133, "mps": 134, "own": 135, "Ġmultimodal": 136, "ĠIntel": 137, "ĠOptimum": 138, "Ġbrown": 139, "Ġdog": 140, "Ġjumps": 141, "Ġlazy": 142, "Ġover": 143, "Ġquick": 144, "Ġtesting": 145, "ingFace": 146, "HuggingFace": 147, "MiniCPM": 148 }, "merges": [ [ "i", "n" ], [ "i", "s" ], [ "e", "l" ], [ "m", "o" ], [ "Ġ", "t" ], [ "Ġ", "is" ], [ "mo", "d" ], [ "T", "h" ], [ "f", "o" ], [ "m", "u" ], [ "t", "i" ], [ "Ġ", "2" ], [ "Ġ", "a" ], [ "Ġ", "mod" ], [ "Ġ", "fo" ], [ "in", "g" ], [ "Ġmod", "el" ], [ "C", "P" ], [ "F", "a" ], [ "H", "u" ], [ "I", "n" ], [ "I", "t" ], [ "M", "in" ], [ "O", "p" ], [ "a", "l" ], [ "a", "z" ], [ "b", "r" ], [ "c", "e" ], [ "c", "k" ], [ "d", "o" ], [ "e", "d" ], [ "e", "r" ], [ "e", "s" ], [ "g", "g" ], [ "h", "e" ], [ "i", "CP" ], [ "i", "ck" ], [ "j", "u" ], [ "l", "ti" ], [ "l", "az" ], [ "m", "p" ], [ "o", "v" ], [ "o", "w" ], [ "q", "u" ], [ "s", "ed" ], [ "t", "el" ], [ "t", "ing" ], [ "u", "sed" ], [ "Ġ", "1" ], [ "Ġ", "3" ], [ "Ġ", "4" ], [ "Ġ", "5" ], [ "Ġ", "6" ], [ "Ġ", "7" ], [ "Ġ", "8" ], [ "Ġ", "9" ], [ "Ġ", "mu" ], [ "Ġ", "In" ], [ "Ġ", "Op" ], [ "Ġ", "br" ], [ "Ġ", "do" ], [ "Ġ", "ju" ], [ "Ġ", "laz" ], [ "Ġ", "ov" ], [ "Ġ", "qu" ], [ "Ġ", "used" ], [ "in", "y" ], [ "Ġt", "es" ], [ "Ġt", "he" ], [ "Ġt", "iny" ], [ "mod", "al" ], [ "Th", "e" ], [ "Th", "is" ], [ "mu", "m" ], [ "ti", "mum" ], [ "Ġfo", "r" ], [ "Ġfo", "x" ], [ "ing", "Fa" ], [ "Hu", "gg" ], [ "Min", "iCP" ], [ "lti", "modal" ], [ "mp", "s" ], [ "ow", "n" ], [ "Ġmu", "ltimodal" ], [ "ĠIn", "tel" ], [ "ĠOp", "timum" ], [ "Ġbr", "own" ], [ "Ġdo", "g" ], [ "Ġju", "mps" ], [ "Ġlaz", "y" ], [ "Ġov", "er" ], [ "Ġqu", "ick" ], [ "Ġtes", "ting" ], [ "ingFa", "ce" ], [ "Hugg", "ingFace" ], [ "MiniCP", "M" ] ] } }