{ "vocab_size": { "total": 16377, "total_with_special": 16384, "base": 256, "merges": 16121, "special": 7, "is_power_of_2": true, "power": 14, "matches_expected": true }, "reachability": { "valid_merges": 16121, "invalid_merges": 0, "reachable": 16377, "unreachable": 0, "all_reachable": true }, "length_dist": { "distribution": { "1": 256, "2": 7149, "3": 3360, "4": 3082, "5": 719, "6": 606, "7": 228, "8": 377, "9": 78, "10": 92, "11": 42, "12": 99, "13": 34, "14": 38, "15": 30, "16": 86, "17": 4, "18": 12, "19": 9, "20": 15, "21": 6, "22": 6, "23": 3, "24": 11, "25": 2, "26": 1, "27": 4, "28": 3, "29": 1, "30": 2, "31": 1, "32": 21 }, "avg_length": 3.4977712645783723, "min_length": 1, "max_length": 32, "length_3_count": 3360, "length_3_percent": 20.516578127862246 }, "byte_content": { "null_tokens": 4128, "ascii_printable": 3513, "ascii_only": 7256, "high_byte": 9121, "mixed": 4506, "byte_distribution": { "0": 9741, "255": 1718, "1": 1386, "72": 1352, "32": 958, "139": 955, "3": 913, "2": 866, "116": 752, "204": 751, "36": 735, "64": 734, "101": 685, "128": 579, "65": 556, "4": 543, "137": 522, "249": 518, "97": 515, "114": 494, "232": 482, "105": 456, "110": 436, "115": 435, "8": 432, "111": 432, "15": 423, "16": 405, "99": 384, "131": 367, "48": 360, "108": 358, "68": 357, "117": 346, "224": 329, "84": 327, "169": 325, "112": 323, "192": 323, "100": 316, "76": 307, "5": 304, "6": 298, "69": 298, "95": 296, "73": 280, "145": 267, "66": 266, "141": 265, "31": 262 } }, "diversity": { "1": { "learned": 256, "possible": 256, "coverage": 100.0 }, "2": { "learned": 7149, "possible": 65536, "coverage": 10.90850830078125 }, "3": { "learned": 3360, "possible": 16777216, "coverage": 0.02002716064453125 }, "4": { "learned": 3082, "possible": 4294967296, "coverage": 7.175840437412262e-05 } } }