{ "tokenizer_class": "PreTrainedTokenizerFast", "vocab_size": 65536, "model_type": "glaurung-binary-tokenizer", "version": "001", "encoding": "latin-1", "description": "BPE tokenizer for binary executables and malware (x86-64, x86-32, ARM64, Windows PE, Linux ELF)", "compression_ratio": 2.849, "training_data": { "size_gb": 13, "files": 30738, "platforms": [ "Linux (Alpine, Debian, Ubuntu)", "Windows (8, 10, 11)" ], "architectures": [ "x86-64", "x86-32", "ARM64" ] }, "performance": { "bytes_per_token": 2.849, "theoretical_efficiency": 0.86, "improvement_over_32k": 0.099 }, "predecessor": "mjbommar/binary-tokenizer-005" }