""" json_tokenizer — A tokenizer optimized for JSON structures. Architecture: - Structural tokens: single-token representations for JSON grammar ({, }, [, ], :, ,) - Key tokens: deduplicated key vocabulary with Key() wrapper - Value BPE: byte-pair encoding trained on JSON string/number values - Type tokens: explicit type markers for faithful roundtrip encoding Delivers 5-15% fewer tokens than cl100k_base on schema-repetitive JSON with a 90x smaller vocabulary, and lossless roundtrip fidelity. """ from json_tokenizer.tokenizer import JSONTokenizer from json_tokenizer.bpe import BPETrainer __version__ = "0.2.0" __all__ = ["JSONTokenizer", "BPETrainer"] try: from json_tokenizer.hf_compat import JSONPreTrainedTokenizer __all__.append("JSONPreTrainedTokenizer") except ImportError: pass # transformers not installed