"""json_tokenizer: a tokenizer optimized for JSON structures.

Architecture:

- Structural tokens: one token per JSON grammar symbol ({, }, [, ], :, ,).
- Key tokens: a deduplicated key vocabulary, using the Key() wrapper.
- Value BPE: byte-pair encoding trained on JSON string/number values.
- Type tokens: explicit type markers for faithful roundtrip encoding.

On schema-repetitive JSON this yields 5-15% fewer tokens than cl100k_base,
with a 90x smaller vocabulary and lossless roundtrip fidelity.
"""

from json_tokenizer.tokenizer import JSONTokenizer
from json_tokenizer.bpe import BPETrainer

__version__ = "0.2.0"

# Names that are always part of the public API.
__all__ = ["JSONTokenizer", "BPETrainer"]

# Optional export: JSONPreTrainedTokenizer is only published when importing
# json_tokenizer.hf_compat succeeds.
try:
    from json_tokenizer.hf_compat import JSONPreTrainedTokenizer
except ImportError:
    # transformers not installed -- expose the core API only.
    pass
else:
    __all__.append("JSONPreTrainedTokenizer")