File size: 707 Bytes
"""
Crayon Core Module.

Contains the fundamental algorithms and data structures for tokenization:
1. Tokenizer (the algorithmic driver)
2. Vocabulary (the data structure)
3. Primitives (metadata structures)
4. Vocab Builder (entropy-guided construction)
"""
from .tokenizer import crayon_tokenize
from .vocabulary import CrayonVocab
from .primitives import TokenMetadata
from .vocab_builder import (
EntropyVocabBuilder,
construct_optimal_vocabulary,
deterministic_sort_key,
assign_stable_ids
)
# Public API of the package: the names exported by ``from <package> import *``.
# Every entry corresponds to a name imported from a sibling module above.
__all__ = [
    "crayon_tokenize",
    "CrayonVocab",
    "TokenMetadata",
    "EntropyVocabBuilder",
    "construct_optimal_vocabulary",
    "deterministic_sort_key",
    "assign_stable_ids",
]