🦴 Sentinel Universal Tokenizer v1.0 — multimodal tokenizer grounded in Gradient Axiom
c488e22 verified | { | |
| "backend": "tokenizers", | |
| "bos_token": "<s>", | |
| "eos_token": "</s>", | |
| "extra_special_tokens": [ | |
| "<text_start>", | |
| "<text_end>", | |
| "<image_start>", | |
| "<image_end>", | |
| "<image>", | |
| "<audio_start>", | |
| "<audio_end>", | |
| "<audio>", | |
| "<video_start>", | |
| "<video_end>", | |
| "<video>", | |
| "<sentinel>", | |
| "<sentinel_c1>", | |
| "<sentinel_c2>", | |
| "<scale_1e>", | |
| "<translate>", | |
| "<summarize>", | |
| "<generate>", | |
| "<understand>", | |
| "<caption>", | |
| "<turn>", | |
| "<system>", | |
| "<user>", | |
| "<assistant>", | |
| "<code_start>", | |
| "<code_end>", | |
| "<math_start>", | |
| "<math_end>" | |
| ], | |
| "mask_token": "<mask>", | |
| "model_max_length": 8192, | |
| "pad_token": "<pad>", | |
| "padding_side": "right", | |
| "tokenizer_class": "TokenizersBackend", | |
| "truncation_side": "right", | |
| "unk_token": "<unk>" | |
| } | |