surya-ocr-2 / tokenizer_config.json
vikp's picture
Initial: dedup'd weights from datalab-to/surya-2.1.2-mtp + LICENSE from datalab-to/chandra-ocr-2
ee38d9b verified
{
"backend": "tokenizers",
"bos_token": null,
"eos_token": "<|im_end|>",
"extra_special_tokens": [
"<|im_start|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|image_pad|>",
"<|video_pad|>"
],
"is_local": false,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<|endoftext|>",
"processor_class": "Qwen3VLProcessor",
"tokenizer_class": "TokenizersBackend",
"unk_token": "<unk>"
}