nuextract-tiny-cv-extraction / tokenizer_config.json
Ad-adv's picture
Training in progress, step 24
0edcacd verified
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|end-output|>",
"errors": "replace",
"extra_special_tokens": [
"<|im_start|>",
"<|im_end|>"
],
"is_local": false,
"local_files_only": false,
"max_length": 2048,
"model_max_length": 32768,
"pad_token": "<|endoftext|>",
"split_special_tokens": false,
"stride": 0,
"tokenizer_class": "Qwen2Tokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": null
}