{
  "model_type": "sentencepiece",
  "tokenizer_class": "PreTrainedTokenizerFast",
  "vocab_size": 8000,
  "model_max_length": 512,
  "bos_token": "<s>",
  "eos_token": "</s>",
  "unk_token": "<unk>",
  "pad_token": "<pad>",
  "sp_model_kwargs": {},
  "add_bos_token": false,
  "add_eos_token": false,
  "clean_up_tokenization_spaces": true,
  "legacy": true,
  "name_or_path": "khopilot/khmer-tokenizer-v7"
}