NedoTurkishTokenizer / tokenizer_config.json
nmstech's picture
Add AutoTokenizer support (trust_remote_code)
be2f46e verified
raw
history blame
358 Bytes
{
  "tokenizer_class": "TurkTokenizer",
  "model_type": "turk-tokenizer",
  "auto_map": {
    "AutoTokenizer": ["tokenization_turk.TurkTokenizer", null]
  },
  "version": "1.0.0",
  "language": "tr",
  "description": "Turkish morphological tokenizer — TR-MMLU world record 92%",
  "requires_java": true,
  "dependencies": ["turkish-tokenizer", "jpype1"]
}