Thaitokenizer / training_config.json
JonusNattapong's picture
Upload folder using huggingface_hub
46f1427 verified
raw
history blame contribute delete
369 Bytes
{
"corpus_path": "combined_thai_corpus.txt",
"vocab_size": 35590,
"model_type": "unigram",
"min_frequency": 2,
"max_token_length": 16,
"use_thai_pretokenizer": false,
"thai_engine": null,
"normalize_text": false,
"enable_byte_fallback": false,
"dropout": null,
"special_tokens": ["<unk>"],
"training_time": "2025-07-04 00:00:00"
}