Thaitokenizer / special_tokens_map.json
Nattapong Tapachoom
Add tokenizer configuration file with special tokens and settings
12f0d60
{
"additional_special_tokens": [
{
"content": "<NL>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<TAB>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<|system|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<|user|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<|assistant|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<|eot|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<tool_result>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "</tool_result>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<fim_prefix>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<fim_middle>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<fim_suffix>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<|endofdocument|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_0>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_7>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_8>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_9>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_10>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_11>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_12>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_13>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_14>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_15>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_16>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_17>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_18>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_19>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_20>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_21>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_22>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_23>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_24>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_25>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_26>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_27>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_28>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_29>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_30>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_31>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_32>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_33>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_34>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_35>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_36>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_37>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_38>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_39>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_40>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_41>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_42>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_43>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_44>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_45>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_46>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_47>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_48>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_49>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_50>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_51>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_52>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_53>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_54>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_55>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_56>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_57>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_58>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_59>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_60>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_61>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_62>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_63>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_64>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_65>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_66>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_67>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_68>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_69>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_70>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_71>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_72>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_73>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_74>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_75>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_76>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_77>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_78>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_79>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_80>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_81>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_82>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_83>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_84>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_85>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_86>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_87>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_88>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_89>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_90>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_91>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_92>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_93>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_94>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_95>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_96>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_97>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_98>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
{
"content": "<extra_99>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
],
"bos_token": {
"content": "<bos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<eos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}