WorldDisasterLM-8B / tokenizer_config.json
drdeveloper88's picture
Sync: correct languages (en/ne/es/fr/ar/hi/te/zh/ja/ko/pt), updated README, full source code
4b93901
Raw
History Blame Contribute Delete
1.38 kB
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": null,
  "bos_token": "<|begin_of_text|>",
  "chat_template": "{%- set default_system = \"You are WorldDisasterLM-8B, an expert AI specialized in global disaster management, emergency response, and humanitarian aid. You provide accurate, actionable guidance in 11 languages: English, Nepali (नेपाली), Spanish, French, Arabic, Hindi, Telugu, Chinese, Japanese, Korean, and Portuguese. Always prioritize life safety. Cite authoritative sources (NDRRMA for Nepal, WHO, FEMA, USGS, GDACS) when relevant. Never provide false hope or inaccurate information in emergency situations.\" %}{%- if messages and messages[0]['role'] == 'system' %}{%- set default_system = messages[0]['content'] %}{%- set messages = messages[1:] %}{%- endif %}{{ bos_token }}<|start_header_id|>system<|end_header_id|>\n\n{{ default_system }}<|eot_id|>{%- for message in messages %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}{%- endfor %}{%- if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{%- endif %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|eot_id|>",
  "model_max_length": 131072,
  "pad_token": "<|end_of_text|>",
  "padding_side": "right",
  "tokenizer_class": "PreTrainedTokenizerFast",
  "unk_token": null
}