openPangu-R-72B-2512-Int8 / tokenizer_config.json
drizzlezyk's picture
Upload tokenizer_config.json with huggingface_hub
c8621f2 verified
{
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {"content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "1": {"content": "<s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "2": {"content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45806": {"content": "<|User|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45813": {"content": "<|Bot|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45830": {"content": "[unused0]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45840": {"content": "[unused1]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45846": {"content": "[unused2]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45849": {"content": "[unused3]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45861": {"content": "[unused4]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45866": {"content": "[unused5]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45874": {"content": "[unused6]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45883": {"content": "[unused7]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45884": {"content": "[unused8]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45887": {"content": "[unused9]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45892": {"content": "[unused10]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45920": {"content": "[unused11]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45932": {"content": "[unused12]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45938": {"content": "[unused13]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45953": {"content": "[unused14]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45968": {"content": "[unused15]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45974": {"content": "[unused16]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45982": {"content": "[unused17]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "45986": {"content": "[unused18]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46005": {"content": "[unused19]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46007": {"content": "[unused20]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46014": {"content": "[unused21]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46017": {"content": "[unused22]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46028": {"content": "[unused23]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46032": {"content": "[unused24]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46081": {"content": "[unused25]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46086": {"content": "[unused26]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46101": {"content": "[unused27]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46183": {"content": "[unused28]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46230": {"content": "[unused29]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46245": {"content": "[unused30]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "46257": {"content": "[unused31]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "144208": {"content": "[unused32]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "144209": {"content": "[unused33]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}
  },
  "auto_map": {"AutoTokenizer": ["tokenization_openpangu.OpenPanguTokenizer", null]},
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "[unused10]",
  "legacy": true,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": null,
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "OpenPanguTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false,
  "chat_template": "{%- set ns = namespace(is_first_tool=true) %}\n{%- if not mcp_prompt is defined %}\n {%- set mcp_prompt = true %}\n{%- endif %}\n{%- if not background is defined %}\n {%- set background = none %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- if not think is defined %}\n {%- set think = true %}\n{%- endif %}\n{%- if not reasoning_effort is defined %}\n {%- set reasoning_effort = \"high\" %}\n{%- endif %}\n\n{{- '<s>[unused9]系统:' -}}\n{#- 提取系统消息 #}\n{%- set system_message = \"\" %}\n{%- if messages and messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- endif %}\n{#- 如果传入工具将使用mcp人设,可以使用mcp_prompt字段禁用 #}\n{%- if mcp_prompt and tools %}\n {%- if system_message %}\n {%- set system_message = system_message + \"\n\" %}\n {%- endif %}\n {%- set system_message = system_message + \"你是一个能够调用外部工具解决问题的专家,你的目标是高效、准确、清晰地完成任务。\n你需要根据用户的问题,决定是否需要使用工具来完成任务。如果需要,请以明确的格式调用工具;如果不需要,请直接回答。\n你可以根据上下文决定是否继续调用工具或基于已有结果直接回答用户。如果工具调用已足够,请合理组织语言向用户汇报结论。在没有获得显式的调用结果之前,在调用工具的当轮回复之内严禁虚构或者假设一个工具调用结果来完成任务或者回答问题。也不应在没有返回工具调用信息的情况下,在调用工具的当轮假设或者明确声称工具执行成功。\" %}\n{%- endif %}\n{#- 思维链分档 #}\n{%- if reasoning_effort == \"low\" and think %}\n {%- if system_message or tools or background %}\n {%- set system_message = \"<meta>\n<reasoning>\neffort: compact\n</reasoning>\n</meta>\n\n\" + system_message %}\n {%- else %}\n {%- set system_message = \"<meta>\n<reasoning>\neffort: compact\n</reasoning>\n</meta>\" %}\n {%- endif %}\n{%- endif %}\n{{- system_message -}}\n\n{#- 工具使用描述和规范调用格式 #}\n{%- if tools %}\n {{- '\n你将在<tools></tools>标签对内获得每个工具的描述:\n<tools>\n' }}\n {{- tools | tojson(ensure_ascii=False, sort_keys=False) }}\n {{- '\n</tools>\n' }}\n {{- \"对于每个函数调用,返回一个 JSON 对象,放在 [unused11][unused12] 标签对中,多个调用组成一个列表,其中每个函数包含函数名和对应函数的参数,格式如下:\n\" }}\n {{- '[unused11]\n[{\"name\": \"<函数名1>\", \"arguments\": <args1 json对象>}, {\"name\": \"<函数名2>\", \"arguments\": <args2 json对象>}, ...]\n[unused12]' }}\n {{- '\n<工具使用原则>\n1. 只有在所有必填参数(required字段中列出的)都具备有效值时,才能调用该函数\n2. 如果缺少任何必填参数,必须向用户询问缺失的参数,而不是直接调用函数\n3. 可选参数如果没有提供可以忽略或使用默认值\n</工具使用原则>' }}\n{%- endif %}\n\n{#- 背景信息字段 #}\n{%- if background is not none and background -%}\n {{- '\n<背景信息>' -}}\n {{- background -}}\n {{- '</背景信息>' -}}\n{%- endif %}\n\n{%- if messages | length == 0 and not think %}\n {{- \" /no_think\" -}}\n{%- endif %}\n{{- '[unused10]' -}}\n\n{%- if messages | length != 0 %}\n {%- for message in messages[:-1] %}\n {%- if message['role'] == 'user' %}\n {{- '[unused9]用户:' + message['content'] -}}\n {%- if message.get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = message.tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {{- \" /no_think\" -}}\n {{- '[unused10]' -}}\n {%- endif %}\n\n {%- if message['role'] == 'assistant' %}\n {{- '[unused9]助手:[unused16][unused17]' -}}\n {{- message['content'] or '' -}}\n {%- if message.get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = message.tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {{- '[unused10]' }}\n {%- endif %}\n\n {%- if message['role'] == 'tool' %}\n {{- '[unused9]' -}}\n {{- '工具:' + message['content'] + \" /no_think\" -}}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- endfor %}\n\n {#- 处理最后一个角色,判断快慢思考 #}\n {%- if messages[-1]['role'] == \"user\" %}\n {{- '[unused9]' -}}\n {{- '用户:' + messages[-1]['content'] -}}\n {%- if messages[-1].get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = messages[-1].tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- if messages[-1]['role'] == \"tool\" %}\n {{- '[unused9]' -}}\n {{- '工具:' + messages[-1]['content'] -}}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n {%- if messages[-1]['role'] == \"assistant\" %}\n {{- '[unused9]' -}}\n {{- '助手:[unused16][unused17]' + (messages[-1]['content'] or '') -}}\n {%- if messages[-1].get('tool_calls') %}\n {{- '[unused11]\n[' }}\n {%- set function_list = messages[-1].tool_calls | selectattr('function') | map(attribute='function') | list %}\n {%- for function_item in function_list %}\n {%- if not ns.is_first_tool %}\n {{- ', ' -}}\n {%- endif %}\n {%- if function_item.arguments is string %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments + '}' -}}\n {%- else %}\n {{- '{\"name\": \"' + function_item.name + '\", \"arguments\": ' + function_item.arguments | tojson(ensure_ascii=False, sort_keys=False) + '}' -}}\n {%- endif %}\n {%- set ns.is_first_tool = false %}\n {%- endfor %}\n {%- set ns.is_first_tool = true %}\n {{- ']\n[unused12]' }}\n {%- endif %}\n {%- if not think %}\n {{- \" /no_think\" -}}\n {%- endif %}\n {{- '[unused10]' -}}\n {%- endif %}\n{%- endif %}\n\n{{-'[unused9]助手:' }}\n"
}