MuXodious commited on
Commit
64b26a9
·
verified ·
1 Parent(s): 31a4ae4

Upload tokenizer

Browse files
Files changed (3) hide show
  1. chat_template.jinja +7 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +20 -0
chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {{- bos_token -}}{%- set system_prompt = "" -%}{%- set ns = namespace(system_prompt="") -%}{%- if messages[0]["role"] == "system" -%} {%- set ns.system_prompt = messages[0]["content"] -%} {%- set messages = messages[1:] -%}{%- endif -%}{%- if tools -%} {%- set ns.system_prompt = ns.system_prompt + ("
2
+ " if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%} {%- for tool in tools -%} {%- if tool is not string -%} {%- set tool = tool | tojson -%} {%- endif -%} {%- set ns.system_prompt = ns.system_prompt + tool -%} {%- if not loop.last -%} {%- set ns.system_prompt = ns.system_prompt + ", " -%} {%- endif -%} {%- endfor -%} {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}{%- endif -%}{%- if ns.system_prompt -%} {{- "<|im_start|>system
3
+ " + ns.system_prompt + "<|im_end|>
4
+ " -}}{%- endif -%}{%- for message in messages -%} {{- "<|im_start|>" + message["role"] + "
5
+ " -}} {%- set content = message["content"] -%} {%- if content is not string -%} {%- set content = content | tojson -%} {%- endif -%} {%- if message["role"] == "tool" -%} {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%} {%- endif -%} {{- content + "<|im_end|>
6
+ " -}}{%- endfor -%}{%- if add_generation_prompt -%} {{- "<|im_start|>assistant
7
+ " -}}{%- endif -%}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|startoftext|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|im_end|>",
6
+ "is_local": false,
7
+ "legacy": false,
8
+ "model_input_names": [
9
+ "input_ids",
10
+ "attention_mask"
11
+ ],
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "model_specific_special_tokens": {},
14
+ "pad_token": "<|pad|>",
15
+ "sp_model_kwargs": {},
16
+ "spaces_between_special_tokens": false,
17
+ "tokenizer_class": "TokenizersBackend",
18
+ "use_default_system_prompt": false,
19
+ "use_fast": true
20
+ }