Maani committed on
Commit
c11d4e3
·
verified ·
1 Parent(s): 6772e51

Upload tokenizer

Browse files
chat_template.jinja CHANGED
@@ -1,120 +1,7 @@
1
- {{- bos_token }}
2
- {%- if not tools is defined %}
3
- {%- set tools = none %}
4
- {%- endif %}
5
- {%- if not enable_thinking is defined %}
6
- {%- set enable_thinking = false %}
7
- {%- endif %}
8
- {#- This block extracts the system message, so we can slot it into the right place. #}
9
- {%- if messages[0]['role'] == 'system' %}
10
- {%- set system_message = messages[0]['content']|trim %}
11
- {%- set messages = messages[1:] %}
12
- {%- else %}
13
- {%- set system_message = "" %}
14
- {%- endif %}
15
- {#- Set the system message. If enable_thinking is true, add the "Enable deep thinking subroutine." #}
16
- {%- if enable_thinking %}
17
- {%- if system_message != "" %}
18
- {%- set system_message = "Enable deep thinking subroutine.
19
-
20
- " ~ system_message %}
21
- {%- else %}
22
- {%- set system_message = "Enable deep thinking subroutine." %}
23
- {%- endif %}
24
- {%- endif %}
25
- {#- Set the system message. In case there are tools present, add them to the system message. #}
26
- {%- if tools is not none or system_message != '' %}
27
- {{- "<|start_header_id|>system<|end_header_id|>
28
-
29
- " }}
30
- {{- system_message }}
31
- {%- if tools is not none %}
32
- {%- if system_message != "" %}
33
- {{- "
34
-
35
- " }}
36
- {%- endif %}
37
- {{- "Available Tools:
38
- " }}
39
- {%- for t in tools %}
40
- {{- t | tojson(indent=4) }}
41
- {{- "
42
-
43
- " }}
44
- {%- endfor %}
45
- {%- endif %}
46
- {{- "<|eot_id|>" }}
47
- {%- endif %}
48
-
49
- {#- Rest of the messages #}
50
- {%- for message in messages %}
51
- {#- The special cases are when the message is from a tool (via role ipython/tool/tool_results) or when the message is from the assistant, but has "tool_calls". If not, we add the message directly as usual. #}
52
- {#- Case 1 - Usual, non tool related message. #}
53
- {%- if not (message.role == "ipython" or message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %}
54
- {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>
55
-
56
- ' }}
57
- {%- if message['content'] is string %}
58
- {{- message['content'] | trim }}
59
- {%- else %}
60
- {%- for item in message['content'] %}
61
- {%- if item.type == 'text' %}
62
- {{- item.text | trim }}
63
- {%- endif %}
64
- {%- endfor %}
65
- {%- endif %}
66
- {{- '<|eot_id|>' }}
67
-
68
- {#- Case 2 - the response is from the assistant, but has a tool call returned. The assistant may also have returned some content along with the tool call. #}
69
- {%- elif message.tool_calls is defined and message.tool_calls is not none %}
70
- {{- "<|start_header_id|>assistant<|end_header_id|>
71
-
72
- " }}
73
- {%- if message['content'] is string %}
74
- {{- message['content'] | trim }}
75
- {%- else %}
76
- {%- for item in message['content'] %}
77
- {%- if item.type == 'text' %}
78
- {{- item.text | trim }}
79
- {%- if item.text | trim != "" %}
80
- {{- "
81
-
82
- " }}
83
- {%- endif %}
84
- {%- endif %}
85
- {%- endfor %}
86
- {%- endif %}
87
- {{- "[" }}
88
- {%- for tool_call in message.tool_calls %}
89
- {%- set out = tool_call.function|tojson %}
90
- {%- if not tool_call.id is defined %}
91
- {{- out }}
92
- {%- else %}
93
- {{- out[:-1] }}
94
- {{- ', "id": "' + tool_call.id + '"}' }}
95
- {%- endif %}
96
- {%- if not loop.last %}
97
- {{- ", " }}
98
- {%- else %}
99
- {{- "]<|eot_id|>" }}
100
- {%- endif %}
101
- {%- endfor %}
102
-
103
- {#- Case 3 - the response is from a tool call. The tool call may have an id associated with it as well. If it does, we add it to the prompt. #}
104
- {%- elif message.role == "ipython" or message["role"] == "tool_results" or message["role"] == "tool" %}
105
- {{- "<|start_header_id|>ipython<|end_header_id|>
106
-
107
- " }}
108
- {%- if message.tool_call_id is defined and message.tool_call_id != '' %}
109
- {{- '{"content": ' + (message.content | tojson) + ', "call_id": "' + message.tool_call_id + '"}' }}
110
- {%- else %}
111
- {{- '{"content": ' + (message.content | tojson) + '}' }}
112
- {%- endif %}
113
- {{- "<|eot_id|>" }}
114
- {%- endif %}
115
- {%- endfor %}
116
- {%- if add_generation_prompt %}
117
- {{- '<|start_header_id|>assistant<|end_header_id|>
118
-
119
- ' }}
120
- {%- endif %}
 
1
+ {{- bos_token -}}{%- set system_prompt = "" -%}{%- set ns = namespace(system_prompt="") -%}{%- if messages[0]["role"] == "system" -%} {%- set ns.system_prompt = messages[0]["content"] -%} {%- set messages = messages[1:] -%}{%- endif -%}{%- if tools -%} {%- set ns.system_prompt = ns.system_prompt + ("
2
+ " if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%} {%- for tool in tools -%} {%- if tool is not string -%} {%- set tool = tool | tojson -%} {%- endif -%} {%- set ns.system_prompt = ns.system_prompt + tool -%} {%- if not loop.last -%} {%- set ns.system_prompt = ns.system_prompt + ", " -%} {%- endif -%} {%- endfor -%} {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}{%- endif -%}{%- if ns.system_prompt -%} {{- "<|im_start|>system
3
+ " + ns.system_prompt + "<|im_end|>
4
+ " -}}{%- endif -%}{%- for message in messages -%} {{- "<|im_start|>" + message["role"] + "
5
+ " -}} {%- set content = message["content"] -%} {%- if content is not string -%} {%- set content = content | tojson -%} {%- endif -%} {%- if message["role"] == "tool" -%} {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%} {%- endif -%} {{- content + "<|im_end|>
6
+ " -}}{%- endfor -%}{%- if add_generation_prompt -%} {{- "<|im_start|>assistant
7
+ " -}}{%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
special_tokens_map.json CHANGED
@@ -1,13 +1,20 @@
1
  {
2
  "bos_token": {
3
- "content": "<|begin_of_text|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|eot_id|>",
 
 
 
 
 
 
 
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
1
  {
2
  "bos_token": {
3
+ "content": "<|startoftext|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|pad|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
- size 17209920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89f0c8a6a1a96dd707d2efd06d815aa86123aaaad8328dda13188fcad6e25592
3
+ size 4732406
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff