stellaray777 committed on
Commit
dcc4b4a
·
verified ·
1 Parent(s): 2643869

Upload tokenizer

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% if not add_generation_prompt is defined %}
2
+ {% set add_generation_prompt = false %}
3
+ {% endif %}
4
+ {%- set ns = namespace(found=false) -%}
5
+ {%- for message in messages -%}
6
+ {%- if message['role'] == 'system' -%}
7
+ {%- set ns.found = true -%}
8
+ {%- endif -%}
9
+ {%- endfor -%}
10
+ {{bos_token}}{%- if not ns.found -%}
11
+ {{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\n'}}
12
+ {%- endif %}
13
+ {%- for message in messages %}
14
+ {%- if message['role'] == 'system' %}
15
+ {{ message['content'] }}
16
+ {%- else %}
17
+ {%- if message['role'] == 'user' %}
18
+ {{'### Instruction:\n' + message['content'] + '\n'}}
19
+ {%- else %}
20
+ {{'### Response:\n' + message['content'] + '\n<|EOT|>\n'}}
21
+ {%- endif %}
22
+ {%- endif %}
23
+ {%- endfor %}
24
+ {% if add_generation_prompt %}
25
+ {{'### Response:'}}
26
+ {% endif %}
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|end▁of▁sentence|>",
11
  "lstrip": false,
12
  "normalized": true,
13
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|EOT|>",
11
  "lstrip": false,
12
  "normalized": true,
13
  "rstrip": false,
tokenizer.json CHANGED
@@ -204,7 +204,7 @@
204
  "lstrip": false,
205
  "rstrip": false,
206
  "normalized": true,
207
- "special": false
208
  }
209
  ],
210
  "normalizer": {
 
204
  "lstrip": false,
205
  "rstrip": false,
206
  "normalized": true,
207
+ "special": true
208
  }
209
  ],
210
  "normalizer": {
tokenizer_config.json CHANGED
@@ -177,12 +177,12 @@
177
  "normalized": true,
178
  "rstrip": false,
179
  "single_word": false,
180
- "special": false
181
  }
182
  },
183
  "bos_token": "<|begin▁of▁sentence|>",
184
  "clean_up_tokenization_spaces": false,
185
- "eos_token": "<|end▁of▁sentence|>",
186
  "extra_special_tokens": {},
187
  "legacy": true,
188
  "model_max_length": 16384,
 
177
  "normalized": true,
178
  "rstrip": false,
179
  "single_word": false,
180
+ "special": true
181
  }
182
  },
183
  "bos_token": "<|begin▁of▁sentence|>",
184
  "clean_up_tokenization_spaces": false,
185
+ "eos_token": "<|EOT|>",
186
  "extra_special_tokens": {},
187
  "legacy": true,
188
  "model_max_length": 16384,