Update tokenizer_config.json
Browse files- tokenizer_config.json +5 -5
tokenizer_config.json
CHANGED
|
@@ -113,7 +113,7 @@
|
|
| 113 |
"special": true
|
| 114 |
},
|
| 115 |
"128014": {
|
| 116 |
-
"content": "<|
|
| 117 |
"lstrip": false,
|
| 118 |
"normalized": false,
|
| 119 |
"rstrip": false,
|
|
@@ -121,7 +121,7 @@
|
|
| 121 |
"special": true
|
| 122 |
},
|
| 123 |
"128015": {
|
| 124 |
-
"content": "<|
|
| 125 |
"lstrip": false,
|
| 126 |
"normalized": false,
|
| 127 |
"rstrip": false,
|
|
@@ -129,7 +129,7 @@
|
|
| 129 |
"special": true
|
| 130 |
},
|
| 131 |
"128016": {
|
| 132 |
-
"content": "<|
|
| 133 |
"lstrip": false,
|
| 134 |
"normalized": false,
|
| 135 |
"rstrip": false,
|
|
@@ -2050,7 +2050,7 @@
|
|
| 2050 |
}
|
| 2051 |
},
|
| 2052 |
"bos_token": "<|begin_of_text|>",
|
| 2053 |
-
"chat_template": "{{-
|
| 2054 |
"clean_up_tokenization_spaces": true,
|
| 2055 |
"eos_token": "<|eot_id|>",
|
| 2056 |
"model_input_names": [
|
|
@@ -2061,4 +2061,4 @@
|
|
| 2061 |
"pad_token": "<|finetune_right_pad_id|>",
|
| 2062 |
"padding_side": "left",
|
| 2063 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
| 2064 |
-
}
|
|
|
|
| 113 |
"special": true
|
| 114 |
},
|
| 115 |
"128014": {
|
| 116 |
+
"content": "<|fim_prefix|>",
|
| 117 |
"lstrip": false,
|
| 118 |
"normalized": false,
|
| 119 |
"rstrip": false,
|
|
|
|
| 121 |
"special": true
|
| 122 |
},
|
| 123 |
"128015": {
|
| 124 |
+
"content": "<|fim_hole|>",
|
| 125 |
"lstrip": false,
|
| 126 |
"normalized": false,
|
| 127 |
"rstrip": false,
|
|
|
|
| 129 |
"special": true
|
| 130 |
},
|
| 131 |
"128016": {
|
| 132 |
+
"content": "<|fim_suffix|>",
|
| 133 |
"lstrip": false,
|
| 134 |
"normalized": false,
|
| 135 |
"rstrip": false,
|
|
|
|
| 2050 |
}
|
| 2051 |
},
|
| 2052 |
"bos_token": "<|begin_of_text|>",
|
| 2053 |
+
"chat_template": "{{- if .Suffix }}<|fim_begin|>{{ .Prompt }}<|fim_hole|>{{ .Suffix }}<|fim_end|>\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n{%- for message in messages %}\n{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
|
| 2054 |
"clean_up_tokenization_spaces": true,
|
| 2055 |
"eos_token": "<|eot_id|>",
|
| 2056 |
"model_input_names": [
|
|
|
|
| 2061 |
"pad_token": "<|finetune_right_pad_id|>",
|
| 2062 |
"padding_side": "left",
|
| 2063 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
| 2064 |
+
}
|