AlexHT_Hung committed on
Commit ·
6d96773
1
Parent(s): 73c49a1
Revert "Upload tokenizer"
Browse files
This reverts commit 73c49a12e3243978affacfcaf5427654288b8583.
- added_tokens.json +3 -7
- tokenizer.json +1 -37
- tokenizer_config.json +4 -35
added_tokens.json
CHANGED
|
@@ -1,17 +1,13 @@
|
|
| 1 |
{
|
| 2 |
-
"\"arguments\":": 32014,
|
| 3 |
-
"\"function\":": 32012,
|
| 4 |
-
"\"name\":": 32013,
|
| 5 |
"<|assistant|>": 32001,
|
| 6 |
"<|endoftext|>": 32000,
|
| 7 |
"<|end|>": 32007,
|
| 8 |
-
"<|function_metadata|>": 32008,
|
| 9 |
"<|placeholder1|>": 32002,
|
| 10 |
"<|placeholder2|>": 32003,
|
| 11 |
"<|placeholder3|>": 32004,
|
| 12 |
-
"<|
|
| 13 |
-
"<|
|
| 14 |
-
"<|tool_calls|>": 32011,
|
| 15 |
"<|tool_results|>": 32009,
|
|
|
|
| 16 |
"<|user|>": 32010
|
| 17 |
}
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
| 2 |
"<|assistant|>": 32001,
|
| 3 |
"<|endoftext|>": 32000,
|
| 4 |
"<|end|>": 32007,
|
|
|
|
| 5 |
"<|placeholder1|>": 32002,
|
| 6 |
"<|placeholder2|>": 32003,
|
| 7 |
"<|placeholder3|>": 32004,
|
| 8 |
+
"<|tool_calls|>": 32005,
|
| 9 |
+
"<|function_metadata|>": 32008,
|
|
|
|
| 10 |
"<|tool_results|>": 32009,
|
| 11 |
+
"<|system|>": 32006,
|
| 12 |
"<|user|>": 32010
|
| 13 |
}
|
tokenizer.json
CHANGED
|
@@ -128,42 +128,6 @@
|
|
| 128 |
"rstrip": true,
|
| 129 |
"normalized": false,
|
| 130 |
"special": true
|
| 131 |
-
},
|
| 132 |
-
{
|
| 133 |
-
"id": 32011,
|
| 134 |
-
"content": "<|tool_calls|>",
|
| 135 |
-
"single_word": false,
|
| 136 |
-
"lstrip": false,
|
| 137 |
-
"rstrip": true,
|
| 138 |
-
"normalized": false,
|
| 139 |
-
"special": true
|
| 140 |
-
},
|
| 141 |
-
{
|
| 142 |
-
"id": 32012,
|
| 143 |
-
"content": "\"function\":",
|
| 144 |
-
"single_word": false,
|
| 145 |
-
"lstrip": false,
|
| 146 |
-
"rstrip": false,
|
| 147 |
-
"normalized": true,
|
| 148 |
-
"special": false
|
| 149 |
-
},
|
| 150 |
-
{
|
| 151 |
-
"id": 32013,
|
| 152 |
-
"content": "\"name\":",
|
| 153 |
-
"single_word": false,
|
| 154 |
-
"lstrip": false,
|
| 155 |
-
"rstrip": false,
|
| 156 |
-
"normalized": true,
|
| 157 |
-
"special": false
|
| 158 |
-
},
|
| 159 |
-
{
|
| 160 |
-
"id": 32014,
|
| 161 |
-
"content": "\"arguments\":",
|
| 162 |
-
"single_word": false,
|
| 163 |
-
"lstrip": false,
|
| 164 |
-
"rstrip": false,
|
| 165 |
-
"normalized": true,
|
| 166 |
-
"special": false
|
| 167 |
}
|
| 168 |
],
|
| 169 |
"normalizer": {
|
|
@@ -93496,4 +93460,4 @@
|
|
| 93496 |
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
|
| 93497 |
]
|
| 93498 |
}
|
| 93499 |
-
}
|
|
|
|
| 128 |
"rstrip": true,
|
| 129 |
"normalized": false,
|
| 130 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
}
|
| 132 |
],
|
| 133 |
"normalizer": {
|
|
|
|
| 93460 |
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
|
| 93461 |
]
|
| 93462 |
}
|
| 93463 |
+
}
|
tokenizer_config.json
CHANGED
|
@@ -67,7 +67,7 @@
|
|
| 67 |
"special": true
|
| 68 |
},
|
| 69 |
"32005": {
|
| 70 |
-
"content": "<|
|
| 71 |
"lstrip": false,
|
| 72 |
"normalized": false,
|
| 73 |
"rstrip": true,
|
|
@@ -113,38 +113,6 @@
|
|
| 113 |
"rstrip": true,
|
| 114 |
"single_word": false,
|
| 115 |
"special": true
|
| 116 |
-
},
|
| 117 |
-
"32011": {
|
| 118 |
-
"content": "<|tool_calls|>",
|
| 119 |
-
"lstrip": false,
|
| 120 |
-
"normalized": false,
|
| 121 |
-
"rstrip": true,
|
| 122 |
-
"single_word": false,
|
| 123 |
-
"special": true
|
| 124 |
-
},
|
| 125 |
-
"32012": {
|
| 126 |
-
"content": "\"function\":",
|
| 127 |
-
"lstrip": false,
|
| 128 |
-
"normalized": true,
|
| 129 |
-
"rstrip": false,
|
| 130 |
-
"single_word": false,
|
| 131 |
-
"special": false
|
| 132 |
-
},
|
| 133 |
-
"32013": {
|
| 134 |
-
"content": "\"name\":",
|
| 135 |
-
"lstrip": false,
|
| 136 |
-
"normalized": true,
|
| 137 |
-
"rstrip": false,
|
| 138 |
-
"single_word": false,
|
| 139 |
-
"special": false
|
| 140 |
-
},
|
| 141 |
-
"32014": {
|
| 142 |
-
"content": "\"arguments\":",
|
| 143 |
-
"lstrip": false,
|
| 144 |
-
"normalized": true,
|
| 145 |
-
"rstrip": false,
|
| 146 |
-
"single_word": false,
|
| 147 |
-
"special": false
|
| 148 |
}
|
| 149 |
},
|
| 150 |
"bos_token": "<s>",
|
|
@@ -160,12 +128,13 @@
|
|
| 160 |
],
|
| 161 |
"clean_up_tokenization_spaces": false,
|
| 162 |
"eos_token": "<|endoftext|>",
|
| 163 |
-
"
|
| 164 |
"model_max_length": 4096,
|
| 165 |
"pad_token": "<|endoftext|>",
|
| 166 |
"padding_side": "left",
|
| 167 |
"sp_model_kwargs": {},
|
| 168 |
"tokenizer_class": "LlamaTokenizer",
|
| 169 |
"unk_token": "<unk>",
|
| 170 |
-
"use_default_system_prompt": false
|
|
|
|
| 171 |
}
|
|
|
|
| 67 |
"special": true
|
| 68 |
},
|
| 69 |
"32005": {
|
| 70 |
+
"content": "<|tool_calls|>",
|
| 71 |
"lstrip": false,
|
| 72 |
"normalized": false,
|
| 73 |
"rstrip": true,
|
|
|
|
| 113 |
"rstrip": true,
|
| 114 |
"single_word": false,
|
| 115 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
}
|
| 117 |
},
|
| 118 |
"bos_token": "<s>",
|
|
|
|
| 128 |
],
|
| 129 |
"clean_up_tokenization_spaces": false,
|
| 130 |
"eos_token": "<|endoftext|>",
|
| 131 |
+
"legacy": false,
|
| 132 |
"model_max_length": 4096,
|
| 133 |
"pad_token": "<|endoftext|>",
|
| 134 |
"padding_side": "left",
|
| 135 |
"sp_model_kwargs": {},
|
| 136 |
"tokenizer_class": "LlamaTokenizer",
|
| 137 |
"unk_token": "<unk>",
|
| 138 |
+
"use_default_system_prompt": false,
|
| 139 |
+
"func_bos_token": "<|tool_calls|>"
|
| 140 |
}
|