Upload tokenizer
Browse files- tokenizer.json +30 -30
- tokenizer_config.json +24 -24
tokenizer.json
CHANGED
|
@@ -2073,6 +2073,33 @@
|
|
| 2073 |
"normalized": false,
|
| 2074 |
"special": true
|
| 2075 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2076 |
{
|
| 2077 |
"id": 233,
|
| 2078 |
"content": ">>UNUSED_221<<",
|
|
@@ -4691,33 +4718,6 @@
|
|
| 4691 |
"rstrip": false,
|
| 4692 |
"normalized": false,
|
| 4693 |
"special": true
|
| 4694 |
-
},
|
| 4695 |
-
{
|
| 4696 |
-
"id": 32768,
|
| 4697 |
-
"content": "<|file_sep|>",
|
| 4698 |
-
"single_word": false,
|
| 4699 |
-
"lstrip": false,
|
| 4700 |
-
"rstrip": false,
|
| 4701 |
-
"normalized": false,
|
| 4702 |
-
"special": true
|
| 4703 |
-
},
|
| 4704 |
-
{
|
| 4705 |
-
"id": 32769,
|
| 4706 |
-
"content": "<|repo_name|>",
|
| 4707 |
-
"single_word": false,
|
| 4708 |
-
"lstrip": false,
|
| 4709 |
-
"rstrip": false,
|
| 4710 |
-
"normalized": false,
|
| 4711 |
-
"special": true
|
| 4712 |
-
},
|
| 4713 |
-
{
|
| 4714 |
-
"id": 32770,
|
| 4715 |
-
"content": "<|repo_tree|>",
|
| 4716 |
-
"single_word": false,
|
| 4717 |
-
"lstrip": false,
|
| 4718 |
-
"rstrip": false,
|
| 4719 |
-
"normalized": false,
|
| 4720 |
-
"special": true
|
| 4721 |
}
|
| 4722 |
],
|
| 4723 |
"normalizer": null,
|
|
@@ -4999,9 +4999,9 @@
|
|
| 4999 |
"<|im_start|>": 227,
|
| 5000 |
"<|im_end|>": 228,
|
| 5001 |
"<|system|>": 229,
|
| 5002 |
-
"
|
| 5003 |
-
"
|
| 5004 |
-
"
|
| 5005 |
">>UNUSED_221<<": 233,
|
| 5006 |
">>UNUSED_222<<": 234,
|
| 5007 |
">>UNUSED_223<<": 235,
|
|
|
|
| 2073 |
"normalized": false,
|
| 2074 |
"special": true
|
| 2075 |
},
|
| 2076 |
+
{
|
| 2077 |
+
"id": 230,
|
| 2078 |
+
"content": "<|file_sep|>",
|
| 2079 |
+
"single_word": false,
|
| 2080 |
+
"lstrip": false,
|
| 2081 |
+
"rstrip": false,
|
| 2082 |
+
"normalized": false,
|
| 2083 |
+
"special": true
|
| 2084 |
+
},
|
| 2085 |
+
{
|
| 2086 |
+
"id": 231,
|
| 2087 |
+
"content": "<|repo_name|>",
|
| 2088 |
+
"single_word": false,
|
| 2089 |
+
"lstrip": false,
|
| 2090 |
+
"rstrip": false,
|
| 2091 |
+
"normalized": false,
|
| 2092 |
+
"special": true
|
| 2093 |
+
},
|
| 2094 |
+
{
|
| 2095 |
+
"id": 232,
|
| 2096 |
+
"content": "<|repo_tree|>",
|
| 2097 |
+
"single_word": false,
|
| 2098 |
+
"lstrip": false,
|
| 2099 |
+
"rstrip": false,
|
| 2100 |
+
"normalized": false,
|
| 2101 |
+
"special": true
|
| 2102 |
+
},
|
| 2103 |
{
|
| 2104 |
"id": 233,
|
| 2105 |
"content": ">>UNUSED_221<<",
|
|
|
|
| 4718 |
"rstrip": false,
|
| 4719 |
"normalized": false,
|
| 4720 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4721 |
}
|
| 4722 |
],
|
| 4723 |
"normalizer": null,
|
|
|
|
| 4999 |
"<|im_start|>": 227,
|
| 5000 |
"<|im_end|>": 228,
|
| 5001 |
"<|system|>": 229,
|
| 5002 |
+
"<|file_sep|>": 230,
|
| 5003 |
+
"<|repo_name|>": 231,
|
| 5004 |
+
"<|repo_tree|>": 232,
|
| 5005 |
">>UNUSED_221<<": 233,
|
| 5006 |
">>UNUSED_222<<": 234,
|
| 5007 |
">>UNUSED_223<<": 235,
|
tokenizer_config.json
CHANGED
|
@@ -1840,6 +1840,30 @@
|
|
| 1840 |
"single_word": false,
|
| 1841 |
"special": true
|
| 1842 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1843 |
"233": {
|
| 1844 |
"content": ">>UNUSED_221<<",
|
| 1845 |
"lstrip": false,
|
|
@@ -4167,30 +4191,6 @@
|
|
| 4167 |
"rstrip": false,
|
| 4168 |
"single_word": false,
|
| 4169 |
"special": true
|
| 4170 |
-
},
|
| 4171 |
-
"32768": {
|
| 4172 |
-
"content": "<|file_sep|>",
|
| 4173 |
-
"lstrip": false,
|
| 4174 |
-
"normalized": false,
|
| 4175 |
-
"rstrip": false,
|
| 4176 |
-
"single_word": false,
|
| 4177 |
-
"special": true
|
| 4178 |
-
},
|
| 4179 |
-
"32769": {
|
| 4180 |
-
"content": "<|repo_name|>",
|
| 4181 |
-
"lstrip": false,
|
| 4182 |
-
"normalized": false,
|
| 4183 |
-
"rstrip": false,
|
| 4184 |
-
"single_word": false,
|
| 4185 |
-
"special": true
|
| 4186 |
-
},
|
| 4187 |
-
"32770": {
|
| 4188 |
-
"content": "<|repo_tree|>",
|
| 4189 |
-
"lstrip": false,
|
| 4190 |
-
"normalized": false,
|
| 4191 |
-
"rstrip": false,
|
| 4192 |
-
"single_word": false,
|
| 4193 |
-
"special": true
|
| 4194 |
}
|
| 4195 |
},
|
| 4196 |
"additional_special_tokens": [
|
|
|
|
| 1840 |
"single_word": false,
|
| 1841 |
"special": true
|
| 1842 |
},
|
| 1843 |
+
"230": {
|
| 1844 |
+
"content": "<|file_sep|>",
|
| 1845 |
+
"lstrip": false,
|
| 1846 |
+
"normalized": false,
|
| 1847 |
+
"rstrip": false,
|
| 1848 |
+
"single_word": false,
|
| 1849 |
+
"special": true
|
| 1850 |
+
},
|
| 1851 |
+
"231": {
|
| 1852 |
+
"content": "<|repo_name|>",
|
| 1853 |
+
"lstrip": false,
|
| 1854 |
+
"normalized": false,
|
| 1855 |
+
"rstrip": false,
|
| 1856 |
+
"single_word": false,
|
| 1857 |
+
"special": true
|
| 1858 |
+
},
|
| 1859 |
+
"232": {
|
| 1860 |
+
"content": "<|repo_tree|>",
|
| 1861 |
+
"lstrip": false,
|
| 1862 |
+
"normalized": false,
|
| 1863 |
+
"rstrip": false,
|
| 1864 |
+
"single_word": false,
|
| 1865 |
+
"special": true
|
| 1866 |
+
},
|
| 1867 |
"233": {
|
| 1868 |
"content": ">>UNUSED_221<<",
|
| 1869 |
"lstrip": false,
|
|
|
|
| 4191 |
"rstrip": false,
|
| 4192 |
"single_word": false,
|
| 4193 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4194 |
}
|
| 4195 |
},
|
| 4196 |
"additional_special_tokens": [
|