Update tokenizer_config.json as Llama3 (#2)
Browse files
- Update tokenizer_config.json as Llama3 (b655e0d6166a44cdc1144c03e711b2aa2994ac13)
- tokenizer_config.json +1 -259
tokenizer_config.json
CHANGED
|
@@ -2049,268 +2049,10 @@
|
|
| 2049 |
"special": true
|
| 2050 |
}
|
| 2051 |
},
|
| 2052 |
-
"additional_special_tokens": [
|
| 2053 |
-
"<|begin_of_text|>",
|
| 2054 |
-
"<|end_of_text|>",
|
| 2055 |
-
"<|reserved_special_token_0|>",
|
| 2056 |
-
"<|reserved_special_token_1|>",
|
| 2057 |
-
"<|reserved_special_token_2|>",
|
| 2058 |
-
"<|reserved_special_token_3|>",
|
| 2059 |
-
"<|start_header_id|>",
|
| 2060 |
-
"<|end_header_id|>",
|
| 2061 |
-
"<|reserved_special_token_4|>",
|
| 2062 |
-
"<|eot_id|>",
|
| 2063 |
-
"<|reserved_special_token_5|>",
|
| 2064 |
-
"<|reserved_special_token_6|>",
|
| 2065 |
-
"<|reserved_special_token_7|>",
|
| 2066 |
-
"<|reserved_special_token_8|>",
|
| 2067 |
-
"<|reserved_special_token_9|>",
|
| 2068 |
-
"<|reserved_special_token_10|>",
|
| 2069 |
-
"<|reserved_special_token_11|>",
|
| 2070 |
-
"<|reserved_special_token_12|>",
|
| 2071 |
-
"<|reserved_special_token_13|>",
|
| 2072 |
-
"<|reserved_special_token_14|>",
|
| 2073 |
-
"<|reserved_special_token_15|>",
|
| 2074 |
-
"<|reserved_special_token_16|>",
|
| 2075 |
-
"<|reserved_special_token_17|>",
|
| 2076 |
-
"<|reserved_special_token_18|>",
|
| 2077 |
-
"<|reserved_special_token_19|>",
|
| 2078 |
-
"<|reserved_special_token_20|>",
|
| 2079 |
-
"<|reserved_special_token_21|>",
|
| 2080 |
-
"<|reserved_special_token_22|>",
|
| 2081 |
-
"<|reserved_special_token_23|>",
|
| 2082 |
-
"<|reserved_special_token_24|>",
|
| 2083 |
-
"<|reserved_special_token_25|>",
|
| 2084 |
-
"<|reserved_special_token_26|>",
|
| 2085 |
-
"<|reserved_special_token_27|>",
|
| 2086 |
-
"<|reserved_special_token_28|>",
|
| 2087 |
-
"<|reserved_special_token_29|>",
|
| 2088 |
-
"<|reserved_special_token_30|>",
|
| 2089 |
-
"<|reserved_special_token_31|>",
|
| 2090 |
-
"<|reserved_special_token_32|>",
|
| 2091 |
-
"<|reserved_special_token_33|>",
|
| 2092 |
-
"<|reserved_special_token_34|>",
|
| 2093 |
-
"<|reserved_special_token_35|>",
|
| 2094 |
-
"<|reserved_special_token_36|>",
|
| 2095 |
-
"<|reserved_special_token_37|>",
|
| 2096 |
-
"<|reserved_special_token_38|>",
|
| 2097 |
-
"<|reserved_special_token_39|>",
|
| 2098 |
-
"<|reserved_special_token_40|>",
|
| 2099 |
-
"<|reserved_special_token_41|>",
|
| 2100 |
-
"<|reserved_special_token_42|>",
|
| 2101 |
-
"<|reserved_special_token_43|>",
|
| 2102 |
-
"<|reserved_special_token_44|>",
|
| 2103 |
-
"<|reserved_special_token_45|>",
|
| 2104 |
-
"<|reserved_special_token_46|>",
|
| 2105 |
-
"<|reserved_special_token_47|>",
|
| 2106 |
-
"<|reserved_special_token_48|>",
|
| 2107 |
-
"<|reserved_special_token_49|>",
|
| 2108 |
-
"<|reserved_special_token_50|>",
|
| 2109 |
-
"<|reserved_special_token_51|>",
|
| 2110 |
-
"<|reserved_special_token_52|>",
|
| 2111 |
-
"<|reserved_special_token_53|>",
|
| 2112 |
-
"<|reserved_special_token_54|>",
|
| 2113 |
-
"<|reserved_special_token_55|>",
|
| 2114 |
-
"<|reserved_special_token_56|>",
|
| 2115 |
-
"<|reserved_special_token_57|>",
|
| 2116 |
-
"<|reserved_special_token_58|>",
|
| 2117 |
-
"<|reserved_special_token_59|>",
|
| 2118 |
-
"<|reserved_special_token_60|>",
|
| 2119 |
-
"<|reserved_special_token_61|>",
|
| 2120 |
-
"<|reserved_special_token_62|>",
|
| 2121 |
-
"<|reserved_special_token_63|>",
|
| 2122 |
-
"<|reserved_special_token_64|>",
|
| 2123 |
-
"<|reserved_special_token_65|>",
|
| 2124 |
-
"<|reserved_special_token_66|>",
|
| 2125 |
-
"<|reserved_special_token_67|>",
|
| 2126 |
-
"<|reserved_special_token_68|>",
|
| 2127 |
-
"<|reserved_special_token_69|>",
|
| 2128 |
-
"<|reserved_special_token_70|>",
|
| 2129 |
-
"<|reserved_special_token_71|>",
|
| 2130 |
-
"<|reserved_special_token_72|>",
|
| 2131 |
-
"<|reserved_special_token_73|>",
|
| 2132 |
-
"<|reserved_special_token_74|>",
|
| 2133 |
-
"<|reserved_special_token_75|>",
|
| 2134 |
-
"<|reserved_special_token_76|>",
|
| 2135 |
-
"<|reserved_special_token_77|>",
|
| 2136 |
-
"<|reserved_special_token_78|>",
|
| 2137 |
-
"<|reserved_special_token_79|>",
|
| 2138 |
-
"<|reserved_special_token_80|>",
|
| 2139 |
-
"<|reserved_special_token_81|>",
|
| 2140 |
-
"<|reserved_special_token_82|>",
|
| 2141 |
-
"<|reserved_special_token_83|>",
|
| 2142 |
-
"<|reserved_special_token_84|>",
|
| 2143 |
-
"<|reserved_special_token_85|>",
|
| 2144 |
-
"<|reserved_special_token_86|>",
|
| 2145 |
-
"<|reserved_special_token_87|>",
|
| 2146 |
-
"<|reserved_special_token_88|>",
|
| 2147 |
-
"<|reserved_special_token_89|>",
|
| 2148 |
-
"<|reserved_special_token_90|>",
|
| 2149 |
-
"<|reserved_special_token_91|>",
|
| 2150 |
-
"<|reserved_special_token_92|>",
|
| 2151 |
-
"<|reserved_special_token_93|>",
|
| 2152 |
-
"<|reserved_special_token_94|>",
|
| 2153 |
-
"<|reserved_special_token_95|>",
|
| 2154 |
-
"<|reserved_special_token_96|>",
|
| 2155 |
-
"<|reserved_special_token_97|>",
|
| 2156 |
-
"<|reserved_special_token_98|>",
|
| 2157 |
-
"<|reserved_special_token_99|>",
|
| 2158 |
-
"<|reserved_special_token_100|>",
|
| 2159 |
-
"<|reserved_special_token_101|>",
|
| 2160 |
-
"<|reserved_special_token_102|>",
|
| 2161 |
-
"<|reserved_special_token_103|>",
|
| 2162 |
-
"<|reserved_special_token_104|>",
|
| 2163 |
-
"<|reserved_special_token_105|>",
|
| 2164 |
-
"<|reserved_special_token_106|>",
|
| 2165 |
-
"<|reserved_special_token_107|>",
|
| 2166 |
-
"<|reserved_special_token_108|>",
|
| 2167 |
-
"<|reserved_special_token_109|>",
|
| 2168 |
-
"<|reserved_special_token_110|>",
|
| 2169 |
-
"<|reserved_special_token_111|>",
|
| 2170 |
-
"<|reserved_special_token_112|>",
|
| 2171 |
-
"<|reserved_special_token_113|>",
|
| 2172 |
-
"<|reserved_special_token_114|>",
|
| 2173 |
-
"<|reserved_special_token_115|>",
|
| 2174 |
-
"<|reserved_special_token_116|>",
|
| 2175 |
-
"<|reserved_special_token_117|>",
|
| 2176 |
-
"<|reserved_special_token_118|>",
|
| 2177 |
-
"<|reserved_special_token_119|>",
|
| 2178 |
-
"<|reserved_special_token_120|>",
|
| 2179 |
-
"<|reserved_special_token_121|>",
|
| 2180 |
-
"<|reserved_special_token_122|>",
|
| 2181 |
-
"<|reserved_special_token_123|>",
|
| 2182 |
-
"<|reserved_special_token_124|>",
|
| 2183 |
-
"<|reserved_special_token_125|>",
|
| 2184 |
-
"<|reserved_special_token_126|>",
|
| 2185 |
-
"<|reserved_special_token_127|>",
|
| 2186 |
-
"<|reserved_special_token_128|>",
|
| 2187 |
-
"<|reserved_special_token_129|>",
|
| 2188 |
-
"<|reserved_special_token_130|>",
|
| 2189 |
-
"<|reserved_special_token_131|>",
|
| 2190 |
-
"<|reserved_special_token_132|>",
|
| 2191 |
-
"<|reserved_special_token_133|>",
|
| 2192 |
-
"<|reserved_special_token_134|>",
|
| 2193 |
-
"<|reserved_special_token_135|>",
|
| 2194 |
-
"<|reserved_special_token_136|>",
|
| 2195 |
-
"<|reserved_special_token_137|>",
|
| 2196 |
-
"<|reserved_special_token_138|>",
|
| 2197 |
-
"<|reserved_special_token_139|>",
|
| 2198 |
-
"<|reserved_special_token_140|>",
|
| 2199 |
-
"<|reserved_special_token_141|>",
|
| 2200 |
-
"<|reserved_special_token_142|>",
|
| 2201 |
-
"<|reserved_special_token_143|>",
|
| 2202 |
-
"<|reserved_special_token_144|>",
|
| 2203 |
-
"<|reserved_special_token_145|>",
|
| 2204 |
-
"<|reserved_special_token_146|>",
|
| 2205 |
-
"<|reserved_special_token_147|>",
|
| 2206 |
-
"<|reserved_special_token_148|>",
|
| 2207 |
-
"<|reserved_special_token_149|>",
|
| 2208 |
-
"<|reserved_special_token_150|>",
|
| 2209 |
-
"<|reserved_special_token_151|>",
|
| 2210 |
-
"<|reserved_special_token_152|>",
|
| 2211 |
-
"<|reserved_special_token_153|>",
|
| 2212 |
-
"<|reserved_special_token_154|>",
|
| 2213 |
-
"<|reserved_special_token_155|>",
|
| 2214 |
-
"<|reserved_special_token_156|>",
|
| 2215 |
-
"<|reserved_special_token_157|>",
|
| 2216 |
-
"<|reserved_special_token_158|>",
|
| 2217 |
-
"<|reserved_special_token_159|>",
|
| 2218 |
-
"<|reserved_special_token_160|>",
|
| 2219 |
-
"<|reserved_special_token_161|>",
|
| 2220 |
-
"<|reserved_special_token_162|>",
|
| 2221 |
-
"<|reserved_special_token_163|>",
|
| 2222 |
-
"<|reserved_special_token_164|>",
|
| 2223 |
-
"<|reserved_special_token_165|>",
|
| 2224 |
-
"<|reserved_special_token_166|>",
|
| 2225 |
-
"<|reserved_special_token_167|>",
|
| 2226 |
-
"<|reserved_special_token_168|>",
|
| 2227 |
-
"<|reserved_special_token_169|>",
|
| 2228 |
-
"<|reserved_special_token_170|>",
|
| 2229 |
-
"<|reserved_special_token_171|>",
|
| 2230 |
-
"<|reserved_special_token_172|>",
|
| 2231 |
-
"<|reserved_special_token_173|>",
|
| 2232 |
-
"<|reserved_special_token_174|>",
|
| 2233 |
-
"<|reserved_special_token_175|>",
|
| 2234 |
-
"<|reserved_special_token_176|>",
|
| 2235 |
-
"<|reserved_special_token_177|>",
|
| 2236 |
-
"<|reserved_special_token_178|>",
|
| 2237 |
-
"<|reserved_special_token_179|>",
|
| 2238 |
-
"<|reserved_special_token_180|>",
|
| 2239 |
-
"<|reserved_special_token_181|>",
|
| 2240 |
-
"<|reserved_special_token_182|>",
|
| 2241 |
-
"<|reserved_special_token_183|>",
|
| 2242 |
-
"<|reserved_special_token_184|>",
|
| 2243 |
-
"<|reserved_special_token_185|>",
|
| 2244 |
-
"<|reserved_special_token_186|>",
|
| 2245 |
-
"<|reserved_special_token_187|>",
|
| 2246 |
-
"<|reserved_special_token_188|>",
|
| 2247 |
-
"<|reserved_special_token_189|>",
|
| 2248 |
-
"<|reserved_special_token_190|>",
|
| 2249 |
-
"<|reserved_special_token_191|>",
|
| 2250 |
-
"<|reserved_special_token_192|>",
|
| 2251 |
-
"<|reserved_special_token_193|>",
|
| 2252 |
-
"<|reserved_special_token_194|>",
|
| 2253 |
-
"<|reserved_special_token_195|>",
|
| 2254 |
-
"<|reserved_special_token_196|>",
|
| 2255 |
-
"<|reserved_special_token_197|>",
|
| 2256 |
-
"<|reserved_special_token_198|>",
|
| 2257 |
-
"<|reserved_special_token_199|>",
|
| 2258 |
-
"<|reserved_special_token_200|>",
|
| 2259 |
-
"<|reserved_special_token_201|>",
|
| 2260 |
-
"<|reserved_special_token_202|>",
|
| 2261 |
-
"<|reserved_special_token_203|>",
|
| 2262 |
-
"<|reserved_special_token_204|>",
|
| 2263 |
-
"<|reserved_special_token_205|>",
|
| 2264 |
-
"<|reserved_special_token_206|>",
|
| 2265 |
-
"<|reserved_special_token_207|>",
|
| 2266 |
-
"<|reserved_special_token_208|>",
|
| 2267 |
-
"<|reserved_special_token_209|>",
|
| 2268 |
-
"<|reserved_special_token_210|>",
|
| 2269 |
-
"<|reserved_special_token_211|>",
|
| 2270 |
-
"<|reserved_special_token_212|>",
|
| 2271 |
-
"<|reserved_special_token_213|>",
|
| 2272 |
-
"<|reserved_special_token_214|>",
|
| 2273 |
-
"<|reserved_special_token_215|>",
|
| 2274 |
-
"<|reserved_special_token_216|>",
|
| 2275 |
-
"<|reserved_special_token_217|>",
|
| 2276 |
-
"<|reserved_special_token_218|>",
|
| 2277 |
-
"<|reserved_special_token_219|>",
|
| 2278 |
-
"<|reserved_special_token_220|>",
|
| 2279 |
-
"<|reserved_special_token_221|>",
|
| 2280 |
-
"<|reserved_special_token_222|>",
|
| 2281 |
-
"<|reserved_special_token_223|>",
|
| 2282 |
-
"<|reserved_special_token_224|>",
|
| 2283 |
-
"<|reserved_special_token_225|>",
|
| 2284 |
-
"<|reserved_special_token_226|>",
|
| 2285 |
-
"<|reserved_special_token_227|>",
|
| 2286 |
-
"<|reserved_special_token_228|>",
|
| 2287 |
-
"<|reserved_special_token_229|>",
|
| 2288 |
-
"<|reserved_special_token_230|>",
|
| 2289 |
-
"<|reserved_special_token_231|>",
|
| 2290 |
-
"<|reserved_special_token_232|>",
|
| 2291 |
-
"<|reserved_special_token_233|>",
|
| 2292 |
-
"<|reserved_special_token_234|>",
|
| 2293 |
-
"<|reserved_special_token_235|>",
|
| 2294 |
-
"<|reserved_special_token_236|>",
|
| 2295 |
-
"<|reserved_special_token_237|>",
|
| 2296 |
-
"<|reserved_special_token_238|>",
|
| 2297 |
-
"<|reserved_special_token_239|>",
|
| 2298 |
-
"<|reserved_special_token_240|>",
|
| 2299 |
-
"<|reserved_special_token_241|>",
|
| 2300 |
-
"<|reserved_special_token_242|>",
|
| 2301 |
-
"<|reserved_special_token_243|>",
|
| 2302 |
-
"<|reserved_special_token_244|>",
|
| 2303 |
-
"<|reserved_special_token_245|>",
|
| 2304 |
-
"<|reserved_special_token_246|>",
|
| 2305 |
-
"<|reserved_special_token_247|>",
|
| 2306 |
-
"<|reserved_special_token_248|>",
|
| 2307 |
-
"<|reserved_special_token_249|>",
|
| 2308 |
-
"<|reserved_special_token_250|>"
|
| 2309 |
-
],
|
| 2310 |
"bos_token": "<|begin_of_text|>",
|
| 2311 |
"chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
| 2312 |
"clean_up_tokenization_spaces": true,
|
| 2313 |
-
"eos_token": "<|
|
| 2314 |
"model_input_names": [
|
| 2315 |
"input_ids",
|
| 2316 |
"attention_mask"
|
|
|
|
| 2049 |
"special": true
|
| 2050 |
}
|
| 2051 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2052 |
"bos_token": "<|begin_of_text|>",
|
| 2053 |
"chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
| 2054 |
"clean_up_tokenization_spaces": true,
|
| 2055 |
+
"eos_token": "<|eot_id|>",
|
| 2056 |
"model_input_names": [
|
| 2057 |
"input_ids",
|
| 2058 |
"attention_mask"
|