Upload tokenizer.json with huggingface_hub
Browse files- tokenizer.json +70 -59
tokenizer.json
CHANGED
|
@@ -2,11 +2,18 @@
|
|
| 2 |
"version": "1.0",
|
| 3 |
"truncation": {
|
| 4 |
"direction": "Right",
|
| 5 |
-
"max_length":
|
| 6 |
"strategy": "LongestFirst",
|
| 7 |
"stride": 0
|
| 8 |
},
|
| 9 |
-
"padding":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
"added_tokens": [
|
| 11 |
{
|
| 12 |
"id": 5139,
|
|
@@ -2343,69 +2350,73 @@
|
|
| 2343 |
]
|
| 2344 |
},
|
| 2345 |
"post_processor": {
|
| 2346 |
-
"type": "
|
| 2347 |
-
"
|
| 2348 |
{
|
| 2349 |
-
"
|
| 2350 |
-
|
| 2351 |
-
|
| 2352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2353 |
},
|
| 2354 |
{
|
| 2355 |
-
"
|
| 2356 |
-
|
| 2357 |
-
|
| 2358 |
-
|
| 2359 |
-
|
| 2360 |
-
|
| 2361 |
-
|
| 2362 |
-
|
| 2363 |
-
|
| 2364 |
-
|
| 2365 |
-
|
| 2366 |
-
|
| 2367 |
-
|
| 2368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2369 |
],
|
| 2370 |
-
"
|
| 2371 |
-
|
| 2372 |
-
|
| 2373 |
-
|
| 2374 |
-
|
| 2375 |
-
|
| 2376 |
-
|
| 2377 |
-
|
| 2378 |
-
"Sequence": {
|
| 2379 |
-
"id": "A",
|
| 2380 |
-
"type_id": 0
|
| 2381 |
-
}
|
| 2382 |
-
},
|
| 2383 |
-
{
|
| 2384 |
-
"SpecialToken": {
|
| 2385 |
-
"id": "<|begin_of_text|>",
|
| 2386 |
-
"type_id": 1
|
| 2387 |
-
}
|
| 2388 |
-
},
|
| 2389 |
-
{
|
| 2390 |
-
"Sequence": {
|
| 2391 |
-
"id": "B",
|
| 2392 |
-
"type_id": 1
|
| 2393 |
-
}
|
| 2394 |
-
}
|
| 2395 |
],
|
| 2396 |
-
"
|
| 2397 |
-
"<|
|
| 2398 |
-
|
| 2399 |
-
"ids": [
|
| 2400 |
-
128000
|
| 2401 |
-
],
|
| 2402 |
-
"tokens": [
|
| 2403 |
-
"<|begin_of_text|>"
|
| 2404 |
-
]
|
| 2405 |
-
}
|
| 2406 |
-
}
|
| 2407 |
}
|
| 2408 |
-
|
| 2409 |
},
|
| 2410 |
"decoder": {
|
| 2411 |
"type": "ByteLevel",
|
|
|
|
| 2 |
"version": "1.0",
|
| 3 |
"truncation": {
|
| 4 |
"direction": "Right",
|
| 5 |
+
"max_length": 512,
|
| 6 |
"strategy": "LongestFirst",
|
| 7 |
"stride": 0
|
| 8 |
},
|
| 9 |
+
"padding": {
|
| 10 |
+
"strategy": "BatchLongest",
|
| 11 |
+
"direction": "Left",
|
| 12 |
+
"pad_to_multiple_of": null,
|
| 13 |
+
"pad_id": 128000,
|
| 14 |
+
"pad_type_id": 0,
|
| 15 |
+
"pad_token": "[PAD]"
|
| 16 |
+
},
|
| 17 |
"added_tokens": [
|
| 18 |
{
|
| 19 |
"id": 5139,
|
|
|
|
| 2350 |
]
|
| 2351 |
},
|
| 2352 |
"post_processor": {
|
| 2353 |
+
"type": "TemplateProcessing",
|
| 2354 |
+
"single": [
|
| 2355 |
{
|
| 2356 |
+
"SpecialToken": {
|
| 2357 |
+
"id": "<|begin_of_text|>",
|
| 2358 |
+
"type_id": 0
|
| 2359 |
+
}
|
| 2360 |
+
},
|
| 2361 |
+
{
|
| 2362 |
+
"Sequence": {
|
| 2363 |
+
"id": "A",
|
| 2364 |
+
"type_id": 0
|
| 2365 |
+
}
|
| 2366 |
+
},
|
| 2367 |
+
{
|
| 2368 |
+
"SpecialToken": {
|
| 2369 |
+
"id": "<|end_of_text|>",
|
| 2370 |
+
"type_id": 0
|
| 2371 |
+
}
|
| 2372 |
+
}
|
| 2373 |
+
],
|
| 2374 |
+
"pair": [
|
| 2375 |
+
{
|
| 2376 |
+
"SpecialToken": {
|
| 2377 |
+
"id": "<|begin_of_text|>",
|
| 2378 |
+
"type_id": 0
|
| 2379 |
+
}
|
| 2380 |
},
|
| 2381 |
{
|
| 2382 |
+
"Sequence": {
|
| 2383 |
+
"id": "A",
|
| 2384 |
+
"type_id": 0
|
| 2385 |
+
}
|
| 2386 |
+
},
|
| 2387 |
+
{
|
| 2388 |
+
"SpecialToken": {
|
| 2389 |
+
"id": "<|end_of_text|>",
|
| 2390 |
+
"type_id": 0
|
| 2391 |
+
}
|
| 2392 |
+
},
|
| 2393 |
+
{
|
| 2394 |
+
"Sequence": {
|
| 2395 |
+
"id": "B",
|
| 2396 |
+
"type_id": 1
|
| 2397 |
+
}
|
| 2398 |
+
}
|
| 2399 |
+
],
|
| 2400 |
+
"special_tokens": {
|
| 2401 |
+
"<|begin_of_text|>": {
|
| 2402 |
+
"id": "<|begin_of_text|>",
|
| 2403 |
+
"ids": [
|
| 2404 |
+
101497
|
| 2405 |
],
|
| 2406 |
+
"tokens": [
|
| 2407 |
+
"<|begin_of_text|>"
|
| 2408 |
+
]
|
| 2409 |
+
},
|
| 2410 |
+
"<|end_of_text|>": {
|
| 2411 |
+
"id": "<|end_of_text|>",
|
| 2412 |
+
"ids": [
|
| 2413 |
+
57368
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2414 |
],
|
| 2415 |
+
"tokens": [
|
| 2416 |
+
"<|end_of_text|>"
|
| 2417 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2418 |
}
|
| 2419 |
+
}
|
| 2420 |
},
|
| 2421 |
"decoder": {
|
| 2422 |
"type": "ByteLevel",
|