Upload tokenizer.json with huggingface_hub
Browse files
- tokenizer.json +5 -53
tokenizer.json
CHANGED
|
@@ -10,7 +10,7 @@
|
|
| 10 |
"strategy": {
|
| 11 |
"Fixed": 4096
|
| 12 |
},
|
| 13 |
-
"direction": "Left",
|
| 14 |
"pad_to_multiple_of": null,
|
| 15 |
"pad_id": 32021,
|
| 16 |
"pad_type_id": 0,
|
|
@@ -268,58 +268,10 @@
|
|
| 268 |
]
|
| 269 |
},
|
| 270 |
"post_processor": {
|
| 271 |
-
"type": "TemplateProcessing",
|
| 272 |
-
"single": [
|
| 273 |
-
{
|
| 274 |
-
"SpecialToken": {
|
| 275 |
-
"id": "<|begin▁of▁sentence|>",
|
| 276 |
-
"type_id": 0
|
| 277 |
-
}
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"Sequence": {
|
| 281 |
-
"id": "A",
|
| 282 |
-
"type_id": 0
|
| 283 |
-
}
|
| 284 |
-
}
|
| 285 |
-
],
|
| 286 |
-
"pair": [
|
| 287 |
-
{
|
| 288 |
-
"SpecialToken": {
|
| 289 |
-
"id": "<|begin▁of▁sentence|>",
|
| 290 |
-
"type_id": 0
|
| 291 |
-
}
|
| 292 |
-
},
|
| 293 |
-
{
|
| 294 |
-
"Sequence": {
|
| 295 |
-
"id": "A",
|
| 296 |
-
"type_id": 0
|
| 297 |
-
}
|
| 298 |
-
},
|
| 299 |
-
{
|
| 300 |
-
"SpecialToken": {
|
| 301 |
-
"id": "<|begin▁of▁sentence|>",
|
| 302 |
-
"type_id": 1
|
| 303 |
-
}
|
| 304 |
-
},
|
| 305 |
-
{
|
| 306 |
-
"Sequence": {
|
| 307 |
-
"id": "B",
|
| 308 |
-
"type_id": 1
|
| 309 |
-
}
|
| 310 |
-
}
|
| 311 |
-
],
|
| 312 |
-
"special_tokens": {
|
| 313 |
-
"<|begin▁of▁sentence|>": {
|
| 314 |
-
"id": "<|begin▁of▁sentence|>",
|
| 315 |
-
"ids": [
|
| 316 |
-
32013
|
| 317 |
-
],
|
| 318 |
-
"tokens": [
|
| 319 |
-
"<|begin▁of▁sentence|>"
|
| 320 |
-
]
|
| 321 |
-
}
|
| 322 |
-
}
|
| 323 |
},
|
| 324 |
"decoder": {
|
| 325 |
"type": "ByteLevel",
|
|
|
|
| 10 |
"strategy": {
|
| 11 |
"Fixed": 4096
|
| 12 |
},
|
| 13 |
+
"direction": "Right",
|
| 14 |
"pad_to_multiple_of": null,
|
| 15 |
"pad_id": 32021,
|
| 16 |
"pad_type_id": 0,
|
|
|
|
| 268 |
]
|
| 269 |
},
|
| 270 |
"post_processor": {
|
| 271 |
+
"type": "ByteLevel",
|
| 272 |
+
"add_prefix_space": true,
|
| 273 |
+
"trim_offsets": false,
|
| 274 |
+
"use_regex": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
},
|
| 276 |
"decoder": {
|
| 277 |
"type": "ByteLevel",
|