Upload 2 files
Browse files
- tokenizer.json +8 -8
- tokenizer_config.json +4 -4
tokenizer.json
CHANGED
|
@@ -122,7 +122,7 @@
|
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": 13,
|
| 125 |
-
"content": "<|reserved_1|>",
|
| 126 |
"single_word": false,
|
| 127 |
"lstrip": false,
|
| 128 |
"rstrip": false,
|
|
@@ -131,7 +131,7 @@
|
|
| 131 |
},
|
| 132 |
{
|
| 133 |
"id": 14,
|
| 134 |
-
"content": "<|reserved_2|>",
|
| 135 |
"single_word": false,
|
| 136 |
"lstrip": false,
|
| 137 |
"rstrip": false,
|
|
@@ -140,7 +140,7 @@
|
|
| 140 |
},
|
| 141 |
{
|
| 142 |
"id": 15,
|
| 143 |
-
"content": "<|reserved_3|>",
|
| 144 |
"single_word": false,
|
| 145 |
"lstrip": false,
|
| 146 |
"rstrip": false,
|
|
@@ -149,7 +149,7 @@
|
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"id": 16,
|
| 152 |
-
"content": "<|reserved_4|>",
|
| 153 |
"single_word": false,
|
| 154 |
"lstrip": false,
|
| 155 |
"rstrip": false,
|
|
@@ -429,10 +429,10 @@
|
|
| 429 |
"<general>": 10,
|
| 430 |
"</general>": 11,
|
| 431 |
"<|input_end|>": 12,
|
| 432 |
-
"<|reserved_1|>": 13,
|
| 433 |
-
"<|reserved_2|>": 14,
|
| 434 |
-
"<|reserved_3|>": 15,
|
| 435 |
-
"<|reserved_4|>": 16,
|
| 436 |
"<|reserved_5|>": 17,
|
| 437 |
"<|reserved_6|>": 18,
|
| 438 |
"<|reserved_7|>": 19,
|
|
|
|
| 122 |
},
|
| 123 |
{
|
| 124 |
"id": 13,
|
| 125 |
+
"content": "<|very_short|>",
|
| 126 |
"single_word": false,
|
| 127 |
"lstrip": false,
|
| 128 |
"rstrip": false,
|
|
|
|
| 131 |
},
|
| 132 |
{
|
| 133 |
"id": 14,
|
| 134 |
+
"content": "<|short|>",
|
| 135 |
"single_word": false,
|
| 136 |
"lstrip": false,
|
| 137 |
"rstrip": false,
|
|
|
|
| 140 |
},
|
| 141 |
{
|
| 142 |
"id": 15,
|
| 143 |
+
"content": "<|long|>",
|
| 144 |
"single_word": false,
|
| 145 |
"lstrip": false,
|
| 146 |
"rstrip": false,
|
|
|
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"id": 16,
|
| 152 |
+
"content": "<|very_long|>",
|
| 153 |
"single_word": false,
|
| 154 |
"lstrip": false,
|
| 155 |
"rstrip": false,
|
|
|
|
| 429 |
"<general>": 10,
|
| 430 |
"</general>": 11,
|
| 431 |
"<|input_end|>": 12,
|
| 432 |
+
"<|very_short|>": 13,
|
| 433 |
+
"<|short|>": 14,
|
| 434 |
+
"<|long|>": 15,
|
| 435 |
+
"<|very_long|>": 16,
|
| 436 |
"<|reserved_5|>": 17,
|
| 437 |
"<|reserved_6|>": 18,
|
| 438 |
"<|reserved_7|>": 19,
|
tokenizer_config.json
CHANGED
|
@@ -106,7 +106,7 @@
|
|
| 106 |
"special": true
|
| 107 |
},
|
| 108 |
"13": {
|
| 109 |
-
"content": "<|reserved_1|>",
|
| 110 |
"lstrip": false,
|
| 111 |
"normalized": false,
|
| 112 |
"rstrip": false,
|
|
@@ -114,7 +114,7 @@
|
|
| 114 |
"special": true
|
| 115 |
},
|
| 116 |
"14": {
|
| 117 |
-
"content": "<|reserved_2|>",
|
| 118 |
"lstrip": false,
|
| 119 |
"normalized": false,
|
| 120 |
"rstrip": false,
|
|
@@ -122,7 +122,7 @@
|
|
| 122 |
"special": true
|
| 123 |
},
|
| 124 |
"15": {
|
| 125 |
-
"content": "<|reserved_3|>",
|
| 126 |
"lstrip": false,
|
| 127 |
"normalized": false,
|
| 128 |
"rstrip": false,
|
|
@@ -130,7 +130,7 @@
|
|
| 130 |
"special": true
|
| 131 |
},
|
| 132 |
"16": {
|
| 133 |
-
"content": "<|reserved_4|>",
|
| 134 |
"lstrip": false,
|
| 135 |
"normalized": false,
|
| 136 |
"rstrip": false,
|
|
|
|
| 106 |
"special": true
|
| 107 |
},
|
| 108 |
"13": {
|
| 109 |
+
"content": "<|very_short|>",
|
| 110 |
"lstrip": false,
|
| 111 |
"normalized": false,
|
| 112 |
"rstrip": false,
|
|
|
|
| 114 |
"special": true
|
| 115 |
},
|
| 116 |
"14": {
|
| 117 |
+
"content": "<|short|>",
|
| 118 |
"lstrip": false,
|
| 119 |
"normalized": false,
|
| 120 |
"rstrip": false,
|
|
|
|
| 122 |
"special": true
|
| 123 |
},
|
| 124 |
"15": {
|
| 125 |
+
"content": "<|long|>",
|
| 126 |
"lstrip": false,
|
| 127 |
"normalized": false,
|
| 128 |
"rstrip": false,
|
|
|
|
| 130 |
"special": true
|
| 131 |
},
|
| 132 |
"16": {
|
| 133 |
+
"content": "<|very_long|>",
|
| 134 |
"lstrip": false,
|
| 135 |
"normalized": false,
|
| 136 |
"rstrip": false,
|