Fix tokenizer misuse
Browse files
- tokenizer.json +8 -1
tokenizer.json
CHANGED
|
@@ -201,6 +201,13 @@
|
|
| 201 |
"Regex": "[\u0001️⃣]"
|
| 202 |
},
|
| 203 |
"content": " "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
}
|
| 205 |
]
|
| 206 |
},
|
|
@@ -96069,4 +96076,4 @@
|
|
| 96069 |
"▁ent ire"
|
| 96070 |
]
|
| 96071 |
}
|
| 96072 |
-
}
|
|
|
|
| 201 |
"Regex": "[\u0001️⃣]"
|
| 202 |
},
|
| 203 |
"content": " "
|
| 204 |
+
},
|
| 205 |
+
{
|
| 206 |
+
"type": "Replace",
|
| 207 |
+
"pattern": {
|
| 208 |
+
"Regex": " *<mask> *"
|
| 209 |
+
},
|
| 210 |
+
"content": "<mask> "
|
| 211 |
}
|
| 212 |
]
|
| 213 |
},
|
|
|
|
| 96076 |
"▁ent ire"
|
| 96077 |
]
|
| 96078 |
}
|
| 96079 |
+
}
|