add new fast version
Browse files — tokenizer.json (+2 −5)
tokenizer.json
CHANGED
|
@@ -43,16 +43,13 @@
|
|
| 43 |
"pre_tokenizer": {
|
| 44 |
"type": "Sequence",
|
| 45 |
"pretokenizers": [
|
| 46 |
-
{
|
| 47 |
-
"type": "WhitespaceSplit"
|
| 48 |
-
},
|
| 49 |
{
|
| 50 |
"type": "Split",
|
| 51 |
"pattern": {
|
| 52 |
"Regex": "<\\|startoftext\\|>|<\\|endoftext\\|>|'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+"
|
| 53 |
},
|
| 54 |
-
"behavior": "
|
| 55 |
-
"invert":
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"type": "ByteLevel",
|
|
|
|
| 43 |
"pre_tokenizer": {
|
| 44 |
"type": "Sequence",
|
| 45 |
"pretokenizers": [
|
|
|
|
|
|
|
|
|
|
| 46 |
{
|
| 47 |
"type": "Split",
|
| 48 |
"pattern": {
|
| 49 |
"Regex": "<\\|startoftext\\|>|<\\|endoftext\\|>|'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+"
|
| 50 |
},
|
| 51 |
+
"behavior": "Removed",
|
| 52 |
+
"invert": true
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"type": "ByteLevel",
|