👷‍♀️ Fix he -> iw change.
Browse files - tokenizer.json +8 -20
tokenizer.json
CHANGED
|
@@ -203,7 +203,7 @@
|
|
| 203 |
},
|
| 204 |
{
|
| 205 |
"id": 50279,
|
| 206 |
-
"content": "<|he|>",
|
| 207 |
"single_word": false,
|
| 208 |
"lstrip": false,
|
| 209 |
"rstrip": false,
|
|
@@ -14546,30 +14546,18 @@
|
|
| 14546 |
"special_tokens": {
|
| 14547 |
"<|endoftext|>": {
|
| 14548 |
"id": "<|endoftext|>",
|
| 14549 |
-
"ids": [
|
| 14550 |
-
50257
|
| 14551 |
-
],
|
| 14552 |
-
"tokens": [
|
| 14553 |
-
"<|endoftext|>"
|
| 14554 |
-
]
|
| 14555 |
},
|
| 14556 |
"<|notimestamps|>": {
|
| 14557 |
"id": "<|notimestamps|>",
|
| 14558 |
-
"ids": [
|
| 14559 |
-
50363
|
| 14560 |
-
],
|
| 14561 |
-
"tokens": [
|
| 14562 |
-
"<|notimestamps|>"
|
| 14563 |
-
]
|
| 14564 |
},
|
| 14565 |
"<|startoftranscript|>": {
|
| 14566 |
"id": "<|startoftranscript|>",
|
| 14567 |
-
"ids": [
|
| 14568 |
-
50258
|
| 14569 |
-
],
|
| 14570 |
-
"tokens": [
|
| 14571 |
-
"<|startoftranscript|>"
|
| 14572 |
-
]
|
| 14573 |
}
|
| 14574 |
}
|
| 14575 |
},
|
|
@@ -114849,4 +114837,4 @@
|
|
| 114849 |
"åľ º"
|
| 114850 |
]
|
| 114851 |
}
|
| 114852 |
-
}
|
|
|
|
| 203 |
},
|
| 204 |
{
|
| 205 |
"id": 50279,
|
| 206 |
+
"content": "<|iw|>",
|
| 207 |
"single_word": false,
|
| 208 |
"lstrip": false,
|
| 209 |
"rstrip": false,
|
|
|
|
| 14546 |
"special_tokens": {
|
| 14547 |
"<|endoftext|>": {
|
| 14548 |
"id": "<|endoftext|>",
|
| 14549 |
+
"ids": [50257],
|
| 14550 |
+
"tokens": ["<|endoftext|>"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14551 |
},
|
| 14552 |
"<|notimestamps|>": {
|
| 14553 |
"id": "<|notimestamps|>",
|
| 14554 |
+
"ids": [50363],
|
| 14555 |
+
"tokens": ["<|notimestamps|>"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14556 |
},
|
| 14557 |
"<|startoftranscript|>": {
|
| 14558 |
"id": "<|startoftranscript|>",
|
| 14559 |
+
"ids": [50258],
|
| 14560 |
+
"tokens": ["<|startoftranscript|>"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14561 |
}
|
| 14562 |
}
|
| 14563 |
},
|
|
|
|
| 114837 |
"åľ º"
|
| 114838 |
]
|
| 114839 |
}
|
| 114840 |
+
}
|