unknown token fix
Browse files- tokenizer.json +6 -2
tokenizer.json
CHANGED
|
@@ -57,8 +57,12 @@
|
|
| 57 |
"[CLS]":{
|
| 58 |
"id":"[CLS]", "ids":[4],"tokens":["[CLS]"]},"[SEP]":{
|
| 59 |
"id":"[SEP]", "ids":[5],"tokens":["[SEP]"]}}},"decoder":{
|
| 60 |
-
"type":"WordPiece", "prefix":"##","cleanup":true},
|
| 61 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
"vocab":{
|
| 63 |
"CUSTOM_MASK":0,
|
| 64 |
"CUSTOM_UNKNOWN":1,
|
|
|
|
| 57 |
"[CLS]":{
|
| 58 |
"id":"[CLS]", "ids":[4],"tokens":["[CLS]"]},"[SEP]":{
|
| 59 |
"id":"[SEP]", "ids":[5],"tokens":["[SEP]"]}}},"decoder":{
|
| 60 |
+
"type":"WordPiece", "prefix":"##","cleanup":true},
|
| 61 |
+
"model":{
|
| 62 |
+
"type":"WordPiece",
|
| 63 |
+
"unk_token":"CUSTOM_UNKNOWN",
|
| 64 |
+
"continuing_subword_prefix":"##",
|
| 65 |
+
"max_input_chars_per_word":100,
|
| 66 |
"vocab":{
|
| 67 |
"CUSTOM_MASK":0,
|
| 68 |
"CUSTOM_UNKNOWN":1,
|