Pendrokar committed on
Commit
cad12f0
·
verified ·
1 Parent(s): 5ce67b5

unknown token fix

Browse files
Files changed (1) hide show
  1. tokenizer.json +6 -2
tokenizer.json CHANGED
@@ -57,8 +57,12 @@
57
  "[CLS]":{
58
  "id":"[CLS]", "ids":[4],"tokens":["[CLS]"]},"[SEP]":{
59
  "id":"[SEP]", "ids":[5],"tokens":["[SEP]"]}}},"decoder":{
60
- "type":"WordPiece", "prefix":"##","cleanup":true},"model":{
61
- "type":"WordPiece", "unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,
 
 
 
 
62
  "vocab":{
63
  "CUSTOM_MASK":0,
64
  "CUSTOM_UNKNOWN":1,
 
57
  "[CLS]":{
58
  "id":"[CLS]", "ids":[4],"tokens":["[CLS]"]},"[SEP]":{
59
  "id":"[SEP]", "ids":[5],"tokens":["[SEP]"]}}},"decoder":{
60
+ "type":"WordPiece", "prefix":"##","cleanup":true},
61
+ "model":{
62
+ "type":"WordPiece",
63
+ "unk_token":"CUSTOM_UNKNOWN",
64
+ "continuing_subword_prefix":"##",
65
+ "max_input_chars_per_word":100,
66
  "vocab":{
67
  "CUSTOM_MASK":0,
68
  "CUSTOM_UNKNOWN":1,