Pendrokar
/

TorchMoji

Text Classification

Model card Files and versions

Pendrokar committed on Jan 28, 2024

Commit

cad12f0

·

verified ·

1 Parent(s): 5ce67b5

unknown token fix

Files changed (1) hide show

tokenizer.json +6 -2

tokenizer.json CHANGED Viewed

@@ -57,8 +57,12 @@
 	"[CLS]":{
 		"id":"[CLS]",	"ids":[4],"tokens":["[CLS]"]},"[SEP]":{
 		"id":"[SEP]",	"ids":[5],"tokens":["[SEP]"]}}},"decoder":{
-"type":"WordPiece",	"prefix":"##","cleanup":true},"model":{
-	"type":"WordPiece",	"unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,
 	"vocab":{
 	"CUSTOM_MASK":0,
 	"CUSTOM_UNKNOWN":1,

 	"[CLS]":{
 		"id":"[CLS]",	"ids":[4],"tokens":["[CLS]"]},"[SEP]":{
 		"id":"[SEP]",	"ids":[5],"tokens":["[SEP]"]}}},"decoder":{
+	"type":"WordPiece",	"prefix":"##","cleanup":true},
+	"model":{
+	"type":"WordPiece",
+	"unk_token":"CUSTOM_UNKNOWN",
+	"continuing_subword_prefix":"##",
+	"max_input_chars_per_word":100,
 	"vocab":{
 	"CUSTOM_MASK":0,
 	"CUSTOM_UNKNOWN":1,