Blablablab
/

multilingual-style-representation-Llama-3.2

Sentence Similarity

sentence-transformers

feature-extraction

Model card Files Files and versions

junghwanjkim committed on 23 days ago

Commit

4aef030

·

1 Parent(s): a8cf191

Fix tokenizer by adding pad and mask tokens

Files changed (2) hide show

special_tokens_map.json +14 -0
tokenizer_config.json +2 -0

special_tokens_map.json CHANGED Viewed

@@ -12,5 +12,19 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
   }
 }

     "normalized": false,
     "rstrip": false,
     "single_word": false
+  },
+  "mask_token": {
+    "content": "<|reserved_special_token_247|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|reserved_special_token_247|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
   }
 }

tokenizer_config.json CHANGED Viewed

@@ -2053,10 +2053,12 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "extra_special_tokens": {},
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 131072,
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
   "extra_special_tokens": {},
+  "mask_token": "<|reserved_special_token_247|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 131072,
+  "pad_token": "<|reserved_special_token_247|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }