Upload tokenizer
- special_tokens_map.json +14 -2
- tokenizer.json +10 -31
- tokenizer_config.json +5 -2
special_tokens_map.json CHANGED
@@ -1,7 +1,19 @@
 {
   "additional_special_tokens": [
-    "[V0]",
-    "[V1]"
+    {
+      "content": "[V0]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[V1]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
   ],
   "cls_token": "[CLS]",
   "mask_token": "[MASK]",
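The object form above is how `transformers` serializes special tokens that were registered as `AddedToken` instances rather than bare strings. Below is a minimal sketch of how [V0] and [V1] could have been registered this way; the base checkpoint name and output path are placeholders, not taken from this commit:

```python
from transformers import AutoTokenizer
from tokenizers import AddedToken

# Placeholder base checkpoint; the actual base model is not named in this commit.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Register [V0] and [V1] with the same lstrip/rstrip/normalized/single_word
# flags that appear in the diff above.
tokenizer.add_special_tokens({
    "additional_special_tokens": [
        AddedToken("[V0]", lstrip=False, rstrip=False,
                   normalized=False, single_word=False),
        AddedToken("[V1]", lstrip=False, rstrip=False,
                   normalized=False, single_word=False),
    ]
})

# save_pretrained writes the object form into special_tokens_map.json.
tokenizer.save_pretrained("./uploaded-tokenizer")  # placeholder path
```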
tokenizer.json CHANGED
@@ -82,13 +82,7 @@
     "single": [
       {
         "SpecialToken": {
-          "id": "[V0]",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "[V1]",
+          "id": "[CLS]",
           "type_id": 0
         }
       },
@@ -108,13 +102,7 @@
     "pair": [
       {
         "SpecialToken": {
-          "id": "[V0]",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "[V1]",
+          "id": "[CLS]",
           "type_id": 0
         }
       },
@@ -144,31 +132,22 @@
       }
     ],
     "special_tokens": {
-      "[SEP]": {
-        "id": "[SEP]",
+      "[CLS]": {
+        "id": "[CLS]",
         "ids": [
-          102
+          101
         ],
         "tokens": [
-          "[SEP]"
+          "[CLS]"
         ]
       },
-      "[V0]": {
-        "id": "[V0]",
-        "ids": [
-          30522
-        ],
-        "tokens": [
-          "[V0]"
-        ]
-      },
-      "[V1]": {
-        "id": "[V1]",
+      "[SEP]": {
+        "id": "[SEP]",
         "ids": [
-          30523
+          102
         ],
         "tokens": [
-          "[V1]"
+          "[SEP]"
         ]
       }
     }
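These hunks swap the post-processor's leading special tokens: the old template apparently began with two extra tokens, [V0] (id 30522) and [V1], where a stock BERT tokenizer puts [CLS]; the new file restores [CLS] and [SEP] with BERT's conventional ids 101 and 102. The full template is not visible in the hunks, but the surviving context matches the standard `TemplateProcessing` setup from the `tokenizers` library. A sketch, assuming the usual BERT single/pair templates:

```python
from tokenizers.processors import TemplateProcessing

# Standard BERT post-processing that the new tokenizer.json appears to encode:
# [CLS] opens every sequence, [SEP] closes each segment, and the second
# segment of a pair gets type_id 1.
post_processor = TemplateProcessing(
    single="[CLS] $A [SEP]",
    pair="[CLS] $A [SEP] $B:1 [SEP]:1",
    special_tokens=[("[CLS]", 101), ("[SEP]", 102)],
)
```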
tokenizer_config.json CHANGED
@@ -63,10 +63,13 @@
   ],
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "do_lower_case": true,
   "mask_token": "[MASK]",
-  "model_max_length":
+  "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }
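With `tokenizer_class` pinned to `BertTokenizer` and `model_max_length` set to 512, `AutoTokenizer` can resolve the tokenizer class directly from the config and truncation defaults to BERT's 512-token limit. A quick sanity check, assuming the three files above sit in a local directory (the path is a placeholder):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./uploaded-tokenizer")  # placeholder path

print(type(tok).__name__)             # BertTokenizerFast (fast variant of BertTokenizer)
print(tok.model_max_length)           # 512
print(tok.additional_special_tokens)  # ['[V0]', '[V1]']

# The restored post-processor wraps input in [CLS]/[SEP], so the ids
# should start with 101 and end with 102.
print(tok("hello world").input_ids)
```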