Add tokenizer files
- special_tokens_map.json   +7  -0
- tokenizer.json            +1 -26
- tokenizer_config.json    +45  -2
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "bos_token": "[BOS]",
+  "eos_token": "[EOS]",
+  "mask_token": "_",
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
+}
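These five entries are what transformers reads back when the tokenizer is loaded. A minimal sketch of that round trip (the local file layout is an assumption; `from_pretrained` on the repo itself would pick up special_tokens_map.json automatically and set the same attributes):

```python
from transformers import PreTrainedTokenizerFast

# Assumes tokenizer.json from this repo sits in the working directory.
tok = PreTrainedTokenizerFast(
    tokenizer_file="tokenizer.json",
    bos_token="[BOS]",
    eos_token="[EOS]",
    mask_token="_",
    pad_token="[PAD]",
    unk_token="[UNK]",
)

print(tok.special_tokens_map)
# {'bos_token': '[BOS]', 'eos_token': '[EOS]', 'unk_token': '[UNK]',
#  'pad_token': '[PAD]', 'mask_token': '_'}
```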
tokenizer.json
CHANGED
@@ -66,32 +66,7 @@
   "pre_tokenizer": {
     "type": "Whitespace"
   },
-  "post_processor": {
-    "type": "TemplateProcessing",
-    "single": [
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      }
-    ],
-    "pair": [
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      },
-      {
-        "Sequence": {
-          "id": "B",
-          "type_id": 1
-        }
-      }
-    ],
-    "special_tokens": {}
-  },
+  "post_processor": null,
   "decoder": null,
   "model": {
     "type": "WordLevel",
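The removed post_processor was a TemplateProcessing step that inserted no special tokens: it only passed sequence A through with type_id 0 (and B with type_id 1 for pairs), so nulling it out should leave encodings unchanged. A small sketch with the tokenizers library makes that concrete; the three-word vocabulary is a stand-in, since the real mapping lives in the model.vocab section of tokenizer.json:

```python
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.processors import TemplateProcessing

# Stand-in vocabulary; the real one is in tokenizer.json's model section.
vocab = {"[UNK]": 0, "hello": 1, "world": 2}

def build(with_template: bool) -> Tokenizer:
    tok = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))
    tok.pre_tokenizer = Whitespace()
    if with_template:
        # Equivalent of the post_processor this commit deletes: no special
        # tokens, just "$A" with type_id 0 (and "$B" with type_id 1 in pairs).
        tok.post_processor = TemplateProcessing(
            single="$A:0", pair="$A:0 $B:1", special_tokens=[]
        )
    return tok

old_enc = build(True).encode("hello world")
new_enc = build(False).encode("hello world")
print(old_enc.tokens, old_enc.type_ids)  # ['hello', 'world'] [0, 0]
print(new_enc.tokens, new_enc.type_ids)  # identical output
```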
tokenizer_config.json
CHANGED
@@ -1,10 +1,53 @@
 {
-  "
+  "added_tokens_decoder": {
+    "0": {
+      "content": "_",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[BOS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[EOS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
   "bos_token": "[BOS]",
+  "clean_up_tokenization_spaces": false,
   "eos_token": "[EOS]",
+  "extra_special_tokens": {},
   "mask_token": "_",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
-  "tokenizer_class": "
+  "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "[UNK]"
 }
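With tokenizer_class now pinned to PreTrainedTokenizerFast and added_tokens_decoder spelling out IDs 0 through 4, AutoTokenizer can resolve both the class and the special-token IDs straight from this config. (The huge model_max_length, 1000000000000000019884624838656, is int(1e30), the transformers sentinel for "no length limit".) A quick sketch, with the repo path as a placeholder and assuming tokenizer.json's vocabulary agrees with the IDs above:

```python
from transformers import AutoTokenizer

# Placeholder path; point this at a local clone of the repo (or its Hub ID).
tok = AutoTokenizer.from_pretrained("path/to/this-repo")

print(type(tok).__name__)  # PreTrainedTokenizerFast, per "tokenizer_class"

# IDs follow added_tokens_decoder: "_"=0, [PAD]=1, [UNK]=2, [BOS]=3, [EOS]=4
print(tok.mask_token_id, tok.pad_token_id, tok.unk_token_id,
      tok.bos_token_id, tok.eos_token_id)  # 0 1 2 3 4
```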