Uploaded Tokenizer Config Files

Browse files

Files changed (3) hide show

special_tokens_map.json +3 -0
tokenizer.json +124 -0
tokenizer_config.json +7 -0

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "bos_token": ";"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,124 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 9,
+      "content": ";",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Split",
+    "pattern": {
+      "Regex": "."
+    },
+    "behavior": "Isolated",
+    "invert": false
+  },
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": ";",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "": {
+        "id": "",
+        "ids": [
+          0
+        ],
+        "tokens": [
+          ""
+        ]
+      },
+      ";": {
+        "id": ";",
+        "ids": [
+          9
+        ],
+        "tokens": [
+          ";"
+        ]
+      }
+    }
+  },
+  "decoder": {
+    "type": "Sequence",
+    "decoders": [
+      {
+        "type": "Replace",
+        "pattern": {
+          "String": " "
+        },
+        "content": "▁"
+      },
+      {
+        "type": "Replace",
+        "pattern": {
+          "String": "▁"
+        },
+        "content": " "
+      }
+    ]
+  },
+  "model": {
+    "type": "WordLevel",
+    "vocab": {
+      " ": 0,
+      "1": 1,
+      "2": 2,
+      "3": 3,
+      "4": 4,
+      "5": 5,
+      "6": 6,
+      "7": 7,
+      "8": 8,
+      ";": 9,
+      "#": 10,
+      "a": 11,
+      "b": 12,
+      "c": 13,
+      "d": 14,
+      "e": 15,
+      "f": 16,
+      "g": 17,
+      "h": 18,
+      "n": 19,
+      "r": 20,
+      "q": 21,
+      "k": 22
+    },
+    "unk_token": " "
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "add_bos_token": true,
+  "bos_token": ";",
+  "clean_up_tokenization_spaces": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "tokenizer_class": "UciTokenizer"
+}