Upload folder using huggingface_hub

- tokenizer.json +26 -1
- tokenizer_config.json +1 -0
tokenizer.json
CHANGED
@@ -47,7 +47,32 @@
     "prepend_scheme": "first",
     "split": false
   },
-  "post_processor": null,
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {}
+  },
   "decoder": {
     "type": "Sequence",
     "decoders": [
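The added post_processor is a TemplateProcessing step whose templates contain only bare Sequence pieces, so it inserts no special tokens at encode time; it only assigns type_id 0 to the first sequence ("A") and type_id 1 to the second ("B"). A minimal sketch of checking this with the tokenizers library, assuming the updated tokenizer.json is saved locally (the file path and sample strings are illustrative, not part of the commit):

from tokenizers import Tokenizer

# Load the tokenizer file changed by this commit (local path assumed).
tok = Tokenizer.from_file("tokenizer.json")

# Single sequence: the "single" template is just "A" with type_id 0,
# so no special tokens are added and every token gets type_id 0.
single = tok.encode("merhaba dünya")
print(single.type_ids)   # all 0

# Sequence pair: "A" keeps type_id 0, "B" gets type_id 1, matching
# the "pair" template above.
pair = tok.encode("merhaba", "dünya")
print(pair.type_ids)     # 0s for the first sequence, then 1s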
tokenizer_config.json
CHANGED
@@ -4,6 +4,7 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "model_file": "tmp_factory/tokenizer_tr/spm.model",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "tokenizer_class": "LlamaTokenizer",
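This change only records the SentencePiece model the tokenizer was built from. A hedged sketch of inspecting the updated file (local path assumed), not part of the commit:

import json

# Read the config as transformers' slow LlamaTokenizer would see it.
with open("tokenizer_config.json") as f:
    cfg = json.load(f)

print(cfg["tokenizer_class"])   # "LlamaTokenizer"
print(cfg["model_file"])        # "tmp_factory/tokenizer_tr/spm.model"

# This huge integer is the transformers sentinel meaning "no maximum
# length was configured", not a real context size.
print(cfg["model_max_length"])  # 1000000000000000019884624838656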