ashwinij2
/

deepseek-llama-converted

+{
+  "version": "1.0",
+  "truncation": {
+    "max_length": 2048,
+    "direction": "Right",
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
+  "padding": {
+    "strategy": {
+      "Fixed": 2048
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": 8,
+    "pad_id": 2,
+    "pad_type_id": 0,
+    "pad_token": "</s>"
+  },
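+  "added_tokens": [
+    {
+      "id": 0,
+      "special": true,
+      "content": "<unk>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false
+    },
+    {
+      "id": 1,
+      "special": true,
+      "content": "<s>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false
+    },
+    {
+      "id": 2,
+      "special": true,
+      "content": "</s>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false
+    }
+  ],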
+  "normalizer": {
+    "type": "Sequence",
+    "normalizers": [
+      {
+        "type": "Precompiled",
+        "precompiled_charsmap": "",
+        "lstrip": false,
+        "rstrip": false
+      }
+    ]
+  },
+  "pre_tokenizer": {
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": true
+  },
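+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 0
+        }
+      }
+    ],
+    "special_tokens": {
+      "</s>": {
+        "id": "</s>",
+        "ids": [
+          2
+        ],
+        "tokens": [
+          "</s>"
+        ]
+      }
+    }
+  },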
+  "decoder": {
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": true
+  },
+  "model": {
+    "type": "BPE",
+    "dropout": null,
+    "unk_token": "<unk>",
+    "continuing_subword_prefix": "",
+    "end_of_word_suffix": "",
+    "fuse_unk": false,
+    "vocab": {
+      "<unk>": 0,
+      "<s>": 1,
+      "</s>": 2
+    },
+    "merges": []
+  }
+}