Upload tokenizer

Browse files

Files changed (11) hide show

added_tokens.json +5 -0
config.json +1 -1
merges.txt +0 -0
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
special_tokens_map.json +23 -0
tokenizer.json +0 -0
tokenizer_config.json +69 -0
vocab.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "<|endofprompt|>": 100276,
+  "<|im_end|>": 100265,
+  "<|im_start|>": 100264
+}

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "saved_models/hf/impossibleexchange/0x117",
   "architectures": [
     "LlamaForCausalLM"
   ],

 {
+  "_name_or_path": "/media/user/6864fd50-76ac-4ebd-b6ad-94f2710c7a71/sn37models/imbiuko22",
   "architectures": [
     "LlamaForCausalLM"
   ],

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:753f9d808fc669049e94ab84530f2d3baaa7f0da782b6b75eb56d2d02ad25836
 size 4938143568

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f7875fde121b64913384b9e110ce7b39c6514425ccfd4f3f8c75dd7a425f28e
 size 4938143568

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52736be4ca1f4686cf382a80e9a2f0c6161a9ad18f55261bbe0b165b34784e86
 size 4893374584

 version https://git-lfs.github.com/spec/v1
+oid sha256:135f61732f0e4ebc303dbbfdc076c657afa09f6d7efea62ebe369be9b68009bd
 size 4893374584

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09de985b36e3520c8ece4d34d9a8607e545b62d43eca3b00fef0cec8a65e3d08
 size 4416786288

 version https://git-lfs.github.com/spec/v1
+oid sha256:3faa1a37329702a7daf6fd4386abd4a9506209158d90f53d88dc408bd871a2e6
 size 4416786288

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2902cdbb17c66976485b57af8c67210956e3592d71310c0da3dbaad6c1b5adb0
 size 1182007424

 version https://git-lfs.github.com/spec/v1
+oid sha256:94ddff8823e3c39c92190bf23b1704db36f4ac9319051441e6a424a0512f0647
 size 1182007424

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "100257": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100258": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100259": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100260": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100264": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100265": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100276": {
+      "content": "<|endofprompt|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 8192,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "use_safetensors": true
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff