Updated model weights

Files changed (11) hide show

Modelfile +15 -0
chat_template.jinja +4 -0
config.json +46 -2
generation_config.json +2 -8
model-00001-of-00004.safetensors +2 -2
model-00002-of-00004.safetensors +2 -2
model-00003-of-00004.safetensors +2 -2
model-00004-of-00004.safetensors +2 -2
model.safetensors.index.json +1 -0
special_tokens_map.json +8 -1
tokenizer_config.json +4 -2

Modelfile ADDED Viewed

	@@ -0,0 +1,15 @@

+# ollama modelfile auto-generated by llamafactory
+FROM .
+TEMPLATE """<bos>{{ if .System }}{{ .System }}
+{{ end }}{{ range .Messages }}{{ if eq .Role "user" }}<start_of_turn>user
+{{ .Content }}<end_of_turn>
+<start_of_turn>model
+{{ else if eq .Role "assistant" }}{{ .Content }}<end_of_turn>
+{{ end }}{{ end }}"""
+PARAMETER stop "<eos>"
+PARAMETER stop "<end_of_turn>"
+PARAMETER num_ctx 4096

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,4 @@

+{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
+' + message['content'] | trim + '<end_of_turn>
+' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
+'}}{% endif %}

config.json CHANGED Viewed

@@ -7,6 +7,7 @@
   "attn_logit_softcapping": 50.0,
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
   "eos_token_id": 1,
   "final_logit_softcapping": 30.0,
   "head_dim": 256,
@@ -15,6 +16,50 @@
   "hidden_size": 3584,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
   "max_position_embeddings": 8192,
   "model_type": "gemma2",
   "num_attention_heads": 16,
@@ -26,8 +71,7 @@
   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "sliding_window_size": 4096,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
   "use_cache": true,
   "vocab_size": 256000
 }

   "attn_logit_softcapping": 50.0,
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
+  "dtype": "bfloat16",
   "eos_token_id": 1,
   "final_logit_softcapping": 30.0,
   "head_dim": 256,
   "hidden_size": 3584,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
+  "layer_types": [
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 8192,
   "model_type": "gemma2",
   "num_attention_heads": 16,
   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "sliding_window_size": 4096,
+  "transformers_version": "4.57.1",
   "use_cache": true,
   "vocab_size": 256000
 }

generation_config.json CHANGED Viewed

@@ -3,15 +3,9 @@
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
   "do_sample": true,
-  "eos_token_id": [
-    1,
-    107
-  ],
   "pad_token_id": 0,
-  "stop_strings": [
-    "<end_of_turn>"
-  ],
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.51.3"
 }

   "bos_token_id": 2,
   "cache_implementation": "hybrid",
   "do_sample": true,
+  "eos_token_id": 1,
   "pad_token_id": 0,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.57.1"
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bacb3606780b1f6bfbab55a6c04e9a91bf1d8c8c9f4a58cfe747a36d8070ff5
-size 325582848

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5ed8885eeefe9d653b1ed6af26dd33fbbecd53bad5ab75e1af69f8a1789844c
+size 4903351912

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa4cc14042ee8844ca11ff8d68ee875e030c41c39e00e5685ac9fd5629d0810f
-size 325582848

 version https://git-lfs.github.com/spec/v1
+oid sha256:7258ebc6ac332963097ea8257959f8daa711cbbef3117dc5a5c879736e272ad2
+size 4947570872

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97a9f953dc02efcc79a79cf0478e034c735d7f30aa1cde6131ddf85eb9148a5d
-size 325844992

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5ca2fdaa8435dd858ef971136c2df92b395da763d3c7e0150d1c3d1f8dea2c2
+size 4962221464

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f05a3036232ed568d6fe5fb46e6e3f89bc46e486676c8acfb00b77dd6ea72b4e
-size 325582848

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9cdbac73b4e1e35196031b0d234100f201d30c8e6f7b1202bbbbc5316ab63bf
+size 3670322200

model.safetensors.index.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
   "metadata": {
     "total_size": 18483411968
   },
   "weight_map": {

 {
   "metadata": {
+    "total_parameters": 9241705984,
     "total_size": 18483411968
   },
   "weight_map": {

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,14 @@
 {
   "additional_special_tokens": [
     "<start_of_turn>",
-    "<end_of_turn>"
   ],
   "bos_token": {
     "content": "<bos>",

 {
   "additional_special_tokens": [
     "<start_of_turn>",
+    "<end_of_turn>",
+    {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
   ],
   "bos_token": {
     "content": "<bos>",

tokenizer_config.json CHANGED Viewed

@@ -1997,17 +1997,19 @@
   },
   "additional_special_tokens": [
     "<start_of_turn>",
-    "<end_of_turn>"
   ],
   "bos_token": "<bos>",
-  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<eos>",
   "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "GemmaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false

   },
   "additional_special_tokens": [
     "<start_of_turn>",
+    "<end_of_turn>",
+    "<eos>"
   ],
   "bos_token": "<bos>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<eos>",
   "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
+  "padding_side": "left",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
+  "split_special_tokens": false,
   "tokenizer_class": "GemmaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false