Timotej Knez committed on
Commit
cba24ac
·
1 Parent(s): 24b9366

Updated model weights

Browse files
Modelfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ollama modelfile auto-generated by llamafactory
2
+
3
+ FROM .
4
+
5
+ TEMPLATE """<bos>{{ if .System }}{{ .System }}
6
+
7
+ {{ end }}{{ range .Messages }}{{ if eq .Role "user" }}<start_of_turn>user
8
+ {{ .Content }}<end_of_turn>
9
+ <start_of_turn>model
10
+ {{ else if eq .Role "assistant" }}{{ .Content }}<end_of_turn>
11
+ {{ end }}{{ end }}"""
12
+
13
+ PARAMETER stop "<eos>"
14
+ PARAMETER stop "<end_of_turn>"
15
+ PARAMETER num_ctx 4096
chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}
config.json CHANGED
@@ -7,6 +7,7 @@
7
  "attn_logit_softcapping": 50.0,
8
  "bos_token_id": 2,
9
  "cache_implementation": "hybrid",
 
10
  "eos_token_id": 1,
11
  "final_logit_softcapping": 30.0,
12
  "head_dim": 256,
@@ -15,6 +16,50 @@
15
  "hidden_size": 3584,
16
  "initializer_range": 0.02,
17
  "intermediate_size": 14336,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  "max_position_embeddings": 8192,
19
  "model_type": "gemma2",
20
  "num_attention_heads": 16,
@@ -26,8 +71,7 @@
26
  "rope_theta": 10000.0,
27
  "sliding_window": 4096,
28
  "sliding_window_size": 4096,
29
- "torch_dtype": "bfloat16",
30
- "transformers_version": "4.51.3",
31
  "use_cache": true,
32
  "vocab_size": 256000
33
  }
 
7
  "attn_logit_softcapping": 50.0,
8
  "bos_token_id": 2,
9
  "cache_implementation": "hybrid",
10
+ "dtype": "bfloat16",
11
  "eos_token_id": 1,
12
  "final_logit_softcapping": 30.0,
13
  "head_dim": 256,
 
16
  "hidden_size": 3584,
17
  "initializer_range": 0.02,
18
  "intermediate_size": 14336,
19
+ "layer_types": [
20
+ "sliding_attention",
21
+ "full_attention",
22
+ "sliding_attention",
23
+ "full_attention",
24
+ "sliding_attention",
25
+ "full_attention",
26
+ "sliding_attention",
27
+ "full_attention",
28
+ "sliding_attention",
29
+ "full_attention",
30
+ "sliding_attention",
31
+ "full_attention",
32
+ "sliding_attention",
33
+ "full_attention",
34
+ "sliding_attention",
35
+ "full_attention",
36
+ "sliding_attention",
37
+ "full_attention",
38
+ "sliding_attention",
39
+ "full_attention",
40
+ "sliding_attention",
41
+ "full_attention",
42
+ "sliding_attention",
43
+ "full_attention",
44
+ "sliding_attention",
45
+ "full_attention",
46
+ "sliding_attention",
47
+ "full_attention",
48
+ "sliding_attention",
49
+ "full_attention",
50
+ "sliding_attention",
51
+ "full_attention",
52
+ "sliding_attention",
53
+ "full_attention",
54
+ "sliding_attention",
55
+ "full_attention",
56
+ "sliding_attention",
57
+ "full_attention",
58
+ "sliding_attention",
59
+ "full_attention",
60
+ "sliding_attention",
61
+ "full_attention"
62
+ ],
63
  "max_position_embeddings": 8192,
64
  "model_type": "gemma2",
65
  "num_attention_heads": 16,
 
71
  "rope_theta": 10000.0,
72
  "sliding_window": 4096,
73
  "sliding_window_size": 4096,
74
+ "transformers_version": "4.57.1",
 
75
  "use_cache": true,
76
  "vocab_size": 256000
77
  }
generation_config.json CHANGED
@@ -3,15 +3,9 @@
3
  "bos_token_id": 2,
4
  "cache_implementation": "hybrid",
5
  "do_sample": true,
6
- "eos_token_id": [
7
- 1,
8
- 107
9
- ],
10
  "pad_token_id": 0,
11
- "stop_strings": [
12
- "<end_of_turn>"
13
- ],
14
  "temperature": 0.6,
15
  "top_p": 0.9,
16
- "transformers_version": "4.51.3"
17
  }
 
3
  "bos_token_id": 2,
4
  "cache_implementation": "hybrid",
5
  "do_sample": true,
6
+ "eos_token_id": 1,
 
 
 
7
  "pad_token_id": 0,
 
 
 
8
  "temperature": 0.6,
9
  "top_p": 0.9,
10
+ "transformers_version": "4.57.1"
11
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bacb3606780b1f6bfbab55a6c04e9a91bf1d8c8c9f4a58cfe747a36d8070ff5
3
- size 325582848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ed8885eeefe9d653b1ed6af26dd33fbbecd53bad5ab75e1af69f8a1789844c
3
+ size 4903351912
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa4cc14042ee8844ca11ff8d68ee875e030c41c39e00e5685ac9fd5629d0810f
3
- size 325582848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7258ebc6ac332963097ea8257959f8daa711cbbef3117dc5a5c879736e272ad2
3
+ size 4947570872
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97a9f953dc02efcc79a79cf0478e034c735d7f30aa1cde6131ddf85eb9148a5d
3
- size 325844992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ca2fdaa8435dd858ef971136c2df92b395da763d3c7e0150d1c3d1f8dea2c2
3
+ size 4962221464
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f05a3036232ed568d6fe5fb46e6e3f89bc46e486676c8acfb00b77dd6ea72b4e
3
- size 325582848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9cdbac73b4e1e35196031b0d234100f201d30c8e6f7b1202bbbbc5316ab63bf
3
+ size 3670322200
model.safetensors.index.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "metadata": {
 
3
  "total_size": 18483411968
4
  },
5
  "weight_map": {
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 9241705984,
4
  "total_size": 18483411968
5
  },
6
  "weight_map": {
special_tokens_map.json CHANGED
@@ -1,7 +1,14 @@
1
  {
2
  "additional_special_tokens": [
3
  "<start_of_turn>",
4
- "<end_of_turn>"
 
 
 
 
 
 
 
5
  ],
6
  "bos_token": {
7
  "content": "<bos>",
 
1
  {
2
  "additional_special_tokens": [
3
  "<start_of_turn>",
4
+ "<end_of_turn>",
5
+ {
6
+ "content": "<eos>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ }
12
  ],
13
  "bos_token": {
14
  "content": "<bos>",
tokenizer_config.json CHANGED
@@ -1997,17 +1997,19 @@
1997
  },
1998
  "additional_special_tokens": [
1999
  "<start_of_turn>",
2000
- "<end_of_turn>"
 
2001
  ],
2002
  "bos_token": "<bos>",
2003
- "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
2006
  "extra_special_tokens": {},
2007
  "model_max_length": 1000000000000000019884624838656,
2008
  "pad_token": "<pad>",
 
2009
  "sp_model_kwargs": {},
2010
  "spaces_between_special_tokens": false,
 
2011
  "tokenizer_class": "GemmaTokenizer",
2012
  "unk_token": "<unk>",
2013
  "use_default_system_prompt": false
 
1997
  },
1998
  "additional_special_tokens": [
1999
  "<start_of_turn>",
2000
+ "<end_of_turn>",
2001
+ "<eos>"
2002
  ],
2003
  "bos_token": "<bos>",
 
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
2006
  "extra_special_tokens": {},
2007
  "model_max_length": 1000000000000000019884624838656,
2008
  "pad_token": "<pad>",
2009
+ "padding_side": "left",
2010
  "sp_model_kwargs": {},
2011
  "spaces_between_special_tokens": false,
2012
+ "split_special_tokens": false,
2013
  "tokenizer_class": "GemmaTokenizer",
2014
  "unk_token": "<unk>",
2015
  "use_default_system_prompt": false