KitsuVp committed on
Commit
0249b1a
·
verified ·
1 Parent(s): bcd5531

Model save

Browse files
README.md CHANGED
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 4.4840
18
 
19
  ## Model description
20
 
@@ -44,37 +44,11 @@ The following hyperparameters were used during training:
44
 
45
  ### Training results
46
 
47
- | Training Loss | Epoch | Step | Validation Loss |
48
- |:-------------:|:------:|:------:|:---------------:|
49
- | 5.4612 | 0.0337 | 5000 | 5.3719 |
50
- | 5.0925 | 0.0674 | 10000 | 5.0228 |
51
- | 4.9633 | 0.1011 | 15000 | 4.8946 |
52
- | 4.909 | 0.1347 | 20000 | 4.8268 |
53
- | 4.844 | 0.1684 | 25000 | 4.7804 |
54
- | 4.8204 | 0.2021 | 30000 | 4.7456 |
55
- | 4.7826 | 0.2358 | 35000 | 4.7157 |
56
- | 4.7616 | 0.2695 | 40000 | 4.6921 |
57
- | 4.7328 | 0.3032 | 45000 | 4.6735 |
58
- | 4.7271 | 0.3368 | 50000 | 4.6575 |
59
- | 4.7147 | 0.3705 | 55000 | 4.6423 |
60
- | 4.7072 | 0.4042 | 60000 | 4.6325 |
61
- | 4.6978 | 0.4379 | 65000 | 4.6206 |
62
- | 4.6824 | 0.4716 | 70000 | 4.6131 |
63
- | 4.6754 | 0.5053 | 75000 | 4.6040 |
64
- | 4.6769 | 0.5389 | 80000 | 4.5978 |
65
- | 4.6631 | 0.5726 | 85000 | 4.5908 |
66
- | 4.6596 | 0.6063 | 90000 | 4.5845 |
67
- | 4.654 | 0.6400 | 95000 | 4.5789 |
68
- | 4.6503 | 0.6737 | 100000 | 4.5746 |
69
- | 4.6454 | 0.7074 | 105000 | 4.5697 |
70
- | 4.6497 | 0.7411 | 110000 | 4.5653 |
71
- | 4.6363 | 0.7747 | 115000 | 4.5563 |
72
- | 4.6209 | 0.8084 | 120000 | 4.5399 |
73
- | 4.6091 | 0.8421 | 125000 | 4.5266 |
74
- | 4.5895 | 0.8758 | 130000 | 4.5117 |
75
- | 4.5762 | 0.9095 | 135000 | 4.5010 |
76
- | 4.5778 | 0.9432 | 140000 | 4.4914 |
77
- | 4.5552 | 0.9768 | 145000 | 4.4840 |
78
 
79
 
80
  ### Framework versions
 
14
 
15
  This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 3.4368
18
 
19
  ## Model description
20
 
 
44
 
45
  ### Training results
46
 
47
+ | Training Loss | Epoch | Step | Validation Loss |
48
+ |:-------------:|:-----:|:-----:|:---------------:|
49
+ | 3.8209 | 0.32 | 5000 | 3.7695 |
50
+ | 3.6028 | 0.64 | 10000 | 3.5545 |
51
+ | 3.4972 | 0.96 | 15000 | 3.4368 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
 
54
  ### Framework versions
added_tokens.json CHANGED
@@ -1,7 +1,24 @@
1
  {
2
- "<|endoftext|>": 200004,
3
- "<|padding|>": 200001,
4
- "|||EMAIL_ADDRESS|||": 200002,
5
- "|||IP_ADDRESS|||": 200000,
6
- "|||PHONE_NUMBER|||": 200003
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  }
 
1
  {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
  }
chat_template.jinja CHANGED
@@ -3,7 +3,7 @@
3
  {%- if messages[0]['role'] == 'system' %}
4
  {{- messages[0]['content'] }}
5
  {%- else %}
6
- {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
  {%- endif %}
8
  {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
  {%- for tool in tools %}
@@ -15,7 +15,7 @@
15
  {%- if messages[0]['role'] == 'system' %}
16
  {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
  {%- else %}
18
- {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
  {%- endif %}
20
  {%- endif %}
21
  {%- for message in messages %}
 
3
  {%- if messages[0]['role'] == 'system' %}
4
  {{- messages[0]['content'] }}
5
  {%- else %}
6
+ {{- 'You are a helpful assistant.' }}
7
  {%- endif %}
8
  {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
  {%- for tool in tools %}
 
15
  {%- if messages[0]['role'] == 'system' %}
16
  {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
  {%- else %}
18
+ {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
19
  {%- endif %}
20
  {%- endif %}
21
  {%- for message in messages %}
config.json CHANGED
@@ -9,10 +9,9 @@
9
  "AutoModel": "modeling_neollm.NeoLLMModel",
10
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
11
  },
12
- "bos_token_id": 200004,
13
  "dropout_rate": 0.1,
14
  "dtype": "bfloat16",
15
- "eos_token_id": 200004,
16
  "fan_ratio": 0.125,
17
  "fan_ratio_ffn": 0.0625,
18
  "head_dim": 64,
@@ -44,12 +43,11 @@
44
  "num_attention_heads": 8,
45
  "num_hidden_layers": 12,
46
  "num_key_value_heads": 2,
47
- "pad_token_id": 200001,
48
  "partial_rotary_factor": 0.25,
49
- "pope_bias_init": "zero",
50
  "rms_norm_eps": 1e-06,
51
  "rope_scaling": null,
52
  "rope_theta": 10000.0,
53
  "transformers_version": "4.57.3",
54
- "vocab_size": 200005
55
  }
 
9
  "AutoModel": "modeling_neollm.NeoLLMModel",
10
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
11
  },
 
12
  "dropout_rate": 0.1,
13
  "dtype": "bfloat16",
14
+ "eos_token_id": 151643,
15
  "fan_ratio": 0.125,
16
  "fan_ratio_ffn": 0.0625,
17
  "head_dim": 64,
 
43
  "num_attention_heads": 8,
44
  "num_hidden_layers": 12,
45
  "num_key_value_heads": 2,
46
+ "pad_token_id": 151643,
47
  "partial_rotary_factor": 0.25,
 
48
  "rms_norm_eps": 1e-06,
49
  "rope_scaling": null,
50
  "rope_theta": 10000.0,
51
  "transformers_version": "4.57.3",
52
+ "vocab_size": 151665
53
  }
generation_config.json CHANGED
@@ -1,9 +1,8 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 200004,
4
  "eos_token_id": [
5
- 200004
6
  ],
7
- "pad_token_id": 200001,
8
  "transformers_version": "4.57.3"
9
  }
 
1
  {
2
  "_from_model_config": true,
 
3
  "eos_token_id": [
4
+ 151643
5
  ],
6
+ "pad_token_id": 151643,
7
  "transformers_version": "4.57.3"
8
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29d07219f27aabaf04c4c72023e04cf74a80693ffd561d5bd1cfb05436ff6c0b
3
- size 303441424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f6ad44388434d4fe9ef84940511ab9a2485efe43a14e57790e76a8c77aa893
3
+ size 253937864
special_tokens_map.json CHANGED
@@ -1,11 +1,19 @@
1
  {
2
- "bos_token": {
3
- "content": "<|endoftext|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
 
 
 
 
 
 
 
 
9
  "eos_token": {
10
  "content": "<|endoftext|>",
11
  "lstrip": false,
@@ -14,13 +22,6 @@
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|padding|>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "unk_token": {
24
  "content": "<|endoftext|>",
25
  "lstrip": false,
26
  "normalized": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
  "eos_token": {
18
  "content": "<|endoftext|>",
19
  "lstrip": false,
 
22
  "single_word": false
23
  },
24
  "pad_token": {
 
 
 
 
 
 
 
25
  "content": "<|endoftext|>",
26
  "lstrip": false,
27
  "normalized": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c59b266e1dc9a768b2d69493c3f6eb3e46db7deb723411c958fe65ee06c28533
3
- size 14746941
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json CHANGED
@@ -1,53 +1,207 @@
1
  {
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
- "200000": {
5
- "content": "|||IP_ADDRESS|||",
6
  "lstrip": false,
7
- "normalized": true,
8
  "rstrip": false,
9
  "single_word": false,
10
- "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  },
12
- "200001": {
13
- "content": "<|padding|>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
- "200002": {
21
- "content": "|||EMAIL_ADDRESS|||",
22
  "lstrip": false,
23
- "normalized": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": false
27
  },
28
- "200003": {
29
- "content": "|||PHONE_NUMBER|||",
30
  "lstrip": false,
31
- "normalized": true,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": false
35
  },
36
- "200004": {
37
- "content": "<|endoftext|>",
38
  "lstrip": false,
39
  "normalized": false,
40
  "rstrip": false,
41
  "single_word": false,
42
- "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
  },
45
- "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  "clean_up_tokenization_spaces": false,
47
  "eos_token": "<|endoftext|>",
 
48
  "extra_special_tokens": {},
49
- "model_max_length": 1000000000000000019884624838656,
50
- "pad_token": "<|padding|>",
51
- "tokenizer_class": "GPT2Tokenizer",
52
- "unk_token": "<|endoftext|>"
 
53
  }
 
1
  {
2
+ "add_bos_token": false,
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
  "lstrip": false,
8
+ "normalized": false,
9
  "rstrip": false,
10
  "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
  },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false,
51
  "special": true
52
  },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
  "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
  "rstrip": false,
122
  "single_word": false,
123
  "special": false
124
  },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
  "lstrip": false,
128
+ "normalized": false,
129
  "rstrip": false,
130
  "single_word": false,
131
  "special": false
132
  },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
  "lstrip": false,
136
  "normalized": false,
137
  "rstrip": false,
138
  "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
  }
181
  },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
  "clean_up_tokenization_spaces": false,
199
  "eos_token": "<|endoftext|>",
200
+ "errors": "replace",
201
  "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f21152283bc740ca0041584baa54c17fe71b6098c364c0af1a57ed16de351d4
3
  size 6033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950fb1b5260277fb5b4fbd5519494da796a7eb3cdae7696248c9d342a80a4151
3
  size 6033
vocab.json CHANGED
The diff for this file is too large to render. See raw diff