derko83 commited on
Commit
a445e05
·
verified ·
1 Parent(s): 68388ba

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "max_position_embeddings": 32768,
15
+ "max_window_layers": 28,
16
+ "model_type": "qwen3",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": true,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.51.3",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.51.3"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c0906527c4b2624263a5b31936a52b7977bbf10dc6b7354513159a8fb82fed
3
+ size 2384234968
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c1ece1d465a9063a116e45fbb9753b649732412ced72734536e04a1915552af
3
+ size 4768663315
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de69a2834426ff9ef8199d077e00892579278af31d8969d77f98235b5cfc010a
3
+ size 14645
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a84d11a42c3eeaffdda5adade6b15f4b8d722e929b1856863f9bf6f820dd0250
3
+ size 1383
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f91292b84a994cd254423d23215df10da8aa2ef02d5bf7bbb7aa4b2050cd26
3
+ size 1465
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|endoftext|>",
233
+ "errors": "replace",
234
+ "extra_special_tokens": {},
235
+ "model_max_length": 131072,
236
+ "pad_token": "<|endoftext|>",
237
+ "split_special_tokens": false,
238
+ "tokenizer_class": "Qwen2Tokenizer",
239
+ "unk_token": null
240
+ }
trainer_state.json ADDED
@@ -0,0 +1,574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.6413136438793545,
6
+ "eval_steps": 500,
7
+ "global_step": 1800,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.045612634699811846,
14
+ "grad_norm": 59.46120071411133,
15
+ "learning_rate": 6.409090909090908e-07,
16
+ "logits/chosen": -1.3211605548858643,
17
+ "logits/rejected": -1.3492553234100342,
18
+ "logps/chosen": -169.80906677246094,
19
+ "logps/rejected": -200.97677612304688,
20
+ "loss": 0.6812,
21
+ "rewards/accuracies": 0.5562499761581421,
22
+ "rewards/chosen": -0.0059404876083135605,
23
+ "rewards/margins": 0.025952553376555443,
24
+ "rewards/rejected": -0.031893040984869,
25
+ "step": 50
26
+ },
27
+ {
28
+ "epoch": 0.09122526939962369,
29
+ "grad_norm": 86.95048522949219,
30
+ "learning_rate": 1.3227272727272727e-06,
31
+ "logits/chosen": -1.411544919013977,
32
+ "logits/rejected": -1.4442178010940552,
33
+ "logps/chosen": -178.4121551513672,
34
+ "logps/rejected": -219.94424438476562,
35
+ "loss": 0.5771,
36
+ "rewards/accuracies": 0.6875,
37
+ "rewards/chosen": -0.29405269026756287,
38
+ "rewards/margins": 0.4449906647205353,
39
+ "rewards/rejected": -0.7390434145927429,
40
+ "step": 100
41
+ },
42
+ {
43
+ "epoch": 0.13683790409943555,
44
+ "grad_norm": 64.74836730957031,
45
+ "learning_rate": 1.9909090909090913e-06,
46
+ "logits/chosen": -1.485386610031128,
47
+ "logits/rejected": -1.5248019695281982,
48
+ "logps/chosen": -175.85545349121094,
49
+ "logps/rejected": -215.98086547851562,
50
+ "loss": 0.4865,
51
+ "rewards/accuracies": 0.7524999976158142,
52
+ "rewards/chosen": -0.47845691442489624,
53
+ "rewards/margins": 1.005491852760315,
54
+ "rewards/rejected": -1.483948826789856,
55
+ "step": 150
56
+ },
57
+ {
58
+ "epoch": 0.18245053879924739,
59
+ "grad_norm": 48.148223876953125,
60
+ "learning_rate": 2.6727272727272727e-06,
61
+ "logits/chosen": -1.6364929676055908,
62
+ "logits/rejected": -1.6319957971572876,
63
+ "logps/chosen": -179.0496063232422,
64
+ "logps/rejected": -229.71234130859375,
65
+ "loss": 0.4315,
66
+ "rewards/accuracies": 0.7674999833106995,
67
+ "rewards/chosen": -0.8819997906684875,
68
+ "rewards/margins": 1.617713212966919,
69
+ "rewards/rejected": -2.499713182449341,
70
+ "step": 200
71
+ },
72
+ {
73
+ "epoch": 0.22806317349905925,
74
+ "grad_norm": 90.83224487304688,
75
+ "learning_rate": 2.9987134348280704e-06,
76
+ "logits/chosen": -1.5377447605133057,
77
+ "logits/rejected": -1.5789711475372314,
78
+ "logps/chosen": -189.28240966796875,
79
+ "logps/rejected": -241.5106964111328,
80
+ "loss": 0.4431,
81
+ "rewards/accuracies": 0.7674999833106995,
82
+ "rewards/chosen": -1.1264184713363647,
83
+ "rewards/margins": 1.8479437828063965,
84
+ "rewards/rejected": -2.9743621349334717,
85
+ "step": 250
86
+ },
87
+ {
88
+ "epoch": 0.2736758081988711,
89
+ "grad_norm": 54.77507781982422,
90
+ "learning_rate": 2.9890189525113655e-06,
91
+ "logits/chosen": -1.6410760879516602,
92
+ "logits/rejected": -1.5978457927703857,
93
+ "logps/chosen": -178.2059326171875,
94
+ "logps/rejected": -239.80654907226562,
95
+ "loss": 0.4138,
96
+ "rewards/accuracies": 0.7987499833106995,
97
+ "rewards/chosen": -1.0319082736968994,
98
+ "rewards/margins": 2.2714104652404785,
99
+ "rewards/rejected": -3.303318500518799,
100
+ "step": 300
101
+ },
102
+ {
103
+ "epoch": 0.3192884428986829,
104
+ "grad_norm": 40.69072723388672,
105
+ "learning_rate": 2.9698817631509474e-06,
106
+ "logits/chosen": -1.7185229063034058,
107
+ "logits/rejected": -1.751165509223938,
108
+ "logps/chosen": -190.39962768554688,
109
+ "logps/rejected": -247.27247619628906,
110
+ "loss": 0.385,
111
+ "rewards/accuracies": 0.8112499713897705,
112
+ "rewards/chosen": -1.313816785812378,
113
+ "rewards/margins": 2.4288673400878906,
114
+ "rewards/rejected": -3.7426836490631104,
115
+ "step": 350
116
+ },
117
+ {
118
+ "epoch": 0.36490107759849477,
119
+ "grad_norm": 51.27244567871094,
120
+ "learning_rate": 2.9414232264329826e-06,
121
+ "logits/chosen": -1.6467254161834717,
122
+ "logits/rejected": -1.6478239297866821,
123
+ "logps/chosen": -179.54312133789062,
124
+ "logps/rejected": -231.5401153564453,
125
+ "loss": 0.431,
126
+ "rewards/accuracies": 0.7837499976158142,
127
+ "rewards/chosen": -1.276109218597412,
128
+ "rewards/margins": 2.3179662227630615,
129
+ "rewards/rejected": -3.5940752029418945,
130
+ "step": 400
131
+ },
132
+ {
133
+ "epoch": 0.41051371229830663,
134
+ "grad_norm": 70.65045928955078,
135
+ "learning_rate": 2.9038238139523195e-06,
136
+ "logits/chosen": -1.649796724319458,
137
+ "logits/rejected": -1.6734592914581299,
138
+ "logps/chosen": -178.8260955810547,
139
+ "logps/rejected": -234.79722595214844,
140
+ "loss": 0.3768,
141
+ "rewards/accuracies": 0.8149999976158142,
142
+ "rewards/chosen": -1.090864896774292,
143
+ "rewards/margins": 2.503946304321289,
144
+ "rewards/rejected": -3.59481143951416,
145
+ "step": 450
146
+ },
147
+ {
148
+ "epoch": 0.4561263469981185,
149
+ "grad_norm": 53.61136245727539,
150
+ "learning_rate": 2.857321964740558e-06,
151
+ "logits/chosen": -1.5642222166061401,
152
+ "logits/rejected": -1.5648502111434937,
153
+ "logps/chosen": -191.90611267089844,
154
+ "logps/rejected": -252.19386291503906,
155
+ "loss": 0.3945,
156
+ "rewards/accuracies": 0.7962499856948853,
157
+ "rewards/chosen": -1.3972392082214355,
158
+ "rewards/margins": 2.93603515625,
159
+ "rewards/rejected": -4.3332743644714355,
160
+ "step": 500
161
+ },
162
+ {
163
+ "epoch": 0.5017389816979303,
164
+ "grad_norm": 51.6324577331543,
165
+ "learning_rate": 2.802212573189981e-06,
166
+ "logits/chosen": -1.6049363613128662,
167
+ "logits/rejected": -1.6026443243026733,
168
+ "logps/chosen": -187.35401916503906,
169
+ "logps/rejected": -257.5898742675781,
170
+ "loss": 0.331,
171
+ "rewards/accuracies": 0.8287500143051147,
172
+ "rewards/chosen": -1.2774113416671753,
173
+ "rewards/margins": 3.223512649536133,
174
+ "rewards/rejected": -4.500924110412598,
175
+ "step": 550
176
+ },
177
+ {
178
+ "epoch": 0.5473516163977422,
179
+ "grad_norm": 40.728816986083984,
180
+ "learning_rate": 2.738845118962275e-06,
181
+ "logits/chosen": -1.6046830415725708,
182
+ "logits/rejected": -1.5583266019821167,
183
+ "logps/chosen": -197.91168212890625,
184
+ "logps/rejected": -263.0049743652344,
185
+ "loss": 0.3482,
186
+ "rewards/accuracies": 0.8224999904632568,
187
+ "rewards/chosen": -1.6200157403945923,
188
+ "rewards/margins": 3.3425958156585693,
189
+ "rewards/rejected": -4.962611198425293,
190
+ "step": 600
191
+ },
192
+ {
193
+ "epoch": 0.592964251097554,
194
+ "grad_norm": 45.273345947265625,
195
+ "learning_rate": 2.6676214507413463e-06,
196
+ "logits/chosen": -1.6240191459655762,
197
+ "logits/rejected": -1.611716866493225,
198
+ "logps/chosen": -194.59600830078125,
199
+ "logps/rejected": -265.8725891113281,
200
+ "loss": 0.3288,
201
+ "rewards/accuracies": 0.8412500023841858,
202
+ "rewards/chosen": -1.3388230800628662,
203
+ "rewards/margins": 3.5074219703674316,
204
+ "rewards/rejected": -4.846245288848877,
205
+ "step": 650
206
+ },
207
+ {
208
+ "epoch": 0.6385768857973658,
209
+ "grad_norm": 34.892608642578125,
210
+ "learning_rate": 2.5889932378846963e-06,
211
+ "logits/chosen": -1.7866308689117432,
212
+ "logits/rejected": -1.7742173671722412,
213
+ "logps/chosen": -184.9108428955078,
214
+ "logps/rejected": -247.7977294921875,
215
+ "loss": 0.3715,
216
+ "rewards/accuracies": 0.8149999976158142,
217
+ "rewards/chosen": -1.4488279819488525,
218
+ "rewards/margins": 3.0412204265594482,
219
+ "rewards/rejected": -4.490048408508301,
220
+ "step": 700
221
+ },
222
+ {
223
+ "epoch": 0.6841895204971777,
224
+ "grad_norm": 55.344505310058594,
225
+ "learning_rate": 2.5052340273394205e-06,
226
+ "logits/chosen": -1.826340675354004,
227
+ "logits/rejected": -1.8333815336227417,
228
+ "logps/chosen": -184.3016357421875,
229
+ "logps/rejected": -246.4990692138672,
230
+ "loss": 0.3508,
231
+ "rewards/accuracies": 0.8274999856948853,
232
+ "rewards/chosen": -1.5381311178207397,
233
+ "rewards/margins": 3.2081944942474365,
234
+ "rewards/rejected": -4.746325492858887,
235
+ "step": 750
236
+ },
237
+ {
238
+ "epoch": 0.7298021551969895,
239
+ "grad_norm": 16.396432876586914,
240
+ "learning_rate": 2.4134580840681784e-06,
241
+ "logits/chosen": -1.8897172212600708,
242
+ "logits/rejected": -1.9127064943313599,
243
+ "logps/chosen": -206.5291748046875,
244
+ "logps/rejected": -274.1864318847656,
245
+ "loss": 0.3101,
246
+ "rewards/accuracies": 0.8550000190734863,
247
+ "rewards/chosen": -2.1525042057037354,
248
+ "rewards/margins": 3.880983591079712,
249
+ "rewards/rejected": -6.033487319946289,
250
+ "step": 800
251
+ },
252
+ {
253
+ "epoch": 0.7754147898968015,
254
+ "grad_norm": 37.60529327392578,
255
+ "learning_rate": 2.3158893890861404e-06,
256
+ "logits/chosen": -1.7926125526428223,
257
+ "logits/rejected": -1.7985824346542358,
258
+ "logps/chosen": -196.04129028320312,
259
+ "logps/rejected": -259.4538269042969,
260
+ "loss": 0.3114,
261
+ "rewards/accuracies": 0.8500000238418579,
262
+ "rewards/chosen": -2.2751173973083496,
263
+ "rewards/margins": 3.9349923133850098,
264
+ "rewards/rejected": -6.210109710693359,
265
+ "step": 850
266
+ },
267
+ {
268
+ "epoch": 0.8210274245966133,
269
+ "grad_norm": 38.6811408996582,
270
+ "learning_rate": 2.213146680389758e-06,
271
+ "logits/chosen": -1.8608883619308472,
272
+ "logits/rejected": -1.8560402393341064,
273
+ "logps/chosen": -196.21221923828125,
274
+ "logps/rejected": -274.54345703125,
275
+ "loss": 0.3121,
276
+ "rewards/accuracies": 0.8450000286102295,
277
+ "rewards/chosen": -2.3408617973327637,
278
+ "rewards/margins": 4.028858184814453,
279
+ "rewards/rejected": -6.369719982147217,
280
+ "step": 900
281
+ },
282
+ {
283
+ "epoch": 0.8666400592964251,
284
+ "grad_norm": 67.59371948242188,
285
+ "learning_rate": 2.1058815073078422e-06,
286
+ "logits/chosen": -1.945671558380127,
287
+ "logits/rejected": -1.9288123846054077,
288
+ "logps/chosen": -197.5050811767578,
289
+ "logps/rejected": -271.6333312988281,
290
+ "loss": 0.2909,
291
+ "rewards/accuracies": 0.8537499904632568,
292
+ "rewards/chosen": -1.4789001941680908,
293
+ "rewards/margins": 4.0447797775268555,
294
+ "rewards/rejected": -5.523679256439209,
295
+ "step": 950
296
+ },
297
+ {
298
+ "epoch": 0.912252693996237,
299
+ "grad_norm": 28.43963050842285,
300
+ "learning_rate": 1.99477409866065e-06,
301
+ "logits/chosen": -1.9384291172027588,
302
+ "logits/rejected": -1.9162278175354004,
303
+ "logps/chosen": -200.1200408935547,
304
+ "logps/rejected": -264.21453857421875,
305
+ "loss": 0.3136,
306
+ "rewards/accuracies": 0.8462499976158142,
307
+ "rewards/chosen": -1.7790579795837402,
308
+ "rewards/margins": 3.894080400466919,
309
+ "rewards/rejected": -5.673138618469238,
310
+ "step": 1000
311
+ },
312
+ {
313
+ "epoch": 0.9578653286960488,
314
+ "grad_norm": 49.5960807800293,
315
+ "learning_rate": 1.8805290490461682e-06,
316
+ "logits/chosen": -1.9607409238815308,
317
+ "logits/rejected": -1.9329227209091187,
318
+ "logps/chosen": -190.69606018066406,
319
+ "logps/rejected": -262.543212890625,
320
+ "loss": 0.2701,
321
+ "rewards/accuracies": 0.8799999952316284,
322
+ "rewards/chosen": -1.4124534130096436,
323
+ "rewards/margins": 3.9889495372772217,
324
+ "rewards/rejected": -5.401403427124023,
325
+ "step": 1050
326
+ },
327
+ {
328
+ "epoch": 1.0027367580819888,
329
+ "grad_norm": 8.592507362365723,
330
+ "learning_rate": 1.763870850609299e-06,
331
+ "logits/chosen": -2.1728835105895996,
332
+ "logits/rejected": -2.0995230674743652,
333
+ "logps/chosen": -191.92657470703125,
334
+ "logps/rejected": -273.9614562988281,
335
+ "loss": 0.2427,
336
+ "rewards/accuracies": 0.8856416940689087,
337
+ "rewards/chosen": -1.5243728160858154,
338
+ "rewards/margins": 4.81222677230835,
339
+ "rewards/rejected": -6.336598873138428,
340
+ "step": 1100
341
+ },
342
+ {
343
+ "epoch": 1.0483493927818006,
344
+ "grad_norm": 15.577631950378418,
345
+ "learning_rate": 1.6455392986294975e-06,
346
+ "logits/chosen": -2.4582772254943848,
347
+ "logits/rejected": -2.3846638202667236,
348
+ "logps/chosen": -189.32957458496094,
349
+ "logps/rejected": -283.9933166503906,
350
+ "loss": 0.063,
351
+ "rewards/accuracies": 0.9825000166893005,
352
+ "rewards/chosen": -1.279435634613037,
353
+ "rewards/margins": 6.347279071807861,
354
+ "rewards/rejected": -7.626713752746582,
355
+ "step": 1150
356
+ },
357
+ {
358
+ "epoch": 1.0939620274816124,
359
+ "grad_norm": 43.521793365478516,
360
+ "learning_rate": 1.5262848000626241e-06,
361
+ "logits/chosen": -3.1720001697540283,
362
+ "logits/rejected": -3.0936548709869385,
363
+ "logps/chosen": -191.5170440673828,
364
+ "logps/rejected": -302.63201904296875,
365
+ "loss": 0.0713,
366
+ "rewards/accuracies": 0.9800000190734863,
367
+ "rewards/chosen": -1.885518193244934,
368
+ "rewards/margins": 7.22011661529541,
369
+ "rewards/rejected": -9.105633735656738,
370
+ "step": 1200
371
+ },
372
+ {
373
+ "epoch": 1.1395746621814242,
374
+ "grad_norm": 24.051679611206055,
375
+ "learning_rate": 1.4068636147881868e-06,
376
+ "logits/chosen": -3.590402126312256,
377
+ "logits/rejected": -3.4608168601989746,
378
+ "logps/chosen": -199.92221069335938,
379
+ "logps/rejected": -318.69219970703125,
380
+ "loss": 0.05,
381
+ "rewards/accuracies": 0.981249988079071,
382
+ "rewards/chosen": -2.2472312450408936,
383
+ "rewards/margins": 8.160175323486328,
384
+ "rewards/rejected": -10.407405853271484,
385
+ "step": 1250
386
+ },
387
+ {
388
+ "epoch": 1.185187296881236,
389
+ "grad_norm": 9.708622932434082,
390
+ "learning_rate": 1.288033059739901e-06,
391
+ "logits/chosen": -3.806685209274292,
392
+ "logits/rejected": -3.6277265548706055,
393
+ "logps/chosen": -205.2801971435547,
394
+ "logps/rejected": -317.83807373046875,
395
+ "loss": 0.063,
396
+ "rewards/accuracies": 0.9825000166893005,
397
+ "rewards/chosen": -2.7129626274108887,
398
+ "rewards/margins": 8.290043830871582,
399
+ "rewards/rejected": -11.003006935119629,
400
+ "step": 1300
401
+ },
402
+ {
403
+ "epoch": 1.230799931581048,
404
+ "grad_norm": 2.236952781677246,
405
+ "learning_rate": 1.170546706332872e-06,
406
+ "logits/chosen": -3.8718817234039307,
407
+ "logits/rejected": -3.7462046146392822,
408
+ "logps/chosen": -209.9652862548828,
409
+ "logps/rejected": -328.6727600097656,
410
+ "loss": 0.0501,
411
+ "rewards/accuracies": 0.981249988079071,
412
+ "rewards/chosen": -3.4696946144104004,
413
+ "rewards/margins": 8.770748138427734,
414
+ "rewards/rejected": -12.240442276000977,
415
+ "step": 1350
416
+ },
417
+ {
418
+ "epoch": 1.2764125662808599,
419
+ "grad_norm": 1.4985854625701904,
420
+ "learning_rate": 1.0551496016432202e-06,
421
+ "logits/chosen": -4.127021312713623,
422
+ "logits/rejected": -3.9822707176208496,
423
+ "logps/chosen": -216.8661651611328,
424
+ "logps/rejected": -341.8252258300781,
425
+ "loss": 0.0577,
426
+ "rewards/accuracies": 0.981249988079071,
427
+ "rewards/chosen": -3.4729561805725098,
428
+ "rewards/margins": 8.868853569030762,
429
+ "rewards/rejected": -12.34181022644043,
430
+ "step": 1400
431
+ },
432
+ {
433
+ "epoch": 1.3220252009806717,
434
+ "grad_norm": 37.12932205200195,
435
+ "learning_rate": 9.425735436453361e-07,
436
+ "logits/chosen": -4.2538580894470215,
437
+ "logits/rejected": -4.086177825927734,
438
+ "logps/chosen": -206.88836669921875,
439
+ "logps/rejected": -324.8296813964844,
440
+ "loss": 0.0603,
441
+ "rewards/accuracies": 0.9775000214576721,
442
+ "rewards/chosen": -3.3129966259002686,
443
+ "rewards/margins": 8.81570053100586,
444
+ "rewards/rejected": -12.128695487976074,
445
+ "step": 1450
446
+ },
447
+ {
448
+ "epoch": 1.3676378356804835,
449
+ "grad_norm": 19.34769630432129,
450
+ "learning_rate": 8.356741060752298e-07,
451
+ "logits/chosen": -4.326617240905762,
452
+ "logits/rejected": -4.108016490936279,
453
+ "logps/chosen": -211.08132934570312,
454
+ "logps/rejected": -342.179443359375,
455
+ "loss": 0.0639,
456
+ "rewards/accuracies": 0.9787499904632568,
457
+ "rewards/chosen": -3.5119857788085938,
458
+ "rewards/margins": 9.29565715789795,
459
+ "rewards/rejected": -12.807641983032227,
460
+ "step": 1500
461
+ },
462
+ {
463
+ "epoch": 1.4132504703802953,
464
+ "grad_norm": 3.8228871822357178,
465
+ "learning_rate": 7.307683094676016e-07,
466
+ "logits/chosen": -4.324881076812744,
467
+ "logits/rejected": -4.193660259246826,
468
+ "logps/chosen": -209.66769409179688,
469
+ "logps/rejected": -341.2529296875,
470
+ "loss": 0.0562,
471
+ "rewards/accuracies": 0.981249988079071,
472
+ "rewards/chosen": -3.7620954513549805,
473
+ "rewards/margins": 9.563469886779785,
474
+ "rewards/rejected": -13.32556438446045,
475
+ "step": 1550
476
+ },
477
+ {
478
+ "epoch": 1.4588631050801073,
479
+ "grad_norm": 10.610180854797363,
480
+ "learning_rate": 6.30740643853663e-07,
481
+ "logits/chosen": -4.231253623962402,
482
+ "logits/rejected": -4.145637512207031,
483
+ "logps/chosen": -206.0325927734375,
484
+ "logps/rejected": -329.6341857910156,
485
+ "loss": 0.0599,
486
+ "rewards/accuracies": 0.9775000214576721,
487
+ "rewards/chosen": -3.3670105934143066,
488
+ "rewards/margins": 9.107893943786621,
489
+ "rewards/rejected": -12.474905014038086,
490
+ "step": 1600
491
+ },
492
+ {
493
+ "epoch": 1.5044757397799189,
494
+ "grad_norm": 3.419102191925049,
495
+ "learning_rate": 5.362254409462939e-07,
496
+ "logits/chosen": -4.283145904541016,
497
+ "logits/rejected": -4.177493095397949,
498
+ "logps/chosen": -207.3034210205078,
499
+ "logps/rejected": -323.41595458984375,
500
+ "loss": 0.0611,
501
+ "rewards/accuracies": 0.9737499952316284,
502
+ "rewards/chosen": -3.4872689247131348,
503
+ "rewards/margins": 8.951678276062012,
504
+ "rewards/rejected": -12.438947677612305,
505
+ "step": 1650
506
+ },
507
+ {
508
+ "epoch": 1.550088374479731,
509
+ "grad_norm": 9.434532165527344,
510
+ "learning_rate": 4.478220748305115e-07,
511
+ "logits/chosen": -4.194823265075684,
512
+ "logits/rejected": -4.0666184425354,
513
+ "logps/chosen": -206.98306274414062,
514
+ "logps/rejected": -332.0705871582031,
515
+ "loss": 0.0626,
516
+ "rewards/accuracies": 0.9800000190734863,
517
+ "rewards/chosen": -3.25546932220459,
518
+ "rewards/margins": 9.118634223937988,
519
+ "rewards/rejected": -12.374103546142578,
520
+ "step": 1700
521
+ },
522
+ {
523
+ "epoch": 1.5957010091795427,
524
+ "grad_norm": 25.252395629882812,
525
+ "learning_rate": 3.6609116099512447e-07,
526
+ "logits/chosen": -4.359646320343018,
527
+ "logits/rejected": -4.207290172576904,
528
+ "logps/chosen": -213.02706909179688,
529
+ "logps/rejected": -331.1492919921875,
530
+ "loss": 0.0584,
531
+ "rewards/accuracies": 0.9787499904632568,
532
+ "rewards/chosen": -3.5253746509552,
533
+ "rewards/margins": 9.35997200012207,
534
+ "rewards/rejected": -12.885346412658691,
535
+ "step": 1750
536
+ },
537
+ {
538
+ "epoch": 1.6413136438793545,
539
+ "grad_norm": 3.1381542682647705,
540
+ "learning_rate": 2.915510011544664e-07,
541
+ "logits/chosen": -4.435996055603027,
542
+ "logits/rejected": -4.291849136352539,
543
+ "logps/chosen": -211.94927978515625,
544
+ "logps/rejected": -346.5935974121094,
545
+ "loss": 0.0417,
546
+ "rewards/accuracies": 0.9850000143051147,
547
+ "rewards/chosen": -3.7163028717041016,
548
+ "rewards/margins": 9.784122467041016,
549
+ "rewards/rejected": -13.5004243850708,
550
+ "step": 1800
551
+ }
552
+ ],
553
+ "logging_steps": 50,
554
+ "max_steps": 2192,
555
+ "num_input_tokens_seen": 0,
556
+ "num_train_epochs": 2,
557
+ "save_steps": 200,
558
+ "stateful_callbacks": {
559
+ "TrainerControl": {
560
+ "args": {
561
+ "should_epoch_stop": false,
562
+ "should_evaluate": false,
563
+ "should_log": false,
564
+ "should_save": true,
565
+ "should_training_stop": false
566
+ },
567
+ "attributes": {}
568
+ }
569
+ },
570
+ "total_flos": 0.0,
571
+ "train_batch_size": 1,
572
+ "trial_name": null,
573
+ "trial_params": null
574
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efdc17851ff3ea21e6d6a9096173770699ec390ffb756097275a3c3f068c3e3c
3
+ size 6545
vocab.json ADDED
The diff for this file is too large to render. See raw diff