koreankiwi99 commited on
Commit
1de6541
·
verified ·
1 Parent(s): 524c300

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-1350/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
checkpoint-1350/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
checkpoint-1350/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "max_position_embeddings": 32768,
15
+ "max_window_layers": 28,
16
+ "model_type": "qwen3",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": true,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.52.3",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
checkpoint-1350/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.52.3"
6
+ }
checkpoint-1350/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1350/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a793c47676550a51ebca3f46d38c9caf6f35ccebbc241c469e660d591a568bc0
3
+ size 2384234968
checkpoint-1350/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:337a963dbd5ae1571ab463f145ea25d41b3736353cde0dfc162c5a3ae3185370
3
+ size 4768662910
checkpoint-1350/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9cd6a0487226e5bd30d1846894c82af483733ab4381b75bae9c0745e05d405
3
+ size 14244
checkpoint-1350/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b478be7422409187c8570cd62ff2d6c1f23d07dbe65a32454f0d210b437c34
3
+ size 988
checkpoint-1350/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd836cfabec29ee2868162191588ec230736f86630fa4f89c3fb14d0bf6fcc3
3
+ size 1064
checkpoint-1350/special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": "<|endoftext|>"
25
+ }
checkpoint-1350/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
checkpoint-1350/tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "chat_template": null,
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|endoftext|>",
233
+ "errors": "replace",
234
+ "extra_special_tokens": {},
235
+ "model_max_length": 131072,
236
+ "pad_token": "<|endoftext|>",
237
+ "split_special_tokens": false,
238
+ "tokenizer_class": "Qwen2Tokenizer",
239
+ "unk_token": null
240
+ }
checkpoint-1350/trainer_state.json ADDED
@@ -0,0 +1,2059 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1350,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.007410151908114116,
14
+ "grad_norm": 62.764404296875,
15
+ "learning_rate": 4.974074074074075e-06,
16
+ "logits/chosen": -1.1614253520965576,
17
+ "logits/rejected": -1.096572995185852,
18
+ "logps/chosen": -74.20314025878906,
19
+ "logps/rejected": -76.51347351074219,
20
+ "loss": 0.6586,
21
+ "rewards/accuracies": 0.5375000238418579,
22
+ "rewards/chosen": -0.20016133785247803,
23
+ "rewards/margins": 0.0967479944229126,
24
+ "rewards/rejected": -0.2969093322753906,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.014820303816228233,
29
+ "grad_norm": 79.51490020751953,
30
+ "learning_rate": 4.940740740740741e-06,
31
+ "logits/chosen": -1.5121562480926514,
32
+ "logits/rejected": -1.5445247888565063,
33
+ "logps/chosen": -85.74129486083984,
34
+ "logps/rejected": -93.8956527709961,
35
+ "loss": 0.6982,
36
+ "rewards/accuracies": 0.5,
37
+ "rewards/chosen": -1.1762564182281494,
38
+ "rewards/margins": 0.23413367569446564,
39
+ "rewards/rejected": -1.4103901386260986,
40
+ "step": 20
41
+ },
42
+ {
43
+ "epoch": 0.02223045572434235,
44
+ "grad_norm": 53.0880012512207,
45
+ "learning_rate": 4.907407407407408e-06,
46
+ "logits/chosen": -1.6576855182647705,
47
+ "logits/rejected": -1.526139259338379,
48
+ "logps/chosen": -86.98616027832031,
49
+ "logps/rejected": -96.59648132324219,
50
+ "loss": 0.5807,
51
+ "rewards/accuracies": 0.6499999761581421,
52
+ "rewards/chosen": -1.2037205696105957,
53
+ "rewards/margins": 0.6045142412185669,
54
+ "rewards/rejected": -1.8082348108291626,
55
+ "step": 30
56
+ },
57
+ {
58
+ "epoch": 0.029640607632456465,
59
+ "grad_norm": 70.3161849975586,
60
+ "learning_rate": 4.870370370370371e-06,
61
+ "logits/chosen": -1.95455801486969,
62
+ "logits/rejected": -1.9040778875350952,
63
+ "logps/chosen": -86.21485900878906,
64
+ "logps/rejected": -90.55685424804688,
65
+ "loss": 0.7098,
66
+ "rewards/accuracies": 0.7250000238418579,
67
+ "rewards/chosen": -1.4126113653182983,
68
+ "rewards/margins": 0.5408647656440735,
69
+ "rewards/rejected": -1.9534763097763062,
70
+ "step": 40
71
+ },
72
+ {
73
+ "epoch": 0.037050759540570584,
74
+ "grad_norm": 86.17560577392578,
75
+ "learning_rate": 4.833333333333333e-06,
76
+ "logits/chosen": -1.7684093713760376,
77
+ "logits/rejected": -1.6274621486663818,
78
+ "logps/chosen": -97.04888916015625,
79
+ "logps/rejected": -95.69999694824219,
80
+ "loss": 0.6707,
81
+ "rewards/accuracies": 0.5625,
82
+ "rewards/chosen": -1.532711386680603,
83
+ "rewards/margins": 0.37558549642562866,
84
+ "rewards/rejected": -1.9082969427108765,
85
+ "step": 50
86
+ },
87
+ {
88
+ "epoch": 0.0444609114486847,
89
+ "grad_norm": 64.5966796875,
90
+ "learning_rate": 4.800000000000001e-06,
91
+ "logits/chosen": -1.7628320455551147,
92
+ "logits/rejected": -1.7703828811645508,
93
+ "logps/chosen": -90.20537567138672,
94
+ "logps/rejected": -97.63748168945312,
95
+ "loss": 0.6683,
96
+ "rewards/accuracies": 0.5625,
97
+ "rewards/chosen": -1.3552738428115845,
98
+ "rewards/margins": 0.38691654801368713,
99
+ "rewards/rejected": -1.7421903610229492,
100
+ "step": 60
101
+ },
102
+ {
103
+ "epoch": 0.051871063356798815,
104
+ "grad_norm": 85.56486511230469,
105
+ "learning_rate": 4.762962962962963e-06,
106
+ "logits/chosen": -1.415287733078003,
107
+ "logits/rejected": -1.4015685319900513,
108
+ "logps/chosen": -91.52272033691406,
109
+ "logps/rejected": -100.97013092041016,
110
+ "loss": 0.6871,
111
+ "rewards/accuracies": 0.675000011920929,
112
+ "rewards/chosen": -1.1221778392791748,
113
+ "rewards/margins": 0.47898730635643005,
114
+ "rewards/rejected": -1.6011650562286377,
115
+ "step": 70
116
+ },
117
+ {
118
+ "epoch": 0.05928121526491293,
119
+ "grad_norm": 47.64088439941406,
120
+ "learning_rate": 4.725925925925926e-06,
121
+ "logits/chosen": -1.169304609298706,
122
+ "logits/rejected": -1.170772910118103,
123
+ "logps/chosen": -80.82547760009766,
124
+ "logps/rejected": -93.60406494140625,
125
+ "loss": 0.5665,
126
+ "rewards/accuracies": 0.699999988079071,
127
+ "rewards/chosen": -0.8211374282836914,
128
+ "rewards/margins": 0.6154714822769165,
129
+ "rewards/rejected": -1.4366087913513184,
130
+ "step": 80
131
+ },
132
+ {
133
+ "epoch": 0.06669136717302705,
134
+ "grad_norm": 56.08961486816406,
135
+ "learning_rate": 4.6888888888888895e-06,
136
+ "logits/chosen": -1.2894606590270996,
137
+ "logits/rejected": -1.1954705715179443,
138
+ "logps/chosen": -84.70814514160156,
139
+ "logps/rejected": -93.02404022216797,
140
+ "loss": 0.6926,
141
+ "rewards/accuracies": 0.5625,
142
+ "rewards/chosen": -1.0803619623184204,
143
+ "rewards/margins": 0.41851162910461426,
144
+ "rewards/rejected": -1.4988737106323242,
145
+ "step": 90
146
+ },
147
+ {
148
+ "epoch": 0.07410151908114117,
149
+ "grad_norm": 51.74102020263672,
150
+ "learning_rate": 4.651851851851853e-06,
151
+ "logits/chosen": -1.5028270483016968,
152
+ "logits/rejected": -1.3893922567367554,
153
+ "logps/chosen": -88.91434478759766,
154
+ "logps/rejected": -91.33464813232422,
155
+ "loss": 0.6143,
156
+ "rewards/accuracies": 0.6499999761581421,
157
+ "rewards/chosen": -1.174202561378479,
158
+ "rewards/margins": 0.4613053798675537,
159
+ "rewards/rejected": -1.6355078220367432,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 0.08151167098925528,
164
+ "grad_norm": 50.91643524169922,
165
+ "learning_rate": 4.614814814814815e-06,
166
+ "logits/chosen": -1.6547952890396118,
167
+ "logits/rejected": -1.4582024812698364,
168
+ "logps/chosen": -90.47889709472656,
169
+ "logps/rejected": -104.38134765625,
170
+ "loss": 0.5684,
171
+ "rewards/accuracies": 0.675000011920929,
172
+ "rewards/chosen": -1.2481868267059326,
173
+ "rewards/margins": 0.8181821703910828,
174
+ "rewards/rejected": -2.066368818283081,
175
+ "step": 110
176
+ },
177
+ {
178
+ "epoch": 0.0889218228973694,
179
+ "grad_norm": 94.5538330078125,
180
+ "learning_rate": 4.5777777777777785e-06,
181
+ "logits/chosen": -1.5053731203079224,
182
+ "logits/rejected": -1.3366467952728271,
183
+ "logps/chosen": -88.6790542602539,
184
+ "logps/rejected": -104.670166015625,
185
+ "loss": 0.5921,
186
+ "rewards/accuracies": 0.6875,
187
+ "rewards/chosen": -1.5242931842803955,
188
+ "rewards/margins": 0.9062445759773254,
189
+ "rewards/rejected": -2.430537700653076,
190
+ "step": 120
191
+ },
192
+ {
193
+ "epoch": 0.09633197480548351,
194
+ "grad_norm": 33.83641815185547,
195
+ "learning_rate": 4.540740740740741e-06,
196
+ "logits/chosen": -1.409499168395996,
197
+ "logits/rejected": -1.2274638414382935,
198
+ "logps/chosen": -85.58585357666016,
199
+ "logps/rejected": -108.52848815917969,
200
+ "loss": 0.4394,
201
+ "rewards/accuracies": 0.7749999761581421,
202
+ "rewards/chosen": -1.420691967010498,
203
+ "rewards/margins": 1.632450819015503,
204
+ "rewards/rejected": -3.053142786026001,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.10374212671359763,
209
+ "grad_norm": 220.59970092773438,
210
+ "learning_rate": 4.503703703703704e-06,
211
+ "logits/chosen": -1.4208014011383057,
212
+ "logits/rejected": -1.2115123271942139,
213
+ "logps/chosen": -98.5712890625,
214
+ "logps/rejected": -114.5568618774414,
215
+ "loss": 0.5026,
216
+ "rewards/accuracies": 0.6875,
217
+ "rewards/chosen": -2.161109209060669,
218
+ "rewards/margins": 1.499983787536621,
219
+ "rewards/rejected": -3.661093235015869,
220
+ "step": 140
221
+ },
222
+ {
223
+ "epoch": 0.11115227862171174,
224
+ "grad_norm": 53.83354949951172,
225
+ "learning_rate": 4.4666666666666665e-06,
226
+ "logits/chosen": -1.5498363971710205,
227
+ "logits/rejected": -1.485797643661499,
228
+ "logps/chosen": -97.19245147705078,
229
+ "logps/rejected": -110.35697174072266,
230
+ "loss": 0.5838,
231
+ "rewards/accuracies": 0.637499988079071,
232
+ "rewards/chosen": -1.9847490787506104,
233
+ "rewards/margins": 1.1178936958312988,
234
+ "rewards/rejected": -3.102642774581909,
235
+ "step": 150
236
+ },
237
+ {
238
+ "epoch": 0.11856243052982586,
239
+ "grad_norm": 89.90550231933594,
240
+ "learning_rate": 4.42962962962963e-06,
241
+ "logits/chosen": -1.5130503177642822,
242
+ "logits/rejected": -1.3890920877456665,
243
+ "logps/chosen": -93.79810333251953,
244
+ "logps/rejected": -108.19990539550781,
245
+ "loss": 0.6064,
246
+ "rewards/accuracies": 0.6625000238418579,
247
+ "rewards/chosen": -1.9345242977142334,
248
+ "rewards/margins": 1.380746841430664,
249
+ "rewards/rejected": -3.3152713775634766,
250
+ "step": 160
251
+ },
252
+ {
253
+ "epoch": 0.12597258243794,
254
+ "grad_norm": 52.394100189208984,
255
+ "learning_rate": 4.392592592592593e-06,
256
+ "logits/chosen": -1.5120335817337036,
257
+ "logits/rejected": -1.4776674509048462,
258
+ "logps/chosen": -93.30475616455078,
259
+ "logps/rejected": -110.5815200805664,
260
+ "loss": 0.5675,
261
+ "rewards/accuracies": 0.637499988079071,
262
+ "rewards/chosen": -1.8305046558380127,
263
+ "rewards/margins": 1.1410784721374512,
264
+ "rewards/rejected": -2.9715828895568848,
265
+ "step": 170
266
+ },
267
+ {
268
+ "epoch": 0.1333827343460541,
269
+ "grad_norm": 55.45066833496094,
270
+ "learning_rate": 4.3555555555555555e-06,
271
+ "logits/chosen": -1.941173791885376,
272
+ "logits/rejected": -1.788526177406311,
273
+ "logps/chosen": -86.53892517089844,
274
+ "logps/rejected": -108.03669738769531,
275
+ "loss": 0.4828,
276
+ "rewards/accuracies": 0.7250000238418579,
277
+ "rewards/chosen": -1.8137779235839844,
278
+ "rewards/margins": 1.234937310218811,
279
+ "rewards/rejected": -3.048715114593506,
280
+ "step": 180
281
+ },
282
+ {
283
+ "epoch": 0.14079288625416822,
284
+ "grad_norm": 50.05571746826172,
285
+ "learning_rate": 4.318518518518519e-06,
286
+ "logits/chosen": -1.8429396152496338,
287
+ "logits/rejected": -1.8329576253890991,
288
+ "logps/chosen": -94.84736633300781,
289
+ "logps/rejected": -114.12214660644531,
290
+ "loss": 0.5133,
291
+ "rewards/accuracies": 0.6875,
292
+ "rewards/chosen": -2.004016160964966,
293
+ "rewards/margins": 1.0727269649505615,
294
+ "rewards/rejected": -3.0767431259155273,
295
+ "step": 190
296
+ },
297
+ {
298
+ "epoch": 0.14820303816228234,
299
+ "grad_norm": 70.21109008789062,
300
+ "learning_rate": 4.281481481481482e-06,
301
+ "logits/chosen": -2.149946451187134,
302
+ "logits/rejected": -2.0653207302093506,
303
+ "logps/chosen": -96.63078308105469,
304
+ "logps/rejected": -110.8408432006836,
305
+ "loss": 0.5697,
306
+ "rewards/accuracies": 0.5874999761581421,
307
+ "rewards/chosen": -2.3063206672668457,
308
+ "rewards/margins": 0.9903339147567749,
309
+ "rewards/rejected": -3.296654462814331,
310
+ "step": 200
311
+ },
312
+ {
313
+ "epoch": 0.15561319007039645,
314
+ "grad_norm": 44.284324645996094,
315
+ "learning_rate": 4.244444444444445e-06,
316
+ "logits/chosen": -2.2816834449768066,
317
+ "logits/rejected": -2.0756924152374268,
318
+ "logps/chosen": -101.39938354492188,
319
+ "logps/rejected": -104.1975326538086,
320
+ "loss": 0.7565,
321
+ "rewards/accuracies": 0.574999988079071,
322
+ "rewards/chosen": -2.6701712608337402,
323
+ "rewards/margins": 0.539871096611023,
324
+ "rewards/rejected": -3.2100422382354736,
325
+ "step": 210
326
+ },
327
+ {
328
+ "epoch": 0.16302334197851057,
329
+ "grad_norm": 49.169559478759766,
330
+ "learning_rate": 4.207407407407408e-06,
331
+ "logits/chosen": -2.303546667098999,
332
+ "logits/rejected": -2.1084389686584473,
333
+ "logps/chosen": -93.49501037597656,
334
+ "logps/rejected": -111.30097961425781,
335
+ "loss": 0.5543,
336
+ "rewards/accuracies": 0.675000011920929,
337
+ "rewards/chosen": -1.9729340076446533,
338
+ "rewards/margins": 1.0684603452682495,
339
+ "rewards/rejected": -3.0413944721221924,
340
+ "step": 220
341
+ },
342
+ {
343
+ "epoch": 0.17043349388662468,
344
+ "grad_norm": 47.69071578979492,
345
+ "learning_rate": 4.170370370370371e-06,
346
+ "logits/chosen": -2.608030319213867,
347
+ "logits/rejected": -2.3824543952941895,
348
+ "logps/chosen": -89.42241668701172,
349
+ "logps/rejected": -109.88175964355469,
350
+ "loss": 0.5986,
351
+ "rewards/accuracies": 0.7124999761581421,
352
+ "rewards/chosen": -1.9395774602890015,
353
+ "rewards/margins": 0.9270322918891907,
354
+ "rewards/rejected": -2.866609573364258,
355
+ "step": 230
356
+ },
357
+ {
358
+ "epoch": 0.1778436457947388,
359
+ "grad_norm": 51.891334533691406,
360
+ "learning_rate": 4.133333333333333e-06,
361
+ "logits/chosen": -2.294018268585205,
362
+ "logits/rejected": -2.186135768890381,
363
+ "logps/chosen": -91.37318420410156,
364
+ "logps/rejected": -103.55364990234375,
365
+ "loss": 0.5908,
366
+ "rewards/accuracies": 0.675000011920929,
367
+ "rewards/chosen": -1.9577686786651611,
368
+ "rewards/margins": 0.8284266591072083,
369
+ "rewards/rejected": -2.7861952781677246,
370
+ "step": 240
371
+ },
372
+ {
373
+ "epoch": 0.1852537977028529,
374
+ "grad_norm": 60.76057434082031,
375
+ "learning_rate": 4.0962962962962965e-06,
376
+ "logits/chosen": -2.3174843788146973,
377
+ "logits/rejected": -2.1223363876342773,
378
+ "logps/chosen": -94.4203109741211,
379
+ "logps/rejected": -108.79350280761719,
380
+ "loss": 0.5691,
381
+ "rewards/accuracies": 0.6625000238418579,
382
+ "rewards/chosen": -1.9872305393218994,
383
+ "rewards/margins": 1.057706356048584,
384
+ "rewards/rejected": -3.0449366569519043,
385
+ "step": 250
386
+ },
387
+ {
388
+ "epoch": 0.19266394961096703,
389
+ "grad_norm": 63.53352737426758,
390
+ "learning_rate": 4.05925925925926e-06,
391
+ "logits/chosen": -2.0859274864196777,
392
+ "logits/rejected": -1.9589965343475342,
393
+ "logps/chosen": -94.96440124511719,
394
+ "logps/rejected": -102.25624084472656,
395
+ "loss": 0.7933,
396
+ "rewards/accuracies": 0.5625,
397
+ "rewards/chosen": -1.9980733394622803,
398
+ "rewards/margins": 0.4961894154548645,
399
+ "rewards/rejected": -2.4942626953125,
400
+ "step": 260
401
+ },
402
+ {
403
+ "epoch": 0.20007410151908114,
404
+ "grad_norm": 97.90867614746094,
405
+ "learning_rate": 4.022222222222222e-06,
406
+ "logits/chosen": -1.9325847625732422,
407
+ "logits/rejected": -1.7728859186172485,
408
+ "logps/chosen": -94.22940826416016,
409
+ "logps/rejected": -105.72850036621094,
410
+ "loss": 0.5711,
411
+ "rewards/accuracies": 0.699999988079071,
412
+ "rewards/chosen": -1.8600021600723267,
413
+ "rewards/margins": 0.9546739459037781,
414
+ "rewards/rejected": -2.81467604637146,
415
+ "step": 270
416
+ },
417
+ {
418
+ "epoch": 0.20748425342719526,
419
+ "grad_norm": 50.39332962036133,
420
+ "learning_rate": 3.9851851851851855e-06,
421
+ "logits/chosen": -1.6392498016357422,
422
+ "logits/rejected": -1.746097207069397,
423
+ "logps/chosen": -90.52095794677734,
424
+ "logps/rejected": -107.78398132324219,
425
+ "loss": 0.4994,
426
+ "rewards/accuracies": 0.699999988079071,
427
+ "rewards/chosen": -1.6299854516983032,
428
+ "rewards/margins": 1.126053810119629,
429
+ "rewards/rejected": -2.7560391426086426,
430
+ "step": 280
431
+ },
432
+ {
433
+ "epoch": 0.21489440533530937,
434
+ "grad_norm": 82.5980453491211,
435
+ "learning_rate": 3.948148148148149e-06,
436
+ "logits/chosen": -1.9976590871810913,
437
+ "logits/rejected": -1.591271996498108,
438
+ "logps/chosen": -97.7765121459961,
439
+ "logps/rejected": -106.0452880859375,
440
+ "loss": 0.5625,
441
+ "rewards/accuracies": 0.699999988079071,
442
+ "rewards/chosen": -1.6871846914291382,
443
+ "rewards/margins": 1.2763103246688843,
444
+ "rewards/rejected": -2.9634947776794434,
445
+ "step": 290
446
+ },
447
+ {
448
+ "epoch": 0.2223045572434235,
449
+ "grad_norm": 47.00569152832031,
450
+ "learning_rate": 3.911111111111112e-06,
451
+ "logits/chosen": -1.844430923461914,
452
+ "logits/rejected": -1.5602703094482422,
453
+ "logps/chosen": -95.04554748535156,
454
+ "logps/rejected": -110.90150451660156,
455
+ "loss": 0.5173,
456
+ "rewards/accuracies": 0.75,
457
+ "rewards/chosen": -1.7995688915252686,
458
+ "rewards/margins": 1.4488320350646973,
459
+ "rewards/rejected": -3.248400926589966,
460
+ "step": 300
461
+ },
462
+ {
463
+ "epoch": 0.2297147091515376,
464
+ "grad_norm": 80.46615600585938,
465
+ "learning_rate": 3.874074074074074e-06,
466
+ "logits/chosen": -1.9769401550292969,
467
+ "logits/rejected": -1.816585898399353,
468
+ "logps/chosen": -99.07120513916016,
469
+ "logps/rejected": -114.0129623413086,
470
+ "loss": 0.5575,
471
+ "rewards/accuracies": 0.6875,
472
+ "rewards/chosen": -2.19231915473938,
473
+ "rewards/margins": 1.2036749124526978,
474
+ "rewards/rejected": -3.395993709564209,
475
+ "step": 310
476
+ },
477
+ {
478
+ "epoch": 0.23712486105965172,
479
+ "grad_norm": 59.91643142700195,
480
+ "learning_rate": 3.837037037037038e-06,
481
+ "logits/chosen": -2.00789213180542,
482
+ "logits/rejected": -1.9294627904891968,
483
+ "logps/chosen": -100.81642150878906,
484
+ "logps/rejected": -114.23243713378906,
485
+ "loss": 0.6409,
486
+ "rewards/accuracies": 0.6625000238418579,
487
+ "rewards/chosen": -2.405135154724121,
488
+ "rewards/margins": 0.944028377532959,
489
+ "rewards/rejected": -3.34916353225708,
490
+ "step": 320
491
+ },
492
+ {
493
+ "epoch": 0.24453501296776584,
494
+ "grad_norm": 71.39558410644531,
495
+ "learning_rate": 3.8000000000000005e-06,
496
+ "logits/chosen": -2.077759265899658,
497
+ "logits/rejected": -1.8107578754425049,
498
+ "logps/chosen": -99.47129821777344,
499
+ "logps/rejected": -104.8876953125,
500
+ "loss": 0.6597,
501
+ "rewards/accuracies": 0.6000000238418579,
502
+ "rewards/chosen": -2.003695011138916,
503
+ "rewards/margins": 0.9091768264770508,
504
+ "rewards/rejected": -2.912871837615967,
505
+ "step": 330
506
+ },
507
+ {
508
+ "epoch": 0.25194516487588,
509
+ "grad_norm": 37.14337921142578,
510
+ "learning_rate": 3.7629629629629633e-06,
511
+ "logits/chosen": -2.1400954723358154,
512
+ "logits/rejected": -2.0752546787261963,
513
+ "logps/chosen": -97.14881134033203,
514
+ "logps/rejected": -105.8427505493164,
515
+ "loss": 0.6113,
516
+ "rewards/accuracies": 0.6499999761581421,
517
+ "rewards/chosen": -1.9141082763671875,
518
+ "rewards/margins": 0.7792128324508667,
519
+ "rewards/rejected": -2.6933212280273438,
520
+ "step": 340
521
+ },
522
+ {
523
+ "epoch": 0.2593553167839941,
524
+ "grad_norm": 56.138423919677734,
525
+ "learning_rate": 3.725925925925926e-06,
526
+ "logits/chosen": -2.091094970703125,
527
+ "logits/rejected": -2.0105533599853516,
528
+ "logps/chosen": -92.09162139892578,
529
+ "logps/rejected": -110.2835922241211,
530
+ "loss": 0.492,
531
+ "rewards/accuracies": 0.737500011920929,
532
+ "rewards/chosen": -1.6331777572631836,
533
+ "rewards/margins": 1.2891775369644165,
534
+ "rewards/rejected": -2.9223551750183105,
535
+ "step": 350
536
+ },
537
+ {
538
+ "epoch": 0.2667654686921082,
539
+ "grad_norm": 52.79886245727539,
540
+ "learning_rate": 3.688888888888889e-06,
541
+ "logits/chosen": -2.2325026988983154,
542
+ "logits/rejected": -2.1436426639556885,
543
+ "logps/chosen": -89.82434844970703,
544
+ "logps/rejected": -112.33735656738281,
545
+ "loss": 0.4911,
546
+ "rewards/accuracies": 0.75,
547
+ "rewards/chosen": -1.5006159543991089,
548
+ "rewards/margins": 1.1897337436676025,
549
+ "rewards/rejected": -2.69035005569458,
550
+ "step": 360
551
+ },
552
+ {
553
+ "epoch": 0.2741756206002223,
554
+ "grad_norm": 150.0937042236328,
555
+ "learning_rate": 3.651851851851852e-06,
556
+ "logits/chosen": -1.8895237445831299,
557
+ "logits/rejected": -1.8900665044784546,
558
+ "logps/chosen": -103.93204498291016,
559
+ "logps/rejected": -112.97098541259766,
560
+ "loss": 0.5877,
561
+ "rewards/accuracies": 0.7124999761581421,
562
+ "rewards/chosen": -2.1075615882873535,
563
+ "rewards/margins": 1.1100621223449707,
564
+ "rewards/rejected": -3.2176239490509033,
565
+ "step": 370
566
+ },
567
+ {
568
+ "epoch": 0.28158577250833644,
569
+ "grad_norm": 42.55241012573242,
570
+ "learning_rate": 3.614814814814815e-06,
571
+ "logits/chosen": -1.945433259010315,
572
+ "logits/rejected": -1.7756521701812744,
573
+ "logps/chosen": -106.47743225097656,
574
+ "logps/rejected": -114.155517578125,
575
+ "loss": 0.6501,
576
+ "rewards/accuracies": 0.737500011920929,
577
+ "rewards/chosen": -2.3606436252593994,
578
+ "rewards/margins": 1.0331517457962036,
579
+ "rewards/rejected": -3.3937950134277344,
580
+ "step": 380
581
+ },
582
+ {
583
+ "epoch": 0.28899592441645056,
584
+ "grad_norm": 52.49542236328125,
585
+ "learning_rate": 3.577777777777778e-06,
586
+ "logits/chosen": -1.8807388544082642,
587
+ "logits/rejected": -1.8252395391464233,
588
+ "logps/chosen": -102.06422424316406,
589
+ "logps/rejected": -112.83709716796875,
590
+ "loss": 0.6055,
591
+ "rewards/accuracies": 0.675000011920929,
592
+ "rewards/chosen": -2.23865008354187,
593
+ "rewards/margins": 0.9633029103279114,
594
+ "rewards/rejected": -3.201953172683716,
595
+ "step": 390
596
+ },
597
+ {
598
+ "epoch": 0.29640607632456467,
599
+ "grad_norm": 88.17601013183594,
600
+ "learning_rate": 3.540740740740741e-06,
601
+ "logits/chosen": -1.6389392614364624,
602
+ "logits/rejected": -1.5933473110198975,
603
+ "logps/chosen": -99.57685852050781,
604
+ "logps/rejected": -115.68553161621094,
605
+ "loss": 0.5417,
606
+ "rewards/accuracies": 0.675000011920929,
607
+ "rewards/chosen": -1.9246084690093994,
608
+ "rewards/margins": 1.4971764087677002,
609
+ "rewards/rejected": -3.4217846393585205,
610
+ "step": 400
611
+ },
612
+ {
613
+ "epoch": 0.3038162282326788,
614
+ "grad_norm": 62.808006286621094,
615
+ "learning_rate": 3.503703703703704e-06,
616
+ "logits/chosen": -1.5727903842926025,
617
+ "logits/rejected": -1.5283151865005493,
618
+ "logps/chosen": -93.6846694946289,
619
+ "logps/rejected": -106.69166564941406,
620
+ "loss": 0.5396,
621
+ "rewards/accuracies": 0.75,
622
+ "rewards/chosen": -2.084920883178711,
623
+ "rewards/margins": 1.2174980640411377,
624
+ "rewards/rejected": -3.3024184703826904,
625
+ "step": 410
626
+ },
627
+ {
628
+ "epoch": 0.3112263801407929,
629
+ "grad_norm": 29.901016235351562,
630
+ "learning_rate": 3.4666666666666672e-06,
631
+ "logits/chosen": -1.862898826599121,
632
+ "logits/rejected": -1.6489702463150024,
633
+ "logps/chosen": -97.56269836425781,
634
+ "logps/rejected": -110.67304992675781,
635
+ "loss": 0.5257,
636
+ "rewards/accuracies": 0.737500011920929,
637
+ "rewards/chosen": -2.035705089569092,
638
+ "rewards/margins": 1.2200233936309814,
639
+ "rewards/rejected": -3.255728244781494,
640
+ "step": 420
641
+ },
642
+ {
643
+ "epoch": 0.318636532048907,
644
+ "grad_norm": 44.00906753540039,
645
+ "learning_rate": 3.42962962962963e-06,
646
+ "logits/chosen": -1.9901949167251587,
647
+ "logits/rejected": -1.7650057077407837,
648
+ "logps/chosen": -95.73258209228516,
649
+ "logps/rejected": -118.86299133300781,
650
+ "loss": 0.4368,
651
+ "rewards/accuracies": 0.75,
652
+ "rewards/chosen": -1.874515175819397,
653
+ "rewards/margins": 1.841843605041504,
654
+ "rewards/rejected": -3.7163589000701904,
655
+ "step": 430
656
+ },
657
+ {
658
+ "epoch": 0.32604668395702113,
659
+ "grad_norm": 124.45765686035156,
660
+ "learning_rate": 3.392592592592593e-06,
661
+ "logits/chosen": -1.9858062267303467,
662
+ "logits/rejected": -1.8073575496673584,
663
+ "logps/chosen": -106.0125732421875,
664
+ "logps/rejected": -122.56363677978516,
665
+ "loss": 0.5898,
666
+ "rewards/accuracies": 0.699999988079071,
667
+ "rewards/chosen": -2.860097646713257,
668
+ "rewards/margins": 1.184191346168518,
669
+ "rewards/rejected": -4.044289588928223,
670
+ "step": 440
671
+ },
672
+ {
673
+ "epoch": 0.33345683586513525,
674
+ "grad_norm": 156.9593963623047,
675
+ "learning_rate": 3.3555555555555557e-06,
676
+ "logits/chosen": -1.7876373529434204,
677
+ "logits/rejected": -1.6721996068954468,
678
+ "logps/chosen": -106.1432113647461,
679
+ "logps/rejected": -121.39786529541016,
680
+ "loss": 0.5335,
681
+ "rewards/accuracies": 0.7250000238418579,
682
+ "rewards/chosen": -3.1309943199157715,
683
+ "rewards/margins": 1.0686304569244385,
684
+ "rewards/rejected": -4.199625015258789,
685
+ "step": 450
686
+ },
687
+ {
688
+ "epoch": 0.34086698777324936,
689
+ "grad_norm": 91.00648498535156,
690
+ "learning_rate": 3.3185185185185185e-06,
691
+ "logits/chosen": -2.0905487537384033,
692
+ "logits/rejected": -2.0243828296661377,
693
+ "logps/chosen": -99.6212387084961,
694
+ "logps/rejected": -112.38456726074219,
695
+ "loss": 0.4929,
696
+ "rewards/accuracies": 0.7124999761581421,
697
+ "rewards/chosen": -2.6742300987243652,
698
+ "rewards/margins": 1.208742380142212,
699
+ "rewards/rejected": -3.882972240447998,
700
+ "step": 460
701
+ },
702
+ {
703
+ "epoch": 0.3482771396813635,
704
+ "grad_norm": 73.95698547363281,
705
+ "learning_rate": 3.281481481481482e-06,
706
+ "logits/chosen": -2.102916717529297,
707
+ "logits/rejected": -1.9817787408828735,
708
+ "logps/chosen": -107.4316177368164,
709
+ "logps/rejected": -120.3554916381836,
710
+ "loss": 0.607,
711
+ "rewards/accuracies": 0.637499988079071,
712
+ "rewards/chosen": -3.0798966884613037,
713
+ "rewards/margins": 1.1342341899871826,
714
+ "rewards/rejected": -4.2141313552856445,
715
+ "step": 470
716
+ },
717
+ {
718
+ "epoch": 0.3556872915894776,
719
+ "grad_norm": 115.34271240234375,
720
+ "learning_rate": 3.2444444444444446e-06,
721
+ "logits/chosen": -1.920265793800354,
722
+ "logits/rejected": -1.776262879371643,
723
+ "logps/chosen": -107.17350006103516,
724
+ "logps/rejected": -119.41255950927734,
725
+ "loss": 0.5724,
726
+ "rewards/accuracies": 0.7124999761581421,
727
+ "rewards/chosen": -3.024296283721924,
728
+ "rewards/margins": 1.301172137260437,
729
+ "rewards/rejected": -4.325467586517334,
730
+ "step": 480
731
+ },
732
+ {
733
+ "epoch": 0.3630974434975917,
734
+ "grad_norm": 124.22569274902344,
735
+ "learning_rate": 3.2074074074074075e-06,
736
+ "logits/chosen": -2.066603660583496,
737
+ "logits/rejected": -1.9234092235565186,
738
+ "logps/chosen": -107.19173431396484,
739
+ "logps/rejected": -125.5118637084961,
740
+ "loss": 0.5957,
741
+ "rewards/accuracies": 0.6625000238418579,
742
+ "rewards/chosen": -3.1629674434661865,
743
+ "rewards/margins": 1.20828115940094,
744
+ "rewards/rejected": -4.371248722076416,
745
+ "step": 490
746
+ },
747
+ {
748
+ "epoch": 0.3705075954057058,
749
+ "grad_norm": 93.52369689941406,
750
+ "learning_rate": 3.1703703703703707e-06,
751
+ "logits/chosen": -2.049516201019287,
752
+ "logits/rejected": -1.80709707736969,
753
+ "logps/chosen": -108.04731750488281,
754
+ "logps/rejected": -131.86807250976562,
755
+ "loss": 0.4991,
756
+ "rewards/accuracies": 0.7124999761581421,
757
+ "rewards/chosen": -2.871161699295044,
758
+ "rewards/margins": 1.387619972229004,
759
+ "rewards/rejected": -4.258781909942627,
760
+ "step": 500
761
+ },
762
+ {
763
+ "epoch": 0.37791774731381994,
764
+ "grad_norm": 55.953941345214844,
765
+ "learning_rate": 3.133333333333334e-06,
766
+ "logits/chosen": -2.3617918491363525,
767
+ "logits/rejected": -2.0952062606811523,
768
+ "logps/chosen": -95.71025085449219,
769
+ "logps/rejected": -117.8790512084961,
770
+ "loss": 0.4238,
771
+ "rewards/accuracies": 0.7875000238418579,
772
+ "rewards/chosen": -2.4553141593933105,
773
+ "rewards/margins": 1.845973253250122,
774
+ "rewards/rejected": -4.301287651062012,
775
+ "step": 510
776
+ },
777
+ {
778
+ "epoch": 0.38532789922193406,
779
+ "grad_norm": 87.4388427734375,
780
+ "learning_rate": 3.096296296296297e-06,
781
+ "logits/chosen": -2.3576157093048096,
782
+ "logits/rejected": -2.3086090087890625,
783
+ "logps/chosen": -103.87492370605469,
784
+ "logps/rejected": -129.99880981445312,
785
+ "loss": 0.4485,
786
+ "rewards/accuracies": 0.7749999761581421,
787
+ "rewards/chosen": -2.7980117797851562,
788
+ "rewards/margins": 1.933409333229065,
789
+ "rewards/rejected": -4.731420993804932,
790
+ "step": 520
791
+ },
792
+ {
793
+ "epoch": 0.39273805113004817,
794
+ "grad_norm": 139.21774291992188,
795
+ "learning_rate": 3.0592592592592596e-06,
796
+ "logits/chosen": -2.3830113410949707,
797
+ "logits/rejected": -2.236015796661377,
798
+ "logps/chosen": -105.96150970458984,
799
+ "logps/rejected": -123.8498306274414,
800
+ "loss": 0.4772,
801
+ "rewards/accuracies": 0.737500011920929,
802
+ "rewards/chosen": -2.7964818477630615,
803
+ "rewards/margins": 1.444065809249878,
804
+ "rewards/rejected": -4.2405476570129395,
805
+ "step": 530
806
+ },
807
+ {
808
+ "epoch": 0.4001482030381623,
809
+ "grad_norm": 60.24081802368164,
810
+ "learning_rate": 3.0222222222222225e-06,
811
+ "logits/chosen": -2.510646104812622,
812
+ "logits/rejected": -2.3236303329467773,
813
+ "logps/chosen": -106.36656188964844,
814
+ "logps/rejected": -126.50679016113281,
815
+ "loss": 0.5079,
816
+ "rewards/accuracies": 0.75,
817
+ "rewards/chosen": -3.144716739654541,
818
+ "rewards/margins": 1.5049692392349243,
819
+ "rewards/rejected": -4.649685859680176,
820
+ "step": 540
821
+ },
822
+ {
823
+ "epoch": 0.4075583549462764,
824
+ "grad_norm": 44.708290100097656,
825
+ "learning_rate": 2.9851851851851853e-06,
826
+ "logits/chosen": -2.6934173107147217,
827
+ "logits/rejected": -2.5574169158935547,
828
+ "logps/chosen": -106.81391906738281,
829
+ "logps/rejected": -124.56950378417969,
830
+ "loss": 0.546,
831
+ "rewards/accuracies": 0.737500011920929,
832
+ "rewards/chosen": -3.2360634803771973,
833
+ "rewards/margins": 1.5249487161636353,
834
+ "rewards/rejected": -4.761012077331543,
835
+ "step": 550
836
+ },
837
+ {
838
+ "epoch": 0.4149685068543905,
839
+ "grad_norm": 100.06108856201172,
840
+ "learning_rate": 2.948148148148148e-06,
841
+ "logits/chosen": -2.540220260620117,
842
+ "logits/rejected": -2.5645575523376465,
843
+ "logps/chosen": -105.29388427734375,
844
+ "logps/rejected": -127.27003479003906,
845
+ "loss": 0.4493,
846
+ "rewards/accuracies": 0.737500011920929,
847
+ "rewards/chosen": -3.06217885017395,
848
+ "rewards/margins": 1.8223583698272705,
849
+ "rewards/rejected": -4.884537696838379,
850
+ "step": 560
851
+ },
852
+ {
853
+ "epoch": 0.42237865876250463,
854
+ "grad_norm": 68.57003784179688,
855
+ "learning_rate": 2.9111111111111114e-06,
856
+ "logits/chosen": -2.6107475757598877,
857
+ "logits/rejected": -2.4408977031707764,
858
+ "logps/chosen": -102.4161605834961,
859
+ "logps/rejected": -123.5961685180664,
860
+ "loss": 0.4529,
861
+ "rewards/accuracies": 0.8125,
862
+ "rewards/chosen": -2.7161800861358643,
863
+ "rewards/margins": 1.7760255336761475,
864
+ "rewards/rejected": -4.492205619812012,
865
+ "step": 570
866
+ },
867
+ {
868
+ "epoch": 0.42978881067061875,
869
+ "grad_norm": 83.09320068359375,
870
+ "learning_rate": 2.874074074074074e-06,
871
+ "logits/chosen": -2.5648531913757324,
872
+ "logits/rejected": -2.4905104637145996,
873
+ "logps/chosen": -93.52774810791016,
874
+ "logps/rejected": -117.04872131347656,
875
+ "loss": 0.5508,
876
+ "rewards/accuracies": 0.75,
877
+ "rewards/chosen": -2.38288950920105,
878
+ "rewards/margins": 1.4900834560394287,
879
+ "rewards/rejected": -3.8729729652404785,
880
+ "step": 580
881
+ },
882
+ {
883
+ "epoch": 0.43719896257873286,
884
+ "grad_norm": 114.40435791015625,
885
+ "learning_rate": 2.837037037037037e-06,
886
+ "logits/chosen": -2.6075551509857178,
887
+ "logits/rejected": -2.3488199710845947,
888
+ "logps/chosen": -108.3016128540039,
889
+ "logps/rejected": -127.59623718261719,
890
+ "loss": 0.532,
891
+ "rewards/accuracies": 0.6499999761581421,
892
+ "rewards/chosen": -3.0478949546813965,
893
+ "rewards/margins": 1.5749890804290771,
894
+ "rewards/rejected": -4.6228837966918945,
895
+ "step": 590
896
+ },
897
+ {
898
+ "epoch": 0.444609114486847,
899
+ "grad_norm": 48.23006057739258,
900
+ "learning_rate": 2.8000000000000003e-06,
901
+ "logits/chosen": -2.520981788635254,
902
+ "logits/rejected": -2.405494213104248,
903
+ "logps/chosen": -111.14179992675781,
904
+ "logps/rejected": -126.3864974975586,
905
+ "loss": 0.5698,
906
+ "rewards/accuracies": 0.75,
907
+ "rewards/chosen": -3.072885036468506,
908
+ "rewards/margins": 1.396822214126587,
909
+ "rewards/rejected": -4.469707012176514,
910
+ "step": 600
911
+ },
912
+ {
913
+ "epoch": 0.4520192663949611,
914
+ "grad_norm": 52.764652252197266,
915
+ "learning_rate": 2.7629629629629636e-06,
916
+ "logits/chosen": -2.569906711578369,
917
+ "logits/rejected": -2.4188952445983887,
918
+ "logps/chosen": -109.3153305053711,
919
+ "logps/rejected": -122.66573333740234,
920
+ "loss": 0.6618,
921
+ "rewards/accuracies": 0.7749999761581421,
922
+ "rewards/chosen": -2.8832812309265137,
923
+ "rewards/margins": 1.2652729749679565,
924
+ "rewards/rejected": -4.14855432510376,
925
+ "step": 610
926
+ },
927
+ {
928
+ "epoch": 0.4594294183030752,
929
+ "grad_norm": 56.053504943847656,
930
+ "learning_rate": 2.7259259259259264e-06,
931
+ "logits/chosen": -2.3336873054504395,
932
+ "logits/rejected": -2.220808267593384,
933
+ "logps/chosen": -97.85954284667969,
934
+ "logps/rejected": -117.603759765625,
935
+ "loss": 0.5057,
936
+ "rewards/accuracies": 0.737500011920929,
937
+ "rewards/chosen": -2.430934190750122,
938
+ "rewards/margins": 1.6638364791870117,
939
+ "rewards/rejected": -4.094770908355713,
940
+ "step": 620
941
+ },
942
+ {
943
+ "epoch": 0.4668395702111893,
944
+ "grad_norm": 55.09632110595703,
945
+ "learning_rate": 2.6888888888888892e-06,
946
+ "logits/chosen": -2.3458609580993652,
947
+ "logits/rejected": -2.090057611465454,
948
+ "logps/chosen": -99.01778411865234,
949
+ "logps/rejected": -116.4244155883789,
950
+ "loss": 0.5289,
951
+ "rewards/accuracies": 0.737500011920929,
952
+ "rewards/chosen": -2.8742129802703857,
953
+ "rewards/margins": 1.1919338703155518,
954
+ "rewards/rejected": -4.066147327423096,
955
+ "step": 630
956
+ },
957
+ {
958
+ "epoch": 0.47424972211930344,
959
+ "grad_norm": 46.486846923828125,
960
+ "learning_rate": 2.651851851851852e-06,
961
+ "logits/chosen": -2.6187617778778076,
962
+ "logits/rejected": -2.299057722091675,
963
+ "logps/chosen": -106.09327697753906,
964
+ "logps/rejected": -121.18121337890625,
965
+ "loss": 0.5003,
966
+ "rewards/accuracies": 0.737500011920929,
967
+ "rewards/chosen": -2.976048469543457,
968
+ "rewards/margins": 1.5380513668060303,
969
+ "rewards/rejected": -4.514100074768066,
970
+ "step": 640
971
+ },
972
+ {
973
+ "epoch": 0.48165987402741756,
974
+ "grad_norm": 66.68344116210938,
975
+ "learning_rate": 2.614814814814815e-06,
976
+ "logits/chosen": -2.4219138622283936,
977
+ "logits/rejected": -2.2749531269073486,
978
+ "logps/chosen": -101.74494934082031,
979
+ "logps/rejected": -117.6960678100586,
980
+ "loss": 0.548,
981
+ "rewards/accuracies": 0.6875,
982
+ "rewards/chosen": -3.033750534057617,
983
+ "rewards/margins": 1.507805347442627,
984
+ "rewards/rejected": -4.541555881500244,
985
+ "step": 650
986
+ },
987
+ {
988
+ "epoch": 0.48907002593553167,
989
+ "grad_norm": 63.8469123840332,
990
+ "learning_rate": 2.577777777777778e-06,
991
+ "logits/chosen": -2.4797866344451904,
992
+ "logits/rejected": -2.3341572284698486,
993
+ "logps/chosen": -106.57930755615234,
994
+ "logps/rejected": -124.18519592285156,
995
+ "loss": 0.5449,
996
+ "rewards/accuracies": 0.7124999761581421,
997
+ "rewards/chosen": -3.2545254230499268,
998
+ "rewards/margins": 1.5712066888809204,
999
+ "rewards/rejected": -4.825732231140137,
1000
+ "step": 660
1001
+ },
1002
+ {
1003
+ "epoch": 0.4964801778436458,
1004
+ "grad_norm": 36.812870025634766,
1005
+ "learning_rate": 2.540740740740741e-06,
1006
+ "logits/chosen": -2.4504082202911377,
1007
+ "logits/rejected": -2.317399501800537,
1008
+ "logps/chosen": -101.97000122070312,
1009
+ "logps/rejected": -122.93861389160156,
1010
+ "loss": 0.4462,
1011
+ "rewards/accuracies": 0.800000011920929,
1012
+ "rewards/chosen": -2.8019375801086426,
1013
+ "rewards/margins": 1.7157955169677734,
1014
+ "rewards/rejected": -4.517733573913574,
1015
+ "step": 670
1016
+ },
1017
+ {
1018
+ "epoch": 0.50389032975176,
1019
+ "grad_norm": 87.01313781738281,
1020
+ "learning_rate": 2.503703703703704e-06,
1021
+ "logits/chosen": -2.5619702339172363,
1022
+ "logits/rejected": -2.2939906120300293,
1023
+ "logps/chosen": -104.42756652832031,
1024
+ "logps/rejected": -122.24269104003906,
1025
+ "loss": 0.5498,
1026
+ "rewards/accuracies": 0.7250000238418579,
1027
+ "rewards/chosen": -2.752537250518799,
1028
+ "rewards/margins": 1.4413821697235107,
1029
+ "rewards/rejected": -4.1939191818237305,
1030
+ "step": 680
1031
+ },
1032
+ {
1033
+ "epoch": 0.5113004816598741,
1034
+ "grad_norm": 33.50537872314453,
1035
+ "learning_rate": 2.466666666666667e-06,
1036
+ "logits/chosen": -2.397477626800537,
1037
+ "logits/rejected": -2.298727035522461,
1038
+ "logps/chosen": -101.79267883300781,
1039
+ "logps/rejected": -121.082763671875,
1040
+ "loss": 0.4498,
1041
+ "rewards/accuracies": 0.75,
1042
+ "rewards/chosen": -2.776843309402466,
1043
+ "rewards/margins": 1.5951542854309082,
1044
+ "rewards/rejected": -4.371997833251953,
1045
+ "step": 690
1046
+ },
1047
+ {
1048
+ "epoch": 0.5187106335679882,
1049
+ "grad_norm": 28.241764068603516,
1050
+ "learning_rate": 2.42962962962963e-06,
1051
+ "logits/chosen": -2.191267251968384,
1052
+ "logits/rejected": -2.1144583225250244,
1053
+ "logps/chosen": -105.1906967163086,
1054
+ "logps/rejected": -119.65342712402344,
1055
+ "loss": 0.3694,
1056
+ "rewards/accuracies": 0.8374999761581421,
1057
+ "rewards/chosen": -2.89141583442688,
1058
+ "rewards/margins": 1.8759052753448486,
1059
+ "rewards/rejected": -4.7673211097717285,
1060
+ "step": 700
1061
+ },
1062
+ {
1063
+ "epoch": 0.5261207854761023,
1064
+ "grad_norm": 93.26385498046875,
1065
+ "learning_rate": 2.3925925925925927e-06,
1066
+ "logits/chosen": -2.700456142425537,
1067
+ "logits/rejected": -2.5114896297454834,
1068
+ "logps/chosen": -102.400146484375,
1069
+ "logps/rejected": -129.38079833984375,
1070
+ "loss": 0.456,
1071
+ "rewards/accuracies": 0.737500011920929,
1072
+ "rewards/chosen": -2.760223627090454,
1073
+ "rewards/margins": 2.1241064071655273,
1074
+ "rewards/rejected": -4.8843302726745605,
1075
+ "step": 710
1076
+ },
1077
+ {
1078
+ "epoch": 0.5335309373842164,
1079
+ "grad_norm": 43.060646057128906,
1080
+ "learning_rate": 2.3555555555555555e-06,
1081
+ "logits/chosen": -2.715698719024658,
1082
+ "logits/rejected": -2.641836643218994,
1083
+ "logps/chosen": -111.295166015625,
1084
+ "logps/rejected": -129.7608184814453,
1085
+ "loss": 0.5273,
1086
+ "rewards/accuracies": 0.7250000238418579,
1087
+ "rewards/chosen": -3.2993626594543457,
1088
+ "rewards/margins": 1.6410433053970337,
1089
+ "rewards/rejected": -4.940405368804932,
1090
+ "step": 720
1091
+ },
1092
+ {
1093
+ "epoch": 0.5409410892923305,
1094
+ "grad_norm": 92.00672912597656,
1095
+ "learning_rate": 2.318518518518519e-06,
1096
+ "logits/chosen": -2.390772819519043,
1097
+ "logits/rejected": -2.2357537746429443,
1098
+ "logps/chosen": -106.68476867675781,
1099
+ "logps/rejected": -131.3654327392578,
1100
+ "loss": 0.5709,
1101
+ "rewards/accuracies": 0.699999988079071,
1102
+ "rewards/chosen": -3.0549259185791016,
1103
+ "rewards/margins": 1.6485790014266968,
1104
+ "rewards/rejected": -4.703505039215088,
1105
+ "step": 730
1106
+ },
1107
+ {
1108
+ "epoch": 0.5483512412004447,
1109
+ "grad_norm": 57.26805114746094,
1110
+ "learning_rate": 2.2814814814814816e-06,
1111
+ "logits/chosen": -2.196866035461426,
1112
+ "logits/rejected": -2.0569887161254883,
1113
+ "logps/chosen": -100.42630004882812,
1114
+ "logps/rejected": -125.41304779052734,
1115
+ "loss": 0.4778,
1116
+ "rewards/accuracies": 0.75,
1117
+ "rewards/chosen": -2.8341736793518066,
1118
+ "rewards/margins": 1.9348560571670532,
1119
+ "rewards/rejected": -4.76902961730957,
1120
+ "step": 740
1121
+ },
1122
+ {
1123
+ "epoch": 0.5557613931085588,
1124
+ "grad_norm": 58.62699508666992,
1125
+ "learning_rate": 2.2444444444444445e-06,
1126
+ "logits/chosen": -2.461791753768921,
1127
+ "logits/rejected": -2.336097240447998,
1128
+ "logps/chosen": -113.08296966552734,
1129
+ "logps/rejected": -132.7778778076172,
1130
+ "loss": 0.5236,
1131
+ "rewards/accuracies": 0.762499988079071,
1132
+ "rewards/chosen": -3.270090103149414,
1133
+ "rewards/margins": 1.6124324798583984,
1134
+ "rewards/rejected": -4.882522106170654,
1135
+ "step": 750
1136
+ },
1137
+ {
1138
+ "epoch": 0.5631715450166729,
1139
+ "grad_norm": 41.212913513183594,
1140
+ "learning_rate": 2.2074074074074077e-06,
1141
+ "logits/chosen": -2.4121994972229004,
1142
+ "logits/rejected": -2.247474193572998,
1143
+ "logps/chosen": -105.69578552246094,
1144
+ "logps/rejected": -122.80550384521484,
1145
+ "loss": 0.3995,
1146
+ "rewards/accuracies": 0.7875000238418579,
1147
+ "rewards/chosen": -2.9703726768493652,
1148
+ "rewards/margins": 1.6150137186050415,
1149
+ "rewards/rejected": -4.585386276245117,
1150
+ "step": 760
1151
+ },
1152
+ {
1153
+ "epoch": 0.570581696924787,
1154
+ "grad_norm": 75.84191131591797,
1155
+ "learning_rate": 2.1703703703703705e-06,
1156
+ "logits/chosen": -2.296901226043701,
1157
+ "logits/rejected": -2.2925124168395996,
1158
+ "logps/chosen": -110.8564224243164,
1159
+ "logps/rejected": -127.59207916259766,
1160
+ "loss": 0.5596,
1161
+ "rewards/accuracies": 0.7250000238418579,
1162
+ "rewards/chosen": -3.3091530799865723,
1163
+ "rewards/margins": 1.2775094509124756,
1164
+ "rewards/rejected": -4.5866618156433105,
1165
+ "step": 770
1166
+ },
1167
+ {
1168
+ "epoch": 0.5779918488329011,
1169
+ "grad_norm": 89.46887969970703,
1170
+ "learning_rate": 2.133333333333334e-06,
1171
+ "logits/chosen": -2.6353976726531982,
1172
+ "logits/rejected": -2.512312173843384,
1173
+ "logps/chosen": -102.060791015625,
1174
+ "logps/rejected": -132.6158447265625,
1175
+ "loss": 0.3622,
1176
+ "rewards/accuracies": 0.800000011920929,
1177
+ "rewards/chosen": -2.7914113998413086,
1178
+ "rewards/margins": 2.160378932952881,
1179
+ "rewards/rejected": -4.951790809631348,
1180
+ "step": 780
1181
+ },
1182
+ {
1183
+ "epoch": 0.5854020007410152,
1184
+ "grad_norm": 69.98067474365234,
1185
+ "learning_rate": 2.0962962962962966e-06,
1186
+ "logits/chosen": -2.48567533493042,
1187
+ "logits/rejected": -2.188729763031006,
1188
+ "logps/chosen": -97.02547454833984,
1189
+ "logps/rejected": -121.714111328125,
1190
+ "loss": 0.4915,
1191
+ "rewards/accuracies": 0.7124999761581421,
1192
+ "rewards/chosen": -2.587202548980713,
1193
+ "rewards/margins": 1.8987194299697876,
1194
+ "rewards/rejected": -4.485922336578369,
1195
+ "step": 790
1196
+ },
1197
+ {
1198
+ "epoch": 0.5928121526491293,
1199
+ "grad_norm": 22.259716033935547,
1200
+ "learning_rate": 2.0592592592592595e-06,
1201
+ "logits/chosen": -2.56459641456604,
1202
+ "logits/rejected": -2.396904945373535,
1203
+ "logps/chosen": -99.90885925292969,
1204
+ "logps/rejected": -127.66976165771484,
1205
+ "loss": 0.3965,
1206
+ "rewards/accuracies": 0.75,
1207
+ "rewards/chosen": -2.7819571495056152,
1208
+ "rewards/margins": 1.9999752044677734,
1209
+ "rewards/rejected": -4.781932830810547,
1210
+ "step": 800
1211
+ },
1212
+ {
1213
+ "epoch": 0.6002223045572435,
1214
+ "grad_norm": 83.96212005615234,
1215
+ "learning_rate": 2.0222222222222223e-06,
1216
+ "logits/chosen": -2.694140672683716,
1217
+ "logits/rejected": -2.4873809814453125,
1218
+ "logps/chosen": -99.41667175292969,
1219
+ "logps/rejected": -127.27046203613281,
1220
+ "loss": 0.3876,
1221
+ "rewards/accuracies": 0.800000011920929,
1222
+ "rewards/chosen": -2.891671657562256,
1223
+ "rewards/margins": 2.2859904766082764,
1224
+ "rewards/rejected": -5.177661895751953,
1225
+ "step": 810
1226
+ },
1227
+ {
1228
+ "epoch": 0.6076324564653576,
1229
+ "grad_norm": 67.90824127197266,
1230
+ "learning_rate": 1.985185185185185e-06,
1231
+ "logits/chosen": -2.767648220062256,
1232
+ "logits/rejected": -2.585026264190674,
1233
+ "logps/chosen": -104.31745910644531,
1234
+ "logps/rejected": -125.73681640625,
1235
+ "loss": 0.5279,
1236
+ "rewards/accuracies": 0.762499988079071,
1237
+ "rewards/chosen": -3.1707539558410645,
1238
+ "rewards/margins": 1.6347720623016357,
1239
+ "rewards/rejected": -4.8055267333984375,
1240
+ "step": 820
1241
+ },
1242
+ {
1243
+ "epoch": 0.6150426083734717,
1244
+ "grad_norm": 33.568599700927734,
1245
+ "learning_rate": 1.9481481481481484e-06,
1246
+ "logits/chosen": -2.702396869659424,
1247
+ "logits/rejected": -2.4589433670043945,
1248
+ "logps/chosen": -103.00837707519531,
1249
+ "logps/rejected": -135.2923583984375,
1250
+ "loss": 0.3985,
1251
+ "rewards/accuracies": 0.824999988079071,
1252
+ "rewards/chosen": -3.202925443649292,
1253
+ "rewards/margins": 2.087952136993408,
1254
+ "rewards/rejected": -5.290877342224121,
1255
+ "step": 830
1256
+ },
1257
+ {
1258
+ "epoch": 0.6224527602815858,
1259
+ "grad_norm": 140.95526123046875,
1260
+ "learning_rate": 1.9111111111111112e-06,
1261
+ "logits/chosen": -2.784031629562378,
1262
+ "logits/rejected": -2.869075298309326,
1263
+ "logps/chosen": -113.4225845336914,
1264
+ "logps/rejected": -129.70352172851562,
1265
+ "loss": 0.5025,
1266
+ "rewards/accuracies": 0.737500011920929,
1267
+ "rewards/chosen": -3.5999884605407715,
1268
+ "rewards/margins": 1.6119463443756104,
1269
+ "rewards/rejected": -5.211935520172119,
1270
+ "step": 840
1271
+ },
1272
+ {
1273
+ "epoch": 0.6298629121896999,
1274
+ "grad_norm": 52.54026412963867,
1275
+ "learning_rate": 1.8740740740740743e-06,
1276
+ "logits/chosen": -2.8133158683776855,
1277
+ "logits/rejected": -2.6122548580169678,
1278
+ "logps/chosen": -111.7987289428711,
1279
+ "logps/rejected": -136.263671875,
1280
+ "loss": 0.4448,
1281
+ "rewards/accuracies": 0.800000011920929,
1282
+ "rewards/chosen": -3.69873046875,
1283
+ "rewards/margins": 1.8727216720581055,
1284
+ "rewards/rejected": -5.5714521408081055,
1285
+ "step": 850
1286
+ },
1287
+ {
1288
+ "epoch": 0.637273064097814,
1289
+ "grad_norm": 92.39305114746094,
1290
+ "learning_rate": 1.8407407407407409e-06,
1291
+ "logits/chosen": -2.6346824169158936,
1292
+ "logits/rejected": -2.5525918006896973,
1293
+ "logps/chosen": -126.9793930053711,
1294
+ "logps/rejected": -136.64266967773438,
1295
+ "loss": 0.6642,
1296
+ "rewards/accuracies": 0.6625000238418579,
1297
+ "rewards/chosen": -4.109932899475098,
1298
+ "rewards/margins": 1.4245363473892212,
1299
+ "rewards/rejected": -5.534468650817871,
1300
+ "step": 860
1301
+ },
1302
+ {
1303
+ "epoch": 0.6446832160059282,
1304
+ "grad_norm": 123.8089370727539,
1305
+ "learning_rate": 1.803703703703704e-06,
1306
+ "logits/chosen": -2.7528748512268066,
1307
+ "logits/rejected": -2.5514931678771973,
1308
+ "logps/chosen": -108.72850036621094,
1309
+ "logps/rejected": -127.7916488647461,
1310
+ "loss": 0.5106,
1311
+ "rewards/accuracies": 0.762499988079071,
1312
+ "rewards/chosen": -3.318143367767334,
1313
+ "rewards/margins": 2.0562808513641357,
1314
+ "rewards/rejected": -5.374424934387207,
1315
+ "step": 870
1316
+ },
1317
+ {
1318
+ "epoch": 0.6520933679140423,
1319
+ "grad_norm": 42.69985580444336,
1320
+ "learning_rate": 1.7666666666666668e-06,
1321
+ "logits/chosen": -2.861846923828125,
1322
+ "logits/rejected": -2.5939736366271973,
1323
+ "logps/chosen": -106.8058090209961,
1324
+ "logps/rejected": -130.95132446289062,
1325
+ "loss": 0.3698,
1326
+ "rewards/accuracies": 0.8374999761581421,
1327
+ "rewards/chosen": -3.020836353302002,
1328
+ "rewards/margins": 2.2084438800811768,
1329
+ "rewards/rejected": -5.2292799949646,
1330
+ "step": 880
1331
+ },
1332
+ {
1333
+ "epoch": 0.6595035198221564,
1334
+ "grad_norm": 100.36827850341797,
1335
+ "learning_rate": 1.7296296296296298e-06,
1336
+ "logits/chosen": -2.8901448249816895,
1337
+ "logits/rejected": -2.638826608657837,
1338
+ "logps/chosen": -113.2393798828125,
1339
+ "logps/rejected": -136.57003784179688,
1340
+ "loss": 0.506,
1341
+ "rewards/accuracies": 0.762499988079071,
1342
+ "rewards/chosen": -3.743712902069092,
1343
+ "rewards/margins": 1.8884308338165283,
1344
+ "rewards/rejected": -5.632143974304199,
1345
+ "step": 890
1346
+ },
1347
+ {
1348
+ "epoch": 0.6669136717302705,
1349
+ "grad_norm": 47.63283157348633,
1350
+ "learning_rate": 1.6925925925925926e-06,
1351
+ "logits/chosen": -2.751713991165161,
1352
+ "logits/rejected": -2.6139540672302246,
1353
+ "logps/chosen": -117.19677734375,
1354
+ "logps/rejected": -131.9976806640625,
1355
+ "loss": 0.6344,
1356
+ "rewards/accuracies": 0.737500011920929,
1357
+ "rewards/chosen": -3.642165422439575,
1358
+ "rewards/margins": 1.7322568893432617,
1359
+ "rewards/rejected": -5.374422550201416,
1360
+ "step": 900
1361
+ },
1362
+ {
1363
+ "epoch": 0.6743238236383846,
1364
+ "grad_norm": 112.51361083984375,
1365
+ "learning_rate": 1.6555555555555559e-06,
1366
+ "logits/chosen": -2.636543035507202,
1367
+ "logits/rejected": -2.5468497276306152,
1368
+ "logps/chosen": -112.26319885253906,
1369
+ "logps/rejected": -135.6024932861328,
1370
+ "loss": 0.4688,
1371
+ "rewards/accuracies": 0.7749999761581421,
1372
+ "rewards/chosen": -3.398538589477539,
1373
+ "rewards/margins": 1.8205482959747314,
1374
+ "rewards/rejected": -5.21908712387085,
1375
+ "step": 910
1376
+ },
1377
+ {
1378
+ "epoch": 0.6817339755464987,
1379
+ "grad_norm": 63.83774185180664,
1380
+ "learning_rate": 1.6185185185185187e-06,
1381
+ "logits/chosen": -2.567595958709717,
1382
+ "logits/rejected": -2.4459292888641357,
1383
+ "logps/chosen": -113.2123031616211,
1384
+ "logps/rejected": -133.6845245361328,
1385
+ "loss": 0.5357,
1386
+ "rewards/accuracies": 0.762499988079071,
1387
+ "rewards/chosen": -3.6509101390838623,
1388
+ "rewards/margins": 1.547790765762329,
1389
+ "rewards/rejected": -5.19870138168335,
1390
+ "step": 920
1391
+ },
1392
+ {
1393
+ "epoch": 0.6891441274546128,
1394
+ "grad_norm": 97.52435302734375,
1395
+ "learning_rate": 1.5814814814814816e-06,
1396
+ "logits/chosen": -2.5026490688323975,
1397
+ "logits/rejected": -2.3349032402038574,
1398
+ "logps/chosen": -110.58065032958984,
1399
+ "logps/rejected": -134.26712036132812,
1400
+ "loss": 0.4725,
1401
+ "rewards/accuracies": 0.737500011920929,
1402
+ "rewards/chosen": -3.394016981124878,
1403
+ "rewards/margins": 1.8871221542358398,
1404
+ "rewards/rejected": -5.281139373779297,
1405
+ "step": 930
1406
+ },
1407
+ {
1408
+ "epoch": 0.696554279362727,
1409
+ "grad_norm": 44.30360412597656,
1410
+ "learning_rate": 1.5444444444444446e-06,
1411
+ "logits/chosen": -2.691323757171631,
1412
+ "logits/rejected": -2.4929356575012207,
1413
+ "logps/chosen": -98.93566131591797,
1414
+ "logps/rejected": -121.6566162109375,
1415
+ "loss": 0.3602,
1416
+ "rewards/accuracies": 0.8374999761581421,
1417
+ "rewards/chosen": -2.962592601776123,
1418
+ "rewards/margins": 2.068859100341797,
1419
+ "rewards/rejected": -5.03145170211792,
1420
+ "step": 940
1421
+ },
1422
+ {
1423
+ "epoch": 0.7039644312708411,
1424
+ "grad_norm": 38.775630950927734,
1425
+ "learning_rate": 1.5074074074074074e-06,
1426
+ "logits/chosen": -2.431483745574951,
1427
+ "logits/rejected": -2.3471839427948,
1428
+ "logps/chosen": -107.6336441040039,
1429
+ "logps/rejected": -129.42567443847656,
1430
+ "loss": 0.4332,
1431
+ "rewards/accuracies": 0.7875000238418579,
1432
+ "rewards/chosen": -3.1802752017974854,
1433
+ "rewards/margins": 1.8515145778656006,
1434
+ "rewards/rejected": -5.031789779663086,
1435
+ "step": 950
1436
+ },
1437
+ {
1438
+ "epoch": 0.7113745831789552,
1439
+ "grad_norm": 94.63509368896484,
1440
+ "learning_rate": 1.4703703703703707e-06,
1441
+ "logits/chosen": -2.6569230556488037,
1442
+ "logits/rejected": -2.5298376083374023,
1443
+ "logps/chosen": -112.77482604980469,
1444
+ "logps/rejected": -136.20745849609375,
1445
+ "loss": 0.5287,
1446
+ "rewards/accuracies": 0.737500011920929,
1447
+ "rewards/chosen": -3.546157121658325,
1448
+ "rewards/margins": 2.0040981769561768,
1449
+ "rewards/rejected": -5.55025577545166,
1450
+ "step": 960
1451
+ },
1452
+ {
1453
+ "epoch": 0.7187847350870693,
1454
+ "grad_norm": 63.06848907470703,
1455
+ "learning_rate": 1.4333333333333335e-06,
1456
+ "logits/chosen": -2.6780967712402344,
1457
+ "logits/rejected": -2.4963746070861816,
1458
+ "logps/chosen": -102.239990234375,
1459
+ "logps/rejected": -125.75809478759766,
1460
+ "loss": 0.558,
1461
+ "rewards/accuracies": 0.6875,
1462
+ "rewards/chosen": -3.2254154682159424,
1463
+ "rewards/margins": 1.714345932006836,
1464
+ "rewards/rejected": -4.939761161804199,
1465
+ "step": 970
1466
+ },
1467
+ {
1468
+ "epoch": 0.7261948869951834,
1469
+ "grad_norm": 113.9366683959961,
1470
+ "learning_rate": 1.3962962962962963e-06,
1471
+ "logits/chosen": -2.5560450553894043,
1472
+ "logits/rejected": -2.4831178188323975,
1473
+ "logps/chosen": -114.14649963378906,
1474
+ "logps/rejected": -129.1222381591797,
1475
+ "loss": 0.6047,
1476
+ "rewards/accuracies": 0.6875,
1477
+ "rewards/chosen": -3.847761869430542,
1478
+ "rewards/margins": 1.4522716999053955,
1479
+ "rewards/rejected": -5.3000335693359375,
1480
+ "step": 980
1481
+ },
1482
+ {
1483
+ "epoch": 0.7336050389032975,
1484
+ "grad_norm": 122.37875366210938,
1485
+ "learning_rate": 1.3592592592592594e-06,
1486
+ "logits/chosen": -2.778879165649414,
1487
+ "logits/rejected": -2.591799736022949,
1488
+ "logps/chosen": -103.28349304199219,
1489
+ "logps/rejected": -119.67398834228516,
1490
+ "loss": 0.4954,
1491
+ "rewards/accuracies": 0.762499988079071,
1492
+ "rewards/chosen": -2.8379952907562256,
1493
+ "rewards/margins": 1.7630186080932617,
1494
+ "rewards/rejected": -4.601014137268066,
1495
+ "step": 990
1496
+ },
1497
+ {
1498
+ "epoch": 0.7410151908114117,
1499
+ "grad_norm": 104.96907043457031,
1500
+ "learning_rate": 1.3222222222222222e-06,
1501
+ "logits/chosen": -2.5238616466522217,
1502
+ "logits/rejected": -2.4784162044525146,
1503
+ "logps/chosen": -107.37699890136719,
1504
+ "logps/rejected": -130.21548461914062,
1505
+ "loss": 0.5063,
1506
+ "rewards/accuracies": 0.7250000238418579,
1507
+ "rewards/chosen": -3.024723529815674,
1508
+ "rewards/margins": 1.954242467880249,
1509
+ "rewards/rejected": -4.978966236114502,
1510
+ "step": 1000
1511
+ },
1512
+ {
1513
+ "epoch": 0.7484253427195258,
1514
+ "grad_norm": 44.657108306884766,
1515
+ "learning_rate": 1.2851851851851855e-06,
1516
+ "logits/chosen": -2.546823024749756,
1517
+ "logits/rejected": -2.4334805011749268,
1518
+ "logps/chosen": -106.6165771484375,
1519
+ "logps/rejected": -125.7170639038086,
1520
+ "loss": 0.3697,
1521
+ "rewards/accuracies": 0.8500000238418579,
1522
+ "rewards/chosen": -2.4411277770996094,
1523
+ "rewards/margins": 2.227128505706787,
1524
+ "rewards/rejected": -4.668255805969238,
1525
+ "step": 1010
1526
+ },
1527
+ {
1528
+ "epoch": 0.7558354946276399,
1529
+ "grad_norm": 90.46170806884766,
1530
+ "learning_rate": 1.248148148148148e-06,
1531
+ "logits/chosen": -2.828085422515869,
1532
+ "logits/rejected": -2.6428349018096924,
1533
+ "logps/chosen": -105.6900405883789,
1534
+ "logps/rejected": -133.78445434570312,
1535
+ "loss": 0.3561,
1536
+ "rewards/accuracies": 0.800000011920929,
1537
+ "rewards/chosen": -2.77301025390625,
1538
+ "rewards/margins": 2.5772953033447266,
1539
+ "rewards/rejected": -5.350305080413818,
1540
+ "step": 1020
1541
+ },
1542
+ {
1543
+ "epoch": 0.763245646535754,
1544
+ "grad_norm": 21.936511993408203,
1545
+ "learning_rate": 1.2111111111111111e-06,
1546
+ "logits/chosen": -2.6992528438568115,
1547
+ "logits/rejected": -2.567366361618042,
1548
+ "logps/chosen": -103.77992248535156,
1549
+ "logps/rejected": -136.04214477539062,
1550
+ "loss": 0.405,
1551
+ "rewards/accuracies": 0.8500000238418579,
1552
+ "rewards/chosen": -3.2632954120635986,
1553
+ "rewards/margins": 2.4483699798583984,
1554
+ "rewards/rejected": -5.711665630340576,
1555
+ "step": 1030
1556
+ },
1557
+ {
1558
+ "epoch": 0.7706557984438681,
1559
+ "grad_norm": 32.31972122192383,
1560
+ "learning_rate": 1.1740740740740742e-06,
1561
+ "logits/chosen": -2.772641897201538,
1562
+ "logits/rejected": -2.6311516761779785,
1563
+ "logps/chosen": -104.34123229980469,
1564
+ "logps/rejected": -128.68487548828125,
1565
+ "loss": 0.5004,
1566
+ "rewards/accuracies": 0.7250000238418579,
1567
+ "rewards/chosen": -3.079659938812256,
1568
+ "rewards/margins": 1.8224769830703735,
1569
+ "rewards/rejected": -4.90213680267334,
1570
+ "step": 1040
1571
+ },
1572
+ {
1573
+ "epoch": 0.7780659503519822,
1574
+ "grad_norm": 162.9198455810547,
1575
+ "learning_rate": 1.1370370370370372e-06,
1576
+ "logits/chosen": -2.8467555046081543,
1577
+ "logits/rejected": -2.515394926071167,
1578
+ "logps/chosen": -105.55623626708984,
1579
+ "logps/rejected": -125.57078552246094,
1580
+ "loss": 0.5177,
1581
+ "rewards/accuracies": 0.7250000238418579,
1582
+ "rewards/chosen": -3.2788283824920654,
1583
+ "rewards/margins": 1.9488775730133057,
1584
+ "rewards/rejected": -5.227705955505371,
1585
+ "step": 1050
1586
+ },
1587
+ {
1588
+ "epoch": 0.7854761022600963,
1589
+ "grad_norm": 72.62833404541016,
1590
+ "learning_rate": 1.1e-06,
1591
+ "logits/chosen": -2.7466042041778564,
1592
+ "logits/rejected": -2.6573309898376465,
1593
+ "logps/chosen": -108.9305419921875,
1594
+ "logps/rejected": -129.41574096679688,
1595
+ "loss": 0.5488,
1596
+ "rewards/accuracies": 0.75,
1597
+ "rewards/chosen": -3.1458160877227783,
1598
+ "rewards/margins": 1.7604339122772217,
1599
+ "rewards/rejected": -4.906250476837158,
1600
+ "step": 1060
1601
+ },
1602
+ {
1603
+ "epoch": 0.7928862541682105,
1604
+ "grad_norm": 33.611995697021484,
1605
+ "learning_rate": 1.062962962962963e-06,
1606
+ "logits/chosen": -2.7340753078460693,
1607
+ "logits/rejected": -2.536836862564087,
1608
+ "logps/chosen": -108.49119567871094,
1609
+ "logps/rejected": -133.3302764892578,
1610
+ "loss": 0.4382,
1611
+ "rewards/accuracies": 0.800000011920929,
1612
+ "rewards/chosen": -3.197749614715576,
1613
+ "rewards/margins": 2.093479633331299,
1614
+ "rewards/rejected": -5.291229724884033,
1615
+ "step": 1070
1616
+ },
1617
+ {
1618
+ "epoch": 0.8002964060763246,
1619
+ "grad_norm": 74.17855834960938,
1620
+ "learning_rate": 1.0259259259259261e-06,
1621
+ "logits/chosen": -2.8403737545013428,
1622
+ "logits/rejected": -2.6320414543151855,
1623
+ "logps/chosen": -114.62956237792969,
1624
+ "logps/rejected": -133.36578369140625,
1625
+ "loss": 0.4072,
1626
+ "rewards/accuracies": 0.7749999761581421,
1627
+ "rewards/chosen": -3.09629487991333,
1628
+ "rewards/margins": 2.1655564308166504,
1629
+ "rewards/rejected": -5.2618513107299805,
1630
+ "step": 1080
1631
+ },
1632
+ {
1633
+ "epoch": 0.8077065579844387,
1634
+ "grad_norm": 54.3246955871582,
1635
+ "learning_rate": 9.88888888888889e-07,
1636
+ "logits/chosen": -2.557582139968872,
1637
+ "logits/rejected": -2.426109552383423,
1638
+ "logps/chosen": -110.915283203125,
1639
+ "logps/rejected": -134.53126525878906,
1640
+ "loss": 0.4985,
1641
+ "rewards/accuracies": 0.7749999761581421,
1642
+ "rewards/chosen": -3.3661487102508545,
1643
+ "rewards/margins": 1.8312829732894897,
1644
+ "rewards/rejected": -5.197432041168213,
1645
+ "step": 1090
1646
+ },
1647
+ {
1648
+ "epoch": 0.8151167098925528,
1649
+ "grad_norm": 33.4342041015625,
1650
+ "learning_rate": 9.51851851851852e-07,
1651
+ "logits/chosen": -2.843123197555542,
1652
+ "logits/rejected": -2.759000301361084,
1653
+ "logps/chosen": -112.1214828491211,
1654
+ "logps/rejected": -135.30357360839844,
1655
+ "loss": 0.4424,
1656
+ "rewards/accuracies": 0.7749999761581421,
1657
+ "rewards/chosen": -3.535928249359131,
1658
+ "rewards/margins": 1.8440746068954468,
1659
+ "rewards/rejected": -5.380003452301025,
1660
+ "step": 1100
1661
+ },
1662
+ {
1663
+ "epoch": 0.8225268618006669,
1664
+ "grad_norm": 61.59471130371094,
1665
+ "learning_rate": 9.14814814814815e-07,
1666
+ "logits/chosen": -2.5715582370758057,
1667
+ "logits/rejected": -2.4673850536346436,
1668
+ "logps/chosen": -109.1755142211914,
1669
+ "logps/rejected": -132.7253875732422,
1670
+ "loss": 0.5435,
1671
+ "rewards/accuracies": 0.737500011920929,
1672
+ "rewards/chosen": -3.2600913047790527,
1673
+ "rewards/margins": 2.2205255031585693,
1674
+ "rewards/rejected": -5.480616569519043,
1675
+ "step": 1110
1676
+ },
1677
+ {
1678
+ "epoch": 0.829937013708781,
1679
+ "grad_norm": 81.85456848144531,
1680
+ "learning_rate": 8.777777777777778e-07,
1681
+ "logits/chosen": -2.6088593006134033,
1682
+ "logits/rejected": -2.4452264308929443,
1683
+ "logps/chosen": -109.85011291503906,
1684
+ "logps/rejected": -135.8500518798828,
1685
+ "loss": 0.3276,
1686
+ "rewards/accuracies": 0.887499988079071,
1687
+ "rewards/chosen": -3.3149032592773438,
1688
+ "rewards/margins": 2.037301778793335,
1689
+ "rewards/rejected": -5.3522047996521,
1690
+ "step": 1120
1691
+ },
1692
+ {
1693
+ "epoch": 0.8373471656168952,
1694
+ "grad_norm": 47.77298355102539,
1695
+ "learning_rate": 8.407407407407408e-07,
1696
+ "logits/chosen": -2.701817035675049,
1697
+ "logits/rejected": -2.6270086765289307,
1698
+ "logps/chosen": -108.1476058959961,
1699
+ "logps/rejected": -127.43824768066406,
1700
+ "loss": 0.5859,
1701
+ "rewards/accuracies": 0.7124999761581421,
1702
+ "rewards/chosen": -3.2532172203063965,
1703
+ "rewards/margins": 1.7242540121078491,
1704
+ "rewards/rejected": -4.977471351623535,
1705
+ "step": 1130
1706
+ },
1707
+ {
1708
+ "epoch": 0.8447573175250093,
1709
+ "grad_norm": 70.82620239257812,
1710
+ "learning_rate": 8.037037037037038e-07,
1711
+ "logits/chosen": -2.738939046859741,
1712
+ "logits/rejected": -2.635751247406006,
1713
+ "logps/chosen": -101.56913757324219,
1714
+ "logps/rejected": -134.12655639648438,
1715
+ "loss": 0.3653,
1716
+ "rewards/accuracies": 0.824999988079071,
1717
+ "rewards/chosen": -2.848381280899048,
1718
+ "rewards/margins": 2.132460832595825,
1719
+ "rewards/rejected": -4.980842590332031,
1720
+ "step": 1140
1721
+ },
1722
+ {
1723
+ "epoch": 0.8521674694331234,
1724
+ "grad_norm": 44.759246826171875,
1725
+ "learning_rate": 7.666666666666667e-07,
1726
+ "logits/chosen": -2.4769434928894043,
1727
+ "logits/rejected": -2.237217426300049,
1728
+ "logps/chosen": -103.843994140625,
1729
+ "logps/rejected": -129.95098876953125,
1730
+ "loss": 0.3671,
1731
+ "rewards/accuracies": 0.8500000238418579,
1732
+ "rewards/chosen": -3.2238991260528564,
1733
+ "rewards/margins": 2.1413819789886475,
1734
+ "rewards/rejected": -5.365281105041504,
1735
+ "step": 1150
1736
+ },
1737
+ {
1738
+ "epoch": 0.8595776213412375,
1739
+ "grad_norm": 82.56806182861328,
1740
+ "learning_rate": 7.296296296296297e-07,
1741
+ "logits/chosen": -2.6925208568573,
1742
+ "logits/rejected": -2.4951748847961426,
1743
+ "logps/chosen": -108.81231689453125,
1744
+ "logps/rejected": -123.46539306640625,
1745
+ "loss": 0.5316,
1746
+ "rewards/accuracies": 0.75,
1747
+ "rewards/chosen": -2.8224823474884033,
1748
+ "rewards/margins": 1.9338245391845703,
1749
+ "rewards/rejected": -4.7563066482543945,
1750
+ "step": 1160
1751
+ },
1752
+ {
1753
+ "epoch": 0.8669877732493516,
1754
+ "grad_norm": 79.25946044921875,
1755
+ "learning_rate": 6.925925925925926e-07,
1756
+ "logits/chosen": -2.795814275741577,
1757
+ "logits/rejected": -2.6198534965515137,
1758
+ "logps/chosen": -114.69990539550781,
1759
+ "logps/rejected": -138.99383544921875,
1760
+ "loss": 0.4481,
1761
+ "rewards/accuracies": 0.762499988079071,
1762
+ "rewards/chosen": -3.544638156890869,
1763
+ "rewards/margins": 1.8667469024658203,
1764
+ "rewards/rejected": -5.411385536193848,
1765
+ "step": 1170
1766
+ },
1767
+ {
1768
+ "epoch": 0.8743979251574657,
1769
+ "grad_norm": 68.91136932373047,
1770
+ "learning_rate": 6.555555555555556e-07,
1771
+ "logits/chosen": -2.5401740074157715,
1772
+ "logits/rejected": -2.3985536098480225,
1773
+ "logps/chosen": -108.79698181152344,
1774
+ "logps/rejected": -127.04399108886719,
1775
+ "loss": 0.4939,
1776
+ "rewards/accuracies": 0.7250000238418579,
1777
+ "rewards/chosen": -3.1688778400421143,
1778
+ "rewards/margins": 1.6756236553192139,
1779
+ "rewards/rejected": -4.844501495361328,
1780
+ "step": 1180
1781
+ },
1782
+ {
1783
+ "epoch": 0.8818080770655798,
1784
+ "grad_norm": 153.0009765625,
1785
+ "learning_rate": 6.185185185185186e-07,
1786
+ "logits/chosen": -2.725766181945801,
1787
+ "logits/rejected": -2.6588385105133057,
1788
+ "logps/chosen": -115.24592590332031,
1789
+ "logps/rejected": -126.50578308105469,
1790
+ "loss": 0.6857,
1791
+ "rewards/accuracies": 0.637499988079071,
1792
+ "rewards/chosen": -3.7392802238464355,
1793
+ "rewards/margins": 1.0371185541152954,
1794
+ "rewards/rejected": -4.7763991355896,
1795
+ "step": 1190
1796
+ },
1797
+ {
1798
+ "epoch": 0.889218228973694,
1799
+ "grad_norm": 50.05393981933594,
1800
+ "learning_rate": 5.814814814814816e-07,
1801
+ "logits/chosen": -2.800872325897217,
1802
+ "logits/rejected": -2.5388267040252686,
1803
+ "logps/chosen": -108.91355895996094,
1804
+ "logps/rejected": -140.00509643554688,
1805
+ "loss": 0.5076,
1806
+ "rewards/accuracies": 0.737500011920929,
1807
+ "rewards/chosen": -3.350496292114258,
1808
+ "rewards/margins": 2.106170892715454,
1809
+ "rewards/rejected": -5.456666946411133,
1810
+ "step": 1200
1811
+ },
1812
+ {
1813
+ "epoch": 0.8966283808818081,
1814
+ "grad_norm": 67.96463775634766,
1815
+ "learning_rate": 5.444444444444444e-07,
1816
+ "logits/chosen": -2.7067558765411377,
1817
+ "logits/rejected": -2.599600315093994,
1818
+ "logps/chosen": -107.89814758300781,
1819
+ "logps/rejected": -124.91080474853516,
1820
+ "loss": 0.5518,
1821
+ "rewards/accuracies": 0.7124999761581421,
1822
+ "rewards/chosen": -3.1988513469696045,
1823
+ "rewards/margins": 1.6774219274520874,
1824
+ "rewards/rejected": -4.876273155212402,
1825
+ "step": 1210
1826
+ },
1827
+ {
1828
+ "epoch": 0.9040385327899222,
1829
+ "grad_norm": 51.324058532714844,
1830
+ "learning_rate": 5.074074074074075e-07,
1831
+ "logits/chosen": -2.5362696647644043,
1832
+ "logits/rejected": -2.354970693588257,
1833
+ "logps/chosen": -115.45835876464844,
1834
+ "logps/rejected": -136.94302368164062,
1835
+ "loss": 0.4526,
1836
+ "rewards/accuracies": 0.800000011920929,
1837
+ "rewards/chosen": -3.65421986579895,
1838
+ "rewards/margins": 2.047398090362549,
1839
+ "rewards/rejected": -5.701618671417236,
1840
+ "step": 1220
1841
+ },
1842
+ {
1843
+ "epoch": 0.9114486846980363,
1844
+ "grad_norm": 72.69883728027344,
1845
+ "learning_rate": 4.703703703703704e-07,
1846
+ "logits/chosen": -2.776798725128174,
1847
+ "logits/rejected": -2.5926573276519775,
1848
+ "logps/chosen": -109.353515625,
1849
+ "logps/rejected": -133.78953552246094,
1850
+ "loss": 0.479,
1851
+ "rewards/accuracies": 0.7250000238418579,
1852
+ "rewards/chosen": -3.6324219703674316,
1853
+ "rewards/margins": 1.8643248081207275,
1854
+ "rewards/rejected": -5.496747016906738,
1855
+ "step": 1230
1856
+ },
1857
+ {
1858
+ "epoch": 0.9188588366061504,
1859
+ "grad_norm": 76.83854675292969,
1860
+ "learning_rate": 4.333333333333334e-07,
1861
+ "logits/chosen": -2.752346992492676,
1862
+ "logits/rejected": -2.629638195037842,
1863
+ "logps/chosen": -105.3023910522461,
1864
+ "logps/rejected": -126.5351791381836,
1865
+ "loss": 0.5053,
1866
+ "rewards/accuracies": 0.762499988079071,
1867
+ "rewards/chosen": -3.197103977203369,
1868
+ "rewards/margins": 1.733891487121582,
1869
+ "rewards/rejected": -4.930995464324951,
1870
+ "step": 1240
1871
+ },
1872
+ {
1873
+ "epoch": 0.9262689885142645,
1874
+ "grad_norm": 110.72493743896484,
1875
+ "learning_rate": 3.9629629629629634e-07,
1876
+ "logits/chosen": -2.612422227859497,
1877
+ "logits/rejected": -2.4569220542907715,
1878
+ "logps/chosen": -97.88721466064453,
1879
+ "logps/rejected": -128.8172607421875,
1880
+ "loss": 0.4024,
1881
+ "rewards/accuracies": 0.800000011920929,
1882
+ "rewards/chosen": -3.1734538078308105,
1883
+ "rewards/margins": 2.2661962509155273,
1884
+ "rewards/rejected": -5.439650535583496,
1885
+ "step": 1250
1886
+ },
1887
+ {
1888
+ "epoch": 0.9336791404223787,
1889
+ "grad_norm": 79.09251403808594,
1890
+ "learning_rate": 3.592592592592593e-07,
1891
+ "logits/chosen": -2.5835788249969482,
1892
+ "logits/rejected": -2.425140857696533,
1893
+ "logps/chosen": -118.10221862792969,
1894
+ "logps/rejected": -140.7429656982422,
1895
+ "loss": 0.374,
1896
+ "rewards/accuracies": 0.8125,
1897
+ "rewards/chosen": -3.600130558013916,
1898
+ "rewards/margins": 2.1908457279205322,
1899
+ "rewards/rejected": -5.790975570678711,
1900
+ "step": 1260
1901
+ },
1902
+ {
1903
+ "epoch": 0.9410892923304928,
1904
+ "grad_norm": 77.63102722167969,
1905
+ "learning_rate": 3.2222222222222227e-07,
1906
+ "logits/chosen": -2.833207607269287,
1907
+ "logits/rejected": -2.741405487060547,
1908
+ "logps/chosen": -105.04319763183594,
1909
+ "logps/rejected": -129.08157348632812,
1910
+ "loss": 0.4978,
1911
+ "rewards/accuracies": 0.762499988079071,
1912
+ "rewards/chosen": -3.2051002979278564,
1913
+ "rewards/margins": 2.0284972190856934,
1914
+ "rewards/rejected": -5.233597278594971,
1915
+ "step": 1270
1916
+ },
1917
+ {
1918
+ "epoch": 0.9484994442386069,
1919
+ "grad_norm": 45.666744232177734,
1920
+ "learning_rate": 2.851851851851852e-07,
1921
+ "logits/chosen": -2.8316311836242676,
1922
+ "logits/rejected": -2.6417407989501953,
1923
+ "logps/chosen": -113.1464614868164,
1924
+ "logps/rejected": -136.09994506835938,
1925
+ "loss": 0.3906,
1926
+ "rewards/accuracies": 0.762499988079071,
1927
+ "rewards/chosen": -3.425858736038208,
1928
+ "rewards/margins": 2.1212241649627686,
1929
+ "rewards/rejected": -5.547082424163818,
1930
+ "step": 1280
1931
+ },
1932
+ {
1933
+ "epoch": 0.955909596146721,
1934
+ "grad_norm": 126.80838775634766,
1935
+ "learning_rate": 2.4814814814814814e-07,
1936
+ "logits/chosen": -2.479860782623291,
1937
+ "logits/rejected": -2.4349493980407715,
1938
+ "logps/chosen": -110.22750091552734,
1939
+ "logps/rejected": -130.07176208496094,
1940
+ "loss": 0.5546,
1941
+ "rewards/accuracies": 0.6875,
1942
+ "rewards/chosen": -3.495988368988037,
1943
+ "rewards/margins": 1.9390910863876343,
1944
+ "rewards/rejected": -5.435080051422119,
1945
+ "step": 1290
1946
+ },
1947
+ {
1948
+ "epoch": 0.9633197480548351,
1949
+ "grad_norm": 104.56671142578125,
1950
+ "learning_rate": 2.1111111111111113e-07,
1951
+ "logits/chosen": -2.8810842037200928,
1952
+ "logits/rejected": -2.64825701713562,
1953
+ "logps/chosen": -114.70051574707031,
1954
+ "logps/rejected": -133.77554321289062,
1955
+ "loss": 0.4823,
1956
+ "rewards/accuracies": 0.762499988079071,
1957
+ "rewards/chosen": -3.2450995445251465,
1958
+ "rewards/margins": 1.894256830215454,
1959
+ "rewards/rejected": -5.13935661315918,
1960
+ "step": 1300
1961
+ },
1962
+ {
1963
+ "epoch": 0.9707298999629492,
1964
+ "grad_norm": 69.64128875732422,
1965
+ "learning_rate": 1.7407407407407407e-07,
1966
+ "logits/chosen": -2.7370734214782715,
1967
+ "logits/rejected": -2.5394835472106934,
1968
+ "logps/chosen": -120.4969711303711,
1969
+ "logps/rejected": -136.19540405273438,
1970
+ "loss": 0.6125,
1971
+ "rewards/accuracies": 0.762499988079071,
1972
+ "rewards/chosen": -3.9360873699188232,
1973
+ "rewards/margins": 1.48752760887146,
1974
+ "rewards/rejected": -5.423615455627441,
1975
+ "step": 1310
1976
+ },
1977
+ {
1978
+ "epoch": 0.9781400518710633,
1979
+ "grad_norm": 72.55949401855469,
1980
+ "learning_rate": 1.3703703703703706e-07,
1981
+ "logits/chosen": -2.5739331245422363,
1982
+ "logits/rejected": -2.3697855472564697,
1983
+ "logps/chosen": -111.98551940917969,
1984
+ "logps/rejected": -126.80464935302734,
1985
+ "loss": 0.4807,
1986
+ "rewards/accuracies": 0.7250000238418579,
1987
+ "rewards/chosen": -3.24163556098938,
1988
+ "rewards/margins": 1.6213098764419556,
1989
+ "rewards/rejected": -4.862946510314941,
1990
+ "step": 1320
1991
+ },
1992
+ {
1993
+ "epoch": 0.9855502037791775,
1994
+ "grad_norm": 74.78096008300781,
1995
+ "learning_rate": 1.0000000000000001e-07,
1996
+ "logits/chosen": -2.6215052604675293,
1997
+ "logits/rejected": -2.4891343116760254,
1998
+ "logps/chosen": -104.3646469116211,
1999
+ "logps/rejected": -129.4760284423828,
2000
+ "loss": 0.377,
2001
+ "rewards/accuracies": 0.8999999761581421,
2002
+ "rewards/chosen": -2.8517489433288574,
2003
+ "rewards/margins": 2.320369005203247,
2004
+ "rewards/rejected": -5.172118186950684,
2005
+ "step": 1330
2006
+ },
2007
+ {
2008
+ "epoch": 0.9929603556872916,
2009
+ "grad_norm": 88.45356750488281,
2010
+ "learning_rate": 6.296296296296297e-08,
2011
+ "logits/chosen": -2.569523334503174,
2012
+ "logits/rejected": -2.4072022438049316,
2013
+ "logps/chosen": -110.60871887207031,
2014
+ "logps/rejected": -127.8760986328125,
2015
+ "loss": 0.5243,
2016
+ "rewards/accuracies": 0.737500011920929,
2017
+ "rewards/chosen": -3.452281951904297,
2018
+ "rewards/margins": 1.5607750415802002,
2019
+ "rewards/rejected": -5.013056755065918,
2020
+ "step": 1340
2021
+ },
2022
+ {
2023
+ "epoch": 1.0,
2024
+ "grad_norm": 10.505631446838379,
2025
+ "learning_rate": 2.5925925925925926e-08,
2026
+ "logits/chosen": -2.393158435821533,
2027
+ "logits/rejected": -2.35024356842041,
2028
+ "logps/chosen": -112.46697998046875,
2029
+ "logps/rejected": -128.6726837158203,
2030
+ "loss": 0.4409,
2031
+ "rewards/accuracies": 0.7894737124443054,
2032
+ "rewards/chosen": -3.711907148361206,
2033
+ "rewards/margins": 1.5488325357437134,
2034
+ "rewards/rejected": -5.260739803314209,
2035
+ "step": 1350
2036
+ }
2037
+ ],
2038
+ "logging_steps": 10,
2039
+ "max_steps": 1350,
2040
+ "num_input_tokens_seen": 0,
2041
+ "num_train_epochs": 1,
2042
+ "save_steps": 500,
2043
+ "stateful_callbacks": {
2044
+ "TrainerControl": {
2045
+ "args": {
2046
+ "should_epoch_stop": false,
2047
+ "should_evaluate": false,
2048
+ "should_log": false,
2049
+ "should_save": true,
2050
+ "should_training_stop": true
2051
+ },
2052
+ "attributes": {}
2053
+ }
2054
+ },
2055
+ "total_flos": 0.0,
2056
+ "train_batch_size": 2,
2057
+ "trial_name": null,
2058
+ "trial_params": null
2059
+ }
checkpoint-1350/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a7b7968a8ae2ad676d8cb9afe1a793e119b7999c5723f8eae7378c1356c231
3
+ size 6392
checkpoint-1350/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "max_position_embeddings": 32768,
15
+ "max_window_layers": 28,
16
+ "model_type": "qwen3",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 28,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": true,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.52.3",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936
30
+ }
dpo_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "beta": 0.1,
3
+ "learning_rate": 5e-06,
4
+ "per_device_train_batch_size": 2,
5
+ "gradient_accumulation_steps": 4,
6
+ "max_length": 512,
7
+ "max_prompt_length": 128,
8
+ "num_train_epochs": 1,
9
+ "logging_steps": 10,
10
+ "save_strategy": "epoch",
11
+ "output_dir": "./koreankiwi99_dpo_model_base_Math-Step-DPO-10K",
12
+ "remove_unused_columns": false,
13
+ "fp16": true,
14
+ "bf16": false,
15
+ "gradient_checkpointing": false,
16
+ "max_grad_norm": 1.0,
17
+ "push_to_hub_model_id": "koreankiwi99/dpo_model_base_Math-Step-DPO-10K"
18
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.52.3"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a793c47676550a51ebca3f46d38c9caf6f35ccebbc241c469e660d591a568bc0
3
+ size 2384234968
runs/Jun05_14-17-48_47dafa9566a9/events.out.tfevents.1749133094.47dafa9566a9.1112.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d1decf67b86a4524ea49d67ed4a789a9486fb37d14d9d98f8e93acf00754f0a
3
+ size 99192
special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": "<|endoftext|>"
25
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "chat_template": null,
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|endoftext|>",
233
+ "errors": "replace",
234
+ "extra_special_tokens": {},
235
+ "model_max_length": 131072,
236
+ "pad_token": "<|endoftext|>",
237
+ "split_special_tokens": false,
238
+ "tokenizer_class": "Qwen2Tokenizer",
239
+ "unk_token": null
240
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff