Infernaught commited on
Commit
e112099
·
verified ·
1 Parent(s): b3d99cd

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -24,10 +24,10 @@
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
 
27
  "k_proj",
28
  "v_proj",
29
- "o_proj",
30
- "q_proj"
31
  ],
32
  "task_type": "CAUSAL_LM",
33
  "trainable_token_indices": null,
 
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
+ "q_proj",
28
  "k_proj",
29
  "v_proj",
30
+ "o_proj"
 
31
  ],
32
  "task_type": "CAUSAL_LM",
33
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d0543ae6c11fab0472ae1c3965dfb2c0d4f76bf25073705dda75301212159cd
3
- size 405942856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc085ca55181132e4e631578112d3022be2801a6172ecd0a1200c6119b1c011
3
+ size 12591456
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31d777418b4fe20f7b3114e65415f3dba051eae3e937841cccb69c756e9cd186
3
  size 25219898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e1eda41d1c581fa2206543d71ebe88eaa51fac4149f55cb2adc88b7131c770
3
  size 25219898
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63fecec8d46cd2642ce3c4267e396be5d4f1873c560e7c604417611b1535bdd3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0393816af5e506720af7a25500bfd823405c1f828d9e947cee94a8ab4c238a
3
  size 14244
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 40,
3
- "best_metric": 0.003174182726070285,
4
- "best_model_checkpoint": "outputs/{BASE_MODEL}/{args.task}_sft-pretrained_n100_mcl_256/checkpoint-40",
5
  "epoch": 3.0,
6
  "eval_steps": 5,
7
  "global_step": 75,
@@ -11,345 +11,345 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.2,
14
- "grad_norm": 0.039206475019454956,
15
  "learning_rate": 9.466666666666666e-07,
16
- "loss": 0.0331,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.2,
21
  "eval_clip_ratio": 0.0,
22
- "eval_completion_length": 249.4625,
23
- "eval_kl": 2.9596686363220216e-05,
24
- "eval_loss": 0.01392994076013565,
25
- "eval_num_tokens": 31072.0,
26
- "eval_reward": 0.075,
27
- "eval_reward_std": 0.12287135720252991,
28
- "eval_rewards/equation_reward_func": 0.025,
29
  "eval_rewards/format_reward_func": 0.05,
30
- "eval_runtime": 287.2484,
31
- "eval_samples_per_second": 0.07,
32
- "eval_steps_per_second": 0.017,
33
  "step": 5
34
  },
35
  {
36
  "epoch": 0.4,
37
- "grad_norm": 0.02188793569803238,
38
  "learning_rate": 8.799999999999999e-07,
39
- "loss": 0.0184,
40
  "step": 10
41
  },
42
  {
43
  "epoch": 0.4,
44
  "eval_clip_ratio": 0.0,
45
- "eval_completion_length": 244.675,
46
- "eval_kl": 3.2660365104675296e-05,
47
- "eval_loss": 0.03559673950076103,
48
- "eval_num_tokens": 62173.0,
49
  "eval_reward": 0.1,
50
- "eval_reward_std": 0.15773502588272095,
51
- "eval_rewards/equation_reward_func": 0.1,
52
- "eval_rewards/format_reward_func": 0.0,
53
- "eval_runtime": 286.2155,
54
- "eval_samples_per_second": 0.07,
55
- "eval_steps_per_second": 0.017,
56
  "step": 10
57
  },
58
  {
59
  "epoch": 0.6,
60
- "grad_norm": 0.0426708348095417,
61
  "learning_rate": 8.133333333333333e-07,
62
- "loss": 0.0235,
63
  "step": 15
64
  },
65
  {
66
  "epoch": 0.6,
67
  "eval_clip_ratio": 0.0,
68
- "eval_completion_length": 246.7,
69
- "eval_kl": 3.2341480255126954e-05,
70
- "eval_loss": 0.027126455679535866,
71
- "eval_num_tokens": 93005.0,
72
- "eval_reward": 0.15,
73
- "eval_reward_std": 0.19082483053207397,
74
- "eval_rewards/equation_reward_func": 0.075,
75
- "eval_rewards/format_reward_func": 0.075,
76
- "eval_runtime": 286.1471,
77
- "eval_samples_per_second": 0.07,
78
- "eval_steps_per_second": 0.017,
79
  "step": 15
80
  },
81
  {
82
  "epoch": 0.8,
83
- "grad_norm": 0.019130932167172432,
84
  "learning_rate": 7.466666666666667e-07,
85
- "loss": 0.0308,
86
  "step": 20
87
  },
88
  {
89
  "epoch": 0.8,
90
  "eval_clip_ratio": 0.0,
91
- "eval_completion_length": 244.8125,
92
- "eval_kl": 3.119707107543945e-05,
93
- "eval_loss": 0.027214299887418747,
94
- "eval_num_tokens": 124029.0,
95
  "eval_reward": 0.1125,
96
- "eval_reward_std": 0.18273502588272095,
97
- "eval_rewards/equation_reward_func": 0.075,
98
- "eval_rewards/format_reward_func": 0.0375,
99
- "eval_runtime": 286.884,
100
- "eval_samples_per_second": 0.07,
101
- "eval_steps_per_second": 0.017,
102
  "step": 20
103
  },
104
  {
105
  "epoch": 1.0,
106
- "grad_norm": 0.03768523037433624,
107
  "learning_rate": 6.800000000000001e-07,
108
- "loss": 0.0548,
109
  "step": 25
110
  },
111
  {
112
  "epoch": 1.0,
113
  "eval_clip_ratio": 0.0,
114
- "eval_completion_length": 239.9875,
115
- "eval_kl": 3.15934419631958e-05,
116
- "eval_loss": 0.04795133322477341,
117
- "eval_num_tokens": 154460.0,
118
- "eval_reward": 0.1875,
119
- "eval_reward_std": 0.2996102273464203,
120
- "eval_rewards/equation_reward_func": 0.0875,
121
- "eval_rewards/format_reward_func": 0.1,
122
- "eval_runtime": 286.3028,
123
- "eval_samples_per_second": 0.07,
124
- "eval_steps_per_second": 0.017,
125
  "step": 25
126
  },
127
  {
128
  "epoch": 1.2,
129
- "grad_norm": 0.021382536739110947,
130
  "learning_rate": 6.133333333333332e-07,
131
- "loss": 0.0264,
132
  "step": 30
133
  },
134
  {
135
  "epoch": 1.2,
136
  "eval_clip_ratio": 0.0,
137
- "eval_completion_length": 241.1125,
138
- "eval_kl": 3.0347704887390135e-05,
139
- "eval_loss": 0.02830352820456028,
140
- "eval_num_tokens": 185490.0,
141
- "eval_reward": 0.0875,
142
- "eval_reward_std": 0.15386751294136047,
143
- "eval_rewards/equation_reward_func": 0.05,
144
  "eval_rewards/format_reward_func": 0.0375,
145
- "eval_runtime": 286.3313,
146
- "eval_samples_per_second": 0.07,
147
- "eval_steps_per_second": 0.017,
148
  "step": 30
149
  },
150
  {
151
  "epoch": 1.4,
152
- "grad_norm": 0.03168244659900665,
153
  "learning_rate": 5.466666666666666e-07,
154
- "loss": 0.0253,
155
  "step": 35
156
  },
157
  {
158
  "epoch": 1.4,
159
  "eval_clip_ratio": 0.0,
160
- "eval_completion_length": 250.0125,
161
- "eval_kl": 3.032684326171875e-05,
162
- "eval_loss": 0.019817089661955833,
163
- "eval_num_tokens": 216713.0,
164
- "eval_reward": 0.075,
165
- "eval_reward_std": 0.12886751294136048,
166
- "eval_rewards/equation_reward_func": 0.05,
167
- "eval_rewards/format_reward_func": 0.025,
168
- "eval_runtime": 286.7823,
169
  "eval_samples_per_second": 0.07,
170
- "eval_steps_per_second": 0.017,
171
  "step": 35
172
  },
173
  {
174
  "epoch": 1.6,
175
- "grad_norm": 3.9236266502484796e-07,
176
  "learning_rate": 4.8e-07,
177
- "loss": 0.0085,
178
  "step": 40
179
  },
180
  {
181
  "epoch": 1.6,
182
  "eval_clip_ratio": 0.0,
183
- "eval_completion_length": 245.225,
184
- "eval_kl": 3.27765941619873e-05,
185
- "eval_loss": 0.003174182726070285,
186
- "eval_num_tokens": 247603.0,
187
- "eval_reward": 0.0625,
188
- "eval_reward_std": 0.10386751294136047,
189
- "eval_rewards/equation_reward_func": 0.0375,
190
- "eval_rewards/format_reward_func": 0.025,
191
- "eval_runtime": 286.336,
192
  "eval_samples_per_second": 0.07,
193
- "eval_steps_per_second": 0.017,
194
  "step": 40
195
  },
196
  {
197
  "epoch": 1.8,
198
- "grad_norm": 0.03481123968958855,
199
  "learning_rate": 4.1333333333333333e-07,
200
- "loss": 0.0441,
201
  "step": 45
202
  },
203
  {
204
  "epoch": 1.8,
205
  "eval_clip_ratio": 0.0,
206
- "eval_completion_length": 247.3,
207
- "eval_kl": 2.6440620422363283e-05,
208
- "eval_loss": 0.030359486117959023,
209
- "eval_num_tokens": 278403.0,
210
- "eval_reward": 0.1375,
211
- "eval_reward_std": 0.22673887014389038,
212
- "eval_rewards/equation_reward_func": 0.1,
213
- "eval_rewards/format_reward_func": 0.0375,
214
- "eval_runtime": 286.4232,
215
  "eval_samples_per_second": 0.07,
216
- "eval_steps_per_second": 0.017,
217
  "step": 45
218
  },
219
  {
220
  "epoch": 2.0,
221
- "grad_norm": 0.03657938167452812,
222
  "learning_rate": 3.4666666666666665e-07,
223
- "loss": 0.0572,
224
  "step": 50
225
  },
226
  {
227
  "epoch": 2.0,
228
  "eval_clip_ratio": 0.0,
229
- "eval_completion_length": 248.95,
230
- "eval_kl": 2.9081106185913085e-05,
231
- "eval_loss": 0.015535709448158741,
232
- "eval_num_tokens": 309080.0,
233
- "eval_reward": 0.1,
234
- "eval_reward_std": 0.15,
235
- "eval_rewards/equation_reward_func": 0.0625,
236
  "eval_rewards/format_reward_func": 0.0375,
237
- "eval_runtime": 284.5486,
238
  "eval_samples_per_second": 0.07,
239
  "eval_steps_per_second": 0.018,
240
  "step": 50
241
  },
242
  {
243
  "epoch": 2.2,
244
- "grad_norm": 0.024848679080605507,
245
  "learning_rate": 2.8e-07,
246
- "loss": 0.0289,
247
  "step": 55
248
  },
249
  {
250
  "epoch": 2.2,
251
  "eval_clip_ratio": 0.0,
252
- "eval_completion_length": 246.8875,
253
- "eval_kl": 3.223121166229248e-05,
254
- "eval_loss": 0.023179035633802414,
255
- "eval_num_tokens": 340282.0,
256
- "eval_reward": 0.1375,
257
- "eval_reward_std": 0.22074271440505983,
258
- "eval_rewards/equation_reward_func": 0.05,
259
- "eval_rewards/format_reward_func": 0.0875,
260
- "eval_runtime": 284.2027,
261
- "eval_samples_per_second": 0.07,
262
  "eval_steps_per_second": 0.018,
263
  "step": 55
264
  },
265
  {
266
  "epoch": 2.4,
267
- "grad_norm": 0.0,
268
  "learning_rate": 2.1333333333333334e-07,
269
- "loss": 0.0567,
270
  "step": 60
271
  },
272
  {
273
  "epoch": 2.4,
274
  "eval_clip_ratio": 0.0,
275
- "eval_completion_length": 248.3875,
276
- "eval_kl": 3.6197900772094724e-05,
277
- "eval_loss": 0.018750805407762527,
278
- "eval_num_tokens": 371169.0,
279
- "eval_reward": 0.0875,
280
- "eval_reward_std": 0.175,
281
- "eval_rewards/equation_reward_func": 0.0375,
282
- "eval_rewards/format_reward_func": 0.05,
283
- "eval_runtime": 284.2169,
284
  "eval_samples_per_second": 0.07,
285
  "eval_steps_per_second": 0.018,
286
  "step": 60
287
  },
288
  {
289
  "epoch": 2.6,
290
- "grad_norm": 0.028844181448221207,
291
  "learning_rate": 1.4666666666666666e-07,
292
- "loss": 0.0199,
293
  "step": 65
294
  },
295
  {
296
  "epoch": 2.6,
297
  "eval_clip_ratio": 0.0,
298
- "eval_completion_length": 247.4375,
299
- "eval_kl": 3.5449862480163574e-05,
300
- "eval_loss": 0.04226290062069893,
301
- "eval_num_tokens": 402032.0,
302
- "eval_reward": 0.1,
303
- "eval_reward_std": 0.15173887014389037,
304
- "eval_rewards/equation_reward_func": 0.05,
305
- "eval_rewards/format_reward_func": 0.05,
306
- "eval_runtime": 284.2641,
307
- "eval_samples_per_second": 0.07,
308
  "eval_steps_per_second": 0.018,
309
  "step": 65
310
  },
311
  {
312
  "epoch": 2.8,
313
- "grad_norm": 0.0,
314
  "learning_rate": 8e-08,
315
- "loss": 0.0277,
316
  "step": 70
317
  },
318
  {
319
  "epoch": 2.8,
320
  "eval_clip_ratio": 0.0,
321
- "eval_completion_length": 241.95,
322
- "eval_kl": 3.163218498229981e-05,
323
- "eval_loss": 0.05129896476864815,
324
- "eval_num_tokens": 433087.0,
325
- "eval_reward": 0.1,
326
- "eval_reward_std": 0.17886751294136047,
327
- "eval_rewards/equation_reward_func": 0.05,
328
- "eval_rewards/format_reward_func": 0.05,
329
- "eval_runtime": 284.2163,
330
  "eval_samples_per_second": 0.07,
331
  "eval_steps_per_second": 0.018,
332
  "step": 70
333
  },
334
  {
335
  "epoch": 3.0,
336
- "grad_norm": 0.022495441138744354,
337
  "learning_rate": 1.3333333333333334e-08,
338
- "loss": 0.037,
339
  "step": 75
340
  },
341
  {
342
  "epoch": 3.0,
343
  "eval_clip_ratio": 0.0,
344
- "eval_completion_length": 246.875,
345
- "eval_kl": 3.185570240020752e-05,
346
- "eval_loss": 0.030237644910812378,
347
- "eval_num_tokens": 464241.0,
348
- "eval_reward": 0.0875,
349
- "eval_reward_std": 0.14787135720252992,
350
  "eval_rewards/equation_reward_func": 0.0625,
351
- "eval_rewards/format_reward_func": 0.025,
352
- "eval_runtime": 284.2765,
353
  "eval_samples_per_second": 0.07,
354
  "eval_steps_per_second": 0.018,
355
  "step": 75
 
1
  {
2
+ "best_global_step": 30,
3
+ "best_metric": 0.009350189939141273,
4
+ "best_model_checkpoint": "outputs/microsoft/Phi-3.5-mini-instruct/countdown_n100_mcl_256_pretrained/checkpoint-30",
5
  "epoch": 3.0,
6
  "eval_steps": 5,
7
  "global_step": 75,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.2,
14
+ "grad_norm": 0.04710305854678154,
15
  "learning_rate": 9.466666666666666e-07,
16
+ "loss": 0.0581,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.2,
21
  "eval_clip_ratio": 0.0,
22
+ "eval_completion_length": 249.7375,
23
+ "eval_kl": 2.7620792388916014e-05,
24
+ "eval_loss": 0.011466369964182377,
25
+ "eval_num_tokens": 30743.0,
26
+ "eval_reward": 0.0875,
27
+ "eval_reward_std": 0.14787135720252992,
28
+ "eval_rewards/equation_reward_func": 0.0375,
29
  "eval_rewards/format_reward_func": 0.05,
30
+ "eval_runtime": 283.5242,
31
+ "eval_samples_per_second": 0.071,
32
+ "eval_steps_per_second": 0.018,
33
  "step": 5
34
  },
35
  {
36
  "epoch": 0.4,
37
+ "grad_norm": 0.0248898696154356,
38
  "learning_rate": 8.799999999999999e-07,
39
+ "loss": 0.0305,
40
  "step": 10
41
  },
42
  {
43
  "epoch": 0.4,
44
  "eval_clip_ratio": 0.0,
45
+ "eval_completion_length": 248.3125,
46
+ "eval_kl": 3.051459789276123e-05,
47
+ "eval_loss": 0.010714234784245491,
48
+ "eval_num_tokens": 61654.0,
49
  "eval_reward": 0.1,
50
+ "eval_reward_std": 0.11969234347343445,
51
+ "eval_rewards/equation_reward_func": 0.075,
52
+ "eval_rewards/format_reward_func": 0.025,
53
+ "eval_runtime": 279.7789,
54
+ "eval_samples_per_second": 0.071,
55
+ "eval_steps_per_second": 0.018,
56
  "step": 10
57
  },
58
  {
59
  "epoch": 0.6,
60
+ "grad_norm": 0.03560088202357292,
61
  "learning_rate": 8.133333333333333e-07,
62
+ "loss": 0.0333,
63
  "step": 15
64
  },
65
  {
66
  "epoch": 0.6,
67
  "eval_clip_ratio": 0.0,
68
+ "eval_completion_length": 248.8125,
69
+ "eval_kl": 3.6323070526123046e-05,
70
+ "eval_loss": 0.016332309693098068,
71
+ "eval_num_tokens": 92513.0,
72
+ "eval_reward": 0.1,
73
+ "eval_reward_std": 0.15773502588272095,
74
+ "eval_rewards/equation_reward_func": 0.0375,
75
+ "eval_rewards/format_reward_func": 0.0625,
76
+ "eval_runtime": 282.7334,
77
+ "eval_samples_per_second": 0.071,
78
+ "eval_steps_per_second": 0.018,
79
  "step": 15
80
  },
81
  {
82
  "epoch": 0.8,
83
+ "grad_norm": 0.02903689257800579,
84
  "learning_rate": 7.466666666666667e-07,
85
+ "loss": 0.0363,
86
  "step": 20
87
  },
88
  {
89
  "epoch": 0.8,
90
  "eval_clip_ratio": 0.0,
91
+ "eval_completion_length": 241.4375,
92
+ "eval_kl": 3.466010093688965e-05,
93
+ "eval_loss": 0.03578554838895798,
94
+ "eval_num_tokens": 123451.0,
95
  "eval_reward": 0.1125,
96
+ "eval_reward_std": 0.1978713572025299,
97
+ "eval_rewards/equation_reward_func": 0.0625,
98
+ "eval_rewards/format_reward_func": 0.05,
99
+ "eval_runtime": 282.549,
100
+ "eval_samples_per_second": 0.071,
101
+ "eval_steps_per_second": 0.018,
102
  "step": 20
103
  },
104
  {
105
  "epoch": 1.0,
106
+ "grad_norm": 5.04811282553419e-07,
107
  "learning_rate": 6.800000000000001e-07,
108
+ "loss": 0.0011,
109
  "step": 25
110
  },
111
  {
112
  "epoch": 1.0,
113
  "eval_clip_ratio": 0.0,
114
+ "eval_completion_length": 247.55,
115
+ "eval_kl": 3.0243396759033202e-05,
116
+ "eval_loss": 0.023990554735064507,
117
+ "eval_num_tokens": 154568.0,
118
+ "eval_reward": 0.1,
119
+ "eval_reward_std": 0.17886751294136047,
120
+ "eval_rewards/equation_reward_func": 0.05,
121
+ "eval_rewards/format_reward_func": 0.05,
122
+ "eval_runtime": 282.4574,
123
+ "eval_samples_per_second": 0.071,
124
+ "eval_steps_per_second": 0.018,
125
  "step": 25
126
  },
127
  {
128
  "epoch": 1.2,
129
+ "grad_norm": 0.0,
130
  "learning_rate": 6.133333333333332e-07,
131
+ "loss": 0.0337,
132
  "step": 30
133
  },
134
  {
135
  "epoch": 1.2,
136
  "eval_clip_ratio": 0.0,
137
+ "eval_completion_length": 249.3875,
138
+ "eval_kl": 2.872645854949951e-05,
139
+ "eval_loss": 0.009350189939141273,
140
+ "eval_num_tokens": 185750.0,
141
+ "eval_reward": 0.1,
142
+ "eval_reward_std": 0.15,
143
+ "eval_rewards/equation_reward_func": 0.0625,
144
  "eval_rewards/format_reward_func": 0.0375,
145
+ "eval_runtime": 283.2443,
146
+ "eval_samples_per_second": 0.071,
147
+ "eval_steps_per_second": 0.018,
148
  "step": 30
149
  },
150
  {
151
  "epoch": 1.4,
152
+ "grad_norm": 0.025708282366394997,
153
  "learning_rate": 5.466666666666666e-07,
154
+ "loss": 0.0273,
155
  "step": 35
156
  },
157
  {
158
  "epoch": 1.4,
159
  "eval_clip_ratio": 0.0,
160
+ "eval_completion_length": 242.05,
161
+ "eval_kl": 3.2845139503479e-05,
162
+ "eval_loss": 0.038312580436468124,
163
+ "eval_num_tokens": 217087.0,
164
+ "eval_reward": 0.1375,
165
+ "eval_reward_std": 0.1978713572025299,
166
+ "eval_rewards/equation_reward_func": 0.0625,
167
+ "eval_rewards/format_reward_func": 0.075,
168
+ "eval_runtime": 283.9676,
169
  "eval_samples_per_second": 0.07,
170
+ "eval_steps_per_second": 0.018,
171
  "step": 35
172
  },
173
  {
174
  "epoch": 1.6,
175
+ "grad_norm": 0.04088641330599785,
176
  "learning_rate": 4.8e-07,
177
+ "loss": 0.0318,
178
  "step": 40
179
  },
180
  {
181
  "epoch": 1.6,
182
  "eval_clip_ratio": 0.0,
183
+ "eval_completion_length": 247.9375,
184
+ "eval_kl": 3.358125686645508e-05,
185
+ "eval_loss": 0.028362590819597244,
186
+ "eval_num_tokens": 247949.0,
187
+ "eval_reward": 0.1375,
188
+ "eval_reward_std": 0.1886961877346039,
189
+ "eval_rewards/equation_reward_func": 0.0875,
190
+ "eval_rewards/format_reward_func": 0.05,
191
+ "eval_runtime": 285.356,
192
  "eval_samples_per_second": 0.07,
193
+ "eval_steps_per_second": 0.018,
194
  "step": 40
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "grad_norm": 0.024730732664465904,
199
  "learning_rate": 4.1333333333333333e-07,
200
+ "loss": 0.0578,
201
  "step": 45
202
  },
203
  {
204
  "epoch": 1.8,
205
  "eval_clip_ratio": 0.0,
206
+ "eval_completion_length": 246.9625,
207
+ "eval_kl": 2.7140974998474122e-05,
208
+ "eval_loss": 0.043169133365154266,
209
+ "eval_num_tokens": 278710.0,
210
+ "eval_reward": 0.125,
211
+ "eval_reward_std": 0.2,
212
+ "eval_rewards/equation_reward_func": 0.075,
213
+ "eval_rewards/format_reward_func": 0.05,
214
+ "eval_runtime": 285.2657,
215
  "eval_samples_per_second": 0.07,
216
+ "eval_steps_per_second": 0.018,
217
  "step": 45
218
  },
219
  {
220
  "epoch": 2.0,
221
+ "grad_norm": 0.03765915334224701,
222
  "learning_rate": 3.4666666666666665e-07,
223
+ "loss": 0.0234,
224
  "step": 50
225
  },
226
  {
227
  "epoch": 2.0,
228
  "eval_clip_ratio": 0.0,
229
+ "eval_completion_length": 247.05,
230
+ "eval_kl": 3.2660365104675296e-05,
231
+ "eval_loss": 0.026807209476828575,
232
+ "eval_num_tokens": 309954.0,
233
+ "eval_reward": 0.125,
234
+ "eval_reward_std": 0.20773502588272094,
235
+ "eval_rewards/equation_reward_func": 0.0875,
236
  "eval_rewards/format_reward_func": 0.0375,
237
+ "eval_runtime": 284.404,
238
  "eval_samples_per_second": 0.07,
239
  "eval_steps_per_second": 0.018,
240
  "step": 50
241
  },
242
  {
243
  "epoch": 2.2,
244
+ "grad_norm": 0.01991177722811699,
245
  "learning_rate": 2.8e-07,
246
+ "loss": 0.0691,
247
  "step": 55
248
  },
249
  {
250
  "epoch": 2.2,
251
  "eval_clip_ratio": 0.0,
252
+ "eval_completion_length": 242.6375,
253
+ "eval_kl": 3.68952751159668e-05,
254
+ "eval_loss": 0.04483898729085922,
255
+ "eval_num_tokens": 340605.0,
256
+ "eval_reward": 0.1625,
257
+ "eval_reward_std": 0.25560638308525085,
258
+ "eval_rewards/equation_reward_func": 0.1,
259
+ "eval_rewards/format_reward_func": 0.0625,
260
+ "eval_runtime": 283.6459,
261
+ "eval_samples_per_second": 0.071,
262
  "eval_steps_per_second": 0.018,
263
  "step": 55
264
  },
265
  {
266
  "epoch": 2.4,
267
+ "grad_norm": 0.0200728178024292,
268
  "learning_rate": 2.1333333333333334e-07,
269
+ "loss": 0.0518,
270
  "step": 60
271
  },
272
  {
273
  "epoch": 2.4,
274
  "eval_clip_ratio": 0.0,
275
+ "eval_completion_length": 250.45,
276
+ "eval_kl": 3.1629204750061034e-05,
277
+ "eval_loss": 0.009741068817675114,
278
+ "eval_num_tokens": 371457.0,
279
+ "eval_reward": 0.075,
280
+ "eval_reward_std": 0.12886751294136048,
281
+ "eval_rewards/equation_reward_func": 0.05,
282
+ "eval_rewards/format_reward_func": 0.025,
283
+ "eval_runtime": 283.7186,
284
  "eval_samples_per_second": 0.07,
285
  "eval_steps_per_second": 0.018,
286
  "step": 60
287
  },
288
  {
289
  "epoch": 2.6,
290
+ "grad_norm": 0.021919438615441322,
291
  "learning_rate": 1.4666666666666666e-07,
292
+ "loss": 0.0167,
293
  "step": 65
294
  },
295
  {
296
  "epoch": 2.6,
297
  "eval_clip_ratio": 0.0,
298
+ "eval_completion_length": 250.35,
299
+ "eval_kl": 2.8392672538757326e-05,
300
+ "eval_loss": 0.016574550420045853,
301
+ "eval_num_tokens": 402684.0,
302
+ "eval_reward": 0.1125,
303
+ "eval_reward_std": 0.14469234347343446,
304
+ "eval_rewards/equation_reward_func": 0.075,
305
+ "eval_rewards/format_reward_func": 0.0375,
306
+ "eval_runtime": 283.4564,
307
+ "eval_samples_per_second": 0.071,
308
  "eval_steps_per_second": 0.018,
309
  "step": 65
310
  },
311
  {
312
  "epoch": 2.8,
313
+ "grad_norm": 0.03317731246352196,
314
  "learning_rate": 8e-08,
315
+ "loss": 0.0358,
316
  "step": 70
317
  },
318
  {
319
  "epoch": 2.8,
320
  "eval_clip_ratio": 0.0,
321
+ "eval_completion_length": 246.825,
322
+ "eval_kl": 3.1587481498718264e-05,
323
+ "eval_loss": 0.04395188018679619,
324
+ "eval_num_tokens": 433755.0,
325
+ "eval_reward": 0.2,
326
+ "eval_reward_std": 0.3154700517654419,
327
+ "eval_rewards/equation_reward_func": 0.0625,
328
+ "eval_rewards/format_reward_func": 0.1375,
329
+ "eval_runtime": 284.1288,
330
  "eval_samples_per_second": 0.07,
331
  "eval_steps_per_second": 0.018,
332
  "step": 70
333
  },
334
  {
335
  "epoch": 3.0,
336
+ "grad_norm": 0.04194802790880203,
337
  "learning_rate": 1.3333333333333334e-08,
338
+ "loss": 0.0372,
339
  "step": 75
340
  },
341
  {
342
  "epoch": 3.0,
343
  "eval_clip_ratio": 0.0,
344
+ "eval_completion_length": 247.825,
345
+ "eval_kl": 3.3229589462280273e-05,
346
+ "eval_loss": 0.034269753843545914,
347
+ "eval_num_tokens": 464648.0,
348
+ "eval_reward": 0.1,
349
+ "eval_reward_std": 0.15,
350
  "eval_rewards/equation_reward_func": 0.0625,
351
+ "eval_rewards/format_reward_func": 0.0375,
352
+ "eval_runtime": 283.7321,
353
  "eval_samples_per_second": 0.07,
354
  "eval_steps_per_second": 0.018,
355
  "step": 75
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:633f04d4d98924999118658effbac31e2ac83252474889cdafe3a83c1ffa82c4
3
- size 6200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e10b521573cac25922d2ee9076e71c9a2aa5ac01265f1f3bac4705ac267a9aa
3
+ size 6264