Arittro2 commited on
Commit
9156f19
·
verified ·
1 Parent(s): cf98863

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -29,13 +29,13 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "q_proj",
33
  "v_proj",
34
- "down_proj",
35
  "k_proj",
36
- "up_proj",
37
- "o_proj",
38
- "gate_proj"
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "up_proj",
33
  "v_proj",
34
+ "q_proj",
35
  "k_proj",
36
+ "gate_proj",
37
+ "down_proj",
38
+ "o_proj"
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b8863a08a9e03e1385b1d8e43d6ceb78d559e59c3fd560051dd6c4ffea5ef01
3
  size 262406656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aac9ea0e9492c5121969cb8cd1243a9cbd537758d4beee2498d08aa1bfc7fb3b
3
  size 262406656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fd8fb0b9919222682bea835ce2ae0481fc1b0de959a33d4abb353b639eca7b1
3
- size 121633045
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1849c8173db31fb56cd75df79edfbc60942ddcc33bec2fbe2f50157adaf37e
3
+ size 122871883
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7f558d8446482efa60b14ceb530782da57f6ab2cf1d67c09fd1e5605f003a25
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81888abba6fcb84e8644e14930eab357fa25630389fc14dcfeecc975e067404d
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:427c0c946dd9504d04f599e31438298f1906271364d5cb9e0d7a9a8880940c29
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fb49e328593b94208faae32bb0c92c92ac86ee6160811712359ffbd888f082c
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.003819126183929117,
6
  "eval_steps": 500,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -138,11 +138,401 @@
138
  "rewards/quality_reward_func/mean": 0.020000000298023225,
139
  "rewards/quality_reward_func/std": 0.04000000059604645,
140
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  }
142
  ],
143
  "logging_steps": 10,
144
- "max_steps": 13092,
145
- "num_input_tokens_seen": 67990,
146
  "num_train_epochs": 1,
147
  "save_steps": 50,
148
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.013751375137513752,
6
  "eval_steps": 500,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
138
  "rewards/quality_reward_func/mean": 0.020000000298023225,
139
  "rewards/quality_reward_func/std": 0.04000000059604645,
140
  "step": 50
141
+ },
142
+ {
143
+ "completion_length": 1.0,
144
+ "completions/clipped_ratio": 0.0,
145
+ "completions/max_length": 1.0,
146
+ "completions/max_terminated_length": 1.0,
147
+ "completions/mean_length": 1.0,
148
+ "completions/mean_terminated_length": 1.0,
149
+ "completions/min_length": 1.0,
150
+ "completions/min_terminated_length": 1.0,
151
+ "epoch": 0.004125412541254125,
152
+ "frac_reward_zero_std": 1.0,
153
+ "grad_norm": 0.0,
154
+ "kl": 1.6421079635620118e-06,
155
+ "learning_rate": 1.9666666666666668e-06,
156
+ "loss": 0.0,
157
+ "num_tokens": 79438.0,
158
+ "reward": 0.0,
159
+ "reward_std": 0.0,
160
+ "rewards/coherence_reward_func/mean": 0.0,
161
+ "rewards/coherence_reward_func/std": 0.0,
162
+ "rewards/formatting_reward_func/mean": 0.0,
163
+ "rewards/formatting_reward_func/std": 0.0,
164
+ "rewards/quality_reward_func/mean": 0.0,
165
+ "rewards/quality_reward_func/std": 0.0,
166
+ "step": 60
167
+ },
168
+ {
169
+ "completion_length": 1.6,
170
+ "completions/clipped_ratio": 0.0,
171
+ "completions/max_length": 1.6,
172
+ "completions/max_terminated_length": 1.6,
173
+ "completions/mean_length": 1.6,
174
+ "completions/mean_terminated_length": 1.6,
175
+ "completions/min_length": 1.6,
176
+ "completions/min_terminated_length": 1.6,
177
+ "epoch": 0.004812981298129813,
178
+ "frac_reward_zero_std": 1.0,
179
+ "grad_norm": 0.0,
180
+ "kl": 1.8922347294392238e-06,
181
+ "learning_rate": 2.3000000000000004e-06,
182
+ "loss": 0.0,
183
+ "num_tokens": 92538.0,
184
+ "reward": 0.1100000023841858,
185
+ "reward_std": 0.0,
186
+ "rewards/coherence_reward_func/mean": 0.06000000238418579,
187
+ "rewards/coherence_reward_func/std": 0.0,
188
+ "rewards/formatting_reward_func/mean": 0.05,
189
+ "rewards/formatting_reward_func/std": 0.0,
190
+ "rewards/quality_reward_func/mean": 0.0,
191
+ "rewards/quality_reward_func/std": 0.0,
192
+ "step": 70
193
+ },
194
+ {
195
+ "completion_length": 1.0,
196
+ "completions/clipped_ratio": 0.0,
197
+ "completions/max_length": 1.0,
198
+ "completions/max_terminated_length": 1.0,
199
+ "completions/mean_length": 1.0,
200
+ "completions/mean_terminated_length": 1.0,
201
+ "completions/min_length": 1.0,
202
+ "completions/min_terminated_length": 1.0,
203
+ "epoch": 0.005500550055005501,
204
+ "frac_reward_zero_std": 1.0,
205
+ "grad_norm": 0.0,
206
+ "kl": 1.56402587890625e-05,
207
+ "learning_rate": 2.6333333333333332e-06,
208
+ "loss": 0.0,
209
+ "num_tokens": 106926.0,
210
+ "reward": 0.0,
211
+ "reward_std": 0.0,
212
+ "rewards/coherence_reward_func/mean": 0.0,
213
+ "rewards/coherence_reward_func/std": 0.0,
214
+ "rewards/formatting_reward_func/mean": 0.0,
215
+ "rewards/formatting_reward_func/std": 0.0,
216
+ "rewards/quality_reward_func/mean": 0.0,
217
+ "rewards/quality_reward_func/std": 0.0,
218
+ "step": 80
219
+ },
220
+ {
221
+ "completion_length": 1.0,
222
+ "completions/clipped_ratio": 0.0,
223
+ "completions/max_length": 1.0,
224
+ "completions/max_terminated_length": 1.0,
225
+ "completions/mean_length": 1.0,
226
+ "completions/mean_terminated_length": 1.0,
227
+ "completions/min_length": 1.0,
228
+ "completions/min_terminated_length": 1.0,
229
+ "epoch": 0.006188118811881188,
230
+ "frac_reward_zero_std": 1.0,
231
+ "grad_norm": 0.0,
232
+ "kl": 2.9624998569488524e-05,
233
+ "learning_rate": 2.9666666666666673e-06,
234
+ "loss": 0.0,
235
+ "num_tokens": 120118.0,
236
+ "reward": 0.0,
237
+ "reward_std": 0.0,
238
+ "rewards/coherence_reward_func/mean": 0.0,
239
+ "rewards/coherence_reward_func/std": 0.0,
240
+ "rewards/formatting_reward_func/mean": 0.0,
241
+ "rewards/formatting_reward_func/std": 0.0,
242
+ "rewards/quality_reward_func/mean": 0.0,
243
+ "rewards/quality_reward_func/std": 0.0,
244
+ "step": 90
245
+ },
246
+ {
247
+ "completion_length": 2.0,
248
+ "completions/clipped_ratio": 0.0,
249
+ "completions/max_length": 2.0,
250
+ "completions/max_terminated_length": 2.0,
251
+ "completions/mean_length": 1.25,
252
+ "completions/mean_terminated_length": 1.25,
253
+ "completions/min_length": 1.0,
254
+ "completions/min_terminated_length": 1.0,
255
+ "epoch": 0.006875687568756876,
256
+ "frac_reward_zero_std": 0.9,
257
+ "grad_norm": 0.0,
258
+ "kl": 0.0018734597397269681,
259
+ "learning_rate": 3.3000000000000006e-06,
260
+ "loss": 0.0,
261
+ "num_tokens": 134128.0,
262
+ "reward": 0.0899999976158142,
263
+ "reward_std": 0.1799999952316284,
264
+ "rewards/coherence_reward_func/mean": 0.032499998807907104,
265
+ "rewards/coherence_reward_func/std": 0.06499999761581421,
266
+ "rewards/formatting_reward_func/mean": 0.05,
267
+ "rewards/formatting_reward_func/std": 0.1,
268
+ "rewards/quality_reward_func/mean": 0.007500000298023224,
269
+ "rewards/quality_reward_func/std": 0.015000002086162567,
270
+ "step": 100
271
+ },
272
+ {
273
+ "completion_length": 5.9,
274
+ "completions/clipped_ratio": 0.0,
275
+ "completions/max_length": 5.9,
276
+ "completions/max_terminated_length": 5.9,
277
+ "completions/mean_length": 3.5,
278
+ "completions/mean_terminated_length": 3.5,
279
+ "completions/min_length": 1.0,
280
+ "completions/min_terminated_length": 1.0,
281
+ "epoch": 0.007563256325632563,
282
+ "frac_reward_zero_std": 0.7,
283
+ "grad_norm": 0.5969054698944092,
284
+ "kl": 0.3142867418937385,
285
+ "learning_rate": 3.633333333333334e-06,
286
+ "loss": 0.0,
287
+ "num_tokens": 149936.0,
288
+ "reward": 0.6974999606609344,
289
+ "reward_std": 0.574999988079071,
290
+ "rewards/coherence_reward_func/mean": 0.21999999284744262,
291
+ "rewards/coherence_reward_func/std": 0.1799999952316284,
292
+ "rewards/formatting_reward_func/mean": 0.3375,
293
+ "rewards/formatting_reward_func/std": 0.275,
294
+ "rewards/quality_reward_func/mean": 0.14000000506639482,
295
+ "rewards/quality_reward_func/std": 0.12000000178813934,
296
+ "step": 110
297
+ },
298
+ {
299
+ "completion_length": 10.3,
300
+ "completions/clipped_ratio": 0.0,
301
+ "completions/max_length": 10.3,
302
+ "completions/max_terminated_length": 10.3,
303
+ "completions/mean_length": 7.55,
304
+ "completions/mean_terminated_length": 7.55,
305
+ "completions/min_length": 6.0,
306
+ "completions/min_terminated_length": 6.0,
307
+ "epoch": 0.00825082508250825,
308
+ "frac_reward_zero_std": 0.7,
309
+ "grad_norm": 0.0026277885772287846,
310
+ "kl": 470.12245586041826,
311
+ "learning_rate": 3.966666666666667e-06,
312
+ "loss": 0.0119,
313
+ "num_tokens": 164358.0,
314
+ "reward": 1.6574999570846558,
315
+ "reward_std": 0.27903410643339155,
316
+ "rewards/coherence_reward_func/mean": 0.5399999856948853,
317
+ "rewards/coherence_reward_func/std": 0.09237603992223739,
318
+ "rewards/formatting_reward_func/mean": 0.8375,
319
+ "rewards/formatting_reward_func/std": 0.1404700517654419,
320
+ "rewards/quality_reward_func/mean": 0.2800000041723251,
321
+ "rewards/quality_reward_func/std": 0.046188023686408994,
322
+ "step": 120
323
+ },
324
+ {
325
+ "completion_length": 18.6,
326
+ "completions/clipped_ratio": 0.0,
327
+ "completions/max_length": 18.6,
328
+ "completions/max_terminated_length": 18.6,
329
+ "completions/mean_length": 15.8,
330
+ "completions/mean_terminated_length": 15.8,
331
+ "completions/min_length": 11.9,
332
+ "completions/min_terminated_length": 11.9,
333
+ "epoch": 0.008938393839383938,
334
+ "frac_reward_zero_std": 0.8,
335
+ "grad_norm": 0.0,
336
+ "kl": 0.46882193982601167,
337
+ "learning_rate": 4.3e-06,
338
+ "loss": 0.0,
339
+ "num_tokens": 178634.0,
340
+ "reward": 3.5749999046325684,
341
+ "reward_std": 0.22999999523162842,
342
+ "rewards/coherence_reward_func/mean": 1.1374999582767487,
343
+ "rewards/coherence_reward_func/std": 0.06499999761581421,
344
+ "rewards/formatting_reward_func/mean": 1.75,
345
+ "rewards/formatting_reward_func/std": 0.1,
346
+ "rewards/quality_reward_func/mean": 0.6875000119209289,
347
+ "rewards/quality_reward_func/std": 0.06500000059604645,
348
+ "step": 130
349
+ },
350
+ {
351
+ "completion_length": 20.6,
352
+ "completions/clipped_ratio": 0.0,
353
+ "completions/max_length": 20.6,
354
+ "completions/max_terminated_length": 20.6,
355
+ "completions/mean_length": 16.4,
356
+ "completions/mean_terminated_length": 16.4,
357
+ "completions/min_length": 11.8,
358
+ "completions/min_terminated_length": 11.8,
359
+ "epoch": 0.009625962596259627,
360
+ "frac_reward_zero_std": 0.7,
361
+ "grad_norm": 0.0,
362
+ "kl": 0.4925146855413914,
363
+ "learning_rate": 4.633333333333334e-06,
364
+ "loss": 0.0,
365
+ "num_tokens": 192926.0,
366
+ "reward": 3.2499999046325683,
367
+ "reward_std": 0.44126754999160767,
368
+ "rewards/coherence_reward_func/mean": 1.0424999654293061,
369
+ "rewards/coherence_reward_func/std": 0.14356523752212524,
370
+ "rewards/formatting_reward_func/mean": 1.6125,
371
+ "rewards/formatting_reward_func/std": 0.21160253882408142,
372
+ "rewards/quality_reward_func/mean": 0.595000010728836,
373
+ "rewards/quality_reward_func/std": 0.10350853204727173,
374
+ "step": 140
375
+ },
376
+ {
377
+ "completion_length": 23.0,
378
+ "completions/clipped_ratio": 0.0,
379
+ "completions/max_length": 23.0,
380
+ "completions/max_terminated_length": 23.0,
381
+ "completions/mean_length": 17.825,
382
+ "completions/mean_terminated_length": 17.825,
383
+ "completions/min_length": 13.0,
384
+ "completions/min_terminated_length": 13.0,
385
+ "epoch": 0.010313531353135313,
386
+ "frac_reward_zero_std": 0.9,
387
+ "grad_norm": 0.0,
388
+ "kl": 1.0868607074022294,
389
+ "learning_rate": 4.966666666666667e-06,
390
+ "loss": 0.0,
391
+ "num_tokens": 206363.0,
392
+ "reward": 4.059999895095825,
393
+ "reward_std": 0.07999999523162842,
394
+ "rewards/coherence_reward_func/mean": 1.2924999475479126,
395
+ "rewards/coherence_reward_func/std": 0.01499999761581421,
396
+ "rewards/formatting_reward_func/mean": 1.9875,
397
+ "rewards/formatting_reward_func/std": 0.025,
398
+ "rewards/quality_reward_func/mean": 0.7800000131130218,
399
+ "rewards/quality_reward_func/std": 0.04000000059604645,
400
+ "step": 150
401
+ },
402
+ {
403
+ "completion_length": 31.1,
404
+ "completions/clipped_ratio": 0.0,
405
+ "completions/max_length": 31.1,
406
+ "completions/max_terminated_length": 31.1,
407
+ "completions/mean_length": 20.3,
408
+ "completions/mean_terminated_length": 20.3,
409
+ "completions/min_length": 14.3,
410
+ "completions/min_terminated_length": 14.3,
411
+ "epoch": 0.011001100110011002,
412
+ "frac_reward_zero_std": 0.9,
413
+ "grad_norm": 0.0007889735861681402,
414
+ "kl": 0.7975522613618523,
415
+ "learning_rate": 4.999451708687114e-06,
416
+ "loss": 0.0,
417
+ "num_tokens": 219259.0,
418
+ "reward": 4.079999899864196,
419
+ "reward_std": 0.04000000059604645,
420
+ "rewards/coherence_reward_func/mean": 1.2924999475479126,
421
+ "rewards/coherence_reward_func/std": 0.01499999761581421,
422
+ "rewards/formatting_reward_func/mean": 1.9875,
423
+ "rewards/formatting_reward_func/std": 0.025,
424
+ "rewards/quality_reward_func/mean": 0.800000011920929,
425
+ "rewards/quality_reward_func/std": 0.0,
426
+ "step": 160
427
+ },
428
+ {
429
+ "completion_length": 34.6,
430
+ "completions/clipped_ratio": 0.0,
431
+ "completions/max_length": 34.6,
432
+ "completions/max_terminated_length": 34.6,
433
+ "completions/mean_length": 23.1,
434
+ "completions/mean_terminated_length": 23.1,
435
+ "completions/min_length": 17.5,
436
+ "completions/min_terminated_length": 17.5,
437
+ "epoch": 0.011688668866886688,
438
+ "frac_reward_zero_std": 0.8,
439
+ "grad_norm": 0.419414222240448,
440
+ "kl": 0.7056910984218121,
441
+ "learning_rate": 4.9975566894538954e-06,
442
+ "loss": 0.0,
443
+ "num_tokens": 235091.0,
444
+ "reward": 4.077499866485596,
445
+ "reward_std": 0.03232050836086273,
446
+ "rewards/coherence_reward_func/mean": 1.2774999499320985,
447
+ "rewards/coherence_reward_func/std": 0.03232050389051437,
448
+ "rewards/formatting_reward_func/mean": 2.0,
449
+ "rewards/formatting_reward_func/std": 0.0,
450
+ "rewards/quality_reward_func/mean": 0.800000011920929,
451
+ "rewards/quality_reward_func/std": 0.0,
452
+ "step": 170
453
+ },
454
+ {
455
+ "completion_length": 22.9,
456
+ "completions/clipped_ratio": 0.0,
457
+ "completions/max_length": 22.9,
458
+ "completions/max_terminated_length": 22.9,
459
+ "completions/mean_length": 18.05,
460
+ "completions/mean_terminated_length": 18.05,
461
+ "completions/min_length": 14.7,
462
+ "completions/min_terminated_length": 14.7,
463
+ "epoch": 0.012376237623762377,
464
+ "frac_reward_zero_std": 0.9,
465
+ "grad_norm": 0.0,
466
+ "kl": 1124.961197933089,
467
+ "learning_rate": 4.994309199213748e-06,
468
+ "loss": 0.0461,
469
+ "num_tokens": 247253.0,
470
+ "reward": 4.074999904632568,
471
+ "reward_std": 0.028867512941360474,
472
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
473
+ "rewards/coherence_reward_func/std": 0.0,
474
+ "rewards/formatting_reward_func/mean": 2.0,
475
+ "rewards/formatting_reward_func/std": 0.0,
476
+ "rewards/quality_reward_func/mean": 0.7750000119209289,
477
+ "rewards/quality_reward_func/std": 0.028867512941360474,
478
+ "step": 180
479
+ },
480
+ {
481
+ "completion_length": 22.5,
482
+ "completions/clipped_ratio": 0.0,
483
+ "completions/max_length": 22.5,
484
+ "completions/max_terminated_length": 22.5,
485
+ "completions/mean_length": 20.05,
486
+ "completions/mean_terminated_length": 20.05,
487
+ "completions/min_length": 17.4,
488
+ "completions/min_terminated_length": 17.4,
489
+ "epoch": 0.013063806380638063,
490
+ "frac_reward_zero_std": 1.0,
491
+ "grad_norm": 0.0,
492
+ "kl": 0.693901395983994,
493
+ "learning_rate": 4.989710996539926e-06,
494
+ "loss": 0.0,
495
+ "num_tokens": 261675.0,
496
+ "reward": 4.099999904632568,
497
+ "reward_std": 0.0,
498
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
499
+ "rewards/coherence_reward_func/std": 0.0,
500
+ "rewards/formatting_reward_func/mean": 2.0,
501
+ "rewards/formatting_reward_func/std": 0.0,
502
+ "rewards/quality_reward_func/mean": 0.800000011920929,
503
+ "rewards/quality_reward_func/std": 0.0,
504
+ "step": 190
505
+ },
506
+ {
507
+ "completion_length": 25.2,
508
+ "completions/clipped_ratio": 0.0,
509
+ "completions/max_length": 25.2,
510
+ "completions/max_terminated_length": 25.2,
511
+ "completions/mean_length": 21.575,
512
+ "completions/mean_terminated_length": 21.575,
513
+ "completions/min_length": 17.8,
514
+ "completions/min_terminated_length": 17.8,
515
+ "epoch": 0.013751375137513752,
516
+ "frac_reward_zero_std": 1.0,
517
+ "grad_norm": 0.0,
518
+ "kl": 0.7635734604671598,
519
+ "learning_rate": 4.983764571440296e-06,
520
+ "loss": 0.0,
521
+ "num_tokens": 277602.0,
522
+ "reward": 4.099999904632568,
523
+ "reward_std": 0.0,
524
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
525
+ "rewards/coherence_reward_func/std": 0.0,
526
+ "rewards/formatting_reward_func/mean": 2.0,
527
+ "rewards/formatting_reward_func/std": 0.0,
528
+ "rewards/quality_reward_func/mean": 0.800000011920929,
529
+ "rewards/quality_reward_func/std": 0.0,
530
+ "step": 200
531
  }
532
  ],
533
  "logging_steps": 10,
534
+ "max_steps": 1500,
535
+ "num_input_tokens_seen": 277602,
536
  "num_train_epochs": 1,
537
  "save_steps": 50,
538
  "stateful_callbacks": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddb04574f7b9ade574879398471f2f78cd542b3e6ae301dc53991b01c8eb01df
3
  size 7057
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517ca74ddbfa64fa78f010d3585be0b071bd656ca06cca4c198fbdfb003305e6
3
  size 7057