Safetensors
qwen3
FLAMEHAZE commited on
Commit
8b6f8a7
·
verified ·
1 Parent(s): 0f67def

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -1282
trainer_state.json DELETED
@@ -1,1282 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 0.9984,
6
- "eval_steps": 500,
7
- "global_step": 78,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.0128,
14
- "grad_norm": 8.806152221916586,
15
- "learning_rate": 1.25e-07,
16
- "logits/chosen": -0.603271484375,
17
- "logits/rejected": -0.609130859375,
18
- "logps/chosen": -0.79052734375,
19
- "logps/rejected": -0.80419921875,
20
- "loss": 1.5099,
21
- "nll_loss": 0.76611328125,
22
- "rewards/accuracies": 0.5078125,
23
- "rewards/chosen": -0.157958984375,
24
- "rewards/margins": 0.0026302337646484375,
25
- "rewards/rejected": -0.16064453125,
26
- "step": 1
27
- },
28
- {
29
- "epoch": 0.0256,
30
- "grad_norm": 10.295030998935504,
31
- "learning_rate": 2.5e-07,
32
- "logits/chosen": -0.54736328125,
33
- "logits/rejected": -0.553466796875,
34
- "logps/chosen": -0.86181640625,
35
- "logps/rejected": -0.8994140625,
36
- "loss": 1.5817,
37
- "nll_loss": 0.8330078125,
38
- "rewards/accuracies": 0.5234375,
39
- "rewards/chosen": -0.1724853515625,
40
- "rewards/margins": 0.00754547119140625,
41
- "rewards/rejected": -0.1798095703125,
42
- "step": 2
43
- },
44
- {
45
- "epoch": 0.0384,
46
- "grad_norm": 9.606994036202016,
47
- "learning_rate": 3.75e-07,
48
- "logits/chosen": -0.573486328125,
49
- "logits/rejected": -0.5751953125,
50
- "logps/chosen": -0.7890625,
51
- "logps/rejected": -0.85205078125,
52
- "loss": 1.5178,
53
- "nll_loss": 0.77392578125,
54
- "rewards/accuracies": 0.5234375,
55
- "rewards/chosen": -0.1578369140625,
56
- "rewards/margins": 0.01264190673828125,
57
- "rewards/rejected": -0.170654296875,
58
- "step": 3
59
- },
60
- {
61
- "epoch": 0.0512,
62
- "grad_norm": 9.199710542852184,
63
- "learning_rate": 5e-07,
64
- "logits/chosen": -0.542724609375,
65
- "logits/rejected": -0.5205078125,
66
- "logps/chosen": -0.80712890625,
67
- "logps/rejected": -0.83447265625,
68
- "loss": 1.556,
69
- "nll_loss": 0.80517578125,
70
- "rewards/accuracies": 0.5,
71
- "rewards/chosen": -0.1614990234375,
72
- "rewards/margins": 0.005481719970703125,
73
- "rewards/rejected": -0.1669921875,
74
- "step": 4
75
- },
76
- {
77
- "epoch": 0.064,
78
- "grad_norm": 8.376911002240213,
79
- "learning_rate": 6.249999999999999e-07,
80
- "logits/chosen": -0.5986328125,
81
- "logits/rejected": -0.60107421875,
82
- "logps/chosen": -0.7783203125,
83
- "logps/rejected": -0.81396484375,
84
- "loss": 1.522,
85
- "nll_loss": 0.77880859375,
86
- "rewards/accuracies": 0.578125,
87
- "rewards/chosen": -0.15576171875,
88
- "rewards/margins": 0.00701141357421875,
89
- "rewards/rejected": -0.16259765625,
90
- "step": 5
91
- },
92
- {
93
- "epoch": 0.0768,
94
- "grad_norm": 8.510503674314895,
95
- "learning_rate": 7.5e-07,
96
- "logits/chosen": -0.583740234375,
97
- "logits/rejected": -0.547119140625,
98
- "logps/chosen": -0.80078125,
99
- "logps/rejected": -0.82666015625,
100
- "loss": 1.5448,
101
- "nll_loss": 0.79638671875,
102
- "rewards/accuracies": 0.4921875,
103
- "rewards/chosen": -0.16015625,
104
- "rewards/margins": 0.005126953125,
105
- "rewards/rejected": -0.1650390625,
106
- "step": 6
107
- },
108
- {
109
- "epoch": 0.0896,
110
- "grad_norm": 8.952623070299966,
111
- "learning_rate": 8.75e-07,
112
- "logits/chosen": -0.56787109375,
113
- "logits/rejected": -0.58544921875,
114
- "logps/chosen": -0.80615234375,
115
- "logps/rejected": -0.8486328125,
116
- "loss": 1.5365,
117
- "nll_loss": 0.79150390625,
118
- "rewards/accuracies": 0.578125,
119
- "rewards/chosen": -0.1612548828125,
120
- "rewards/margins": 0.008548736572265625,
121
- "rewards/rejected": -0.1695556640625,
122
- "step": 7
123
- },
124
- {
125
- "epoch": 0.1024,
126
- "grad_norm": 7.748818338200202,
127
- "learning_rate": 1e-06,
128
- "logits/chosen": -0.57763671875,
129
- "logits/rejected": -0.548828125,
130
- "logps/chosen": -0.77783203125,
131
- "logps/rejected": -0.81201171875,
132
- "loss": 1.5188,
133
- "nll_loss": 0.7744140625,
134
- "rewards/accuracies": 0.578125,
135
- "rewards/chosen": -0.1556396484375,
136
- "rewards/margins": 0.0068511962890625,
137
- "rewards/rejected": -0.1622314453125,
138
- "step": 8
139
- },
140
- {
141
- "epoch": 0.1152,
142
- "grad_norm": 8.056921633356916,
143
- "learning_rate": 9.994965332706572e-07,
144
- "logits/chosen": -0.57373046875,
145
- "logits/rejected": -0.57373046875,
146
- "logps/chosen": -0.8466796875,
147
- "logps/rejected": -0.81640625,
148
- "loss": 1.5742,
149
- "nll_loss": 0.82666015625,
150
- "rewards/accuracies": 0.5,
151
- "rewards/chosen": -0.16943359375,
152
- "rewards/margins": -0.006122589111328125,
153
- "rewards/rejected": -0.163330078125,
154
- "step": 9
155
- },
156
- {
157
- "epoch": 0.128,
158
- "grad_norm": 4.494952443924689,
159
- "learning_rate": 9.979871469976195e-07,
160
- "logits/chosen": -0.630859375,
161
- "logits/rejected": -0.6259765625,
162
- "logps/chosen": -0.7421875,
163
- "logps/rejected": -0.73193359375,
164
- "loss": 1.4967,
165
- "nll_loss": 0.75048828125,
166
- "rewards/accuracies": 0.453125,
167
- "rewards/chosen": -0.1485595703125,
168
- "rewards/margins": -0.002147674560546875,
169
- "rewards/rejected": -0.1466064453125,
170
- "step": 10
171
- },
172
- {
173
- "epoch": 0.1408,
174
- "grad_norm": 4.844771345193396,
175
- "learning_rate": 9.954748808839674e-07,
176
- "logits/chosen": -0.63134765625,
177
- "logits/rejected": -0.626953125,
178
- "logps/chosen": -0.78759765625,
179
- "logps/rejected": -0.79541015625,
180
- "loss": 1.5304,
181
- "nll_loss": 0.791015625,
182
- "rewards/accuracies": 0.4921875,
183
- "rewards/chosen": -0.1575927734375,
184
- "rewards/margins": 0.00148773193359375,
185
- "rewards/rejected": -0.158935546875,
186
- "step": 11
187
- },
188
- {
189
- "epoch": 0.1536,
190
- "grad_norm": 4.494757883851837,
191
- "learning_rate": 9.919647942993147e-07,
192
- "logits/chosen": -0.65283203125,
193
- "logits/rejected": -0.64892578125,
194
- "logps/chosen": -0.7685546875,
195
- "logps/rejected": -0.74462890625,
196
- "loss": 1.5063,
197
- "nll_loss": 0.76025390625,
198
- "rewards/accuracies": 0.4296875,
199
- "rewards/chosen": -0.153564453125,
200
- "rewards/margins": -0.0048351287841796875,
201
- "rewards/rejected": -0.1488037109375,
202
- "step": 12
203
- },
204
- {
205
- "epoch": 0.1664,
206
- "grad_norm": 4.166590855226656,
207
- "learning_rate": 9.874639560909118e-07,
208
- "logits/chosen": -0.65966796875,
209
- "logits/rejected": -0.619140625,
210
- "logps/chosen": -0.7275390625,
211
- "logps/rejected": -0.74951171875,
212
- "loss": 1.4634,
213
- "nll_loss": 0.7216796875,
214
- "rewards/accuracies": 0.5234375,
215
- "rewards/chosen": -0.1453857421875,
216
- "rewards/margins": 0.0044803619384765625,
217
- "rewards/rejected": -0.14990234375,
218
- "step": 13
219
- },
220
- {
221
- "epoch": 0.1792,
222
- "grad_norm": 3.92079299651966,
223
- "learning_rate": 9.819814303479267e-07,
224
- "logits/chosen": -0.78564453125,
225
- "logits/rejected": -0.771484375,
226
- "logps/chosen": -0.6826171875,
227
- "logps/rejected": -0.7392578125,
228
- "loss": 1.4136,
229
- "nll_loss": 0.68017578125,
230
- "rewards/accuracies": 0.4921875,
231
- "rewards/chosen": -0.1365966796875,
232
- "rewards/margins": 0.011296272277832031,
233
- "rewards/rejected": -0.1475830078125,
234
- "step": 14
235
- },
236
- {
237
- "epoch": 0.192,
238
- "grad_norm": 3.8602455794330903,
239
- "learning_rate": 9.755282581475767e-07,
240
- "logits/chosen": -0.81591796875,
241
- "logits/rejected": -0.8095703125,
242
- "logps/chosen": -0.6787109375,
243
- "logps/rejected": -0.7314453125,
244
- "loss": 1.4152,
245
- "nll_loss": 0.67919921875,
246
- "rewards/accuracies": 0.515625,
247
- "rewards/chosen": -0.13555908203125,
248
- "rewards/margins": 0.010591506958007812,
249
- "rewards/rejected": -0.146240234375,
250
- "step": 15
251
- },
252
- {
253
- "epoch": 0.2048,
254
- "grad_norm": 3.7707893130154932,
255
- "learning_rate": 9.681174353198686e-07,
256
- "logits/chosen": -0.833984375,
257
- "logits/rejected": -0.81689453125,
258
- "logps/chosen": -0.65380859375,
259
- "logps/rejected": -0.71435546875,
260
- "loss": 1.371,
261
- "nll_loss": 0.638671875,
262
- "rewards/accuracies": 0.59375,
263
- "rewards/chosen": -0.1307373046875,
264
- "rewards/margins": 0.012170791625976562,
265
- "rewards/rejected": -0.1429443359375,
266
- "step": 16
267
- },
268
- {
269
- "epoch": 0.2176,
270
- "grad_norm": 3.766035255465641,
271
- "learning_rate": 9.597638862757253e-07,
272
- "logits/chosen": -0.904296875,
273
- "logits/rejected": -0.89111328125,
274
- "logps/chosen": -0.67138671875,
275
- "logps/rejected": -0.69287109375,
276
- "loss": 1.4225,
277
- "nll_loss": 0.68896484375,
278
- "rewards/accuracies": 0.5390625,
279
- "rewards/chosen": -0.13427734375,
280
- "rewards/margins": 0.0042171478271484375,
281
- "rewards/rejected": -0.138671875,
282
- "step": 17
283
- },
284
- {
285
- "epoch": 0.2304,
286
- "grad_norm": 3.471968249039566,
287
- "learning_rate": 9.504844339512094e-07,
288
- "logits/chosen": -0.95751953125,
289
- "logits/rejected": -0.95556640625,
290
- "logps/chosen": -0.65478515625,
291
- "logps/rejected": -0.67822265625,
292
- "loss": 1.4049,
293
- "nll_loss": 0.67041015625,
294
- "rewards/accuracies": 0.5078125,
295
- "rewards/chosen": -0.130859375,
296
- "rewards/margins": 0.004756927490234375,
297
- "rewards/rejected": -0.13555908203125,
298
- "step": 18
299
- },
300
- {
301
- "epoch": 0.2432,
302
- "grad_norm": 3.624883189456873,
303
- "learning_rate": 9.402977659283689e-07,
304
- "logits/chosen": -1.06982421875,
305
- "logits/rejected": -1.03466796875,
306
- "logps/chosen": -0.658203125,
307
- "logps/rejected": -0.6982421875,
308
- "loss": 1.3997,
309
- "nll_loss": 0.6669921875,
310
- "rewards/accuracies": 0.5625,
311
- "rewards/chosen": -0.131591796875,
312
- "rewards/margins": 0.00794219970703125,
313
- "rewards/rejected": -0.13958740234375,
314
- "step": 19
315
- },
316
- {
317
- "epoch": 0.256,
318
- "grad_norm": 3.776625903303235,
319
- "learning_rate": 9.29224396800933e-07,
320
- "logits/chosen": -1.0859375,
321
- "logits/rejected": -1.07421875,
322
- "logps/chosen": -0.70361328125,
323
- "logps/rejected": -0.70263671875,
324
- "loss": 1.4501,
325
- "nll_loss": 0.71630859375,
326
- "rewards/accuracies": 0.4609375,
327
- "rewards/chosen": -0.140869140625,
328
- "rewards/margins": -0.0001373291015625,
329
- "rewards/rejected": -0.140625,
330
- "step": 20
331
- },
332
- {
333
- "epoch": 0.2688,
334
- "grad_norm": 3.9609518257799983,
335
- "learning_rate": 9.172866268606513e-07,
336
- "logits/chosen": -1.0908203125,
337
- "logits/rejected": -1.08740234375,
338
- "logps/chosen": -0.69873046875,
339
- "logps/rejected": -0.72412109375,
340
- "loss": 1.4291,
341
- "nll_loss": 0.7021484375,
342
- "rewards/accuracies": 0.46875,
343
- "rewards/chosen": -0.139892578125,
344
- "rewards/margins": 0.00504302978515625,
345
- "rewards/rejected": -0.14483642578125,
346
- "step": 21
347
- },
348
- {
349
- "epoch": 0.2816,
350
- "grad_norm": 4.302366208928238,
351
- "learning_rate": 9.045084971874737e-07,
352
- "logits/chosen": -1.30078125,
353
- "logits/rejected": -1.294921875,
354
- "logps/chosen": -0.6845703125,
355
- "logps/rejected": -0.66650390625,
356
- "loss": 1.4155,
357
- "nll_loss": 0.677734375,
358
- "rewards/accuracies": 0.5,
359
- "rewards/chosen": -0.1368408203125,
360
- "rewards/margins": -0.0035572052001953125,
361
- "rewards/rejected": -0.13323974609375,
362
- "step": 22
363
- },
364
- {
365
- "epoch": 0.2944,
366
- "grad_norm": 3.58562486403533,
367
- "learning_rate": 8.909157412340149e-07,
368
- "logits/chosen": -1.2666015625,
369
- "logits/rejected": -1.294921875,
370
- "logps/chosen": -0.63330078125,
371
- "logps/rejected": -0.65185546875,
372
- "loss": 1.3572,
373
- "nll_loss": 0.62646484375,
374
- "rewards/accuracies": 0.5390625,
375
- "rewards/chosen": -0.12664794921875,
376
- "rewards/margins": 0.003692626953125,
377
- "rewards/rejected": -0.13043212890625,
378
- "step": 23
379
- },
380
- {
381
- "epoch": 0.3072,
382
- "grad_norm": 4.211955158078266,
383
- "learning_rate": 8.765357330018055e-07,
384
- "logits/chosen": -1.1435546875,
385
- "logits/rejected": -1.126953125,
386
- "logps/chosen": -0.69140625,
387
- "logps/rejected": -0.68115234375,
388
- "loss": 1.4253,
389
- "nll_loss": 0.69287109375,
390
- "rewards/accuracies": 0.4765625,
391
- "rewards/chosen": -0.13812255859375,
392
- "rewards/margins": -0.0019550323486328125,
393
- "rewards/rejected": -0.13629150390625,
394
- "step": 24
395
- },
396
- {
397
- "epoch": 0.32,
398
- "grad_norm": 3.6426142550151868,
399
- "learning_rate": 8.613974319136957e-07,
400
- "logits/chosen": -1.046875,
401
- "logits/rejected": -1.03515625,
402
- "logps/chosen": -0.6533203125,
403
- "logps/rejected": -0.62353515625,
404
- "loss": 1.3998,
405
- "nll_loss": 0.6640625,
406
- "rewards/accuracies": 0.4609375,
407
- "rewards/chosen": -0.130615234375,
408
- "rewards/margins": -0.005828857421875,
409
- "rewards/rejected": -0.12469482421875,
410
- "step": 25
411
- },
412
- {
413
- "epoch": 0.3328,
414
- "grad_norm": 3.170105238410109,
415
- "learning_rate": 8.455313244934324e-07,
416
- "logits/chosen": -0.92333984375,
417
- "logits/rejected": -0.90771484375,
418
- "logps/chosen": -0.6552734375,
419
- "logps/rejected": -0.65478515625,
420
- "loss": 1.403,
421
- "nll_loss": 0.6640625,
422
- "rewards/accuracies": 0.4921875,
423
- "rewards/chosen": -0.13092041015625,
424
- "rewards/margins": -6.67572021484375e-05,
425
- "rewards/rejected": -0.13104248046875,
426
- "step": 26
427
- },
428
- {
429
- "epoch": 0.3456,
430
- "grad_norm": 3.525274842281527,
431
- "learning_rate": 8.289693629698563e-07,
432
- "logits/chosen": -0.84326171875,
433
- "logits/rejected": -0.81298828125,
434
- "logps/chosen": -0.63037109375,
435
- "logps/rejected": -0.65234375,
436
- "loss": 1.3705,
437
- "nll_loss": 0.6376953125,
438
- "rewards/accuracies": 0.5234375,
439
- "rewards/chosen": -0.12615966796875,
440
- "rewards/margins": 0.004344940185546875,
441
- "rewards/rejected": -0.130615234375,
442
- "step": 27
443
- },
444
- {
445
- "epoch": 0.3584,
446
- "grad_norm": 3.183918605214547,
447
- "learning_rate": 8.117449009293668e-07,
448
- "logits/chosen": -0.806640625,
449
- "logits/rejected": -0.8115234375,
450
- "logps/chosen": -0.65087890625,
451
- "logps/rejected": -0.65576171875,
452
- "loss": 1.3967,
453
- "nll_loss": 0.6630859375,
454
- "rewards/accuracies": 0.515625,
455
- "rewards/chosen": -0.13018798828125,
456
- "rewards/margins": 0.0011444091796875,
457
- "rewards/rejected": -0.13128662109375,
458
- "step": 28
459
- },
460
- {
461
- "epoch": 0.3712,
462
- "grad_norm": 3.444618483901198,
463
- "learning_rate": 7.938926261462365e-07,
464
- "logits/chosen": -0.76025390625,
465
- "logits/rejected": -0.74072265625,
466
- "logps/chosen": -0.64453125,
467
- "logps/rejected": -0.67333984375,
468
- "loss": 1.373,
469
- "nll_loss": 0.6455078125,
470
- "rewards/accuracies": 0.5234375,
471
- "rewards/chosen": -0.12890625,
472
- "rewards/margins": 0.005756378173828125,
473
- "rewards/rejected": -0.1346435546875,
474
- "step": 29
475
- },
476
- {
477
- "epoch": 0.384,
478
- "grad_norm": 2.991968211612028,
479
- "learning_rate": 7.754484907260512e-07,
480
- "logits/chosen": -0.81640625,
481
- "logits/rejected": -0.7978515625,
482
- "logps/chosen": -0.62548828125,
483
- "logps/rejected": -0.64306640625,
484
- "loss": 1.3764,
485
- "nll_loss": 0.64501953125,
486
- "rewards/accuracies": 0.5234375,
487
- "rewards/chosen": -0.1251220703125,
488
- "rewards/margins": 0.003475189208984375,
489
- "rewards/rejected": -0.12872314453125,
490
- "step": 30
491
- },
492
- {
493
- "epoch": 0.3968,
494
- "grad_norm": 3.3394459626962907,
495
- "learning_rate": 7.564496387029531e-07,
496
- "logits/chosen": -0.796875,
497
- "logits/rejected": -0.77587890625,
498
- "logps/chosen": -0.60498046875,
499
- "logps/rejected": -0.63427734375,
500
- "loss": 1.344,
501
- "nll_loss": 0.61181640625,
502
- "rewards/accuracies": 0.5390625,
503
- "rewards/chosen": -0.1209716796875,
504
- "rewards/margins": 0.005809783935546875,
505
- "rewards/rejected": -0.12664794921875,
506
- "step": 31
507
- },
508
- {
509
- "epoch": 0.4096,
510
- "grad_norm": 3.1372300962795117,
511
- "learning_rate": 7.369343312364993e-07,
512
- "logits/chosen": -0.79248046875,
513
- "logits/rejected": -0.763671875,
514
- "logps/chosen": -0.609375,
515
- "logps/rejected": -0.66357421875,
516
- "loss": 1.3492,
517
- "nll_loss": 0.62060546875,
518
- "rewards/accuracies": 0.5234375,
519
- "rewards/chosen": -0.121826171875,
520
- "rewards/margins": 0.01091766357421875,
521
- "rewards/rejected": -0.13275146484375,
522
- "step": 32
523
- },
524
- {
525
- "epoch": 0.4224,
526
- "grad_norm": 2.9015142762383306,
527
- "learning_rate": 7.16941869558779e-07,
528
- "logits/chosen": -0.8271484375,
529
- "logits/rejected": -0.81298828125,
530
- "logps/chosen": -0.6416015625,
531
- "logps/rejected": -0.5986328125,
532
- "loss": 1.3815,
533
- "nll_loss": 0.6455078125,
534
- "rewards/accuracies": 0.4296875,
535
- "rewards/chosen": -0.12841796875,
536
- "rewards/margins": -0.008785247802734375,
537
- "rewards/rejected": -0.11944580078125,
538
- "step": 33
539
- },
540
- {
541
- "epoch": 0.4352,
542
- "grad_norm": 2.8996886974175613,
543
- "learning_rate": 6.965125158269618e-07,
544
- "logits/chosen": -0.83349609375,
545
- "logits/rejected": -0.802734375,
546
- "logps/chosen": -0.62060546875,
547
- "logps/rejected": -0.671875,
548
- "loss": 1.3428,
549
- "nll_loss": 0.61962890625,
550
- "rewards/accuracies": 0.53125,
551
- "rewards/chosen": -0.1240234375,
552
- "rewards/margins": 0.01045989990234375,
553
- "rewards/rejected": -0.1343994140625,
554
- "step": 34
555
- },
556
- {
557
- "epoch": 0.448,
558
- "grad_norm": 3.2300028712965543,
559
- "learning_rate": 6.756874120406714e-07,
560
- "logits/chosen": -0.87744140625,
561
- "logits/rejected": -0.84326171875,
562
- "logps/chosen": -0.67919921875,
563
- "logps/rejected": -0.64697265625,
564
- "loss": 1.4155,
565
- "nll_loss": 0.68017578125,
566
- "rewards/accuracies": 0.4296875,
567
- "rewards/chosen": -0.1356201171875,
568
- "rewards/margins": -0.0063838958740234375,
569
- "rewards/rejected": -0.1295166015625,
570
- "step": 35
571
- },
572
- {
573
- "epoch": 0.4608,
574
- "grad_norm": 3.2182679046770506,
575
- "learning_rate": 6.545084971874736e-07,
576
- "logits/chosen": -0.7998046875,
577
- "logits/rejected": -0.7900390625,
578
- "logps/chosen": -0.69189453125,
579
- "logps/rejected": -0.66015625,
580
- "loss": 1.4003,
581
- "nll_loss": 0.66796875,
582
- "rewards/accuracies": 0.46875,
583
- "rewards/chosen": -0.13861083984375,
584
- "rewards/margins": -0.0065364837646484375,
585
- "rewards/rejected": -0.13177490234375,
586
- "step": 36
587
- },
588
- {
589
- "epoch": 0.4736,
590
- "grad_norm": 3.357656349751252,
591
- "learning_rate": 6.330184227833375e-07,
592
- "logits/chosen": -0.75,
593
- "logits/rejected": -0.77734375,
594
- "logps/chosen": -0.6796875,
595
- "logps/rejected": -0.6865234375,
596
- "loss": 1.3884,
597
- "nll_loss": 0.6611328125,
598
- "rewards/accuracies": 0.484375,
599
- "rewards/chosen": -0.13580322265625,
600
- "rewards/margins": 0.00138092041015625,
601
- "rewards/rejected": -0.13726806640625,
602
- "step": 37
603
- },
604
- {
605
- "epoch": 0.4864,
606
- "grad_norm": 2.958420783527872,
607
- "learning_rate": 6.112604669781572e-07,
608
- "logits/chosen": -0.80078125,
609
- "logits/rejected": -0.7607421875,
610
- "logps/chosen": -0.62548828125,
611
- "logps/rejected": -0.62060546875,
612
- "loss": 1.3707,
613
- "nll_loss": 0.63818359375,
614
- "rewards/accuracies": 0.5390625,
615
- "rewards/chosen": -0.1251220703125,
616
- "rewards/margins": -0.0009746551513671875,
617
- "rewards/rejected": -0.12420654296875,
618
- "step": 38
619
- },
620
- {
621
- "epoch": 0.4992,
622
- "grad_norm": 2.9518014676807196,
623
- "learning_rate": 5.892784473993183e-07,
624
- "logits/chosen": -0.76416015625,
625
- "logits/rejected": -0.75390625,
626
- "logps/chosen": -0.62060546875,
627
- "logps/rejected": -0.64599609375,
628
- "loss": 1.3696,
629
- "nll_loss": 0.64501953125,
630
- "rewards/accuracies": 0.4921875,
631
- "rewards/chosen": -0.1241455078125,
632
- "rewards/margins": 0.004924774169921875,
633
- "rewards/rejected": -0.129150390625,
634
- "step": 39
635
- },
636
- {
637
- "epoch": 0.512,
638
- "grad_norm": 3.378944917137857,
639
- "learning_rate": 5.671166329088277e-07,
640
- "logits/chosen": -0.7734375,
641
- "logits/rejected": -0.765625,
642
- "logps/chosen": -0.6513671875,
643
- "logps/rejected": -0.63671875,
644
- "loss": 1.3698,
645
- "nll_loss": 0.63525390625,
646
- "rewards/accuracies": 0.4921875,
647
- "rewards/chosen": -0.13043212890625,
648
- "rewards/margins": -0.0031585693359375,
649
- "rewards/rejected": -0.12725830078125,
650
- "step": 40
651
- },
652
- {
653
- "epoch": 0.5248,
654
- "grad_norm": 2.9085205396792935,
655
- "learning_rate": 5.448196544517167e-07,
656
- "logits/chosen": -0.732421875,
657
- "logits/rejected": -0.75390625,
658
- "logps/chosen": -0.60546875,
659
- "logps/rejected": -0.61376953125,
660
- "loss": 1.3409,
661
- "nll_loss": 0.6103515625,
662
- "rewards/accuracies": 0.5390625,
663
- "rewards/chosen": -0.12115478515625,
664
- "rewards/margins": 0.0018444061279296875,
665
- "rewards/rejected": -0.12286376953125,
666
- "step": 41
667
- },
668
- {
669
- "epoch": 0.5376,
670
- "grad_norm": 3.181717208399589,
671
- "learning_rate": 5.224324151752575e-07,
672
- "logits/chosen": -0.6962890625,
673
- "logits/rejected": -0.6875,
674
- "logps/chosen": -0.703125,
675
- "logps/rejected": -0.64794921875,
676
- "loss": 1.4314,
677
- "nll_loss": 0.6953125,
678
- "rewards/accuracies": 0.421875,
679
- "rewards/chosen": -0.14068603515625,
680
- "rewards/margins": -0.0110321044921875,
681
- "rewards/rejected": -0.1295166015625,
682
- "step": 42
683
- },
684
- {
685
- "epoch": 0.5504,
686
- "grad_norm": 2.9543674015200043,
687
- "learning_rate": 5e-07,
688
- "logits/chosen": -0.6279296875,
689
- "logits/rejected": -0.63818359375,
690
- "logps/chosen": -0.62158203125,
691
- "logps/rejected": -0.68359375,
692
- "loss": 1.3519,
693
- "nll_loss": 0.62548828125,
694
- "rewards/accuracies": 0.546875,
695
- "rewards/chosen": -0.1243896484375,
696
- "rewards/margins": 0.01232147216796875,
697
- "rewards/rejected": -0.1365966796875,
698
- "step": 43
699
- },
700
- {
701
- "epoch": 0.5632,
702
- "grad_norm": 2.886051167891686,
703
- "learning_rate": 4.775675848247427e-07,
704
- "logits/chosen": -0.66796875,
705
- "logits/rejected": -0.6806640625,
706
- "logps/chosen": -0.62890625,
707
- "logps/rejected": -0.64501953125,
708
- "loss": 1.3691,
709
- "nll_loss": 0.64013671875,
710
- "rewards/accuracies": 0.4296875,
711
- "rewards/chosen": -0.12591552734375,
712
- "rewards/margins": 0.003047943115234375,
713
- "rewards/rejected": -0.1290283203125,
714
- "step": 44
715
- },
716
- {
717
- "epoch": 0.576,
718
- "grad_norm": 2.9113924204417487,
719
- "learning_rate": 4.5518034554828327e-07,
720
- "logits/chosen": -0.673828125,
721
- "logits/rejected": -0.65234375,
722
- "logps/chosen": -0.619140625,
723
- "logps/rejected": -0.629150390625,
724
- "loss": 1.3697,
725
- "nll_loss": 0.63916015625,
726
- "rewards/accuracies": 0.4453125,
727
- "rewards/chosen": -0.12384033203125,
728
- "rewards/margins": 0.001987457275390625,
729
- "rewards/rejected": -0.1258544921875,
730
- "step": 45
731
- },
732
- {
733
- "epoch": 0.5888,
734
- "grad_norm": 2.8855468683899925,
735
- "learning_rate": 4.328833670911724e-07,
736
- "logits/chosen": -0.580078125,
737
- "logits/rejected": -0.5810546875,
738
- "logps/chosen": -0.638671875,
739
- "logps/rejected": -0.6015625,
740
- "loss": 1.3763,
741
- "nll_loss": 0.6435546875,
742
- "rewards/accuracies": 0.4453125,
743
- "rewards/chosen": -0.12762451171875,
744
- "rewards/margins": -0.00727081298828125,
745
- "rewards/rejected": -0.120361328125,
746
- "step": 46
747
- },
748
- {
749
- "epoch": 0.6016,
750
- "grad_norm": 3.0797159953277253,
751
- "learning_rate": 4.107215526006817e-07,
752
- "logits/chosen": -0.59326171875,
753
- "logits/rejected": -0.58642578125,
754
- "logps/chosen": -0.65673828125,
755
- "logps/rejected": -0.6201171875,
756
- "loss": 1.3972,
757
- "nll_loss": 0.66162109375,
758
- "rewards/accuracies": 0.484375,
759
- "rewards/chosen": -0.13153076171875,
760
- "rewards/margins": -0.0073795318603515625,
761
- "rewards/rejected": -0.12408447265625,
762
- "step": 47
763
- },
764
- {
765
- "epoch": 0.6144,
766
- "grad_norm": 2.8066569349734865,
767
- "learning_rate": 3.8873953302184283e-07,
768
- "logits/chosen": -0.609619140625,
769
- "logits/rejected": -0.586669921875,
770
- "logps/chosen": -0.65576171875,
771
- "logps/rejected": -0.63525390625,
772
- "loss": 1.3826,
773
- "nll_loss": 0.650390625,
774
- "rewards/accuracies": 0.40625,
775
- "rewards/chosen": -0.1312255859375,
776
- "rewards/margins": -0.0042667388916015625,
777
- "rewards/rejected": -0.1270751953125,
778
- "step": 48
779
- },
780
- {
781
- "epoch": 0.6272,
782
- "grad_norm": 2.8520211464952627,
783
- "learning_rate": 3.669815772166625e-07,
784
- "logits/chosen": -0.55810546875,
785
- "logits/rejected": -0.52392578125,
786
- "logps/chosen": -0.59716796875,
787
- "logps/rejected": -0.61669921875,
788
- "loss": 1.3468,
789
- "nll_loss": 0.6171875,
790
- "rewards/accuracies": 0.453125,
791
- "rewards/chosen": -0.1195068359375,
792
- "rewards/margins": 0.00395965576171875,
793
- "rewards/rejected": -0.12322998046875,
794
- "step": 49
795
- },
796
- {
797
- "epoch": 0.64,
798
- "grad_norm": 2.7486285344409525,
799
- "learning_rate": 3.454915028125263e-07,
800
- "logits/chosen": -0.614013671875,
801
- "logits/rejected": -0.58740234375,
802
- "logps/chosen": -0.68701171875,
803
- "logps/rejected": -0.6611328125,
804
- "loss": 1.4158,
805
- "nll_loss": 0.6806640625,
806
- "rewards/accuracies": 0.4140625,
807
- "rewards/chosen": -0.1375732421875,
808
- "rewards/margins": -0.0052089691162109375,
809
- "rewards/rejected": -0.1324462890625,
810
- "step": 50
811
- },
812
- {
813
- "epoch": 0.6528,
814
- "grad_norm": 2.703029476528635,
815
- "learning_rate": 3.243125879593286e-07,
816
- "logits/chosen": -0.6357421875,
817
- "logits/rejected": -0.63671875,
818
- "logps/chosen": -0.65234375,
819
- "logps/rejected": -0.625,
820
- "loss": 1.3873,
821
- "nll_loss": 0.65283203125,
822
- "rewards/accuracies": 0.421875,
823
- "rewards/chosen": -0.13043212890625,
824
- "rewards/margins": -0.0056476593017578125,
825
- "rewards/rejected": -0.12469482421875,
826
- "step": 51
827
- },
828
- {
829
- "epoch": 0.6656,
830
- "grad_norm": 2.913686937773188,
831
- "learning_rate": 3.034874841730382e-07,
832
- "logits/chosen": -0.62744140625,
833
- "logits/rejected": -0.63427734375,
834
- "logps/chosen": -0.6162109375,
835
- "logps/rejected": -0.592041015625,
836
- "loss": 1.3665,
837
- "nll_loss": 0.63330078125,
838
- "rewards/accuracies": 0.4765625,
839
- "rewards/chosen": -0.12322998046875,
840
- "rewards/margins": -0.004718780517578125,
841
- "rewards/rejected": -0.11846923828125,
842
- "step": 52
843
- },
844
- {
845
- "epoch": 0.6784,
846
- "grad_norm": 2.8293012838870455,
847
- "learning_rate": 2.8305813044122093e-07,
848
- "logits/chosen": -0.603271484375,
849
- "logits/rejected": -0.560546875,
850
- "logps/chosen": -0.63525390625,
851
- "logps/rejected": -0.6455078125,
852
- "loss": 1.3786,
853
- "nll_loss": 0.6474609375,
854
- "rewards/accuracies": 0.4765625,
855
- "rewards/chosen": -0.1270751953125,
856
- "rewards/margins": 0.002071380615234375,
857
- "rewards/rejected": -0.12908935546875,
858
- "step": 53
859
- },
860
- {
861
- "epoch": 0.6912,
862
- "grad_norm": 2.9232332876985856,
863
- "learning_rate": 2.6306566876350067e-07,
864
- "logits/chosen": -0.585205078125,
865
- "logits/rejected": -0.56689453125,
866
- "logps/chosen": -0.59716796875,
867
- "logps/rejected": -0.619140625,
868
- "loss": 1.3566,
869
- "nll_loss": 0.6279296875,
870
- "rewards/accuracies": 0.5703125,
871
- "rewards/chosen": -0.11944580078125,
872
- "rewards/margins": 0.0044689178466796875,
873
- "rewards/rejected": -0.12384033203125,
874
- "step": 54
875
- },
876
- {
877
- "epoch": 0.704,
878
- "grad_norm": 2.896454906763871,
879
- "learning_rate": 2.4355036129704696e-07,
880
- "logits/chosen": -0.61572265625,
881
- "logits/rejected": -0.5947265625,
882
- "logps/chosen": -0.69970703125,
883
- "logps/rejected": -0.66015625,
884
- "loss": 1.4196,
885
- "nll_loss": 0.68603515625,
886
- "rewards/accuracies": 0.4453125,
887
- "rewards/chosen": -0.1400146484375,
888
- "rewards/margins": -0.00769805908203125,
889
- "rewards/rejected": -0.13226318359375,
890
- "step": 55
891
- },
892
- {
893
- "epoch": 0.7168,
894
- "grad_norm": 3.1419979908995637,
895
- "learning_rate": 2.2455150927394878e-07,
896
- "logits/chosen": -0.609375,
897
- "logits/rejected": -0.6103515625,
898
- "logps/chosen": -0.669921875,
899
- "logps/rejected": -0.66162109375,
900
- "loss": 1.4077,
901
- "nll_loss": 0.6787109375,
902
- "rewards/accuracies": 0.4375,
903
- "rewards/chosen": -0.1339111328125,
904
- "rewards/margins": -0.001544952392578125,
905
- "rewards/rejected": -0.1322021484375,
906
- "step": 56
907
- },
908
- {
909
- "epoch": 0.7296,
910
- "grad_norm": 2.6977667650287436,
911
- "learning_rate": 2.0610737385376348e-07,
912
- "logits/chosen": -0.626953125,
913
- "logits/rejected": -0.64013671875,
914
- "logps/chosen": -0.6337890625,
915
- "logps/rejected": -0.65771484375,
916
- "loss": 1.3617,
917
- "nll_loss": 0.62744140625,
918
- "rewards/accuracies": 0.4921875,
919
- "rewards/chosen": -0.1268310546875,
920
- "rewards/margins": 0.0049648284912109375,
921
- "rewards/rejected": -0.13165283203125,
922
- "step": 57
923
- },
924
- {
925
- "epoch": 0.7424,
926
- "grad_norm": 2.6962689015906887,
927
- "learning_rate": 1.8825509907063326e-07,
928
- "logits/chosen": -0.60107421875,
929
- "logits/rejected": -0.595703125,
930
- "logps/chosen": -0.67333984375,
931
- "logps/rejected": -0.65576171875,
932
- "loss": 1.4066,
933
- "nll_loss": 0.67236328125,
934
- "rewards/accuracies": 0.46875,
935
- "rewards/chosen": -0.13458251953125,
936
- "rewards/margins": -0.0033740997314453125,
937
- "rewards/rejected": -0.13134765625,
938
- "step": 58
939
- },
940
- {
941
- "epoch": 0.7552,
942
- "grad_norm": 2.831274649742223,
943
- "learning_rate": 1.710306370301437e-07,
944
- "logits/chosen": -0.606689453125,
945
- "logits/rejected": -0.6005859375,
946
- "logps/chosen": -0.6484375,
947
- "logps/rejected": -0.63916015625,
948
- "loss": 1.387,
949
- "nll_loss": 0.65234375,
950
- "rewards/accuracies": 0.4375,
951
- "rewards/chosen": -0.129638671875,
952
- "rewards/margins": -0.0019359588623046875,
953
- "rewards/rejected": -0.12774658203125,
954
- "step": 59
955
- },
956
- {
957
- "epoch": 0.768,
958
- "grad_norm": 2.8001049294565368,
959
- "learning_rate": 1.5446867550656767e-07,
960
- "logits/chosen": -0.61376953125,
961
- "logits/rejected": -0.606689453125,
962
- "logps/chosen": -0.57080078125,
963
- "logps/rejected": -0.60791015625,
964
- "loss": 1.3087,
965
- "nll_loss": 0.5830078125,
966
- "rewards/accuracies": 0.5234375,
967
- "rewards/chosen": -0.114013671875,
968
- "rewards/margins": 0.007526397705078125,
969
- "rewards/rejected": -0.12158203125,
970
- "step": 60
971
- },
972
- {
973
- "epoch": 0.7808,
974
- "grad_norm": 2.6480528965763517,
975
- "learning_rate": 1.3860256808630427e-07,
976
- "logits/chosen": -0.648193359375,
977
- "logits/rejected": -0.64013671875,
978
- "logps/chosen": -0.593994140625,
979
- "logps/rejected": -0.638671875,
980
- "loss": 1.3475,
981
- "nll_loss": 0.62060546875,
982
- "rewards/accuracies": 0.53125,
983
- "rewards/chosen": -0.1187744140625,
984
- "rewards/margins": 0.009082794189453125,
985
- "rewards/rejected": -0.1278076171875,
986
- "step": 61
987
- },
988
- {
989
- "epoch": 0.7936,
990
- "grad_norm": 2.8416205177773515,
991
- "learning_rate": 1.2346426699819456e-07,
992
- "logits/chosen": -0.5224609375,
993
- "logits/rejected": -0.52880859375,
994
- "logps/chosen": -0.60302734375,
995
- "logps/rejected": -0.620361328125,
996
- "loss": 1.3488,
997
- "nll_loss": 0.62255859375,
998
- "rewards/accuracies": 0.5078125,
999
- "rewards/chosen": -0.12066650390625,
1000
- "rewards/margins": 0.00335693359375,
1001
- "rewards/rejected": -0.1240234375,
1002
- "step": 62
1003
- },
1004
- {
1005
- "epoch": 0.8064,
1006
- "grad_norm": 2.991877049209725,
1007
- "learning_rate": 1.090842587659851e-07,
1008
- "logits/chosen": -0.56982421875,
1009
- "logits/rejected": -0.55224609375,
1010
- "logps/chosen": -0.65283203125,
1011
- "logps/rejected": -0.61962890625,
1012
- "loss": 1.3911,
1013
- "nll_loss": 0.65869140625,
1014
- "rewards/accuracies": 0.453125,
1015
- "rewards/chosen": -0.13055419921875,
1016
- "rewards/margins": -0.0066547393798828125,
1017
- "rewards/rejected": -0.1239013671875,
1018
- "step": 63
1019
- },
1020
- {
1021
- "epoch": 0.8192,
1022
- "grad_norm": 2.9240870950358966,
1023
- "learning_rate": 9.549150281252632e-08,
1024
- "logits/chosen": -0.64208984375,
1025
- "logits/rejected": -0.62939453125,
1026
- "logps/chosen": -0.6103515625,
1027
- "logps/rejected": -0.68408203125,
1028
- "loss": 1.3376,
1029
- "nll_loss": 0.61083984375,
1030
- "rewards/accuracies": 0.53125,
1031
- "rewards/chosen": -0.1221923828125,
1032
- "rewards/margins": 0.014739990234375,
1033
- "rewards/rejected": -0.13702392578125,
1034
- "step": 64
1035
- },
1036
- {
1037
- "epoch": 0.832,
1038
- "grad_norm": 2.8243780447328737,
1039
- "learning_rate": 8.271337313934867e-08,
1040
- "logits/chosen": -0.61376953125,
1041
- "logits/rejected": -0.61181640625,
1042
- "logps/chosen": -0.62109375,
1043
- "logps/rejected": -0.63037109375,
1044
- "loss": 1.366,
1045
- "nll_loss": 0.6328125,
1046
- "rewards/accuracies": 0.53125,
1047
- "rewards/chosen": -0.12420654296875,
1048
- "rewards/margins": 0.001834869384765625,
1049
- "rewards/rejected": -0.1260986328125,
1050
- "step": 65
1051
- },
1052
- {
1053
- "epoch": 0.8448,
1054
- "grad_norm": 2.7696317108399424,
1055
- "learning_rate": 7.077560319906694e-08,
1056
- "logits/chosen": -0.611328125,
1057
- "logits/rejected": -0.583984375,
1058
- "logps/chosen": -0.62158203125,
1059
- "logps/rejected": -0.6455078125,
1060
- "loss": 1.3609,
1061
- "nll_loss": 0.63330078125,
1062
- "rewards/accuracies": 0.5078125,
1063
- "rewards/chosen": -0.124267578125,
1064
- "rewards/margins": 0.00481414794921875,
1065
- "rewards/rejected": -0.129150390625,
1066
- "step": 66
1067
- },
1068
- {
1069
- "epoch": 0.8576,
1070
- "grad_norm": 2.830373976297677,
1071
- "learning_rate": 5.9702234071631e-08,
1072
- "logits/chosen": -0.65771484375,
1073
- "logits/rejected": -0.63037109375,
1074
- "logps/chosen": -0.619140625,
1075
- "logps/rejected": -0.60791015625,
1076
- "loss": 1.3408,
1077
- "nll_loss": 0.611328125,
1078
- "rewards/accuracies": 0.484375,
1079
- "rewards/chosen": -0.12371826171875,
1080
- "rewards/margins": -0.0019969940185546875,
1081
- "rewards/rejected": -0.1217041015625,
1082
- "step": 67
1083
- },
1084
- {
1085
- "epoch": 0.8704,
1086
- "grad_norm": 3.1263626179626285,
1087
- "learning_rate": 4.951556604879048e-08,
1088
- "logits/chosen": -0.590087890625,
1089
- "logits/rejected": -0.60205078125,
1090
- "logps/chosen": -0.6533203125,
1091
- "logps/rejected": -0.68310546875,
1092
- "loss": 1.3989,
1093
- "nll_loss": 0.6640625,
1094
- "rewards/accuracies": 0.53125,
1095
- "rewards/chosen": -0.13067626953125,
1096
- "rewards/margins": 0.005911827087402344,
1097
- "rewards/rejected": -0.13671875,
1098
- "step": 68
1099
- },
1100
- {
1101
- "epoch": 0.8832,
1102
- "grad_norm": 2.6445234966305287,
1103
- "learning_rate": 4.023611372427471e-08,
1104
- "logits/chosen": -0.63037109375,
1105
- "logits/rejected": -0.60400390625,
1106
- "logps/chosen": -0.6259765625,
1107
- "logps/rejected": -0.63330078125,
1108
- "loss": 1.3585,
1109
- "nll_loss": 0.626953125,
1110
- "rewards/accuracies": 0.4453125,
1111
- "rewards/chosen": -0.12506103515625,
1112
- "rewards/margins": 0.0015201568603515625,
1113
- "rewards/rejected": -0.1265869140625,
1114
- "step": 69
1115
- },
1116
- {
1117
- "epoch": 0.896,
1118
- "grad_norm": 2.7600272041859806,
1119
- "learning_rate": 3.188256468013139e-08,
1120
- "logits/chosen": -0.606689453125,
1121
- "logits/rejected": -0.603515625,
1122
- "logps/chosen": -0.62939453125,
1123
- "logps/rejected": -0.64794921875,
1124
- "loss": 1.3466,
1125
- "nll_loss": 0.61669921875,
1126
- "rewards/accuracies": 0.5234375,
1127
- "rewards/chosen": -0.1260986328125,
1128
- "rewards/margins": 0.00376129150390625,
1129
- "rewards/rejected": -0.129638671875,
1130
- "step": 70
1131
- },
1132
- {
1133
- "epoch": 0.9088,
1134
- "grad_norm": 2.8920578163456443,
1135
- "learning_rate": 2.4471741852423233e-08,
1136
- "logits/chosen": -0.602294921875,
1137
- "logits/rejected": -0.62451171875,
1138
- "logps/chosen": -0.6181640625,
1139
- "logps/rejected": -0.638671875,
1140
- "loss": 1.3366,
1141
- "nll_loss": 0.60791015625,
1142
- "rewards/accuracies": 0.4921875,
1143
- "rewards/chosen": -0.12371826171875,
1144
- "rewards/margins": 0.0041656494140625,
1145
- "rewards/rejected": -0.12786865234375,
1146
- "step": 71
1147
- },
1148
- {
1149
- "epoch": 0.9216,
1150
- "grad_norm": 2.824155688076418,
1151
- "learning_rate": 1.8018569652073378e-08,
1152
- "logits/chosen": -0.599609375,
1153
- "logits/rejected": -0.596923828125,
1154
- "logps/chosen": -0.65673828125,
1155
- "logps/rejected": -0.62646484375,
1156
- "loss": 1.3907,
1157
- "nll_loss": 0.65576171875,
1158
- "rewards/accuracies": 0.421875,
1159
- "rewards/chosen": -0.13116455078125,
1160
- "rewards/margins": -0.005977630615234375,
1161
- "rewards/rejected": -0.1251220703125,
1162
- "step": 72
1163
- },
1164
- {
1165
- "epoch": 0.9344,
1166
- "grad_norm": 2.853212647277363,
1167
- "learning_rate": 1.253604390908819e-08,
1168
- "logits/chosen": -0.614990234375,
1169
- "logits/rejected": -0.59326171875,
1170
- "logps/chosen": -0.6591796875,
1171
- "logps/rejected": -0.64306640625,
1172
- "loss": 1.4049,
1173
- "nll_loss": 0.671875,
1174
- "rewards/accuracies": 0.4453125,
1175
- "rewards/chosen": -0.13177490234375,
1176
- "rewards/margins": -0.00328826904296875,
1177
- "rewards/rejected": -0.128662109375,
1178
- "step": 73
1179
- },
1180
- {
1181
- "epoch": 0.9472,
1182
- "grad_norm": 2.7951321419475836,
1183
- "learning_rate": 8.035205700685165e-09,
1184
- "logits/chosen": -0.595703125,
1185
- "logits/rejected": -0.59765625,
1186
- "logps/chosen": -0.669921875,
1187
- "logps/rejected": -0.63427734375,
1188
- "loss": 1.3859,
1189
- "nll_loss": 0.65185546875,
1190
- "rewards/accuracies": 0.4765625,
1191
- "rewards/chosen": -0.13385009765625,
1192
- "rewards/margins": -0.0070209503173828125,
1193
- "rewards/rejected": -0.126953125,
1194
- "step": 74
1195
- },
1196
- {
1197
- "epoch": 0.96,
1198
- "grad_norm": 3.0464830977927186,
1199
- "learning_rate": 4.5251191160326495e-09,
1200
- "logits/chosen": -0.63232421875,
1201
- "logits/rejected": -0.66015625,
1202
- "logps/chosen": -0.64501953125,
1203
- "logps/rejected": -0.64453125,
1204
- "loss": 1.3751,
1205
- "nll_loss": 0.6435546875,
1206
- "rewards/accuracies": 0.5,
1207
- "rewards/chosen": -0.1290283203125,
1208
- "rewards/margins": -9.1552734375e-05,
1209
- "rewards/rejected": -0.12890625,
1210
- "step": 75
1211
- },
1212
- {
1213
- "epoch": 0.9728,
1214
- "grad_norm": 2.6891038234268576,
1215
- "learning_rate": 2.0128530023804656e-09,
1216
- "logits/chosen": -0.65966796875,
1217
- "logits/rejected": -0.6416015625,
1218
- "logps/chosen": -0.61376953125,
1219
- "logps/rejected": -0.62841796875,
1220
- "loss": 1.3403,
1221
- "nll_loss": 0.60986328125,
1222
- "rewards/accuracies": 0.5,
1223
- "rewards/chosen": -0.1226806640625,
1224
- "rewards/margins": 0.00304412841796875,
1225
- "rewards/rejected": -0.12579345703125,
1226
- "step": 76
1227
- },
1228
- {
1229
- "epoch": 0.9856,
1230
- "grad_norm": 2.8901246333719484,
1231
- "learning_rate": 5.034667293427053e-10,
1232
- "logits/chosen": -0.557373046875,
1233
- "logits/rejected": -0.55078125,
1234
- "logps/chosen": -0.62939453125,
1235
- "logps/rejected": -0.62890625,
1236
- "loss": 1.3691,
1237
- "nll_loss": 0.6416015625,
1238
- "rewards/accuracies": 0.4609375,
1239
- "rewards/chosen": -0.1258544921875,
1240
- "rewards/margins": -0.000141143798828125,
1241
- "rewards/rejected": -0.1256103515625,
1242
- "step": 77
1243
- },
1244
- {
1245
- "epoch": 0.9984,
1246
- "grad_norm": 2.938601997312395,
1247
- "learning_rate": 0.0,
1248
- "logits/chosen": -0.515625,
1249
- "logits/rejected": -0.54541015625,
1250
- "logps/chosen": -0.6240234375,
1251
- "logps/rejected": -0.6455078125,
1252
- "loss": 1.3417,
1253
- "nll_loss": 0.61376953125,
1254
- "rewards/accuracies": 0.5078125,
1255
- "rewards/chosen": -0.124755859375,
1256
- "rewards/margins": 0.004207611083984375,
1257
- "rewards/rejected": -0.1290283203125,
1258
- "step": 78
1259
- }
1260
- ],
1261
- "logging_steps": 1.0,
1262
- "max_steps": 78,
1263
- "num_input_tokens_seen": 0,
1264
- "num_train_epochs": 1,
1265
- "save_steps": 500,
1266
- "stateful_callbacks": {
1267
- "TrainerControl": {
1268
- "args": {
1269
- "should_epoch_stop": false,
1270
- "should_evaluate": false,
1271
- "should_log": false,
1272
- "should_save": true,
1273
- "should_training_stop": true
1274
- },
1275
- "attributes": {}
1276
- }
1277
- },
1278
- "total_flos": 0.0,
1279
- "train_batch_size": 2,
1280
- "trial_name": null,
1281
- "trial_params": null
1282
- }