Muhammed164 commited on
Commit
6919f0a
·
verified ·
1 Parent(s): 97db53a

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6578cecfe50e7d3a20d040c9b1cdda32c58f9cfcb54fdc5334ee95fe69c2eddf
3
  size 204500912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96db5154f98f00e2835e99ed5a8fbdf293d8f63243a6f707c81db39c6c06f0a2
3
  size 204500912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d1cf02cb795b4d866231697a267728073ceac5b926c4192928980f2ac671e58
3
- size 104062923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfdbeaba6f405ddd835994d4498a70666b22a02f42c00b97d4334a7f1e1e3bb2
3
+ size 104062731
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b147ab74f32279b151eadea15baf5f3d04f7bd6e1b4f97bb054a7a56cf5a70d9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89c7e585bf25f929251599f24890acceb440d06b049a496384748f1eed552840
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,618 +2,168 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0611498836822866,
6
  "eval_steps": 500,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.026586905948820207,
14
- "grad_norm": 229.74172973632812,
15
- "learning_rate": 1.8000000000000001e-06,
16
- "logits/chosen": 1.892960548400879,
17
- "logits/rejected": 2.2739109992980957,
18
- "logps/chosen": -180.7786102294922,
19
- "logps/rejected": -296.7843017578125,
20
- "loss": 41.754217529296874,
21
  "rewards/accuracies": 0.643750011920929,
22
- "rewards/chosen": 127.22708892822266,
23
- "rewards/margins": 51.65830612182617,
24
- "rewards/rejected": 75.56879425048828,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.053173811897640415,
29
- "grad_norm": 127.54531860351562,
30
- "learning_rate": 3.8000000000000005e-06,
31
- "logits/chosen": 2.360779285430908,
32
- "logits/rejected": 2.5129952430725098,
33
- "logps/chosen": -198.55577087402344,
34
- "logps/rejected": -311.8116149902344,
35
- "loss": 34.334066772460936,
36
- "rewards/accuracies": 0.637499988079071,
37
- "rewards/chosen": 133.30413818359375,
38
- "rewards/margins": 69.63650512695312,
39
- "rewards/rejected": 63.667640686035156,
40
  "step": 20
41
  },
42
  {
43
  "epoch": 0.07976071784646062,
44
- "grad_norm": 195.1651153564453,
45
- "learning_rate": 5.8e-06,
46
- "logits/chosen": 2.7846579551696777,
47
- "logits/rejected": 2.912212610244751,
48
- "logps/chosen": -201.8841094970703,
49
- "logps/rejected": -343.28619384765625,
50
- "loss": 31.000405883789064,
51
- "rewards/accuracies": 0.706250011920929,
52
- "rewards/chosen": 149.70481872558594,
53
- "rewards/margins": 90.86196899414062,
54
- "rewards/rejected": 58.842857360839844,
55
  "step": 30
56
  },
57
  {
58
  "epoch": 0.10634762379528083,
59
- "grad_norm": 217.8875274658203,
60
- "learning_rate": 7.800000000000002e-06,
61
- "logits/chosen": 2.687530040740967,
62
- "logits/rejected": 2.89595365524292,
63
- "logps/chosen": -180.60574340820312,
64
- "logps/rejected": -395.2937927246094,
65
- "loss": 33.74924926757812,
66
- "rewards/accuracies": 0.675000011920929,
67
- "rewards/chosen": 124.906494140625,
68
- "rewards/margins": 96.38619995117188,
69
- "rewards/rejected": 28.52029037475586,
70
  "step": 40
71
  },
72
  {
73
  "epoch": 0.13293452974410103,
74
- "grad_norm": 123.57787322998047,
75
- "learning_rate": 9.800000000000001e-06,
76
- "logits/chosen": 3.6891541481018066,
77
- "logits/rejected": 3.7287964820861816,
78
- "logps/chosen": -233.40304565429688,
79
- "logps/rejected": -501.949951171875,
80
- "loss": 33.165060424804686,
81
- "rewards/accuracies": 0.706250011920929,
82
- "rewards/chosen": 132.43130493164062,
83
- "rewards/margins": 153.64065551757812,
84
- "rewards/rejected": -21.209354400634766,
85
  "step": 50
86
  },
87
  {
88
  "epoch": 0.15952143569292124,
89
- "grad_norm": 240.26422119140625,
90
- "learning_rate": 9.742857142857143e-06,
91
- "logits/chosen": 4.325669288635254,
92
- "logits/rejected": 4.669508934020996,
93
- "logps/chosen": -227.3274688720703,
94
- "logps/rejected": -761.2488403320312,
95
- "loss": 9.919349670410156,
96
- "rewards/accuracies": 0.8374999761581421,
97
- "rewards/chosen": 78.26126861572266,
98
- "rewards/margins": 259.2169494628906,
99
- "rewards/rejected": -180.95565795898438,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 0.18610834164174145,
104
- "grad_norm": 18.67397117614746,
105
- "learning_rate": 9.457142857142858e-06,
106
- "logits/chosen": 6.029001235961914,
107
- "logits/rejected": 6.398881912231445,
108
- "logps/chosen": -361.8104248046875,
109
- "logps/rejected": -936.2852783203125,
110
- "loss": 8.505984497070312,
111
- "rewards/accuracies": 0.8999999761581421,
112
- "rewards/chosen": 62.65727615356445,
113
- "rewards/margins": 295.85296630859375,
114
- "rewards/rejected": -233.1957550048828,
115
  "step": 70
116
  },
117
  {
118
  "epoch": 0.21269524759056166,
119
- "grad_norm": 424.631591796875,
120
- "learning_rate": 9.171428571428572e-06,
121
- "logits/chosen": 7.3346147537231445,
122
- "logits/rejected": 7.6371870040893555,
123
- "logps/chosen": -487.5819396972656,
124
- "logps/rejected": -1231.750244140625,
125
- "loss": 5.0393016815185545,
126
- "rewards/accuracies": 0.925000011920929,
127
- "rewards/chosen": 3.2465362548828125,
128
- "rewards/margins": 386.61541748046875,
129
- "rewards/rejected": -383.368896484375,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 0.23928215353938184,
134
- "grad_norm": 58.2872200012207,
135
- "learning_rate": 8.885714285714286e-06,
136
- "logits/chosen": 7.502760410308838,
137
- "logits/rejected": 7.7781524658203125,
138
- "logps/chosen": -515.661376953125,
139
- "logps/rejected": -1255.3602294921875,
140
- "loss": 2.7182350158691406,
141
- "rewards/accuracies": 0.956250011920929,
142
- "rewards/chosen": -3.7650718688964844,
143
- "rewards/margins": 391.1650390625,
144
- "rewards/rejected": -394.9300842285156,
145
  "step": 90
146
  },
147
  {
148
  "epoch": 0.26586905948820205,
149
- "grad_norm": 0.0009401601273566484,
150
- "learning_rate": 8.6e-06,
151
- "logits/chosen": 6.9696502685546875,
152
- "logits/rejected": 7.490464687347412,
153
- "logps/chosen": -407.62591552734375,
154
- "logps/rejected": -1201.7757568359375,
155
- "loss": 1.852958869934082,
156
- "rewards/accuracies": 0.9437500238418579,
157
- "rewards/chosen": 16.64017105102539,
158
- "rewards/margins": 388.97650146484375,
159
- "rewards/rejected": -372.3362731933594,
160
  "step": 100
161
- },
162
- {
163
- "epoch": 0.2924559654370223,
164
- "grad_norm": 776.8017578125,
165
- "learning_rate": 8.314285714285715e-06,
166
- "logits/chosen": 7.605508327484131,
167
- "logits/rejected": 8.056883811950684,
168
- "logps/chosen": -500.75714111328125,
169
- "logps/rejected": -1358.50732421875,
170
- "loss": 0.4602807998657227,
171
- "rewards/accuracies": 0.9750000238418579,
172
- "rewards/chosen": -11.148886680603027,
173
- "rewards/margins": 444.7156677246094,
174
- "rewards/rejected": -455.8646545410156,
175
- "step": 110
176
- },
177
- {
178
- "epoch": 0.3190428713858425,
179
- "grad_norm": 9.229455307652179e-14,
180
- "learning_rate": 8.02857142857143e-06,
181
- "logits/chosen": 7.707437992095947,
182
- "logits/rejected": 8.208626747131348,
183
- "logps/chosen": -493.72503662109375,
184
- "logps/rejected": -1418.4669189453125,
185
- "loss": 0.4660654544830322,
186
- "rewards/accuracies": 0.9750000238418579,
187
- "rewards/chosen": -27.65629005432129,
188
- "rewards/margins": 457.8779296875,
189
- "rewards/rejected": -485.53424072265625,
190
- "step": 120
191
- },
192
- {
193
- "epoch": 0.34562977733466266,
194
- "grad_norm": 3.0277444440507395e-10,
195
- "learning_rate": 7.742857142857144e-06,
196
- "logits/chosen": 7.820192813873291,
197
- "logits/rejected": 8.277512550354004,
198
- "logps/chosen": -490.735595703125,
199
- "logps/rejected": -1337.3505859375,
200
- "loss": 1.2780420303344726,
201
- "rewards/accuracies": 0.981249988079071,
202
- "rewards/chosen": 0.13531294465065002,
203
- "rewards/margins": 430.638671875,
204
- "rewards/rejected": -430.50335693359375,
205
- "step": 130
206
- },
207
- {
208
- "epoch": 0.3722166832834829,
209
- "grad_norm": 1.2154150397236663e-07,
210
- "learning_rate": 7.457142857142857e-06,
211
- "logits/chosen": 7.428654670715332,
212
- "logits/rejected": 7.884097099304199,
213
- "logps/chosen": -490.9019470214844,
214
- "logps/rejected": -1213.6722412109375,
215
- "loss": 0.03439792990684509,
216
- "rewards/accuracies": 0.9937499761581421,
217
- "rewards/chosen": 3.2693276405334473,
218
- "rewards/margins": 378.11016845703125,
219
- "rewards/rejected": -374.8408203125,
220
- "step": 140
221
- },
222
- {
223
- "epoch": 0.3988035892323031,
224
- "grad_norm": 0.0008078943355940282,
225
- "learning_rate": 7.1714285714285725e-06,
226
- "logits/chosen": 7.338967800140381,
227
- "logits/rejected": 7.722776889801025,
228
- "logps/chosen": -444.8753356933594,
229
- "logps/rejected": -1266.625244140625,
230
- "loss": 1.42781343460083,
231
- "rewards/accuracies": 0.96875,
232
- "rewards/chosen": -2.331421375274658,
233
- "rewards/margins": 393.41400146484375,
234
- "rewards/rejected": -395.74542236328125,
235
- "step": 150
236
- },
237
- {
238
- "epoch": 0.4253904951811233,
239
- "grad_norm": 0.0,
240
- "learning_rate": 6.885714285714287e-06,
241
- "logits/chosen": 7.96518087387085,
242
- "logits/rejected": 8.39413833618164,
243
- "logps/chosen": -586.011962890625,
244
- "logps/rejected": -1365.374267578125,
245
- "loss": 1.9711128234863282,
246
- "rewards/accuracies": 0.96875,
247
- "rewards/chosen": -28.12943458557129,
248
- "rewards/margins": 417.09210205078125,
249
- "rewards/rejected": -445.22149658203125,
250
- "step": 160
251
- },
252
- {
253
- "epoch": 0.4519774011299435,
254
- "grad_norm": 5.4764127260797935e-12,
255
- "learning_rate": 6.600000000000001e-06,
256
- "logits/chosen": 7.3422675132751465,
257
- "logits/rejected": 7.857165336608887,
258
- "logps/chosen": -465.5393981933594,
259
- "logps/rejected": -1300.1103515625,
260
- "loss": 0.004332171380519867,
261
- "rewards/accuracies": 0.9937499761581421,
262
- "rewards/chosen": 1.256854772567749,
263
- "rewards/margins": 424.2041931152344,
264
- "rewards/rejected": -422.9473571777344,
265
- "step": 170
266
- },
267
- {
268
- "epoch": 0.4785643070787637,
269
- "grad_norm": 422.58892822265625,
270
- "learning_rate": 6.314285714285715e-06,
271
- "logits/chosen": 7.199074745178223,
272
- "logits/rejected": 7.617570400238037,
273
- "logps/chosen": -446.2982482910156,
274
- "logps/rejected": -1317.12890625,
275
- "loss": 0.9593421936035156,
276
- "rewards/accuracies": 0.9750000238418579,
277
- "rewards/chosen": 2.7993197441101074,
278
- "rewards/margins": 444.40753173828125,
279
- "rewards/rejected": -441.60821533203125,
280
- "step": 180
281
- },
282
- {
283
- "epoch": 0.5051512130275839,
284
- "grad_norm": 0.0,
285
- "learning_rate": 6.028571428571429e-06,
286
- "logits/chosen": 7.070580959320068,
287
- "logits/rejected": 7.494720458984375,
288
- "logps/chosen": -430.97760009765625,
289
- "logps/rejected": -1291.35205078125,
290
- "loss": 0.09595458507537842,
291
- "rewards/accuracies": 0.9937499761581421,
292
- "rewards/chosen": 7.344033241271973,
293
- "rewards/margins": 433.2731018066406,
294
- "rewards/rejected": -425.9291076660156,
295
- "step": 190
296
- },
297
- {
298
- "epoch": 0.5317381189764041,
299
- "grad_norm": 1.3654603958129883,
300
- "learning_rate": 5.742857142857143e-06,
301
- "logits/chosen": 7.252472877502441,
302
- "logits/rejected": 7.820960998535156,
303
- "logps/chosen": -435.6075134277344,
304
- "logps/rejected": -1333.8228759765625,
305
- "loss": 0.7474074840545655,
306
- "rewards/accuracies": 0.956250011920929,
307
- "rewards/chosen": 1.129046082496643,
308
- "rewards/margins": 456.1576232910156,
309
- "rewards/rejected": -455.028564453125,
310
- "step": 200
311
- },
312
- {
313
- "epoch": 0.5583250249252243,
314
- "grad_norm": 32.07512283325195,
315
- "learning_rate": 5.457142857142858e-06,
316
- "logits/chosen": 7.640280723571777,
317
- "logits/rejected": 8.078705787658691,
318
- "logps/chosen": -507.60870361328125,
319
- "logps/rejected": -1413.518798828125,
320
- "loss": 0.023566444218158723,
321
- "rewards/accuracies": 0.9937499761581421,
322
- "rewards/chosen": -28.288721084594727,
323
- "rewards/margins": 461.63006591796875,
324
- "rewards/rejected": -489.9187927246094,
325
- "step": 210
326
- },
327
- {
328
- "epoch": 0.5849119308740446,
329
- "grad_norm": 0.0,
330
- "learning_rate": 5.171428571428571e-06,
331
- "logits/chosen": 7.952935218811035,
332
- "logits/rejected": 8.260855674743652,
333
- "logps/chosen": -547.0781860351562,
334
- "logps/rejected": -1421.848388671875,
335
- "loss": 0.036107948422431944,
336
- "rewards/accuracies": 0.9937499761581421,
337
- "rewards/chosen": -42.91097640991211,
338
- "rewards/margins": 435.43353271484375,
339
- "rewards/rejected": -478.34454345703125,
340
- "step": 220
341
- },
342
- {
343
- "epoch": 0.6114988368228648,
344
- "grad_norm": 0.0,
345
- "learning_rate": 4.885714285714286e-06,
346
- "logits/chosen": 8.114786148071289,
347
- "logits/rejected": 8.457345962524414,
348
- "logps/chosen": -507.6402893066406,
349
- "logps/rejected": -1458.050537109375,
350
- "loss": 0.014430716633796692,
351
- "rewards/accuracies": 0.981249988079071,
352
- "rewards/chosen": -36.03577423095703,
353
- "rewards/margins": 463.03021240234375,
354
- "rewards/rejected": -499.06591796875,
355
- "step": 230
356
- },
357
- {
358
- "epoch": 0.638085742771685,
359
- "grad_norm": 0.0,
360
- "learning_rate": 4.600000000000001e-06,
361
- "logits/chosen": 7.714877128601074,
362
- "logits/rejected": 8.194944381713867,
363
- "logps/chosen": -527.2571411132812,
364
- "logps/rejected": -1552.201171875,
365
- "loss": 0.07465354204177857,
366
- "rewards/accuracies": 0.9937499761581421,
367
- "rewards/chosen": -26.272327423095703,
368
- "rewards/margins": 538.8110961914062,
369
- "rewards/rejected": -565.0833740234375,
370
- "step": 240
371
- },
372
- {
373
- "epoch": 0.6646726487205051,
374
- "grad_norm": 0.0,
375
- "learning_rate": 4.314285714285714e-06,
376
- "logits/chosen": 8.05278205871582,
377
- "logits/rejected": 8.397611618041992,
378
- "logps/chosen": -562.1492919921875,
379
- "logps/rejected": -1432.1495361328125,
380
- "loss": 1.750390303248341e-16,
381
- "rewards/accuracies": 1.0,
382
- "rewards/chosen": -13.465299606323242,
383
- "rewards/margins": 452.33758544921875,
384
- "rewards/rejected": -465.80291748046875,
385
- "step": 250
386
- },
387
- {
388
- "epoch": 0.6912595546693253,
389
- "grad_norm": 0.0,
390
- "learning_rate": 4.028571428571429e-06,
391
- "logits/chosen": 7.9839582443237305,
392
- "logits/rejected": 8.325165748596191,
393
- "logps/chosen": -531.9362182617188,
394
- "logps/rejected": -1311.642578125,
395
- "loss": 0.5010682582855225,
396
- "rewards/accuracies": 0.981249988079071,
397
- "rewards/chosen": -7.1212358474731445,
398
- "rewards/margins": 406.3907165527344,
399
- "rewards/rejected": -413.511962890625,
400
- "step": 260
401
- },
402
- {
403
- "epoch": 0.7178464606181456,
404
- "grad_norm": 72.22783660888672,
405
- "learning_rate": 3.742857142857143e-06,
406
- "logits/chosen": 7.493854522705078,
407
- "logits/rejected": 7.991011142730713,
408
- "logps/chosen": -503.574462890625,
409
- "logps/rejected": -1281.570068359375,
410
- "loss": 0.3572836875915527,
411
- "rewards/accuracies": 0.981249988079071,
412
- "rewards/chosen": -10.670049667358398,
413
- "rewards/margins": 382.18377685546875,
414
- "rewards/rejected": -392.85382080078125,
415
- "step": 270
416
- },
417
- {
418
- "epoch": 0.7444333665669658,
419
- "grad_norm": 209.91946411132812,
420
- "learning_rate": 3.4571428571428574e-06,
421
- "logits/chosen": 7.536074161529541,
422
- "logits/rejected": 7.9550018310546875,
423
- "logps/chosen": -423.2442321777344,
424
- "logps/rejected": -1381.945068359375,
425
- "loss": 0.241387939453125,
426
- "rewards/accuracies": 0.987500011920929,
427
- "rewards/chosen": -2.9049324989318848,
428
- "rewards/margins": 460.4957580566406,
429
- "rewards/rejected": -463.40069580078125,
430
- "step": 280
431
- },
432
- {
433
- "epoch": 0.771020272515786,
434
- "grad_norm": 0.0,
435
- "learning_rate": 3.1714285714285714e-06,
436
- "logits/chosen": 7.7217535972595215,
437
- "logits/rejected": 8.095100402832031,
438
- "logps/chosen": -452.80010986328125,
439
- "logps/rejected": -1453.189208984375,
440
- "loss": 7.635563088115305e-06,
441
- "rewards/accuracies": 1.0,
442
- "rewards/chosen": -13.30943489074707,
443
- "rewards/margins": 486.92889404296875,
444
- "rewards/rejected": -500.23834228515625,
445
- "step": 290
446
- },
447
- {
448
- "epoch": 0.7976071784646062,
449
- "grad_norm": 0.0,
450
- "learning_rate": 2.885714285714286e-06,
451
- "logits/chosen": 7.356575012207031,
452
- "logits/rejected": 7.875399589538574,
453
- "logps/chosen": -446.17254638671875,
454
- "logps/rejected": -1462.5728759765625,
455
- "loss": 0.026995140314102172,
456
- "rewards/accuracies": 0.987500011920929,
457
- "rewards/chosen": -26.922204971313477,
458
- "rewards/margins": 505.7860412597656,
459
- "rewards/rejected": -532.708251953125,
460
- "step": 300
461
- },
462
- {
463
- "epoch": 0.8241940844134263,
464
- "grad_norm": 0.0,
465
- "learning_rate": 2.6e-06,
466
- "logits/chosen": 7.945906162261963,
467
- "logits/rejected": 8.316286087036133,
468
- "logps/chosen": -521.7435302734375,
469
- "logps/rejected": -1435.0,
470
- "loss": 2.198499298095703,
471
- "rewards/accuracies": 0.956250011920929,
472
- "rewards/chosen": -26.506107330322266,
473
- "rewards/margins": 458.09234619140625,
474
- "rewards/rejected": -484.59844970703125,
475
- "step": 310
476
- },
477
- {
478
- "epoch": 0.8507809903622466,
479
- "grad_norm": 0.0,
480
- "learning_rate": 2.3142857142857145e-06,
481
- "logits/chosen": 7.83941650390625,
482
- "logits/rejected": 8.193696975708008,
483
- "logps/chosen": -551.3535766601562,
484
- "logps/rejected": -1372.135498046875,
485
- "loss": 0.004357181861996651,
486
- "rewards/accuracies": 0.9937499761581421,
487
- "rewards/chosen": -26.256458282470703,
488
- "rewards/margins": 423.38323974609375,
489
- "rewards/rejected": -449.63970947265625,
490
- "step": 320
491
- },
492
- {
493
- "epoch": 0.8773678963110668,
494
- "grad_norm": 0.0,
495
- "learning_rate": 2.028571428571429e-06,
496
- "logits/chosen": 7.511332035064697,
497
- "logits/rejected": 8.009842872619629,
498
- "logps/chosen": -482.1061096191406,
499
- "logps/rejected": -1416.7646484375,
500
- "loss": 0.0004897473379969596,
501
- "rewards/accuracies": 1.0,
502
- "rewards/chosen": -24.751493453979492,
503
- "rewards/margins": 472.49639892578125,
504
- "rewards/rejected": -497.2478942871094,
505
- "step": 330
506
- },
507
- {
508
- "epoch": 0.903954802259887,
509
- "grad_norm": 4.885124487524457e-19,
510
- "learning_rate": 1.7428571428571432e-06,
511
- "logits/chosen": 7.67673397064209,
512
- "logits/rejected": 8.061668395996094,
513
- "logps/chosen": -477.88983154296875,
514
- "logps/rejected": -1371.306396484375,
515
- "loss": 0.02846769690513611,
516
- "rewards/accuracies": 0.987500011920929,
517
- "rewards/chosen": -19.97150230407715,
518
- "rewards/margins": 429.90509033203125,
519
- "rewards/rejected": -449.87664794921875,
520
- "step": 340
521
- },
522
- {
523
- "epoch": 0.9305417082087072,
524
- "grad_norm": 0.0,
525
- "learning_rate": 1.4571428571428573e-06,
526
- "logits/chosen": 7.819458961486816,
527
- "logits/rejected": 8.190754890441895,
528
- "logps/chosen": -501.7386169433594,
529
- "logps/rejected": -1499.830322265625,
530
- "loss": 0.0005395257845520973,
531
- "rewards/accuracies": 1.0,
532
- "rewards/chosen": -13.413067817687988,
533
- "rewards/margins": 516.6709594726562,
534
- "rewards/rejected": -530.083984375,
535
- "step": 350
536
- },
537
- {
538
- "epoch": 0.9571286141575274,
539
- "grad_norm": 0.0,
540
- "learning_rate": 1.1714285714285715e-06,
541
- "logits/chosen": 7.6596269607543945,
542
- "logits/rejected": 8.10220718383789,
543
- "logps/chosen": -496.450439453125,
544
- "logps/rejected": -1371.864501953125,
545
- "loss": 0.00866500437259674,
546
- "rewards/accuracies": 0.987500011920929,
547
- "rewards/chosen": -29.596303939819336,
548
- "rewards/margins": 433.4952697753906,
549
- "rewards/rejected": -463.09149169921875,
550
- "step": 360
551
- },
552
- {
553
- "epoch": 0.9837155201063477,
554
- "grad_norm": 1.5750356366836245e-15,
555
- "learning_rate": 8.857142857142857e-07,
556
- "logits/chosen": 7.884603977203369,
557
- "logits/rejected": 8.13658618927002,
558
- "logps/chosen": -544.7962646484375,
559
- "logps/rejected": -1370.7030029296875,
560
- "loss": 0.030383533239364623,
561
- "rewards/accuracies": 0.987500011920929,
562
- "rewards/chosen": -27.421581268310547,
563
- "rewards/margins": 431.65802001953125,
564
- "rewards/rejected": -459.07965087890625,
565
- "step": 370
566
- },
567
- {
568
- "epoch": 1.007976071784646,
569
- "grad_norm": 0.0,
570
- "learning_rate": 6.000000000000001e-07,
571
- "logits/chosen": 7.871831893920898,
572
- "logits/rejected": 8.307570457458496,
573
- "logps/chosen": -519.7576904296875,
574
- "logps/rejected": -1396.640869140625,
575
- "loss": 0.06974923014640808,
576
- "rewards/accuracies": 0.9931507110595703,
577
- "rewards/chosen": -26.421640396118164,
578
- "rewards/margins": 436.3954162597656,
579
- "rewards/rejected": -462.8170471191406,
580
- "step": 380
581
- },
582
- {
583
- "epoch": 1.0345629777334662,
584
- "grad_norm": 3.976285881214031e-21,
585
- "learning_rate": 3.1428571428571433e-07,
586
- "logits/chosen": 7.635078430175781,
587
- "logits/rejected": 8.017423629760742,
588
- "logps/chosen": -461.38946533203125,
589
- "logps/rejected": -1499.245361328125,
590
- "loss": 0.03160061240196228,
591
- "rewards/accuracies": 0.987500011920929,
592
- "rewards/chosen": -30.11972427368164,
593
- "rewards/margins": 512.0826416015625,
594
- "rewards/rejected": -542.2023315429688,
595
- "step": 390
596
- },
597
- {
598
- "epoch": 1.0611498836822866,
599
- "grad_norm": 0.0,
600
- "learning_rate": 2.8571428571428575e-08,
601
- "logits/chosen": 7.7975664138793945,
602
- "logits/rejected": 8.254159927368164,
603
- "logps/chosen": -477.83282470703125,
604
- "logps/rejected": -1497.345703125,
605
- "loss": 0.0043321698904037476,
606
- "rewards/accuracies": 0.9937499761581421,
607
- "rewards/chosen": -29.0898380279541,
608
- "rewards/margins": 499.4642639160156,
609
- "rewards/rejected": -528.5540771484375,
610
- "step": 400
611
  }
612
  ],
613
  "logging_steps": 10,
614
- "max_steps": 400,
615
  "num_input_tokens_seen": 0,
616
- "num_train_epochs": 2,
617
  "save_steps": 200,
618
  "stateful_callbacks": {
619
  "TrainerControl": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.26586905948820205,
6
  "eval_steps": 500,
7
+ "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.026586905948820207,
14
+ "grad_norm": 235.8455810546875,
15
+ "learning_rate": 9e-06,
16
+ "logits/chosen": 1.961168646812439,
17
+ "logits/rejected": 2.3471131324768066,
18
+ "logps/chosen": -180.52096557617188,
19
+ "logps/rejected": -306.41729736328125,
20
+ "loss": 41.09808349609375,
21
  "rewards/accuracies": 0.643750011920929,
22
+ "rewards/chosen": 127.35591888427734,
23
+ "rewards/margins": 56.6036376953125,
24
+ "rewards/rejected": 70.75228118896484,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.053173811897640415,
29
+ "grad_norm": 132.22105407714844,
30
+ "learning_rate": 9e-06,
31
+ "logits/chosen": 2.862368106842041,
32
+ "logits/rejected": 3.0271706581115723,
33
+ "logps/chosen": -199.9718017578125,
34
+ "logps/rejected": -383.2444763183594,
35
+ "loss": 30.093466186523436,
36
+ "rewards/accuracies": 0.6812499761581421,
37
+ "rewards/chosen": 132.59613037109375,
38
+ "rewards/margins": 104.64494323730469,
39
+ "rewards/rejected": 27.951187133789062,
40
  "step": 20
41
  },
42
  {
43
  "epoch": 0.07976071784646062,
44
+ "grad_norm": 234.4345703125,
45
+ "learning_rate": 7.88888888888889e-06,
46
+ "logits/chosen": 3.7322630882263184,
47
+ "logits/rejected": 3.8905537128448486,
48
+ "logps/chosen": -219.1993865966797,
49
+ "logps/rejected": -487.88043212890625,
50
+ "loss": 23.500108337402345,
51
+ "rewards/accuracies": 0.768750011920929,
52
+ "rewards/chosen": 141.04718017578125,
53
+ "rewards/margins": 154.50144958496094,
54
+ "rewards/rejected": -13.454266548156738,
55
  "step": 30
56
  },
57
  {
58
  "epoch": 0.10634762379528083,
59
+ "grad_norm": 110.76012420654297,
60
+ "learning_rate": 6.777777777777779e-06,
61
+ "logits/chosen": 3.839090347290039,
62
+ "logits/rejected": 4.133347988128662,
63
+ "logps/chosen": -210.9350128173828,
64
+ "logps/rejected": -591.01513671875,
65
+ "loss": 23.24909210205078,
66
+ "rewards/accuracies": 0.7749999761581421,
67
+ "rewards/chosen": 109.74186706542969,
68
+ "rewards/margins": 179.082275390625,
69
+ "rewards/rejected": -69.34040832519531,
70
  "step": 40
71
  },
72
  {
73
  "epoch": 0.13293452974410103,
74
+ "grad_norm": 221.89686584472656,
75
+ "learning_rate": 5.666666666666667e-06,
76
+ "logits/chosen": 5.0413312911987305,
77
+ "logits/rejected": 5.170095920562744,
78
+ "logps/chosen": -311.14190673828125,
79
+ "logps/rejected": -735.1285400390625,
80
+ "loss": 20.513507080078124,
81
+ "rewards/accuracies": 0.762499988079071,
82
+ "rewards/chosen": 93.5618667602539,
83
+ "rewards/margins": 231.3604736328125,
84
+ "rewards/rejected": -137.79861450195312,
85
  "step": 50
86
  },
87
  {
88
  "epoch": 0.15952143569292124,
89
+ "grad_norm": 190.1571044921875,
90
+ "learning_rate": 4.555555555555556e-06,
91
+ "logits/chosen": 5.5310869216918945,
92
+ "logits/rejected": 5.918412208557129,
93
+ "logps/chosen": -291.5699157714844,
94
+ "logps/rejected": -943.5242919921875,
95
+ "loss": 4.122843551635742,
96
+ "rewards/accuracies": 0.925000011920929,
97
+ "rewards/chosen": 46.140045166015625,
98
+ "rewards/margins": 318.23345947265625,
99
+ "rewards/rejected": -272.0934143066406,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 0.18610834164174145,
104
+ "grad_norm": 20.02957534790039,
105
+ "learning_rate": 3.444444444444445e-06,
106
+ "logits/chosen": 6.543715476989746,
107
+ "logits/rejected": 6.924801826477051,
108
+ "logps/chosen": -418.068359375,
109
+ "logps/rejected": -989.2503662109375,
110
+ "loss": 5.07203254699707,
111
+ "rewards/accuracies": 0.90625,
112
+ "rewards/chosen": 34.52830123901367,
113
+ "rewards/margins": 294.2066345214844,
114
+ "rewards/rejected": -259.6783447265625,
115
  "step": 70
116
  },
117
  {
118
  "epoch": 0.21269524759056166,
119
+ "grad_norm": 365.81671142578125,
120
+ "learning_rate": 2.3333333333333336e-06,
121
+ "logits/chosen": 7.004462242126465,
122
+ "logits/rejected": 7.306379795074463,
123
+ "logps/chosen": -456.623291015625,
124
+ "logps/rejected": -1127.0947265625,
125
+ "loss": 4.6394092559814455,
126
+ "rewards/accuracies": 0.9375,
127
+ "rewards/chosen": 18.725841522216797,
128
+ "rewards/margins": 349.7669372558594,
129
+ "rewards/rejected": -331.0411071777344,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 0.23928215353938184,
134
+ "grad_norm": 442.5915222167969,
135
+ "learning_rate": 1.2222222222222223e-06,
136
+ "logits/chosen": 7.063316345214844,
137
+ "logits/rejected": 7.30740213394165,
138
+ "logps/chosen": -479.52117919921875,
139
+ "logps/rejected": -1115.75146484375,
140
+ "loss": 3.6795909881591795,
141
+ "rewards/accuracies": 0.9375,
142
+ "rewards/chosen": 14.30505084991455,
143
+ "rewards/margins": 339.4307556152344,
144
+ "rewards/rejected": -325.1257019042969,
145
  "step": 90
146
  },
147
  {
148
  "epoch": 0.26586905948820205,
149
+ "grad_norm": 170.1773223876953,
150
+ "learning_rate": 1.1111111111111112e-07,
151
+ "logits/chosen": 6.971957206726074,
152
+ "logits/rejected": 7.46600866317749,
153
+ "logps/chosen": -429.0472106933594,
154
+ "logps/rejected": -1123.8218994140625,
155
+ "loss": 3.1557437896728517,
156
+ "rewards/accuracies": 0.925000011920929,
157
+ "rewards/chosen": 5.929503440856934,
158
+ "rewards/margins": 339.28887939453125,
159
+ "rewards/rejected": -333.359375,
160
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  }
162
  ],
163
  "logging_steps": 10,
164
+ "max_steps": 100,
165
  "num_input_tokens_seen": 0,
166
+ "num_train_epochs": 1,
167
  "save_steps": 200,
168
  "stateful_callbacks": {
169
  "TrainerControl": {
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa5979d784b3be5f03398730b0db9a0aaad24ae1fdea10accf8ecc4f7c831b44
3
  size 6289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e9844d94b2ddeacda52d988f72cd6b4206ea325ad209511ab311437d2b42ef
3
  size 6289