FormlessAI commited on
Commit
fd250b1
·
verified ·
1 Parent(s): 60c62c2

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d4b544e347b4f8e2a4742b0ff67170ed51fff3a887807982bb031c6d49dbad9
3
  size 2751499016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5283ab5dc1b04471ebdac9c69d4ddc3c9daadb1cd9659495feafd52deba15535
3
  size 2751499016
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30b548386475ae7a25a886219f15374e14106e03a6b00193c76fd328aa34450e
3
  size 1397530085
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7343fb23d7b9ae991994bd95b71751b6fe75a7696f84ec85a1c9ef768b87760c
3
  size 1397530085
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb229f1eff16c146ee732ddac4ee46d69bfb16d06dc3d643f8561c719332e846
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589a29a7a9601ca691e8af1337d86f4213715f3591d526b3325012bd0d4260dd
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.05044383555650711,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.00764876854826373,
6
  "eval_steps": 50,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -174,6 +174,338 @@
174
  "eval_samples_per_second": 11.342,
175
  "eval_steps_per_second": 5.679,
176
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  }
178
  ],
179
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.024217354133725166,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.02294630564479119,
6
  "eval_steps": 50,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
174
  "eval_samples_per_second": 11.342,
175
  "eval_steps_per_second": 5.679,
176
  "step": 50
177
+ },
178
+ {
179
+ "epoch": 0.008413645403090102,
180
+ "grad_norm": 0.0028622462414205074,
181
+ "learning_rate": 0.00014993216160618053,
182
+ "logits/chosen": 2.2500367164611816,
183
+ "logits/rejected": 0.9480382204055786,
184
+ "logps/chosen": -436.58172607421875,
185
+ "logps/rejected": -573.2215576171875,
186
+ "loss": 0.0001,
187
+ "rewards/accuracies": 1.0,
188
+ "rewards/chosen": -1.8464500904083252,
189
+ "rewards/margins": 13.733373641967773,
190
+ "rewards/rejected": -15.579824447631836,
191
+ "step": 55
192
+ },
193
+ {
194
+ "epoch": 0.009178522257916476,
195
+ "grad_norm": 0.00047387686208821833,
196
+ "learning_rate": 0.00016381476916230834,
197
+ "logits/chosen": 2.368880271911621,
198
+ "logits/rejected": 1.2400633096694946,
199
+ "logps/chosen": -460.0272521972656,
200
+ "logps/rejected": -477.02166748046875,
201
+ "loss": 0.0002,
202
+ "rewards/accuracies": 1.0,
203
+ "rewards/chosen": -1.5477186441421509,
204
+ "rewards/margins": 11.986092567443848,
205
+ "rewards/rejected": -13.53381061553955,
206
+ "step": 60
207
+ },
208
+ {
209
+ "epoch": 0.009943399112742848,
210
+ "grad_norm": 2.4312286768690683e-05,
211
+ "learning_rate": 0.00017769737671843617,
212
+ "logits/chosen": 2.1582725048065186,
213
+ "logits/rejected": 0.8890345692634583,
214
+ "logps/chosen": -359.4410705566406,
215
+ "logps/rejected": -731.4852294921875,
216
+ "loss": 0.0,
217
+ "rewards/accuracies": 1.0,
218
+ "rewards/chosen": -1.1775275468826294,
219
+ "rewards/margins": 19.947010040283203,
220
+ "rewards/rejected": -21.12453842163086,
221
+ "step": 65
222
+ },
223
+ {
224
+ "epoch": 0.01070827596756922,
225
+ "grad_norm": 0.0019105683313682675,
226
+ "learning_rate": 0.00019157998427456397,
227
+ "logits/chosen": 1.9572877883911133,
228
+ "logits/rejected": 0.4613746106624603,
229
+ "logps/chosen": -433.6973571777344,
230
+ "logps/rejected": -575.4284057617188,
231
+ "loss": 0.0,
232
+ "rewards/accuracies": 1.0,
233
+ "rewards/chosen": -3.012183904647827,
234
+ "rewards/margins": 13.66297721862793,
235
+ "rewards/rejected": -16.675161361694336,
236
+ "step": 70
237
+ },
238
+ {
239
+ "epoch": 0.011473152822395595,
240
+ "grad_norm": 0.003828815184533596,
241
+ "learning_rate": 0.0002054625918306918,
242
+ "logits/chosen": NaN,
243
+ "logits/rejected": 0.8698433041572571,
244
+ "logps/chosen": -400.0428161621094,
245
+ "logps/rejected": -832.5679931640625,
246
+ "loss": 0.0693,
247
+ "rewards/accuracies": 0.8999999761581421,
248
+ "rewards/chosen": -3.338275194168091,
249
+ "rewards/margins": 21.856613159179688,
250
+ "rewards/rejected": -25.194889068603516,
251
+ "step": 75
252
+ },
253
+ {
254
+ "epoch": 0.012238029677221967,
255
+ "grad_norm": 0.010469191707670689,
256
+ "learning_rate": 0.00021934519938681966,
257
+ "logits/chosen": 1.8671172857284546,
258
+ "logits/rejected": 0.7096843719482422,
259
+ "logps/chosen": -443.547119140625,
260
+ "logps/rejected": -688.80126953125,
261
+ "loss": 0.0,
262
+ "rewards/accuracies": 1.0,
263
+ "rewards/chosen": -3.6706607341766357,
264
+ "rewards/margins": 15.804478645324707,
265
+ "rewards/rejected": -19.47513771057129,
266
+ "step": 80
267
+ },
268
+ {
269
+ "epoch": 0.013002906532048341,
270
+ "grad_norm": 2.6392877771286294e-05,
271
+ "learning_rate": 0.00023322780694294746,
272
+ "logits/chosen": 1.9022146463394165,
273
+ "logits/rejected": 0.36020129919052124,
274
+ "logps/chosen": -396.5116271972656,
275
+ "logps/rejected": -583.2808837890625,
276
+ "loss": 0.0,
277
+ "rewards/accuracies": 1.0,
278
+ "rewards/chosen": -1.5602790117263794,
279
+ "rewards/margins": 15.830926895141602,
280
+ "rewards/rejected": -17.391204833984375,
281
+ "step": 85
282
+ },
283
+ {
284
+ "epoch": 0.013767783386874713,
285
+ "grad_norm": 1.727592433553582e-07,
286
+ "learning_rate": 0.0002471104144990753,
287
+ "logits/chosen": 1.5785582065582275,
288
+ "logits/rejected": 0.47088107466697693,
289
+ "logps/chosen": -400.20953369140625,
290
+ "logps/rejected": -640.7406616210938,
291
+ "loss": 0.0,
292
+ "rewards/accuracies": 1.0,
293
+ "rewards/chosen": -0.2463744431734085,
294
+ "rewards/margins": 18.521793365478516,
295
+ "rewards/rejected": -18.768169403076172,
296
+ "step": 90
297
+ },
298
+ {
299
+ "epoch": 0.014532660241701086,
300
+ "grad_norm": 5.100153430248611e-05,
301
+ "learning_rate": 0.0002609930220552031,
302
+ "logits/chosen": 1.551195502281189,
303
+ "logits/rejected": 0.6250472664833069,
304
+ "logps/chosen": -400.8873596191406,
305
+ "logps/rejected": -630.4702758789062,
306
+ "loss": 0.0,
307
+ "rewards/accuracies": 1.0,
308
+ "rewards/chosen": -0.10455240309238434,
309
+ "rewards/margins": 17.793054580688477,
310
+ "rewards/rejected": -17.897607803344727,
311
+ "step": 95
312
+ },
313
+ {
314
+ "epoch": 0.01529753709652746,
315
+ "grad_norm": 1.2168807472789922e-07,
316
+ "learning_rate": 0.00027487562961133095,
317
+ "logits/chosen": 2.179185390472412,
318
+ "logits/rejected": 0.9079009890556335,
319
+ "logps/chosen": -295.3583984375,
320
+ "logps/rejected": -859.0484619140625,
321
+ "loss": 0.0,
322
+ "rewards/accuracies": 1.0,
323
+ "rewards/chosen": 0.7290161848068237,
324
+ "rewards/margins": 25.750385284423828,
325
+ "rewards/rejected": -25.02136993408203,
326
+ "step": 100
327
+ },
328
+ {
329
+ "epoch": 0.01529753709652746,
330
+ "eval_logits/chosen": NaN,
331
+ "eval_logits/rejected": 0.5695675611495972,
332
+ "eval_logps/chosen": -330.794189453125,
333
+ "eval_logps/rejected": -718.6740112304688,
334
+ "eval_loss": 0.02476140484213829,
335
+ "eval_rewards/accuracies": 0.9811594486236572,
336
+ "eval_rewards/chosen": 0.7904098033905029,
337
+ "eval_rewards/margins": 22.044944763183594,
338
+ "eval_rewards/rejected": -21.254533767700195,
339
+ "eval_runtime": 62.4706,
340
+ "eval_samples_per_second": 11.029,
341
+ "eval_steps_per_second": 5.523,
342
+ "step": 100
343
+ },
344
+ {
345
+ "epoch": 0.016062413951353834,
346
+ "grad_norm": 8.499349557489211e-10,
347
+ "learning_rate": 0.0002776520392843145,
348
+ "logits/chosen": 2.1345207691192627,
349
+ "logits/rejected": 0.47184300422668457,
350
+ "logps/chosen": -315.34930419921875,
351
+ "logps/rejected": -695.0374145507812,
352
+ "loss": 0.0,
353
+ "rewards/accuracies": 1.0,
354
+ "rewards/chosen": 0.8640382885932922,
355
+ "rewards/margins": 21.742961883544922,
356
+ "rewards/rejected": -20.878923416137695,
357
+ "step": 105
358
+ },
359
+ {
360
+ "epoch": 0.016827290806180204,
361
+ "grad_norm": 3.2272314456349704e-06,
362
+ "learning_rate": 0.0002776515849417652,
363
+ "logits/chosen": 2.066007614135742,
364
+ "logits/rejected": 0.7821122407913208,
365
+ "logps/chosen": -273.3198547363281,
366
+ "logps/rejected": -622.60205078125,
367
+ "loss": 0.0,
368
+ "rewards/accuracies": 1.0,
369
+ "rewards/chosen": 1.230340600013733,
370
+ "rewards/margins": 17.653335571289062,
371
+ "rewards/rejected": -16.42299461364746,
372
+ "step": 110
373
+ },
374
+ {
375
+ "epoch": 0.017592167661006578,
376
+ "grad_norm": 2.764949513789361e-08,
377
+ "learning_rate": 0.00027765078110616136,
378
+ "logits/chosen": 2.0481059551239014,
379
+ "logits/rejected": 0.06188444048166275,
380
+ "logps/chosen": -310.6258239746094,
381
+ "logps/rejected": -710.1292724609375,
382
+ "loss": 0.0,
383
+ "rewards/accuracies": 1.0,
384
+ "rewards/chosen": 1.083168387413025,
385
+ "rewards/margins": 23.025583267211914,
386
+ "rewards/rejected": -21.942415237426758,
387
+ "step": 115
388
+ },
389
+ {
390
+ "epoch": 0.018357044515832952,
391
+ "grad_norm": 1.2643829450098565e-06,
392
+ "learning_rate": 0.00027764962777952664,
393
+ "logits/chosen": 2.1907763481140137,
394
+ "logits/rejected": 1.0400992631912231,
395
+ "logps/chosen": -307.0046081542969,
396
+ "logps/rejected": -755.6473999023438,
397
+ "loss": 0.0,
398
+ "rewards/accuracies": 1.0,
399
+ "rewards/chosen": 1.624768853187561,
400
+ "rewards/margins": 22.524160385131836,
401
+ "rewards/rejected": -20.89939308166504,
402
+ "step": 120
403
+ },
404
+ {
405
+ "epoch": 0.019121921370659323,
406
+ "grad_norm": 9.230769360685231e-13,
407
+ "learning_rate": 0.0002776481249647646,
408
+ "logits/chosen": 2.0665574073791504,
409
+ "logits/rejected": 0.20953090488910675,
410
+ "logps/chosen": -278.8282775878906,
411
+ "logps/rejected": -763.1021728515625,
412
+ "loss": 0.0,
413
+ "rewards/accuracies": 1.0,
414
+ "rewards/chosen": 0.6925491094589233,
415
+ "rewards/margins": 24.283218383789062,
416
+ "rewards/rejected": -23.590667724609375,
417
+ "step": 125
418
+ },
419
+ {
420
+ "epoch": 0.019886798225485697,
421
+ "grad_norm": 2.1791736344312085e-06,
422
+ "learning_rate": 0.00027764627266565854,
423
+ "logits/chosen": 2.2263553142547607,
424
+ "logits/rejected": 0.7493933439254761,
425
+ "logps/chosen": -355.59344482421875,
426
+ "logps/rejected": -662.4058837890625,
427
+ "loss": 0.0,
428
+ "rewards/accuracies": 1.0,
429
+ "rewards/chosen": 1.3247101306915283,
430
+ "rewards/margins": 20.672582626342773,
431
+ "rewards/rejected": -19.34787368774414,
432
+ "step": 130
433
+ },
434
+ {
435
+ "epoch": 0.02065167508031207,
436
+ "grad_norm": 1.3962798220745753e-05,
437
+ "learning_rate": 0.00027764407088687153,
438
+ "logits/chosen": 2.0784218311309814,
439
+ "logits/rejected": 0.6179172396659851,
440
+ "logps/chosen": -327.31329345703125,
441
+ "logps/rejected": -678.7785034179688,
442
+ "loss": 0.0,
443
+ "rewards/accuracies": 1.0,
444
+ "rewards/chosen": 1.206441044807434,
445
+ "rewards/margins": 20.837465286254883,
446
+ "rewards/rejected": -19.631023406982422,
447
+ "step": 135
448
+ },
449
+ {
450
+ "epoch": 0.02141655193513844,
451
+ "grad_norm": 5.620458409794082e-07,
452
+ "learning_rate": 0.0002776415196339467,
453
+ "logits/chosen": 1.9481678009033203,
454
+ "logits/rejected": 0.5887425541877747,
455
+ "logps/chosen": -309.68682861328125,
456
+ "logps/rejected": -587.2570190429688,
457
+ "loss": 0.0,
458
+ "rewards/accuracies": 1.0,
459
+ "rewards/chosen": 0.8636436462402344,
460
+ "rewards/margins": 18.32933807373047,
461
+ "rewards/rejected": -17.465694427490234,
462
+ "step": 140
463
+ },
464
+ {
465
+ "epoch": 0.022181428789964815,
466
+ "grad_norm": 1.5234973034239374e-06,
467
+ "learning_rate": 0.00027763861891330675,
468
+ "logits/chosen": 1.926863670349121,
469
+ "logits/rejected": 0.5701156854629517,
470
+ "logps/chosen": -344.00811767578125,
471
+ "logps/rejected": -644.8851318359375,
472
+ "loss": 0.0,
473
+ "rewards/accuracies": 1.0,
474
+ "rewards/chosen": 1.6971412897109985,
475
+ "rewards/margins": 20.91071891784668,
476
+ "rewards/rejected": -19.213577270507812,
477
+ "step": 145
478
+ },
479
+ {
480
+ "epoch": 0.02294630564479119,
481
+ "grad_norm": 3.310283773316769e-06,
482
+ "learning_rate": 0.0002776353687322543,
483
+ "logits/chosen": 1.9721519947052002,
484
+ "logits/rejected": 0.6008085012435913,
485
+ "logps/chosen": -346.241455078125,
486
+ "logps/rejected": -690.4124755859375,
487
+ "loss": 0.0,
488
+ "rewards/accuracies": 1.0,
489
+ "rewards/chosen": 0.7422004342079163,
490
+ "rewards/margins": 21.550968170166016,
491
+ "rewards/rejected": -20.808767318725586,
492
+ "step": 150
493
+ },
494
+ {
495
+ "epoch": 0.02294630564479119,
496
+ "eval_logits/chosen": NaN,
497
+ "eval_logits/rejected": 0.5568678379058838,
498
+ "eval_logps/chosen": -325.7964782714844,
499
+ "eval_logps/rejected": -757.2901000976562,
500
+ "eval_loss": 0.024217354133725166,
501
+ "eval_rewards/accuracies": 0.9811594486236572,
502
+ "eval_rewards/chosen": 0.9729923605918884,
503
+ "eval_rewards/margins": 23.638286590576172,
504
+ "eval_rewards/rejected": -22.665294647216797,
505
+ "eval_runtime": 63.3584,
506
+ "eval_samples_per_second": 10.875,
507
+ "eval_steps_per_second": 5.445,
508
+ "step": 150
509
  }
510
  ],
511
  "logging_steps": 5,