willyli commited on
Commit
aaea0c4
·
verified ·
1 Parent(s): bad0dbb

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:498927c08771e04a831f83e7a148866b9ec2d9236382d3fca73a82829ba3e116
3
  size 4962001760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525eac99ad96fc44058191ab19adc5993ebaea58b8e0178702747f1ee3435202
3
  size 4962001760
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf18cb7323bdf103c060825194b12041cd9f65527e6801e58f01c2af2518636
3
  size 4915916160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558c4922ee8c74243d0fdb31e3e04330963270eac20e7e5f27843b699e5bb2a2
3
  size 4915916160
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf047b0776c247cd18bfec687a84a46e4c272eaf95d4cf533bebf3adf64f86ee
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56220ae9514cffaa65f5e69f741b689c6b09c909e431ece359e6cc8fc172facf
3
  size 4999819336
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a83528af594926033e99367dc5529ec2083f14bb66aeb7d9573f7d56ce7848fe
3
  size 1623221024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b75675ac64f9f371e8692d260bb19c22ec999a3cdfd5bed397548ae96194d4
3
  size 1623221024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa69d5d5f89ce23f9d6b71b738413f9206b416237feabf7585aeab96db924124
3
  size 13053963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bbbf775bc02f0e5313eea63ceadf33473d5eb33db11880946d7f8cd32301931
3
  size 13053963
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:289f39013137940cf1ee5449b041384da04bc9bb685618953fd31d6c772eea95
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a163c08920e0f5521e8b4bad62b9d358b8cbc6506edd88b7096a6ffc2a7c242d
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:989bd77ff2885f40e3c715332e40e1c540ff701d6b100ef8efad9627d7d3c776
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:876bd3e9fc68db1c95fed624955b5fd3f484c7a9299d86e57f3ba1ddf3348179
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:377f1d7a96c328bb457e236cce32a38315a6883ea0ab33abae7d9374afeb92c7
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517ac8bafc12147371b154cd689f10e05f9ee0f90b1a1d98261bc402b3794d28
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0739a97c2722739882afaf6b544115b335b7f2dc177e7fce8c522b3da6991521
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf5db3a4eb888f369dfe32f926286928204680ca9125437e1b7c7b3f270ab15
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8e3fea8223898c73ab613293e12de1974fe1688bc3594682f3c0aa0de08b3c6
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59728ce06aff590492cee7a3ae474ff2766e75b2b0d12b0ecb1e0ed3c034d59
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:968b0e878dc8c9dab1af28d2a5a3c5c25fb01d09bbb5d144524ae269d784486a
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c2193cb033eebca6c12e4a1e29534cc18579a6cad159bde288623281c5c35d2
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b79c15f80530d12a2c7921202ae7531199c99ba76f622702271dce29c3ab78c7
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1143ef96cc6fb0d682bd8e4f30c0e8393b47d3bfc04f20f3b76342f03f9600
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:925732ed115662d23fac8942143e41da75815df4736560086168dcda367dc0c5
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057a179eb770c90e474fec0e25f409212d61bb896d2da1bbccb971884ff21913
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fbc8305b7ca0c6d67c3d651c7ca33470c406cdfe738d0a9715f708a4a927f87
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c002680d92a128e78b90abe033ccb635fd42c0f7b5d06a2d995a5f9b60d18627
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.0,
6
  "eval_steps": 100,
7
- "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -373,6 +373,124 @@
373
  "loss": 0.2549,
374
  "rewards/rejected": -1.526987361907959,
375
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 5.333333333333333,
6
  "eval_steps": 100,
7
+ "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
373
  "loss": 0.2549,
374
  "rewards/rejected": -1.526987361907959,
375
  "step": 300
376
+ },
377
+ {
378
+ "epoch": 4.133333333333334,
379
+ "grad_norm": 54.0,
380
+ "kl": 39.12127685546875,
381
+ "learning_rate": 5.879999999999999e-07,
382
+ "logits/chosen": 1737360384.0,
383
+ "logps/chosen": -1544.43232421875,
384
+ "loss": 0.4935,
385
+ "rewards/chosen": 3.9731983184814452,
386
+ "step": 310
387
+ },
388
+ {
389
+ "epoch": 4.266666666666667,
390
+ "grad_norm": 172.0,
391
+ "kl": 46.1120491027832,
392
+ "learning_rate": 5.746666666666667e-07,
393
+ "logits/chosen": 1747944038.4,
394
+ "logps/chosen": -1632.6375,
395
+ "loss": 0.4933,
396
+ "rewards/chosen": 4.660909652709961,
397
+ "step": 320
398
+ },
399
+ {
400
+ "epoch": 4.4,
401
+ "grad_norm": 60.0,
402
+ "kl": 50.143592834472656,
403
+ "learning_rate": 5.613333333333333e-07,
404
+ "logits/chosen": 1759613952.0,
405
+ "logps/chosen": -1568.59072265625,
406
+ "loss": 0.4785,
407
+ "rewards/chosen": 5.1204078674316404,
408
+ "step": 330
409
+ },
410
+ {
411
+ "epoch": 4.533333333333333,
412
+ "grad_norm": 42.5,
413
+ "kl": 45.67478942871094,
414
+ "learning_rate": 5.48e-07,
415
+ "logits/chosen": 1756831406.5018728,
416
+ "logits/rejected": 1722636635.7735848,
417
+ "logps/chosen": -1589.1077949438202,
418
+ "logps/rejected": -1681.454304245283,
419
+ "loss": 0.4447,
420
+ "rewards/chosen": 5.551258558637641,
421
+ "rewards/margins": 6.024997390931457,
422
+ "rewards/rejected": -0.47373883229381636,
423
+ "step": 340
424
+ },
425
+ {
426
+ "epoch": 4.666666666666667,
427
+ "grad_norm": 217.0,
428
+ "kl": 0.32102876901626587,
429
+ "learning_rate": 5.346666666666666e-07,
430
+ "logits/rejected": 1712375193.6,
431
+ "logps/rejected": -1534.0259765625,
432
+ "loss": 0.338,
433
+ "rewards/rejected": -0.8637893676757813,
434
+ "step": 350
435
+ },
436
+ {
437
+ "epoch": 4.8,
438
+ "grad_norm": 47.0,
439
+ "kl": 0.029415320605039597,
440
+ "learning_rate": 5.213333333333333e-07,
441
+ "logits/rejected": 1739532697.6,
442
+ "logps/rejected": -1605.47685546875,
443
+ "loss": 0.2836,
444
+ "rewards/rejected": -1.2877922058105469,
445
+ "step": 360
446
+ },
447
+ {
448
+ "epoch": 4.933333333333334,
449
+ "grad_norm": 50.5,
450
+ "kl": 0.0,
451
+ "learning_rate": 5.079999999999999e-07,
452
+ "logits/rejected": 1720070963.2,
453
+ "logps/rejected": -1630.8001953125,
454
+ "loss": 0.2262,
455
+ "rewards/rejected": -1.7868902206420898,
456
+ "step": 370
457
+ },
458
+ {
459
+ "epoch": 5.066666666666666,
460
+ "grad_norm": 70.5,
461
+ "kl": 21.522884368896484,
462
+ "learning_rate": 4.946666666666666e-07,
463
+ "logits/chosen": 1738568908.8,
464
+ "logits/rejected": 1726313472.0,
465
+ "logps/chosen": -1644.7123046875,
466
+ "logps/rejected": -1377.4642578125,
467
+ "loss": 0.3804,
468
+ "rewards/chosen": 4.260452651977539,
469
+ "rewards/margins": 5.9641773223876955,
470
+ "rewards/rejected": -1.7037246704101563,
471
+ "step": 380
472
+ },
473
+ {
474
+ "epoch": 5.2,
475
+ "grad_norm": 49.75,
476
+ "kl": 47.77645492553711,
477
+ "learning_rate": 4.813333333333334e-07,
478
+ "logits/chosen": 1732489625.6,
479
+ "logps/chosen": -1525.93984375,
480
+ "loss": 0.4944,
481
+ "rewards/chosen": 4.817991256713867,
482
+ "step": 390
483
+ },
484
+ {
485
+ "epoch": 5.333333333333333,
486
+ "grad_norm": 50.25,
487
+ "kl": 49.042144775390625,
488
+ "learning_rate": 4.68e-07,
489
+ "logits/chosen": 1741244620.8,
490
+ "logps/chosen": -1609.87353515625,
491
+ "loss": 0.4644,
492
+ "rewards/chosen": 5.071358489990234,
493
+ "step": 400
494
  }
495
  ],
496
  "logging_steps": 10,