willyli commited on
Commit
4b630c6
·
verified ·
1 Parent(s): 896d237

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4570838371cb46cb81aa6e4e93e439795cf68c0acdb027e6d242243ef305391
3
  size 4962001760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c243598c92aa172639ff6e40dd1b983ce2b906a69338a7f68bb077d0b49af2
3
  size 4962001760
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d816528d00db7afcf13755816f471a57e2401601af1e498b925651fb1ee7141
3
  size 4915916160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97de6841f7a06df63f15116413b63a6d8f0ed1481c73daec4d308907b9ef7477
3
  size 4915916160
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e2a7109a89481a14c23ebcca39f764b77662153e1d11566a1997fcbb214fdec
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df52273384350aee9baa269ceb1e9e573aeaf2a7566f394660f38eefd4989e8
3
  size 4999819336
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4b9c1b25cb5cc33bd227513645df9037e53f4f31852a7141c5f1585c5401666
3
  size 1623221024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bff86115c9ca1a90bf6873583c738e398f9f9947c13c68352f73a518f1bd9b44
3
  size 1623221024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:006f3cb0dd1a2817bf4e23a383a26d52ed1f7f87a0ddb412aec4aae524addbbe
3
  size 13053963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:554d638b6db84406c05520b337e63b464c5dd4ca01d9e36368adbd26b62126ee
3
  size 13053963
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8b9684a001ced786cf3f8d180da14cc75fa2707f771fb8db8a2f3987616d58a
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6954e89c1960fa7ad6ea4d0b8bbf7ab04896fa485702dacc91479549c4c5f5bd
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69a7fd708494878bd9a17192c05a815384b2db4098489cd7778c553943417ba7
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39430be47ab0759ba4c6b2029bf2ac1430c442bd0153447dadb3e88ceeace01f
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f6f6314518ebe461b62a277be61b06e05bed27aea542e4ae2bffb060e73689a
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e70cc76576adfb81afd443354dc1068743c58308fe7d889099fbeedaa0a7788
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d6a9499db1eadd54ce72d5506ca43e60264cf1a33ebf777e4ed3931ed1a03b9
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a75d5f02ca237d8ef45529e298315fb70e4bd993cf96fd04ae998cf9895b3c4
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4e82eac1af25e6d36636846ff6b561653dba1263d790f10f82c5fa93bfeb01a
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6680063dc12b212232a97593d0ad4ca30bf709a7f3fc4f37444f622689ee4b
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41aa0f60e2dc0839912ac9319700da038b4ead806802304d22298e9dd3372ae9
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d575736283b8fd594e1e1fc04ebb33a9123b2d46d9a7f2b36ee599ae2efdf4fa
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3586402bd40d50079663ce2ab2f6725fc21b6aa67d9ba6b8985080086eb3fd05
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac6501ba3ee593e553d24857a1d06a2ae89d416a291843a7f705be771a8677fb
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b3318c70326f3b79256c222c497e48e741ec33cc9f1c47dd67359e1fee68294
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee4b071aa28acd8e61610943b47f7a78b87ce3fa1a3a7004c67f45adf039ffbe
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:504c0ac187a5cb09c6c830e073a49c85f9ed8f4f258d346199b3f76f0521e60a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d09ab206d51e08db07522003a69609e8e727f47b95ea47e6b2b8aa03687721b3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 6.666666666666667,
6
  "eval_steps": 100,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -609,6 +609,124 @@
609
  "loss": 0.265,
610
  "rewards/rejected": -1.5677401542663574,
611
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  }
613
  ],
614
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 8.0,
6
  "eval_steps": 100,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
609
  "loss": 0.265,
610
  "rewards/rejected": -1.5677401542663574,
611
  "step": 500
612
+ },
613
+ {
614
+ "epoch": 6.8,
615
+ "grad_norm": 39.75,
616
+ "kl": 0.0,
617
+ "learning_rate": 3.2133333333333335e-07,
618
+ "logits/rejected": 1729151795.2,
619
+ "logps/rejected": -1609.8298828125,
620
+ "loss": 0.2389,
621
+ "rewards/rejected": -1.7230974197387696,
622
+ "step": 510
623
+ },
624
+ {
625
+ "epoch": 6.933333333333334,
626
+ "grad_norm": 42.0,
627
+ "kl": 0.0,
628
+ "learning_rate": 3.08e-07,
629
+ "logits/rejected": 1712730828.8,
630
+ "logps/rejected": -1633.16318359375,
631
+ "loss": 0.2112,
632
+ "rewards/rejected": -2.023202896118164,
633
+ "step": 520
634
+ },
635
+ {
636
+ "epoch": 7.066666666666666,
637
+ "grad_norm": 64.5,
638
+ "kl": 25.04488754272461,
639
+ "learning_rate": 2.9466666666666666e-07,
640
+ "logits/chosen": 1731994419.2,
641
+ "logits/rejected": 1720656691.2,
642
+ "logps/chosen": -1637.28173828125,
643
+ "logps/rejected": -1377.9115234375,
644
+ "loss": 0.3763,
645
+ "rewards/chosen": 5.003516006469726,
646
+ "rewards/margins": 6.7519731521606445,
647
+ "rewards/rejected": -1.748457145690918,
648
+ "step": 530
649
+ },
650
+ {
651
+ "epoch": 7.2,
652
+ "grad_norm": 56.0,
653
+ "kl": 52.43037033081055,
654
+ "learning_rate": 2.813333333333333e-07,
655
+ "logits/chosen": 1726875648.0,
656
+ "logps/chosen": -1521.06044921875,
657
+ "loss": 0.4829,
658
+ "rewards/chosen": 5.305931854248047,
659
+ "step": 540
660
+ },
661
+ {
662
+ "epoch": 7.333333333333333,
663
+ "grad_norm": 50.0,
664
+ "kl": 51.872047424316406,
665
+ "learning_rate": 2.68e-07,
666
+ "logits/chosen": 1734085222.4,
667
+ "logps/chosen": -1607.64345703125,
668
+ "loss": 0.4712,
669
+ "rewards/chosen": 5.294354629516602,
670
+ "step": 550
671
+ },
672
+ {
673
+ "epoch": 7.466666666666667,
674
+ "grad_norm": 51.0,
675
+ "kl": 55.77549362182617,
676
+ "learning_rate": 2.546666666666666e-07,
677
+ "logits/chosen": 1753063219.2,
678
+ "logps/chosen": -1617.415625,
679
+ "loss": 0.4773,
680
+ "rewards/chosen": 5.702725982666015,
681
+ "step": 560
682
+ },
683
+ {
684
+ "epoch": 7.6,
685
+ "grad_norm": 43.75,
686
+ "kl": 18.287708282470703,
687
+ "learning_rate": 2.413333333333333e-07,
688
+ "logits/chosen": 1731032073.5700934,
689
+ "logits/rejected": 1702629587.5305164,
690
+ "logps/chosen": -1447.9690420560748,
691
+ "logps/rejected": -1521.8135269953052,
692
+ "loss": 0.3238,
693
+ "rewards/chosen": 5.521567086193049,
694
+ "rewards/margins": 7.057752337348419,
695
+ "rewards/rejected": -1.5361852511553697,
696
+ "step": 570
697
+ },
698
+ {
699
+ "epoch": 7.733333333333333,
700
+ "grad_norm": 41.0,
701
+ "kl": 0.0,
702
+ "learning_rate": 2.28e-07,
703
+ "logits/rejected": 1697045913.6,
704
+ "logps/rejected": -1615.4837890625,
705
+ "loss": 0.2158,
706
+ "rewards/rejected": -1.9997014999389648,
707
+ "step": 580
708
+ },
709
+ {
710
+ "epoch": 7.866666666666667,
711
+ "grad_norm": 38.75,
712
+ "kl": 0.0,
713
+ "learning_rate": 2.1466666666666666e-07,
714
+ "logits/rejected": 1732887756.8,
715
+ "logps/rejected": -1627.6578125,
716
+ "loss": 0.206,
717
+ "rewards/rejected": -2.029564094543457,
718
+ "step": 590
719
+ },
720
+ {
721
+ "epoch": 8.0,
722
+ "grad_norm": 43.25,
723
+ "kl": 0.0,
724
+ "learning_rate": 2.0133333333333334e-07,
725
+ "logits/rejected": 1715685171.2,
726
+ "logps/rejected": -1501.2095703125,
727
+ "loss": 0.241,
728
+ "rewards/rejected": -1.795237922668457,
729
+ "step": 600
730
  }
731
  ],
732
  "logging_steps": 10,