willyli committed on
Commit
3d8df7e
·
verified ·
1 Parent(s): 65663a0

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87c243598c92aa172639ff6e40dd1b983ce2b906a69338a7f68bb077d0b49af2
3
  size 4962001760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fa05163bed9a1cd36363dd4ff80562137d5f68770c186539473124d4551083
3
  size 4962001760
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97de6841f7a06df63f15116413b63a6d8f0ed1481c73daec4d308907b9ef7477
3
  size 4915916160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da931ba763893ccb98dc512107b6b5c263eca97cd13e5b05ac86947b959fc7da
3
  size 4915916160
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4df52273384350aee9baa269ceb1e9e573aeaf2a7566f394660f38eefd4989e8
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52dcde588022fc935bedd4afcd81e6f4e1b41f079db26834e38267f75b98e817
3
  size 4999819336
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff86115c9ca1a90bf6873583c738e398f9f9947c13c68352f73a518f1bd9b44
3
  size 1623221024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1642b4a6158c2fd2540216fefd044c2d03795ab85a18a37a96ff8990efe21e7a
3
  size 1623221024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:554d638b6db84406c05520b337e63b464c5dd4ca01d9e36368adbd26b62126ee
3
  size 13053963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb389ace5a850a48e09909591bc0839eabefd253110f1ce6ef4f82c311105c39
3
  size 13053963
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6954e89c1960fa7ad6ea4d0b8bbf7ab04896fa485702dacc91479549c4c5f5bd
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0da242ef698d1e1ed5421e6b23d1b44f93a6a55dcb1a127cd8931c14bd84e798
3
  size 16389
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39430be47ab0759ba4c6b2029bf2ac1430c442bd0153447dadb3e88ceeace01f
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84fd50bb0fe9f0aa26725966ffc16f69210c4dc450904bc61526922ca0cae8b
3
  size 16389
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e70cc76576adfb81afd443354dc1068743c58308fe7d889099fbeedaa0a7788
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d28d7579b85bd847de486eff8a19da23bfa22d830f81a69f170a6ac10defb69
3
  size 16389
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a75d5f02ca237d8ef45529e298315fb70e4bd993cf96fd04ae998cf9895b3c4
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5d0fa36258fc125bd8dc7fbf20d2fa389251a7d5e275c5c7a393818ce55fa5
3
  size 16389
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c6680063dc12b212232a97593d0ad4ca30bf709a7f3fc4f37444f622689ee4b
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726101b9f924215ed50f082b1e89969dff616b826c0c2b64bd1221baa108c357
3
  size 16389
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d575736283b8fd594e1e1fc04ebb33a9123b2d46d9a7f2b36ee599ae2efdf4fa
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c55b96aa4759bfcbcc6b0d85681eebff0595b41c99b9a11340f9a8e78bff69
3
  size 16389
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac6501ba3ee593e553d24857a1d06a2ae89d416a291843a7f705be771a8677fb
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f2fbc025fa229f4a96f9a30abe9f81e3829db7eae80a3909a4d421e8abeee73
3
  size 16389
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee4b071aa28acd8e61610943b47f7a78b87ce3fa1a3a7004c67f45adf039ffbe
3
  size 16389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab5ad9877076fc54a96b83cdb0188bb8bad71aa0e4038145c7fc0725631af546
3
  size 16389
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d09ab206d51e08db07522003a69609e8e727f47b95ea47e6b2b8aa03687721b3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:537e8778e25faae48041764f389414d8ae2bc54743a220f0eba2e87e988c3690
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 8.0,
6
  "eval_steps": 100,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -727,6 +727,124 @@
727
  "loss": 0.241,
728
  "rewards/rejected": -1.795237922668457,
729
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  ],
732
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 9.333333333333334,
6
  "eval_steps": 100,
7
+ "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
727
  "loss": 0.241,
728
  "rewards/rejected": -1.795237922668457,
729
  "step": 600
730
+ },
731
+ {
732
+ "epoch": 8.133333333333333,
733
+ "grad_norm": 55.0,
734
+ "kl": 51.67559051513672,
735
+ "learning_rate": 1.88e-07,
736
+ "logits/chosen": 1727099904.0,
737
+ "logps/chosen": -1532.07939453125,
738
+ "loss": 0.4961,
739
+ "rewards/chosen": 5.208480453491211,
740
+ "step": 610
741
+ },
742
+ {
743
+ "epoch": 8.266666666666667,
744
+ "grad_norm": 64.5,
745
+ "kl": 54.74528884887695,
746
+ "learning_rate": 1.7466666666666667e-07,
747
+ "logits/chosen": 1733115904.0,
748
+ "logps/chosen": -1624.519921875,
749
+ "loss": 0.5013,
750
+ "rewards/chosen": 5.472665786743164,
751
+ "step": 620
752
+ },
753
+ {
754
+ "epoch": 8.4,
755
+ "grad_norm": 54.75,
756
+ "kl": 54.551849365234375,
757
+ "learning_rate": 1.6133333333333332e-07,
758
+ "logits/chosen": 1742884659.2,
759
+ "logps/chosen": -1564.97412109375,
760
+ "loss": 0.4915,
761
+ "rewards/chosen": 5.482054138183594,
762
+ "step": 630
763
+ },
764
+ {
765
+ "epoch": 8.533333333333333,
766
+ "grad_norm": 30.625,
767
+ "kl": 46.306522369384766,
768
+ "learning_rate": 1.4799999999999998e-07,
769
+ "logits/chosen": 1739496766.3220973,
770
+ "logits/rejected": 1702134687.3962264,
771
+ "logps/chosen": -1588.0999531835207,
772
+ "logps/rejected": -1696.4740566037735,
773
+ "loss": 0.4184,
774
+ "rewards/chosen": 5.652020515127575,
775
+ "rewards/margins": 7.627730826153518,
776
+ "rewards/rejected": -1.9757103110259433,
777
+ "step": 640
778
+ },
779
+ {
780
+ "epoch": 8.666666666666666,
781
+ "grad_norm": 56.5,
782
+ "kl": 0.0,
783
+ "learning_rate": 1.3466666666666665e-07,
784
+ "logits/rejected": 1696173260.8,
785
+ "logps/rejected": -1544.7572265625,
786
+ "loss": 0.2282,
787
+ "rewards/rejected": -1.9369186401367187,
788
+ "step": 650
789
+ },
790
+ {
791
+ "epoch": 8.8,
792
+ "grad_norm": 36.75,
793
+ "kl": 0.0,
794
+ "learning_rate": 1.2133333333333333e-07,
795
+ "logits/rejected": 1725317120.0,
796
+ "logps/rejected": -1612.53291015625,
797
+ "loss": 0.2132,
798
+ "rewards/rejected": -1.9934148788452148,
799
+ "step": 660
800
+ },
801
+ {
802
+ "epoch": 8.933333333333334,
803
+ "grad_norm": 43.25,
804
+ "kl": 0.0,
805
+ "learning_rate": 1.0799999999999999e-07,
806
+ "logits/rejected": 1711136563.2,
807
+ "logps/rejected": -1633.72958984375,
808
+ "loss": 0.2084,
809
+ "rewards/rejected": -2.079827880859375,
810
+ "step": 670
811
+ },
812
+ {
813
+ "epoch": 9.066666666666666,
814
+ "grad_norm": 74.5,
815
+ "kl": 26.150564193725586,
816
+ "learning_rate": 9.466666666666665e-08,
817
+ "logits/chosen": 1734777036.8,
818
+ "logits/rejected": 1720304844.8,
819
+ "logps/chosen": -1634.3982421875,
820
+ "logps/rejected": -1376.32314453125,
821
+ "loss": 0.3847,
822
+ "rewards/chosen": 5.2918556213378904,
823
+ "rewards/margins": 6.881484413146973,
824
+ "rewards/rejected": -1.5896287918090821,
825
+ "step": 680
826
+ },
827
+ {
828
+ "epoch": 9.2,
829
+ "grad_norm": 45.0,
830
+ "kl": 54.32807540893555,
831
+ "learning_rate": 8.133333333333332e-08,
832
+ "logits/chosen": 1726023270.4,
833
+ "logps/chosen": -1519.271875,
834
+ "loss": 0.4882,
835
+ "rewards/chosen": 5.484774017333985,
836
+ "step": 690
837
+ },
838
+ {
839
+ "epoch": 9.333333333333334,
840
+ "grad_norm": 51.75,
841
+ "kl": 53.640716552734375,
842
+ "learning_rate": 6.8e-08,
843
+ "logits/chosen": 1733355929.6,
844
+ "logps/chosen": -1606.2349609375,
845
+ "loss": 0.4805,
846
+ "rewards/chosen": 5.435222625732422,
847
+ "step": 700
848
  }
849
  ],
850
  "logging_steps": 10,