FormlessAI committed on
Commit
ddab039
·
verified ·
1 Parent(s): e53ef9a

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16c61241ba77a4a6d1e354ec1bf073c0c4173f5eaf1f0bd990705d72968825e6
3
  size 447329696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46a365e9fcb2454a4c23ba115a7809178487f14e670b8347d96cdf4cff0d5360
3
  size 447329696
last-checkpoint/global_step298/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7b42934795f6e589ecf158269a54cdac29447df9c133456dd455795bf3a1fd2
3
+ size 337225765
last-checkpoint/global_step298/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f8f5690a0cda8662d09a99d62e7588b9ecbecd4324ff4f6a1828f76ed1d662a
3
+ size 337225893
last-checkpoint/global_step298/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e068943495936dc63bb3b9f0b9fbf295441fdd77a28f3c70f87e1374052b7c8
3
+ size 337225893
last-checkpoint/global_step298/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02fddd60d00b4177e5f0c9c4669dfbe280e45fdba78cc217dfad10a582c218dc
3
+ size 337225893
last-checkpoint/global_step298/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4073004376166aa76edc9f00a363567c1dc2e7f7463a3a1f13d6a16a3b825edc
3
+ size 2282578149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step199
 
1
+ global_step298
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9892a41432d4e48929c5376b8225d5373bb9afe9f14665fbcb3934dc45cb330a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:022e5f570f01a12c1d81b42bd6eb19a0e173e7f47e424219366d42929f99e132
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6f3949e2b25ad05905da16bc2d58aeadc9a9f1db0b0e97a04ceb63467acff1d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3aed2436bc3ad431adb01e8d4df32815773aab4a6b13c3adb4fced6192268c3
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c3812c5e7f13c9d32f97d83cd625428061b1c9486e869caffd453d83ac07ceb
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97dd34720bb0b20ed7301d9f60cd440d867fb1aac58f9dc15c1f7fe4ac70e520
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6eda756b33768639c1e1932ae00c5d43269de5705c09162bc09acdf71d3533c0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37832d386fc739839d36940cdc0c137876018d931548a6ee584a9d52680e1a59
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47104b67f5b6e5f268c4ddeb2948b3f6186e33a1d199d379ffca5d4a7e9bb1e8
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af806fee020be865d25dafddb307ac91eb8c597cf77813f63beb93464bf2104
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.028071066364645958,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.4308797127468582,
6
  "eval_steps": 50,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -672,6 +672,338 @@
672
  "eval_samples_per_second": 30.947,
673
  "eval_steps_per_second": 1.97,
674
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
  }
676
  ],
677
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.02023915760219097,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.1436265709156195,
6
  "eval_steps": 50,
7
+ "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
672
  "eval_samples_per_second": 30.947,
673
  "eval_steps_per_second": 1.97,
674
  "step": 200
675
+ },
676
+ {
677
+ "epoch": 1.466786355475763,
678
+ "grad_norm": 3.074913501739502,
679
+ "learning_rate": 0.00011372766805777725,
680
+ "logits/chosen": -4.8984375,
681
+ "logits/rejected": -8.321874618530273,
682
+ "logps/chosen": -133.85000610351562,
683
+ "logps/rejected": -351.5,
684
+ "loss": 0.0452,
685
+ "rewards/accuracies": 0.9750000238418579,
686
+ "rewards/chosen": -6.512499809265137,
687
+ "rewards/margins": 21.049999237060547,
688
+ "rewards/rejected": -27.5625,
689
+ "step": 205
690
+ },
691
+ {
692
+ "epoch": 1.502692998204668,
693
+ "grad_norm": 3.414430618286133,
694
+ "learning_rate": 0.00011372461861389142,
695
+ "logits/chosen": -5.293749809265137,
696
+ "logits/rejected": -7.599999904632568,
697
+ "logps/chosen": -180.0,
698
+ "logps/rejected": -397.3999938964844,
699
+ "loss": 0.0937,
700
+ "rewards/accuracies": 0.9781249761581421,
701
+ "rewards/chosen": -11.2421875,
702
+ "rewards/margins": 20.878124237060547,
703
+ "rewards/rejected": -32.10625076293945,
704
+ "step": 210
705
+ },
706
+ {
707
+ "epoch": 1.5385996409335727,
708
+ "grad_norm": 1.9072022438049316,
709
+ "learning_rate": 0.00011372142606203516,
710
+ "logits/chosen": -4.057812690734863,
711
+ "logits/rejected": -6.109375,
712
+ "logps/chosen": -156.0749969482422,
713
+ "logps/rejected": -324.79998779296875,
714
+ "loss": 0.0461,
715
+ "rewards/accuracies": 0.9750000238418579,
716
+ "rewards/chosen": -8.564062118530273,
717
+ "rewards/margins": 16.028125762939453,
718
+ "rewards/rejected": -24.587499618530273,
719
+ "step": 215
720
+ },
721
+ {
722
+ "epoch": 1.5745062836624775,
723
+ "grad_norm": 1.2352949380874634,
724
+ "learning_rate": 0.00011371809041024573,
725
+ "logits/chosen": -5.315625190734863,
726
+ "logits/rejected": -7.754687309265137,
727
+ "logps/chosen": -189.64999389648438,
728
+ "logps/rejected": -392.0,
729
+ "loss": 0.0332,
730
+ "rewards/accuracies": 0.981249988079071,
731
+ "rewards/chosen": -11.946874618530273,
732
+ "rewards/margins": 19.446874618530273,
733
+ "rewards/rejected": -31.424999237060547,
734
+ "step": 220
735
+ },
736
+ {
737
+ "epoch": 1.6104129263913824,
738
+ "grad_norm": 0.5811780095100403,
739
+ "learning_rate": 0.00011371461166692062,
740
+ "logits/chosen": -7.451562404632568,
741
+ "logits/rejected": -10.225000381469727,
742
+ "logps/chosen": -218.4499969482422,
743
+ "logps/rejected": -433.79998779296875,
744
+ "loss": 0.0486,
745
+ "rewards/accuracies": 0.984375,
746
+ "rewards/chosen": -14.824999809265137,
747
+ "rewards/margins": 20.75,
748
+ "rewards/rejected": -35.57500076293945,
749
+ "step": 225
750
+ },
751
+ {
752
+ "epoch": 1.6463195691202872,
753
+ "grad_norm": 0.33781036734580994,
754
+ "learning_rate": 0.00011371098984081755,
755
+ "logits/chosen": -9.0390625,
756
+ "logits/rejected": -11.784375190734863,
757
+ "logps/chosen": -232.75,
758
+ "logps/rejected": -425.79998779296875,
759
+ "loss": 0.0465,
760
+ "rewards/accuracies": 0.9781249761581421,
761
+ "rewards/chosen": -16.509374618530273,
762
+ "rewards/margins": 18.971874237060547,
763
+ "rewards/rejected": -35.45624923706055,
764
+ "step": 230
765
+ },
766
+ {
767
+ "epoch": 1.682226211849192,
768
+ "grad_norm": 0.9501491189002991,
769
+ "learning_rate": 0.0001137072249410545,
770
+ "logits/chosen": -8.489062309265137,
771
+ "logits/rejected": -11.706250190734863,
772
+ "logps/chosen": -229.6999969482422,
773
+ "logps/rejected": -451.3999938964844,
774
+ "loss": 0.0196,
775
+ "rewards/accuracies": 0.996874988079071,
776
+ "rewards/chosen": -16.003124237060547,
777
+ "rewards/margins": 21.600000381469727,
778
+ "rewards/rejected": -37.599998474121094,
779
+ "step": 235
780
+ },
781
+ {
782
+ "epoch": 1.718132854578097,
783
+ "grad_norm": 2.2919723987579346,
784
+ "learning_rate": 0.00011370331697710956,
785
+ "logits/chosen": -8.271875381469727,
786
+ "logits/rejected": -12.678125381469727,
787
+ "logps/chosen": -205.75,
788
+ "logps/rejected": -511.79998779296875,
789
+ "loss": 0.0752,
790
+ "rewards/accuracies": 0.984375,
791
+ "rewards/chosen": -13.65625,
792
+ "rewards/margins": 30.087499618530273,
793
+ "rewards/rejected": -43.75,
794
+ "step": 240
795
+ },
796
+ {
797
+ "epoch": 1.754039497307002,
798
+ "grad_norm": 2.5256083011627197,
799
+ "learning_rate": 0.00011369926595882104,
800
+ "logits/chosen": -10.071874618530273,
801
+ "logits/rejected": -13.768750190734863,
802
+ "logps/chosen": -291.04998779296875,
803
+ "logps/rejected": -619.2000122070312,
804
+ "loss": 0.0377,
805
+ "rewards/accuracies": 0.9781249761581421,
806
+ "rewards/chosen": -22.446874618530273,
807
+ "rewards/margins": 32.150001525878906,
808
+ "rewards/rejected": -54.57500076293945,
809
+ "step": 245
810
+ },
811
+ {
812
+ "epoch": 1.7899461400359067,
813
+ "grad_norm": 1.3631178140640259,
814
+ "learning_rate": 0.00011369507189638736,
815
+ "logits/chosen": -3.7313232421875,
816
+ "logits/rejected": -7.044335842132568,
817
+ "logps/chosen": -175.27499389648438,
818
+ "logps/rejected": -371.45001220703125,
819
+ "loss": 0.0627,
820
+ "rewards/accuracies": 0.9781249761581421,
821
+ "rewards/chosen": -10.771875381469727,
822
+ "rewards/margins": 18.6015625,
823
+ "rewards/rejected": -29.353124618530273,
824
+ "step": 250
825
+ },
826
+ {
827
+ "epoch": 1.7899461400359067,
828
+ "eval_logits/chosen": -0.22195972502231598,
829
+ "eval_logits/rejected": -1.6530331373214722,
830
+ "eval_logps/chosen": -116.17646789550781,
831
+ "eval_logps/rejected": -208.88235473632812,
832
+ "eval_loss": 0.07754824310541153,
833
+ "eval_rewards/accuracies": 0.9485294222831726,
834
+ "eval_rewards/chosen": -4.249080657958984,
835
+ "eval_rewards/margins": 8.38786792755127,
836
+ "eval_rewards/rejected": -12.650734901428223,
837
+ "eval_runtime": 8.5185,
838
+ "eval_samples_per_second": 31.344,
839
+ "eval_steps_per_second": 1.996,
840
+ "step": 250
841
+ },
842
+ {
843
+ "epoch": 1.8258527827648114,
844
+ "grad_norm": 1.8161951303482056,
845
+ "learning_rate": 0.00011369073480036712,
846
+ "logits/chosen": -2.3915038108825684,
847
+ "logits/rejected": -3.521484375,
848
+ "logps/chosen": -145.4250030517578,
849
+ "logps/rejected": -257.20001220703125,
850
+ "loss": 0.0791,
851
+ "rewards/accuracies": 0.9624999761581421,
852
+ "rewards/chosen": -7.525000095367432,
853
+ "rewards/margins": 10.292187690734863,
854
+ "rewards/rejected": -17.818750381469727,
855
+ "step": 255
856
+ },
857
+ {
858
+ "epoch": 1.8617594254937164,
859
+ "grad_norm": 0.3177375793457031,
860
+ "learning_rate": 0.00011368625468167889,
861
+ "logits/chosen": -8.206250190734863,
862
+ "logits/rejected": -10.065625190734863,
863
+ "logps/chosen": -325.3500061035156,
864
+ "logps/rejected": -509.1000061035156,
865
+ "loss": 0.0508,
866
+ "rewards/accuracies": 0.971875011920929,
867
+ "rewards/chosen": -26.078125,
868
+ "rewards/margins": 17.381250381469727,
869
+ "rewards/rejected": -43.42499923706055,
870
+ "step": 260
871
+ },
872
+ {
873
+ "epoch": 1.8976660682226212,
874
+ "grad_norm": 0.7164928317070007,
875
+ "learning_rate": 0.00011368163155160139,
876
+ "logits/chosen": -10.165624618530273,
877
+ "logits/rejected": -11.875,
878
+ "logps/chosen": -410.29998779296875,
879
+ "logps/rejected": -585.4000244140625,
880
+ "loss": 0.0577,
881
+ "rewards/accuracies": 0.981249988079071,
882
+ "rewards/chosen": -34.45624923706055,
883
+ "rewards/margins": 16.634374618530273,
884
+ "rewards/rejected": -51.07500076293945,
885
+ "step": 265
886
+ },
887
+ {
888
+ "epoch": 1.933572710951526,
889
+ "grad_norm": 2.070739507675171,
890
+ "learning_rate": 0.00011367686542177336,
891
+ "logits/chosen": -6.4375,
892
+ "logits/rejected": -7.814062595367432,
893
+ "logps/chosen": -340.1000061035156,
894
+ "logps/rejected": -469.79998779296875,
895
+ "loss": 0.0576,
896
+ "rewards/accuracies": 0.9593750238418579,
897
+ "rewards/chosen": -27.168750762939453,
898
+ "rewards/margins": 12.381250381469727,
899
+ "rewards/rejected": -39.54999923706055,
900
+ "step": 270
901
+ },
902
+ {
903
+ "epoch": 1.969479353680431,
904
+ "grad_norm": 1.2207964658737183,
905
+ "learning_rate": 0.0001136719563041935,
906
+ "logits/chosen": -4.547656059265137,
907
+ "logits/rejected": -6.598437309265137,
908
+ "logps/chosen": -263.0,
909
+ "logps/rejected": -423.8999938964844,
910
+ "loss": 0.0322,
911
+ "rewards/accuracies": 0.9906250238418579,
912
+ "rewards/chosen": -19.46875,
913
+ "rewards/margins": 15.509374618530273,
914
+ "rewards/rejected": -34.993751525878906,
915
+ "step": 275
916
+ },
917
+ {
918
+ "epoch": 2.0,
919
+ "grad_norm": 6.416799545288086,
920
+ "learning_rate": 0.0001136669042112205,
921
+ "logits/chosen": -5.321691036224365,
922
+ "logits/rejected": -8.242647171020508,
923
+ "logps/chosen": -274.0,
924
+ "logps/rejected": -515.6470336914062,
925
+ "loss": 0.0428,
926
+ "rewards/accuracies": 0.9852941036224365,
927
+ "rewards/chosen": -20.264705657958984,
928
+ "rewards/margins": 23.514705657958984,
929
+ "rewards/rejected": -43.80882263183594,
930
+ "step": 280
931
+ },
932
+ {
933
+ "epoch": 2.035906642728905,
934
+ "grad_norm": 0.0026381895877420902,
935
+ "learning_rate": 0.00011366170915557303,
936
+ "logits/chosen": -6.020312309265137,
937
+ "logits/rejected": -9.284375190734863,
938
+ "logps/chosen": -289.1499938964844,
939
+ "logps/rejected": -603.0,
940
+ "loss": 0.0676,
941
+ "rewards/accuracies": 0.9781249761581421,
942
+ "rewards/chosen": -22.331249237060547,
943
+ "rewards/margins": 30.418750762939453,
944
+ "rewards/rejected": -52.75,
945
+ "step": 285
946
+ },
947
+ {
948
+ "epoch": 2.0718132854578095,
949
+ "grad_norm": 9.352706909179688,
950
+ "learning_rate": 0.00011365637115032964,
951
+ "logits/chosen": -5.478906154632568,
952
+ "logits/rejected": -9.690625190734863,
953
+ "logps/chosen": -263.3500061035156,
954
+ "logps/rejected": -613.4000244140625,
955
+ "loss": 0.0791,
956
+ "rewards/accuracies": 0.9750000238418579,
957
+ "rewards/chosen": -19.450000762939453,
958
+ "rewards/margins": 34.474998474121094,
959
+ "rewards/rejected": -53.9375,
960
+ "step": 290
961
+ },
962
+ {
963
+ "epoch": 2.1077199281867145,
964
+ "grad_norm": 1.2689452171325684,
965
+ "learning_rate": 0.0001136508902089287,
966
+ "logits/chosen": -2.486132860183716,
967
+ "logits/rejected": -5.1484375,
968
+ "logps/chosen": -145.1999969482422,
969
+ "logps/rejected": -346.20001220703125,
970
+ "loss": 0.1107,
971
+ "rewards/accuracies": 0.9593750238418579,
972
+ "rewards/chosen": -7.432031154632568,
973
+ "rewards/margins": 19.28125,
974
+ "rewards/rejected": -26.706249237060547,
975
+ "step": 295
976
+ },
977
+ {
978
+ "epoch": 2.1436265709156195,
979
+ "grad_norm": 0.04833826795220375,
980
+ "learning_rate": 0.00011364526634516852,
981
+ "logits/chosen": -5.057812690734863,
982
+ "logits/rejected": -8.810937881469727,
983
+ "logps/chosen": -240.14999389648438,
984
+ "logps/rejected": -519.7999877929688,
985
+ "loss": 0.1121,
986
+ "rewards/accuracies": 0.9781249761581421,
987
+ "rewards/chosen": -17.071874618530273,
988
+ "rewards/margins": 27.412500381469727,
989
+ "rewards/rejected": -44.462501525878906,
990
+ "step": 300
991
+ },
992
+ {
993
+ "epoch": 2.1436265709156195,
994
+ "eval_logits/chosen": -4.876838207244873,
995
+ "eval_logits/rejected": -8.235294342041016,
996
+ "eval_logps/chosen": -253.05882263183594,
997
+ "eval_logps/rejected": -514.8235473632812,
998
+ "eval_loss": 0.02023915760219097,
999
+ "eval_rewards/accuracies": 0.9836230278015137,
1000
+ "eval_rewards/chosen": -18.169116973876953,
1001
+ "eval_rewards/margins": 25.522058486938477,
1002
+ "eval_rewards/rejected": -43.661766052246094,
1003
+ "eval_runtime": 8.6202,
1004
+ "eval_samples_per_second": 30.974,
1005
+ "eval_steps_per_second": 1.972,
1006
+ "step": 300
1007
  }
1008
  ],
1009
  "logging_steps": 5,