CocoRoF commited on
Commit
cc96919
·
verified ·
1 Parent(s): d94034a

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a424a2b0f4bba2668648fad242ba4bc01129a8dfff560cb28160583a32cc8795
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:314d749e4dd7abfe5f61b2561f4a91a44ca75ecd1eed4e91397af84284dbe186
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4147564c0aebe4adfc533256efb0c26f7c34d829d8cfd1faf86069796ae5e92b
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4dc31615294601109de6bdb9c8366f58c1c42b24bbdb22c50617985782c033
3
  size 1475248442
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1798148c8ddbef876c6eb294160aa711b50b4fbc294eeefa2c2edd714ea4965e
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f991cde05bd4bf7e497599ae4e2cc6a082c7ed663e36ba15a2e932ed573a6a1f
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.46860356138706655,
5
  "eval_steps": 2.0,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -707,6 +707,356 @@
707
  "learning_rate": 2.9414245548266168e-05,
708
  "loss": 0.3148,
709
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  }
711
  ],
712
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7029053420805998,
5
  "eval_steps": 2.0,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
707
  "learning_rate": 2.9414245548266168e-05,
708
  "loss": 0.3148,
709
  "step": 1000
710
+ },
711
+ {
712
+ "epoch": 0.4732895970009372,
713
+ "grad_norm": 2.0385727882385254,
714
+ "learning_rate": 2.940838800374883e-05,
715
+ "loss": 0.3239,
716
+ "step": 1010
717
+ },
718
+ {
719
+ "epoch": 0.47797563261480785,
720
+ "grad_norm": 1.620071291923523,
721
+ "learning_rate": 2.940253045923149e-05,
722
+ "loss": 0.3402,
723
+ "step": 1020
724
+ },
725
+ {
726
+ "epoch": 0.48266166822867856,
727
+ "grad_norm": 1.8785263299942017,
728
+ "learning_rate": 2.9396672914714152e-05,
729
+ "loss": 0.3246,
730
+ "step": 1030
731
+ },
732
+ {
733
+ "epoch": 0.4873477038425492,
734
+ "grad_norm": 2.2061355113983154,
735
+ "learning_rate": 2.9390815370196814e-05,
736
+ "loss": 0.3742,
737
+ "step": 1040
738
+ },
739
+ {
740
+ "epoch": 0.49203373945641987,
741
+ "grad_norm": 1.86517333984375,
742
+ "learning_rate": 2.9384957825679477e-05,
743
+ "loss": 0.3132,
744
+ "step": 1050
745
+ },
746
+ {
747
+ "epoch": 0.4967197750702905,
748
+ "grad_norm": 1.9350093603134155,
749
+ "learning_rate": 2.9379100281162136e-05,
750
+ "loss": 0.3288,
751
+ "step": 1060
752
+ },
753
+ {
754
+ "epoch": 0.5014058106841612,
755
+ "grad_norm": 1.9156781435012817,
756
+ "learning_rate": 2.93732427366448e-05,
757
+ "loss": 0.3561,
758
+ "step": 1070
759
+ },
760
+ {
761
+ "epoch": 0.5060918462980318,
762
+ "grad_norm": 1.9902970790863037,
763
+ "learning_rate": 2.936738519212746e-05,
764
+ "loss": 0.3531,
765
+ "step": 1080
766
+ },
767
+ {
768
+ "epoch": 0.5107778819119025,
769
+ "grad_norm": 2.0289220809936523,
770
+ "learning_rate": 2.9361527647610123e-05,
771
+ "loss": 0.3589,
772
+ "step": 1090
773
+ },
774
+ {
775
+ "epoch": 0.5154639175257731,
776
+ "grad_norm": 2.2270498275756836,
777
+ "learning_rate": 2.9355670103092782e-05,
778
+ "loss": 0.3221,
779
+ "step": 1100
780
+ },
781
+ {
782
+ "epoch": 0.5201499531396439,
783
+ "grad_norm": 2.1600871086120605,
784
+ "learning_rate": 2.9349812558575448e-05,
785
+ "loss": 0.3466,
786
+ "step": 1110
787
+ },
788
+ {
789
+ "epoch": 0.5248359887535146,
790
+ "grad_norm": 1.8811109066009521,
791
+ "learning_rate": 2.9343955014058107e-05,
792
+ "loss": 0.3416,
793
+ "step": 1120
794
+ },
795
+ {
796
+ "epoch": 0.5295220243673852,
797
+ "grad_norm": 1.6011837720870972,
798
+ "learning_rate": 2.933809746954077e-05,
799
+ "loss": 0.3707,
800
+ "step": 1130
801
+ },
802
+ {
803
+ "epoch": 0.5342080599812559,
804
+ "grad_norm": 1.9934078454971313,
805
+ "learning_rate": 2.9332239925023428e-05,
806
+ "loss": 0.3954,
807
+ "step": 1140
808
+ },
809
+ {
810
+ "epoch": 0.5388940955951266,
811
+ "grad_norm": 1.530349612236023,
812
+ "learning_rate": 2.9326382380506094e-05,
813
+ "loss": 0.3301,
814
+ "step": 1150
815
+ },
816
+ {
817
+ "epoch": 0.5435801312089972,
818
+ "grad_norm": 1.9134522676467896,
819
+ "learning_rate": 2.9320524835988753e-05,
820
+ "loss": 0.3133,
821
+ "step": 1160
822
+ },
823
+ {
824
+ "epoch": 0.5482661668228679,
825
+ "grad_norm": 2.5994045734405518,
826
+ "learning_rate": 2.9314667291471415e-05,
827
+ "loss": 0.3366,
828
+ "step": 1170
829
+ },
830
+ {
831
+ "epoch": 0.5529522024367385,
832
+ "grad_norm": 1.6373965740203857,
833
+ "learning_rate": 2.9308809746954078e-05,
834
+ "loss": 0.3447,
835
+ "step": 1180
836
+ },
837
+ {
838
+ "epoch": 0.5576382380506092,
839
+ "grad_norm": 1.8884459733963013,
840
+ "learning_rate": 2.930295220243674e-05,
841
+ "loss": 0.3201,
842
+ "step": 1190
843
+ },
844
+ {
845
+ "epoch": 0.5623242736644799,
846
+ "grad_norm": 2.0964128971099854,
847
+ "learning_rate": 2.9297094657919402e-05,
848
+ "loss": 0.3528,
849
+ "step": 1200
850
+ },
851
+ {
852
+ "epoch": 0.5670103092783505,
853
+ "grad_norm": 1.8909735679626465,
854
+ "learning_rate": 2.929123711340206e-05,
855
+ "loss": 0.3208,
856
+ "step": 1210
857
+ },
858
+ {
859
+ "epoch": 0.5716963448922212,
860
+ "grad_norm": 1.6073822975158691,
861
+ "learning_rate": 2.9285379568884727e-05,
862
+ "loss": 0.3231,
863
+ "step": 1220
864
+ },
865
+ {
866
+ "epoch": 0.5763823805060918,
867
+ "grad_norm": 1.852653980255127,
868
+ "learning_rate": 2.9279522024367386e-05,
869
+ "loss": 0.331,
870
+ "step": 1230
871
+ },
872
+ {
873
+ "epoch": 0.5810684161199625,
874
+ "grad_norm": 1.4911562204360962,
875
+ "learning_rate": 2.927366447985005e-05,
876
+ "loss": 0.2986,
877
+ "step": 1240
878
+ },
879
+ {
880
+ "epoch": 0.5857544517338332,
881
+ "grad_norm": 1.87758207321167,
882
+ "learning_rate": 2.9267806935332708e-05,
883
+ "loss": 0.3348,
884
+ "step": 1250
885
+ },
886
+ {
887
+ "epoch": 0.5904404873477038,
888
+ "grad_norm": 2.20400071144104,
889
+ "learning_rate": 2.9261949390815373e-05,
890
+ "loss": 0.3578,
891
+ "step": 1260
892
+ },
893
+ {
894
+ "epoch": 0.5951265229615745,
895
+ "grad_norm": 2.1372976303100586,
896
+ "learning_rate": 2.9256091846298032e-05,
897
+ "loss": 0.3513,
898
+ "step": 1270
899
+ },
900
+ {
901
+ "epoch": 0.5998125585754451,
902
+ "grad_norm": 1.7900727987289429,
903
+ "learning_rate": 2.9250234301780695e-05,
904
+ "loss": 0.3022,
905
+ "step": 1280
906
+ },
907
+ {
908
+ "epoch": 0.6044985941893158,
909
+ "grad_norm": 1.2783315181732178,
910
+ "learning_rate": 2.9244376757263354e-05,
911
+ "loss": 0.3005,
912
+ "step": 1290
913
+ },
914
+ {
915
+ "epoch": 0.6091846298031866,
916
+ "grad_norm": 1.9415411949157715,
917
+ "learning_rate": 2.923851921274602e-05,
918
+ "loss": 0.2902,
919
+ "step": 1300
920
+ },
921
+ {
922
+ "epoch": 0.6138706654170571,
923
+ "grad_norm": 1.2527676820755005,
924
+ "learning_rate": 2.923266166822868e-05,
925
+ "loss": 0.3494,
926
+ "step": 1310
927
+ },
928
+ {
929
+ "epoch": 0.6185567010309279,
930
+ "grad_norm": 2.1499555110931396,
931
+ "learning_rate": 2.922680412371134e-05,
932
+ "loss": 0.3124,
933
+ "step": 1320
934
+ },
935
+ {
936
+ "epoch": 0.6232427366447985,
937
+ "grad_norm": 1.3738154172897339,
938
+ "learning_rate": 2.9220946579194e-05,
939
+ "loss": 0.3146,
940
+ "step": 1330
941
+ },
942
+ {
943
+ "epoch": 0.6279287722586692,
944
+ "grad_norm": 1.5831377506256104,
945
+ "learning_rate": 2.9215089034676666e-05,
946
+ "loss": 0.3231,
947
+ "step": 1340
948
+ },
949
+ {
950
+ "epoch": 0.6326148078725399,
951
+ "grad_norm": 2.8547163009643555,
952
+ "learning_rate": 2.9209231490159325e-05,
953
+ "loss": 0.3311,
954
+ "step": 1350
955
+ },
956
+ {
957
+ "epoch": 0.6373008434864105,
958
+ "grad_norm": 1.6369024515151978,
959
+ "learning_rate": 2.9203373945641987e-05,
960
+ "loss": 0.2868,
961
+ "step": 1360
962
+ },
963
+ {
964
+ "epoch": 0.6419868791002812,
965
+ "grad_norm": 1.599731206893921,
966
+ "learning_rate": 2.919751640112465e-05,
967
+ "loss": 0.322,
968
+ "step": 1370
969
+ },
970
+ {
971
+ "epoch": 0.6466729147141518,
972
+ "grad_norm": 1.878670573234558,
973
+ "learning_rate": 2.9191658856607312e-05,
974
+ "loss": 0.3201,
975
+ "step": 1380
976
+ },
977
+ {
978
+ "epoch": 0.6513589503280225,
979
+ "grad_norm": 2.1555798053741455,
980
+ "learning_rate": 2.918580131208997e-05,
981
+ "loss": 0.3317,
982
+ "step": 1390
983
+ },
984
+ {
985
+ "epoch": 0.6560449859418932,
986
+ "grad_norm": 2.735520124435425,
987
+ "learning_rate": 2.9179943767572633e-05,
988
+ "loss": 0.2862,
989
+ "step": 1400
990
+ },
991
+ {
992
+ "epoch": 0.6607310215557638,
993
+ "grad_norm": 1.7845630645751953,
994
+ "learning_rate": 2.91740862230553e-05,
995
+ "loss": 0.3107,
996
+ "step": 1410
997
+ },
998
+ {
999
+ "epoch": 0.6654170571696345,
1000
+ "grad_norm": 1.9131550788879395,
1001
+ "learning_rate": 2.9168228678537958e-05,
1002
+ "loss": 0.3135,
1003
+ "step": 1420
1004
+ },
1005
+ {
1006
+ "epoch": 0.6701030927835051,
1007
+ "grad_norm": 1.7999134063720703,
1008
+ "learning_rate": 2.916237113402062e-05,
1009
+ "loss": 0.3393,
1010
+ "step": 1430
1011
+ },
1012
+ {
1013
+ "epoch": 0.6747891283973758,
1014
+ "grad_norm": 1.856102466583252,
1015
+ "learning_rate": 2.915651358950328e-05,
1016
+ "loss": 0.3394,
1017
+ "step": 1440
1018
+ },
1019
+ {
1020
+ "epoch": 0.6794751640112465,
1021
+ "grad_norm": 1.4492303133010864,
1022
+ "learning_rate": 2.9150656044985945e-05,
1023
+ "loss": 0.2909,
1024
+ "step": 1450
1025
+ },
1026
+ {
1027
+ "epoch": 0.6841611996251171,
1028
+ "grad_norm": 1.517831563949585,
1029
+ "learning_rate": 2.9144798500468604e-05,
1030
+ "loss": 0.2919,
1031
+ "step": 1460
1032
+ },
1033
+ {
1034
+ "epoch": 0.6888472352389878,
1035
+ "grad_norm": 1.8984867334365845,
1036
+ "learning_rate": 2.9138940955951267e-05,
1037
+ "loss": 0.3282,
1038
+ "step": 1470
1039
+ },
1040
+ {
1041
+ "epoch": 0.6935332708528584,
1042
+ "grad_norm": 2.1415510177612305,
1043
+ "learning_rate": 2.9133083411433926e-05,
1044
+ "loss": 0.3159,
1045
+ "step": 1480
1046
+ },
1047
+ {
1048
+ "epoch": 0.6982193064667291,
1049
+ "grad_norm": 1.7007821798324585,
1050
+ "learning_rate": 2.912722586691659e-05,
1051
+ "loss": 0.2854,
1052
+ "step": 1490
1053
+ },
1054
+ {
1055
+ "epoch": 0.7029053420805998,
1056
+ "grad_norm": 1.7445827722549438,
1057
+ "learning_rate": 2.912136832239925e-05,
1058
+ "loss": 0.3308,
1059
+ "step": 1500
1060
  }
1061
  ],
1062
  "logging_steps": 10,