ErrorAI commited on
Commit
15af835
·
verified ·
1 Parent(s): 6c53582

Training in progress, step 148, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:606cb6a3e8b746460a69bb87373968e375011b6fba7af8449abadca398677e82
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:953c61c5559e17ff832dfbc6299726ad0df7053f63d3d1ec66215d178a229c3f
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5571b04e0cc7a76dfc672f8ee7c665d3e99067e38251f5df26c00e2785e4250
3
  size 43122580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb97dbf425537b10e254846bc6db31ea9e3ee94db47607dc39e59078e93813e
3
  size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1034f448c880480cb5aaeba0a571a7a73c9f8f8ea3cf6b46b1ab4365aad57a45
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:345f067230779a595369265188a8b1c66df6a273b8a48973dbe2371aa3dbbf81
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1e1adc68ac37464732ee6ab450d14f2508c0acc3e7d305d49cbcb2391439808
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41be6dedc995393bbb9b134c6e1cbcb5376e1ee02b2c574fb6a6cc9949532ab1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.751269035532995,
5
  "eval_steps": 500,
6
- "global_step": 111,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -784,6 +784,273 @@
784
  "learning_rate": 1.5628779114358034e-05,
785
  "loss": 3.1958,
786
  "step": 111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
787
  }
788
  ],
789
  "logging_steps": 1,
@@ -798,12 +1065,12 @@
798
  "should_evaluate": false,
799
  "should_log": false,
800
  "should_save": true,
801
- "should_training_stop": false
802
  },
803
  "attributes": {}
804
  }
805
  },
806
- "total_flos": 1.554618100136018e+17,
807
  "train_batch_size": 4,
808
  "trial_name": null,
809
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0050761421319796,
5
  "eval_steps": 500,
6
+ "global_step": 148,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
784
  "learning_rate": 1.5628779114358034e-05,
785
  "loss": 3.1958,
786
  "step": 111
787
+ },
788
+ {
789
+ "epoch": 0.7580372250423012,
790
+ "grad_norm": 2.940979480743408,
791
+ "learning_rate": 1.4839375161924446e-05,
792
+ "loss": 3.3009,
793
+ "step": 112
794
+ },
795
+ {
796
+ "epoch": 0.7648054145516074,
797
+ "grad_norm": 3.06207013130188,
798
+ "learning_rate": 1.406694062122389e-05,
799
+ "loss": 3.2287,
800
+ "step": 113
801
+ },
802
+ {
803
+ "epoch": 0.7715736040609137,
804
+ "grad_norm": 3.0322513580322266,
805
+ "learning_rate": 1.3311848288809813e-05,
806
+ "loss": 3.1302,
807
+ "step": 114
808
+ },
809
+ {
810
+ "epoch": 0.7783417935702199,
811
+ "grad_norm": 2.8478641510009766,
812
+ "learning_rate": 1.257446259144494e-05,
813
+ "loss": 3.4677,
814
+ "step": 115
815
+ },
816
+ {
817
+ "epoch": 0.7851099830795262,
818
+ "grad_norm": 2.822754383087158,
819
+ "learning_rate": 1.1855139410219657e-05,
820
+ "loss": 3.4789,
821
+ "step": 116
822
+ },
823
+ {
824
+ "epoch": 0.7918781725888325,
825
+ "grad_norm": 2.8323540687561035,
826
+ "learning_rate": 1.1154225908794642e-05,
827
+ "loss": 3.3064,
828
+ "step": 117
829
+ },
830
+ {
831
+ "epoch": 0.7986463620981388,
832
+ "grad_norm": 2.916195869445801,
833
+ "learning_rate": 1.047206036585095e-05,
834
+ "loss": 3.29,
835
+ "step": 118
836
+ },
837
+ {
838
+ "epoch": 0.805414551607445,
839
+ "grad_norm": 2.9813146591186523,
840
+ "learning_rate": 9.808972011828055e-06,
841
+ "loss": 2.9577,
842
+ "step": 119
843
+ },
844
+ {
845
+ "epoch": 0.8121827411167513,
846
+ "grad_norm": 2.8787038326263428,
847
+ "learning_rate": 9.16528087002892e-06,
848
+ "loss": 3.0186,
849
+ "step": 120
850
+ },
851
+ {
852
+ "epoch": 0.8189509306260575,
853
+ "grad_norm": 3.055864095687866,
854
+ "learning_rate": 8.541297602168591e-06,
855
+ "loss": 3.3334,
856
+ "step": 121
857
+ },
858
+ {
859
+ "epoch": 0.8257191201353637,
860
+ "grad_norm": 3.0566341876983643,
861
+ "learning_rate": 7.937323358440935e-06,
862
+ "loss": 3.3495,
863
+ "step": 122
864
+ },
865
+ {
866
+ "epoch": 0.8324873096446701,
867
+ "grad_norm": 2.9866137504577637,
868
+ "learning_rate": 7.353649632175957e-06,
869
+ "loss": 3.5154,
870
+ "step": 123
871
+ },
872
+ {
873
+ "epoch": 0.8392554991539763,
874
+ "grad_norm": 2.9870195388793945,
875
+ "learning_rate": 6.790558119157597e-06,
876
+ "loss": 3.3266,
877
+ "step": 124
878
+ },
879
+ {
880
+ "epoch": 0.8460236886632826,
881
+ "grad_norm": 3.1664063930511475,
882
+ "learning_rate": 6.248320581670281e-06,
883
+ "loss": 3.3117,
884
+ "step": 125
885
+ },
886
+ {
887
+ "epoch": 0.8527918781725888,
888
+ "grad_norm": 3.2000887393951416,
889
+ "learning_rate": 5.727198717339511e-06,
890
+ "loss": 3.4478,
891
+ "step": 126
892
+ },
893
+ {
894
+ "epoch": 0.8595600676818951,
895
+ "grad_norm": 3.1794345378875732,
896
+ "learning_rate": 5.227444032829887e-06,
897
+ "loss": 3.2662,
898
+ "step": 127
899
+ },
900
+ {
901
+ "epoch": 0.8663282571912013,
902
+ "grad_norm": 3.3094160556793213,
903
+ "learning_rate": 4.74929772246166e-06,
904
+ "loss": 3.516,
905
+ "step": 128
906
+ },
907
+ {
908
+ "epoch": 0.8730964467005076,
909
+ "grad_norm": 3.2829089164733887,
910
+ "learning_rate": 4.292990551804171e-06,
911
+ "loss": 3.41,
912
+ "step": 129
913
+ },
914
+ {
915
+ "epoch": 0.8798646362098139,
916
+ "grad_norm": 3.497692346572876,
917
+ "learning_rate": 3.858742746302535e-06,
918
+ "loss": 3.5923,
919
+ "step": 130
920
+ },
921
+ {
922
+ "epoch": 0.8866328257191202,
923
+ "grad_norm": 3.415341377258301,
924
+ "learning_rate": 3.4467638849912497e-06,
925
+ "loss": 3.8121,
926
+ "step": 131
927
+ },
928
+ {
929
+ "epoch": 0.8934010152284264,
930
+ "grad_norm": 3.6507880687713623,
931
+ "learning_rate": 3.0572527993460053e-06,
932
+ "loss": 3.5136,
933
+ "step": 132
934
+ },
935
+ {
936
+ "epoch": 0.9001692047377327,
937
+ "grad_norm": 3.711308717727661,
938
+ "learning_rate": 2.6903974773225702e-06,
939
+ "loss": 3.7174,
940
+ "step": 133
941
+ },
942
+ {
943
+ "epoch": 0.9069373942470389,
944
+ "grad_norm": 3.8556933403015137,
945
+ "learning_rate": 2.3463749726290286e-06,
946
+ "loss": 3.6928,
947
+ "step": 134
948
+ },
949
+ {
950
+ "epoch": 0.9137055837563451,
951
+ "grad_norm": 4.0776686668396,
952
+ "learning_rate": 2.0253513192751373e-06,
953
+ "loss": 4.0913,
954
+ "step": 135
955
+ },
956
+ {
957
+ "epoch": 0.9204737732656514,
958
+ "grad_norm": 4.565491676330566,
959
+ "learning_rate": 1.7274814514400994e-06,
960
+ "loss": 4.0228,
961
+ "step": 136
962
+ },
963
+ {
964
+ "epoch": 0.9272419627749577,
965
+ "grad_norm": 4.521999359130859,
966
+ "learning_rate": 1.4529091286973995e-06,
967
+ "loss": 3.9589,
968
+ "step": 137
969
+ },
970
+ {
971
+ "epoch": 0.934010152284264,
972
+ "grad_norm": 4.838030815124512,
973
+ "learning_rate": 1.2017668666327753e-06,
974
+ "loss": 3.9551,
975
+ "step": 138
976
+ },
977
+ {
978
+ "epoch": 0.9407783417935702,
979
+ "grad_norm": 5.700983047485352,
980
+ "learning_rate": 9.741758728888218e-07,
981
+ "loss": 4.2368,
982
+ "step": 139
983
+ },
984
+ {
985
+ "epoch": 0.9475465313028765,
986
+ "grad_norm": 5.643460273742676,
987
+ "learning_rate": 7.702459886670788e-07,
988
+ "loss": 4.3557,
989
+ "step": 140
990
+ },
991
+ {
992
+ "epoch": 0.9543147208121827,
993
+ "grad_norm": 5.738959312438965,
994
+ "learning_rate": 5.900756357159143e-07,
995
+ "loss": 4.2717,
996
+ "step": 141
997
+ },
998
+ {
999
+ "epoch": 0.961082910321489,
1000
+ "grad_norm": 6.009876251220703,
1001
+ "learning_rate": 4.337517688296544e-07,
1002
+ "loss": 4.1127,
1003
+ "step": 142
1004
+ },
1005
+ {
1006
+ "epoch": 0.9678510998307953,
1007
+ "grad_norm": 6.47075080871582,
1008
+ "learning_rate": 3.013498338820031e-07,
1009
+ "loss": 4.012,
1010
+ "step": 143
1011
+ },
1012
+ {
1013
+ "epoch": 0.9746192893401016,
1014
+ "grad_norm": 7.140995979309082,
1015
+ "learning_rate": 1.9293373141394122e-07,
1016
+ "loss": 4.1349,
1017
+ "step": 144
1018
+ },
1019
+ {
1020
+ "epoch": 0.9813874788494078,
1021
+ "grad_norm": 2.5970511436462402,
1022
+ "learning_rate": 1.0855578579370695e-07,
1023
+ "loss": 3.1786,
1024
+ "step": 145
1025
+ },
1026
+ {
1027
+ "epoch": 0.988155668358714,
1028
+ "grad_norm": 2.9720969200134277,
1029
+ "learning_rate": 4.825671996370185e-08,
1030
+ "loss": 3.2409,
1031
+ "step": 146
1032
+ },
1033
+ {
1034
+ "epoch": 0.9949238578680203,
1035
+ "grad_norm": 3.734907388687134,
1036
+ "learning_rate": 1.2065635786595586e-08,
1037
+ "loss": 3.6735,
1038
+ "step": 147
1039
+ },
1040
+ {
1041
+ "epoch": 0.9949238578680203,
1042
+ "eval_loss": 0.8590127229690552,
1043
+ "eval_runtime": 12.5982,
1044
+ "eval_samples_per_second": 9.922,
1045
+ "eval_steps_per_second": 2.54,
1046
+ "step": 147
1047
+ },
1048
+ {
1049
+ "epoch": 1.0050761421319796,
1050
+ "grad_norm": 3.801922559738159,
1051
+ "learning_rate": 0.0,
1052
+ "loss": 3.3448,
1053
+ "step": 148
1054
  }
1055
  ],
1056
  "logging_steps": 1,
 
1065
  "should_evaluate": false,
1066
  "should_log": false,
1067
  "should_save": true,
1068
+ "should_training_stop": true
1069
  },
1070
  "attributes": {}
1071
  }
1072
  },
1073
+ "total_flos": 2.0734083567766733e+17,
1074
  "train_batch_size": 4,
1075
  "trial_name": null,
1076
  "trial_params": null