rovdetection commited on
Commit
beaace0
·
verified ·
1 Parent(s): e38689b

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6a1a977dbd01ad2f8a9eed9e7f79d95b196931e09186bccc4d8f5f04cbed2d7
3
  size 4523108832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6696d4f268a5241495ceaff2acea183efcc3afd5b44955ab5f6c2b91adbea6b9
3
  size 4523108832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aaf5712450f016bf21c43fb383dbe666b12a078f8860c6a53a008f9aa13666b
3
- size 2911851147
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363b73880c7a938f5b75d71760d551c6d014704f2a4ec6628c9aaa6f429fa21e
3
+ size 2912179275
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01f9a0f7843a37be87edd23f4e88aa93b38b95cc2c07503eeb1cf2e4632453a2
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14ae2a2128444abab378aa06c09a61a84665f758fcc19fc46f5789b0bc1b5665
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca372268f4fa9335030c0cb7aedb6cdba75f457da50e7a4034abb1a2d0843689
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fb5abe0c6c486932d56ee6ec9792e12e43df2b7c9d233a22935f823c7902b1d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0717d7b217f15f3f50778aac3433a6ba54621949761b93d00f3416efeb959875
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.0064,
6
  "eval_steps": 500,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -708,6 +708,356 @@
708
  "learning_rate": 0.00018002,
709
  "loss": 1.862677001953125,
710
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
  }
712
  ],
713
  "logging_steps": 10,
@@ -727,7 +1077,7 @@
727
  "attributes": {}
728
  }
729
  },
730
- "total_flos": 8.465719089364992e+16,
731
  "train_batch_size": 1,
732
  "trial_name": null,
733
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0188,
6
  "eval_steps": 500,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
708
  "learning_rate": 0.00018002,
709
  "loss": 1.862677001953125,
710
  "step": 1000
711
+ },
712
+ {
713
+ "epoch": 1.001,
714
+ "grad_norm": 0.7271579504013062,
715
+ "learning_rate": 0.00017982000000000002,
716
+ "loss": 1.4198949813842774,
717
+ "step": 1010
718
+ },
719
+ {
720
+ "epoch": 1.002,
721
+ "grad_norm": 0.7241926789283752,
722
+ "learning_rate": 0.00017962000000000002,
723
+ "loss": 1.290165328979492,
724
+ "step": 1020
725
+ },
726
+ {
727
+ "epoch": 1.003,
728
+ "grad_norm": 0.6930139064788818,
729
+ "learning_rate": 0.00017942,
730
+ "loss": 1.2588088989257813,
731
+ "step": 1030
732
+ },
733
+ {
734
+ "epoch": 1.004,
735
+ "grad_norm": 0.7046691179275513,
736
+ "learning_rate": 0.00017922,
737
+ "loss": 1.2508729934692382,
738
+ "step": 1040
739
+ },
740
+ {
741
+ "epoch": 1.005,
742
+ "grad_norm": 0.7425150275230408,
743
+ "learning_rate": 0.00017902,
744
+ "loss": 1.2099505424499513,
745
+ "step": 1050
746
+ },
747
+ {
748
+ "epoch": 1.006,
749
+ "grad_norm": 0.6843275427818298,
750
+ "learning_rate": 0.00017882,
751
+ "loss": 1.1226897239685059,
752
+ "step": 1060
753
+ },
754
+ {
755
+ "epoch": 1.007,
756
+ "grad_norm": 0.6979613900184631,
757
+ "learning_rate": 0.00017862000000000002,
758
+ "loss": 1.6102104187011719,
759
+ "step": 1070
760
+ },
761
+ {
762
+ "epoch": 1.008,
763
+ "grad_norm": 0.6627645492553711,
764
+ "learning_rate": 0.00017842000000000002,
765
+ "loss": 1.9422037124633789,
766
+ "step": 1080
767
+ },
768
+ {
769
+ "epoch": 1.009,
770
+ "grad_norm": 0.6664915084838867,
771
+ "learning_rate": 0.00017822,
772
+ "loss": 1.8065723419189452,
773
+ "step": 1090
774
+ },
775
+ {
776
+ "epoch": 1.01,
777
+ "grad_norm": 0.6870133280754089,
778
+ "learning_rate": 0.00017802,
779
+ "loss": 1.8182893753051759,
780
+ "step": 1100
781
+ },
782
+ {
783
+ "epoch": 1.011,
784
+ "grad_norm": 0.6618905663490295,
785
+ "learning_rate": 0.00017782,
786
+ "loss": 1.814337158203125,
787
+ "step": 1110
788
+ },
789
+ {
790
+ "epoch": 1.012,
791
+ "grad_norm": 1.1367806196212769,
792
+ "learning_rate": 0.00017762,
793
+ "loss": 1.73431453704834,
794
+ "step": 1120
795
+ },
796
+ {
797
+ "epoch": 1.013,
798
+ "grad_norm": 0.685649573802948,
799
+ "learning_rate": 0.00017742000000000002,
800
+ "loss": 1.7108922958374024,
801
+ "step": 1130
802
+ },
803
+ {
804
+ "epoch": 1.014,
805
+ "grad_norm": 0.6594149470329285,
806
+ "learning_rate": 0.00017722000000000001,
807
+ "loss": 1.779058074951172,
808
+ "step": 1140
809
+ },
810
+ {
811
+ "epoch": 1.015,
812
+ "grad_norm": 0.6914166212081909,
813
+ "learning_rate": 0.00017702,
814
+ "loss": 1.7222532272338866,
815
+ "step": 1150
816
+ },
817
+ {
818
+ "epoch": 1.016,
819
+ "grad_norm": 0.6719418168067932,
820
+ "learning_rate": 0.00017682,
821
+ "loss": 1.730402946472168,
822
+ "step": 1160
823
+ },
824
+ {
825
+ "epoch": 1.017,
826
+ "grad_norm": 0.7353265881538391,
827
+ "learning_rate": 0.00017662,
828
+ "loss": 1.711669921875,
829
+ "step": 1170
830
+ },
831
+ {
832
+ "epoch": 1.018,
833
+ "grad_norm": 0.6861590147018433,
834
+ "learning_rate": 0.00017642,
835
+ "loss": 1.7661975860595702,
836
+ "step": 1180
837
+ },
838
+ {
839
+ "epoch": 1.019,
840
+ "grad_norm": 0.6513635516166687,
841
+ "learning_rate": 0.00017622000000000002,
842
+ "loss": 1.6878995895385742,
843
+ "step": 1190
844
+ },
845
+ {
846
+ "epoch": 1.02,
847
+ "grad_norm": 0.6829492449760437,
848
+ "learning_rate": 0.00017602,
849
+ "loss": 1.6560598373413087,
850
+ "step": 1200
851
+ },
852
+ {
853
+ "epoch": 1.021,
854
+ "grad_norm": 0.6806496381759644,
855
+ "learning_rate": 0.00017582,
856
+ "loss": 1.6022390365600585,
857
+ "step": 1210
858
+ },
859
+ {
860
+ "epoch": 1.022,
861
+ "grad_norm": 0.6467958092689514,
862
+ "learning_rate": 0.00017562,
863
+ "loss": 1.527933406829834,
864
+ "step": 1220
865
+ },
866
+ {
867
+ "epoch": 1.023,
868
+ "grad_norm": 0.7558693289756775,
869
+ "learning_rate": 0.00017542,
870
+ "loss": 1.6221937179565429,
871
+ "step": 1230
872
+ },
873
+ {
874
+ "epoch": 1.024,
875
+ "grad_norm": 0.7131749391555786,
876
+ "learning_rate": 0.00017522000000000002,
877
+ "loss": 1.5218177795410157,
878
+ "step": 1240
879
+ },
880
+ {
881
+ "epoch": 1.025,
882
+ "grad_norm": 0.7794132828712463,
883
+ "learning_rate": 0.00017502000000000001,
884
+ "loss": 1.5550528526306153,
885
+ "step": 1250
886
+ },
887
+ {
888
+ "epoch": 1.026,
889
+ "grad_norm": 0.7174275517463684,
890
+ "learning_rate": 0.00017482,
891
+ "loss": 1.5929729461669921,
892
+ "step": 1260
893
+ },
894
+ {
895
+ "epoch": 1.027,
896
+ "grad_norm": 0.7710967659950256,
897
+ "learning_rate": 0.00017462,
898
+ "loss": 1.4883572578430175,
899
+ "step": 1270
900
+ },
901
+ {
902
+ "epoch": 1.028,
903
+ "grad_norm": 0.689930260181427,
904
+ "learning_rate": 0.00017442,
905
+ "loss": 1.4959463119506835,
906
+ "step": 1280
907
+ },
908
+ {
909
+ "epoch": 1.029,
910
+ "grad_norm": 0.7309102416038513,
911
+ "learning_rate": 0.00017422,
912
+ "loss": 1.5185231208801269,
913
+ "step": 1290
914
+ },
915
+ {
916
+ "epoch": 1.03,
917
+ "grad_norm": 0.6255451440811157,
918
+ "learning_rate": 0.00017402000000000002,
919
+ "loss": 1.4664793968200684,
920
+ "step": 1300
921
+ },
922
+ {
923
+ "epoch": 1.031,
924
+ "grad_norm": 0.7244739532470703,
925
+ "learning_rate": 0.00017382,
926
+ "loss": 1.4731544494628905,
927
+ "step": 1310
928
+ },
929
+ {
930
+ "epoch": 2.0008,
931
+ "grad_norm": 0.7122154235839844,
932
+ "learning_rate": 0.00017362,
933
+ "loss": 1.3298330307006836,
934
+ "step": 1320
935
+ },
936
+ {
937
+ "epoch": 2.0018,
938
+ "grad_norm": 0.7280747294425964,
939
+ "learning_rate": 0.00017342,
940
+ "loss": 1.094059658050537,
941
+ "step": 1330
942
+ },
943
+ {
944
+ "epoch": 2.0028,
945
+ "grad_norm": 0.7428712844848633,
946
+ "learning_rate": 0.00017322,
947
+ "loss": 0.9706879615783691,
948
+ "step": 1340
949
+ },
950
+ {
951
+ "epoch": 2.0038,
952
+ "grad_norm": 0.759975790977478,
953
+ "learning_rate": 0.00017302,
954
+ "loss": 1.02620849609375,
955
+ "step": 1350
956
+ },
957
+ {
958
+ "epoch": 2.0048,
959
+ "grad_norm": 0.7684850692749023,
960
+ "learning_rate": 0.00017282000000000002,
961
+ "loss": 0.9361392974853515,
962
+ "step": 1360
963
+ },
964
+ {
965
+ "epoch": 2.0058,
966
+ "grad_norm": 0.7847408652305603,
967
+ "learning_rate": 0.00017262,
968
+ "loss": 0.8885183334350586,
969
+ "step": 1370
970
+ },
971
+ {
972
+ "epoch": 2.0068,
973
+ "grad_norm": 0.921360194683075,
974
+ "learning_rate": 0.00017242,
975
+ "loss": 1.0311158180236817,
976
+ "step": 1380
977
+ },
978
+ {
979
+ "epoch": 2.0078,
980
+ "grad_norm": 0.6837793588638306,
981
+ "learning_rate": 0.00017222,
982
+ "loss": 1.2890826225280763,
983
+ "step": 1390
984
+ },
985
+ {
986
+ "epoch": 2.0088,
987
+ "grad_norm": 0.7072200775146484,
988
+ "learning_rate": 0.00017202,
989
+ "loss": 1.209097957611084,
990
+ "step": 1400
991
+ },
992
+ {
993
+ "epoch": 2.0098,
994
+ "grad_norm": 0.6607412695884705,
995
+ "learning_rate": 0.00017182,
996
+ "loss": 1.2470938682556152,
997
+ "step": 1410
998
+ },
999
+ {
1000
+ "epoch": 2.0108,
1001
+ "grad_norm": 0.7476115226745605,
1002
+ "learning_rate": 0.00017162000000000001,
1003
+ "loss": 1.2279239654541017,
1004
+ "step": 1420
1005
+ },
1006
+ {
1007
+ "epoch": 2.0118,
1008
+ "grad_norm": 0.7690820693969727,
1009
+ "learning_rate": 0.00017142,
1010
+ "loss": 1.1757261276245117,
1011
+ "step": 1430
1012
+ },
1013
+ {
1014
+ "epoch": 2.0128,
1015
+ "grad_norm": 0.7281740307807922,
1016
+ "learning_rate": 0.00017122,
1017
+ "loss": 1.158098030090332,
1018
+ "step": 1440
1019
+ },
1020
+ {
1021
+ "epoch": 2.0138,
1022
+ "grad_norm": 0.7036393880844116,
1023
+ "learning_rate": 0.00017102,
1024
+ "loss": 1.1959182739257812,
1025
+ "step": 1450
1026
+ },
1027
+ {
1028
+ "epoch": 2.0148,
1029
+ "grad_norm": 0.7431781888008118,
1030
+ "learning_rate": 0.00017082,
1031
+ "loss": 1.189777946472168,
1032
+ "step": 1460
1033
+ },
1034
+ {
1035
+ "epoch": 2.0158,
1036
+ "grad_norm": 0.7411831021308899,
1037
+ "learning_rate": 0.00017062,
1038
+ "loss": 1.2544533729553222,
1039
+ "step": 1470
1040
+ },
1041
+ {
1042
+ "epoch": 2.0168,
1043
+ "grad_norm": 0.7163280844688416,
1044
+ "learning_rate": 0.00017042,
1045
+ "loss": 1.165062141418457,
1046
+ "step": 1480
1047
+ },
1048
+ {
1049
+ "epoch": 2.0178,
1050
+ "grad_norm": 0.7118193507194519,
1051
+ "learning_rate": 0.00017022,
1052
+ "loss": 1.2281935691833497,
1053
+ "step": 1490
1054
+ },
1055
+ {
1056
+ "epoch": 2.0188,
1057
+ "grad_norm": 0.6703944206237793,
1058
+ "learning_rate": 0.00017002,
1059
+ "loss": 1.1760727882385253,
1060
+ "step": 1500
1061
  }
1062
  ],
1063
  "logging_steps": 10,
 
1077
  "attributes": {}
1078
  }
1079
  },
1080
+ "total_flos": 1.2700397305014682e+17,
1081
  "train_batch_size": 1,
1082
  "trial_name": null,
1083
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6613ae7e039f69e0c219d13c85d932cd4ee709153ecf7a7918efd897af4b506a
3
  size 5329
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a906813357c502f71d3ca10d7a5748ab64a30407afe18ed781d8ce5e0a1e7e1
3
  size 5329