Johannes Garstenauer committed on
Commit
e40687c
·
1 Parent(s): 73c7fcb

Training in progress, step 80624

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86217225d7cd41a33df6e628a809ff37337b8cae8b60b627d11b40cf0cc74d4d
3
- size 133906117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a32601442637b42f7a6eb10ebea11d5d313b374e4bc71dab957e52f81ba30d
3
+ size 133906309
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b85119d9ce7de09760ad6f50ef01233ac40b1284c199cfe6758d0438c2f617d
3
  size 266387761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a81cbf5d8cd63bef63639bc99cfc0f2477944d78d679cea7a6a8bf8465875fd
3
  size 266387761
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba1459a55b276706d50169f646d5abc9d3ae02b6aac4529867f3178659d9f573
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99abdf87e091757a4c26601a35007bfa738d7dc783b6dec2f7e9dcc551f7dba0
3
  size 14511
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4feadef87614a154dba84298df97d9e7b6305e2e944020a12c50873ae6873bdb
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e8dbbdc09c6ae689f3a38febec079d787f5f9cfadb6ee31fcc9975cbbabc68
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2250101866340198,
5
- "global_step": 60468,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -758,11 +758,265 @@
758
  "eval_samples_per_second": 388.927,
759
  "eval_steps_per_second": 6.077,
760
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761
  }
762
  ],
763
  "max_steps": 806202,
764
  "num_train_epochs": 3,
765
- "total_flos": 1.0259983296107643e+18,
766
  "trial_name": null,
767
  "trial_params": null
768
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3000135821786931,
5
+ "global_step": 80624,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
758
  "eval_samples_per_second": 388.927,
759
  "eval_steps_per_second": 6.077,
760
  "step": 60000
761
+ },
762
+ {
763
+ "epoch": 0.23,
764
+ "learning_rate": 2.3594167338706678e-05,
765
+ "loss": 2.9852,
766
+ "step": 60500
767
+ },
768
+ {
769
+ "epoch": 0.23,
770
+ "learning_rate": 2.3578347234067888e-05,
771
+ "loss": 3.1677,
772
+ "step": 61000
773
+ },
774
+ {
775
+ "epoch": 0.23,
776
+ "learning_rate": 2.35625271294291e-05,
777
+ "loss": 3.0343,
778
+ "step": 61500
779
+ },
780
+ {
781
+ "epoch": 0.23,
782
+ "learning_rate": 2.3546707024790315e-05,
783
+ "loss": 2.9848,
784
+ "step": 62000
785
+ },
786
+ {
787
+ "epoch": 0.23,
788
+ "learning_rate": 2.3530886920151525e-05,
789
+ "loss": 3.0706,
790
+ "step": 62500
791
+ },
792
+ {
793
+ "epoch": 0.23,
794
+ "learning_rate": 2.351506681551274e-05,
795
+ "loss": 3.036,
796
+ "step": 63000
797
+ },
798
+ {
799
+ "epoch": 0.24,
800
+ "learning_rate": 2.349924671087395e-05,
801
+ "loss": 2.9671,
802
+ "step": 63500
803
+ },
804
+ {
805
+ "epoch": 0.24,
806
+ "learning_rate": 2.3483426606235166e-05,
807
+ "loss": 3.0112,
808
+ "step": 64000
809
+ },
810
+ {
811
+ "epoch": 0.24,
812
+ "learning_rate": 2.3467606501596376e-05,
813
+ "loss": 2.9627,
814
+ "step": 64500
815
+ },
816
+ {
817
+ "epoch": 0.24,
818
+ "learning_rate": 2.3451786396957586e-05,
819
+ "loss": 3.0045,
820
+ "step": 65000
821
+ },
822
+ {
823
+ "epoch": 0.24,
824
+ "learning_rate": 2.34359662923188e-05,
825
+ "loss": 3.0389,
826
+ "step": 65500
827
+ },
828
+ {
829
+ "epoch": 0.25,
830
+ "learning_rate": 2.3420146187680013e-05,
831
+ "loss": 2.9675,
832
+ "step": 66000
833
+ },
834
+ {
835
+ "epoch": 0.25,
836
+ "learning_rate": 2.3404326083041223e-05,
837
+ "loss": 2.9137,
838
+ "step": 66500
839
+ },
840
+ {
841
+ "epoch": 0.25,
842
+ "learning_rate": 2.3388505978402436e-05,
843
+ "loss": 2.9304,
844
+ "step": 67000
845
+ },
846
+ {
847
+ "epoch": 0.25,
848
+ "learning_rate": 2.3372685873763647e-05,
849
+ "loss": 2.931,
850
+ "step": 67500
851
+ },
852
+ {
853
+ "epoch": 0.25,
854
+ "learning_rate": 2.3356865769124863e-05,
855
+ "loss": 2.8961,
856
+ "step": 68000
857
+ },
858
+ {
859
+ "epoch": 0.25,
860
+ "learning_rate": 2.3341045664486074e-05,
861
+ "loss": 2.9894,
862
+ "step": 68500
863
+ },
864
+ {
865
+ "epoch": 0.26,
866
+ "learning_rate": 2.3325225559847284e-05,
867
+ "loss": 2.8711,
868
+ "step": 69000
869
+ },
870
+ {
871
+ "epoch": 0.26,
872
+ "learning_rate": 2.3309405455208497e-05,
873
+ "loss": 2.8287,
874
+ "step": 69500
875
+ },
876
+ {
877
+ "epoch": 0.26,
878
+ "learning_rate": 2.329358535056971e-05,
879
+ "loss": 2.899,
880
+ "step": 70000
881
+ },
882
+ {
883
+ "epoch": 0.26,
884
+ "learning_rate": 2.327776524593092e-05,
885
+ "loss": 2.8684,
886
+ "step": 70500
887
+ },
888
+ {
889
+ "epoch": 0.26,
890
+ "learning_rate": 2.3261945141292134e-05,
891
+ "loss": 2.8482,
892
+ "step": 71000
893
+ },
894
+ {
895
+ "epoch": 0.27,
896
+ "learning_rate": 2.3246125036653344e-05,
897
+ "loss": 2.8913,
898
+ "step": 71500
899
+ },
900
+ {
901
+ "epoch": 0.27,
902
+ "learning_rate": 2.3230304932014558e-05,
903
+ "loss": 2.8641,
904
+ "step": 72000
905
+ },
906
+ {
907
+ "epoch": 0.27,
908
+ "learning_rate": 2.321448482737577e-05,
909
+ "loss": 2.861,
910
+ "step": 72500
911
+ },
912
+ {
913
+ "epoch": 0.27,
914
+ "learning_rate": 2.319866472273698e-05,
915
+ "loss": 2.8647,
916
+ "step": 73000
917
+ },
918
+ {
919
+ "epoch": 0.27,
920
+ "learning_rate": 2.3182844618098195e-05,
921
+ "loss": 2.8619,
922
+ "step": 73500
923
+ },
924
+ {
925
+ "epoch": 0.28,
926
+ "learning_rate": 2.316702451345941e-05,
927
+ "loss": 2.8267,
928
+ "step": 74000
929
+ },
930
+ {
931
+ "epoch": 0.28,
932
+ "learning_rate": 2.3151204408820622e-05,
933
+ "loss": 2.8155,
934
+ "step": 74500
935
+ },
936
+ {
937
+ "epoch": 0.28,
938
+ "learning_rate": 2.3135384304181832e-05,
939
+ "loss": 2.876,
940
+ "step": 75000
941
+ },
942
+ {
943
+ "epoch": 0.28,
944
+ "eval_loss": 2.817479133605957,
945
+ "eval_runtime": 15565.5566,
946
+ "eval_samples_per_second": 389.979,
947
+ "eval_steps_per_second": 6.093,
948
+ "step": 75000
949
+ },
950
+ {
951
+ "epoch": 0.28,
952
+ "learning_rate": 2.3119564199543042e-05,
953
+ "loss": 2.7945,
954
+ "step": 75500
955
+ },
956
+ {
957
+ "epoch": 0.28,
958
+ "learning_rate": 2.3103744094904256e-05,
959
+ "loss": 2.8052,
960
+ "step": 76000
961
+ },
962
+ {
963
+ "epoch": 0.28,
964
+ "learning_rate": 2.308792399026547e-05,
965
+ "loss": 2.7997,
966
+ "step": 76500
967
+ },
968
+ {
969
+ "epoch": 0.29,
970
+ "learning_rate": 2.307210388562668e-05,
971
+ "loss": 2.7941,
972
+ "step": 77000
973
+ },
974
+ {
975
+ "epoch": 0.29,
976
+ "learning_rate": 2.3056283780987893e-05,
977
+ "loss": 2.8057,
978
+ "step": 77500
979
+ },
980
+ {
981
+ "epoch": 0.29,
982
+ "learning_rate": 2.3040463676349103e-05,
983
+ "loss": 2.8142,
984
+ "step": 78000
985
+ },
986
+ {
987
+ "epoch": 0.29,
988
+ "learning_rate": 2.302464357171032e-05,
989
+ "loss": 2.8154,
990
+ "step": 78500
991
+ },
992
+ {
993
+ "epoch": 0.29,
994
+ "learning_rate": 2.300882346707153e-05,
995
+ "loss": 2.7736,
996
+ "step": 79000
997
+ },
998
+ {
999
+ "epoch": 0.3,
1000
+ "learning_rate": 2.299300336243274e-05,
1001
+ "loss": 2.8183,
1002
+ "step": 79500
1003
+ },
1004
+ {
1005
+ "epoch": 0.3,
1006
+ "learning_rate": 2.2977183257793953e-05,
1007
+ "loss": 2.7761,
1008
+ "step": 80000
1009
+ },
1010
+ {
1011
+ "epoch": 0.3,
1012
+ "learning_rate": 2.2961363153155167e-05,
1013
+ "loss": 2.747,
1014
+ "step": 80500
1015
  }
1016
  ],
1017
  "max_steps": 806202,
1018
  "num_train_epochs": 3,
1019
+ "total_flos": 1.3679977728143524e+18,
1020
  "trial_name": null,
1021
  "trial_params": null
1022
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b85119d9ce7de09760ad6f50ef01233ac40b1284c199cfe6758d0438c2f617d
3
  size 266387761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a81cbf5d8cd63bef63639bc99cfc0f2477944d78d679cea7a6a8bf8465875fd
3
  size 266387761