jflotz commited on
Commit
e2d8aa0
·
1 Parent(s): 0b773af

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecf293809682bf3798688c407d42afa788d77c5557da4033ee0baeac06cf1302
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a806770a6d25518cb1674ae526f6d236de399a295d5a453821ffdc0e6a41627
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d5f11aab6f911cbec235d15a3494a5c1ad6a9959fd4ddb8c6370040ccb52d96
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:175c9af2cd55b51e9df8d727ba8d18aab140807b11f81fcd4adde2c5741e4d30
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37040ab33982f6d1312ce35ee66415b5fb51e1c104e02428f2187d6ddef02e5
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06b5c694b40cd5a966b5116288c30b2f19979f1058d82965ee57335ae5f1e596
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b8d45ebb28c7b9f23a6abeafd90122c1ed22446a846f1cf2ac94e95c51e1adb
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4461148968916945,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -806,11 +806,211 @@
806
  "eval_samples_per_second": 943.135,
807
  "eval_steps_per_second": 14.781,
808
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  }
810
  ],
811
  "max_steps": 1000000,
812
  "num_train_epochs": 12,
813
- "total_flos": 2.804010441213886e+21,
814
  "trial_name": null,
815
  "trial_params": null
816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5576436211146181,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
806
  "eval_samples_per_second": 943.135,
807
  "eval_steps_per_second": 14.781,
808
  "step": 40000
809
+ },
810
+ {
811
+ "epoch": 0.45,
812
+ "learning_rate": 0.0001215,
813
+ "loss": 0.3943,
814
+ "step": 40500
815
+ },
816
+ {
817
+ "epoch": 0.46,
818
+ "learning_rate": 0.00012299999999999998,
819
+ "loss": 0.3929,
820
+ "step": 41000
821
+ },
822
+ {
823
+ "epoch": 0.46,
824
+ "eval_loss": 0.37926527857780457,
825
+ "eval_runtime": 2.3557,
826
+ "eval_samples_per_second": 975.086,
827
+ "eval_steps_per_second": 15.282,
828
+ "step": 41000
829
+ },
830
+ {
831
+ "epoch": 0.46,
832
+ "learning_rate": 0.0001245,
833
+ "loss": 0.39,
834
+ "step": 41500
835
+ },
836
+ {
837
+ "epoch": 0.47,
838
+ "learning_rate": 0.00012599999999999997,
839
+ "loss": 0.3873,
840
+ "step": 42000
841
+ },
842
+ {
843
+ "epoch": 0.47,
844
+ "eval_loss": 0.37127774953842163,
845
+ "eval_runtime": 2.3664,
846
+ "eval_samples_per_second": 970.675,
847
+ "eval_steps_per_second": 15.213,
848
+ "step": 42000
849
+ },
850
+ {
851
+ "epoch": 0.47,
852
+ "learning_rate": 0.00012749999999999998,
853
+ "loss": 0.3861,
854
+ "step": 42500
855
+ },
856
+ {
857
+ "epoch": 0.48,
858
+ "learning_rate": 0.000129,
859
+ "loss": 0.3837,
860
+ "step": 43000
861
+ },
862
+ {
863
+ "epoch": 0.48,
864
+ "eval_loss": 0.36950594186782837,
865
+ "eval_runtime": 2.4197,
866
+ "eval_samples_per_second": 949.3,
867
+ "eval_steps_per_second": 14.878,
868
+ "step": 43000
869
+ },
870
+ {
871
+ "epoch": 0.49,
872
+ "learning_rate": 0.0001305,
873
+ "loss": 0.3812,
874
+ "step": 43500
875
+ },
876
+ {
877
+ "epoch": 0.49,
878
+ "learning_rate": 0.00013199999999999998,
879
+ "loss": 0.3793,
880
+ "step": 44000
881
+ },
882
+ {
883
+ "epoch": 0.49,
884
+ "eval_loss": 0.3651977479457855,
885
+ "eval_runtime": 2.3939,
886
+ "eval_samples_per_second": 959.528,
887
+ "eval_steps_per_second": 15.038,
888
+ "step": 44000
889
+ },
890
+ {
891
+ "epoch": 0.5,
892
+ "learning_rate": 0.0001335,
893
+ "loss": 0.3775,
894
+ "step": 44500
895
+ },
896
+ {
897
+ "epoch": 0.5,
898
+ "learning_rate": 0.000135,
899
+ "loss": 0.3756,
900
+ "step": 45000
901
+ },
902
+ {
903
+ "epoch": 0.5,
904
+ "eval_loss": 0.3592735230922699,
905
+ "eval_runtime": 2.3855,
906
+ "eval_samples_per_second": 962.901,
907
+ "eval_steps_per_second": 15.091,
908
+ "step": 45000
909
+ },
910
+ {
911
+ "epoch": 0.51,
912
+ "learning_rate": 0.00013649999999999998,
913
+ "loss": 0.3737,
914
+ "step": 45500
915
+ },
916
+ {
917
+ "epoch": 0.51,
918
+ "learning_rate": 0.000138,
919
+ "loss": 0.3718,
920
+ "step": 46000
921
+ },
922
+ {
923
+ "epoch": 0.51,
924
+ "eval_loss": 0.3585481643676758,
925
+ "eval_runtime": 2.3854,
926
+ "eval_samples_per_second": 962.952,
927
+ "eval_steps_per_second": 15.092,
928
+ "step": 46000
929
+ },
930
+ {
931
+ "epoch": 0.52,
932
+ "learning_rate": 0.0001395,
933
+ "loss": 0.3704,
934
+ "step": 46500
935
+ },
936
+ {
937
+ "epoch": 0.52,
938
+ "learning_rate": 0.00014099999999999998,
939
+ "loss": 0.3687,
940
+ "step": 47000
941
+ },
942
+ {
943
+ "epoch": 0.52,
944
+ "eval_loss": 0.3562163710594177,
945
+ "eval_runtime": 2.4137,
946
+ "eval_samples_per_second": 951.637,
947
+ "eval_steps_per_second": 14.915,
948
+ "step": 47000
949
+ },
950
+ {
951
+ "epoch": 0.53,
952
+ "learning_rate": 0.0001425,
953
+ "loss": 0.367,
954
+ "step": 47500
955
+ },
956
+ {
957
+ "epoch": 0.54,
958
+ "learning_rate": 0.00014399999999999998,
959
+ "loss": 0.3654,
960
+ "step": 48000
961
+ },
962
+ {
963
+ "epoch": 0.54,
964
+ "eval_loss": 0.35154005885124207,
965
+ "eval_runtime": 2.4671,
966
+ "eval_samples_per_second": 931.04,
967
+ "eval_steps_per_second": 14.592,
968
+ "step": 48000
969
+ },
970
+ {
971
+ "epoch": 0.54,
972
+ "learning_rate": 0.00014549999999999999,
973
+ "loss": 0.3638,
974
+ "step": 48500
975
+ },
976
+ {
977
+ "epoch": 0.55,
978
+ "learning_rate": 0.000147,
979
+ "loss": 0.3625,
980
+ "step": 49000
981
+ },
982
+ {
983
+ "epoch": 0.55,
984
+ "eval_loss": 0.3474389910697937,
985
+ "eval_runtime": 2.4177,
986
+ "eval_samples_per_second": 950.09,
987
+ "eval_steps_per_second": 14.89,
988
+ "step": 49000
989
+ },
990
+ {
991
+ "epoch": 0.55,
992
+ "learning_rate": 0.00014849999999999998,
993
+ "loss": 0.3612,
994
+ "step": 49500
995
+ },
996
+ {
997
+ "epoch": 0.56,
998
+ "learning_rate": 0.00015,
999
+ "loss": 0.3592,
1000
+ "step": 50000
1001
+ },
1002
+ {
1003
+ "epoch": 0.56,
1004
+ "eval_loss": 0.3449079096317291,
1005
+ "eval_runtime": 2.4174,
1006
+ "eval_samples_per_second": 950.212,
1007
+ "eval_steps_per_second": 14.892,
1008
+ "step": 50000
1009
  }
1010
  ],
1011
  "max_steps": 1000000,
1012
  "num_train_epochs": 12,
1013
+ "total_flos": 3.505013051517357e+21,
1014
  "trial_name": null,
1015
  "trial_params": null
1016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d5f11aab6f911cbec235d15a3494a5c1ad6a9959fd4ddb8c6370040ccb52d96
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:175c9af2cd55b51e9df8d727ba8d18aab140807b11f81fcd4adde2c5741e4d30
3
  size 449471589