Nadav committed on
Commit
ae04072
·
1 Parent(s): 8302824

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f490a44ca4db15e7575b8b5617b1840dd0ecf21419611f63f02b99abb69e9b
3
- size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa88874d16a939070903a93d5ba329854e46aadf2f7f7ea1a61905c517a19385
3
+ size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27d58b7a9a0b285b885c546e3dfb7edd5e2459a83480dfbe8cb380fa08f7c48b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df44f87e5182a482c9b6d3a619d67eba073218906e5c075beb6e29b9896b503
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40154e4835e4b0b561c3bf4ff2dc5bc26aeff6d6da324651140e34885d603328
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c007293227ea685a1b33f33dc58e5de97e57e2743aa81629ff6475f1a3723ca2
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6b7e341561c3473a840ee99225858db17d1117cb5b96bd811d9778b3f6e2cd9
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a7310bd5c1439c4bb35c49bf02221c2d3a79d6f2845dd008a516ce1038c5377
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:621aa918e7d94e76e980dd52e6d602b019ed8ae919fe1ca65a5ac903747bec67
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:069e42e1515a55f52067d0327eb65014273d332079adda47ed8ce25758c61f85
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.225115387964817,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -822,11 +822,147 @@
822
  "eval_samples_per_second": 54.961,
823
  "eval_steps_per_second": 0.868,
824
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
  }
826
  ],
827
  "max_steps": 100000,
828
  "num_train_epochs": 9,
829
- "total_flos": 2.8257429268680965e+21,
830
  "trial_name": null,
831
  "trial_params": null
832
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.09596795262562,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
822
  "eval_samples_per_second": 54.961,
823
  "eval_steps_per_second": 0.868,
824
  "step": 60000
825
+ },
826
+ {
827
+ "epoch": 5.27,
828
+ "learning_rate": 4.0717494800280484e-05,
829
+ "loss": 0.3985,
830
+ "step": 60500
831
+ },
832
+ {
833
+ "epoch": 5.31,
834
+ "learning_rate": 4.0044619833152034e-05,
835
+ "loss": 0.3961,
836
+ "step": 61000
837
+ },
838
+ {
839
+ "epoch": 5.36,
840
+ "learning_rate": 3.937553126317129e-05,
841
+ "loss": 0.397,
842
+ "step": 61500
843
+ },
844
+ {
845
+ "epoch": 5.4,
846
+ "learning_rate": 3.871039417793112e-05,
847
+ "loss": 0.3957,
848
+ "step": 62000
849
+ },
850
+ {
851
+ "epoch": 5.44,
852
+ "learning_rate": 3.804937269005464e-05,
853
+ "loss": 0.3943,
854
+ "step": 62500
855
+ },
856
+ {
857
+ "epoch": 5.49,
858
+ "learning_rate": 3.7392629896702916e-05,
859
+ "loss": 0.3946,
860
+ "step": 63000
861
+ },
862
+ {
863
+ "epoch": 5.53,
864
+ "learning_rate": 3.674032783933316e-05,
865
+ "loss": 0.3951,
866
+ "step": 63500
867
+ },
868
+ {
869
+ "epoch": 5.57,
870
+ "learning_rate": 3.609262746371739e-05,
871
+ "loss": 0.3947,
872
+ "step": 64000
873
+ },
874
+ {
875
+ "epoch": 5.62,
876
+ "learning_rate": 3.544968858023156e-05,
877
+ "loss": 0.3942,
878
+ "step": 64500
879
+ },
880
+ {
881
+ "epoch": 5.66,
882
+ "learning_rate": 3.481294084680979e-05,
883
+ "loss": 0.3935,
884
+ "step": 65000
885
+ },
886
+ {
887
+ "epoch": 5.66,
888
+ "eval_loss": 0.3747362196445465,
889
+ "eval_runtime": 73.821,
890
+ "eval_samples_per_second": 67.731,
891
+ "eval_steps_per_second": 1.07,
892
+ "step": 65000
893
+ },
894
+ {
895
+ "epoch": 5.7,
896
+ "learning_rate": 3.417998932888398e-05,
897
+ "loss": 0.3928,
898
+ "step": 65500
899
+ },
900
+ {
901
+ "epoch": 5.75,
902
+ "learning_rate": 3.3552271217928446e-05,
903
+ "loss": 0.3931,
904
+ "step": 66000
905
+ },
906
+ {
907
+ "epoch": 5.79,
908
+ "learning_rate": 3.292994139399436e-05,
909
+ "loss": 0.3917,
910
+ "step": 66500
911
+ },
912
+ {
913
+ "epoch": 5.83,
914
+ "learning_rate": 3.231438135148965e-05,
915
+ "loss": 0.3915,
916
+ "step": 67000
917
+ },
918
+ {
919
+ "epoch": 5.88,
920
+ "learning_rate": 3.170327584695585e-05,
921
+ "loss": 0.392,
922
+ "step": 67500
923
+ },
924
+ {
925
+ "epoch": 5.92,
926
+ "learning_rate": 3.1098014841384136e-05,
927
+ "loss": 0.3912,
928
+ "step": 68000
929
+ },
930
+ {
931
+ "epoch": 5.97,
932
+ "learning_rate": 3.0498747673870948e-05,
933
+ "loss": 0.3913,
934
+ "step": 68500
935
+ },
936
+ {
937
+ "epoch": 6.01,
938
+ "learning_rate": 2.9905622204622832e-05,
939
+ "loss": 0.3912,
940
+ "step": 69000
941
+ },
942
+ {
943
+ "epoch": 6.05,
944
+ "learning_rate": 2.9318784778474186e-05,
945
+ "loss": 0.3898,
946
+ "step": 69500
947
+ },
948
+ {
949
+ "epoch": 6.1,
950
+ "learning_rate": 2.8738380188778874e-05,
951
+ "loss": 0.3901,
952
+ "step": 70000
953
+ },
954
+ {
955
+ "epoch": 6.1,
956
+ "eval_loss": 0.37070053815841675,
957
+ "eval_runtime": 53.0932,
958
+ "eval_samples_per_second": 94.174,
959
+ "eval_steps_per_second": 1.488,
960
+ "step": 70000
961
  }
962
  ],
963
  "max_steps": 100000,
964
  "num_train_epochs": 9,
965
+ "total_flos": 3.296694285295811e+21,
966
  "trial_name": null,
967
  "trial_params": null
968
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27d58b7a9a0b285b885c546e3dfb7edd5e2459a83480dfbe8cb380fa08f7c48b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df44f87e5182a482c9b6d3a619d67eba073218906e5c075beb6e29b9896b503
3
  size 449471589