Nadav committed on
Commit
7182479
·
1 Parent(s): c0eae84

Training in progress, step 45000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8364802d21dd9f982e45881cc79c347aa3801886af5229e46080ee91f3907fe6
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974a1e790d3ca33c068dbaca3cc4297bfcbca347437b8b9cb62025728f09e96e
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d09408dd133bba0b615b1fc392982c3e187892b1f9f86f244d616011599238fa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a9d277649a19c26574f85a21d72156907a1fa49083c31e96d8eb40d2455fc4
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7452543a8fbf992ab3cce28416697ef7ccf235bef8f9b12b8a45f822598554fe
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe764e4b14b1a41f602255c5ad661b82e79519ef62fb0a01c7236478ec943d57
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c140b5a229b9a3368f84f81bd05277429e8fd4356be63302dcf2f4ec2ee074c7
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3c1e2756d4a4685b9df2f62aad4f5988b6c0b032f0fc7cb98d4e77d5c23a8e8
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:668b6868588ce6f6b1dad74dfa79e9c675d217e8314657782f2e491c66698c2c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:584cc9786ad375313cf5cd3cfaf9ab2fa5956cf7f817f1132bf8fc5dbd46f871
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.402517863218782,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -806,11 +806,111 @@
806
  "eval_samples_per_second": 32.806,
807
  "eval_steps_per_second": 1.05,
808
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  }
810
  ],
811
  "max_steps": 1000000,
812
  "num_train_epochs": 86,
813
- "total_flos": 1.84008340746311e+21,
814
  "trial_name": null,
815
  "trial_params": null
816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.8278325961211297,
5
+ "global_step": 45000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
806
  "eval_samples_per_second": 32.806,
807
  "eval_steps_per_second": 1.05,
808
  "step": 40000
809
+ },
810
+ {
811
+ "epoch": 3.45,
812
+ "learning_rate": 9.999999999999999e-06,
813
+ "loss": 0.4167,
814
+ "step": 40500
815
+ },
816
+ {
817
+ "epoch": 3.49,
818
+ "learning_rate": 9.999999999999999e-06,
819
+ "loss": 0.417,
820
+ "step": 41000
821
+ },
822
+ {
823
+ "epoch": 3.49,
824
+ "eval_loss": 0.38663551211357117,
825
+ "eval_runtime": 24.5074,
826
+ "eval_samples_per_second": 20.402,
827
+ "eval_steps_per_second": 0.653,
828
+ "step": 41000
829
+ },
830
+ {
831
+ "epoch": 3.53,
832
+ "learning_rate": 9.999999999999999e-06,
833
+ "loss": 0.4168,
834
+ "step": 41500
835
+ },
836
+ {
837
+ "epoch": 3.57,
838
+ "learning_rate": 9.999999999999999e-06,
839
+ "loss": 0.4159,
840
+ "step": 42000
841
+ },
842
+ {
843
+ "epoch": 3.57,
844
+ "eval_loss": 0.38440173864364624,
845
+ "eval_runtime": 30.9795,
846
+ "eval_samples_per_second": 16.14,
847
+ "eval_steps_per_second": 0.516,
848
+ "step": 42000
849
+ },
850
+ {
851
+ "epoch": 3.62,
852
+ "learning_rate": 9.999999999999999e-06,
853
+ "loss": 0.4151,
854
+ "step": 42500
855
+ },
856
+ {
857
+ "epoch": 3.66,
858
+ "learning_rate": 9.999999999999999e-06,
859
+ "loss": 0.4155,
860
+ "step": 43000
861
+ },
862
+ {
863
+ "epoch": 3.66,
864
+ "eval_loss": 0.3864738941192627,
865
+ "eval_runtime": 24.9969,
866
+ "eval_samples_per_second": 20.002,
867
+ "eval_steps_per_second": 0.64,
868
+ "step": 43000
869
+ },
870
+ {
871
+ "epoch": 3.7,
872
+ "learning_rate": 9.999999999999999e-06,
873
+ "loss": 0.4157,
874
+ "step": 43500
875
+ },
876
+ {
877
+ "epoch": 3.74,
878
+ "learning_rate": 9.999999999999999e-06,
879
+ "loss": 0.4158,
880
+ "step": 44000
881
+ },
882
+ {
883
+ "epoch": 3.74,
884
+ "eval_loss": 0.3862515091896057,
885
+ "eval_runtime": 28.5688,
886
+ "eval_samples_per_second": 17.502,
887
+ "eval_steps_per_second": 0.56,
888
+ "step": 44000
889
+ },
890
+ {
891
+ "epoch": 3.79,
892
+ "learning_rate": 9.999999999999999e-06,
893
+ "loss": 0.4147,
894
+ "step": 44500
895
+ },
896
+ {
897
+ "epoch": 3.83,
898
+ "learning_rate": 9.999999999999999e-06,
899
+ "loss": 0.4134,
900
+ "step": 45000
901
+ },
902
+ {
903
+ "epoch": 3.83,
904
+ "eval_loss": 0.38480713963508606,
905
+ "eval_runtime": 27.3513,
906
+ "eval_samples_per_second": 18.281,
907
+ "eval_steps_per_second": 0.585,
908
+ "step": 45000
909
  }
910
  ],
911
  "max_steps": 1000000,
912
  "num_train_epochs": 86,
913
+ "total_flos": 2.0700998918659003e+21,
914
  "trial_name": null,
915
  "trial_params": null
916
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d09408dd133bba0b615b1fc392982c3e187892b1f9f86f244d616011599238fa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a9d277649a19c26574f85a21d72156907a1fa49083c31e96d8eb40d2455fc4
3
  size 449471589