Nadav commited on
Commit
c0eae84
·
1 Parent(s): 238efe4

Training in progress, step 40000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88b8a99831b810a81afda5499f89d37d313f57de0e44acfd6fda4ffa5d407961
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8364802d21dd9f982e45881cc79c347aa3801886af5229e46080ee91f3907fe6
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eec5be5134af20148899e71dab937c29c165b2f523524e79200da7b125e7331
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d09408dd133bba0b615b1fc392982c3e187892b1f9f86f244d616011599238fa
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8930b754593f69ea99a69818713906027d8b18db77040fbc82850fb457145d53
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7452543a8fbf992ab3cce28416697ef7ccf235bef8f9b12b8a45f822598554fe
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f3636d5d5906420899d9a721abefc725ca1bec46f94db174f2d813e8cafd619
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c140b5a229b9a3368f84f81bd05277429e8fd4356be63302dcf2f4ec2ee074c7
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:304d89faced0aa75098d224436ef3865f3b1d27481cbd97cf9d9b995cd4a60e9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:668b6868588ce6f6b1dad74dfa79e9c675d217e8314657782f2e491c66698c2c
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9772031303164344,
5
- "global_step": 35000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -706,11 +706,111 @@
706
  "eval_samples_per_second": 29.603,
707
  "eval_steps_per_second": 0.947,
708
  "step": 35000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
  }
710
  ],
711
  "max_steps": 1000000,
712
  "num_train_epochs": 86,
713
- "total_flos": 1.6100830789800572e+21,
714
  "trial_name": null,
715
  "trial_params": null
716
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.402517863218782,
5
+ "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
706
  "eval_samples_per_second": 29.603,
707
  "eval_steps_per_second": 0.947,
708
  "step": 35000
709
+ },
710
+ {
711
+ "epoch": 3.02,
712
+ "learning_rate": 9.999999999999999e-06,
713
+ "loss": 0.4195,
714
+ "step": 35500
715
+ },
716
+ {
717
+ "epoch": 3.06,
718
+ "learning_rate": 9.999999999999999e-06,
719
+ "loss": 0.4194,
720
+ "step": 36000
721
+ },
722
+ {
723
+ "epoch": 3.06,
724
+ "eval_loss": 0.38856348395347595,
725
+ "eval_runtime": 16.5028,
726
+ "eval_samples_per_second": 30.298,
727
+ "eval_steps_per_second": 0.97,
728
+ "step": 36000
729
+ },
730
+ {
731
+ "epoch": 3.1,
732
+ "learning_rate": 9.999999999999999e-06,
733
+ "loss": 0.4193,
734
+ "step": 36500
735
+ },
736
+ {
737
+ "epoch": 3.15,
738
+ "learning_rate": 9.999999999999999e-06,
739
+ "loss": 0.4208,
740
+ "step": 37000
741
+ },
742
+ {
743
+ "epoch": 3.15,
744
+ "eval_loss": 0.3889642059803009,
745
+ "eval_runtime": 28.0106,
746
+ "eval_samples_per_second": 17.85,
747
+ "eval_steps_per_second": 0.571,
748
+ "step": 37000
749
+ },
750
+ {
751
+ "epoch": 3.19,
752
+ "learning_rate": 9.999999999999999e-06,
753
+ "loss": 0.4189,
754
+ "step": 37500
755
+ },
756
+ {
757
+ "epoch": 3.23,
758
+ "learning_rate": 9.999999999999999e-06,
759
+ "loss": 0.4187,
760
+ "step": 38000
761
+ },
762
+ {
763
+ "epoch": 3.23,
764
+ "eval_loss": 0.3886989653110504,
765
+ "eval_runtime": 15.6007,
766
+ "eval_samples_per_second": 32.05,
767
+ "eval_steps_per_second": 1.026,
768
+ "step": 38000
769
+ },
770
+ {
771
+ "epoch": 3.27,
772
+ "learning_rate": 9.999999999999999e-06,
773
+ "loss": 0.4181,
774
+ "step": 38500
775
+ },
776
+ {
777
+ "epoch": 3.32,
778
+ "learning_rate": 9.999999999999999e-06,
779
+ "loss": 0.417,
780
+ "step": 39000
781
+ },
782
+ {
783
+ "epoch": 3.32,
784
+ "eval_loss": 0.3878667950630188,
785
+ "eval_runtime": 14.893,
786
+ "eval_samples_per_second": 33.573,
787
+ "eval_steps_per_second": 1.074,
788
+ "step": 39000
789
+ },
790
+ {
791
+ "epoch": 3.36,
792
+ "learning_rate": 9.999999999999999e-06,
793
+ "loss": 0.4176,
794
+ "step": 39500
795
+ },
796
+ {
797
+ "epoch": 3.4,
798
+ "learning_rate": 9.999999999999999e-06,
799
+ "loss": 0.4164,
800
+ "step": 40000
801
+ },
802
+ {
803
+ "epoch": 3.4,
804
+ "eval_loss": 0.3855785131454468,
805
+ "eval_runtime": 15.2409,
806
+ "eval_samples_per_second": 32.806,
807
+ "eval_steps_per_second": 1.05,
808
+ "step": 40000
809
  }
810
  ],
811
  "max_steps": 1000000,
812
  "num_train_epochs": 86,
813
+ "total_flos": 1.84008340746311e+21,
814
  "trial_name": null,
815
  "trial_params": null
816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eec5be5134af20148899e71dab937c29c165b2f523524e79200da7b125e7331
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d09408dd133bba0b615b1fc392982c3e187892b1f9f86f244d616011599238fa
3
  size 449471589