jflotz commited on
Commit
d0638a2
·
1 Parent(s): 51a5d87

Training in progress, step 840000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f349542b4533abe4453e0adeb6aff6cd875b986f4117c2f333ebbbb94148a468
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24859c623c6a5769d23a445d9e652805ef93ef8232d0532f3fafc5dad772c85e
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d8915b38c77403d5b3caf94070565cc919cba4e372d557eb5c40dbe89ac1681
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27758f4c4f1be46ca953f785452acae2687180a06e7c14c3b975c46e8947612
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b44f4d1ea700e774f5dee0343ba4324675c77c29852dd54fec6a281d849ccd3b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b75da63b821a4c72c4b37f39fc301b88ce6e4d7dc37edf4f078b7f5706f736e3
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.899746829796014,
5
- "global_step": 830000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -16606,11 +16606,211 @@
16606
  "eval_samples_per_second": 864.555,
16607
  "eval_steps_per_second": 13.55,
16608
  "step": 830000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16609
  }
16610
  ],
16611
  "max_steps": 1000000,
16612
  "num_train_epochs": 12,
16613
- "total_flos": 5.818277853938688e+22,
16614
  "trial_name": null,
16615
  "trial_params": null
16616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.011275554018937,
5
+ "global_step": 840000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
16606
  "eval_samples_per_second": 864.555,
16607
  "eval_steps_per_second": 13.55,
16608
  "step": 830000
16609
+ },
16610
+ {
16611
+ "epoch": 8.91,
16612
+ "learning_rate": 2.0711718689098057e-05,
16613
+ "loss": 0.1858,
16614
+ "step": 830500
16615
+ },
16616
+ {
16617
+ "epoch": 8.91,
16618
+ "learning_rate": 2.0650267139558772e-05,
16619
+ "loss": 0.1854,
16620
+ "step": 831000
16621
+ },
16622
+ {
16623
+ "epoch": 8.91,
16624
+ "eval_loss": 0.17501012980937958,
16625
+ "eval_runtime": 2.6667,
16626
+ "eval_samples_per_second": 861.35,
16627
+ "eval_steps_per_second": 13.5,
16628
+ "step": 831000
16629
+ },
16630
+ {
16631
+ "epoch": 8.92,
16632
+ "learning_rate": 2.058897784992289e-05,
16633
+ "loss": 0.1855,
16634
+ "step": 831500
16635
+ },
16636
+ {
16637
+ "epoch": 8.92,
16638
+ "learning_rate": 2.052785098775293e-05,
16639
+ "loss": 0.1855,
16640
+ "step": 832000
16641
+ },
16642
+ {
16643
+ "epoch": 8.92,
16644
+ "eval_loss": 0.17379425466060638,
16645
+ "eval_runtime": 2.675,
16646
+ "eval_samples_per_second": 858.689,
16647
+ "eval_steps_per_second": 13.458,
16648
+ "step": 832000
16649
+ },
16650
+ {
16651
+ "epoch": 8.93,
16652
+ "learning_rate": 2.0466886720167436e-05,
16653
+ "loss": 0.1847,
16654
+ "step": 832500
16655
+ },
16656
+ {
16657
+ "epoch": 8.93,
16658
+ "learning_rate": 2.04060852138404e-05,
16659
+ "loss": 0.1854,
16660
+ "step": 833000
16661
+ },
16662
+ {
16663
+ "epoch": 8.93,
16664
+ "eval_loss": 0.1764400452375412,
16665
+ "eval_runtime": 2.6031,
16666
+ "eval_samples_per_second": 882.425,
16667
+ "eval_steps_per_second": 13.83,
16668
+ "step": 833000
16669
+ },
16670
+ {
16671
+ "epoch": 8.94,
16672
+ "learning_rate": 2.0345446635000783e-05,
16673
+ "loss": 0.1856,
16674
+ "step": 833500
16675
+ },
16676
+ {
16677
+ "epoch": 8.94,
16678
+ "learning_rate": 2.028497114943219e-05,
16679
+ "loss": 0.1851,
16680
+ "step": 834000
16681
+ },
16682
+ {
16683
+ "epoch": 8.94,
16684
+ "eval_loss": 0.17593778669834137,
16685
+ "eval_runtime": 2.5824,
16686
+ "eval_samples_per_second": 889.497,
16687
+ "eval_steps_per_second": 13.941,
16688
+ "step": 834000
16689
+ },
16690
+ {
16691
+ "epoch": 8.95,
16692
+ "learning_rate": 2.022465892247223e-05,
16693
+ "loss": 0.1855,
16694
+ "step": 834500
16695
+ },
16696
+ {
16697
+ "epoch": 8.96,
16698
+ "learning_rate": 2.0164510119012263e-05,
16699
+ "loss": 0.1849,
16700
+ "step": 835000
16701
+ },
16702
+ {
16703
+ "epoch": 8.96,
16704
+ "eval_loss": 0.1772100031375885,
16705
+ "eval_runtime": 2.6877,
16706
+ "eval_samples_per_second": 854.619,
16707
+ "eval_steps_per_second": 13.394,
16708
+ "step": 835000
16709
+ },
16710
+ {
16711
+ "epoch": 8.96,
16712
+ "learning_rate": 2.0104524903496834e-05,
16713
+ "loss": 0.1852,
16714
+ "step": 835500
16715
+ },
16716
+ {
16717
+ "epoch": 8.97,
16718
+ "learning_rate": 2.0044703439923217e-05,
16719
+ "loss": 0.1854,
16720
+ "step": 836000
16721
+ },
16722
+ {
16723
+ "epoch": 8.97,
16724
+ "eval_loss": 0.17744192481040955,
16725
+ "eval_runtime": 2.6114,
16726
+ "eval_samples_per_second": 879.619,
16727
+ "eval_steps_per_second": 13.786,
16728
+ "step": 836000
16729
+ },
16730
+ {
16731
+ "epoch": 8.97,
16732
+ "learning_rate": 1.998504589184101e-05,
16733
+ "loss": 0.1851,
16734
+ "step": 836500
16735
+ },
16736
+ {
16737
+ "epoch": 8.98,
16738
+ "learning_rate": 1.9925552422351654e-05,
16739
+ "loss": 0.1849,
16740
+ "step": 837000
16741
+ },
16742
+ {
16743
+ "epoch": 8.98,
16744
+ "eval_loss": 0.1755765676498413,
16745
+ "eval_runtime": 2.6326,
16746
+ "eval_samples_per_second": 872.529,
16747
+ "eval_steps_per_second": 13.675,
16748
+ "step": 837000
16749
+ },
16750
+ {
16751
+ "epoch": 8.98,
16752
+ "learning_rate": 1.9866223194108028e-05,
16753
+ "loss": 0.1851,
16754
+ "step": 837500
16755
+ },
16756
+ {
16757
+ "epoch": 8.99,
16758
+ "learning_rate": 1.9807058369314016e-05,
16759
+ "loss": 0.1845,
16760
+ "step": 838000
16761
+ },
16762
+ {
16763
+ "epoch": 8.99,
16764
+ "eval_loss": 0.17676672339439392,
16765
+ "eval_runtime": 2.6846,
16766
+ "eval_samples_per_second": 855.61,
16767
+ "eval_steps_per_second": 13.41,
16768
+ "step": 838000
16769
+ },
16770
+ {
16771
+ "epoch": 8.99,
16772
+ "learning_rate": 1.9748058109723953e-05,
16773
+ "loss": 0.1852,
16774
+ "step": 838500
16775
+ },
16776
+ {
16777
+ "epoch": 9.0,
16778
+ "learning_rate": 1.968922257664231e-05,
16779
+ "loss": 0.1853,
16780
+ "step": 839000
16781
+ },
16782
+ {
16783
+ "epoch": 9.0,
16784
+ "eval_loss": 0.17678546905517578,
16785
+ "eval_runtime": 2.6872,
16786
+ "eval_samples_per_second": 854.778,
16787
+ "eval_steps_per_second": 13.397,
16788
+ "step": 839000
16789
+ },
16790
+ {
16791
+ "epoch": 9.01,
16792
+ "learning_rate": 1.9630551930923155e-05,
16793
+ "loss": 0.1851,
16794
+ "step": 839500
16795
+ },
16796
+ {
16797
+ "epoch": 9.01,
16798
+ "learning_rate": 1.9572046332969825e-05,
16799
+ "loss": 0.1848,
16800
+ "step": 840000
16801
+ },
16802
+ {
16803
+ "epoch": 9.01,
16804
+ "eval_loss": 0.1751183122396469,
16805
+ "eval_runtime": 2.6372,
16806
+ "eval_samples_per_second": 870.984,
16807
+ "eval_steps_per_second": 13.651,
16808
+ "step": 840000
16809
  }
16810
  ],
16811
  "max_steps": 1000000,
16812
  "num_train_epochs": 12,
16813
+ "total_flos": 5.8883726385215196e+22,
16814
  "trial_name": null,
16815
  "trial_params": null
16816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d8915b38c77403d5b3caf94070565cc919cba4e372d557eb5c40dbe89ac1681
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27758f4c4f1be46ca953f785452acae2687180a06e7c14c3b975c46e8947612
3
  size 449471589