jflotz commited on
Commit
3a6c403
·
1 Parent(s): e0bf422

Training in progress, step 490000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4060e633dae015e5639b74a57f1603654125aa36a5a7f3f6681895bc39045ec
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bba175f94af3131ddc7e585c8d0c85376ebd1433f20a5a01a35d8488fc39885b
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f459617feff07cc080660c37736876af343cf74d6054e3f49e34fa66dc0e7730
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6bd31ef6b75d2ff57b791613279c5afe6c8244312a64f00fb084519b8aaac6
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90520ba7330b686999308b8bbfc39f4cdbd59c0cf0ab7c6ba32bba11cfac21a5
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebbfa680b1187d22cc7371654116ef29dab3c85749ad34b845956736ad3b3612
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.70663811563169,
5
- "global_step": 480000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9606,11 +9606,211 @@
9606
  "eval_samples_per_second": 1170.393,
9607
  "eval_steps_per_second": 18.343,
9608
  "step": 480000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9609
  }
9610
  ],
9611
  "max_steps": 500000,
9612
  "num_train_epochs": 12,
9613
- "total_flos": 1.5335098118561847e+22,
9614
  "trial_name": null,
9615
  "trial_params": null
9616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.929693076374019,
5
+ "global_step": 490000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9606
  "eval_samples_per_second": 1170.393,
9607
  "eval_steps_per_second": 18.343,
9608
  "step": 480000
9609
+ },
9610
+ {
9611
+ "epoch": 10.72,
9612
+ "learning_rate": 1.1204252864868377e-05,
9613
+ "loss": 0.2538,
9614
+ "step": 480500
9615
+ },
9616
+ {
9617
+ "epoch": 10.73,
9618
+ "learning_rate": 1.1143368309400725e-05,
9619
+ "loss": 0.2539,
9620
+ "step": 481000
9621
+ },
9622
+ {
9623
+ "epoch": 10.73,
9624
+ "eval_loss": 0.23603801429271698,
9625
+ "eval_runtime": 1.9718,
9626
+ "eval_samples_per_second": 1164.912,
9627
+ "eval_steps_per_second": 18.257,
9628
+ "step": 481000
9629
+ },
9630
+ {
9631
+ "epoch": 10.74,
9632
+ "learning_rate": 1.1084056947009348e-05,
9633
+ "loss": 0.2538,
9634
+ "step": 481500
9635
+ },
9636
+ {
9637
+ "epoch": 10.75,
9638
+ "learning_rate": 1.1026319426313837e-05,
9639
+ "loss": 0.2538,
9640
+ "step": 482000
9641
+ },
9642
+ {
9643
+ "epoch": 10.75,
9644
+ "eval_loss": 0.23858527839183807,
9645
+ "eval_runtime": 1.961,
9646
+ "eval_samples_per_second": 1171.312,
9647
+ "eval_steps_per_second": 18.358,
9648
+ "step": 482000
9649
+ },
9650
+ {
9651
+ "epoch": 10.76,
9652
+ "learning_rate": 1.097015637872247e-05,
9653
+ "loss": 0.2538,
9654
+ "step": 482500
9655
+ },
9656
+ {
9657
+ "epoch": 10.77,
9658
+ "learning_rate": 1.0915568418425301e-05,
9659
+ "loss": 0.2537,
9660
+ "step": 483000
9661
+ },
9662
+ {
9663
+ "epoch": 10.77,
9664
+ "eval_loss": 0.23714858293533325,
9665
+ "eval_runtime": 2.009,
9666
+ "eval_samples_per_second": 1143.375,
9667
+ "eval_steps_per_second": 17.92,
9668
+ "step": 483000
9669
+ },
9670
+ {
9671
+ "epoch": 10.78,
9672
+ "learning_rate": 1.0862556142387571e-05,
9673
+ "loss": 0.2539,
9674
+ "step": 483500
9675
+ },
9676
+ {
9677
+ "epoch": 10.8,
9678
+ "learning_rate": 1.081112013034298e-05,
9679
+ "loss": 0.2537,
9680
+ "step": 484000
9681
+ },
9682
+ {
9683
+ "epoch": 10.8,
9684
+ "eval_loss": 0.23877692222595215,
9685
+ "eval_runtime": 1.9856,
9686
+ "eval_samples_per_second": 1156.824,
9687
+ "eval_steps_per_second": 18.13,
9688
+ "step": 484000
9689
+ },
9690
+ {
9691
+ "epoch": 10.81,
9692
+ "learning_rate": 1.0761260944787561e-05,
9693
+ "loss": 0.2551,
9694
+ "step": 484500
9695
+ },
9696
+ {
9697
+ "epoch": 10.82,
9698
+ "learning_rate": 1.0712979130973347e-05,
9699
+ "loss": 0.2542,
9700
+ "step": 485000
9701
+ },
9702
+ {
9703
+ "epoch": 10.82,
9704
+ "eval_loss": 0.23765695095062256,
9705
+ "eval_runtime": 1.9888,
9706
+ "eval_samples_per_second": 1154.965,
9707
+ "eval_steps_per_second": 18.101,
9708
+ "step": 485000
9709
+ },
9710
+ {
9711
+ "epoch": 10.83,
9712
+ "learning_rate": 1.0666275216902535e-05,
9713
+ "loss": 0.2539,
9714
+ "step": 485500
9715
+ },
9716
+ {
9717
+ "epoch": 10.84,
9718
+ "learning_rate": 1.0621149713321656e-05,
9719
+ "loss": 0.2539,
9720
+ "step": 486000
9721
+ },
9722
+ {
9723
+ "epoch": 10.84,
9724
+ "eval_loss": 0.23621481657028198,
9725
+ "eval_runtime": 1.9428,
9726
+ "eval_samples_per_second": 1182.329,
9727
+ "eval_steps_per_second": 18.53,
9728
+ "step": 486000
9729
+ },
9730
+ {
9731
+ "epoch": 10.85,
9732
+ "learning_rate": 1.0577603113715964e-05,
9733
+ "loss": 0.2539,
9734
+ "step": 486500
9735
+ },
9736
+ {
9737
+ "epoch": 10.86,
9738
+ "learning_rate": 1.0535635894304106e-05,
9739
+ "loss": 0.2535,
9740
+ "step": 487000
9741
+ },
9742
+ {
9743
+ "epoch": 10.86,
9744
+ "eval_loss": 0.23733575642108917,
9745
+ "eval_runtime": 1.9603,
9746
+ "eval_samples_per_second": 1171.773,
9747
+ "eval_steps_per_second": 18.365,
9748
+ "step": 487000
9749
+ },
9750
+ {
9751
+ "epoch": 10.87,
9752
+ "learning_rate": 1.0495248514032875e-05,
9753
+ "loss": 0.2539,
9754
+ "step": 487500
9755
+ },
9756
+ {
9757
+ "epoch": 10.89,
9758
+ "learning_rate": 1.045644141457218e-05,
9759
+ "loss": 0.2533,
9760
+ "step": 488000
9761
+ },
9762
+ {
9763
+ "epoch": 10.89,
9764
+ "eval_loss": 0.23612964153289795,
9765
+ "eval_runtime": 1.9923,
9766
+ "eval_samples_per_second": 1152.93,
9767
+ "eval_steps_per_second": 18.069,
9768
+ "step": 488000
9769
+ },
9770
+ {
9771
+ "epoch": 10.9,
9772
+ "learning_rate": 1.0419215020310254e-05,
9773
+ "loss": 0.2534,
9774
+ "step": 488500
9775
+ },
9776
+ {
9777
+ "epoch": 10.91,
9778
+ "learning_rate": 1.0383569738348988e-05,
9779
+ "loss": 0.2533,
9780
+ "step": 489000
9781
+ },
9782
+ {
9783
+ "epoch": 10.91,
9784
+ "eval_loss": 0.2368190884590149,
9785
+ "eval_runtime": 1.9507,
9786
+ "eval_samples_per_second": 1177.524,
9787
+ "eval_steps_per_second": 18.455,
9788
+ "step": 489000
9789
+ },
9790
+ {
9791
+ "epoch": 10.92,
9792
+ "learning_rate": 1.0349505958499436e-05,
9793
+ "loss": 0.2534,
9794
+ "step": 489500
9795
+ },
9796
+ {
9797
+ "epoch": 10.93,
9798
+ "learning_rate": 1.0317024053277693e-05,
9799
+ "loss": 0.2535,
9800
+ "step": 490000
9801
+ },
9802
+ {
9803
+ "epoch": 10.93,
9804
+ "eval_loss": 0.23948417603969574,
9805
+ "eval_runtime": 2.0351,
9806
+ "eval_samples_per_second": 1128.67,
9807
+ "eval_steps_per_second": 17.689,
9808
+ "step": 490000
9809
  }
9810
  ],
9811
  "max_steps": 500000,
9812
  "num_train_epochs": 12,
9813
+ "total_flos": 1.5654585257336347e+22,
9814
  "trial_name": null,
9815
  "trial_params": null
9816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f459617feff07cc080660c37736876af343cf74d6054e3f49e34fa66dc0e7730
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6bd31ef6b75d2ff57b791613279c5afe6c8244312a64f00fb084519b8aaac6
3
  size 102501541