jflotz committed on
Commit
c7e9fcb
·
1 Parent(s): 363c8e7

Training in progress, step 940000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7e7257d85066f44137f90721b6eeea6b47af4fdd60cb740d773a3a975cd64d8
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c72fdd0154d524cda12334954eb1e4f193d30dc2134990578a195ba70ede7f
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3151d7e7cec0857fd8c0138e846e83ffe158233cc9712166843e469c2af9c3a
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0068dc2eb70214cc7f3762f96ca920a8342113b2f223dfdea92c9f41e6012f4
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b0e1b3397ffbcdba72f77e0e72529212805b3efe290aff36c3ecd969d87bca4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dab84d4b75593cd9349f424c4371ea8ac2493751bc544a294c8ef74a18b08e9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.01503407202525,
5
- "global_step": 930000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -18606,11 +18606,211 @@
18606
  "eval_samples_per_second": 877.459,
18607
  "eval_steps_per_second": 13.752,
18608
  "step": 930000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18609
  }
18610
  ],
18611
  "max_steps": 1000000,
18612
  "num_train_epochs": 12,
18613
- "total_flos": 6.519269511347128e+22,
18614
  "trial_name": null,
18615
  "trial_params": null
18616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.126562796248173,
5
+ "global_step": 940000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
18606
  "eval_samples_per_second": 877.459,
18607
  "eval_steps_per_second": 13.752,
18608
  "step": 930000
18609
+ },
18610
+ {
18611
+ "epoch": 10.02,
18612
+ "learning_rate": 1.1840677154152987e-05,
18613
+ "loss": 0.181,
18614
+ "step": 930500
18615
+ },
18616
+ {
18617
+ "epoch": 10.03,
18618
+ "learning_rate": 1.1814402460652382e-05,
18619
+ "loss": 0.1811,
18620
+ "step": 931000
18621
+ },
18622
+ {
18623
+ "epoch": 10.03,
18624
+ "eval_loss": 0.17144934833049774,
18625
+ "eval_runtime": 2.7383,
18626
+ "eval_samples_per_second": 838.849,
18627
+ "eval_steps_per_second": 13.147,
18628
+ "step": 931000
18629
+ },
18630
+ {
18631
+ "epoch": 10.03,
18632
+ "learning_rate": 1.178831418397181e-05,
18633
+ "loss": 0.181,
18634
+ "step": 931500
18635
+ },
18636
+ {
18637
+ "epoch": 10.04,
18638
+ "learning_rate": 1.176241239543558e-05,
18639
+ "loss": 0.181,
18640
+ "step": 932000
18641
+ },
18642
+ {
18643
+ "epoch": 10.04,
18644
+ "eval_loss": 0.17210912704467773,
18645
+ "eval_runtime": 2.6312,
18646
+ "eval_samples_per_second": 872.975,
18647
+ "eval_steps_per_second": 13.682,
18648
+ "step": 932000
18649
+ },
18650
+ {
18651
+ "epoch": 10.04,
18652
+ "learning_rate": 1.173669716585822e-05,
18653
+ "loss": 0.1809,
18654
+ "step": 932500
18655
+ },
18656
+ {
18657
+ "epoch": 10.05,
18658
+ "learning_rate": 1.171116856554418e-05,
18659
+ "loss": 0.1809,
18660
+ "step": 933000
18661
+ },
18662
+ {
18663
+ "epoch": 10.05,
18664
+ "eval_loss": 0.17279262840747833,
18665
+ "eval_runtime": 2.687,
18666
+ "eval_samples_per_second": 854.858,
18667
+ "eval_steps_per_second": 13.398,
18668
+ "step": 933000
18669
+ },
18670
+ {
18671
+ "epoch": 10.05,
18672
+ "learning_rate": 1.168582666428768e-05,
18673
+ "loss": 0.1809,
18674
+ "step": 933500
18675
+ },
18676
+ {
18677
+ "epoch": 10.06,
18678
+ "learning_rate": 1.1660671531372517e-05,
18679
+ "loss": 0.1807,
18680
+ "step": 934000
18681
+ },
18682
+ {
18683
+ "epoch": 10.06,
18684
+ "eval_loss": 0.17214839160442352,
18685
+ "eval_runtime": 2.6862,
18686
+ "eval_samples_per_second": 855.103,
18687
+ "eval_steps_per_second": 13.402,
18688
+ "step": 934000
18689
+ },
18690
+ {
18691
+ "epoch": 10.07,
18692
+ "learning_rate": 1.1635703235571846e-05,
18693
+ "loss": 0.181,
18694
+ "step": 934500
18695
+ },
18696
+ {
18697
+ "epoch": 10.07,
18698
+ "learning_rate": 1.1610921845148052e-05,
18699
+ "loss": 0.1805,
18700
+ "step": 935000
18701
+ },
18702
+ {
18703
+ "epoch": 10.07,
18704
+ "eval_loss": 0.17261220514774323,
18705
+ "eval_runtime": 2.7622,
18706
+ "eval_samples_per_second": 831.593,
18707
+ "eval_steps_per_second": 13.033,
18708
+ "step": 935000
18709
+ },
18710
+ {
18711
+ "epoch": 10.08,
18712
+ "learning_rate": 1.1586327427852503e-05,
18713
+ "loss": 0.1805,
18714
+ "step": 935500
18715
+ },
18716
+ {
18717
+ "epoch": 10.08,
18718
+ "learning_rate": 1.156192005092539e-05,
18719
+ "loss": 0.1807,
18720
+ "step": 936000
18721
+ },
18722
+ {
18723
+ "epoch": 10.08,
18724
+ "eval_loss": 0.17041905224323273,
18725
+ "eval_runtime": 2.643,
18726
+ "eval_samples_per_second": 869.101,
18727
+ "eval_steps_per_second": 13.621,
18728
+ "step": 936000
18729
+ },
18730
+ {
18731
+ "epoch": 10.09,
18732
+ "learning_rate": 1.153769978109557e-05,
18733
+ "loss": 0.1806,
18734
+ "step": 936500
18735
+ },
18736
+ {
18737
+ "epoch": 10.09,
18738
+ "learning_rate": 1.1513666684580308e-05,
18739
+ "loss": 0.1809,
18740
+ "step": 937000
18741
+ },
18742
+ {
18743
+ "epoch": 10.09,
18744
+ "eval_loss": 0.1718713790178299,
18745
+ "eval_runtime": 2.6411,
18746
+ "eval_samples_per_second": 869.707,
18747
+ "eval_steps_per_second": 13.631,
18748
+ "step": 937000
18749
+ },
18750
+ {
18751
+ "epoch": 10.1,
18752
+ "learning_rate": 1.1489820827085185e-05,
18753
+ "loss": 0.1808,
18754
+ "step": 937500
18755
+ },
18756
+ {
18757
+ "epoch": 10.1,
18758
+ "learning_rate": 1.1466162273803876e-05,
18759
+ "loss": 0.1809,
18760
+ "step": 938000
18761
+ },
18762
+ {
18763
+ "epoch": 10.1,
18764
+ "eval_loss": 0.17236891388893127,
18765
+ "eval_runtime": 2.4881,
18766
+ "eval_samples_per_second": 923.197,
18767
+ "eval_steps_per_second": 14.469,
18768
+ "step": 938000
18769
+ },
18770
+ {
18771
+ "epoch": 10.11,
18772
+ "learning_rate": 1.144269108941795e-05,
18773
+ "loss": 0.1808,
18774
+ "step": 938500
18775
+ },
18776
+ {
18777
+ "epoch": 10.12,
18778
+ "learning_rate": 1.1419407338096732e-05,
18779
+ "loss": 0.1807,
18780
+ "step": 939000
18781
+ },
18782
+ {
18783
+ "epoch": 10.12,
18784
+ "eval_loss": 0.17213864624500275,
18785
+ "eval_runtime": 2.697,
18786
+ "eval_samples_per_second": 851.681,
18787
+ "eval_steps_per_second": 13.348,
18788
+ "step": 939000
18789
+ },
18790
+ {
18791
+ "epoch": 10.12,
18792
+ "learning_rate": 1.1396311083497103e-05,
18793
+ "loss": 0.1808,
18794
+ "step": 939500
18795
+ },
18796
+ {
18797
+ "epoch": 10.13,
18798
+ "learning_rate": 1.1373402388763346e-05,
18799
+ "loss": 0.1806,
18800
+ "step": 940000
18801
+ },
18802
+ {
18803
+ "epoch": 10.13,
18804
+ "eval_loss": 0.17225030064582825,
18805
+ "eval_runtime": 2.5852,
18806
+ "eval_samples_per_second": 888.512,
18807
+ "eval_steps_per_second": 13.925,
18808
+ "step": 940000
18809
  }
18810
  ],
18811
  "max_steps": 1000000,
18812
  "num_train_epochs": 12,
18813
+ "total_flos": 6.589369772377475e+22,
18814
  "trial_name": null,
18815
  "trial_params": null
18816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3151d7e7cec0857fd8c0138e846e83ffe158233cc9712166843e469c2af9c3a
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0068dc2eb70214cc7f3762f96ca920a8342113b2f223dfdea92c9f41e6012f4
3
  size 449471589