jflotz commited on
Commit
f764013
·
1 Parent(s): 5bb91f4

Training in progress, step 440000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf00e3d4f0141ee77d7fac9590f426eb5970750593815683fd979f14be837d4f
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a4ad94b9d5ed920cecf2394888d2a87ad3ee893c3c5ddbd5617ff00b81d3e84
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93aa30fc145e0d59395556dbcdda066166e18281fd9035e50e621fea7af14d91
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c579869ee7eca1f3cfb28c1e8c5c1a2c4c07c47eb9cbbb5f2453c93bc559b64a
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b43c6c8225f37c4e4c9b2cfb89653fd869da7112e0141a79fcb8ec050a3739
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f427c751ea4b109969727e0c5f2ef9ef6fd7587de8192ab50fc2201ab4ba3ed9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1894f32b2441ea8820978bbb44f8f2d9ce0a579e669301efbd1655378591798a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.591363311920057,
5
- "global_step": 430000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8606,11 +8606,211 @@
8606
  "eval_samples_per_second": 1115.617,
8607
  "eval_steps_per_second": 17.485,
8608
  "step": 430000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8609
  }
8610
  ],
8611
  "max_steps": 500000,
8612
  "num_train_epochs": 12,
8613
- "total_flos": 1.3737690878950762e+22,
8614
  "trial_name": null,
8615
  "trial_params": null
8616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.814418272662383,
5
+ "global_step": 440000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8606
  "eval_samples_per_second": 1115.617,
8607
  "eval_steps_per_second": 17.485,
8608
  "step": 430000
8609
+ },
8610
+ {
8611
+ "epoch": 9.6,
8612
+ "learning_rate": 2.5050805238106804e-05,
8613
+ "loss": 0.2561,
8614
+ "step": 430500
8615
+ },
8616
+ {
8617
+ "epoch": 9.61,
8618
+ "learning_rate": 2.4838784403798542e-05,
8619
+ "loss": 0.256,
8620
+ "step": 431000
8621
+ },
8622
+ {
8623
+ "epoch": 9.61,
8624
+ "eval_loss": 0.23836444318294525,
8625
+ "eval_runtime": 2.1211,
8626
+ "eval_samples_per_second": 1082.938,
8627
+ "eval_steps_per_second": 16.972,
8628
+ "step": 431000
8629
+ },
8630
+ {
8631
+ "epoch": 9.62,
8632
+ "learning_rate": 2.4628186991690346e-05,
8633
+ "loss": 0.256,
8634
+ "step": 431500
8635
+ },
8636
+ {
8637
+ "epoch": 9.64,
8638
+ "learning_rate": 2.4419015304841797e-05,
8639
+ "loss": 0.2562,
8640
+ "step": 432000
8641
+ },
8642
+ {
8643
+ "epoch": 9.64,
8644
+ "eval_loss": 0.2407396286725998,
8645
+ "eval_runtime": 2.084,
8646
+ "eval_samples_per_second": 1102.19,
8647
+ "eval_steps_per_second": 17.274,
8648
+ "step": 432000
8649
+ },
8650
+ {
8651
+ "epoch": 9.65,
8652
+ "learning_rate": 2.4211271630720957e-05,
8653
+ "loss": 0.2559,
8654
+ "step": 432500
8655
+ },
8656
+ {
8657
+ "epoch": 9.66,
8658
+ "learning_rate": 2.4004958241179347e-05,
8659
+ "loss": 0.2558,
8660
+ "step": 433000
8661
+ },
8662
+ {
8663
+ "epoch": 9.66,
8664
+ "eval_loss": 0.23972494900226593,
8665
+ "eval_runtime": 2.0364,
8666
+ "eval_samples_per_second": 1127.958,
8667
+ "eval_steps_per_second": 17.678,
8668
+ "step": 433000
8669
+ },
8670
+ {
8671
+ "epoch": 9.67,
8672
+ "learning_rate": 2.3800077392427193e-05,
8673
+ "loss": 0.2559,
8674
+ "step": 433500
8675
+ },
8676
+ {
8677
+ "epoch": 9.68,
8678
+ "learning_rate": 2.3596631325008536e-05,
8679
+ "loss": 0.256,
8680
+ "step": 434000
8681
+ },
8682
+ {
8683
+ "epoch": 9.68,
8684
+ "eval_loss": 0.24173137545585632,
8685
+ "eval_runtime": 2.0453,
8686
+ "eval_samples_per_second": 1123.05,
8687
+ "eval_steps_per_second": 17.601,
8688
+ "step": 434000
8689
+ },
8690
+ {
8691
+ "epoch": 9.69,
8692
+ "learning_rate": 2.3394622263777042e-05,
8693
+ "loss": 0.2566,
8694
+ "step": 434500
8695
+ },
8696
+ {
8697
+ "epoch": 9.7,
8698
+ "learning_rate": 2.3194052417871433e-05,
8699
+ "loss": 0.2558,
8700
+ "step": 435000
8701
+ },
8702
+ {
8703
+ "epoch": 9.7,
8704
+ "eval_loss": 0.23999714851379395,
8705
+ "eval_runtime": 2.0454,
8706
+ "eval_samples_per_second": 1123.023,
8707
+ "eval_steps_per_second": 17.601,
8708
+ "step": 435000
8709
+ },
8710
+ {
8711
+ "epoch": 9.71,
8712
+ "learning_rate": 2.2994923980691425e-05,
8713
+ "loss": 0.2556,
8714
+ "step": 435500
8715
+ },
8716
+ {
8717
+ "epoch": 9.73,
8718
+ "learning_rate": 2.279723912987365e-05,
8719
+ "loss": 0.2552,
8720
+ "step": 436000
8721
+ },
8722
+ {
8723
+ "epoch": 9.73,
8724
+ "eval_loss": 0.23865634202957153,
8725
+ "eval_runtime": 2.0599,
8726
+ "eval_samples_per_second": 1115.091,
8727
+ "eval_steps_per_second": 17.476,
8728
+ "step": 436000
8729
+ },
8730
+ {
8731
+ "epoch": 9.74,
8732
+ "learning_rate": 2.2601000027268006e-05,
8733
+ "loss": 0.2555,
8734
+ "step": 436500
8735
+ },
8736
+ {
8737
+ "epoch": 9.75,
8738
+ "learning_rate": 2.2406208818913857e-05,
8739
+ "loss": 0.2556,
8740
+ "step": 437000
8741
+ },
8742
+ {
8743
+ "epoch": 9.75,
8744
+ "eval_loss": 0.2411411553621292,
8745
+ "eval_runtime": 2.0137,
8746
+ "eval_samples_per_second": 1140.659,
8747
+ "eval_steps_per_second": 17.877,
8748
+ "step": 437000
8749
+ },
8750
+ {
8751
+ "epoch": 9.76,
8752
+ "learning_rate": 2.221286763501666e-05,
8753
+ "loss": 0.2571,
8754
+ "step": 437500
8755
+ },
8756
+ {
8757
+ "epoch": 9.77,
8758
+ "learning_rate": 2.2020978589924673e-05,
8759
+ "loss": 0.258,
8760
+ "step": 438000
8761
+ },
8762
+ {
8763
+ "epoch": 9.77,
8764
+ "eval_loss": 0.24090658128261566,
8765
+ "eval_runtime": 2.0077,
8766
+ "eval_samples_per_second": 1144.123,
8767
+ "eval_steps_per_second": 17.931,
8768
+ "step": 438000
8769
+ },
8770
+ {
8771
+ "epoch": 9.78,
8772
+ "learning_rate": 2.1830543782105647e-05,
8773
+ "loss": 0.2566,
8774
+ "step": 438500
8775
+ },
8776
+ {
8777
+ "epoch": 9.79,
8778
+ "learning_rate": 2.1641565294124206e-05,
8779
+ "loss": 0.2565,
8780
+ "step": 439000
8781
+ },
8782
+ {
8783
+ "epoch": 9.79,
8784
+ "eval_loss": 0.23793531954288483,
8785
+ "eval_runtime": 1.9748,
8786
+ "eval_samples_per_second": 1163.127,
8787
+ "eval_steps_per_second": 18.229,
8788
+ "step": 439000
8789
+ },
8790
+ {
8791
+ "epoch": 9.8,
8792
+ "learning_rate": 2.1454045192618794e-05,
8793
+ "loss": 0.2564,
8794
+ "step": 439500
8795
+ },
8796
+ {
8797
+ "epoch": 9.81,
8798
+ "learning_rate": 2.1267985528279212e-05,
8799
+ "loss": 0.2569,
8800
+ "step": 440000
8801
+ },
8802
+ {
8803
+ "epoch": 9.81,
8804
+ "eval_loss": 0.23971830308437347,
8805
+ "eval_runtime": 1.9911,
8806
+ "eval_samples_per_second": 1153.611,
8807
+ "eval_steps_per_second": 18.08,
8808
+ "step": 440000
8809
  }
8810
  ],
8811
  "max_steps": 500000,
8812
  "num_train_epochs": 12,
8813
+ "total_flos": 1.4057178017725262e+22,
8814
  "trial_name": null,
8815
  "trial_params": null
8816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93aa30fc145e0d59395556dbcdda066166e18281fd9035e50e621fea7af14d91
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c579869ee7eca1f3cfb28c1e8c5c1a2c4c07c47eb9cbbb5f2453c93bc559b64a
3
  size 102501541