jflotz committed on
Commit
115020b
·
1 Parent(s): 28c2a2b

Training in progress, step 890000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86cf27fbaeb2a38de0ef33258b77f6fefbd96bfd63b67353f72569cf9236a376
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:683c63bc197cf3bb64f6c2ce95a62fc4f0bf6028b19e6d2e5831707a2f06c758
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff45cd407febf926d10bda98ff4d352e6977480876fc00eacce9c1938f55c43
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f6901c05f87feaf8e889e20cabff1c85d845893260d2343fe525c0b122a6e9
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebaa6261431616bb924fa3611c1e782327703255936f9b7e34a1eda29c117895
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2afa6aa14483adb7c817c2439178a198c4680dbfe427eab82def33bea1566914
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.457390450910632,
5
- "global_step": 880000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -17606,11 +17606,211 @@
17606
  "eval_samples_per_second": 870.144,
17607
  "eval_steps_per_second": 13.637,
17608
  "step": 880000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17609
  }
17610
  ],
17611
  "max_steps": 1000000,
17612
  "num_train_epochs": 12,
17613
- "total_flos": 6.168773682642908e+22,
17614
  "trial_name": null,
17615
  "trial_params": null
17616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.568919175133555,
5
+ "global_step": 890000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
17606
  "eval_samples_per_second": 870.144,
17607
  "eval_steps_per_second": 13.637,
17608
  "step": 880000
17609
+ },
17610
+ {
17611
+ "epoch": 9.46,
17612
+ "learning_rate": 1.539507089487205e-05,
17613
+ "loss": 0.183,
17614
+ "step": 880500
17615
+ },
17616
+ {
17617
+ "epoch": 9.47,
17618
+ "learning_rate": 1.535060126557028e-05,
17619
+ "loss": 0.1829,
17620
+ "step": 881000
17621
+ },
17622
+ {
17623
+ "epoch": 9.47,
17624
+ "eval_loss": 0.17408204078674316,
17625
+ "eval_runtime": 2.6439,
17626
+ "eval_samples_per_second": 868.809,
17627
+ "eval_steps_per_second": 13.617,
17628
+ "step": 881000
17629
+ },
17630
+ {
17631
+ "epoch": 9.47,
17632
+ "learning_rate": 1.5306308385255997e-05,
17633
+ "loss": 0.1827,
17634
+ "step": 881500
17635
+ },
17636
+ {
17637
+ "epoch": 9.48,
17638
+ "learning_rate": 1.5262192375024284e-05,
17639
+ "loss": 0.1827,
17640
+ "step": 882000
17641
+ },
17642
+ {
17643
+ "epoch": 9.48,
17644
+ "eval_loss": 0.17428572475910187,
17645
+ "eval_runtime": 2.6251,
17646
+ "eval_samples_per_second": 875.0,
17647
+ "eval_steps_per_second": 13.714,
17648
+ "step": 882000
17649
+ },
17650
+ {
17651
+ "epoch": 9.49,
17652
+ "learning_rate": 1.521825335548661e-05,
17653
+ "loss": 0.1832,
17654
+ "step": 882500
17655
+ },
17656
+ {
17657
+ "epoch": 9.49,
17658
+ "learning_rate": 1.5174491446770566e-05,
17659
+ "loss": 0.1827,
17660
+ "step": 883000
17661
+ },
17662
+ {
17663
+ "epoch": 9.49,
17664
+ "eval_loss": 0.17153075337409973,
17665
+ "eval_runtime": 2.6515,
17666
+ "eval_samples_per_second": 866.317,
17667
+ "eval_steps_per_second": 13.577,
17668
+ "step": 883000
17669
+ },
17670
+ {
17671
+ "epoch": 9.5,
17672
+ "learning_rate": 1.5130906768519563e-05,
17673
+ "loss": 0.1827,
17674
+ "step": 883500
17675
+ },
17676
+ {
17677
+ "epoch": 9.5,
17678
+ "learning_rate": 1.508749943989242e-05,
17679
+ "loss": 0.183,
17680
+ "step": 884000
17681
+ },
17682
+ {
17683
+ "epoch": 9.5,
17684
+ "eval_loss": 0.17301537096500397,
17685
+ "eval_runtime": 2.656,
17686
+ "eval_samples_per_second": 864.819,
17687
+ "eval_steps_per_second": 13.554,
17688
+ "step": 884000
17689
+ },
17690
+ {
17691
+ "epoch": 9.51,
17692
+ "learning_rate": 1.5044269579563144e-05,
17693
+ "loss": 0.1825,
17694
+ "step": 884500
17695
+ },
17696
+ {
17697
+ "epoch": 9.51,
17698
+ "learning_rate": 1.500121730572051e-05,
17699
+ "loss": 0.183,
17700
+ "step": 885000
17701
+ },
17702
+ {
17703
+ "epoch": 9.51,
17704
+ "eval_loss": 0.17374014854431152,
17705
+ "eval_runtime": 2.719,
17706
+ "eval_samples_per_second": 844.787,
17707
+ "eval_steps_per_second": 13.24,
17708
+ "step": 885000
17709
+ },
17710
+ {
17711
+ "epoch": 9.52,
17712
+ "learning_rate": 1.4958342736067783e-05,
17713
+ "loss": 0.1829,
17714
+ "step": 885500
17715
+ },
17716
+ {
17717
+ "epoch": 9.52,
17718
+ "learning_rate": 1.4915645987822406e-05,
17719
+ "loss": 0.1829,
17720
+ "step": 886000
17721
+ },
17722
+ {
17723
+ "epoch": 9.52,
17724
+ "eval_loss": 0.17604438960552216,
17725
+ "eval_runtime": 2.7026,
17726
+ "eval_samples_per_second": 849.921,
17727
+ "eval_steps_per_second": 13.32,
17728
+ "step": 886000
17729
+ },
17730
+ {
17731
+ "epoch": 9.53,
17732
+ "learning_rate": 1.4873127177715653e-05,
17733
+ "loss": 0.1827,
17734
+ "step": 886500
17735
+ },
17736
+ {
17737
+ "epoch": 9.54,
17738
+ "learning_rate": 1.4830786421992347e-05,
17739
+ "loss": 0.1829,
17740
+ "step": 887000
17741
+ },
17742
+ {
17743
+ "epoch": 9.54,
17744
+ "eval_loss": 0.17339639365673065,
17745
+ "eval_runtime": 2.7392,
17746
+ "eval_samples_per_second": 838.573,
17747
+ "eval_steps_per_second": 13.143,
17748
+ "step": 887000
17749
+ },
17750
+ {
17751
+ "epoch": 9.54,
17752
+ "learning_rate": 1.4788623836410479e-05,
17753
+ "loss": 0.1823,
17754
+ "step": 887500
17755
+ },
17756
+ {
17757
+ "epoch": 9.55,
17758
+ "learning_rate": 1.4746639536240942e-05,
17759
+ "loss": 0.1824,
17760
+ "step": 888000
17761
+ },
17762
+ {
17763
+ "epoch": 9.55,
17764
+ "eval_loss": 0.17382191121578217,
17765
+ "eval_runtime": 2.7435,
17766
+ "eval_samples_per_second": 837.254,
17767
+ "eval_steps_per_second": 13.122,
17768
+ "step": 888000
17769
+ },
17770
+ {
17771
+ "epoch": 9.55,
17772
+ "learning_rate": 1.4704833636267232e-05,
17773
+ "loss": 0.1825,
17774
+ "step": 888500
17775
+ },
17776
+ {
17777
+ "epoch": 9.56,
17778
+ "learning_rate": 1.4663206250785055e-05,
17779
+ "loss": 0.1824,
17780
+ "step": 889000
17781
+ },
17782
+ {
17783
+ "epoch": 9.56,
17784
+ "eval_loss": 0.17390523850917816,
17785
+ "eval_runtime": 2.7145,
17786
+ "eval_samples_per_second": 846.211,
17787
+ "eval_steps_per_second": 13.262,
17788
+ "step": 889000
17789
+ },
17790
+ {
17791
+ "epoch": 9.56,
17792
+ "learning_rate": 1.4621757493602125e-05,
17793
+ "loss": 0.1826,
17794
+ "step": 889500
17795
+ },
17796
+ {
17797
+ "epoch": 9.57,
17798
+ "learning_rate": 1.4580487478037748e-05,
17799
+ "loss": 0.1826,
17800
+ "step": 890000
17801
+ },
17802
+ {
17803
+ "epoch": 9.57,
17804
+ "eval_loss": 0.17268939316272736,
17805
+ "eval_runtime": 2.6865,
17806
+ "eval_samples_per_second": 855.002,
17807
+ "eval_steps_per_second": 13.4,
17808
+ "step": 890000
17809
  }
17810
  ],
17811
  "max_steps": 1000000,
17812
  "num_train_epochs": 12,
17813
+ "total_flos": 6.238873943673255e+22,
17814
  "trial_name": null,
17815
  "trial_params": null
17816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff45cd407febf926d10bda98ff4d352e6977480876fc00eacce9c1938f55c43
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f6901c05f87feaf8e889e20cabff1c85d845893260d2343fe525c0b122a6e9
3
  size 449471589