jflotz commited on
Commit
6626a39
·
1 Parent(s): 16c4f23

Training in progress, step 890000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f011531683b29e6010c43a1101dacea9ed4a18363936f0bda8424b0b64fd61d5
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aed45f0ab31dea98b9869760d36ab73a26078c09333a23350a1212c72042c48
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:761b97519f5fabc1226f543fdfda1e57b01e50e22126717fe93929f009e6d948
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c213d1a42d199003850d981d6ccc1a53b07b35352b3c677b2fec2729c3a474
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:935da3b045c8c8b8ce754d2c39cf0981b2085a82929dd47cd40a448687388e6a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77bc69e9635ccd1de21423522e341a85a08863f86590b713104bba2dbfd70bb
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbdeea7578a634cee6754d432445c1d3c22f4d3700bcbfbce695f5cbf096739a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1993aafd21a33893d293353cc2d3a986655d484aa3f8d8bd3ce1158082956b62
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6c5ff5b48c10910314f1846b5d56765a69ff07e0a1c2179950e7e708be45c72
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d31a0dbc0a9de5b13d8d236df1a529fa25f2a462a9bcc23416d4f0397bad521d
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:538db9627b3319da412d82dd2d788799c67c9fd4db76e45825c3a463b4ec891a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2118e2247b5fdbceb9d1ec4a69c6b9d09754ac3081a89daca9da9f417d9a57c
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cefdc80aebec1bb7388820de673e9b9482e6730079e6308c4991cb6f1f6a1673
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce1b40c469ef20ff3c2e73618244894f6c048059642c597ebc6bc915a80cce6
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:078a7ec7b0b49e632a586c20c54dd7869580bfe51e6466659e4a1c049cec1d5c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce8dd4b4014883403bd302ffd7cbfd4827bc6596e89d4a566a074b59e6257940
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0ee34e421a4579fb938367d875bd6d0e9cb1e7cc940564954160dbbd10ab58c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c955d0a8374d68c8c7fd0b4a59ac81688e461d16a4ab83367d33bbcd82c828c2
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7f6c3ac5b3d9ff9275bcc6f2b68cf34b2b33a4d26ba0d17a6dea955ffb4848b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5d88d03c1c6897f3e815db204f99493002f497118b104ebefc70815e22888c1
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6c3b9739e151e083ac7876c028ada3eeb37ab066447f172eee69fac580c5323
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1d5e819bb6a0b170d191713e427e3ac82a202a5b895fa2fdb4da78756f26177
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2989453209078372,
5
- "global_step": 880000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10566,11 +10566,131 @@
10566
  "learning_rate": 1.543971715158307e-05,
10567
  "loss": 0.2863,
10568
  "step": 880000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10569
  }
10570
  ],
10571
  "max_steps": 1000000,
10572
  "num_train_epochs": 2,
10573
- "total_flos": 5.949418752030229e+22,
10574
  "trial_name": null,
10575
  "trial_params": null
10576
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3188750089683596,
5
+ "global_step": 890000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10566
  "learning_rate": 1.543971715158307e-05,
10567
  "loss": 0.2863,
10568
  "step": 880000
10569
+ },
10570
+ {
10571
+ "epoch": 1.3,
10572
+ "learning_rate": 1.539507089487205e-05,
10573
+ "loss": 0.2865,
10574
+ "step": 880500
10575
+ },
10576
+ {
10577
+ "epoch": 1.3,
10578
+ "learning_rate": 1.535060126557028e-05,
10579
+ "loss": 0.2861,
10580
+ "step": 881000
10581
+ },
10582
+ {
10583
+ "epoch": 1.3,
10584
+ "learning_rate": 1.5306308385255997e-05,
10585
+ "loss": 0.2859,
10586
+ "step": 881500
10587
+ },
10588
+ {
10589
+ "epoch": 1.3,
10590
+ "learning_rate": 1.5262192375024284e-05,
10591
+ "loss": 0.2856,
10592
+ "step": 882000
10593
+ },
10594
+ {
10595
+ "epoch": 1.3,
10596
+ "learning_rate": 1.521825335548661e-05,
10597
+ "loss": 0.2861,
10598
+ "step": 882500
10599
+ },
10600
+ {
10601
+ "epoch": 1.3,
10602
+ "learning_rate": 1.5174491446770566e-05,
10603
+ "loss": 0.2866,
10604
+ "step": 883000
10605
+ },
10606
+ {
10607
+ "epoch": 1.31,
10608
+ "learning_rate": 1.5130906768519563e-05,
10609
+ "loss": 0.2862,
10610
+ "step": 883500
10611
+ },
10612
+ {
10613
+ "epoch": 1.31,
10614
+ "learning_rate": 1.508749943989242e-05,
10615
+ "loss": 0.2864,
10616
+ "step": 884000
10617
+ },
10618
+ {
10619
+ "epoch": 1.31,
10620
+ "learning_rate": 1.5044269579563144e-05,
10621
+ "loss": 0.286,
10622
+ "step": 884500
10623
+ },
10624
+ {
10625
+ "epoch": 1.31,
10626
+ "learning_rate": 1.500121730572051e-05,
10627
+ "loss": 0.2859,
10628
+ "step": 885000
10629
+ },
10630
+ {
10631
+ "epoch": 1.31,
10632
+ "learning_rate": 1.4958342736067783e-05,
10633
+ "loss": 0.2856,
10634
+ "step": 885500
10635
+ },
10636
+ {
10637
+ "epoch": 1.31,
10638
+ "learning_rate": 1.4915645987822406e-05,
10639
+ "loss": 0.2858,
10640
+ "step": 886000
10641
+ },
10642
+ {
10643
+ "epoch": 1.31,
10644
+ "learning_rate": 1.4873127177715653e-05,
10645
+ "loss": 0.2847,
10646
+ "step": 886500
10647
+ },
10648
+ {
10649
+ "epoch": 1.31,
10650
+ "learning_rate": 1.4830786421992347e-05,
10651
+ "loss": 0.2863,
10652
+ "step": 887000
10653
+ },
10654
+ {
10655
+ "epoch": 1.31,
10656
+ "learning_rate": 1.4788623836410479e-05,
10657
+ "loss": 0.2857,
10658
+ "step": 887500
10659
+ },
10660
+ {
10661
+ "epoch": 1.31,
10662
+ "learning_rate": 1.4746639536240942e-05,
10663
+ "loss": 0.2856,
10664
+ "step": 888000
10665
+ },
10666
+ {
10667
+ "epoch": 1.32,
10668
+ "learning_rate": 1.4704833636267232e-05,
10669
+ "loss": 0.2858,
10670
+ "step": 888500
10671
+ },
10672
+ {
10673
+ "epoch": 1.32,
10674
+ "learning_rate": 1.4663206250785055e-05,
10675
+ "loss": 0.2854,
10676
+ "step": 889000
10677
+ },
10678
+ {
10679
+ "epoch": 1.32,
10680
+ "learning_rate": 1.4621757493602125e-05,
10681
+ "loss": 0.2857,
10682
+ "step": 889500
10683
+ },
10684
+ {
10685
+ "epoch": 1.32,
10686
+ "learning_rate": 1.4580487478037748e-05,
10687
+ "loss": 0.2854,
10688
+ "step": 890000
10689
  }
10690
  ],
10691
  "max_steps": 1000000,
10692
  "num_train_epochs": 2,
10693
+ "total_flos": 6.017029328628566e+22,
10694
  "trial_name": null,
10695
  "trial_params": null
10696
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:761b97519f5fabc1226f543fdfda1e57b01e50e22126717fe93929f009e6d948
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c213d1a42d199003850d981d6ccc1a53b07b35352b3c677b2fec2729c3a474
3
  size 449450757