jflotz committed on
Commit
3271735
·
1 Parent(s): 6626a39

Training in progress, step 900000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7aed45f0ab31dea98b9869760d36ab73a26078c09333a23350a1212c72042c48
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e68aaa7f20cf2655b5ee95587161e9676e99cc34136556add0f9965734e7755
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07c213d1a42d199003850d981d6ccc1a53b07b35352b3c677b2fec2729c3a474
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59e51d656bcb12cf1d2e57afead6fbd882c17c7bfca89d40bc32ba44fc16d622
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c77bc69e9635ccd1de21423522e341a85a08863f86590b713104bba2dbfd70bb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ad6fff41d308df46b9a1b2f331c0f91fef2419029183c0886a7d45a3462024
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1993aafd21a33893d293353cc2d3a986655d484aa3f8d8bd3ce1158082956b62
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a40d19463da3fc8105dffb32921ceec5ee75e4844d0eca7759a43c7f4b14bbba
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d31a0dbc0a9de5b13d8d236df1a529fa25f2a462a9bcc23416d4f0397bad521d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd73f45d2e32308ab057499cc1554bb0a6b5dce90d1016017772831b0342b94
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2118e2247b5fdbceb9d1ec4a69c6b9d09754ac3081a89daca9da9f417d9a57c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90198646614d410142ba8b69e323e9efb6043fdb55e8f55a8936dc33416f4c8d
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ce1b40c469ef20ff3c2e73618244894f6c048059642c597ebc6bc915a80cce6
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d370228b86aa2250b5e7dcf8be134cb35dc855b6e23853f0ea74181606aa91c
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce8dd4b4014883403bd302ffd7cbfd4827bc6596e89d4a566a074b59e6257940
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:052919acbfa40e0f835c2850fde0f02534bb1eb7e31ee0f5a540aca7e0f25da3
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c955d0a8374d68c8c7fd0b4a59ac81688e461d16a4ab83367d33bbcd82c828c2
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5c995fa02cdc8d504061c7c2c431119a7ac4cd2ff34fa2e7bd0545514b474d1
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5d88d03c1c6897f3e815db204f99493002f497118b104ebefc70815e22888c1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f95e8365bd03b8115e7ed1873af4534b3ec6096af843d6743d8130c920c2cc
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1d5e819bb6a0b170d191713e427e3ac82a202a5b895fa2fdb4da78756f26177
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c027d2ad1a7ec1ccedf4c28ec4cf8e5a70643d6805bfcfeb4cb2085d818a7bdc
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3188750089683596,
5
- "global_step": 890000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10686,11 +10686,131 @@
10686
  "learning_rate": 1.4580487478037748e-05,
10687
  "loss": 0.2854,
10688
  "step": 890000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10689
  }
10690
  ],
10691
  "max_steps": 1000000,
10692
  "num_train_epochs": 2,
10693
- "total_flos": 6.017029328628566e+22,
10694
  "trial_name": null,
10695
  "trial_params": null
10696
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.338804697028882,
5
+ "global_step": 900000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10686
  "learning_rate": 1.4580487478037748e-05,
10687
  "loss": 0.2854,
10688
  "step": 890000
10689
+ },
10690
+ {
10691
+ "epoch": 1.32,
10692
+ "learning_rate": 1.4539396316922552e-05,
10693
+ "loss": 0.2862,
10694
+ "step": 890500
10695
+ },
10696
+ {
10697
+ "epoch": 1.32,
10698
+ "learning_rate": 1.4498484122598232e-05,
10699
+ "loss": 0.2858,
10700
+ "step": 891000
10701
+ },
10702
+ {
10703
+ "epoch": 1.32,
10704
+ "learning_rate": 1.4457751006917137e-05,
10705
+ "loss": 0.2853,
10706
+ "step": 891500
10707
+ },
10708
+ {
10709
+ "epoch": 1.32,
10710
+ "learning_rate": 1.4417197081242083e-05,
10711
+ "loss": 0.2856,
10712
+ "step": 892000
10713
+ },
10714
+ {
10715
+ "epoch": 1.32,
10716
+ "learning_rate": 1.4376822456445926e-05,
10717
+ "loss": 0.2859,
10718
+ "step": 892500
10719
+ },
10720
+ {
10721
+ "epoch": 1.32,
10722
+ "learning_rate": 1.433662724291136e-05,
10723
+ "loss": 0.2859,
10724
+ "step": 893000
10725
+ },
10726
+ {
10727
+ "epoch": 1.33,
10728
+ "learning_rate": 1.4296611550530563e-05,
10729
+ "loss": 0.2859,
10730
+ "step": 893500
10731
+ },
10732
+ {
10733
+ "epoch": 1.33,
10734
+ "learning_rate": 1.4256775488704904e-05,
10735
+ "loss": 0.2859,
10736
+ "step": 894000
10737
+ },
10738
+ {
10739
+ "epoch": 1.33,
10740
+ "learning_rate": 1.4217119166344665e-05,
10741
+ "loss": 0.2851,
10742
+ "step": 894500
10743
+ },
10744
+ {
10745
+ "epoch": 1.33,
10746
+ "learning_rate": 1.4177642691868717e-05,
10747
+ "loss": 0.2856,
10748
+ "step": 895000
10749
+ },
10750
+ {
10751
+ "epoch": 1.33,
10752
+ "learning_rate": 1.4138346173204218e-05,
10753
+ "loss": 0.2855,
10754
+ "step": 895500
10755
+ },
10756
+ {
10757
+ "epoch": 1.33,
10758
+ "learning_rate": 1.4099229717786368e-05,
10759
+ "loss": 0.2857,
10760
+ "step": 896000
10761
+ },
10762
+ {
10763
+ "epoch": 1.33,
10764
+ "learning_rate": 1.406029343255806e-05,
10765
+ "loss": 0.2852,
10766
+ "step": 896500
10767
+ },
10768
+ {
10769
+ "epoch": 1.33,
10770
+ "learning_rate": 1.4021537423969588e-05,
10771
+ "loss": 0.2858,
10772
+ "step": 897000
10773
+ },
10774
+ {
10775
+ "epoch": 1.33,
10776
+ "learning_rate": 1.3982961797978431e-05,
10777
+ "loss": 0.2853,
10778
+ "step": 897500
10779
+ },
10780
+ {
10781
+ "epoch": 1.33,
10782
+ "learning_rate": 1.3944566660048863e-05,
10783
+ "loss": 0.2851,
10784
+ "step": 898000
10785
+ },
10786
+ {
10787
+ "epoch": 1.34,
10788
+ "learning_rate": 1.3906352115151725e-05,
10789
+ "loss": 0.285,
10790
+ "step": 898500
10791
+ },
10792
+ {
10793
+ "epoch": 1.34,
10794
+ "learning_rate": 1.3868318267764128e-05,
10795
+ "loss": 0.2854,
10796
+ "step": 899000
10797
+ },
10798
+ {
10799
+ "epoch": 1.34,
10800
+ "learning_rate": 1.3830465221869146e-05,
10801
+ "loss": 0.2855,
10802
+ "step": 899500
10803
+ },
10804
+ {
10805
+ "epoch": 1.34,
10806
+ "learning_rate": 1.3792793080955574e-05,
10807
+ "loss": 0.2852,
10808
+ "step": 900000
10809
  }
10810
  ],
10811
  "max_steps": 1000000,
10812
  "num_train_epochs": 2,
10813
+ "total_flos": 6.0846394803756095e+22,
10814
  "trial_name": null,
10815
  "trial_params": null
10816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07c213d1a42d199003850d981d6ccc1a53b07b35352b3c677b2fec2729c3a474
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59e51d656bcb12cf1d2e57afead6fbd882c17c7bfca89d40bc32ba44fc16d622
3
  size 449450757