jflotz committed on
Commit
2906bc4
·
1 Parent(s): 800e724

Training in progress, step 980000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e51baa4ddc0d5650abf5371aac2f77196b05031ccca7029b3d99ba99af85e57f
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a37ff0bb2125ba916e184875051de31c0a53ab6d8764d350d94b8f895cf97825
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acf04012905f76240c2902acedd8866c3a784e83992a5f4e0dc380bf807380dc
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29e72a077e735dda0d42d87bba36bdd29e6f03ff47e8d9d7c8531209111d469e
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a45db7e85e08c084e49c40cab0c2c6092d92f81b5fa24290a645085ef74f75b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22c34d3305b209ee825f5aed33939719436301cfaf9de55d7b3b5639a3350e80
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56d647c17f4fed38d972bfade7f44a26e438ac9b6b775a7bbc225c5be1e112bd
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6315416921462fe0ecc7a0c3f368f4b3c932064b761ab22ca7678bb4befc6c5
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e9cbd943c7dcfb1555090abbcd45a86173e47d10be2fa2e7308539ca596dff0
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a76def7122ba11f5e0b58c3da4b2e90151c2789ca35782d50ddc98428cb6201
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5917abb04845a366f52356ca50f06ea044267bd039a587ed19cc120ed161e748
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd7ce8763729f28d8acde8cd6b3dfea779e9b4dbaa1cc534d994758c7e6d95d
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:582d77403f5af050452c09ec279770dab4724f234e767ab55c84c502beea2905
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3093dd66e653e1347d48de0c3738e9baef47fa7023af660daaa6d276c2516c1c
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c6bb99cfe9f0c710fe2cc6cec0d5d888a917b4fa016be56cafcfbbd47bac76
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c71863cae64230816d8f7da13edddd177d84ad915ea936c18a2d7e479676590
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f81952f7451d63a6ff6bf67269698e7e674adab210fce43113020157f4cf03d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1106c2b8026b5aad28464b0bd2b8b204a664cd4c27abf0a87c50c85c6899ce87
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee4f92b2c66061ec16f42f6ff8db5a75108eff8cc62884e9d5c3c7875be42d2c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abf64edbda9080a72948f7486c0ac7635f48e74d5752a1a9ab0d947e838bf23
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf98a74aa6df8eeab9552258d949bc73dcd837ff3b88682e5ebe82858a949936
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f6a86f148673573f563d78ae30ad4429d07d6c9eca28255a514457fc218ec48
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4783125134525394,
5
- "global_step": 970000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11646,11 +11646,131 @@
11646
  "learning_rate": 1.0344196821849202e-05,
11647
  "loss": 0.2821,
11648
  "step": 970000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11649
  }
11650
  ],
11651
  "max_steps": 1000000,
11652
  "num_train_epochs": 2,
11653
- "total_flos": 6.557885636027719e+22,
11654
  "trial_name": null,
11655
  "trial_params": null
11656
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4982422015130619,
5
+ "global_step": 980000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11646
  "learning_rate": 1.0344196821849202e-05,
11647
  "loss": 0.2821,
11648
  "step": 970000
11649
+ },
11650
+ {
11651
+ "epoch": 1.48,
11652
+ "learning_rate": 1.0332828229586692e-05,
11653
+ "loss": 0.2831,
11654
+ "step": 970500
11655
+ },
11656
+ {
11657
+ "epoch": 1.48,
11658
+ "learning_rate": 1.032165010471157e-05,
11659
+ "loss": 0.2835,
11660
+ "step": 971000
11661
+ },
11662
+ {
11663
+ "epoch": 1.48,
11664
+ "learning_rate": 1.0310662477784401e-05,
11665
+ "loss": 0.2824,
11666
+ "step": 971500
11667
+ },
11668
+ {
11669
+ "epoch": 1.48,
11670
+ "learning_rate": 1.0299865378844936e-05,
11671
+ "loss": 0.2828,
11672
+ "step": 972000
11673
+ },
11674
+ {
11675
+ "epoch": 1.48,
11676
+ "learning_rate": 1.028925883741203e-05,
11677
+ "loss": 0.2832,
11678
+ "step": 972500
11679
+ },
11680
+ {
11681
+ "epoch": 1.48,
11682
+ "learning_rate": 1.0278842882483569e-05,
11683
+ "loss": 0.2836,
11684
+ "step": 973000
11685
+ },
11686
+ {
11687
+ "epoch": 1.49,
11688
+ "learning_rate": 1.026861754253637e-05,
11689
+ "loss": 0.2837,
11690
+ "step": 973500
11691
+ },
11692
+ {
11693
+ "epoch": 1.49,
11694
+ "learning_rate": 1.025858284552612e-05,
11695
+ "loss": 0.2829,
11696
+ "step": 974000
11697
+ },
11698
+ {
11699
+ "epoch": 1.49,
11700
+ "learning_rate": 1.0248738818887307e-05,
11701
+ "loss": 0.2829,
11702
+ "step": 974500
11703
+ },
11704
+ {
11705
+ "epoch": 1.49,
11706
+ "learning_rate": 1.023908548953311e-05,
11707
+ "loss": 0.2832,
11708
+ "step": 975000
11709
+ },
11710
+ {
11711
+ "epoch": 1.49,
11712
+ "learning_rate": 1.0229622883855378e-05,
11713
+ "loss": 0.2837,
11714
+ "step": 975500
11715
+ },
11716
+ {
11717
+ "epoch": 1.49,
11718
+ "learning_rate": 1.02203510277245e-05,
11719
+ "loss": 0.2832,
11720
+ "step": 976000
11721
+ },
11722
+ {
11723
+ "epoch": 1.49,
11724
+ "learning_rate": 1.021126994648939e-05,
11725
+ "loss": 0.2828,
11726
+ "step": 976500
11727
+ },
11728
+ {
11729
+ "epoch": 1.49,
11730
+ "learning_rate": 1.0202379664977364e-05,
11731
+ "loss": 0.2838,
11732
+ "step": 977000
11733
+ },
11734
+ {
11735
+ "epoch": 1.49,
11736
+ "learning_rate": 1.019368020749412e-05,
11737
+ "loss": 0.2828,
11738
+ "step": 977500
11739
+ },
11740
+ {
11741
+ "epoch": 1.49,
11742
+ "learning_rate": 1.018517159782365e-05,
11743
+ "loss": 0.2826,
11744
+ "step": 978000
11745
+ },
11746
+ {
11747
+ "epoch": 1.5,
11748
+ "learning_rate": 1.0176853859228149e-05,
11749
+ "loss": 0.2829,
11750
+ "step": 978500
11751
+ },
11752
+ {
11753
+ "epoch": 1.5,
11754
+ "learning_rate": 1.0168727014448004e-05,
11755
+ "loss": 0.2836,
11756
+ "step": 979000
11757
+ },
11758
+ {
11759
+ "epoch": 1.5,
11760
+ "learning_rate": 1.0160791085701714e-05,
11761
+ "loss": 0.2834,
11762
+ "step": 979500
11763
+ },
11764
+ {
11765
+ "epoch": 1.5,
11766
+ "learning_rate": 1.0153046094685783e-05,
11767
+ "loss": 0.2831,
11768
+ "step": 980000
11769
  }
11770
  ],
11771
  "max_steps": 1000000,
11772
  "num_train_epochs": 2,
11773
+ "total_flos": 6.625495046923828e+22,
11774
  "trial_name": null,
11775
  "trial_params": null
11776
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acf04012905f76240c2902acedd8866c3a784e83992a5f4e0dc380bf807380dc
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29e72a077e735dda0d42d87bba36bdd29e6f03ff47e8d9d7c8531209111d469e
3
  size 449450757