jflotz commited on
Commit
1ac6581
·
1 Parent(s): 2906bc4

Training in progress, step 990000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a37ff0bb2125ba916e184875051de31c0a53ab6d8764d350d94b8f895cf97825
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c84155e298a5a6b7ee070a8179e83bcd232eb2e5869a2fab62635fe434a15b
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29e72a077e735dda0d42d87bba36bdd29e6f03ff47e8d9d7c8531209111d469e
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6119a41cc84dfc82b1ad8e160d2de4f6723c4c70df2567e7b05d5135b2ad413c
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22c34d3305b209ee825f5aed33939719436301cfaf9de55d7b3b5639a3350e80
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d92331d191f9b54e8682c7179f1d46d8e4298cbee5bf860f80c01313d919cbf
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6315416921462fe0ecc7a0c3f368f4b3c932064b761ab22ca7678bb4befc6c5
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3f3f7f2089f141dbb6bfd17083ac633b398668fbdd29b4239ed78cd16828ef
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a76def7122ba11f5e0b58c3da4b2e90151c2789ca35782d50ddc98428cb6201
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1e11f0bc52abbbd19f8d4cb2af2cb9140b19cf7c5678a2b90ba0e9593a2025
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dd7ce8763729f28d8acde8cd6b3dfea779e9b4dbaa1cc534d994758c7e6d95d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bffc19433b361587fa636d9fe7f366ad621d886a3e88e7b929e925c972115064
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3093dd66e653e1347d48de0c3738e9baef47fa7023af660daaa6d276c2516c1c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf40a9092f2a61c65fa8d4b01ad107ecd14742f4e2c480971b7e313c2ab669b
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c71863cae64230816d8f7da13edddd177d84ad915ea936c18a2d7e479676590
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35d8ccac7b1580c03e5bd6e78ceed5d6b1e846215ceb31ec6a8e03829af4c92e
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1106c2b8026b5aad28464b0bd2b8b204a664cd4c27abf0a87c50c85c6899ce87
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a564015df3012aed298103a98abad0123fee1b00b18b24191670937f37a6281
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4abf64edbda9080a72948f7486c0ac7635f48e74d5752a1a9ab0d947e838bf23
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3839c7b56c36b9afc190821fb76a047c055639f3ce0f32d8cdce397cba6fa5f
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f6a86f148673573f563d78ae30ad4429d07d6c9eca28255a514457fc218ec48
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8600853d84ba2918d905e70ba99b5bea80c1839bef1da439315875bb3ad462d
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4982422015130619,
5
- "global_step": 980000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11766,11 +11766,131 @@
11766
  "learning_rate": 1.0153046094685783e-05,
11767
  "loss": 0.2831,
11768
  "step": 980000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11769
  }
11770
  ],
11771
  "max_steps": 1000000,
11772
  "num_train_epochs": 2,
11773
- "total_flos": 6.625495046923828e+22,
11774
  "trial_name": null,
11775
  "trial_params": null
11776
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.5181718895735843,
5
+ "global_step": 990000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11766
  "learning_rate": 1.0153046094685783e-05,
11767
  "loss": 0.2831,
11768
  "step": 980000
11769
+ },
11770
+ {
11771
+ "epoch": 1.5,
11772
+ "learning_rate": 1.0145492062574731e-05,
11773
+ "loss": 0.2826,
11774
+ "step": 980500
11775
+ },
11776
+ {
11777
+ "epoch": 1.5,
11778
+ "learning_rate": 1.0138129010020992e-05,
11779
+ "loss": 0.2831,
11780
+ "step": 981000
11781
+ },
11782
+ {
11783
+ "epoch": 1.5,
11784
+ "learning_rate": 1.0130956957154867e-05,
11785
+ "loss": 0.2831,
11786
+ "step": 981500
11787
+ },
11788
+ {
11789
+ "epoch": 1.5,
11790
+ "learning_rate": 1.0123975923584488e-05,
11791
+ "loss": 0.2824,
11792
+ "step": 982000
11793
+ },
11794
+ {
11795
+ "epoch": 1.5,
11796
+ "learning_rate": 1.0117185928395721e-05,
11797
+ "loss": 0.2828,
11798
+ "step": 982500
11799
+ },
11800
+ {
11801
+ "epoch": 1.5,
11802
+ "learning_rate": 1.0110586990152152e-05,
11803
+ "loss": 0.2831,
11804
+ "step": 983000
11805
+ },
11806
+ {
11807
+ "epoch": 1.51,
11808
+ "learning_rate": 1.0104179126895039e-05,
11809
+ "loss": 0.2829,
11810
+ "step": 983500
11811
+ },
11812
+ {
11813
+ "epoch": 1.51,
11814
+ "learning_rate": 1.0097962356143219e-05,
11815
+ "loss": 0.2831,
11816
+ "step": 984000
11817
+ },
11818
+ {
11819
+ "epoch": 1.51,
11820
+ "learning_rate": 1.009193669489312e-05,
11821
+ "loss": 0.2834,
11822
+ "step": 984500
11823
+ },
11824
+ {
11825
+ "epoch": 1.51,
11826
+ "learning_rate": 1.0086102159618668e-05,
11827
+ "loss": 0.2834,
11828
+ "step": 985000
11829
+ },
11830
+ {
11831
+ "epoch": 1.51,
11832
+ "learning_rate": 1.0080458766271252e-05,
11833
+ "loss": 0.2832,
11834
+ "step": 985500
11835
+ },
11836
+ {
11837
+ "epoch": 1.51,
11838
+ "learning_rate": 1.0075006530279694e-05,
11839
+ "loss": 0.2833,
11840
+ "step": 986000
11841
+ },
11842
+ {
11843
+ "epoch": 1.51,
11844
+ "learning_rate": 1.0069745466550205e-05,
11845
+ "loss": 0.2827,
11846
+ "step": 986500
11847
+ },
11848
+ {
11849
+ "epoch": 1.51,
11850
+ "learning_rate": 1.0064675589466339e-05,
11851
+ "loss": 0.2821,
11852
+ "step": 987000
11853
+ },
11854
+ {
11855
+ "epoch": 1.51,
11856
+ "learning_rate": 1.005979691288893e-05,
11857
+ "loss": 0.2834,
11858
+ "step": 987500
11859
+ },
11860
+ {
11861
+ "epoch": 1.51,
11862
+ "learning_rate": 1.0055109450156098e-05,
11863
+ "loss": 0.2831,
11864
+ "step": 988000
11865
+ },
11866
+ {
11867
+ "epoch": 1.52,
11868
+ "learning_rate": 1.0050613214083197e-05,
11869
+ "loss": 0.2822,
11870
+ "step": 988500
11871
+ },
11872
+ {
11873
+ "epoch": 1.52,
11874
+ "learning_rate": 1.0046308216962759e-05,
11875
+ "loss": 0.2826,
11876
+ "step": 989000
11877
+ },
11878
+ {
11879
+ "epoch": 1.52,
11880
+ "learning_rate": 1.0042194470564472e-05,
11881
+ "loss": 0.2824,
11882
+ "step": 989500
11883
+ },
11884
+ {
11885
+ "epoch": 1.52,
11886
+ "learning_rate": 1.0038271986135177e-05,
11887
+ "loss": 0.2827,
11888
+ "step": 990000
11889
  }
11890
  ],
11891
  "max_steps": 1000000,
11892
  "num_train_epochs": 2,
11893
+ "total_flos": 6.693099686599179e+22,
11894
  "trial_name": null,
11895
  "trial_params": null
11896
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29e72a077e735dda0d42d87bba36bdd29e6f03ff47e8d9d7c8531209111d469e
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6119a41cc84dfc82b1ad8e160d2de4f6723c4c70df2567e7b05d5135b2ad413c
3
  size 449450757