jflotz commited on
Commit
a24ab0e
·
1 Parent(s): 89c8e94

Training in progress, step 940000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46ff1d8d7d94b8fc5390751564ccb419f91a9858db9bdfd9f11bebe5113b944a
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecce8a24d008f6560478b115a93f60ad26f968d3ddf31f980be259930161927
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d2bac07d166f73980c3c9cce6825ce7a1c1f5f22b97d0264ce5e7ba42eeb3df
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e98104480ffe165c63b6085beb91814af4abc00786a31fbe9ca7364388e7fd7
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:523ad7de15419f9770d9e7768264983ffe3ed3b6b2e41d3411de9307e3568dda
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e5a9ccad5520e92d8772f310d7fdda3e07cfbb13ef5c7d62c7867e7bebc124b
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f02b76320d64a6c1fa36467fd16fb842d38967c6960f680698d2b6c46fa9ddb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c122c7c46f7a52340c6d76de6ecac3033b4eb22c4f622df7095c80bbdc58bbda
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb4ba7042a42dc2dddfda89081c338c759af082271bf3d6b2249d374dfb5549f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed89074af6bf56092de6f7f69bec6b0962f68e0ef26b7f849107336565f843e4
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d492de85ba9ca1e7e895ebf249111dbc1e669c10034dc3697642d6b066377bc6
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c180309b549d4feb0af8c96f555ed5574acd58bfc58b660812ae5e9d9e08c50
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87bf88b63314cf7e6bc76f627a1120e94df3871d06e2e6a10b31a22c360f0003
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bb0af833997f5b0c50461291524b2fc678bcc9d26f6e79d5bfe28f62abb339
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b861dd3cac874664255fa5c36d008a64ba119b36b6461f68fec95ff34294b32
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:824645f1c25785303da3dc203bf2689aba1f62a78c6bdfef5a484af4a0860aef
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64737ac04d5942009955008d940bd1d1005db5a2c22b7982f258d011e0ad8cb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e242b745198753e15159abe0972314bc254dd07db526f1dc4d19cc5c285dfb8e
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb15e8b762074a6cf94106e13146d7507cbb10c383c4eadc6b62f549780de27
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e565516de72353a91a8afad67f1979b57e142666bec47e27765241642f3d4f
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8bf7fa4ff5edd25c6aca18d0779a09bdac81be773b8a02595b2a01362d92297
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a619368d8fa94ba3412b069e884d3c02325231635774dd381c11c0f2a15299d7
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3985937612104495,
5
- "global_step": 930000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11166,11 +11166,131 @@
11166
  "learning_rate": 1.1867138192639601e-05,
11167
  "loss": 0.2842,
11168
  "step": 930000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11169
  }
11170
  ],
11171
  "max_steps": 1000000,
11172
  "num_train_epochs": 2,
11173
- "total_flos": 6.287461350629951e+22,
11174
  "trial_name": null,
11175
  "trial_params": null
11176
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.418523449270972,
5
+ "global_step": 940000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11166
  "learning_rate": 1.1867138192639601e-05,
11167
  "loss": 0.2842,
11168
  "step": 930000
11169
+ },
11170
+ {
11171
+ "epoch": 1.4,
11172
+ "learning_rate": 1.1840677154152987e-05,
11173
+ "loss": 0.2848,
11174
+ "step": 930500
11175
+ },
11176
+ {
11177
+ "epoch": 1.4,
11178
+ "learning_rate": 1.1814402460652382e-05,
11179
+ "loss": 0.2842,
11180
+ "step": 931000
11181
+ },
11182
+ {
11183
+ "epoch": 1.4,
11184
+ "learning_rate": 1.178831418397181e-05,
11185
+ "loss": 0.2839,
11186
+ "step": 931500
11187
+ },
11188
+ {
11189
+ "epoch": 1.4,
11190
+ "learning_rate": 1.176241239543558e-05,
11191
+ "loss": 0.2842,
11192
+ "step": 932000
11193
+ },
11194
+ {
11195
+ "epoch": 1.4,
11196
+ "learning_rate": 1.173669716585822e-05,
11197
+ "loss": 0.2847,
11198
+ "step": 932500
11199
+ },
11200
+ {
11201
+ "epoch": 1.4,
11202
+ "learning_rate": 1.171116856554418e-05,
11203
+ "loss": 0.2836,
11204
+ "step": 933000
11205
+ },
11206
+ {
11207
+ "epoch": 1.41,
11208
+ "learning_rate": 1.168582666428768e-05,
11209
+ "loss": 0.284,
11210
+ "step": 933500
11211
+ },
11212
+ {
11213
+ "epoch": 1.41,
11214
+ "learning_rate": 1.1660671531372517e-05,
11215
+ "loss": 0.2837,
11216
+ "step": 934000
11217
+ },
11218
+ {
11219
+ "epoch": 1.41,
11220
+ "learning_rate": 1.1635703235571846e-05,
11221
+ "loss": 0.2848,
11222
+ "step": 934500
11223
+ },
11224
+ {
11225
+ "epoch": 1.41,
11226
+ "learning_rate": 1.1610921845148052e-05,
11227
+ "loss": 0.2845,
11228
+ "step": 935000
11229
+ },
11230
+ {
11231
+ "epoch": 1.41,
11232
+ "learning_rate": 1.1586327427852503e-05,
11233
+ "loss": 0.2847,
11234
+ "step": 935500
11235
+ },
11236
+ {
11237
+ "epoch": 1.41,
11238
+ "learning_rate": 1.156192005092539e-05,
11239
+ "loss": 0.2843,
11240
+ "step": 936000
11241
+ },
11242
+ {
11243
+ "epoch": 1.41,
11244
+ "learning_rate": 1.153769978109557e-05,
11245
+ "loss": 0.2836,
11246
+ "step": 936500
11247
+ },
11248
+ {
11249
+ "epoch": 1.41,
11250
+ "learning_rate": 1.1513666684580308e-05,
11251
+ "loss": 0.2847,
11252
+ "step": 937000
11253
+ },
11254
+ {
11255
+ "epoch": 1.41,
11256
+ "learning_rate": 1.1489820827085185e-05,
11257
+ "loss": 0.2839,
11258
+ "step": 937500
11259
+ },
11260
+ {
11261
+ "epoch": 1.41,
11262
+ "learning_rate": 1.1466162273803876e-05,
11263
+ "loss": 0.2844,
11264
+ "step": 938000
11265
+ },
11266
+ {
11267
+ "epoch": 1.42,
11268
+ "learning_rate": 1.144269108941795e-05,
11269
+ "loss": 0.284,
11270
+ "step": 938500
11271
+ },
11272
+ {
11273
+ "epoch": 1.42,
11274
+ "learning_rate": 1.1419407338096732e-05,
11275
+ "loss": 0.285,
11276
+ "step": 939000
11277
+ },
11278
+ {
11279
+ "epoch": 1.42,
11280
+ "learning_rate": 1.1396311083497103e-05,
11281
+ "loss": 0.2841,
11282
+ "step": 939500
11283
+ },
11284
+ {
11285
+ "epoch": 1.42,
11286
+ "learning_rate": 1.1373402388763346e-05,
11287
+ "loss": 0.2834,
11288
+ "step": 940000
11289
  }
11290
  ],
11291
  "max_steps": 1000000,
11292
  "num_train_epochs": 2,
11293
+ "total_flos": 6.355066839128279e+22,
11294
  "trial_name": null,
11295
  "trial_params": null
11296
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d2bac07d166f73980c3c9cce6825ce7a1c1f5f22b97d0264ce5e7ba42eeb3df
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e98104480ffe165c63b6085beb91814af4abc00786a31fbe9ca7364388e7fd7
3
  size 449450757