jflotz commited on
Commit
ad05784
·
1 Parent(s): 35448ec

Training in progress, step 470000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b547411c79d357104c70e5661a49a33c4c727f3979f31a84e358578efcb1bf5d
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de66d4a93d929dbf59bf082b441005c55284067d059fcb692905debf93d43ec
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9908cf9403ac4306b20a0d3eb86bc03da4da8a781371917c67d6447e4350b51
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b866106aa5570064c241431938c823c9b3d9c0359a68aaf12dac64797d2c681f
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f347b3051d84f6b39fd81f0a25c5ca1660537f9dd78ea3f5348e6e0fe62d0
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d15e11a6de6abd55121a73bc214cc950fb971f927ae2b1d5067145da50de5d0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.260528194147037,
5
- "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9206,11 +9206,211 @@
9206
  "eval_samples_per_second": 1173.539,
9207
  "eval_steps_per_second": 18.392,
9208
  "step": 460000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9209
  }
9210
  ],
9211
  "max_steps": 500000,
9212
  "num_train_epochs": 12,
9213
- "total_flos": 1.4696123841012847e+22,
9214
  "trial_name": null,
9215
  "trial_params": null
9216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.483583154889365,
5
+ "global_step": 470000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9206
  "eval_samples_per_second": 1173.539,
9207
  "eval_steps_per_second": 18.392,
9208
  "step": 460000
9209
+ },
9210
+ {
9211
+ "epoch": 10.27,
9212
+ "learning_rate": 1.4920092237448903e-05,
9213
+ "loss": 0.2554,
9214
+ "step": 460500
9215
+ },
9216
+ {
9217
+ "epoch": 10.28,
9218
+ "learning_rate": 1.4797008356541874e-05,
9219
+ "loss": 0.2548,
9220
+ "step": 461000
9221
+ },
9222
+ {
9223
+ "epoch": 10.28,
9224
+ "eval_loss": 0.2381051927804947,
9225
+ "eval_runtime": 1.9705,
9226
+ "eval_samples_per_second": 1165.71,
9227
+ "eval_steps_per_second": 18.27,
9228
+ "step": 461000
9229
+ },
9230
+ {
9231
+ "epoch": 10.29,
9232
+ "learning_rate": 1.4675457713088947e-05,
9233
+ "loss": 0.2547,
9234
+ "step": 461500
9235
+ },
9236
+ {
9237
+ "epoch": 10.31,
9238
+ "learning_rate": 1.4555441636348494e-05,
9239
+ "loss": 0.2549,
9240
+ "step": 462000
9241
+ },
9242
+ {
9243
+ "epoch": 10.31,
9244
+ "eval_loss": 0.23718567192554474,
9245
+ "eval_runtime": 2.0107,
9246
+ "eval_samples_per_second": 1142.393,
9247
+ "eval_steps_per_second": 17.904,
9248
+ "step": 462000
9249
+ },
9250
+ {
9251
+ "epoch": 10.32,
9252
+ "learning_rate": 1.4436961438797095e-05,
9253
+ "loss": 0.2547,
9254
+ "step": 462500
9255
+ },
9256
+ {
9257
+ "epoch": 10.33,
9258
+ "learning_rate": 1.4320018416115206e-05,
9259
+ "loss": 0.2548,
9260
+ "step": 463000
9261
+ },
9262
+ {
9263
+ "epoch": 10.33,
9264
+ "eval_loss": 0.23715750873088837,
9265
+ "eval_runtime": 2.0434,
9266
+ "eval_samples_per_second": 1124.132,
9267
+ "eval_steps_per_second": 17.618,
9268
+ "step": 463000
9269
+ },
9270
+ {
9271
+ "epoch": 10.34,
9272
+ "learning_rate": 1.4204613847173003e-05,
9273
+ "loss": 0.2547,
9274
+ "step": 463500
9275
+ },
9276
+ {
9277
+ "epoch": 10.35,
9278
+ "learning_rate": 1.4090748994016354e-05,
9279
+ "loss": 0.2547,
9280
+ "step": 464000
9281
+ },
9282
+ {
9283
+ "epoch": 10.35,
9284
+ "eval_loss": 0.23930229246616364,
9285
+ "eval_runtime": 2.02,
9286
+ "eval_samples_per_second": 1137.119,
9287
+ "eval_steps_per_second": 17.822,
9288
+ "step": 464000
9289
+ },
9290
+ {
9291
+ "epoch": 10.36,
9292
+ "learning_rate": 1.3978425101853049e-05,
9293
+ "loss": 0.2545,
9294
+ "step": 464500
9295
+ },
9296
+ {
9297
+ "epoch": 10.37,
9298
+ "learning_rate": 1.3867643399039165e-05,
9299
+ "loss": 0.2546,
9300
+ "step": 465000
9301
+ },
9302
+ {
9303
+ "epoch": 10.37,
9304
+ "eval_loss": 0.24023665487766266,
9305
+ "eval_runtime": 2.0161,
9306
+ "eval_samples_per_second": 1139.319,
9307
+ "eval_steps_per_second": 17.856,
9308
+ "step": 465000
9309
+ },
9310
+ {
9311
+ "epoch": 10.38,
9312
+ "learning_rate": 1.3758405097065648e-05,
9313
+ "loss": 0.2547,
9314
+ "step": 465500
9315
+ },
9316
+ {
9317
+ "epoch": 10.39,
9318
+ "learning_rate": 1.3650711390545131e-05,
9319
+ "loss": 0.2549,
9320
+ "step": 466000
9321
+ },
9322
+ {
9323
+ "epoch": 10.39,
9324
+ "eval_loss": 0.2383406162261963,
9325
+ "eval_runtime": 2.0372,
9326
+ "eval_samples_per_second": 1127.524,
9327
+ "eval_steps_per_second": 17.671,
9328
+ "step": 466000
9329
+ },
9330
+ {
9331
+ "epoch": 10.41,
9332
+ "learning_rate": 1.3544563457198657e-05,
9333
+ "loss": 0.2546,
9334
+ "step": 466500
9335
+ },
9336
+ {
9337
+ "epoch": 10.42,
9338
+ "learning_rate": 1.343996245784307e-05,
9339
+ "loss": 0.2545,
9340
+ "step": 467000
9341
+ },
9342
+ {
9343
+ "epoch": 10.42,
9344
+ "eval_loss": 0.23841743171215057,
9345
+ "eval_runtime": 1.9888,
9346
+ "eval_samples_per_second": 1154.948,
9347
+ "eval_steps_per_second": 18.101,
9348
+ "step": 467000
9349
+ },
9350
+ {
9351
+ "epoch": 10.43,
9352
+ "learning_rate": 1.3336909536378107e-05,
9353
+ "loss": 0.2549,
9354
+ "step": 467500
9355
+ },
9356
+ {
9357
+ "epoch": 10.44,
9358
+ "learning_rate": 1.3235405819774022e-05,
9359
+ "loss": 0.2544,
9360
+ "step": 468000
9361
+ },
9362
+ {
9363
+ "epoch": 10.44,
9364
+ "eval_loss": 0.2374790459871292,
9365
+ "eval_runtime": 2.0476,
9366
+ "eval_samples_per_second": 1121.805,
9367
+ "eval_steps_per_second": 17.582,
9368
+ "step": 468000
9369
+ },
9370
+ {
9371
+ "epoch": 10.45,
9372
+ "learning_rate": 1.3135452418059208e-05,
9373
+ "loss": 0.2543,
9374
+ "step": 468500
9375
+ },
9376
+ {
9377
+ "epoch": 10.46,
9378
+ "learning_rate": 1.3037050424308027e-05,
9379
+ "loss": 0.2544,
9380
+ "step": 469000
9381
+ },
9382
+ {
9383
+ "epoch": 10.46,
9384
+ "eval_loss": 0.2366662174463272,
9385
+ "eval_runtime": 2.0397,
9386
+ "eval_samples_per_second": 1126.142,
9387
+ "eval_steps_per_second": 17.65,
9388
+ "step": 469000
9389
+ },
9390
+ {
9391
+ "epoch": 10.47,
9392
+ "learning_rate": 1.2940200914628945e-05,
9393
+ "loss": 0.2547,
9394
+ "step": 469500
9395
+ },
9396
+ {
9397
+ "epoch": 10.48,
9398
+ "learning_rate": 1.2844904948152644e-05,
9399
+ "loss": 0.255,
9400
+ "step": 470000
9401
+ },
9402
+ {
9403
+ "epoch": 10.48,
9404
+ "eval_loss": 0.23585031926631927,
9405
+ "eval_runtime": 1.91,
9406
+ "eval_samples_per_second": 1202.649,
9407
+ "eval_steps_per_second": 18.849,
9408
+ "step": 470000
9409
  }
9410
  ],
9411
  "max_steps": 500000,
9412
  "num_train_epochs": 12,
9413
+ "total_flos": 1.5015610979787347e+22,
9414
  "trial_name": null,
9415
  "trial_params": null
9416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9908cf9403ac4306b20a0d3eb86bc03da4da8a781371917c67d6447e4350b51
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b866106aa5570064c241431938c823c9b3d9c0359a68aaf12dac64797d2c681f
3
  size 102501541