jflotz commited on
Commit
79155c1
·
1 Parent(s): 5e11f4e

Training in progress, step 870000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a77d7445b898e81ba1dbf302ad4aa88b3930b40801531ded9875b083edd127
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4a2010561ae6b219703766f77123488046a173556bad32795a979a0714f6e7c
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9da54e69aa4d985aac499cdb17fab8ee1c2bb36a1855c776c5bda5c5106d784
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70f0274e15c89ba1e5e2f894493b1d0b23475cd923b06c04110b0afc32880fa
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1af16532ed7776301ec2b0d23baf8c67ba74ec07e3f7e0782860705643ea3c80
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d368f4f79d735aeb82977d11fd8d84913a3919ff8ecbae0982e3d606c331447e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.234333002464785,
5
- "global_step": 860000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -17206,11 +17206,211 @@
17206
  "eval_samples_per_second": 862.261,
17207
  "eval_steps_per_second": 13.514,
17208
  "step": 860000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17209
  }
17210
  ],
17211
  "max_steps": 1000000,
17212
  "num_train_epochs": 12,
17213
- "total_flos": 6.028573160582214e+22,
17214
  "trial_name": null,
17215
  "trial_params": null
17216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.345861726687708,
5
+ "global_step": 870000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
17206
  "eval_samples_per_second": 862.261,
17207
  "eval_steps_per_second": 13.514,
17208
  "step": 860000
17209
+ },
17210
+ {
17211
+ "epoch": 9.24,
17212
+ "learning_rate": 1.7317340758783407e-05,
17213
+ "loss": 0.1835,
17214
+ "step": 860500
17215
+ },
17216
+ {
17217
+ "epoch": 9.25,
17218
+ "learning_rate": 1.726590597701708e-05,
17219
+ "loss": 0.1842,
17220
+ "step": 861000
17221
+ },
17222
+ {
17223
+ "epoch": 9.25,
17224
+ "eval_loss": 0.17551767826080322,
17225
+ "eval_runtime": 2.6099,
17226
+ "eval_samples_per_second": 880.108,
17227
+ "eval_steps_per_second": 13.794,
17228
+ "step": 861000
17229
+ },
17230
+ {
17231
+ "epoch": 9.25,
17232
+ "learning_rate": 1.7214642707868325e-05,
17233
+ "loss": 0.1839,
17234
+ "step": 861500
17235
+ },
17236
+ {
17237
+ "epoch": 9.26,
17238
+ "learning_rate": 1.7163551091488952e-05,
17239
+ "loss": 0.1839,
17240
+ "step": 862000
17241
+ },
17242
+ {
17243
+ "epoch": 9.26,
17244
+ "eval_loss": 0.17372268438339233,
17245
+ "eval_runtime": 2.641,
17246
+ "eval_samples_per_second": 869.76,
17247
+ "eval_steps_per_second": 13.631,
17248
+ "step": 862000
17249
+ },
17250
+ {
17251
+ "epoch": 9.26,
17252
+ "learning_rate": 1.711263126756148e-05,
17253
+ "loss": 0.1841,
17254
+ "step": 862500
17255
+ },
17256
+ {
17257
+ "epoch": 9.27,
17258
+ "learning_rate": 1.7061883375298788e-05,
17259
+ "loss": 0.1834,
17260
+ "step": 863000
17261
+ },
17262
+ {
17263
+ "epoch": 9.27,
17264
+ "eval_loss": 0.17352163791656494,
17265
+ "eval_runtime": 2.6082,
17266
+ "eval_samples_per_second": 880.7,
17267
+ "eval_steps_per_second": 13.803,
17268
+ "step": 863000
17269
+ },
17270
+ {
17271
+ "epoch": 9.27,
17272
+ "learning_rate": 1.7011307553443647e-05,
17273
+ "loss": 0.1837,
17274
+ "step": 863500
17275
+ },
17276
+ {
17277
+ "epoch": 9.28,
17278
+ "learning_rate": 1.6960903940268456e-05,
17279
+ "loss": 0.1836,
17280
+ "step": 864000
17281
+ },
17282
+ {
17283
+ "epoch": 9.28,
17284
+ "eval_loss": 0.17270448803901672,
17285
+ "eval_runtime": 2.6998,
17286
+ "eval_samples_per_second": 850.79,
17287
+ "eval_steps_per_second": 13.334,
17288
+ "step": 864000
17289
+ },
17290
+ {
17291
+ "epoch": 9.28,
17292
+ "learning_rate": 1.6910672673574746e-05,
17293
+ "loss": 0.1838,
17294
+ "step": 864500
17295
+ },
17296
+ {
17297
+ "epoch": 9.29,
17298
+ "learning_rate": 1.6860613890692876e-05,
17299
+ "loss": 0.1832,
17300
+ "step": 865000
17301
+ },
17302
+ {
17303
+ "epoch": 9.29,
17304
+ "eval_loss": 0.1736259162425995,
17305
+ "eval_runtime": 2.674,
17306
+ "eval_samples_per_second": 859.019,
17307
+ "eval_steps_per_second": 13.463,
17308
+ "step": 865000
17309
+ },
17310
+ {
17311
+ "epoch": 9.3,
17312
+ "learning_rate": 1.6810727728481673e-05,
17313
+ "loss": 0.1833,
17314
+ "step": 865500
17315
+ },
17316
+ {
17317
+ "epoch": 9.3,
17318
+ "learning_rate": 1.6761014323327962e-05,
17319
+ "loss": 0.1835,
17320
+ "step": 866000
17321
+ },
17322
+ {
17323
+ "epoch": 9.3,
17324
+ "eval_loss": 0.17491458356380463,
17325
+ "eval_runtime": 2.6701,
17326
+ "eval_samples_per_second": 860.268,
17327
+ "eval_steps_per_second": 13.483,
17328
+ "step": 866000
17329
+ },
17330
+ {
17331
+ "epoch": 9.31,
17332
+ "learning_rate": 1.6711473811146333e-05,
17333
+ "loss": 0.1836,
17334
+ "step": 866500
17335
+ },
17336
+ {
17337
+ "epoch": 9.31,
17338
+ "learning_rate": 1.6662106327378645e-05,
17339
+ "loss": 0.1837,
17340
+ "step": 867000
17341
+ },
17342
+ {
17343
+ "epoch": 9.31,
17344
+ "eval_loss": 0.17311297357082367,
17345
+ "eval_runtime": 2.5749,
17346
+ "eval_samples_per_second": 892.083,
17347
+ "eval_steps_per_second": 13.981,
17348
+ "step": 867000
17349
+ },
17350
+ {
17351
+ "epoch": 9.32,
17352
+ "learning_rate": 1.6612912006993688e-05,
17353
+ "loss": 0.1835,
17354
+ "step": 867500
17355
+ },
17356
+ {
17357
+ "epoch": 9.32,
17358
+ "learning_rate": 1.6563890984486884e-05,
17359
+ "loss": 0.1834,
17360
+ "step": 868000
17361
+ },
17362
+ {
17363
+ "epoch": 9.32,
17364
+ "eval_loss": 0.1740087866783142,
17365
+ "eval_runtime": 2.6738,
17366
+ "eval_samples_per_second": 859.085,
17367
+ "eval_steps_per_second": 13.464,
17368
+ "step": 868000
17369
+ },
17370
+ {
17371
+ "epoch": 9.33,
17372
+ "learning_rate": 1.6515043393879825e-05,
17373
+ "loss": 0.1837,
17374
+ "step": 868500
17375
+ },
17376
+ {
17377
+ "epoch": 9.33,
17378
+ "learning_rate": 1.6466369368719955e-05,
17379
+ "loss": 0.1834,
17380
+ "step": 869000
17381
+ },
17382
+ {
17383
+ "epoch": 9.33,
17384
+ "eval_loss": 0.17570127546787262,
17385
+ "eval_runtime": 2.6358,
17386
+ "eval_samples_per_second": 871.447,
17387
+ "eval_steps_per_second": 13.658,
17388
+ "step": 869000
17389
+ },
17390
+ {
17391
+ "epoch": 9.34,
17392
+ "learning_rate": 1.641786904208022e-05,
17393
+ "loss": 0.1833,
17394
+ "step": 869500
17395
+ },
17396
+ {
17397
+ "epoch": 9.35,
17398
+ "learning_rate": 1.6369542546558626e-05,
17399
+ "loss": 0.1835,
17400
+ "step": 870000
17401
+ },
17402
+ {
17403
+ "epoch": 9.35,
17404
+ "eval_loss": 0.17456747591495514,
17405
+ "eval_runtime": 2.7306,
17406
+ "eval_samples_per_second": 841.196,
17407
+ "eval_steps_per_second": 13.184,
17408
+ "step": 870000
17409
  }
17410
  ],
17411
  "max_steps": 1000000,
17412
  "num_train_epochs": 12,
17413
+ "total_flos": 6.098673421612561e+22,
17414
  "trial_name": null,
17415
  "trial_params": null
17416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9da54e69aa4d985aac499cdb17fab8ee1c2bb36a1855c776c5bda5c5106d784
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70f0274e15c89ba1e5e2f894493b1d0b23475cd923b06c04110b0afc32880fa
3
  size 449471589