jflotz commited on
Commit
4f69a4a
·
1 Parent(s): e00ada0

Training in progress, step 920000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:286afc89953ac85c207ddee00ff74005ad2f262dcd53bfa64635a387b524e2f5
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee3aeed50a2a4c25efaca58a22f8e74eeb4a5176131dc454fbde37631acf016d
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b52f3b8fcfa70b1731fae94d573cc6b63207a962d882488f83af9b17655c7c7
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c53e22d617d625d68b0b7c24d68f147254236ebb3b272bd10eecf6d93598e9
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68bd93c5df85b9ef6b8dfb004005413abc49b194d979c692716ee25211f1498f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df508c344c1f04d3b388cef9605593fbfd129cd18e2830701d3110873541479
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.791976623579403,
5
- "global_step": 910000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -18206,11 +18206,211 @@
18206
  "eval_samples_per_second": 861.024,
18207
  "eval_steps_per_second": 13.495,
18208
  "step": 910000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18209
  }
18210
  ],
18211
  "max_steps": 1000000,
18212
  "num_train_epochs": 12,
18213
- "total_flos": 6.3790744657339496e+22,
18214
  "trial_name": null,
18215
  "trial_params": null
18216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.903505347802326,
5
+ "global_step": 920000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
18206
  "eval_samples_per_second": 861.024,
18207
  "eval_steps_per_second": 13.495,
18208
  "step": 910000
18209
+ },
18210
+ {
18211
+ "epoch": 9.8,
18212
+ "learning_rate": 1.3043645183837645e-05,
18213
+ "loss": 0.1817,
18214
+ "step": 910500
18215
+ },
18216
+ {
18217
+ "epoch": 9.8,
18218
+ "learning_rate": 1.3009978131197669e-05,
18219
+ "loss": 0.1821,
18220
+ "step": 911000
18221
+ },
18222
+ {
18223
+ "epoch": 9.8,
18224
+ "eval_loss": 0.17097479104995728,
18225
+ "eval_runtime": 2.7046,
18226
+ "eval_samples_per_second": 849.303,
18227
+ "eval_steps_per_second": 13.311,
18228
+ "step": 911000
18229
+ },
18230
+ {
18231
+ "epoch": 9.81,
18232
+ "learning_rate": 1.297649422671947e-05,
18233
+ "loss": 0.1815,
18234
+ "step": 911500
18235
+ },
18236
+ {
18237
+ "epoch": 9.81,
18238
+ "learning_rate": 1.2943193561946762e-05,
18239
+ "loss": 0.1814,
18240
+ "step": 912000
18241
+ },
18242
+ {
18243
+ "epoch": 9.81,
18244
+ "eval_loss": 0.1718176305294037,
18245
+ "eval_runtime": 2.6645,
18246
+ "eval_samples_per_second": 862.075,
18247
+ "eval_steps_per_second": 13.511,
18248
+ "step": 912000
18249
+ },
18250
+ {
18251
+ "epoch": 9.82,
18252
+ "learning_rate": 1.291007622792231e-05,
18253
+ "loss": 0.1816,
18254
+ "step": 912500
18255
+ },
18256
+ {
18257
+ "epoch": 9.83,
18258
+ "learning_rate": 1.2877142315187628e-05,
18259
+ "loss": 0.1818,
18260
+ "step": 913000
18261
+ },
18262
+ {
18263
+ "epoch": 9.83,
18264
+ "eval_loss": 0.1710846871137619,
18265
+ "eval_runtime": 2.7026,
18266
+ "eval_samples_per_second": 849.924,
18267
+ "eval_steps_per_second": 13.321,
18268
+ "step": 913000
18269
+ },
18270
+ {
18271
+ "epoch": 9.83,
18272
+ "learning_rate": 1.2844391913782773e-05,
18273
+ "loss": 0.182,
18274
+ "step": 913500
18275
+ },
18276
+ {
18277
+ "epoch": 9.84,
18278
+ "learning_rate": 1.28118251132461e-05,
18279
+ "loss": 0.1815,
18280
+ "step": 914000
18281
+ },
18282
+ {
18283
+ "epoch": 9.84,
18284
+ "eval_loss": 0.17333008348941803,
18285
+ "eval_runtime": 2.5502,
18286
+ "eval_samples_per_second": 900.72,
18287
+ "eval_steps_per_second": 14.117,
18288
+ "step": 914000
18289
+ },
18290
+ {
18291
+ "epoch": 9.84,
18292
+ "learning_rate": 1.2779442002613984e-05,
18293
+ "loss": 0.1814,
18294
+ "step": 914500
18295
+ },
18296
+ {
18297
+ "epoch": 9.85,
18298
+ "learning_rate": 1.274724267042063e-05,
18299
+ "loss": 0.1814,
18300
+ "step": 915000
18301
+ },
18302
+ {
18303
+ "epoch": 9.85,
18304
+ "eval_loss": 0.1717572808265686,
18305
+ "eval_runtime": 2.636,
18306
+ "eval_samples_per_second": 871.395,
18307
+ "eval_steps_per_second": 13.657,
18308
+ "step": 915000
18309
+ },
18310
+ {
18311
+ "epoch": 9.85,
18312
+ "learning_rate": 1.2715227204697775e-05,
18313
+ "loss": 0.1814,
18314
+ "step": 915500
18315
+ },
18316
+ {
18317
+ "epoch": 9.86,
18318
+ "learning_rate": 1.2683395692974472e-05,
18319
+ "loss": 0.1819,
18320
+ "step": 916000
18321
+ },
18322
+ {
18323
+ "epoch": 9.86,
18324
+ "eval_loss": 0.17225094139575958,
18325
+ "eval_runtime": 2.6373,
18326
+ "eval_samples_per_second": 870.95,
18327
+ "eval_steps_per_second": 13.65,
18328
+ "step": 916000
18329
+ },
18330
+ {
18331
+ "epoch": 9.86,
18332
+ "learning_rate": 1.2651748222276879e-05,
18333
+ "loss": 0.1815,
18334
+ "step": 916500
18335
+ },
18336
+ {
18337
+ "epoch": 9.87,
18338
+ "learning_rate": 1.2620284879127947e-05,
18339
+ "loss": 0.1816,
18340
+ "step": 917000
18341
+ },
18342
+ {
18343
+ "epoch": 9.87,
18344
+ "eval_loss": 0.17146611213684082,
18345
+ "eval_runtime": 2.7493,
18346
+ "eval_samples_per_second": 835.499,
18347
+ "eval_steps_per_second": 13.094,
18348
+ "step": 917000
18349
+ },
18350
+ {
18351
+ "epoch": 9.88,
18352
+ "learning_rate": 1.2589005749547281e-05,
18353
+ "loss": 0.1815,
18354
+ "step": 917500
18355
+ },
18356
+ {
18357
+ "epoch": 9.88,
18358
+ "learning_rate": 1.2557910919050803e-05,
18359
+ "loss": 0.1813,
18360
+ "step": 918000
18361
+ },
18362
+ {
18363
+ "epoch": 9.88,
18364
+ "eval_loss": 0.16947948932647705,
18365
+ "eval_runtime": 2.7503,
18366
+ "eval_samples_per_second": 835.189,
18367
+ "eval_steps_per_second": 13.09,
18368
+ "step": 918000
18369
+ },
18370
+ {
18371
+ "epoch": 9.89,
18372
+ "learning_rate": 1.2527000472650597e-05,
18373
+ "loss": 0.1815,
18374
+ "step": 918500
18375
+ },
18376
+ {
18377
+ "epoch": 9.89,
18378
+ "learning_rate": 1.2496274494854666e-05,
18379
+ "loss": 0.1812,
18380
+ "step": 919000
18381
+ },
18382
+ {
18383
+ "epoch": 9.89,
18384
+ "eval_loss": 0.17127934098243713,
18385
+ "eval_runtime": 2.6734,
18386
+ "eval_samples_per_second": 859.205,
18387
+ "eval_steps_per_second": 13.466,
18388
+ "step": 919000
18389
+ },
18390
+ {
18391
+ "epoch": 9.9,
18392
+ "learning_rate": 1.2465733069666629e-05,
18393
+ "loss": 0.1813,
18394
+ "step": 919500
18395
+ },
18396
+ {
18397
+ "epoch": 9.9,
18398
+ "learning_rate": 1.24353762805856e-05,
18399
+ "loss": 0.1814,
18400
+ "step": 920000
18401
+ },
18402
+ {
18403
+ "epoch": 9.9,
18404
+ "eval_loss": 0.17229017615318298,
18405
+ "eval_runtime": 2.6708,
18406
+ "eval_samples_per_second": 860.053,
18407
+ "eval_steps_per_second": 13.479,
18408
+ "step": 920000
18409
  }
18410
  ],
18411
  "max_steps": 1000000,
18412
  "num_train_epochs": 12,
18413
+ "total_flos": 6.449174726764297e+22,
18414
  "trial_name": null,
18415
  "trial_params": null
18416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b52f3b8fcfa70b1731fae94d573cc6b63207a962d882488f83af9b17655c7c7
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c53e22d617d625d68b0b7c24d68f147254236ebb3b272bd10eecf6d93598e9
3
  size 449471589