jflotz committed on
Commit
7779c66
·
1 Parent(s): fcbebbe

Training in progress, step 820000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20271fe04ea74ef6c1f5d2a4d1320cc4e3cdd71160c77649afb9825f61cfb447
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d93c3e982e0579f40b8abbb458ec9e37e56aeab75677fc246cecb087804c2e
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cd0bb237b46c76aefe15f9280bc4734a0b83f8b031790d8bd933f10088a4b40
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41345120e0d1385984c4967bb7df3bbe42ffb08d61340ff50f089fccaf2a5880
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c229483d7671f93f7f6ccbe0ea6c92e0f0e6de8b45b16f50c28569110970a54
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80ff99717fc66d7e2670093ac4b787c0d4e68c8bb6b50d5d8a0a59479daaf2a3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c63a65d51252613e1cd5f3ab255f2a8e56d55631776ee22be37789c5802ebbf2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.676689381350167,
5
- "global_step": 810000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -16206,11 +16206,211 @@
16206
  "eval_samples_per_second": 876.614,
16207
  "eval_steps_per_second": 13.739,
16208
  "step": 810000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16209
  }
16210
  ],
16211
  "max_steps": 1000000,
16212
  "num_train_epochs": 12,
16213
- "total_flos": 5.678077331877994e+22,
16214
  "trial_name": null,
16215
  "trial_params": null
16216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.78821810557309,
5
+ "global_step": 820000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
16206
  "eval_samples_per_second": 876.614,
16207
  "eval_steps_per_second": 13.739,
16208
  "step": 810000
16209
+ },
16210
+ {
16211
+ "epoch": 8.68,
16212
+ "learning_rate": 2.3300855876332162e-05,
16213
+ "loss": 0.1868,
16214
+ "step": 810500
16215
+ },
16216
+ {
16217
+ "epoch": 8.69,
16218
+ "learning_rate": 2.32330563722056e-05,
16219
+ "loss": 0.1864,
16220
+ "step": 811000
16221
+ },
16222
+ {
16223
+ "epoch": 8.69,
16224
+ "eval_loss": 0.1779273897409439,
16225
+ "eval_runtime": 2.7339,
16226
+ "eval_samples_per_second": 840.196,
16227
+ "eval_steps_per_second": 13.168,
16228
+ "step": 811000
16229
+ },
16230
+ {
16231
+ "epoch": 8.69,
16232
+ "learning_rate": 2.316541206673529e-05,
16233
+ "loss": 0.1865,
16234
+ "step": 811500
16235
+ },
16236
+ {
16237
+ "epoch": 8.7,
16238
+ "learning_rate": 2.309792314485815e-05,
16239
+ "loss": 0.1866,
16240
+ "step": 812000
16241
+ },
16242
+ {
16243
+ "epoch": 8.7,
16244
+ "eval_loss": 0.17686782777309418,
16245
+ "eval_runtime": 2.8998,
16246
+ "eval_samples_per_second": 792.124,
16247
+ "eval_steps_per_second": 12.415,
16248
+ "step": 812000
16249
+ },
16250
+ {
16251
+ "epoch": 8.7,
16252
+ "learning_rate": 2.3030589791086353e-05,
16253
+ "loss": 0.1865,
16254
+ "step": 812500
16255
+ },
16256
+ {
16257
+ "epoch": 8.71,
16258
+ "learning_rate": 2.2963412189506695e-05,
16259
+ "loss": 0.1869,
16260
+ "step": 813000
16261
+ },
16262
+ {
16263
+ "epoch": 8.71,
16264
+ "eval_loss": 0.1769571304321289,
16265
+ "eval_runtime": 2.6694,
16266
+ "eval_samples_per_second": 860.483,
16267
+ "eval_steps_per_second": 13.486,
16268
+ "step": 813000
16269
+ },
16270
+ {
16271
+ "epoch": 8.72,
16272
+ "learning_rate": 2.2896390523780156e-05,
16273
+ "loss": 0.1865,
16274
+ "step": 813500
16275
+ },
16276
+ {
16277
+ "epoch": 8.72,
16278
+ "learning_rate": 2.282952497714145e-05,
16279
+ "loss": 0.186,
16280
+ "step": 814000
16281
+ },
16282
+ {
16283
+ "epoch": 8.72,
16284
+ "eval_loss": 0.17855176329612732,
16285
+ "eval_runtime": 2.6874,
16286
+ "eval_samples_per_second": 854.735,
16287
+ "eval_steps_per_second": 13.396,
16288
+ "step": 814000
16289
+ },
16290
+ {
16291
+ "epoch": 8.73,
16292
+ "learning_rate": 2.2762815732398387e-05,
16293
+ "loss": 0.1862,
16294
+ "step": 814500
16295
+ },
16296
+ {
16297
+ "epoch": 8.73,
16298
+ "learning_rate": 2.2696262971931538e-05,
16299
+ "loss": 0.1863,
16300
+ "step": 815000
16301
+ },
16302
+ {
16303
+ "epoch": 8.73,
16304
+ "eval_loss": 0.17720898985862732,
16305
+ "eval_runtime": 2.6031,
16306
+ "eval_samples_per_second": 882.4,
16307
+ "eval_steps_per_second": 13.83,
16308
+ "step": 815000
16309
+ },
16310
+ {
16311
+ "epoch": 8.74,
16312
+ "learning_rate": 2.2629866877693577e-05,
16313
+ "loss": 0.1865,
16314
+ "step": 815500
16315
+ },
16316
+ {
16317
+ "epoch": 8.74,
16318
+ "learning_rate": 2.2563627631208887e-05,
16319
+ "loss": 0.1869,
16320
+ "step": 816000
16321
+ },
16322
+ {
16323
+ "epoch": 8.74,
16324
+ "eval_loss": 0.17861302196979523,
16325
+ "eval_runtime": 2.6611,
16326
+ "eval_samples_per_second": 863.188,
16327
+ "eval_steps_per_second": 13.528,
16328
+ "step": 816000
16329
+ },
16330
+ {
16331
+ "epoch": 8.75,
16332
+ "learning_rate": 2.2497545413573065e-05,
16333
+ "loss": 0.1863,
16334
+ "step": 816500
16335
+ },
16336
+ {
16337
+ "epoch": 8.75,
16338
+ "learning_rate": 2.2431620405452336e-05,
16339
+ "loss": 0.1859,
16340
+ "step": 817000
16341
+ },
16342
+ {
16343
+ "epoch": 8.75,
16344
+ "eval_loss": 0.17606213688850403,
16345
+ "eval_runtime": 2.6726,
16346
+ "eval_samples_per_second": 859.474,
16347
+ "eval_steps_per_second": 13.47,
16348
+ "step": 817000
16349
+ },
16350
+ {
16351
+ "epoch": 8.76,
16352
+ "learning_rate": 2.23658527870832e-05,
16353
+ "loss": 0.1863,
16354
+ "step": 817500
16355
+ },
16356
+ {
16357
+ "epoch": 8.77,
16358
+ "learning_rate": 2.230024273827179e-05,
16359
+ "loss": 0.1862,
16360
+ "step": 818000
16361
+ },
16362
+ {
16363
+ "epoch": 8.77,
16364
+ "eval_loss": 0.17465642094612122,
16365
+ "eval_runtime": 2.6831,
16366
+ "eval_samples_per_second": 856.102,
16367
+ "eval_steps_per_second": 13.417,
16368
+ "step": 818000
16369
+ },
16370
+ {
16371
+ "epoch": 8.77,
16372
+ "learning_rate": 2.223479043839345e-05,
16373
+ "loss": 0.1867,
16374
+ "step": 818500
16375
+ },
16376
+ {
16377
+ "epoch": 8.78,
16378
+ "learning_rate": 2.216949606639231e-05,
16379
+ "loss": 0.1863,
16380
+ "step": 819000
16381
+ },
16382
+ {
16383
+ "epoch": 8.78,
16384
+ "eval_loss": 0.17773117125034332,
16385
+ "eval_runtime": 2.6871,
16386
+ "eval_samples_per_second": 854.84,
16387
+ "eval_steps_per_second": 13.398,
16388
+ "step": 819000
16389
+ },
16390
+ {
16391
+ "epoch": 8.78,
16392
+ "learning_rate": 2.2104359800780665e-05,
16393
+ "loss": 0.1859,
16394
+ "step": 819500
16395
+ },
16396
+ {
16397
+ "epoch": 8.79,
16398
+ "learning_rate": 2.2039381819638596e-05,
16399
+ "loss": 0.186,
16400
+ "step": 820000
16401
+ },
16402
+ {
16403
+ "epoch": 8.79,
16404
+ "eval_loss": 0.1770503968000412,
16405
+ "eval_runtime": 2.6357,
16406
+ "eval_samples_per_second": 871.505,
16407
+ "eval_steps_per_second": 13.659,
16408
+ "step": 820000
16409
  }
16410
  ],
16411
  "max_steps": 1000000,
16412
  "num_train_epochs": 12,
16413
+ "total_flos": 5.748177592908341e+22,
16414
  "trial_name": null,
16415
  "trial_params": null
16416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cd0bb237b46c76aefe15f9280bc4734a0b83f8b031790d8bd933f10088a4b40
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41345120e0d1385984c4967bb7df3bbe42ffb08d61340ff50f089fccaf2a5880
3
  size 449471589