jflotz commited on
Commit
d8c3456
·
1 Parent(s): f00b519

Training in progress, step 570000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b1cc1f37f7674d19f50d0ac065ef69529e5d29f5bb20b814471f7b204857988
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb93720ada86a6004ba26eb4fabd56849226e35d2f46baf4052697153c666bb
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e49d2c84e31d61487864f2465a53de7f412017d3a4351764e556c2063f04b645
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af9a25b33d29c3fd157c79676adec6abbe35f3978d907c7efc857fe0437c64ac
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e62e41706c3cdebd0963ceae6fb24ae079cf26e6452a67e31e4c02f3a80456e6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.245608556483722,
5
- "global_step": 560000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11206,11 +11206,211 @@
11206
  "eval_samples_per_second": 882.169,
11207
  "eval_steps_per_second": 13.826,
11208
  "step": 560000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11209
  }
11210
  ],
11211
  "max_steps": 1000000,
11212
  "num_train_epochs": 12,
11213
- "total_flos": 3.925581759014346e+22,
11214
  "trial_name": null,
11215
  "trial_params": null
11216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.357137280706646,
5
+ "global_step": 570000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11206
  "eval_samples_per_second": 882.169,
11207
  "eval_steps_per_second": 13.826,
11208
  "step": 560000
11209
+ },
11210
+ {
11211
+ "epoch": 6.25,
11212
+ "learning_rate": 7.180111803267856e-05,
11213
+ "loss": 0.2054,
11214
+ "step": 560500
11215
+ },
11216
+ {
11217
+ "epoch": 6.26,
11218
+ "learning_rate": 7.168618306865838e-05,
11219
+ "loss": 0.2051,
11220
+ "step": 561000
11221
+ },
11222
+ {
11223
+ "epoch": 6.26,
11224
+ "eval_loss": 0.195304736495018,
11225
+ "eval_runtime": 2.4332,
11226
+ "eval_samples_per_second": 944.014,
11227
+ "eval_steps_per_second": 14.795,
11228
+ "step": 561000
11229
+ },
11230
+ {
11231
+ "epoch": 6.26,
11232
+ "learning_rate": 7.157127083429626e-05,
11233
+ "loss": 0.2053,
11234
+ "step": 561500
11235
+ },
11236
+ {
11237
+ "epoch": 6.27,
11238
+ "learning_rate": 7.145638164375779e-05,
11239
+ "loss": 0.205,
11240
+ "step": 562000
11241
+ },
11242
+ {
11243
+ "epoch": 6.27,
11244
+ "eval_loss": 0.1958540827035904,
11245
+ "eval_runtime": 2.6408,
11246
+ "eval_samples_per_second": 869.814,
11247
+ "eval_steps_per_second": 13.632,
11248
+ "step": 562000
11249
+ },
11250
+ {
11251
+ "epoch": 6.27,
11252
+ "learning_rate": 7.134151581114565e-05,
11253
+ "loss": 0.2053,
11254
+ "step": 562500
11255
+ },
11256
+ {
11257
+ "epoch": 6.28,
11258
+ "learning_rate": 7.122667365049869e-05,
11259
+ "loss": 0.2052,
11260
+ "step": 563000
11261
+ },
11262
+ {
11263
+ "epoch": 6.28,
11264
+ "eval_loss": 0.19526307284832,
11265
+ "eval_runtime": 2.6193,
11266
+ "eval_samples_per_second": 876.966,
11267
+ "eval_steps_per_second": 13.744,
11268
+ "step": 563000
11269
+ },
11270
+ {
11271
+ "epoch": 6.28,
11272
+ "learning_rate": 7.111185547579099e-05,
11273
+ "loss": 0.205,
11274
+ "step": 563500
11275
+ },
11276
+ {
11277
+ "epoch": 6.29,
11278
+ "learning_rate": 7.099706160093098e-05,
11279
+ "loss": 0.2051,
11280
+ "step": 564000
11281
+ },
11282
+ {
11283
+ "epoch": 6.29,
11284
+ "eval_loss": 0.1962643265724182,
11285
+ "eval_runtime": 2.4959,
11286
+ "eval_samples_per_second": 920.299,
11287
+ "eval_steps_per_second": 14.423,
11288
+ "step": 564000
11289
+ },
11290
+ {
11291
+ "epoch": 6.3,
11292
+ "learning_rate": 7.08822923397608e-05,
11293
+ "loss": 0.2054,
11294
+ "step": 564500
11295
+ },
11296
+ {
11297
+ "epoch": 6.3,
11298
+ "learning_rate": 7.076754800605516e-05,
11299
+ "loss": 0.2053,
11300
+ "step": 565000
11301
+ },
11302
+ {
11303
+ "epoch": 6.3,
11304
+ "eval_loss": 0.19500210881233215,
11305
+ "eval_runtime": 2.6355,
11306
+ "eval_samples_per_second": 871.546,
11307
+ "eval_steps_per_second": 13.659,
11308
+ "step": 565000
11309
+ },
11310
+ {
11311
+ "epoch": 6.31,
11312
+ "learning_rate": 7.065282891352078e-05,
11313
+ "loss": 0.2049,
11314
+ "step": 565500
11315
+ },
11316
+ {
11317
+ "epoch": 6.31,
11318
+ "learning_rate": 7.053813537579523e-05,
11319
+ "loss": 0.2052,
11320
+ "step": 566000
11321
+ },
11322
+ {
11323
+ "epoch": 6.31,
11324
+ "eval_loss": 0.1964665800333023,
11325
+ "eval_runtime": 2.6178,
11326
+ "eval_samples_per_second": 877.444,
11327
+ "eval_steps_per_second": 13.752,
11328
+ "step": 566000
11329
+ },
11330
+ {
11331
+ "epoch": 6.32,
11332
+ "learning_rate": 7.042346770644624e-05,
11333
+ "loss": 0.2046,
11334
+ "step": 566500
11335
+ },
11336
+ {
11337
+ "epoch": 6.32,
11338
+ "learning_rate": 7.030882621897088e-05,
11339
+ "loss": 0.2046,
11340
+ "step": 567000
11341
+ },
11342
+ {
11343
+ "epoch": 6.32,
11344
+ "eval_loss": 0.19378143548965454,
11345
+ "eval_runtime": 2.6471,
11346
+ "eval_samples_per_second": 867.729,
11347
+ "eval_steps_per_second": 13.6,
11348
+ "step": 567000
11349
+ },
11350
+ {
11351
+ "epoch": 6.33,
11352
+ "learning_rate": 7.019421122679455e-05,
11353
+ "loss": 0.2052,
11354
+ "step": 567500
11355
+ },
11356
+ {
11357
+ "epoch": 6.33,
11358
+ "learning_rate": 7.00796230432703e-05,
11359
+ "loss": 0.2045,
11360
+ "step": 568000
11361
+ },
11362
+ {
11363
+ "epoch": 6.33,
11364
+ "eval_loss": 0.1938391774892807,
11365
+ "eval_runtime": 2.5793,
11366
+ "eval_samples_per_second": 890.552,
11367
+ "eval_steps_per_second": 13.957,
11368
+ "step": 568000
11369
+ },
11370
+ {
11371
+ "epoch": 6.34,
11372
+ "learning_rate": 6.996506198167789e-05,
11373
+ "loss": 0.2046,
11374
+ "step": 568500
11375
+ },
11376
+ {
11377
+ "epoch": 6.35,
11378
+ "learning_rate": 6.985052835522279e-05,
11379
+ "loss": 0.2045,
11380
+ "step": 569000
11381
+ },
11382
+ {
11383
+ "epoch": 6.35,
11384
+ "eval_loss": 0.19408397376537323,
11385
+ "eval_runtime": 2.5021,
11386
+ "eval_samples_per_second": 918.029,
11387
+ "eval_steps_per_second": 14.388,
11388
+ "step": 569000
11389
+ },
11390
+ {
11391
+ "epoch": 6.35,
11392
+ "learning_rate": 6.973602247703561e-05,
11393
+ "loss": 0.2047,
11394
+ "step": 569500
11395
+ },
11396
+ {
11397
+ "epoch": 6.36,
11398
+ "learning_rate": 6.962154466017105e-05,
11399
+ "loss": 0.2047,
11400
+ "step": 570000
11401
+ },
11402
+ {
11403
+ "epoch": 6.36,
11404
+ "eval_loss": 0.19305509328842163,
11405
+ "eval_runtime": 2.5881,
11406
+ "eval_samples_per_second": 887.536,
11407
+ "eval_steps_per_second": 13.91,
11408
+ "step": 570000
11409
  }
11410
  ],
11411
  "max_steps": 1000000,
11412
  "num_train_epochs": 12,
11413
+ "total_flos": 3.9956820200446935e+22,
11414
  "trial_name": null,
11415
  "trial_params": null
11416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e49d2c84e31d61487864f2465a53de7f412017d3a4351764e556c2063f04b645
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
3
  size 449471589