jflotz commited on
Commit
fdc51a1
·
1 Parent(s): b58a356

Training in progress, step 970000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04f217d4f2435c53b27b3ffb23b807fa09f40f06e34bfe7a070589d6890dd66f
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de1b3977166c4e20fc41f424497409f61e5dbee702d8ad14048093e7cfab3225
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b5376abe825ff6d04d360831bbfd37e2e2d959d6e490763218e7e38b5c10ca1
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc5b0eb8c3bb3dd69db934b29e5beccddfae1a796cf948830a8b05766d44497
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b07a1641f2eb1fc754b2daa11b1bd54bb7066b815cea15a3f40a26c5bed3d59
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a10da665c924d0a78f574c75f6e0dbcce312de7ca6adb021d1a4f147f32e318
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:351338e637aa543d98ac6400f2e05e86270a6a5900e20a3e790dbfa3cb26dbef
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af3eb0d3db8162f6de4427ee5f19b1787f4bdb865e0ebda13f4fed6034a8890
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.349620244694021,
5
- "global_step": 960000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -19206,11 +19206,211 @@
19206
  "eval_samples_per_second": 837.549,
19207
  "eval_steps_per_second": 13.127,
19208
  "step": 960000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19209
  }
19210
  ],
19211
  "max_steps": 1000000,
19212
  "num_train_epochs": 12,
19213
- "total_flos": 6.72957029443817e+22,
19214
  "trial_name": null,
19215
  "trial_params": null
19216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.461148968916945,
5
+ "global_step": 970000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
19206
  "eval_samples_per_second": 837.549,
19207
  "eval_steps_per_second": 13.127,
19208
  "step": 960000
19209
+ },
19210
+ {
19211
+ "epoch": 10.36,
19212
+ "learning_rate": 1.0596344426086501e-05,
19213
+ "loss": 0.1798,
19214
+ "step": 960500
19215
+ },
19216
+ {
19217
+ "epoch": 10.36,
19218
+ "learning_rate": 1.0581363452005424e-05,
19219
+ "loss": 0.1805,
19220
+ "step": 961000
19221
+ },
19222
+ {
19223
+ "epoch": 10.36,
19224
+ "eval_loss": 0.17073865234851837,
19225
+ "eval_runtime": 2.5938,
19226
+ "eval_samples_per_second": 885.585,
19227
+ "eval_steps_per_second": 13.879,
19228
+ "step": 961000
19229
+ },
19230
+ {
19231
+ "epoch": 10.37,
19232
+ "learning_rate": 1.0566572265825932e-05,
19233
+ "loss": 0.18,
19234
+ "step": 961500
19235
+ },
19236
+ {
19237
+ "epoch": 10.37,
19238
+ "learning_rate": 1.0551970907986557e-05,
19239
+ "loss": 0.1801,
19240
+ "step": 962000
19241
+ },
19242
+ {
19243
+ "epoch": 10.37,
19244
+ "eval_loss": 0.17134888470172882,
19245
+ "eval_runtime": 2.5813,
19246
+ "eval_samples_per_second": 889.852,
19247
+ "eval_steps_per_second": 13.946,
19248
+ "step": 962000
19249
+ },
19250
+ {
19251
+ "epoch": 10.38,
19252
+ "learning_rate": 1.0537559418406849e-05,
19253
+ "loss": 0.18,
19254
+ "step": 962500
19255
+ },
19256
+ {
19257
+ "epoch": 10.38,
19258
+ "learning_rate": 1.0523337836487271e-05,
19259
+ "loss": 0.1799,
19260
+ "step": 963000
19261
+ },
19262
+ {
19263
+ "epoch": 10.38,
19264
+ "eval_loss": 0.17050015926361084,
19265
+ "eval_runtime": 2.6391,
19266
+ "eval_samples_per_second": 870.378,
19267
+ "eval_steps_per_second": 13.641,
19268
+ "step": 963000
19269
+ },
19270
+ {
19271
+ "epoch": 10.39,
19272
+ "learning_rate": 1.0509306201109092e-05,
19273
+ "loss": 0.1801,
19274
+ "step": 963500
19275
+ },
19276
+ {
19277
+ "epoch": 10.39,
19278
+ "learning_rate": 1.0495464550634267e-05,
19279
+ "loss": 0.18,
19280
+ "step": 964000
19281
+ },
19282
+ {
19283
+ "epoch": 10.39,
19284
+ "eval_loss": 0.17047521471977234,
19285
+ "eval_runtime": 2.6548,
19286
+ "eval_samples_per_second": 865.215,
19287
+ "eval_steps_per_second": 13.56,
19288
+ "step": 964000
19289
+ },
19290
+ {
19291
+ "epoch": 10.4,
19292
+ "learning_rate": 1.0481812922905339e-05,
19293
+ "loss": 0.1805,
19294
+ "step": 964500
19295
+ },
19296
+ {
19297
+ "epoch": 10.41,
19298
+ "learning_rate": 1.046835135524533e-05,
19299
+ "loss": 0.1798,
19300
+ "step": 965000
19301
+ },
19302
+ {
19303
+ "epoch": 10.41,
19304
+ "eval_loss": 0.17172271013259888,
19305
+ "eval_runtime": 2.5812,
19306
+ "eval_samples_per_second": 889.895,
19307
+ "eval_steps_per_second": 13.947,
19308
+ "step": 965000
19309
+ },
19310
+ {
19311
+ "epoch": 10.41,
19312
+ "learning_rate": 1.0455079884457653e-05,
19313
+ "loss": 0.1801,
19314
+ "step": 965500
19315
+ },
19316
+ {
19317
+ "epoch": 10.42,
19318
+ "learning_rate": 1.044199854682601e-05,
19319
+ "loss": 0.1797,
19320
+ "step": 966000
19321
+ },
19322
+ {
19323
+ "epoch": 10.42,
19324
+ "eval_loss": 0.16956347227096558,
19325
+ "eval_runtime": 2.6699,
19326
+ "eval_samples_per_second": 860.34,
19327
+ "eval_steps_per_second": 13.484,
19328
+ "step": 966000
19329
+ },
19330
+ {
19331
+ "epoch": 10.42,
19332
+ "learning_rate": 1.0429107378114277e-05,
19333
+ "loss": 0.1802,
19334
+ "step": 966500
19335
+ },
19336
+ {
19337
+ "epoch": 10.43,
19338
+ "learning_rate": 1.0416406413566414e-05,
19339
+ "loss": 0.1802,
19340
+ "step": 967000
19341
+ },
19342
+ {
19343
+ "epoch": 10.43,
19344
+ "eval_loss": 0.17151953279972076,
19345
+ "eval_runtime": 2.5629,
19346
+ "eval_samples_per_second": 896.252,
19347
+ "eval_steps_per_second": 14.047,
19348
+ "step": 967000
19349
+ },
19350
+ {
19351
+ "epoch": 10.43,
19352
+ "learning_rate": 1.0403895687906366e-05,
19353
+ "loss": 0.1803,
19354
+ "step": 967500
19355
+ },
19356
+ {
19357
+ "epoch": 10.44,
19358
+ "learning_rate": 1.0391575235337991e-05,
19359
+ "loss": 0.1798,
19360
+ "step": 968000
19361
+ },
19362
+ {
19363
+ "epoch": 10.44,
19364
+ "eval_loss": 0.17273712158203125,
19365
+ "eval_runtime": 2.6967,
19366
+ "eval_samples_per_second": 851.792,
19367
+ "eval_steps_per_second": 13.35,
19368
+ "step": 968000
19369
+ },
19370
+ {
19371
+ "epoch": 10.44,
19372
+ "learning_rate": 1.0379445089544929e-05,
19373
+ "loss": 0.1799,
19374
+ "step": 968500
19375
+ },
19376
+ {
19377
+ "epoch": 10.45,
19378
+ "learning_rate": 1.0367505283690547e-05,
19379
+ "loss": 0.1797,
19380
+ "step": 969000
19381
+ },
19382
+ {
19383
+ "epoch": 10.45,
19384
+ "eval_loss": 0.17085492610931396,
19385
+ "eval_runtime": 2.6519,
19386
+ "eval_samples_per_second": 866.18,
19387
+ "eval_steps_per_second": 13.575,
19388
+ "step": 969000
19389
+ },
19390
+ {
19391
+ "epoch": 10.46,
19392
+ "learning_rate": 1.0355755850417803e-05,
19393
+ "loss": 0.1797,
19394
+ "step": 969500
19395
+ },
19396
+ {
19397
+ "epoch": 10.46,
19398
+ "learning_rate": 1.0344196821849202e-05,
19399
+ "loss": 0.1799,
19400
+ "step": 970000
19401
+ },
19402
+ {
19403
+ "epoch": 10.46,
19404
+ "eval_loss": 0.1711302548646927,
19405
+ "eval_runtime": 2.5979,
19406
+ "eval_samples_per_second": 884.178,
19407
+ "eval_steps_per_second": 13.857,
19408
+ "step": 970000
19409
  }
19410
  ],
19411
  "max_steps": 1000000,
19412
  "num_train_epochs": 12,
19413
+ "total_flos": 6.799670555468517e+22,
19414
  "trial_name": null,
19415
  "trial_params": null
19416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b5376abe825ff6d04d360831bbfd37e2e2d959d6e490763218e7e38b5c10ca1
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc5b0eb8c3bb3dd69db934b29e5beccddfae1a796cf948830a8b05766d44497
3
  size 449471589