jflotz committed on
Commit
51a5d87
·
1 Parent(s): 7779c66

Training in progress, step 830000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6d93c3e982e0579f40b8abbb458ec9e37e56aeab75677fc246cecb087804c2e
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f349542b4533abe4453e0adeb6aff6cd875b986f4117c2f333ebbbb94148a468
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41345120e0d1385984c4967bb7df3bbe42ffb08d61340ff50f089fccaf2a5880
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d8915b38c77403d5b3caf94070565cc919cba4e372d557eb5c40dbe89ac1681
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63a66ccb6dc97ab9b83be4c1a0e1c26e5449019e9e3ce121febf772eda3a493
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c63a65d51252613e1cd5f3ab255f2a8e56d55631776ee22be37789c5802ebbf2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b44f4d1ea700e774f5dee0343ba4324675c77c29852dd54fec6a281d849ccd3b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.78821810557309,
5
- "global_step": 820000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -16406,11 +16406,211 @@
16406
  "eval_samples_per_second": 871.505,
16407
  "eval_steps_per_second": 13.659,
16408
  "step": 820000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16409
  }
16410
  ],
16411
  "max_steps": 1000000,
16412
  "num_train_epochs": 12,
16413
- "total_flos": 5.748177592908341e+22,
16414
  "trial_name": null,
16415
  "trial_params": null
16416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.899746829796014,
5
+ "global_step": 830000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
16406
  "eval_samples_per_second": 871.505,
16407
  "eval_steps_per_second": 13.659,
16408
  "step": 820000
16409
+ },
16410
+ {
16411
+ "epoch": 8.79,
16412
+ "learning_rate": 2.1974562300613417e-05,
16413
+ "loss": 0.186,
16414
+ "step": 820500
16415
+ },
16416
+ {
16417
+ "epoch": 8.8,
16418
+ "learning_rate": 2.1909901420919184e-05,
16419
+ "loss": 0.1856,
16420
+ "step": 821000
16421
+ },
16422
+ {
16423
+ "epoch": 8.8,
16424
+ "eval_loss": 0.17747129499912262,
16425
+ "eval_runtime": 2.664,
16426
+ "eval_samples_per_second": 862.246,
16427
+ "eval_steps_per_second": 13.514,
16428
+ "step": 821000
16429
+ },
16430
+ {
16431
+ "epoch": 8.8,
16432
+ "learning_rate": 2.1845399357336326e-05,
16433
+ "loss": 0.186,
16434
+ "step": 821500
16435
+ },
16436
+ {
16437
+ "epoch": 8.81,
16438
+ "learning_rate": 2.1781056286210997e-05,
16439
+ "loss": 0.186,
16440
+ "step": 822000
16441
+ },
16442
+ {
16443
+ "epoch": 8.81,
16444
+ "eval_loss": 0.1773909628391266,
16445
+ "eval_runtime": 2.5828,
16446
+ "eval_samples_per_second": 889.354,
16447
+ "eval_steps_per_second": 13.939,
16448
+ "step": 822000
16449
+ },
16450
+ {
16451
+ "epoch": 8.82,
16452
+ "learning_rate": 2.1716872383454674e-05,
16453
+ "loss": 0.1861,
16454
+ "step": 822500
16455
+ },
16456
+ {
16457
+ "epoch": 8.82,
16458
+ "learning_rate": 2.1652847824543744e-05,
16459
+ "loss": 0.1856,
16460
+ "step": 823000
16461
+ },
16462
+ {
16463
+ "epoch": 8.82,
16464
+ "eval_loss": 0.1759449690580368,
16465
+ "eval_runtime": 2.6867,
16466
+ "eval_samples_per_second": 854.948,
16467
+ "eval_steps_per_second": 13.399,
16468
+ "step": 823000
16469
+ },
16470
+ {
16471
+ "epoch": 8.83,
16472
+ "learning_rate": 2.1588982784518853e-05,
16473
+ "loss": 0.1862,
16474
+ "step": 823500
16475
+ },
16476
+ {
16477
+ "epoch": 8.83,
16478
+ "learning_rate": 2.1525277437984636e-05,
16479
+ "loss": 0.1857,
16480
+ "step": 824000
16481
+ },
16482
+ {
16483
+ "epoch": 8.83,
16484
+ "eval_loss": 0.1774652898311615,
16485
+ "eval_runtime": 2.6123,
16486
+ "eval_samples_per_second": 879.304,
16487
+ "eval_steps_per_second": 13.781,
16488
+ "step": 824000
16489
+ },
16490
+ {
16491
+ "epoch": 8.84,
16492
+ "learning_rate": 2.1461731959109053e-05,
16493
+ "loss": 0.186,
16494
+ "step": 824500
16495
+ },
16496
+ {
16497
+ "epoch": 8.84,
16498
+ "learning_rate": 2.1398346521623e-05,
16499
+ "loss": 0.1857,
16500
+ "step": 825000
16501
+ },
16502
+ {
16503
+ "epoch": 8.84,
16504
+ "eval_loss": 0.17699038982391357,
16505
+ "eval_runtime": 2.654,
16506
+ "eval_samples_per_second": 865.476,
16507
+ "eval_steps_per_second": 13.564,
16508
+ "step": 825000
16509
+ },
16510
+ {
16511
+ "epoch": 8.85,
16512
+ "learning_rate": 2.1335121298819867e-05,
16513
+ "loss": 0.1859,
16514
+ "step": 825500
16515
+ },
16516
+ {
16517
+ "epoch": 8.86,
16518
+ "learning_rate": 2.1272056463554978e-05,
16519
+ "loss": 0.1862,
16520
+ "step": 826000
16521
+ },
16522
+ {
16523
+ "epoch": 8.86,
16524
+ "eval_loss": 0.17693667113780975,
16525
+ "eval_runtime": 2.6428,
16526
+ "eval_samples_per_second": 869.15,
16527
+ "eval_steps_per_second": 13.622,
16528
+ "step": 826000
16529
+ },
16530
+ {
16531
+ "epoch": 8.86,
16532
+ "learning_rate": 2.1209152188245214e-05,
16533
+ "loss": 0.1858,
16534
+ "step": 826500
16535
+ },
16536
+ {
16537
+ "epoch": 8.87,
16538
+ "learning_rate": 2.114640864486845e-05,
16539
+ "loss": 0.1857,
16540
+ "step": 827000
16541
+ },
16542
+ {
16543
+ "epoch": 8.87,
16544
+ "eval_loss": 0.1788521409034729,
16545
+ "eval_runtime": 2.6742,
16546
+ "eval_samples_per_second": 858.952,
16547
+ "eval_steps_per_second": 13.462,
16548
+ "step": 827000
16549
+ },
16550
+ {
16551
+ "epoch": 8.87,
16552
+ "learning_rate": 2.1083826004963102e-05,
16553
+ "loss": 0.1859,
16554
+ "step": 827500
16555
+ },
16556
+ {
16557
+ "epoch": 8.88,
16558
+ "learning_rate": 2.1021404439627775e-05,
16559
+ "loss": 0.1855,
16560
+ "step": 828000
16561
+ },
16562
+ {
16563
+ "epoch": 8.88,
16564
+ "eval_loss": 0.17763476073741913,
16565
+ "eval_runtime": 2.5581,
16566
+ "eval_samples_per_second": 897.942,
16567
+ "eval_steps_per_second": 14.073,
16568
+ "step": 828000
16569
+ },
16570
+ {
16571
+ "epoch": 8.88,
16572
+ "learning_rate": 2.09591441195206e-05,
16573
+ "loss": 0.1856,
16574
+ "step": 828500
16575
+ },
16576
+ {
16577
+ "epoch": 8.89,
16578
+ "learning_rate": 2.089704521485896e-05,
16579
+ "loss": 0.1858,
16580
+ "step": 829000
16581
+ },
16582
+ {
16583
+ "epoch": 8.89,
16584
+ "eval_loss": 0.17711400985717773,
16585
+ "eval_runtime": 2.6039,
16586
+ "eval_samples_per_second": 882.145,
16587
+ "eval_steps_per_second": 13.826,
16588
+ "step": 829000
16589
+ },
16590
+ {
16591
+ "epoch": 8.89,
16592
+ "learning_rate": 2.083510789541883e-05,
16593
+ "loss": 0.1852,
16594
+ "step": 829500
16595
+ },
16596
+ {
16597
+ "epoch": 8.9,
16598
+ "learning_rate": 2.0773332330534513e-05,
16599
+ "loss": 0.1857,
16600
+ "step": 830000
16601
+ },
16602
+ {
16603
+ "epoch": 8.9,
16604
+ "eval_loss": 0.17438167333602905,
16605
+ "eval_runtime": 2.6569,
16606
+ "eval_samples_per_second": 864.555,
16607
+ "eval_steps_per_second": 13.55,
16608
+ "step": 830000
16609
  }
16610
  ],
16611
  "max_steps": 1000000,
16612
  "num_train_epochs": 12,
16613
+ "total_flos": 5.818277853938688e+22,
16614
  "trial_name": null,
16615
  "trial_params": null
16616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41345120e0d1385984c4967bb7df3bbe42ffb08d61340ff50f089fccaf2a5880
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d8915b38c77403d5b3caf94070565cc919cba4e372d557eb5c40dbe89ac1681
3
  size 449471589