jflotz commited on
Commit
363c8e7
·
1 Parent(s): 4f69a4a

Training in progress, step 930000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee3aeed50a2a4c25efaca58a22f8e74eeb4a5176131dc454fbde37631acf016d
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7e7257d85066f44137f90721b6eeea6b47af4fdd60cb740d773a3a975cd64d8
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c53e22d617d625d68b0b7c24d68f147254236ebb3b272bd10eecf6d93598e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3151d7e7cec0857fd8c0138e846e83ffe158233cc9712166843e469c2af9c3a
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b532accc0aeba393068677a9f95a8872109f8bff87e999a983cbe001ebba335b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1557f867cc6f3d40a606a0ce0859f55e4a3fbdcdcb8655429fb299c80a41a4d
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7df508c344c1f04d3b388cef9605593fbfd129cd18e2830701d3110873541479
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0e1b3397ffbcdba72f77e0e72529212805b3efe290aff36c3ecd969d87bca4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.903505347802326,
5
- "global_step": 920000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -18406,11 +18406,211 @@
18406
  "eval_samples_per_second": 860.053,
18407
  "eval_steps_per_second": 13.479,
18408
  "step": 920000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18409
  }
18410
  ],
18411
  "max_steps": 1000000,
18412
  "num_train_epochs": 12,
18413
- "total_flos": 6.449174726764297e+22,
18414
  "trial_name": null,
18415
  "trial_params": null
18416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.01503407202525,
5
+ "global_step": 930000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
18406
  "eval_samples_per_second": 860.053,
18407
  "eval_steps_per_second": 13.479,
18408
  "step": 920000
18409
+ },
18410
+ {
18411
+ "epoch": 9.91,
18412
+ "learning_rate": 1.240520421060586e-05,
18413
+ "loss": 0.1818,
18414
+ "step": 920500
18415
+ },
18416
+ {
18417
+ "epoch": 9.91,
18418
+ "learning_rate": 1.2375216942216713e-05,
18419
+ "loss": 0.1817,
18420
+ "step": 921000
18421
+ },
18422
+ {
18423
+ "epoch": 9.91,
18424
+ "eval_loss": 0.17294897139072418,
18425
+ "eval_runtime": 2.7216,
18426
+ "eval_samples_per_second": 843.974,
18427
+ "eval_steps_per_second": 13.227,
18428
+ "step": 921000
18429
+ },
18430
+ {
18431
+ "epoch": 9.92,
18432
+ "learning_rate": 1.2345414557402198e-05,
18433
+ "loss": 0.1811,
18434
+ "step": 921500
18435
+ },
18436
+ {
18437
+ "epoch": 9.93,
18438
+ "learning_rate": 1.2315797137640906e-05,
18439
+ "loss": 0.1814,
18440
+ "step": 922000
18441
+ },
18442
+ {
18443
+ "epoch": 9.93,
18444
+ "eval_loss": 0.1728929877281189,
18445
+ "eval_runtime": 2.6867,
18446
+ "eval_samples_per_second": 854.941,
18447
+ "eval_steps_per_second": 13.399,
18448
+ "step": 922000
18449
+ },
18450
+ {
18451
+ "epoch": 9.93,
18452
+ "learning_rate": 1.2286364763905723e-05,
18453
+ "loss": 0.1813,
18454
+ "step": 922500
18455
+ },
18456
+ {
18457
+ "epoch": 9.94,
18458
+ "learning_rate": 1.225711751666363e-05,
18459
+ "loss": 0.1815,
18460
+ "step": 923000
18461
+ },
18462
+ {
18463
+ "epoch": 9.94,
18464
+ "eval_loss": 0.17273372411727905,
18465
+ "eval_runtime": 2.6851,
18466
+ "eval_samples_per_second": 855.459,
18467
+ "eval_steps_per_second": 13.407,
18468
+ "step": 923000
18469
+ },
18470
+ {
18471
+ "epoch": 9.94,
18472
+ "learning_rate": 1.2228055475875488e-05,
18473
+ "loss": 0.1811,
18474
+ "step": 923500
18475
+ },
18476
+ {
18477
+ "epoch": 9.95,
18478
+ "learning_rate": 1.2199178720995825e-05,
18479
+ "loss": 0.1814,
18480
+ "step": 924000
18481
+ },
18482
+ {
18483
+ "epoch": 9.95,
18484
+ "eval_loss": 0.17107011377811432,
18485
+ "eval_runtime": 2.7626,
18486
+ "eval_samples_per_second": 831.466,
18487
+ "eval_steps_per_second": 13.031,
18488
+ "step": 924000
18489
+ },
18490
+ {
18491
+ "epoch": 9.95,
18492
+ "learning_rate": 1.217048733097256e-05,
18493
+ "loss": 0.181,
18494
+ "step": 924500
18495
+ },
18496
+ {
18497
+ "epoch": 9.96,
18498
+ "learning_rate": 1.2141981384246874e-05,
18499
+ "loss": 0.1808,
18500
+ "step": 925000
18501
+ },
18502
+ {
18503
+ "epoch": 9.96,
18504
+ "eval_loss": 0.17153695225715637,
18505
+ "eval_runtime": 2.6072,
18506
+ "eval_samples_per_second": 881.021,
18507
+ "eval_steps_per_second": 13.808,
18508
+ "step": 925000
18509
+ },
18510
+ {
18511
+ "epoch": 9.96,
18512
+ "learning_rate": 1.211366095875293e-05,
18513
+ "loss": 0.1811,
18514
+ "step": 925500
18515
+ },
18516
+ {
18517
+ "epoch": 9.97,
18518
+ "learning_rate": 1.2085526131917685e-05,
18519
+ "loss": 0.181,
18520
+ "step": 926000
18521
+ },
18522
+ {
18523
+ "epoch": 9.97,
18524
+ "eval_loss": 0.1717982143163681,
18525
+ "eval_runtime": 2.6346,
18526
+ "eval_samples_per_second": 871.846,
18527
+ "eval_steps_per_second": 13.664,
18528
+ "step": 926000
18529
+ },
18530
+ {
18531
+ "epoch": 9.98,
18532
+ "learning_rate": 1.2057576980660691e-05,
18533
+ "loss": 0.181,
18534
+ "step": 926500
18535
+ },
18536
+ {
18537
+ "epoch": 9.98,
18538
+ "learning_rate": 1.2029813581393866e-05,
18539
+ "loss": 0.1813,
18540
+ "step": 927000
18541
+ },
18542
+ {
18543
+ "epoch": 9.98,
18544
+ "eval_loss": 0.17178404331207275,
18545
+ "eval_runtime": 2.6835,
18546
+ "eval_samples_per_second": 855.987,
18547
+ "eval_steps_per_second": 13.416,
18548
+ "step": 927000
18549
+ },
18550
+ {
18551
+ "epoch": 9.99,
18552
+ "learning_rate": 1.2002236010021269e-05,
18553
+ "loss": 0.1811,
18554
+ "step": 927500
18555
+ },
18556
+ {
18557
+ "epoch": 9.99,
18558
+ "learning_rate": 1.197484434193893e-05,
18559
+ "loss": 0.1811,
18560
+ "step": 928000
18561
+ },
18562
+ {
18563
+ "epoch": 9.99,
18564
+ "eval_loss": 0.17153653502464294,
18565
+ "eval_runtime": 2.6987,
18566
+ "eval_samples_per_second": 851.135,
18567
+ "eval_steps_per_second": 13.34,
18568
+ "step": 928000
18569
+ },
18570
+ {
18571
+ "epoch": 10.0,
18572
+ "learning_rate": 1.1947638652034617e-05,
18573
+ "loss": 0.1809,
18574
+ "step": 928500
18575
+ },
18576
+ {
18577
+ "epoch": 10.0,
18578
+ "learning_rate": 1.192061901468768e-05,
18579
+ "loss": 0.1811,
18580
+ "step": 929000
18581
+ },
18582
+ {
18583
+ "epoch": 10.0,
18584
+ "eval_loss": 0.17235822975635529,
18585
+ "eval_runtime": 2.6438,
18586
+ "eval_samples_per_second": 868.809,
18587
+ "eval_steps_per_second": 13.617,
18588
+ "step": 929000
18589
+ },
18590
+ {
18591
+ "epoch": 10.01,
18592
+ "learning_rate": 1.1893785503768736e-05,
18593
+ "loss": 0.1812,
18594
+ "step": 929500
18595
+ },
18596
+ {
18597
+ "epoch": 10.02,
18598
+ "learning_rate": 1.1867138192639601e-05,
18599
+ "loss": 0.1809,
18600
+ "step": 930000
18601
+ },
18602
+ {
18603
+ "epoch": 10.02,
18604
+ "eval_loss": 0.1709609031677246,
18605
+ "eval_runtime": 2.6178,
18606
+ "eval_samples_per_second": 877.459,
18607
+ "eval_steps_per_second": 13.752,
18608
+ "step": 930000
18609
  }
18610
  ],
18611
  "max_steps": 1000000,
18612
  "num_train_epochs": 12,
18613
+ "total_flos": 6.519269511347128e+22,
18614
  "trial_name": null,
18615
  "trial_params": null
18616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c53e22d617d625d68b0b7c24d68f147254236ebb3b272bd10eecf6d93598e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3151d7e7cec0857fd8c0138e846e83ffe158233cc9712166843e469c2af9c3a
3
  size 449471589