jflotz committed on
Commit
28c2a2b
·
1 Parent(s): 79155c1

Training in progress, step 880000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4a2010561ae6b219703766f77123488046a173556bad32795a979a0714f6e7c
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86cf27fbaeb2a38de0ef33258b77f6fefbd96bfd63b67353f72569cf9236a376
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f70f0274e15c89ba1e5e2f894493b1d0b23475cd923b06c04110b0afc32880fa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff45cd407febf926d10bda98ff4d352e6977480876fc00eacce9c1938f55c43
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55186c11fcb4a70528459949af80ac1b2eb9ca8f4193cbb9f48d92b908839fa5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d368f4f79d735aeb82977d11fd8d84913a3919ff8ecbae0982e3d606c331447e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebaa6261431616bb924fa3611c1e782327703255936f9b7e34a1eda29c117895
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.345861726687708,
5
- "global_step": 870000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -17406,11 +17406,211 @@
17406
  "eval_samples_per_second": 841.196,
17407
  "eval_steps_per_second": 13.184,
17408
  "step": 870000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17409
  }
17410
  ],
17411
  "max_steps": 1000000,
17412
  "num_train_epochs": 12,
17413
- "total_flos": 6.098673421612561e+22,
17414
  "trial_name": null,
17415
  "trial_params": null
17416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.457390450910632,
5
+ "global_step": 880000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
17406
  "eval_samples_per_second": 841.196,
17407
  "eval_steps_per_second": 13.184,
17408
  "step": 870000
17409
+ },
17410
+ {
17411
+ "epoch": 9.35,
17412
+ "learning_rate": 1.6321390014277996e-05,
17413
+ "loss": 0.1831,
17414
+ "step": 870500
17415
+ },
17416
+ {
17417
+ "epoch": 9.36,
17418
+ "learning_rate": 1.6273411576885517e-05,
17419
+ "loss": 0.1836,
17420
+ "step": 871000
17421
+ },
17422
+ {
17423
+ "epoch": 9.36,
17424
+ "eval_loss": 0.17539818584918976,
17425
+ "eval_runtime": 2.6712,
17426
+ "eval_samples_per_second": 859.901,
17427
+ "eval_steps_per_second": 13.477,
17428
+ "step": 871000
17429
+ },
17430
+ {
17431
+ "epoch": 9.36,
17432
+ "learning_rate": 1.6225607365552378e-05,
17433
+ "loss": 0.1831,
17434
+ "step": 871500
17435
+ },
17436
+ {
17437
+ "epoch": 9.37,
17438
+ "learning_rate": 1.617797751097349e-05,
17439
+ "loss": 0.1832,
17440
+ "step": 872000
17441
+ },
17442
+ {
17443
+ "epoch": 9.37,
17444
+ "eval_loss": 0.1717691868543625,
17445
+ "eval_runtime": 2.6798,
17446
+ "eval_samples_per_second": 857.157,
17447
+ "eval_steps_per_second": 13.434,
17448
+ "step": 872000
17449
+ },
17450
+ {
17451
+ "epoch": 9.37,
17452
+ "learning_rate": 1.6130522143367032e-05,
17453
+ "loss": 0.1832,
17454
+ "step": 872500
17455
+ },
17456
+ {
17457
+ "epoch": 9.38,
17458
+ "learning_rate": 1.608324139247421e-05,
17459
+ "loss": 0.1835,
17460
+ "step": 873000
17461
+ },
17462
+ {
17463
+ "epoch": 9.38,
17464
+ "eval_loss": 0.1719122976064682,
17465
+ "eval_runtime": 2.6225,
17466
+ "eval_samples_per_second": 875.898,
17467
+ "eval_steps_per_second": 13.728,
17468
+ "step": 873000
17469
+ },
17470
+ {
17471
+ "epoch": 9.38,
17472
+ "learning_rate": 1.6036135387558756e-05,
17473
+ "loss": 0.1831,
17474
+ "step": 873500
17475
+ },
17476
+ {
17477
+ "epoch": 9.39,
17478
+ "learning_rate": 1.5989204257406693e-05,
17479
+ "loss": 0.1833,
17480
+ "step": 874000
17481
+ },
17482
+ {
17483
+ "epoch": 9.39,
17484
+ "eval_loss": 0.17478306591510773,
17485
+ "eval_runtime": 2.6101,
17486
+ "eval_samples_per_second": 880.046,
17487
+ "eval_steps_per_second": 13.793,
17488
+ "step": 874000
17489
+ },
17490
+ {
17491
+ "epoch": 9.4,
17492
+ "learning_rate": 1.594244813032595e-05,
17493
+ "loss": 0.1829,
17494
+ "step": 874500
17495
+ },
17496
+ {
17497
+ "epoch": 9.4,
17498
+ "learning_rate": 1.5895867134145974e-05,
17499
+ "loss": 0.1829,
17500
+ "step": 875000
17501
+ },
17502
+ {
17503
+ "epoch": 9.4,
17504
+ "eval_loss": 0.17394264042377472,
17505
+ "eval_runtime": 2.5878,
17506
+ "eval_samples_per_second": 887.623,
17507
+ "eval_steps_per_second": 13.911,
17508
+ "step": 875000
17509
+ },
17510
+ {
17511
+ "epoch": 9.41,
17512
+ "learning_rate": 1.5849461396217467e-05,
17513
+ "loss": 0.1834,
17514
+ "step": 875500
17515
+ },
17516
+ {
17517
+ "epoch": 9.41,
17518
+ "learning_rate": 1.5803231043411912e-05,
17519
+ "loss": 0.1827,
17520
+ "step": 876000
17521
+ },
17522
+ {
17523
+ "epoch": 9.41,
17524
+ "eval_loss": 0.17351944744586945,
17525
+ "eval_runtime": 2.6686,
17526
+ "eval_samples_per_second": 860.761,
17527
+ "eval_steps_per_second": 13.49,
17528
+ "step": 876000
17529
+ },
17530
+ {
17531
+ "epoch": 9.42,
17532
+ "learning_rate": 1.575717620212132e-05,
17533
+ "loss": 0.183,
17534
+ "step": 876500
17535
+ },
17536
+ {
17537
+ "epoch": 9.42,
17538
+ "learning_rate": 1.5711296998257902e-05,
17539
+ "loss": 0.1832,
17540
+ "step": 877000
17541
+ },
17542
+ {
17543
+ "epoch": 9.42,
17544
+ "eval_loss": 0.17347006499767303,
17545
+ "eval_runtime": 2.7428,
17546
+ "eval_samples_per_second": 837.47,
17547
+ "eval_steps_per_second": 13.125,
17548
+ "step": 877000
17549
+ },
17550
+ {
17551
+ "epoch": 9.43,
17552
+ "learning_rate": 1.5665593557253623e-05,
17553
+ "loss": 0.1833,
17554
+ "step": 877500
17555
+ },
17556
+ {
17557
+ "epoch": 9.44,
17558
+ "learning_rate": 1.562006600405996e-05,
17559
+ "loss": 0.1829,
17560
+ "step": 878000
17561
+ },
17562
+ {
17563
+ "epoch": 9.44,
17564
+ "eval_loss": 0.1734461635351181,
17565
+ "eval_runtime": 2.6113,
17566
+ "eval_samples_per_second": 879.646,
17567
+ "eval_steps_per_second": 13.786,
17568
+ "step": 878000
17569
+ },
17570
+ {
17571
+ "epoch": 9.44,
17572
+ "learning_rate": 1.5574714463147512e-05,
17573
+ "loss": 0.1831,
17574
+ "step": 878500
17575
+ },
17576
+ {
17577
+ "epoch": 9.45,
17578
+ "learning_rate": 1.5529539058505624e-05,
17579
+ "loss": 0.183,
17580
+ "step": 879000
17581
+ },
17582
+ {
17583
+ "epoch": 9.45,
17584
+ "eval_loss": 0.17375677824020386,
17585
+ "eval_runtime": 2.5315,
17586
+ "eval_samples_per_second": 907.374,
17587
+ "eval_steps_per_second": 14.221,
17588
+ "step": 879000
17589
+ },
17590
+ {
17591
+ "epoch": 9.45,
17592
+ "learning_rate": 1.5484539913642175e-05,
17593
+ "loss": 0.1826,
17594
+ "step": 879500
17595
+ },
17596
+ {
17597
+ "epoch": 9.46,
17598
+ "learning_rate": 1.543971715158307e-05,
17599
+ "loss": 0.1828,
17600
+ "step": 880000
17601
+ },
17602
+ {
17603
+ "epoch": 9.46,
17604
+ "eval_loss": 0.17431409657001495,
17605
+ "eval_runtime": 2.6398,
17606
+ "eval_samples_per_second": 870.144,
17607
+ "eval_steps_per_second": 13.637,
17608
+ "step": 880000
17609
  }
17610
  ],
17611
  "max_steps": 1000000,
17612
  "num_train_epochs": 12,
17613
+ "total_flos": 6.168773682642908e+22,
17614
  "trial_name": null,
17615
  "trial_params": null
17616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f70f0274e15c89ba1e5e2f894493b1d0b23475cd923b06c04110b0afc32880fa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff45cd407febf926d10bda98ff4d352e6977480876fc00eacce9c1938f55c43
3
  size 449471589