jflotz committed on
Commit
e0bf422
·
1 Parent(s): ad05784

Training in progress, step 480000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5de66d4a93d929dbf59bf082b441005c55284067d059fcb692905debf93d43ec
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4060e633dae015e5639b74a57f1603654125aa36a5a7f3f6681895bc39045ec
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b866106aa5570064c241431938c823c9b3d9c0359a68aaf12dac64797d2c681f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f459617feff07cc080660c37736876af343cf74d6054e3f49e34fa66dc0e7730
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0939dfbb6ece3c207cb8da7c45398fb3890f9eac2a2a8a7c463fa15eb93581f8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999632cad034f4eed7b42df53c59662bbc3b13bc2d63a7da3c114bc461d69692
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d15e11a6de6abd55121a73bc214cc950fb971f927ae2b1d5067145da50de5d0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebbfa680b1187d22cc7371654116ef29dab3c85749ad34b845956736ad3b3612
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.483583154889365,
5
- "global_step": 470000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9406,11 +9406,211 @@
9406
  "eval_samples_per_second": 1202.649,
9407
  "eval_steps_per_second": 18.849,
9408
  "step": 470000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9409
  }
9410
  ],
9411
  "max_steps": 500000,
9412
  "num_train_epochs": 12,
9413
- "total_flos": 1.5015610979787347e+22,
9414
  "trial_name": null,
9415
  "trial_params": null
9416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.70663811563169,
5
+ "global_step": 480000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9406
  "eval_samples_per_second": 1202.649,
9407
  "eval_steps_per_second": 18.849,
9408
  "step": 470000
9409
+ },
9410
+ {
9411
+ "epoch": 10.49,
9412
+ "learning_rate": 1.2751163567020592e-05,
9413
+ "loss": 0.2544,
9414
+ "step": 470500
9415
+ },
9416
+ {
9417
+ "epoch": 10.51,
9418
+ "learning_rate": 1.2658977796373478e-05,
9419
+ "loss": 0.2546,
9420
+ "step": 471000
9421
+ },
9422
+ {
9423
+ "epoch": 10.51,
9424
+ "eval_loss": 0.23835012316703796,
9425
+ "eval_runtime": 2.0021,
9426
+ "eval_samples_per_second": 1147.276,
9427
+ "eval_steps_per_second": 17.981,
9428
+ "step": 471000
9429
+ },
9430
+ {
9431
+ "epoch": 10.52,
9432
+ "learning_rate": 1.2568348644340153e-05,
9433
+ "loss": 0.2545,
9434
+ "step": 471500
9435
+ },
9436
+ {
9437
+ "epoch": 10.53,
9438
+ "learning_rate": 1.2479277102026465e-05,
9439
+ "loss": 0.2544,
9440
+ "step": 472000
9441
+ },
9442
+ {
9443
+ "epoch": 10.53,
9444
+ "eval_loss": 0.23700165748596191,
9445
+ "eval_runtime": 2.0158,
9446
+ "eval_samples_per_second": 1139.473,
9447
+ "eval_steps_per_second": 17.859,
9448
+ "step": 472000
9449
+ },
9450
+ {
9451
+ "epoch": 10.54,
9452
+ "learning_rate": 1.2391764143504556e-05,
9453
+ "loss": 0.2542,
9454
+ "step": 472500
9455
+ },
9456
+ {
9457
+ "epoch": 10.55,
9458
+ "learning_rate": 1.2305810725802118e-05,
9459
+ "loss": 0.254,
9460
+ "step": 473000
9461
+ },
9462
+ {
9463
+ "epoch": 10.55,
9464
+ "eval_loss": 0.23847134411334991,
9465
+ "eval_runtime": 1.9758,
9466
+ "eval_samples_per_second": 1162.557,
9467
+ "eval_steps_per_second": 18.22,
9468
+ "step": 473000
9469
+ },
9470
+ {
9471
+ "epoch": 10.56,
9472
+ "learning_rate": 1.222141778889195e-05,
9473
+ "loss": 0.2538,
9474
+ "step": 473500
9475
+ },
9476
+ {
9477
+ "epoch": 10.57,
9478
+ "learning_rate": 1.2138586255681707e-05,
9479
+ "loss": 0.2539,
9480
+ "step": 474000
9481
+ },
9482
+ {
9483
+ "epoch": 10.57,
9484
+ "eval_loss": 0.23491570353507996,
9485
+ "eval_runtime": 1.99,
9486
+ "eval_samples_per_second": 1154.271,
9487
+ "eval_steps_per_second": 18.09,
9488
+ "step": 474000
9489
+ },
9490
+ {
9491
+ "epoch": 10.58,
9492
+ "learning_rate": 1.2057317032003731e-05,
9493
+ "loss": 0.2542,
9494
+ "step": 474500
9495
+ },
9496
+ {
9497
+ "epoch": 10.6,
9498
+ "learning_rate": 1.1977611006605263e-05,
9499
+ "loss": 0.2549,
9500
+ "step": 475000
9501
+ },
9502
+ {
9503
+ "epoch": 10.6,
9504
+ "eval_loss": 0.2358667552471161,
9505
+ "eval_runtime": 2.0083,
9506
+ "eval_samples_per_second": 1143.743,
9507
+ "eval_steps_per_second": 17.925,
9508
+ "step": 475000
9509
+ },
9510
+ {
9511
+ "epoch": 10.61,
9512
+ "learning_rate": 1.1899469051138602e-05,
9513
+ "loss": 0.2543,
9514
+ "step": 475500
9515
+ },
9516
+ {
9517
+ "epoch": 10.62,
9518
+ "learning_rate": 1.1822892020151667e-05,
9519
+ "loss": 0.2549,
9520
+ "step": 476000
9521
+ },
9522
+ {
9523
+ "epoch": 10.62,
9524
+ "eval_loss": 0.2398059368133545,
9525
+ "eval_runtime": 2.007,
9526
+ "eval_samples_per_second": 1144.474,
9527
+ "eval_steps_per_second": 17.937,
9528
+ "step": 476000
9529
+ },
9530
+ {
9531
+ "epoch": 10.63,
9532
+ "learning_rate": 1.1747880751078614e-05,
9533
+ "loss": 0.2554,
9534
+ "step": 476500
9535
+ },
9536
+ {
9537
+ "epoch": 10.64,
9538
+ "learning_rate": 1.1674436064230637e-05,
9539
+ "loss": 0.2548,
9540
+ "step": 477000
9541
+ },
9542
+ {
9543
+ "epoch": 10.64,
9544
+ "eval_loss": 0.23931536078453064,
9545
+ "eval_runtime": 1.9895,
9546
+ "eval_samples_per_second": 1154.534,
9547
+ "eval_steps_per_second": 18.095,
9548
+ "step": 477000
9549
+ },
9550
+ {
9551
+ "epoch": 10.65,
9552
+ "learning_rate": 1.1602558762787069e-05,
9553
+ "loss": 0.2547,
9554
+ "step": 477500
9555
+ },
9556
+ {
9557
+ "epoch": 10.66,
9558
+ "learning_rate": 1.1532249632786582e-05,
9559
+ "loss": 0.2542,
9560
+ "step": 478000
9561
+ },
9562
+ {
9563
+ "epoch": 10.66,
9564
+ "eval_loss": 0.237422376871109,
9565
+ "eval_runtime": 2.0815,
9566
+ "eval_samples_per_second": 1103.524,
9567
+ "eval_steps_per_second": 17.295,
9568
+ "step": 478000
9569
+ },
9570
+ {
9571
+ "epoch": 10.67,
9572
+ "learning_rate": 1.1463509443118552e-05,
9573
+ "loss": 0.2541,
9574
+ "step": 478500
9575
+ },
9576
+ {
9577
+ "epoch": 10.68,
9578
+ "learning_rate": 1.1396338945514663e-05,
9579
+ "loss": 0.2543,
9580
+ "step": 479000
9581
+ },
9582
+ {
9583
+ "epoch": 10.68,
9584
+ "eval_loss": 0.2378796935081482,
9585
+ "eval_runtime": 2.0025,
9586
+ "eval_samples_per_second": 1147.061,
9587
+ "eval_steps_per_second": 17.977,
9588
+ "step": 479000
9589
+ },
9590
+ {
9591
+ "epoch": 10.7,
9592
+ "learning_rate": 1.133073887454072e-05,
9593
+ "loss": 0.2538,
9594
+ "step": 479500
9595
+ },
9596
+ {
9597
+ "epoch": 10.71,
9598
+ "learning_rate": 1.1266709947588599e-05,
9599
+ "loss": 0.2539,
9600
+ "step": 480000
9601
+ },
9602
+ {
9603
+ "epoch": 10.71,
9604
+ "eval_loss": 0.2366316020488739,
9605
+ "eval_runtime": 1.9626,
9606
+ "eval_samples_per_second": 1170.393,
9607
+ "eval_steps_per_second": 18.343,
9608
+ "step": 480000
9609
  }
9610
  ],
9611
  "max_steps": 500000,
9612
  "num_train_epochs": 12,
9613
+ "total_flos": 1.5335098118561847e+22,
9614
  "trial_name": null,
9615
  "trial_params": null
9616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b866106aa5570064c241431938c823c9b3d9c0359a68aaf12dac64797d2c681f
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f459617feff07cc080660c37736876af343cf74d6054e3f49e34fa66dc0e7730
3
  size 102501541