jflotz commited on
Commit
fd6b070
·
1 Parent(s): 1951179

Training in progress, step 800000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20b6bf8d8d5ac37a0b6971256a9cb8002960aa2ad847514a0c95822b7371956c
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a28142d72c1a639c002962982eef85320960aec72ba5875c70ba183bc4428b1
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f69469f2f4fdd57ec4edb31c8576ad74c022cba61a82ed361e8c725a8bfd2e3
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce05dc3d2487d7ae9aa6b0c59ea7f4616a5a7c54ca64071fd6c540348ab08786
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5afad9e10e656b39d9c034eb9ad27905a1d701ec7ef4e06fa31f7ac951907d8c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c65759179a0409080d1617c50ff7701ce92dbb64fc3e317b9b62050537c3c7
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03e2d491d974215512be82d6817d741af1d742ce064c8b5b0baa9340cdbe75ca
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44a3c67c522ab27350032a90eb3d95a3dabd1324000cbd2835b62067512227df
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bcd5267c0fedea8e2e55ea80159101ae68ee5e1146b9a301bf39c2aa9967fc9
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d08d861261452452fd94fae84216cedf28f7702bcc30b7c349d5fe92376b24f
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:520166b189f0edf67170e629d5706d1486c93dc11ca389dea5cc1d7e88d1c779
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68acbc45c81b60f69d2a54d496b38309d0cdcd82a52bd9be2a013dfa91b5b790
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0087d17439da3954b20dcc50e6499dffee5c672acc2c36f39d3118a283f2182
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7bd01ddf08fc5e563abc90c1bcd231f2103e9ae4cae190ae9888d8dbb7258d1
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fbf378409b94ea4e42885f8fd80f66f4b8772dfb60fc2828754427211bf4250
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294721238c7a8c697c0dc55ae3f4c4580f7fc2de42c41858980ea55e897cb68b
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1391cf6e3c77ca8f90819d71da90612fe1be4331051d0d6f5e3bcd7b00dcd5f4
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a269e37b31e512e59995e2d8559babba1725032fdbd7ed2e8cd2d1c9cdf42315
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:551b9dc6c1d0749787d20f6ec1e55e5ed180247a2ee512bc66f5e7851b51c414
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ada5eca471a55afc26d1f38512d1163ebd3e62514e19a602711641b70fa1cc7
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:228d16b33dbcec485ca9a2cd36ed151eb467accc3519b3dbd8f821e5f67aeb40
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1b9455cfc1cc0d89898dcc5885e6586d120f989c1d84c4e74dce29a1aeae31
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.119578128363135,
5
- "global_step": 790000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9486,11 +9486,131 @@
9486
  "learning_rate": 2.6211922240019883e-05,
9487
  "loss": 0.2908,
9488
  "step": 790000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9489
  }
9490
  ],
9491
  "max_steps": 1000000,
9492
  "num_train_epochs": 2,
9493
- "total_flos": 5.340954623628781e+22,
9494
  "trial_name": null,
9495
  "trial_params": null
9496
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1395078164236574,
5
+ "global_step": 800000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9486
  "learning_rate": 2.6211922240019883e-05,
9487
  "loss": 0.2908,
9488
  "step": 790000
9489
+ },
9490
+ {
9491
+ "epoch": 1.12,
9492
+ "learning_rate": 2.6137923994427768e-05,
9493
+ "loss": 0.2911,
9494
+ "step": 790500
9495
+ },
9496
+ {
9497
+ "epoch": 1.12,
9498
+ "learning_rate": 2.6064073005694758e-05,
9499
+ "loss": 0.2909,
9500
+ "step": 791000
9501
+ },
9502
+ {
9503
+ "epoch": 1.12,
9504
+ "learning_rate": 2.5990369475726598e-05,
9505
+ "loss": 0.2898,
9506
+ "step": 791500
9507
+ },
9508
+ {
9509
+ "epoch": 1.12,
9510
+ "learning_rate": 2.591681360602595e-05,
9511
+ "loss": 0.2902,
9512
+ "step": 792000
9513
+ },
9514
+ {
9515
+ "epoch": 1.12,
9516
+ "learning_rate": 2.5843405597691748e-05,
9517
+ "loss": 0.2909,
9518
+ "step": 792500
9519
+ },
9520
+ {
9521
+ "epoch": 1.13,
9522
+ "learning_rate": 2.577014565141866e-05,
9523
+ "loss": 0.2899,
9524
+ "step": 793000
9525
+ },
9526
+ {
9527
+ "epoch": 1.13,
9528
+ "learning_rate": 2.569703396749661e-05,
9529
+ "loss": 0.2905,
9530
+ "step": 793500
9531
+ },
9532
+ {
9533
+ "epoch": 1.13,
9534
+ "learning_rate": 2.562407074581014e-05,
9535
+ "loss": 0.2908,
9536
+ "step": 794000
9537
+ },
9538
+ {
9539
+ "epoch": 1.13,
9540
+ "learning_rate": 2.5551256185837897e-05,
9541
+ "loss": 0.2904,
9542
+ "step": 794500
9543
+ },
9544
+ {
9545
+ "epoch": 1.13,
9546
+ "learning_rate": 2.5478590486652137e-05,
9547
+ "loss": 0.2905,
9548
+ "step": 795000
9549
+ },
9550
+ {
9551
+ "epoch": 1.13,
9552
+ "learning_rate": 2.5406073846918076e-05,
9553
+ "loss": 0.2901,
9554
+ "step": 795500
9555
+ },
9556
+ {
9557
+ "epoch": 1.13,
9558
+ "learning_rate": 2.533370646489347e-05,
9559
+ "loss": 0.2904,
9560
+ "step": 796000
9561
+ },
9562
+ {
9563
+ "epoch": 1.13,
9564
+ "learning_rate": 2.526148853842796e-05,
9565
+ "loss": 0.2903,
9566
+ "step": 796500
9567
+ },
9568
+ {
9569
+ "epoch": 1.13,
9570
+ "learning_rate": 2.5189420264962586e-05,
9571
+ "loss": 0.2898,
9572
+ "step": 797000
9573
+ },
9574
+ {
9575
+ "epoch": 1.13,
9576
+ "learning_rate": 2.5117501841529297e-05,
9577
+ "loss": 0.291,
9578
+ "step": 797500
9579
+ },
9580
+ {
9581
+ "epoch": 1.14,
9582
+ "learning_rate": 2.504573346475026e-05,
9583
+ "loss": 0.2897,
9584
+ "step": 798000
9585
+ },
9586
+ {
9587
+ "epoch": 1.14,
9588
+ "learning_rate": 2.497411533083753e-05,
9589
+ "loss": 0.2901,
9590
+ "step": 798500
9591
+ },
9592
+ {
9593
+ "epoch": 1.14,
9594
+ "learning_rate": 2.4902647635592324e-05,
9595
+ "loss": 0.2909,
9596
+ "step": 799000
9597
+ },
9598
+ {
9599
+ "epoch": 1.14,
9600
+ "learning_rate": 2.483133057440458e-05,
9601
+ "loss": 0.2899,
9602
+ "step": 799500
9603
+ },
9604
+ {
9605
+ "epoch": 1.14,
9606
+ "learning_rate": 2.476016434225246e-05,
9607
+ "loss": 0.2901,
9608
+ "step": 800000
9609
  }
9610
  ],
9611
  "max_steps": 1000000,
9612
  "num_train_epochs": 2,
9613
+ "total_flos": 5.4085624444111735e+22,
9614
  "trial_name": null,
9615
  "trial_params": null
9616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f69469f2f4fdd57ec4edb31c8576ad74c022cba61a82ed361e8c725a8bfd2e3
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce05dc3d2487d7ae9aa6b0c59ea7f4616a5a7c54ca64071fd6c540348ab08786
3
  size 449450757