Nadav commited on
Commit
238efe4
·
1 Parent(s): 4ef9c28

Training in progress, step 35000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce32d22625705c0286f9e6d65f3739a225afd225bf42450e442c235926dca0bd
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b8a99831b810a81afda5499f89d37d313f57de0e44acfd6fda4ffa5d407961
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecaaf33fb01c1c5987f8079721db31eb104bfbf4bf240e01f2563f77a752545b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eec5be5134af20148899e71dab937c29c165b2f523524e79200da7b125e7331
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:267980fc8ce2bc5d8b7d17111acb61434edc89327d5100338b6bb7b7ef476513
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8930b754593f69ea99a69818713906027d8b18db77040fbc82850fb457145d53
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7533fc854a01a1c19e7dd354294c0cb875567534d008f89ea504ec2c31e7aad3
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3636d5d5906420899d9a721abefc725ca1bec46f94db174f2d813e8cafd619
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab97dfb3ccb8bc19754256c974ba1c934a3de0fa701671d85dd79f589e39ddfb
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:304d89faced0aa75098d224436ef3865f3b1d27481cbd97cf9d9b995cd4a60e9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5518883974140865,
5
- "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -606,11 +606,111 @@
606
  "eval_samples_per_second": 31.625,
607
  "eval_steps_per_second": 1.012,
608
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  }
610
  ],
611
  "max_steps": 1000000,
612
  "num_train_epochs": 86,
613
- "total_flos": 1.3800665945772668e+21,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9772031303164344,
5
+ "global_step": 35000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
606
  "eval_samples_per_second": 31.625,
607
  "eval_steps_per_second": 1.012,
608
  "step": 30000
609
+ },
610
+ {
611
+ "epoch": 2.59,
612
+ "learning_rate": 9.999999999999999e-06,
613
+ "loss": 0.422,
614
+ "step": 30500
615
+ },
616
+ {
617
+ "epoch": 2.64,
618
+ "learning_rate": 9.999999999999999e-06,
619
+ "loss": 0.4227,
620
+ "step": 31000
621
+ },
622
+ {
623
+ "epoch": 2.64,
624
+ "eval_loss": 0.3907557427883148,
625
+ "eval_runtime": 16.5808,
626
+ "eval_samples_per_second": 30.155,
627
+ "eval_steps_per_second": 0.965,
628
+ "step": 31000
629
+ },
630
+ {
631
+ "epoch": 2.68,
632
+ "learning_rate": 9.999999999999999e-06,
633
+ "loss": 0.4213,
634
+ "step": 31500
635
+ },
636
+ {
637
+ "epoch": 2.72,
638
+ "learning_rate": 9.999999999999999e-06,
639
+ "loss": 0.421,
640
+ "step": 32000
641
+ },
642
+ {
643
+ "epoch": 2.72,
644
+ "eval_loss": 0.3934537172317505,
645
+ "eval_runtime": 24.4217,
646
+ "eval_samples_per_second": 20.474,
647
+ "eval_steps_per_second": 0.655,
648
+ "step": 32000
649
+ },
650
+ {
651
+ "epoch": 2.76,
652
+ "learning_rate": 9.999999999999999e-06,
653
+ "loss": 0.4207,
654
+ "step": 32500
655
+ },
656
+ {
657
+ "epoch": 2.81,
658
+ "learning_rate": 9.999999999999999e-06,
659
+ "loss": 0.4206,
660
+ "step": 33000
661
+ },
662
+ {
663
+ "epoch": 2.81,
664
+ "eval_loss": 0.3901897072792053,
665
+ "eval_runtime": 16.8693,
666
+ "eval_samples_per_second": 29.64,
667
+ "eval_steps_per_second": 0.948,
668
+ "step": 33000
669
+ },
670
+ {
671
+ "epoch": 2.85,
672
+ "learning_rate": 9.999999999999999e-06,
673
+ "loss": 0.4202,
674
+ "step": 33500
675
+ },
676
+ {
677
+ "epoch": 2.89,
678
+ "learning_rate": 9.999999999999999e-06,
679
+ "loss": 0.4196,
680
+ "step": 34000
681
+ },
682
+ {
683
+ "epoch": 2.89,
684
+ "eval_loss": 0.3905479609966278,
685
+ "eval_runtime": 16.5144,
686
+ "eval_samples_per_second": 30.277,
687
+ "eval_steps_per_second": 0.969,
688
+ "step": 34000
689
+ },
690
+ {
691
+ "epoch": 2.93,
692
+ "learning_rate": 9.999999999999999e-06,
693
+ "loss": 0.4191,
694
+ "step": 34500
695
+ },
696
+ {
697
+ "epoch": 2.98,
698
+ "learning_rate": 9.999999999999999e-06,
699
+ "loss": 0.4205,
700
+ "step": 35000
701
+ },
702
+ {
703
+ "epoch": 2.98,
704
+ "eval_loss": 0.390372633934021,
705
+ "eval_runtime": 16.8904,
706
+ "eval_samples_per_second": 29.603,
707
+ "eval_steps_per_second": 0.947,
708
+ "step": 35000
709
  }
710
  ],
711
  "max_steps": 1000000,
712
  "num_train_epochs": 86,
713
+ "total_flos": 1.6100830789800572e+21,
714
  "trial_name": null,
715
  "trial_params": null
716
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecaaf33fb01c1c5987f8079721db31eb104bfbf4bf240e01f2563f77a752545b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eec5be5134af20148899e71dab937c29c165b2f523524e79200da7b125e7331
3
  size 449471589