jflotz commited on
Commit
0b773af
·
1 Parent(s): 608207a

Training in progress, step 40000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6126f0ff62ccd2a1bb01472f31242c870de9e3dc53a06aa105077fae41608b79
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf293809682bf3798688c407d42afa788d77c5557da4033ee0baeac06cf1302
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b399c38e921c215c625dfa7144f627f6549650ed1df7e1ef7deb2d863ca674a9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d5f11aab6f911cbec235d15a3494a5c1ad6a9959fd4ddb8c6370040ccb52d96
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0be7c4ced12e0858928365082a1279c3330778ef5c90870360fd46cc963be5c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbd6f069207b6a5e0cacc85e6677e399c9463922f16c7a1b9e54b0ce635a16e
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0eff7bfd19bb5de4a804b312a4895c5e9ec017a31baa7a463d8d86ec7115b34c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b5c694b40cd5a966b5116288c30b2f19979f1058d82965ee57335ae5f1e596
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.33458617266877083,
5
- "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -606,11 +606,211 @@
606
  "eval_samples_per_second": 951.902,
607
  "eval_steps_per_second": 14.919,
608
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  }
610
  ],
611
  "max_steps": 1000000,
612
  "num_train_epochs": 12,
613
- "total_flos": 2.1030078309104144e+21,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4461148968916945,
5
+ "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
606
  "eval_samples_per_second": 951.902,
607
  "eval_steps_per_second": 14.919,
608
  "step": 30000
609
+ },
610
+ {
611
+ "epoch": 0.34,
612
+ "learning_rate": 9.149999999999999e-05,
613
+ "loss": 0.4509,
614
+ "step": 30500
615
+ },
616
+ {
617
+ "epoch": 0.35,
618
+ "learning_rate": 9.3e-05,
619
+ "loss": 0.4477,
620
+ "step": 31000
621
+ },
622
+ {
623
+ "epoch": 0.35,
624
+ "eval_loss": 0.4341259002685547,
625
+ "eval_runtime": 2.3655,
626
+ "eval_samples_per_second": 971.032,
627
+ "eval_steps_per_second": 15.219,
628
+ "step": 31000
629
+ },
630
+ {
631
+ "epoch": 0.35,
632
+ "learning_rate": 9.449999999999999e-05,
633
+ "loss": 0.4443,
634
+ "step": 31500
635
+ },
636
+ {
637
+ "epoch": 0.36,
638
+ "learning_rate": 9.599999999999999e-05,
639
+ "loss": 0.4413,
640
+ "step": 32000
641
+ },
642
+ {
643
+ "epoch": 0.36,
644
+ "eval_loss": 0.42718443274497986,
645
+ "eval_runtime": 2.4008,
646
+ "eval_samples_per_second": 956.762,
647
+ "eval_steps_per_second": 14.995,
648
+ "step": 32000
649
+ },
650
+ {
651
+ "epoch": 0.36,
652
+ "learning_rate": 9.75e-05,
653
+ "loss": 0.4376,
654
+ "step": 32500
655
+ },
656
+ {
657
+ "epoch": 0.37,
658
+ "learning_rate": 9.9e-05,
659
+ "loss": 0.4341,
660
+ "step": 33000
661
+ },
662
+ {
663
+ "epoch": 0.37,
664
+ "eval_loss": 0.41980886459350586,
665
+ "eval_runtime": 2.3982,
666
+ "eval_samples_per_second": 957.813,
667
+ "eval_steps_per_second": 15.011,
668
+ "step": 33000
669
+ },
670
+ {
671
+ "epoch": 0.37,
672
+ "learning_rate": 0.0001005,
673
+ "loss": 0.4312,
674
+ "step": 33500
675
+ },
676
+ {
677
+ "epoch": 0.38,
678
+ "learning_rate": 0.000102,
679
+ "loss": 0.4289,
680
+ "step": 34000
681
+ },
682
+ {
683
+ "epoch": 0.38,
684
+ "eval_loss": 0.4122560918331146,
685
+ "eval_runtime": 2.425,
686
+ "eval_samples_per_second": 947.221,
687
+ "eval_steps_per_second": 14.845,
688
+ "step": 34000
689
+ },
690
+ {
691
+ "epoch": 0.38,
692
+ "learning_rate": 0.00010349999999999998,
693
+ "loss": 0.4257,
694
+ "step": 34500
695
+ },
696
+ {
697
+ "epoch": 0.39,
698
+ "learning_rate": 0.00010499999999999999,
699
+ "loss": 0.4224,
700
+ "step": 35000
701
+ },
702
+ {
703
+ "epoch": 0.39,
704
+ "eval_loss": 0.40835943818092346,
705
+ "eval_runtime": 2.4071,
706
+ "eval_samples_per_second": 954.265,
707
+ "eval_steps_per_second": 14.956,
708
+ "step": 35000
709
+ },
710
+ {
711
+ "epoch": 0.4,
712
+ "learning_rate": 0.00010649999999999999,
713
+ "loss": 0.4202,
714
+ "step": 35500
715
+ },
716
+ {
717
+ "epoch": 0.4,
718
+ "learning_rate": 0.00010799999999999998,
719
+ "loss": 0.4173,
720
+ "step": 36000
721
+ },
722
+ {
723
+ "epoch": 0.4,
724
+ "eval_loss": 0.40327221155166626,
725
+ "eval_runtime": 2.3753,
726
+ "eval_samples_per_second": 967.051,
727
+ "eval_steps_per_second": 15.156,
728
+ "step": 36000
729
+ },
730
+ {
731
+ "epoch": 0.41,
732
+ "learning_rate": 0.00010949999999999999,
733
+ "loss": 0.4142,
734
+ "step": 36500
735
+ },
736
+ {
737
+ "epoch": 0.41,
738
+ "learning_rate": 0.00011099999999999999,
739
+ "loss": 0.412,
740
+ "step": 37000
741
+ },
742
+ {
743
+ "epoch": 0.41,
744
+ "eval_loss": 0.39642444252967834,
745
+ "eval_runtime": 2.4394,
746
+ "eval_samples_per_second": 941.616,
747
+ "eval_steps_per_second": 14.758,
748
+ "step": 37000
749
+ },
750
+ {
751
+ "epoch": 0.42,
752
+ "learning_rate": 0.0001125,
753
+ "loss": 0.4098,
754
+ "step": 37500
755
+ },
756
+ {
757
+ "epoch": 0.42,
758
+ "learning_rate": 0.00011399999999999999,
759
+ "loss": 0.407,
760
+ "step": 38000
761
+ },
762
+ {
763
+ "epoch": 0.42,
764
+ "eval_loss": 0.39146095514297485,
765
+ "eval_runtime": 2.3792,
766
+ "eval_samples_per_second": 965.449,
767
+ "eval_steps_per_second": 15.131,
768
+ "step": 38000
769
+ },
770
+ {
771
+ "epoch": 0.43,
772
+ "learning_rate": 0.00011549999999999999,
773
+ "loss": 0.4033,
774
+ "step": 38500
775
+ },
776
+ {
777
+ "epoch": 0.43,
778
+ "learning_rate": 0.000117,
779
+ "loss": 0.402,
780
+ "step": 39000
781
+ },
782
+ {
783
+ "epoch": 0.43,
784
+ "eval_loss": 0.3854062259197235,
785
+ "eval_runtime": 2.4437,
786
+ "eval_samples_per_second": 939.967,
787
+ "eval_steps_per_second": 14.732,
788
+ "step": 39000
789
+ },
790
+ {
791
+ "epoch": 0.44,
792
+ "learning_rate": 0.0001185,
793
+ "loss": 0.3991,
794
+ "step": 39500
795
+ },
796
+ {
797
+ "epoch": 0.45,
798
+ "learning_rate": 0.00011999999999999999,
799
+ "loss": 0.3966,
800
+ "step": 40000
801
+ },
802
+ {
803
+ "epoch": 0.45,
804
+ "eval_loss": 0.3808075189590454,
805
+ "eval_runtime": 2.4355,
806
+ "eval_samples_per_second": 943.135,
807
+ "eval_steps_per_second": 14.781,
808
+ "step": 40000
809
  }
810
  ],
811
  "max_steps": 1000000,
812
  "num_train_epochs": 12,
813
+ "total_flos": 2.804010441213886e+21,
814
  "trial_name": null,
815
  "trial_params": null
816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b399c38e921c215c625dfa7144f627f6549650ed1df7e1ef7deb2d863ca674a9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d5f11aab6f911cbec235d15a3494a5c1ad6a9959fd4ddb8c6370040ccb52d96
3
  size 449471589