efraimdahl committed on
Commit
20e8f4a
·
verified ·
1 Parent(s): 02f2a1e

Training in progress, epoch 99, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55bb7ce979a3c70eea06a8b02d97643a593536ca44884e478658dbdfc31aa83f
3
  size 56862772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:594ed2c8f0bb4f3ff612a4d6690cd0ea9d181c24a0784d1d908297ba826b6170
3
  size 56862772
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0748caf064c33cc214b86dfcfd3c0d78a1351a34a8190c72757b0b532fefab3
3
  size 113744007
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd80a819cfcb97bd05ace5efd7019407db4c099d4f769f4ba75c086a7ef96d8c
3
  size 113744007
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8182b53f98dd7a88836b2ed9f6b04ffa9c680b2470d39112000a2e88ba6e5f4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9688ecbaaabe6dd2631b971e0ac049ec8c0a81e254a5a8c4146f3739c589121a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea873c34a006167303e12093796bad868549cde58efe9398d5c9c742cde1ed48
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ca5b03dc7d99ca80c2974df07874d742f8195d77d13c5aa897314a6ac0e7cf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 19,
3
  "best_metric": 0.07475842535495758,
4
  "best_model_checkpoint": "./results/checkpoint-19",
5
- "epoch": 65.0,
6
  "eval_steps": 500,
7
- "global_step": 65,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -570,6 +570,299 @@
570
  "eval_samples_per_second": 617.163,
571
  "eval_steps_per_second": 123.433,
572
  "step": 65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  }
574
  ],
575
  "logging_steps": 10,
 
2
  "best_global_step": 19,
3
  "best_metric": 0.07475842535495758,
4
  "best_model_checkpoint": "./results/checkpoint-19",
5
+ "epoch": 99.0,
6
  "eval_steps": 500,
7
+ "global_step": 99,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
570
  "eval_samples_per_second": 617.163,
571
  "eval_steps_per_second": 123.433,
572
  "step": 65
573
+ },
574
+ {
575
+ "epoch": 66.0,
576
+ "eval_loss": 0.1179947480559349,
577
+ "eval_runtime": 0.0163,
578
+ "eval_samples_per_second": 614.946,
579
+ "eval_steps_per_second": 122.989,
580
+ "step": 66
581
+ },
582
+ {
583
+ "epoch": 67.0,
584
+ "eval_loss": 0.07585181295871735,
585
+ "eval_runtime": 0.0255,
586
+ "eval_samples_per_second": 391.574,
587
+ "eval_steps_per_second": 78.315,
588
+ "step": 67
589
+ },
590
+ {
591
+ "epoch": 68.0,
592
+ "eval_loss": 0.09531017392873764,
593
+ "eval_runtime": 0.0206,
594
+ "eval_samples_per_second": 486.415,
595
+ "eval_steps_per_second": 97.283,
596
+ "step": 68
597
+ },
598
+ {
599
+ "epoch": 69.0,
600
+ "eval_loss": 0.14820639789104462,
601
+ "eval_runtime": 0.0208,
602
+ "eval_samples_per_second": 480.227,
603
+ "eval_steps_per_second": 96.045,
604
+ "step": 69
605
+ },
606
+ {
607
+ "epoch": 70.0,
608
+ "grad_norm": 5.213359355926514,
609
+ "learning_rate": 0.0031,
610
+ "loss": 0.1041,
611
+ "step": 70
612
+ },
613
+ {
614
+ "epoch": 70.0,
615
+ "eval_loss": 0.09258551150560379,
616
+ "eval_runtime": 0.0168,
617
+ "eval_samples_per_second": 596.044,
618
+ "eval_steps_per_second": 119.209,
619
+ "step": 70
620
+ },
621
+ {
622
+ "epoch": 71.0,
623
+ "eval_loss": 0.12545828521251678,
624
+ "eval_runtime": 0.029,
625
+ "eval_samples_per_second": 344.343,
626
+ "eval_steps_per_second": 68.869,
627
+ "step": 71
628
+ },
629
+ {
630
+ "epoch": 72.0,
631
+ "eval_loss": 0.07994948327541351,
632
+ "eval_runtime": 0.0264,
633
+ "eval_samples_per_second": 378.342,
634
+ "eval_steps_per_second": 75.668,
635
+ "step": 72
636
+ },
637
+ {
638
+ "epoch": 73.0,
639
+ "eval_loss": 0.10161998122930527,
640
+ "eval_runtime": 0.0164,
641
+ "eval_samples_per_second": 611.317,
642
+ "eval_steps_per_second": 122.263,
643
+ "step": 73
644
+ },
645
+ {
646
+ "epoch": 74.0,
647
+ "eval_loss": 0.08442724496126175,
648
+ "eval_runtime": 0.0172,
649
+ "eval_samples_per_second": 579.764,
650
+ "eval_steps_per_second": 115.953,
651
+ "step": 74
652
+ },
653
+ {
654
+ "epoch": 75.0,
655
+ "eval_loss": 0.13757655024528503,
656
+ "eval_runtime": 0.0159,
657
+ "eval_samples_per_second": 628.097,
658
+ "eval_steps_per_second": 125.619,
659
+ "step": 75
660
+ },
661
+ {
662
+ "epoch": 76.0,
663
+ "eval_loss": 0.10746718943119049,
664
+ "eval_runtime": 0.016,
665
+ "eval_samples_per_second": 623.336,
666
+ "eval_steps_per_second": 124.667,
667
+ "step": 76
668
+ },
669
+ {
670
+ "epoch": 77.0,
671
+ "eval_loss": 0.07997341454029083,
672
+ "eval_runtime": 0.0163,
673
+ "eval_samples_per_second": 613.337,
674
+ "eval_steps_per_second": 122.667,
675
+ "step": 77
676
+ },
677
+ {
678
+ "epoch": 78.0,
679
+ "eval_loss": 0.08581076562404633,
680
+ "eval_runtime": 0.0225,
681
+ "eval_samples_per_second": 444.59,
682
+ "eval_steps_per_second": 88.918,
683
+ "step": 78
684
+ },
685
+ {
686
+ "epoch": 79.0,
687
+ "eval_loss": 0.07667073607444763,
688
+ "eval_runtime": 0.0173,
689
+ "eval_samples_per_second": 578.748,
690
+ "eval_steps_per_second": 115.75,
691
+ "step": 79
692
+ },
693
+ {
694
+ "epoch": 80.0,
695
+ "grad_norm": 4.289701461791992,
696
+ "learning_rate": 0.0021,
697
+ "loss": 0.0468,
698
+ "step": 80
699
+ },
700
+ {
701
+ "epoch": 80.0,
702
+ "eval_loss": 0.1910361349582672,
703
+ "eval_runtime": 0.0156,
704
+ "eval_samples_per_second": 639.015,
705
+ "eval_steps_per_second": 127.803,
706
+ "step": 80
707
+ },
708
+ {
709
+ "epoch": 81.0,
710
+ "eval_loss": 0.2634718120098114,
711
+ "eval_runtime": 0.0162,
712
+ "eval_samples_per_second": 617.854,
713
+ "eval_steps_per_second": 123.571,
714
+ "step": 81
715
+ },
716
+ {
717
+ "epoch": 82.0,
718
+ "eval_loss": 0.20500917732715607,
719
+ "eval_runtime": 0.0159,
720
+ "eval_samples_per_second": 627.542,
721
+ "eval_steps_per_second": 125.508,
722
+ "step": 82
723
+ },
724
+ {
725
+ "epoch": 83.0,
726
+ "eval_loss": 0.09815473854541779,
727
+ "eval_runtime": 0.0159,
728
+ "eval_samples_per_second": 627.871,
729
+ "eval_steps_per_second": 125.574,
730
+ "step": 83
731
+ },
732
+ {
733
+ "epoch": 84.0,
734
+ "eval_loss": 0.08160002529621124,
735
+ "eval_runtime": 0.0256,
736
+ "eval_samples_per_second": 390.746,
737
+ "eval_steps_per_second": 78.149,
738
+ "step": 84
739
+ },
740
+ {
741
+ "epoch": 85.0,
742
+ "eval_loss": 0.0980997309088707,
743
+ "eval_runtime": 0.0163,
744
+ "eval_samples_per_second": 613.005,
745
+ "eval_steps_per_second": 122.601,
746
+ "step": 85
747
+ },
748
+ {
749
+ "epoch": 86.0,
750
+ "eval_loss": 0.08293064683675766,
751
+ "eval_runtime": 0.016,
752
+ "eval_samples_per_second": 625.409,
753
+ "eval_steps_per_second": 125.082,
754
+ "step": 86
755
+ },
756
+ {
757
+ "epoch": 87.0,
758
+ "eval_loss": 0.08042607456445694,
759
+ "eval_runtime": 0.0166,
760
+ "eval_samples_per_second": 602.82,
761
+ "eval_steps_per_second": 120.564,
762
+ "step": 87
763
+ },
764
+ {
765
+ "epoch": 88.0,
766
+ "eval_loss": 0.17653095722198486,
767
+ "eval_runtime": 0.0166,
768
+ "eval_samples_per_second": 600.671,
769
+ "eval_steps_per_second": 120.134,
770
+ "step": 88
771
+ },
772
+ {
773
+ "epoch": 89.0,
774
+ "eval_loss": 0.2547139525413513,
775
+ "eval_runtime": 0.016,
776
+ "eval_samples_per_second": 624.729,
777
+ "eval_steps_per_second": 124.946,
778
+ "step": 89
779
+ },
780
+ {
781
+ "epoch": 90.0,
782
+ "grad_norm": 12.196877479553223,
783
+ "learning_rate": 0.0011,
784
+ "loss": 0.0779,
785
+ "step": 90
786
+ },
787
+ {
788
+ "epoch": 90.0,
789
+ "eval_loss": 0.2526191473007202,
790
+ "eval_runtime": 0.0164,
791
+ "eval_samples_per_second": 609.602,
792
+ "eval_steps_per_second": 121.92,
793
+ "step": 90
794
+ },
795
+ {
796
+ "epoch": 91.0,
797
+ "eval_loss": 0.1893763244152069,
798
+ "eval_runtime": 0.0275,
799
+ "eval_samples_per_second": 363.89,
800
+ "eval_steps_per_second": 72.778,
801
+ "step": 91
802
+ },
803
+ {
804
+ "epoch": 92.0,
805
+ "eval_loss": 0.11402726173400879,
806
+ "eval_runtime": 0.0165,
807
+ "eval_samples_per_second": 604.506,
808
+ "eval_steps_per_second": 120.901,
809
+ "step": 92
810
+ },
811
+ {
812
+ "epoch": 93.0,
813
+ "eval_loss": 0.0751984640955925,
814
+ "eval_runtime": 0.0174,
815
+ "eval_samples_per_second": 575.516,
816
+ "eval_steps_per_second": 115.103,
817
+ "step": 93
818
+ },
819
+ {
820
+ "epoch": 94.0,
821
+ "eval_loss": 0.0785483792424202,
822
+ "eval_runtime": 0.0162,
823
+ "eval_samples_per_second": 618.592,
824
+ "eval_steps_per_second": 123.718,
825
+ "step": 94
826
+ },
827
+ {
828
+ "epoch": 95.0,
829
+ "eval_loss": 0.08064951747655869,
830
+ "eval_runtime": 0.0174,
831
+ "eval_samples_per_second": 575.698,
832
+ "eval_steps_per_second": 115.14,
833
+ "step": 95
834
+ },
835
+ {
836
+ "epoch": 96.0,
837
+ "eval_loss": 0.07702342420816422,
838
+ "eval_runtime": 0.0179,
839
+ "eval_samples_per_second": 559.129,
840
+ "eval_steps_per_second": 111.826,
841
+ "step": 96
842
+ },
843
+ {
844
+ "epoch": 97.0,
845
+ "eval_loss": 0.07478635758161545,
846
+ "eval_runtime": 0.0293,
847
+ "eval_samples_per_second": 341.431,
848
+ "eval_steps_per_second": 68.286,
849
+ "step": 97
850
+ },
851
+ {
852
+ "epoch": 98.0,
853
+ "eval_loss": 0.07912726700305939,
854
+ "eval_runtime": 0.0202,
855
+ "eval_samples_per_second": 495.289,
856
+ "eval_steps_per_second": 99.058,
857
+ "step": 98
858
+ },
859
+ {
860
+ "epoch": 99.0,
861
+ "eval_loss": 0.0888177827000618,
862
+ "eval_runtime": 0.0291,
863
+ "eval_samples_per_second": 343.866,
864
+ "eval_steps_per_second": 68.773,
865
+ "step": 99
866
  }
867
  ],
868
  "logging_steps": 10,