mgh6 commited on
Commit
e2c7edf
·
verified ·
1 Parent(s): ffc64e3

Training in progress, epoch 5, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45a0f32b96311d18145a36a2b7ac7077d7c147fa2552926df5bcbb94fa477dcc
3
  size 2708729576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0822fc409472b4df347ce0c6dbb5bf1d445c794af75086f172bf6c2394a18c
3
  size 2708729576
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e0cbd5a6d8a5ccf63de302ffe56531268f9153e400c4a49dbb62f54a65e3c0c
3
  size 52499200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8fcb46ff34fb6b3c884e340b62e5087660faddc671c8d46dc2f75938bf059c
3
  size 52499200
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13a87652f696aae07a6616b1088616ece95dadb9b70d2197d96fbef272ac3457
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45b784475c6c47021a6ff08c5d6a553172a50192f5de3c134961024cdbbaf9f
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0267fbad859e57b7ff33d6dbd4fbd9fdf3cbf25f82f07754a3b6e19cff3ef2f5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b514faa73b6e320d8ae19d93f3da594146e59f1072af645ee09b9ce747afd0a1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
  "eval_steps": 10,
6
- "global_step": 516,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -721,6 +721,188 @@
721
  "eval_samples_per_second": 21.855,
722
  "eval_steps_per_second": 5.464,
723
  "step": 510
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
  }
725
  ],
726
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
  "eval_steps": 10,
6
+ "global_step": 645,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
721
  "eval_samples_per_second": 21.855,
722
  "eval_steps_per_second": 5.464,
723
  "step": 510
724
+ },
725
+ {
726
+ "epoch": 4.031234748657882,
727
+ "grad_norm": 484968.46875,
728
+ "learning_rate": 5.9375e-05,
729
+ "loss": 1164.3749,
730
+ "step": 520
731
+ },
732
+ {
733
+ "epoch": 4.031234748657882,
734
+ "eval_runtime": 19.6732,
735
+ "eval_samples_per_second": 21.959,
736
+ "eval_steps_per_second": 5.49,
737
+ "step": 520
738
+ },
739
+ {
740
+ "epoch": 4.109321620302587,
741
+ "grad_norm": 329296.75,
742
+ "learning_rate": 5.8593750000000005e-05,
743
+ "loss": 1631.9979,
744
+ "step": 530
745
+ },
746
+ {
747
+ "epoch": 4.109321620302587,
748
+ "eval_runtime": 19.7244,
749
+ "eval_samples_per_second": 21.902,
750
+ "eval_steps_per_second": 5.475,
751
+ "step": 530
752
+ },
753
+ {
754
+ "epoch": 4.187408491947291,
755
+ "grad_norm": 554537.125,
756
+ "learning_rate": 5.78125e-05,
757
+ "loss": 1570.2684,
758
+ "step": 540
759
+ },
760
+ {
761
+ "epoch": 4.187408491947291,
762
+ "eval_runtime": 19.7495,
763
+ "eval_samples_per_second": 21.874,
764
+ "eval_steps_per_second": 5.468,
765
+ "step": 540
766
+ },
767
+ {
768
+ "epoch": 4.265495363591996,
769
+ "grad_norm": 417775.03125,
770
+ "learning_rate": 5.703125e-05,
771
+ "loss": 1599.58,
772
+ "step": 550
773
+ },
774
+ {
775
+ "epoch": 4.265495363591996,
776
+ "eval_runtime": 19.7438,
777
+ "eval_samples_per_second": 21.88,
778
+ "eval_steps_per_second": 5.47,
779
+ "step": 550
780
+ },
781
+ {
782
+ "epoch": 4.343582235236701,
783
+ "grad_norm": 308738.78125,
784
+ "learning_rate": 5.6250000000000005e-05,
785
+ "loss": 1184.2381,
786
+ "step": 560
787
+ },
788
+ {
789
+ "epoch": 4.343582235236701,
790
+ "eval_runtime": 19.7443,
791
+ "eval_samples_per_second": 21.88,
792
+ "eval_steps_per_second": 5.47,
793
+ "step": 560
794
+ },
795
+ {
796
+ "epoch": 4.421669106881406,
797
+ "grad_norm": 281127.5625,
798
+ "learning_rate": 5.546875e-05,
799
+ "loss": 969.7674,
800
+ "step": 570
801
+ },
802
+ {
803
+ "epoch": 4.421669106881406,
804
+ "eval_runtime": 19.7635,
805
+ "eval_samples_per_second": 21.858,
806
+ "eval_steps_per_second": 5.465,
807
+ "step": 570
808
+ },
809
+ {
810
+ "epoch": 4.49975597852611,
811
+ "grad_norm": 343310.84375,
812
+ "learning_rate": 5.46875e-05,
813
+ "loss": 938.8365,
814
+ "step": 580
815
+ },
816
+ {
817
+ "epoch": 4.49975597852611,
818
+ "eval_runtime": 19.7819,
819
+ "eval_samples_per_second": 21.838,
820
+ "eval_steps_per_second": 5.46,
821
+ "step": 580
822
+ },
823
+ {
824
+ "epoch": 4.577842850170815,
825
+ "grad_norm": 274241.34375,
826
+ "learning_rate": 5.3906250000000006e-05,
827
+ "loss": 1338.7786,
828
+ "step": 590
829
+ },
830
+ {
831
+ "epoch": 4.577842850170815,
832
+ "eval_runtime": 19.7579,
833
+ "eval_samples_per_second": 21.865,
834
+ "eval_steps_per_second": 5.466,
835
+ "step": 590
836
+ },
837
+ {
838
+ "epoch": 4.65592972181552,
839
+ "grad_norm": 416818.875,
840
+ "learning_rate": 5.3125000000000004e-05,
841
+ "loss": 1180.1237,
842
+ "step": 600
843
+ },
844
+ {
845
+ "epoch": 4.65592972181552,
846
+ "eval_runtime": 19.7618,
847
+ "eval_samples_per_second": 21.86,
848
+ "eval_steps_per_second": 5.465,
849
+ "step": 600
850
+ },
851
+ {
852
+ "epoch": 4.7340165934602245,
853
+ "grad_norm": 292501.71875,
854
+ "learning_rate": 5.234375e-05,
855
+ "loss": 1117.2909,
856
+ "step": 610
857
+ },
858
+ {
859
+ "epoch": 4.7340165934602245,
860
+ "eval_runtime": 19.7565,
861
+ "eval_samples_per_second": 21.866,
862
+ "eval_steps_per_second": 5.467,
863
+ "step": 610
864
+ },
865
+ {
866
+ "epoch": 4.812103465104929,
867
+ "grad_norm": 415090.75,
868
+ "learning_rate": 5.15625e-05,
869
+ "loss": 1297.7303,
870
+ "step": 620
871
+ },
872
+ {
873
+ "epoch": 4.812103465104929,
874
+ "eval_runtime": 19.7543,
875
+ "eval_samples_per_second": 21.869,
876
+ "eval_steps_per_second": 5.467,
877
+ "step": 620
878
+ },
879
+ {
880
+ "epoch": 4.890190336749634,
881
+ "grad_norm": 481957.6875,
882
+ "learning_rate": 5.0781250000000004e-05,
883
+ "loss": 1024.6031,
884
+ "step": 630
885
+ },
886
+ {
887
+ "epoch": 4.890190336749634,
888
+ "eval_runtime": 19.7456,
889
+ "eval_samples_per_second": 21.878,
890
+ "eval_steps_per_second": 5.47,
891
+ "step": 630
892
+ },
893
+ {
894
+ "epoch": 4.968277208394339,
895
+ "grad_norm": 760376.8125,
896
+ "learning_rate": 5e-05,
897
+ "loss": 1106.1675,
898
+ "step": 640
899
+ },
900
+ {
901
+ "epoch": 4.968277208394339,
902
+ "eval_runtime": 19.7415,
903
+ "eval_samples_per_second": 21.883,
904
+ "eval_steps_per_second": 5.471,
905
+ "step": 640
906
  }
907
  ],
908
  "logging_steps": 10,