Kudod commited on
Commit
8a90716
·
verified ·
1 Parent(s): 9c6078c

Training in progress, step 60000, checkpoint

Browse files
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e815f23840abeebf3aeec2a270647701046a8558db559ce48bc792af39e06e5b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180f12bbe7f2ed362f4577b4819a9addb472c272a58f77ab4d99762fdc42308b
3
  size 14645
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.157658406785298,
6
  "eval_steps": 10000,
7
- "global_step": 50000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -748,6 +748,154 @@
748
  "eval_samples_per_second": 145.508,
749
  "eval_steps_per_second": 4.547,
750
  "step": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
751
  }
752
  ],
753
  "logging_steps": 500,
@@ -767,7 +915,7 @@
767
  "attributes": {}
768
  }
769
  },
770
- "total_flos": 5.6051019961845965e+17,
771
  "train_batch_size": 32,
772
  "trial_name": null,
773
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 4.989190088142358,
6
  "eval_steps": 10000,
7
+ "global_step": 60000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
748
  "eval_samples_per_second": 145.508,
749
  "eval_steps_per_second": 4.547,
750
  "step": 50000
751
+ },
752
+ {
753
+ "epoch": 4.199234990853151,
754
+ "grad_norm": NaN,
755
+ "learning_rate": 0.0,
756
+ "loss": 0.0,
757
+ "step": 50500
758
+ },
759
+ {
760
+ "epoch": 4.240811574921005,
761
+ "grad_norm": NaN,
762
+ "learning_rate": 0.0,
763
+ "loss": 0.0,
764
+ "step": 51000
765
+ },
766
+ {
767
+ "epoch": 4.282388158988858,
768
+ "grad_norm": NaN,
769
+ "learning_rate": 0.0,
770
+ "loss": 0.0,
771
+ "step": 51500
772
+ },
773
+ {
774
+ "epoch": 4.3239647430567105,
775
+ "grad_norm": NaN,
776
+ "learning_rate": 0.0,
777
+ "loss": 0.0,
778
+ "step": 52000
779
+ },
780
+ {
781
+ "epoch": 4.365541327124563,
782
+ "grad_norm": NaN,
783
+ "learning_rate": 0.0,
784
+ "loss": 0.0,
785
+ "step": 52500
786
+ },
787
+ {
788
+ "epoch": 4.407117911192416,
789
+ "grad_norm": NaN,
790
+ "learning_rate": 0.0,
791
+ "loss": 0.0,
792
+ "step": 53000
793
+ },
794
+ {
795
+ "epoch": 4.44869449526027,
796
+ "grad_norm": NaN,
797
+ "learning_rate": 0.0,
798
+ "loss": 0.0,
799
+ "step": 53500
800
+ },
801
+ {
802
+ "epoch": 4.490271079328123,
803
+ "grad_norm": NaN,
804
+ "learning_rate": 0.0,
805
+ "loss": 0.0,
806
+ "step": 54000
807
+ },
808
+ {
809
+ "epoch": 4.5318476633959754,
810
+ "grad_norm": NaN,
811
+ "learning_rate": 0.0,
812
+ "loss": 0.0,
813
+ "step": 54500
814
+ },
815
+ {
816
+ "epoch": 4.573424247463828,
817
+ "grad_norm": NaN,
818
+ "learning_rate": 0.0,
819
+ "loss": 0.0,
820
+ "step": 55000
821
+ },
822
+ {
823
+ "epoch": 4.615000831531681,
824
+ "grad_norm": NaN,
825
+ "learning_rate": 0.0,
826
+ "loss": 0.0,
827
+ "step": 55500
828
+ },
829
+ {
830
+ "epoch": 4.656577415599534,
831
+ "grad_norm": NaN,
832
+ "learning_rate": 0.0,
833
+ "loss": 0.0,
834
+ "step": 56000
835
+ },
836
+ {
837
+ "epoch": 4.698153999667388,
838
+ "grad_norm": NaN,
839
+ "learning_rate": 0.0,
840
+ "loss": 0.0,
841
+ "step": 56500
842
+ },
843
+ {
844
+ "epoch": 4.73973058373524,
845
+ "grad_norm": NaN,
846
+ "learning_rate": 0.0,
847
+ "loss": 0.0,
848
+ "step": 57000
849
+ },
850
+ {
851
+ "epoch": 4.781307167803093,
852
+ "grad_norm": NaN,
853
+ "learning_rate": 0.0,
854
+ "loss": 0.0,
855
+ "step": 57500
856
+ },
857
+ {
858
+ "epoch": 4.822883751870946,
859
+ "grad_norm": NaN,
860
+ "learning_rate": 0.0,
861
+ "loss": 0.0,
862
+ "step": 58000
863
+ },
864
+ {
865
+ "epoch": 4.8644603359388,
866
+ "grad_norm": NaN,
867
+ "learning_rate": 0.0,
868
+ "loss": 0.0,
869
+ "step": 58500
870
+ },
871
+ {
872
+ "epoch": 4.9060369200066525,
873
+ "grad_norm": NaN,
874
+ "learning_rate": 0.0,
875
+ "loss": 0.0,
876
+ "step": 59000
877
+ },
878
+ {
879
+ "epoch": 4.947613504074505,
880
+ "grad_norm": NaN,
881
+ "learning_rate": 0.0,
882
+ "loss": 0.0,
883
+ "step": 59500
884
+ },
885
+ {
886
+ "epoch": 4.989190088142358,
887
+ "grad_norm": NaN,
888
+ "learning_rate": 0.0,
889
+ "loss": 0.0,
890
+ "step": 60000
891
+ },
892
+ {
893
+ "epoch": 4.989190088142358,
894
+ "eval_loss": NaN,
895
+ "eval_runtime": 2644.9394,
896
+ "eval_samples_per_second": 145.495,
897
+ "eval_steps_per_second": 4.547,
898
+ "step": 60000
899
  }
900
  ],
901
  "logging_steps": 500,
 
915
  "attributes": {}
916
  }
917
  },
918
+ "total_flos": 6.72635011140738e+17,
919
  "train_batch_size": 32,
920
  "trial_name": null,
921
  "trial_params": null