error577 commited on
Commit
c77ef5e
·
verified ·
1 Parent(s): 066c6e9

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4415f68d0ad9aa86820b176ef18920044ae9992e448da9f88d5a4fe7bd4c2da
3
  size 522227376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5ff808bb68c434ef997edf614255f797d07bb051473afe1e16e7400eef2d707
3
  size 522227376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be584c76ae22a24595b5ef2a7ca110dec93dd9371debbbdd90dd6049669c081e
3
  size 1044601082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b605ba105bea4716a13c7a3589f3223c33a38a1550b5f4f5924a69ae1dfb5a
3
  size 1044601082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b220ca7d0891e3ed41ffbbe0e13d38c90b8ebd3e153cdd98aa8bbe43696039ba
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb09e0c4906589c65d02131fb9256b534e79ccc5846887453f3386448bf9a73b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b97b319aeb7599bcb17c039089ff38d423203de2450979c1b08a5047fa1c81f3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb8d16b823fc08a6bdb32c62fea04b073580a8731b10b24ab53c73ef146cf930
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.3958656787872314,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-20",
4
- "epoch": 0.10903267573000784,
5
  "eval_steps": 20,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -755,6 +755,154 @@
755
  "eval_samples_per_second": 1.999,
756
  "eval_steps_per_second": 1.999,
757
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
  }
759
  ],
760
  "logging_steps": 1,
@@ -769,7 +917,7 @@
769
  "early_stopping_threshold": 0.0
770
  },
771
  "attributes": {
772
- "early_stopping_patience_counter": 4
773
  }
774
  },
775
  "TrainerControl": {
@@ -778,12 +926,12 @@
778
  "should_evaluate": false,
779
  "should_log": false,
780
  "should_save": true,
781
- "should_training_stop": false
782
  },
783
  "attributes": {}
784
  }
785
  },
786
- "total_flos": 1.32844080857088e+17,
787
  "train_batch_size": 1,
788
  "trial_name": null,
789
  "trial_params": null
 
1
  {
2
  "best_metric": 2.3958656787872314,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-20",
4
+ "epoch": 0.13083921087600942,
5
  "eval_steps": 20,
6
+ "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
755
  "eval_samples_per_second": 1.999,
756
  "eval_steps_per_second": 1.999,
757
  "step": 100
758
+ },
759
+ {
760
+ "epoch": 0.11012300248730791,
761
+ "grad_norm": 543.7817993164062,
762
+ "learning_rate": 0.0004986414340225785,
763
+ "loss": 161.0186,
764
+ "step": 101
765
+ },
766
+ {
767
+ "epoch": 0.111213329244608,
768
+ "grad_norm": 93.4175033569336,
769
+ "learning_rate": 0.0004986114391942372,
770
+ "loss": 95.1064,
771
+ "step": 102
772
+ },
773
+ {
774
+ "epoch": 0.11230365600190807,
775
+ "grad_norm": 42.3511848449707,
776
+ "learning_rate": 0.0004985811177760075,
777
+ "loss": 89.4123,
778
+ "step": 103
779
+ },
780
+ {
781
+ "epoch": 0.11339398275920815,
782
+ "grad_norm": 55.580867767333984,
783
+ "learning_rate": 0.0004985504698077212,
784
+ "loss": 87.5712,
785
+ "step": 104
786
+ },
787
+ {
788
+ "epoch": 0.11448430951650823,
789
+ "grad_norm": 31.689430236816406,
790
+ "learning_rate": 0.0004985194953296393,
791
+ "loss": 80.7536,
792
+ "step": 105
793
+ },
794
+ {
795
+ "epoch": 0.1155746362738083,
796
+ "grad_norm": 26.954017639160156,
797
+ "learning_rate": 0.0004984881943824515,
798
+ "loss": 75.9416,
799
+ "step": 106
800
+ },
801
+ {
802
+ "epoch": 0.11666496303110839,
803
+ "grad_norm": 29.87924575805664,
804
+ "learning_rate": 0.0004984565670072765,
805
+ "loss": 79.545,
806
+ "step": 107
807
+ },
808
+ {
809
+ "epoch": 0.11775528978840846,
810
+ "grad_norm": 28.872543334960938,
811
+ "learning_rate": 0.0004984246132456617,
812
+ "loss": 71.7809,
813
+ "step": 108
814
+ },
815
+ {
816
+ "epoch": 0.11884561654570855,
817
+ "grad_norm": 30.724918365478516,
818
+ "learning_rate": 0.0004983923331395836,
819
+ "loss": 71.9761,
820
+ "step": 109
821
+ },
822
+ {
823
+ "epoch": 0.11993594330300862,
824
+ "grad_norm": 31.383636474609375,
825
+ "learning_rate": 0.0004983597267314469,
826
+ "loss": 75.5666,
827
+ "step": 110
828
+ },
829
+ {
830
+ "epoch": 0.1210262700603087,
831
+ "grad_norm": 36.043067932128906,
832
+ "learning_rate": 0.0004983267940640854,
833
+ "loss": 75.1238,
834
+ "step": 111
835
+ },
836
+ {
837
+ "epoch": 0.12211659681760878,
838
+ "grad_norm": 35.503814697265625,
839
+ "learning_rate": 0.0004982935351807612,
840
+ "loss": 78.1812,
841
+ "step": 112
842
+ },
843
+ {
844
+ "epoch": 0.12320692357490885,
845
+ "grad_norm": 36.39851379394531,
846
+ "learning_rate": 0.0004982599501251649,
847
+ "loss": 72.4006,
848
+ "step": 113
849
+ },
850
+ {
851
+ "epoch": 0.12429725033220894,
852
+ "grad_norm": 32.95728302001953,
853
+ "learning_rate": 0.0004982260389414159,
854
+ "loss": 72.0358,
855
+ "step": 114
856
+ },
857
+ {
858
+ "epoch": 0.125387577089509,
859
+ "grad_norm": 34.838993072509766,
860
+ "learning_rate": 0.0004981918016740617,
861
+ "loss": 75.6014,
862
+ "step": 115
863
+ },
864
+ {
865
+ "epoch": 0.1264779038468091,
866
+ "grad_norm": 32.99583435058594,
867
+ "learning_rate": 0.0004981572383680784,
868
+ "loss": 72.1847,
869
+ "step": 116
870
+ },
871
+ {
872
+ "epoch": 0.12756823060410916,
873
+ "grad_norm": 34.666954040527344,
874
+ "learning_rate": 0.0004981223490688702,
875
+ "loss": 70.2274,
876
+ "step": 117
877
+ },
878
+ {
879
+ "epoch": 0.12865855736140924,
880
+ "grad_norm": 37.92626953125,
881
+ "learning_rate": 0.0004980871338222698,
882
+ "loss": 76.0094,
883
+ "step": 118
884
+ },
885
+ {
886
+ "epoch": 0.12974888411870933,
887
+ "grad_norm": 33.52912521362305,
888
+ "learning_rate": 0.0004980515926745376,
889
+ "loss": 77.3571,
890
+ "step": 119
891
+ },
892
+ {
893
+ "epoch": 0.13083921087600942,
894
+ "grad_norm": 34.129905700683594,
895
+ "learning_rate": 0.0004980157256723628,
896
+ "loss": 70.9215,
897
+ "step": 120
898
+ },
899
+ {
900
+ "epoch": 0.13083921087600942,
901
+ "eval_loss": 2.417283535003662,
902
+ "eval_runtime": 74.0366,
903
+ "eval_samples_per_second": 1.999,
904
+ "eval_steps_per_second": 1.999,
905
+ "step": 120
906
  }
907
  ],
908
  "logging_steps": 1,
 
917
  "early_stopping_threshold": 0.0
918
  },
919
  "attributes": {
920
+ "early_stopping_patience_counter": 5
921
  }
922
  },
923
  "TrainerControl": {
 
926
  "should_evaluate": false,
927
  "should_log": false,
928
  "should_save": true,
929
+ "should_training_stop": true
930
  },
931
  "attributes": {}
932
  }
933
  },
934
+ "total_flos": 1.594128970285056e+17,
935
  "train_batch_size": 1,
936
  "trial_name": null,
937
  "trial_params": null