MandraTrap commited on
Commit
916bc65
·
verified ·
1 Parent(s): 1e09ea8

Training in progress, step 133, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d8dc624f9acde36326176980a154822516d424136b09a181311475a15744e63
3
  size 78480072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98db3ed9bea4ffa9f3e6f5de6e859c44234a7d58c3e546271c05c1bd29d14e0e
3
  size 78480072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f397ad9601328571f197af011d9d50fea539c1a183e716af85ff04152a683465
3
  size 157104826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59dca352a9920089bd19272da4368db923b95fe6eddff52d37471628cef40360
3
  size 157104826
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:658d33ef902b1dedc073e9d1bbbd14ca7725ea2c0676e92250f191f18c3878d3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85702cced7cfdb220c2b7edaa9f20135c968ecc9021f4b21eef404719d23c6cd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b969cb3da1b61540c213ebf9a9134bb0b4ecf274ba2793440c21dcfe3464f533
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec0c9b71bfc5d41045936414bd8985d97d970e02f84be007a006f11855b68af
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.02247953414917,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.12924071082390953,
5
  "eval_steps": 25,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -747,6 +747,245 @@
747
  "eval_samples_per_second": 9.439,
748
  "eval_steps_per_second": 9.439,
749
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
  }
751
  ],
752
  "logging_steps": 1,
@@ -770,12 +1009,12 @@
770
  "should_evaluate": false,
771
  "should_log": false,
772
  "should_save": true,
773
- "should_training_stop": false
774
  },
775
  "attributes": {}
776
  }
777
  },
778
- "total_flos": 7.87020276301824e+16,
779
  "train_batch_size": 1,
780
  "trial_name": null,
781
  "trial_params": null
 
1
  {
2
  "best_metric": 1.02247953414917,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.17189014539579967,
5
  "eval_steps": 25,
6
+ "global_step": 133,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
747
  "eval_samples_per_second": 9.439,
748
  "eval_steps_per_second": 9.439,
749
  "step": 100
750
+ },
751
+ {
752
+ "epoch": 0.13053311793214864,
753
+ "grad_norm": 0.36377787590026855,
754
+ "learning_rate": 5.699511691628639e-05,
755
+ "loss": 0.9368,
756
+ "step": 101
757
+ },
758
+ {
759
+ "epoch": 0.13182552504038772,
760
+ "grad_norm": 0.3719994127750397,
761
+ "learning_rate": 5.511977500775052e-05,
762
+ "loss": 1.0211,
763
+ "step": 102
764
+ },
765
+ {
766
+ "epoch": 0.13311793214862683,
767
+ "grad_norm": 0.3707346022129059,
768
+ "learning_rate": 5.329254083075112e-05,
769
+ "loss": 0.9783,
770
+ "step": 103
771
+ },
772
+ {
773
+ "epoch": 0.1344103392568659,
774
+ "grad_norm": 0.36826223134994507,
775
+ "learning_rate": 5.151448143859205e-05,
776
+ "loss": 0.9802,
777
+ "step": 104
778
+ },
779
+ {
780
+ "epoch": 0.13570274636510501,
781
+ "grad_norm": 0.36664366722106934,
782
+ "learning_rate": 4.978663516788749e-05,
783
+ "loss": 1.0115,
784
+ "step": 105
785
+ },
786
+ {
787
+ "epoch": 0.1369951534733441,
788
+ "grad_norm": 0.36409223079681396,
789
+ "learning_rate": 4.811001103220266e-05,
790
+ "loss": 1.0589,
791
+ "step": 106
792
+ },
793
+ {
794
+ "epoch": 0.1382875605815832,
795
+ "grad_norm": 0.3501863479614258,
796
+ "learning_rate": 4.6485588132818434e-05,
797
+ "loss": 1.0006,
798
+ "step": 107
799
+ },
800
+ {
801
+ "epoch": 0.13957996768982228,
802
+ "grad_norm": 0.3493867516517639,
803
+ "learning_rate": 4.4914315086963665e-05,
804
+ "loss": 1.0094,
805
+ "step": 108
806
+ },
807
+ {
808
+ "epoch": 0.1408723747980614,
809
+ "grad_norm": 0.35538920760154724,
810
+ "learning_rate": 4.339710947384949e-05,
811
+ "loss": 1.0153,
812
+ "step": 109
813
+ },
814
+ {
815
+ "epoch": 0.1421647819063005,
816
+ "grad_norm": 0.36491742730140686,
817
+ "learning_rate": 4.193485729882891e-05,
818
+ "loss": 1.0412,
819
+ "step": 110
820
+ },
821
+ {
822
+ "epoch": 0.14345718901453958,
823
+ "grad_norm": 0.36901021003723145,
824
+ "learning_rate": 4.052841247599446e-05,
825
+ "loss": 1.0098,
826
+ "step": 111
827
+ },
828
+ {
829
+ "epoch": 0.14474959612277868,
830
+ "grad_norm": 0.380164235830307,
831
+ "learning_rate": 3.9178596329516735e-05,
832
+ "loss": 1.0856,
833
+ "step": 112
834
+ },
835
+ {
836
+ "epoch": 0.14604200323101776,
837
+ "grad_norm": 0.3649565577507019,
838
+ "learning_rate": 3.78861971140139e-05,
839
+ "loss": 1.0262,
840
+ "step": 113
841
+ },
842
+ {
843
+ "epoch": 0.14733441033925687,
844
+ "grad_norm": 0.37567245960235596,
845
+ "learning_rate": 3.6651969554233325e-05,
846
+ "loss": 1.0776,
847
+ "step": 114
848
+ },
849
+ {
850
+ "epoch": 0.14862681744749595,
851
+ "grad_norm": 0.3666926622390747,
852
+ "learning_rate": 3.5476634404313645e-05,
853
+ "loss": 0.9935,
854
+ "step": 115
855
+ },
856
+ {
857
+ "epoch": 0.14991922455573506,
858
+ "grad_norm": 0.37592780590057373,
859
+ "learning_rate": 3.436087802688448e-05,
860
+ "loss": 1.0594,
861
+ "step": 116
862
+ },
863
+ {
864
+ "epoch": 0.15121163166397414,
865
+ "grad_norm": 0.3706028163433075,
866
+ "learning_rate": 3.330535199225016e-05,
867
+ "loss": 1.0206,
868
+ "step": 117
869
+ },
870
+ {
871
+ "epoch": 0.15250403877221325,
872
+ "grad_norm": 0.38433101773262024,
873
+ "learning_rate": 3.231067269789085e-05,
874
+ "loss": 1.0791,
875
+ "step": 118
876
+ },
877
+ {
878
+ "epoch": 0.15379644588045235,
879
+ "grad_norm": 0.3920274078845978,
880
+ "learning_rate": 3.137742100850389e-05,
881
+ "loss": 1.0546,
882
+ "step": 119
883
+ },
884
+ {
885
+ "epoch": 0.15508885298869143,
886
+ "grad_norm": 0.38755470514297485,
887
+ "learning_rate": 3.0506141916795233e-05,
888
+ "loss": 1.0496,
889
+ "step": 120
890
+ },
891
+ {
892
+ "epoch": 0.15638126009693054,
893
+ "grad_norm": 0.38180747628211975,
894
+ "learning_rate": 2.9697344225218805e-05,
895
+ "loss": 1.0409,
896
+ "step": 121
897
+ },
898
+ {
899
+ "epoch": 0.15767366720516962,
900
+ "grad_norm": 0.38283225893974304,
901
+ "learning_rate": 2.8951500248850473e-05,
902
+ "loss": 1.1025,
903
+ "step": 122
904
+ },
905
+ {
906
+ "epoch": 0.15896607431340873,
907
+ "grad_norm": 0.39247995615005493,
908
+ "learning_rate": 2.826904553956915e-05,
909
+ "loss": 1.0607,
910
+ "step": 123
911
+ },
912
+ {
913
+ "epoch": 0.1602584814216478,
914
+ "grad_norm": 0.3813554048538208,
915
+ "learning_rate": 2.7650378631706888e-05,
916
+ "loss": 1.0681,
917
+ "step": 124
918
+ },
919
+ {
920
+ "epoch": 0.16155088852988692,
921
+ "grad_norm": 0.38495323061943054,
922
+ "learning_rate": 2.709586080931599e-05,
923
+ "loss": 1.0247,
924
+ "step": 125
925
+ },
926
+ {
927
+ "epoch": 0.16155088852988692,
928
+ "eval_loss": 1.0126595497131348,
929
+ "eval_runtime": 5.2902,
930
+ "eval_samples_per_second": 9.451,
931
+ "eval_steps_per_second": 9.451,
932
+ "step": 125
933
+ },
934
+ {
935
+ "epoch": 0.16284329563812602,
936
+ "grad_norm": 0.38048914074897766,
937
+ "learning_rate": 2.6605815895189287e-05,
938
+ "loss": 0.9995,
939
+ "step": 126
940
+ },
941
+ {
942
+ "epoch": 0.1641357027463651,
943
+ "grad_norm": 0.3828051686286926,
944
+ "learning_rate": 2.618053006175675e-05,
945
+ "loss": 1.0404,
946
+ "step": 127
947
+ },
948
+ {
949
+ "epoch": 0.1654281098546042,
950
+ "grad_norm": 0.3885887563228607,
951
+ "learning_rate": 2.582025166396893e-05,
952
+ "loss": 1.0402,
953
+ "step": 128
954
+ },
955
+ {
956
+ "epoch": 0.1667205169628433,
957
+ "grad_norm": 0.38757041096687317,
958
+ "learning_rate": 2.552519109426453e-05,
959
+ "loss": 1.0203,
960
+ "step": 129
961
+ },
962
+ {
963
+ "epoch": 0.1680129240710824,
964
+ "grad_norm": 0.3830198645591736,
965
+ "learning_rate": 2.529552065970726e-05,
966
+ "loss": 1.0256,
967
+ "step": 130
968
+ },
969
+ {
970
+ "epoch": 0.16930533117932148,
971
+ "grad_norm": 0.38863909244537354,
972
+ "learning_rate": 2.5131374481363254e-05,
973
+ "loss": 1.0731,
974
+ "step": 131
975
+ },
976
+ {
977
+ "epoch": 0.17059773828756059,
978
+ "grad_norm": 0.39180222153663635,
979
+ "learning_rate": 2.5032848415978297e-05,
980
+ "loss": 1.0099,
981
+ "step": 132
982
+ },
983
+ {
984
+ "epoch": 0.17189014539579967,
985
+ "grad_norm": 0.41591089963912964,
986
+ "learning_rate": 2.5e-05,
987
+ "loss": 1.0978,
988
+ "step": 133
989
  }
990
  ],
991
  "logging_steps": 1,
 
1009
  "should_evaluate": false,
1010
  "should_log": false,
1011
  "should_save": true,
1012
+ "should_training_stop": true
1013
  },
1014
  "attributes": {}
1015
  }
1016
  },
1017
+ "total_flos": 1.046736967481426e+17,
1018
  "train_batch_size": 1,
1019
  "trial_name": null,
1020
  "trial_params": null