error577 commited on
Commit
3565752
·
verified ·
1 Parent(s): 4673748

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:881444cf70f70d0ccdcde7f927db7dc1702a38e39028bae1149df9a6322310ea
3
  size 859942080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70b40b4ea852faa0dabd88f7ddbc8095331dcaf982cf3b34cc7272211b022508
3
  size 859942080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37ce568517e845e12b882a12daa1dcaa6c37ead61f87765fbfff89ea1cf27426
3
  size 90187222
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6973271476cd76cc32089427e31b042ab3cf370ff220d960bc069ac0e8b7e1d7
3
  size 90187222
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0249daad6bba3532cdfe955baeb2365ed9c0ac740b6b1be283905494df7b79fa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:092d05c850cba14a4e1067d9540d36872c05fc71b3eeadb4562ba802384222c9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fec1f8f2c1e492d7ced7a566946ce0a12ce91b87a9087e2da8c9d69f72bf0622
3
  size 2080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e75c704c71c99deef040fe407c6f53cc8d33f4439273d19c1681b1ebdfb69672
3
  size 2080
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.802311658859253,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.0007281677115873328,
5
  "eval_steps": 100,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -723,6 +723,714 @@
723
  "eval_samples_per_second": 5.695,
724
  "eval_steps_per_second": 1.898,
725
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  }
727
  ],
728
  "logging_steps": 1,
@@ -737,7 +1445,7 @@
737
  "early_stopping_threshold": 0.0
738
  },
739
  "attributes": {
740
- "early_stopping_patience_counter": 0
741
  }
742
  },
743
  "TrainerControl": {
@@ -751,7 +1459,7 @@
751
  "attributes": {}
752
  }
753
  },
754
- "total_flos": 6630505080422400.0,
755
  "train_batch_size": 3,
756
  "trial_name": null,
757
  "trial_params": null
 
1
  {
2
  "best_metric": 1.802311658859253,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.0014563354231746657,
5
  "eval_steps": 100,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
723
  "eval_samples_per_second": 5.695,
724
  "eval_steps_per_second": 1.898,
725
  "step": 100
726
+ },
727
+ {
728
+ "epoch": 0.0007354493887032061,
729
+ "grad_norm": 30.542020797729492,
730
+ "learning_rate": 0.0001999999803956598,
731
+ "loss": 2.7235,
732
+ "step": 101
733
+ },
734
+ {
735
+ "epoch": 0.0007427310658190794,
736
+ "grad_norm": 12.602702140808105,
737
+ "learning_rate": 0.0001999999803956598,
738
+ "loss": 1.4984,
739
+ "step": 102
740
+ },
741
+ {
742
+ "epoch": 0.0007500127429349528,
743
+ "grad_norm": 13.053272247314453,
744
+ "learning_rate": 0.0001999999803956598,
745
+ "loss": 2.4308,
746
+ "step": 103
747
+ },
748
+ {
749
+ "epoch": 0.0007572944200508261,
750
+ "grad_norm": 11.506697654724121,
751
+ "learning_rate": 0.0001999999803956598,
752
+ "loss": 2.5812,
753
+ "step": 104
754
+ },
755
+ {
756
+ "epoch": 0.0007645760971666995,
757
+ "grad_norm": 10.1392822265625,
758
+ "learning_rate": 0.0001999999803956598,
759
+ "loss": 1.6152,
760
+ "step": 105
761
+ },
762
+ {
763
+ "epoch": 0.0007718577742825728,
764
+ "grad_norm": 14.92361068725586,
765
+ "learning_rate": 0.0001999999803956598,
766
+ "loss": 2.2026,
767
+ "step": 106
768
+ },
769
+ {
770
+ "epoch": 0.0007791394513984461,
771
+ "grad_norm": 8.74295425415039,
772
+ "learning_rate": 0.0001999999803956598,
773
+ "loss": 1.7257,
774
+ "step": 107
775
+ },
776
+ {
777
+ "epoch": 0.0007864211285143195,
778
+ "grad_norm": 9.7953519821167,
779
+ "learning_rate": 0.0001999999803956598,
780
+ "loss": 1.9238,
781
+ "step": 108
782
+ },
783
+ {
784
+ "epoch": 0.0007937028056301927,
785
+ "grad_norm": 11.444181442260742,
786
+ "learning_rate": 0.00019999996584374458,
787
+ "loss": 2.2776,
788
+ "step": 109
789
+ },
790
+ {
791
+ "epoch": 0.000800984482746066,
792
+ "grad_norm": 12.844846725463867,
793
+ "learning_rate": 0.00019999996584374458,
794
+ "loss": 1.9025,
795
+ "step": 110
796
+ },
797
+ {
798
+ "epoch": 0.0008082661598619394,
799
+ "grad_norm": 7.37370491027832,
800
+ "learning_rate": 0.00019999996584374458,
801
+ "loss": 1.0972,
802
+ "step": 111
803
+ },
804
+ {
805
+ "epoch": 0.0008155478369778127,
806
+ "grad_norm": 6.512350559234619,
807
+ "learning_rate": 0.00019999996584374458,
808
+ "loss": 1.4349,
809
+ "step": 112
810
+ },
811
+ {
812
+ "epoch": 0.0008228295140936861,
813
+ "grad_norm": 14.362659454345703,
814
+ "learning_rate": 0.00019999996584374458,
815
+ "loss": 2.0781,
816
+ "step": 113
817
+ },
818
+ {
819
+ "epoch": 0.0008301111912095594,
820
+ "grad_norm": 10.504934310913086,
821
+ "learning_rate": 0.00019999996584374458,
822
+ "loss": 1.8158,
823
+ "step": 114
824
+ },
825
+ {
826
+ "epoch": 0.0008373928683254327,
827
+ "grad_norm": 9.051297187805176,
828
+ "learning_rate": 0.00019999996584374458,
829
+ "loss": 1.393,
830
+ "step": 115
831
+ },
832
+ {
833
+ "epoch": 0.0008446745454413061,
834
+ "grad_norm": 8.922006607055664,
835
+ "learning_rate": 0.00019999996584374458,
836
+ "loss": 1.5453,
837
+ "step": 116
838
+ },
839
+ {
840
+ "epoch": 0.0008519562225571794,
841
+ "grad_norm": 10.427721977233887,
842
+ "learning_rate": 0.00019999996584374458,
843
+ "loss": 1.8282,
844
+ "step": 117
845
+ },
846
+ {
847
+ "epoch": 0.0008592378996730527,
848
+ "grad_norm": 7.2870917320251465,
849
+ "learning_rate": 0.00019999996584374458,
850
+ "loss": 1.0126,
851
+ "step": 118
852
+ },
853
+ {
854
+ "epoch": 0.000866519576788926,
855
+ "grad_norm": 11.430087089538574,
856
+ "learning_rate": 0.00019999996584374458,
857
+ "loss": 1.6173,
858
+ "step": 119
859
+ },
860
+ {
861
+ "epoch": 0.0008738012539047993,
862
+ "grad_norm": 8.79759407043457,
863
+ "learning_rate": 0.00019999996584374458,
864
+ "loss": 1.2973,
865
+ "step": 120
866
+ },
867
+ {
868
+ "epoch": 0.0008810829310206727,
869
+ "grad_norm": 27.648391723632812,
870
+ "learning_rate": 0.00019999996584374458,
871
+ "loss": 1.5452,
872
+ "step": 121
873
+ },
874
+ {
875
+ "epoch": 0.000888364608136546,
876
+ "grad_norm": 6.860217094421387,
877
+ "learning_rate": 0.00019999996584374458,
878
+ "loss": 1.1589,
879
+ "step": 122
880
+ },
881
+ {
882
+ "epoch": 0.0008956462852524193,
883
+ "grad_norm": 9.904220581054688,
884
+ "learning_rate": 0.00019999996584374458,
885
+ "loss": 1.7574,
886
+ "step": 123
887
+ },
888
+ {
889
+ "epoch": 0.0009029279623682927,
890
+ "grad_norm": 14.817357063293457,
891
+ "learning_rate": 0.00019999996584374458,
892
+ "loss": 2.2985,
893
+ "step": 124
894
+ },
895
+ {
896
+ "epoch": 0.000910209639484166,
897
+ "grad_norm": 9.02469253540039,
898
+ "learning_rate": 0.00019999996584374458,
899
+ "loss": 2.0967,
900
+ "step": 125
901
+ },
902
+ {
903
+ "epoch": 0.0009174913166000394,
904
+ "grad_norm": 9.622922897338867,
905
+ "learning_rate": 0.00019999996584374458,
906
+ "loss": 2.4315,
907
+ "step": 126
908
+ },
909
+ {
910
+ "epoch": 0.0009247729937159126,
911
+ "grad_norm": 7.905317306518555,
912
+ "learning_rate": 0.00019999996584374458,
913
+ "loss": 1.3262,
914
+ "step": 127
915
+ },
916
+ {
917
+ "epoch": 0.0009320546708317859,
918
+ "grad_norm": 8.914352416992188,
919
+ "learning_rate": 0.00019999996584374458,
920
+ "loss": 1.7299,
921
+ "step": 128
922
+ },
923
+ {
924
+ "epoch": 0.0009393363479476593,
925
+ "grad_norm": 9.581490516662598,
926
+ "learning_rate": 0.00019999996584374458,
927
+ "loss": 1.8589,
928
+ "step": 129
929
+ },
930
+ {
931
+ "epoch": 0.0009466180250635326,
932
+ "grad_norm": 6.935079097747803,
933
+ "learning_rate": 0.00019999996584374458,
934
+ "loss": 1.1753,
935
+ "step": 130
936
+ },
937
+ {
938
+ "epoch": 0.000953899702179406,
939
+ "grad_norm": 6.804558753967285,
940
+ "learning_rate": 0.00019999996584374458,
941
+ "loss": 1.62,
942
+ "step": 131
943
+ },
944
+ {
945
+ "epoch": 0.0009611813792952793,
946
+ "grad_norm": 11.983720779418945,
947
+ "learning_rate": 0.00019999996584374458,
948
+ "loss": 1.5915,
949
+ "step": 132
950
+ },
951
+ {
952
+ "epoch": 0.0009684630564111526,
953
+ "grad_norm": 25.60173988342285,
954
+ "learning_rate": 0.00019999996584374458,
955
+ "loss": 2.0053,
956
+ "step": 133
957
+ },
958
+ {
959
+ "epoch": 0.000975744733527026,
960
+ "grad_norm": 8.45429515838623,
961
+ "learning_rate": 0.00019999996584374458,
962
+ "loss": 2.0073,
963
+ "step": 134
964
+ },
965
+ {
966
+ "epoch": 0.0009830264106428994,
967
+ "grad_norm": 6.321603775024414,
968
+ "learning_rate": 0.00019999996584374458,
969
+ "loss": 0.9744,
970
+ "step": 135
971
+ },
972
+ {
973
+ "epoch": 0.0009903080877587725,
974
+ "grad_norm": 9.055106163024902,
975
+ "learning_rate": 0.00019999996584374458,
976
+ "loss": 1.6416,
977
+ "step": 136
978
+ },
979
+ {
980
+ "epoch": 0.000997589764874646,
981
+ "grad_norm": 8.92955207824707,
982
+ "learning_rate": 0.00019999996584374458,
983
+ "loss": 2.0005,
984
+ "step": 137
985
+ },
986
+ {
987
+ "epoch": 0.0010048714419905193,
988
+ "grad_norm": 8.891891479492188,
989
+ "learning_rate": 0.00019999996584374458,
990
+ "loss": 1.6211,
991
+ "step": 138
992
+ },
993
+ {
994
+ "epoch": 0.0010121531191063925,
995
+ "grad_norm": 13.83609676361084,
996
+ "learning_rate": 0.00019999996584374458,
997
+ "loss": 1.6726,
998
+ "step": 139
999
+ },
1000
+ {
1001
+ "epoch": 0.001019434796222266,
1002
+ "grad_norm": 16.78993034362793,
1003
+ "learning_rate": 0.00019999996584374458,
1004
+ "loss": 1.86,
1005
+ "step": 140
1006
+ },
1007
+ {
1008
+ "epoch": 0.0010267164733381393,
1009
+ "grad_norm": 10.312461853027344,
1010
+ "learning_rate": 0.00019999996584374458,
1011
+ "loss": 1.4386,
1012
+ "step": 141
1013
+ },
1014
+ {
1015
+ "epoch": 0.0010339981504540125,
1016
+ "grad_norm": 8.104599952697754,
1017
+ "learning_rate": 0.00019999996584374458,
1018
+ "loss": 2.0064,
1019
+ "step": 142
1020
+ },
1021
+ {
1022
+ "epoch": 0.0010412798275698859,
1023
+ "grad_norm": 9.345172882080078,
1024
+ "learning_rate": 0.00019999996584374458,
1025
+ "loss": 1.9574,
1026
+ "step": 143
1027
+ },
1028
+ {
1029
+ "epoch": 0.0010485615046857593,
1030
+ "grad_norm": 8.887224197387695,
1031
+ "learning_rate": 0.00019999996584374458,
1032
+ "loss": 1.4813,
1033
+ "step": 144
1034
+ },
1035
+ {
1036
+ "epoch": 0.0010558431818016324,
1037
+ "grad_norm": 10.085160255432129,
1038
+ "learning_rate": 0.00019999996584374458,
1039
+ "loss": 1.4148,
1040
+ "step": 145
1041
+ },
1042
+ {
1043
+ "epoch": 0.0010631248589175058,
1044
+ "grad_norm": 15.231815338134766,
1045
+ "learning_rate": 0.00019999996584374458,
1046
+ "loss": 2.1894,
1047
+ "step": 146
1048
+ },
1049
+ {
1050
+ "epoch": 0.0010704065360333792,
1051
+ "grad_norm": 9.214299201965332,
1052
+ "learning_rate": 0.00019999996584374458,
1053
+ "loss": 1.2747,
1054
+ "step": 147
1055
+ },
1056
+ {
1057
+ "epoch": 0.0010776882131492526,
1058
+ "grad_norm": 9.717480659484863,
1059
+ "learning_rate": 0.00019999996584374458,
1060
+ "loss": 2.3933,
1061
+ "step": 148
1062
+ },
1063
+ {
1064
+ "epoch": 0.0010849698902651258,
1065
+ "grad_norm": 12.608345031738281,
1066
+ "learning_rate": 0.00019999996584374458,
1067
+ "loss": 2.2024,
1068
+ "step": 149
1069
+ },
1070
+ {
1071
+ "epoch": 0.0010922515673809992,
1072
+ "grad_norm": 11.714061737060547,
1073
+ "learning_rate": 0.00019999995129182935,
1074
+ "loss": 2.1489,
1075
+ "step": 150
1076
+ },
1077
+ {
1078
+ "epoch": 0.0010995332444968726,
1079
+ "grad_norm": 6.733028411865234,
1080
+ "learning_rate": 0.00019999995129182935,
1081
+ "loss": 0.9061,
1082
+ "step": 151
1083
+ },
1084
+ {
1085
+ "epoch": 0.0011068149216127458,
1086
+ "grad_norm": 9.173693656921387,
1087
+ "learning_rate": 0.00019999995129182935,
1088
+ "loss": 1.2983,
1089
+ "step": 152
1090
+ },
1091
+ {
1092
+ "epoch": 0.0011140965987286192,
1093
+ "grad_norm": 10.551212310791016,
1094
+ "learning_rate": 0.00019999995129182935,
1095
+ "loss": 2.1741,
1096
+ "step": 153
1097
+ },
1098
+ {
1099
+ "epoch": 0.0011213782758444926,
1100
+ "grad_norm": 9.643767356872559,
1101
+ "learning_rate": 0.00019999995129182935,
1102
+ "loss": 1.8785,
1103
+ "step": 154
1104
+ },
1105
+ {
1106
+ "epoch": 0.0011286599529603657,
1107
+ "grad_norm": 8.361176490783691,
1108
+ "learning_rate": 0.00019999995129182935,
1109
+ "loss": 1.7363,
1110
+ "step": 155
1111
+ },
1112
+ {
1113
+ "epoch": 0.0011359416300762391,
1114
+ "grad_norm": 7.118173122406006,
1115
+ "learning_rate": 0.00019999995129182935,
1116
+ "loss": 1.9851,
1117
+ "step": 156
1118
+ },
1119
+ {
1120
+ "epoch": 0.0011432233071921125,
1121
+ "grad_norm": 11.809035301208496,
1122
+ "learning_rate": 0.00019999995129182935,
1123
+ "loss": 2.0862,
1124
+ "step": 157
1125
+ },
1126
+ {
1127
+ "epoch": 0.001150504984307986,
1128
+ "grad_norm": 8.845592498779297,
1129
+ "learning_rate": 0.00019999995129182935,
1130
+ "loss": 1.2204,
1131
+ "step": 158
1132
+ },
1133
+ {
1134
+ "epoch": 0.001157786661423859,
1135
+ "grad_norm": 8.715829849243164,
1136
+ "learning_rate": 0.00019999995129182935,
1137
+ "loss": 1.9076,
1138
+ "step": 159
1139
+ },
1140
+ {
1141
+ "epoch": 0.0011650683385397325,
1142
+ "grad_norm": 8.711019515991211,
1143
+ "learning_rate": 0.00019999995129182935,
1144
+ "loss": 1.4475,
1145
+ "step": 160
1146
+ },
1147
+ {
1148
+ "epoch": 0.0011723500156556059,
1149
+ "grad_norm": 9.015527725219727,
1150
+ "learning_rate": 0.00019999995129182935,
1151
+ "loss": 1.9736,
1152
+ "step": 161
1153
+ },
1154
+ {
1155
+ "epoch": 0.001179631692771479,
1156
+ "grad_norm": 13.795394897460938,
1157
+ "learning_rate": 0.00019999995129182935,
1158
+ "loss": 2.1726,
1159
+ "step": 162
1160
+ },
1161
+ {
1162
+ "epoch": 0.0011869133698873525,
1163
+ "grad_norm": 8.399027824401855,
1164
+ "learning_rate": 0.00019999995129182935,
1165
+ "loss": 1.5726,
1166
+ "step": 163
1167
+ },
1168
+ {
1169
+ "epoch": 0.0011941950470032259,
1170
+ "grad_norm": 9.823261260986328,
1171
+ "learning_rate": 0.00019999995129182935,
1172
+ "loss": 1.7958,
1173
+ "step": 164
1174
+ },
1175
+ {
1176
+ "epoch": 0.001201476724119099,
1177
+ "grad_norm": 7.4197516441345215,
1178
+ "learning_rate": 0.00019999995129182935,
1179
+ "loss": 1.1734,
1180
+ "step": 165
1181
+ },
1182
+ {
1183
+ "epoch": 0.0012087584012349724,
1184
+ "grad_norm": 9.796939849853516,
1185
+ "learning_rate": 0.00019999993673991412,
1186
+ "loss": 1.5177,
1187
+ "step": 166
1188
+ },
1189
+ {
1190
+ "epoch": 0.0012160400783508458,
1191
+ "grad_norm": 10.06812858581543,
1192
+ "learning_rate": 0.00019999993673991412,
1193
+ "loss": 2.1493,
1194
+ "step": 167
1195
+ },
1196
+ {
1197
+ "epoch": 0.001223321755466719,
1198
+ "grad_norm": 12.96749210357666,
1199
+ "learning_rate": 0.00019999993673991412,
1200
+ "loss": 2.4498,
1201
+ "step": 168
1202
+ },
1203
+ {
1204
+ "epoch": 0.0012306034325825924,
1205
+ "grad_norm": 10.271540641784668,
1206
+ "learning_rate": 0.00019999993673991412,
1207
+ "loss": 2.1217,
1208
+ "step": 169
1209
+ },
1210
+ {
1211
+ "epoch": 0.0012378851096984658,
1212
+ "grad_norm": 10.417543411254883,
1213
+ "learning_rate": 0.00019999993673991412,
1214
+ "loss": 1.5065,
1215
+ "step": 170
1216
+ },
1217
+ {
1218
+ "epoch": 0.0012451667868143392,
1219
+ "grad_norm": 14.934460639953613,
1220
+ "learning_rate": 0.00019999993673991412,
1221
+ "loss": 1.8384,
1222
+ "step": 171
1223
+ },
1224
+ {
1225
+ "epoch": 0.0012524484639302124,
1226
+ "grad_norm": 29.06182098388672,
1227
+ "learning_rate": 0.00019999993673991412,
1228
+ "loss": 2.2379,
1229
+ "step": 172
1230
+ },
1231
+ {
1232
+ "epoch": 0.0012597301410460858,
1233
+ "grad_norm": 179.1177215576172,
1234
+ "learning_rate": 0.00019999993673991412,
1235
+ "loss": 2.4171,
1236
+ "step": 173
1237
+ },
1238
+ {
1239
+ "epoch": 0.0012670118181619592,
1240
+ "grad_norm": 17.668655395507812,
1241
+ "learning_rate": 0.00019999993673991412,
1242
+ "loss": 2.3388,
1243
+ "step": 174
1244
+ },
1245
+ {
1246
+ "epoch": 0.0012742934952778323,
1247
+ "grad_norm": 8.100573539733887,
1248
+ "learning_rate": 0.00019999993673991412,
1249
+ "loss": 1.3806,
1250
+ "step": 175
1251
+ },
1252
+ {
1253
+ "epoch": 0.0012815751723937057,
1254
+ "grad_norm": 9.43472671508789,
1255
+ "learning_rate": 0.00019999993673991412,
1256
+ "loss": 1.2791,
1257
+ "step": 176
1258
+ },
1259
+ {
1260
+ "epoch": 0.0012888568495095791,
1261
+ "grad_norm": 11.577341079711914,
1262
+ "learning_rate": 0.00019999993673991412,
1263
+ "loss": 1.7206,
1264
+ "step": 177
1265
+ },
1266
+ {
1267
+ "epoch": 0.0012961385266254523,
1268
+ "grad_norm": 12.485160827636719,
1269
+ "learning_rate": 0.00019999993673991412,
1270
+ "loss": 2.4225,
1271
+ "step": 178
1272
+ },
1273
+ {
1274
+ "epoch": 0.0013034202037413257,
1275
+ "grad_norm": 11.283897399902344,
1276
+ "learning_rate": 0.00019999993673991412,
1277
+ "loss": 1.535,
1278
+ "step": 179
1279
+ },
1280
+ {
1281
+ "epoch": 0.001310701880857199,
1282
+ "grad_norm": 9.801483154296875,
1283
+ "learning_rate": 0.00019999993673991412,
1284
+ "loss": 2.1996,
1285
+ "step": 180
1286
+ },
1287
+ {
1288
+ "epoch": 0.0013179835579730723,
1289
+ "grad_norm": 11.930302619934082,
1290
+ "learning_rate": 0.0001999999221879989,
1291
+ "loss": 1.4919,
1292
+ "step": 181
1293
+ },
1294
+ {
1295
+ "epoch": 0.0013252652350889457,
1296
+ "grad_norm": 10.425878524780273,
1297
+ "learning_rate": 0.0001999999221879989,
1298
+ "loss": 1.7397,
1299
+ "step": 182
1300
+ },
1301
+ {
1302
+ "epoch": 0.001332546912204819,
1303
+ "grad_norm": 9.171979904174805,
1304
+ "learning_rate": 0.0001999999221879989,
1305
+ "loss": 1.8727,
1306
+ "step": 183
1307
+ },
1308
+ {
1309
+ "epoch": 0.0013398285893206924,
1310
+ "grad_norm": 10.64809799194336,
1311
+ "learning_rate": 0.0001999999221879989,
1312
+ "loss": 1.742,
1313
+ "step": 184
1314
+ },
1315
+ {
1316
+ "epoch": 0.0013471102664365656,
1317
+ "grad_norm": 13.533452033996582,
1318
+ "learning_rate": 0.0001999999221879989,
1319
+ "loss": 1.7024,
1320
+ "step": 185
1321
+ },
1322
+ {
1323
+ "epoch": 0.001354391943552439,
1324
+ "grad_norm": 9.46273422241211,
1325
+ "learning_rate": 0.0001999999221879989,
1326
+ "loss": 1.8248,
1327
+ "step": 186
1328
+ },
1329
+ {
1330
+ "epoch": 0.0013616736206683124,
1331
+ "grad_norm": 20.131072998046875,
1332
+ "learning_rate": 0.0001999999221879989,
1333
+ "loss": 1.7554,
1334
+ "step": 187
1335
+ },
1336
+ {
1337
+ "epoch": 0.0013689552977841856,
1338
+ "grad_norm": 6.414751052856445,
1339
+ "learning_rate": 0.0001999999221879989,
1340
+ "loss": 0.8933,
1341
+ "step": 188
1342
+ },
1343
+ {
1344
+ "epoch": 0.001376236974900059,
1345
+ "grad_norm": 7.662179946899414,
1346
+ "learning_rate": 0.0001999999221879989,
1347
+ "loss": 1.4111,
1348
+ "step": 189
1349
+ },
1350
+ {
1351
+ "epoch": 0.0013835186520159324,
1352
+ "grad_norm": 10.391175270080566,
1353
+ "learning_rate": 0.0001999999221879989,
1354
+ "loss": 2.0454,
1355
+ "step": 190
1356
+ },
1357
+ {
1358
+ "epoch": 0.0013908003291318056,
1359
+ "grad_norm": 9.538223266601562,
1360
+ "learning_rate": 0.0001999999221879989,
1361
+ "loss": 2.2686,
1362
+ "step": 191
1363
+ },
1364
+ {
1365
+ "epoch": 0.001398082006247679,
1366
+ "grad_norm": 13.454922676086426,
1367
+ "learning_rate": 0.0001999999221879989,
1368
+ "loss": 2.0085,
1369
+ "step": 192
1370
+ },
1371
+ {
1372
+ "epoch": 0.0014053636833635523,
1373
+ "grad_norm": 8.68910026550293,
1374
+ "learning_rate": 0.0001999999221879989,
1375
+ "loss": 1.2429,
1376
+ "step": 193
1377
+ },
1378
+ {
1379
+ "epoch": 0.0014126453604794255,
1380
+ "grad_norm": 11.174205780029297,
1381
+ "learning_rate": 0.00019999990763608366,
1382
+ "loss": 1.5733,
1383
+ "step": 194
1384
+ },
1385
+ {
1386
+ "epoch": 0.001419927037595299,
1387
+ "grad_norm": 6.9463791847229,
1388
+ "learning_rate": 0.00019999990763608366,
1389
+ "loss": 1.1096,
1390
+ "step": 195
1391
+ },
1392
+ {
1393
+ "epoch": 0.0014272087147111723,
1394
+ "grad_norm": 10.863914489746094,
1395
+ "learning_rate": 0.00019999990763608366,
1396
+ "loss": 1.374,
1397
+ "step": 196
1398
+ },
1399
+ {
1400
+ "epoch": 0.0014344903918270457,
1401
+ "grad_norm": 7.314742565155029,
1402
+ "learning_rate": 0.00019999990763608366,
1403
+ "loss": 1.8196,
1404
+ "step": 197
1405
+ },
1406
+ {
1407
+ "epoch": 0.0014417720689429189,
1408
+ "grad_norm": 13.76647663116455,
1409
+ "learning_rate": 0.00019999990763608366,
1410
+ "loss": 1.8654,
1411
+ "step": 198
1412
+ },
1413
+ {
1414
+ "epoch": 0.0014490537460587923,
1415
+ "grad_norm": 18.60045051574707,
1416
+ "learning_rate": 0.00019999990763608366,
1417
+ "loss": 1.8438,
1418
+ "step": 199
1419
+ },
1420
+ {
1421
+ "epoch": 0.0014563354231746657,
1422
+ "grad_norm": 6.76107931137085,
1423
+ "learning_rate": 0.00019999990763608366,
1424
+ "loss": 1.1957,
1425
+ "step": 200
1426
+ },
1427
+ {
1428
+ "epoch": 0.0014563354231746657,
1429
+ "eval_loss": 1.8587133884429932,
1430
+ "eval_runtime": 36.3018,
1431
+ "eval_samples_per_second": 5.702,
1432
+ "eval_steps_per_second": 1.901,
1433
+ "step": 200
1434
  }
1435
  ],
1436
  "logging_steps": 1,
 
1445
  "early_stopping_threshold": 0.0
1446
  },
1447
  "attributes": {
1448
+ "early_stopping_patience_counter": 1
1449
  }
1450
  },
1451
  "TrainerControl": {
 
1459
  "attributes": {}
1460
  }
1461
  },
1462
+ "total_flos": 1.32610101608448e+16,
1463
  "train_batch_size": 3,
1464
  "trial_name": null,
1465
  "trial_params": null