ErrorAI commited on
Commit
a10932d
·
verified ·
1 Parent(s): c877109

Training in progress, step 167, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d9d39744d7d6dcdc045382bac949896cdd56c3a51acb3382c1ff30184fb9b54
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63f0232e7b506fef7d2e12841b36fc5ab0b51d545c78f674b38b70d223ec68fe
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3678af00249a9bc4835e5ac12d301b949f9bb1ee506b7f7fb1ce32b86be1bd77
3
  size 43122580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176f10ff9a1721647deff2714511e6fb56ada5350eb89f6a8a6cc2d1ebf6a1c6
3
  size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5b33506a82a92e59a5bc28b8ff979fe9268fe5f28fc8ffdedd6dfba1330cfc6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4740969335d0ca1e5851b9a3ee510297bb49177fbaa8e72e5119db37858a218d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbd7bd812f49d7c127aa907e5f80ff84e361879563f5604f92118217b0128fd9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a92317938583156fc0a1371ce3c4446db203520cf84ded2f12700846a207afcb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7578947368421053,
5
  "eval_steps": 500,
6
- "global_step": 126,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -889,6 +889,301 @@
889
  "learning_rate": 1.4989132611641576e-05,
890
  "loss": 1.2182,
891
  "step": 126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892
  }
893
  ],
894
  "logging_steps": 1,
@@ -903,12 +1198,12 @@
903
  "should_evaluate": false,
904
  "should_log": false,
905
  "should_save": true,
906
- "should_training_stop": false
907
  },
908
  "attributes": {}
909
  }
910
  },
911
- "total_flos": 9.340299453136896e+16,
912
  "train_batch_size": 4,
913
  "trial_name": null,
914
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0045112781954888,
5
  "eval_steps": 500,
6
+ "global_step": 167,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
889
  "learning_rate": 1.4989132611641576e-05,
890
  "loss": 1.2182,
891
  "step": 126
892
+ },
893
+ {
894
+ "epoch": 0.7639097744360902,
895
+ "grad_norm": 0.864700436592102,
896
+ "learning_rate": 1.4303513272105057e-05,
897
+ "loss": 1.3224,
898
+ "step": 127
899
+ },
900
+ {
901
+ "epoch": 0.7699248120300752,
902
+ "grad_norm": 0.8970008492469788,
903
+ "learning_rate": 1.3631317921347563e-05,
904
+ "loss": 1.3282,
905
+ "step": 128
906
+ },
907
+ {
908
+ "epoch": 0.7759398496240602,
909
+ "grad_norm": 0.9462831020355225,
910
+ "learning_rate": 1.297279934454978e-05,
911
+ "loss": 1.3372,
912
+ "step": 129
913
+ },
914
+ {
915
+ "epoch": 0.7819548872180451,
916
+ "grad_norm": 0.8102074861526489,
917
+ "learning_rate": 1.2328205183616965e-05,
918
+ "loss": 1.1941,
919
+ "step": 130
920
+ },
921
+ {
922
+ "epoch": 0.7879699248120301,
923
+ "grad_norm": 0.8798099756240845,
924
+ "learning_rate": 1.1697777844051105e-05,
925
+ "loss": 1.3555,
926
+ "step": 131
927
+ },
928
+ {
929
+ "epoch": 0.793984962406015,
930
+ "grad_norm": 0.9303496479988098,
931
+ "learning_rate": 1.1081754403791999e-05,
932
+ "loss": 1.1306,
933
+ "step": 132
934
+ },
935
+ {
936
+ "epoch": 0.8,
937
+ "grad_norm": 1.058530569076538,
938
+ "learning_rate": 1.0480366524062042e-05,
939
+ "loss": 1.5215,
940
+ "step": 133
941
+ },
942
+ {
943
+ "epoch": 0.806015037593985,
944
+ "grad_norm": 0.9745376110076904,
945
+ "learning_rate": 9.893840362247809e-06,
946
+ "loss": 1.5797,
947
+ "step": 134
948
+ },
949
+ {
950
+ "epoch": 0.8120300751879699,
951
+ "grad_norm": 0.9844892621040344,
952
+ "learning_rate": 9.322396486851626e-06,
953
+ "loss": 1.5185,
954
+ "step": 135
955
+ },
956
+ {
957
+ "epoch": 0.8180451127819549,
958
+ "grad_norm": 1.0114489793777466,
959
+ "learning_rate": 8.766249794544662e-06,
960
+ "loss": 1.44,
961
+ "step": 136
962
+ },
963
+ {
964
+ "epoch": 0.8240601503759398,
965
+ "grad_norm": 1.1200000047683716,
966
+ "learning_rate": 8.225609429353187e-06,
967
+ "loss": 1.3672,
968
+ "step": 137
969
+ },
970
+ {
971
+ "epoch": 0.8300751879699249,
972
+ "grad_norm": 1.17137610912323,
973
+ "learning_rate": 7.700678704007947e-06,
974
+ "loss": 1.5414,
975
+ "step": 138
976
+ },
977
+ {
978
+ "epoch": 0.8360902255639098,
979
+ "grad_norm": 1.447938084602356,
980
+ "learning_rate": 7.191655023486682e-06,
981
+ "loss": 1.561,
982
+ "step": 139
983
+ },
984
+ {
985
+ "epoch": 0.8421052631578947,
986
+ "grad_norm": 1.2338807582855225,
987
+ "learning_rate": 6.698729810778065e-06,
988
+ "loss": 1.3352,
989
+ "step": 140
990
+ },
991
+ {
992
+ "epoch": 0.8481203007518797,
993
+ "grad_norm": 1.2002118825912476,
994
+ "learning_rate": 6.222088434895462e-06,
995
+ "loss": 1.4649,
996
+ "step": 141
997
+ },
998
+ {
999
+ "epoch": 0.8541353383458646,
1000
+ "grad_norm": 1.2546420097351074,
1001
+ "learning_rate": 5.7619101411671095e-06,
1002
+ "loss": 1.3337,
1003
+ "step": 142
1004
+ },
1005
+ {
1006
+ "epoch": 0.8601503759398497,
1007
+ "grad_norm": 1.2262368202209473,
1008
+ "learning_rate": 5.318367983829392e-06,
1009
+ "loss": 1.395,
1010
+ "step": 143
1011
+ },
1012
+ {
1013
+ "epoch": 0.8661654135338346,
1014
+ "grad_norm": 1.2686101198196411,
1015
+ "learning_rate": 4.891628760948114e-06,
1016
+ "loss": 1.527,
1017
+ "step": 144
1018
+ },
1019
+ {
1020
+ "epoch": 0.8721804511278195,
1021
+ "grad_norm": 1.2336403131484985,
1022
+ "learning_rate": 4.4818529516926726e-06,
1023
+ "loss": 1.4527,
1024
+ "step": 145
1025
+ },
1026
+ {
1027
+ "epoch": 0.8781954887218045,
1028
+ "grad_norm": 1.3499011993408203,
1029
+ "learning_rate": 4.089194655986306e-06,
1030
+ "loss": 1.3562,
1031
+ "step": 146
1032
+ },
1033
+ {
1034
+ "epoch": 0.8842105263157894,
1035
+ "grad_norm": 1.4406931400299072,
1036
+ "learning_rate": 3.7138015365554833e-06,
1037
+ "loss": 1.4555,
1038
+ "step": 147
1039
+ },
1040
+ {
1041
+ "epoch": 0.8902255639097745,
1042
+ "grad_norm": 1.434403419494629,
1043
+ "learning_rate": 3.3558147633999728e-06,
1044
+ "loss": 1.4964,
1045
+ "step": 148
1046
+ },
1047
+ {
1048
+ "epoch": 0.8962406015037594,
1049
+ "grad_norm": 1.4196852445602417,
1050
+ "learning_rate": 3.0153689607045845e-06,
1051
+ "loss": 1.3747,
1052
+ "step": 149
1053
+ },
1054
+ {
1055
+ "epoch": 0.9022556390977443,
1056
+ "grad_norm": 1.5109477043151855,
1057
+ "learning_rate": 2.692592156212487e-06,
1058
+ "loss": 1.4056,
1059
+ "step": 150
1060
+ },
1061
+ {
1062
+ "epoch": 0.9082706766917293,
1063
+ "grad_norm": 1.2970283031463623,
1064
+ "learning_rate": 2.3876057330792346e-06,
1065
+ "loss": 1.3722,
1066
+ "step": 151
1067
+ },
1068
+ {
1069
+ "epoch": 0.9142857142857143,
1070
+ "grad_norm": 1.3502446413040161,
1071
+ "learning_rate": 2.100524384225555e-06,
1072
+ "loss": 1.5935,
1073
+ "step": 152
1074
+ },
1075
+ {
1076
+ "epoch": 0.9203007518796993,
1077
+ "grad_norm": 1.3725895881652832,
1078
+ "learning_rate": 1.8314560692059835e-06,
1079
+ "loss": 1.4272,
1080
+ "step": 153
1081
+ },
1082
+ {
1083
+ "epoch": 0.9263157894736842,
1084
+ "grad_norm": 1.4336990118026733,
1085
+ "learning_rate": 1.5805019736097104e-06,
1086
+ "loss": 1.4357,
1087
+ "step": 154
1088
+ },
1089
+ {
1090
+ "epoch": 0.9323308270676691,
1091
+ "grad_norm": 1.3459137678146362,
1092
+ "learning_rate": 1.3477564710088098e-06,
1093
+ "loss": 1.4523,
1094
+ "step": 155
1095
+ },
1096
+ {
1097
+ "epoch": 0.9383458646616541,
1098
+ "grad_norm": 1.516906499862671,
1099
+ "learning_rate": 1.1333070874682216e-06,
1100
+ "loss": 1.4304,
1101
+ "step": 156
1102
+ },
1103
+ {
1104
+ "epoch": 0.9443609022556391,
1105
+ "grad_norm": 1.4473116397857666,
1106
+ "learning_rate": 9.372344686307655e-07,
1107
+ "loss": 1.4226,
1108
+ "step": 157
1109
+ },
1110
+ {
1111
+ "epoch": 0.9503759398496241,
1112
+ "grad_norm": 1.4904662370681763,
1113
+ "learning_rate": 7.596123493895991e-07,
1114
+ "loss": 1.5851,
1115
+ "step": 158
1116
+ },
1117
+ {
1118
+ "epoch": 0.956390977443609,
1119
+ "grad_norm": 1.6148744821548462,
1120
+ "learning_rate": 6.005075261595494e-07,
1121
+ "loss": 1.6208,
1122
+ "step": 159
1123
+ },
1124
+ {
1125
+ "epoch": 0.9624060150375939,
1126
+ "grad_norm": 1.755981683731079,
1127
+ "learning_rate": 4.5997983175773417e-07,
1128
+ "loss": 1.4813,
1129
+ "step": 160
1130
+ },
1131
+ {
1132
+ "epoch": 0.968421052631579,
1133
+ "grad_norm": 1.6025625467300415,
1134
+ "learning_rate": 3.380821129028489e-07,
1135
+ "loss": 1.479,
1136
+ "step": 161
1137
+ },
1138
+ {
1139
+ "epoch": 0.9744360902255639,
1140
+ "grad_norm": 2.444580316543579,
1141
+ "learning_rate": 2.3486021034170857e-07,
1142
+ "loss": 1.7314,
1143
+ "step": 162
1144
+ },
1145
+ {
1146
+ "epoch": 0.9804511278195489,
1147
+ "grad_norm": 6.058820724487305,
1148
+ "learning_rate": 1.503529416103988e-07,
1149
+ "loss": 2.5463,
1150
+ "step": 163
1151
+ },
1152
+ {
1153
+ "epoch": 0.9864661654135338,
1154
+ "grad_norm": 6.485790729522705,
1155
+ "learning_rate": 8.459208643659122e-08,
1156
+ "loss": 3.2891,
1157
+ "step": 164
1158
+ },
1159
+ {
1160
+ "epoch": 0.9924812030075187,
1161
+ "grad_norm": 0.9536194801330566,
1162
+ "learning_rate": 3.760237478849793e-08,
1163
+ "loss": 1.2774,
1164
+ "step": 165
1165
+ },
1166
+ {
1167
+ "epoch": 0.9984962406015038,
1168
+ "grad_norm": 1.3168344497680664,
1169
+ "learning_rate": 9.401477574932926e-09,
1170
+ "loss": 1.4629,
1171
+ "step": 166
1172
+ },
1173
+ {
1174
+ "epoch": 0.9984962406015038,
1175
+ "eval_loss": 1.425169587135315,
1176
+ "eval_runtime": 7.4236,
1177
+ "eval_samples_per_second": 18.859,
1178
+ "eval_steps_per_second": 4.715,
1179
+ "step": 166
1180
+ },
1181
+ {
1182
+ "epoch": 1.0045112781954888,
1183
+ "grad_norm": 4.509108543395996,
1184
+ "learning_rate": 0.0,
1185
+ "loss": 3.1146,
1186
+ "step": 167
1187
  }
1188
  ],
1189
  "logging_steps": 1,
 
1198
  "should_evaluate": false,
1199
  "should_log": false,
1200
  "should_save": true,
1201
+ "should_training_stop": true
1202
  },
1203
  "attributes": {}
1204
  }
1205
  },
1206
+ "total_flos": 1.2392080462577664e+17,
1207
  "train_batch_size": 4,
1208
  "trial_name": null,
1209
  "trial_params": null