Training in progress, step 1794, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 389074464
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:442d270b50645e8920121f8bcb1e3642bfd469619b17f2c672139a95fd23af56
|
| 3 |
size 389074464
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 198016005
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:186f1f434e7d3e152023d123ef21c94c4a0d6ce9718d70926a1817fd851c08fe
|
| 3 |
size 198016005
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4453f8a4437d3cd9972effeb19da458043df1075458e51d17d709e5b85678e59
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -988,6 +988,279 @@
|
|
| 988 |
"learning_rate": 2.3107863556921237e-05,
|
| 989 |
"loss": 3.587228012084961,
|
| 990 |
"step": 1400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 991 |
}
|
| 992 |
],
|
| 993 |
"logging_steps": 10,
|
|
@@ -1002,12 +1275,12 @@
|
|
| 1002 |
"should_evaluate": false,
|
| 1003 |
"should_log": false,
|
| 1004 |
"should_save": true,
|
| 1005 |
-
"should_training_stop":
|
| 1006 |
},
|
| 1007 |
"attributes": {}
|
| 1008 |
}
|
| 1009 |
},
|
| 1010 |
-
"total_flos":
|
| 1011 |
"train_batch_size": 42,
|
| 1012 |
"trial_name": null,
|
| 1013 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1794,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 988 |
"learning_rate": 2.3107863556921237e-05,
|
| 989 |
"loss": 3.587228012084961,
|
| 990 |
"step": 1400
|
| 991 |
+
},
|
| 992 |
+
{
|
| 993 |
+
"epoch": 2.3578595317725752,
|
| 994 |
+
"grad_norm": 0.21534579992294312,
|
| 995 |
+
"learning_rate": 2.199705098358459e-05,
|
| 996 |
+
"loss": 3.6128841400146485,
|
| 997 |
+
"step": 1410
|
| 998 |
+
},
|
| 999 |
+
{
|
| 1000 |
+
"epoch": 2.374581939799331,
|
| 1001 |
+
"grad_norm": 0.19469194114208221,
|
| 1002 |
+
"learning_rate": 2.091029194607431e-05,
|
| 1003 |
+
"loss": 3.612522506713867,
|
| 1004 |
+
"step": 1420
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 2.391304347826087,
|
| 1008 |
+
"grad_norm": 0.19594796001911163,
|
| 1009 |
+
"learning_rate": 1.984792156502072e-05,
|
| 1010 |
+
"loss": 3.612636184692383,
|
| 1011 |
+
"step": 1430
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 2.408026755852843,
|
| 1015 |
+
"grad_norm": 0.19416235387325287,
|
| 1016 |
+
"learning_rate": 1.8810267440397246e-05,
|
| 1017 |
+
"loss": 3.611737823486328,
|
| 1018 |
+
"step": 1440
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 2.4247491638795986,
|
| 1022 |
+
"grad_norm": 0.20568251609802246,
|
| 1023 |
+
"learning_rate": 1.779764955049925e-05,
|
| 1024 |
+
"loss": 3.6048515319824217,
|
| 1025 |
+
"step": 1450
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 2.4414715719063547,
|
| 1029 |
+
"grad_norm": 0.21181504428386688,
|
| 1030 |
+
"learning_rate": 1.6810380153273362e-05,
|
| 1031 |
+
"loss": 3.60155029296875,
|
| 1032 |
+
"step": 1460
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 2.4581939799331103,
|
| 1036 |
+
"grad_norm": 0.20696775615215302,
|
| 1037 |
+
"learning_rate": 1.584876369002751e-05,
|
| 1038 |
+
"loss": 3.623727035522461,
|
| 1039 |
+
"step": 1470
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 2.4749163879598663,
|
| 1043 |
+
"grad_norm": 0.20903262495994568,
|
| 1044 |
+
"learning_rate": 1.4913096691551077e-05,
|
| 1045 |
+
"loss": 3.623518371582031,
|
| 1046 |
+
"step": 1480
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"epoch": 2.491638795986622,
|
| 1050 |
+
"grad_norm": 0.22404134273529053,
|
| 1051 |
+
"learning_rate": 1.4003667686674793e-05,
|
| 1052 |
+
"loss": 3.6152099609375,
|
| 1053 |
+
"step": 1490
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 2.508361204013378,
|
| 1057 |
+
"grad_norm": 0.2037034034729004,
|
| 1058 |
+
"learning_rate": 1.3120757113297777e-05,
|
| 1059 |
+
"loss": 3.612331771850586,
|
| 1060 |
+
"step": 1500
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 2.5250836120401337,
|
| 1064 |
+
"grad_norm": 0.21633440256118774,
|
| 1065 |
+
"learning_rate": 1.226463723190987e-05,
|
| 1066 |
+
"loss": 3.601060485839844,
|
| 1067 |
+
"step": 1510
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 2.5418060200668897,
|
| 1071 |
+
"grad_norm": 0.20832663774490356,
|
| 1072 |
+
"learning_rate": 1.1435572041635489e-05,
|
| 1073 |
+
"loss": 3.6438526153564452,
|
| 1074 |
+
"step": 1520
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 2.5585284280936453,
|
| 1078 |
+
"grad_norm": 0.21630828082561493,
|
| 1079 |
+
"learning_rate": 1.0633817198824858e-05,
|
| 1080 |
+
"loss": 3.6141563415527345,
|
| 1081 |
+
"step": 1530
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 2.5752508361204014,
|
| 1085 |
+
"grad_norm": 0.21327731013298035,
|
| 1086 |
+
"learning_rate": 9.859619938218222e-06,
|
| 1087 |
+
"loss": 3.5744644165039063,
|
| 1088 |
+
"step": 1540
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 2.591973244147157,
|
| 1092 |
+
"grad_norm": 0.20320357382297516,
|
| 1093 |
+
"learning_rate": 9.113218996706651e-06,
|
| 1094 |
+
"loss": 3.6112804412841797,
|
| 1095 |
+
"step": 1550
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 2.608695652173913,
|
| 1099 |
+
"grad_norm": 0.20723755657672882,
|
| 1100 |
+
"learning_rate": 8.394844539713587e-06,
|
| 1101 |
+
"loss": 3.6093166351318358,
|
| 1102 |
+
"step": 1560
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 2.625418060200669,
|
| 1106 |
+
"grad_norm": 0.20720350742340088,
|
| 1107 |
+
"learning_rate": 7.704718090219299e-06,
|
| 1108 |
+
"loss": 3.6185359954833984,
|
| 1109 |
+
"step": 1570
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 2.6421404682274248,
|
| 1113 |
+
"grad_norm": 0.20689421892166138,
|
| 1114 |
+
"learning_rate": 7.043052460450595e-06,
|
| 1115 |
+
"loss": 3.5807472229003907,
|
| 1116 |
+
"step": 1580
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 2.6588628762541804,
|
| 1120 |
+
"grad_norm": 0.1947193443775177,
|
| 1121 |
+
"learning_rate": 6.410051686256524e-06,
|
| 1122 |
+
"loss": 3.63294677734375,
|
| 1123 |
+
"step": 1590
|
| 1124 |
+
},
|
| 1125 |
+
{
|
| 1126 |
+
"epoch": 2.6755852842809364,
|
| 1127 |
+
"grad_norm": 0.20829661190509796,
|
| 1128 |
+
"learning_rate": 5.805910964190464e-06,
|
| 1129 |
+
"loss": 3.5655914306640626,
|
| 1130 |
+
"step": 1600
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 2.6923076923076925,
|
| 1134 |
+
"grad_norm": 0.21496719121932983,
|
| 1135 |
+
"learning_rate": 5.2308165913179e-06,
|
| 1136 |
+
"loss": 3.6056419372558595,
|
| 1137 |
+
"step": 1610
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 2.709030100334448,
|
| 1141 |
+
"grad_norm": 0.21569029986858368,
|
| 1142 |
+
"learning_rate": 4.684945907768623e-06,
|
| 1143 |
+
"loss": 3.63220329284668,
|
| 1144 |
+
"step": 1620
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 2.7257525083612038,
|
| 1148 |
+
"grad_norm": 0.21249784529209137,
|
| 1149 |
+
"learning_rate": 4.168467242050822e-06,
|
| 1150 |
+
"loss": 3.6161312103271483,
|
| 1151 |
+
"step": 1630
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 2.74247491638796,
|
| 1155 |
+
"grad_norm": 0.19498836994171143,
|
| 1156 |
+
"learning_rate": 3.6815398591441676e-06,
|
| 1157 |
+
"loss": 3.6304805755615233,
|
| 1158 |
+
"step": 1640
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 2.759197324414716,
|
| 1162 |
+
"grad_norm": 0.19185300171375275,
|
| 1163 |
+
"learning_rate": 3.224313911387755e-06,
|
| 1164 |
+
"loss": 3.610300064086914,
|
| 1165 |
+
"step": 1650
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 2.7759197324414715,
|
| 1169 |
+
"grad_norm": 0.2097301483154297,
|
| 1170 |
+
"learning_rate": 2.79693039217801e-06,
|
| 1171 |
+
"loss": 3.6425819396972656,
|
| 1172 |
+
"step": 1660
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 2.7926421404682276,
|
| 1176 |
+
"grad_norm": 0.2074955701828003,
|
| 1177 |
+
"learning_rate": 2.399521092491075e-06,
|
| 1178 |
+
"loss": 3.5936614990234377,
|
| 1179 |
+
"step": 1670
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 2.809364548494983,
|
| 1183 |
+
"grad_norm": 0.20003236830234528,
|
| 1184 |
+
"learning_rate": 2.032208560242732e-06,
|
| 1185 |
+
"loss": 3.5973114013671874,
|
| 1186 |
+
"step": 1680
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 2.8260869565217392,
|
| 1190 |
+
"grad_norm": 0.20352588593959808,
|
| 1191 |
+
"learning_rate": 1.695106062498708e-06,
|
| 1192 |
+
"loss": 3.6302867889404298,
|
| 1193 |
+
"step": 1690
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 2.842809364548495,
|
| 1197 |
+
"grad_norm": 0.20454245805740356,
|
| 1198 |
+
"learning_rate": 1.3883175505468693e-06,
|
| 1199 |
+
"loss": 3.614506149291992,
|
| 1200 |
+
"step": 1700
|
| 1201 |
+
},
|
| 1202 |
+
{
|
| 1203 |
+
"epoch": 2.859531772575251,
|
| 1204 |
+
"grad_norm": 0.20201674103736877,
|
| 1205 |
+
"learning_rate": 1.11193762784203e-06,
|
| 1206 |
+
"loss": 3.5982948303222657,
|
| 1207 |
+
"step": 1710
|
| 1208 |
+
},
|
| 1209 |
+
{
|
| 1210 |
+
"epoch": 2.8762541806020065,
|
| 1211 |
+
"grad_norm": 0.20037053525447845,
|
| 1212 |
+
"learning_rate": 8.660515208334108e-07,
|
| 1213 |
+
"loss": 3.6015445709228517,
|
| 1214 |
+
"step": 1720
|
| 1215 |
+
},
|
| 1216 |
+
{
|
| 1217 |
+
"epoch": 2.8929765886287626,
|
| 1218 |
+
"grad_norm": 0.21147583425045013,
|
| 1219 |
+
"learning_rate": 6.507350526835709e-07,
|
| 1220 |
+
"loss": 3.5722988128662108,
|
| 1221 |
+
"step": 1730
|
| 1222 |
+
},
|
| 1223 |
+
{
|
| 1224 |
+
"epoch": 2.9096989966555182,
|
| 1225 |
+
"grad_norm": 0.20878112316131592,
|
| 1226 |
+
"learning_rate": 4.6605461988707965e-07,
|
| 1227 |
+
"loss": 3.6185012817382813,
|
| 1228 |
+
"step": 1740
|
| 1229 |
+
},
|
| 1230 |
+
{
|
| 1231 |
+
"epoch": 2.9264214046822743,
|
| 1232 |
+
"grad_norm": 0.19872544705867767,
|
| 1233 |
+
"learning_rate": 3.1206717179601554e-07,
|
| 1234 |
+
"loss": 3.6195068359375,
|
| 1235 |
+
"step": 1750
|
| 1236 |
+
},
|
| 1237 |
+
{
|
| 1238 |
+
"epoch": 2.94314381270903,
|
| 1239 |
+
"grad_norm": 0.21033529937267303,
|
| 1240 |
+
"learning_rate": 1.8882019305866972e-07,
|
| 1241 |
+
"loss": 3.6103542327880858,
|
| 1242 |
+
"step": 1760
|
| 1243 |
+
},
|
| 1244 |
+
{
|
| 1245 |
+
"epoch": 2.959866220735786,
|
| 1246 |
+
"grad_norm": 0.20153765380382538,
|
| 1247 |
+
"learning_rate": 9.635168897684787e-08,
|
| 1248 |
+
"loss": 3.585107421875,
|
| 1249 |
+
"step": 1770
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 2.976588628762542,
|
| 1253 |
+
"grad_norm": 0.21875017881393433,
|
| 1254 |
+
"learning_rate": 3.4690173786255945e-08,
|
| 1255 |
+
"loss": 3.6015293121337892,
|
| 1256 |
+
"step": 1780
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"epoch": 2.9933110367892977,
|
| 1260 |
+
"grad_norm": 0.2030608206987381,
|
| 1261 |
+
"learning_rate": 3.8546618637225196e-09,
|
| 1262 |
+
"loss": 3.6415565490722654,
|
| 1263 |
+
"step": 1790
|
| 1264 |
}
|
| 1265 |
],
|
| 1266 |
"logging_steps": 10,
|
|
|
|
| 1275 |
"should_evaluate": false,
|
| 1276 |
"should_log": false,
|
| 1277 |
"should_save": true,
|
| 1278 |
+
"should_training_stop": true
|
| 1279 |
},
|
| 1280 |
"attributes": {}
|
| 1281 |
}
|
| 1282 |
},
|
| 1283 |
+
"total_flos": 2.058402188770345e+18,
|
| 1284 |
"train_batch_size": 42,
|
| 1285 |
"trial_name": null,
|
| 1286 |
"trial_params": null
|