Training in progress, step 8536, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5506ccea2414f4acaa8ed413cb646ff560b431e5f2ab781d9d1c3c3fadd5af75
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44a732b77585886a6fb022dbe21d3f3de54f8e919f9982a63ff1c4f2a7c9834a
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:774612f2d4811ca7da639cec8d3b509c1f87b5ffc57761546af6466447a0619a
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bece9809bf70ad158471014e9f0407932e462ce7f7864e1800e151872b48ca7
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -6119,6 +6119,409 @@
|
|
| 6119 |
"eval_spearman_manhattan": 0.767544325158077,
|
| 6120 |
"eval_steps_per_second": 25.45,
|
| 6121 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6122 |
}
|
| 6123 |
],
|
| 6124 |
"logging_steps": 10,
|
|
@@ -6133,7 +6536,7 @@
|
|
| 6133 |
"should_evaluate": false,
|
| 6134 |
"should_log": false,
|
| 6135 |
"should_save": true,
|
| 6136 |
-
"should_training_stop":
|
| 6137 |
},
|
| 6138 |
"attributes": {}
|
| 6139 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.0,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 8536,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 6119 |
"eval_spearman_manhattan": 0.767544325158077,
|
| 6120 |
"eval_steps_per_second": 25.45,
|
| 6121 |
"step": 8000
|
| 6122 |
+
},
|
| 6123 |
+
{
|
| 6124 |
+
"epoch": 3.753514526710403,
|
| 6125 |
+
"grad_norm": 2.859675645828247,
|
| 6126 |
+
"learning_rate": 1.7654053420805998e-05,
|
| 6127 |
+
"loss": 0.1442,
|
| 6128 |
+
"step": 8010
|
| 6129 |
+
},
|
| 6130 |
+
{
|
| 6131 |
+
"epoch": 3.758200562324274,
|
| 6132 |
+
"grad_norm": 1.54740571975708,
|
| 6133 |
+
"learning_rate": 1.765112464854733e-05,
|
| 6134 |
+
"loss": 0.1194,
|
| 6135 |
+
"step": 8020
|
| 6136 |
+
},
|
| 6137 |
+
{
|
| 6138 |
+
"epoch": 3.7628865979381443,
|
| 6139 |
+
"grad_norm": 1.5696630477905273,
|
| 6140 |
+
"learning_rate": 1.764819587628866e-05,
|
| 6141 |
+
"loss": 0.1542,
|
| 6142 |
+
"step": 8030
|
| 6143 |
+
},
|
| 6144 |
+
{
|
| 6145 |
+
"epoch": 3.7675726335520148,
|
| 6146 |
+
"grad_norm": 1.8833867311477661,
|
| 6147 |
+
"learning_rate": 1.7645267104029993e-05,
|
| 6148 |
+
"loss": 0.1222,
|
| 6149 |
+
"step": 8040
|
| 6150 |
+
},
|
| 6151 |
+
{
|
| 6152 |
+
"epoch": 3.7722586691658857,
|
| 6153 |
+
"grad_norm": 2.2312562465667725,
|
| 6154 |
+
"learning_rate": 1.7642338331771322e-05,
|
| 6155 |
+
"loss": 0.1378,
|
| 6156 |
+
"step": 8050
|
| 6157 |
+
},
|
| 6158 |
+
{
|
| 6159 |
+
"epoch": 3.776944704779756,
|
| 6160 |
+
"grad_norm": 1.9470106363296509,
|
| 6161 |
+
"learning_rate": 1.7639409559512655e-05,
|
| 6162 |
+
"loss": 0.136,
|
| 6163 |
+
"step": 8060
|
| 6164 |
+
},
|
| 6165 |
+
{
|
| 6166 |
+
"epoch": 3.781630740393627,
|
| 6167 |
+
"grad_norm": 1.736140489578247,
|
| 6168 |
+
"learning_rate": 1.7636480787253985e-05,
|
| 6169 |
+
"loss": 0.144,
|
| 6170 |
+
"step": 8070
|
| 6171 |
+
},
|
| 6172 |
+
{
|
| 6173 |
+
"epoch": 3.7863167760074976,
|
| 6174 |
+
"grad_norm": 2.0280628204345703,
|
| 6175 |
+
"learning_rate": 1.7633552014995314e-05,
|
| 6176 |
+
"loss": 0.1331,
|
| 6177 |
+
"step": 8080
|
| 6178 |
+
},
|
| 6179 |
+
{
|
| 6180 |
+
"epoch": 3.791002811621368,
|
| 6181 |
+
"grad_norm": 1.1331037282943726,
|
| 6182 |
+
"learning_rate": 1.7630623242736647e-05,
|
| 6183 |
+
"loss": 0.1327,
|
| 6184 |
+
"step": 8090
|
| 6185 |
+
},
|
| 6186 |
+
{
|
| 6187 |
+
"epoch": 3.795688847235239,
|
| 6188 |
+
"grad_norm": 2.00110125541687,
|
| 6189 |
+
"learning_rate": 1.7627694470477977e-05,
|
| 6190 |
+
"loss": 0.1335,
|
| 6191 |
+
"step": 8100
|
| 6192 |
+
},
|
| 6193 |
+
{
|
| 6194 |
+
"epoch": 3.8003748828491095,
|
| 6195 |
+
"grad_norm": 1.589747428894043,
|
| 6196 |
+
"learning_rate": 1.762476569821931e-05,
|
| 6197 |
+
"loss": 0.1222,
|
| 6198 |
+
"step": 8110
|
| 6199 |
+
},
|
| 6200 |
+
{
|
| 6201 |
+
"epoch": 3.8050609184629804,
|
| 6202 |
+
"grad_norm": 1.8979542255401611,
|
| 6203 |
+
"learning_rate": 1.762183692596064e-05,
|
| 6204 |
+
"loss": 0.1534,
|
| 6205 |
+
"step": 8120
|
| 6206 |
+
},
|
| 6207 |
+
{
|
| 6208 |
+
"epoch": 3.809746954076851,
|
| 6209 |
+
"grad_norm": 2.384608268737793,
|
| 6210 |
+
"learning_rate": 1.761890815370197e-05,
|
| 6211 |
+
"loss": 0.1395,
|
| 6212 |
+
"step": 8130
|
| 6213 |
+
},
|
| 6214 |
+
{
|
| 6215 |
+
"epoch": 3.8144329896907214,
|
| 6216 |
+
"grad_norm": 1.8555763959884644,
|
| 6217 |
+
"learning_rate": 1.76159793814433e-05,
|
| 6218 |
+
"loss": 0.1298,
|
| 6219 |
+
"step": 8140
|
| 6220 |
+
},
|
| 6221 |
+
{
|
| 6222 |
+
"epoch": 3.8191190253045924,
|
| 6223 |
+
"grad_norm": 2.0810048580169678,
|
| 6224 |
+
"learning_rate": 1.761305060918463e-05,
|
| 6225 |
+
"loss": 0.1165,
|
| 6226 |
+
"step": 8150
|
| 6227 |
+
},
|
| 6228 |
+
{
|
| 6229 |
+
"epoch": 3.823805060918463,
|
| 6230 |
+
"grad_norm": 1.9116188287734985,
|
| 6231 |
+
"learning_rate": 1.761012183692596e-05,
|
| 6232 |
+
"loss": 0.1316,
|
| 6233 |
+
"step": 8160
|
| 6234 |
+
},
|
| 6235 |
+
{
|
| 6236 |
+
"epoch": 3.8284910965323338,
|
| 6237 |
+
"grad_norm": 2.3420250415802,
|
| 6238 |
+
"learning_rate": 1.7607193064667293e-05,
|
| 6239 |
+
"loss": 0.1176,
|
| 6240 |
+
"step": 8170
|
| 6241 |
+
},
|
| 6242 |
+
{
|
| 6243 |
+
"epoch": 3.8331771321462043,
|
| 6244 |
+
"grad_norm": 2.0588436126708984,
|
| 6245 |
+
"learning_rate": 1.7604264292408623e-05,
|
| 6246 |
+
"loss": 0.1365,
|
| 6247 |
+
"step": 8180
|
| 6248 |
+
},
|
| 6249 |
+
{
|
| 6250 |
+
"epoch": 3.8378631677600747,
|
| 6251 |
+
"grad_norm": 1.89832603931427,
|
| 6252 |
+
"learning_rate": 1.7601335520149952e-05,
|
| 6253 |
+
"loss": 0.1459,
|
| 6254 |
+
"step": 8190
|
| 6255 |
+
},
|
| 6256 |
+
{
|
| 6257 |
+
"epoch": 3.8425492033739457,
|
| 6258 |
+
"grad_norm": 2.0133140087127686,
|
| 6259 |
+
"learning_rate": 1.7598406747891285e-05,
|
| 6260 |
+
"loss": 0.1075,
|
| 6261 |
+
"step": 8200
|
| 6262 |
+
},
|
| 6263 |
+
{
|
| 6264 |
+
"epoch": 3.847235238987816,
|
| 6265 |
+
"grad_norm": 1.7235685586929321,
|
| 6266 |
+
"learning_rate": 1.7595477975632615e-05,
|
| 6267 |
+
"loss": 0.1143,
|
| 6268 |
+
"step": 8210
|
| 6269 |
+
},
|
| 6270 |
+
{
|
| 6271 |
+
"epoch": 3.851921274601687,
|
| 6272 |
+
"grad_norm": 1.6717524528503418,
|
| 6273 |
+
"learning_rate": 1.7592549203373948e-05,
|
| 6274 |
+
"loss": 0.1269,
|
| 6275 |
+
"step": 8220
|
| 6276 |
+
},
|
| 6277 |
+
{
|
| 6278 |
+
"epoch": 3.8566073102155576,
|
| 6279 |
+
"grad_norm": 1.5314955711364746,
|
| 6280 |
+
"learning_rate": 1.7589620431115277e-05,
|
| 6281 |
+
"loss": 0.1264,
|
| 6282 |
+
"step": 8230
|
| 6283 |
+
},
|
| 6284 |
+
{
|
| 6285 |
+
"epoch": 3.861293345829428,
|
| 6286 |
+
"grad_norm": 2.4005510807037354,
|
| 6287 |
+
"learning_rate": 1.758669165885661e-05,
|
| 6288 |
+
"loss": 0.1299,
|
| 6289 |
+
"step": 8240
|
| 6290 |
+
},
|
| 6291 |
+
{
|
| 6292 |
+
"epoch": 3.865979381443299,
|
| 6293 |
+
"grad_norm": 1.6033201217651367,
|
| 6294 |
+
"learning_rate": 1.758376288659794e-05,
|
| 6295 |
+
"loss": 0.1107,
|
| 6296 |
+
"step": 8250
|
| 6297 |
+
},
|
| 6298 |
+
{
|
| 6299 |
+
"epoch": 3.865979381443299,
|
| 6300 |
+
"eval_loss": 0.04320518299937248,
|
| 6301 |
+
"eval_pearson_cosine": 0.7953389681810563,
|
| 6302 |
+
"eval_pearson_dot": 0.6355398535557981,
|
| 6303 |
+
"eval_pearson_euclidean": 0.7518228241740452,
|
| 6304 |
+
"eval_pearson_manhattan": 0.7507054666151873,
|
| 6305 |
+
"eval_runtime": 3.6594,
|
| 6306 |
+
"eval_samples_per_second": 409.902,
|
| 6307 |
+
"eval_spearman_cosine": 0.799221389686279,
|
| 6308 |
+
"eval_spearman_dot": 0.6411224962967852,
|
| 6309 |
+
"eval_spearman_euclidean": 0.7674570737836293,
|
| 6310 |
+
"eval_spearman_manhattan": 0.7673149114040088,
|
| 6311 |
+
"eval_steps_per_second": 25.687,
|
| 6312 |
+
"step": 8250
|
| 6313 |
+
},
|
| 6314 |
+
{
|
| 6315 |
+
"epoch": 3.8706654170571695,
|
| 6316 |
+
"grad_norm": 1.8069274425506592,
|
| 6317 |
+
"learning_rate": 1.7580834114339272e-05,
|
| 6318 |
+
"loss": 0.1293,
|
| 6319 |
+
"step": 8260
|
| 6320 |
+
},
|
| 6321 |
+
{
|
| 6322 |
+
"epoch": 3.8753514526710404,
|
| 6323 |
+
"grad_norm": 1.9214448928833008,
|
| 6324 |
+
"learning_rate": 1.7577905342080602e-05,
|
| 6325 |
+
"loss": 0.1235,
|
| 6326 |
+
"step": 8270
|
| 6327 |
+
},
|
| 6328 |
+
{
|
| 6329 |
+
"epoch": 3.880037488284911,
|
| 6330 |
+
"grad_norm": 2.0332281589508057,
|
| 6331 |
+
"learning_rate": 1.757497656982193e-05,
|
| 6332 |
+
"loss": 0.1303,
|
| 6333 |
+
"step": 8280
|
| 6334 |
+
},
|
| 6335 |
+
{
|
| 6336 |
+
"epoch": 3.8847235238987814,
|
| 6337 |
+
"grad_norm": 1.8587048053741455,
|
| 6338 |
+
"learning_rate": 1.7572047797563264e-05,
|
| 6339 |
+
"loss": 0.1272,
|
| 6340 |
+
"step": 8290
|
| 6341 |
+
},
|
| 6342 |
+
{
|
| 6343 |
+
"epoch": 3.8894095595126523,
|
| 6344 |
+
"grad_norm": 1.7040314674377441,
|
| 6345 |
+
"learning_rate": 1.7569119025304594e-05,
|
| 6346 |
+
"loss": 0.1304,
|
| 6347 |
+
"step": 8300
|
| 6348 |
+
},
|
| 6349 |
+
{
|
| 6350 |
+
"epoch": 3.894095595126523,
|
| 6351 |
+
"grad_norm": 1.7979313135147095,
|
| 6352 |
+
"learning_rate": 1.7566190253045923e-05,
|
| 6353 |
+
"loss": 0.1226,
|
| 6354 |
+
"step": 8310
|
| 6355 |
+
},
|
| 6356 |
+
{
|
| 6357 |
+
"epoch": 3.8987816307403937,
|
| 6358 |
+
"grad_norm": 1.6295264959335327,
|
| 6359 |
+
"learning_rate": 1.7563261480787256e-05,
|
| 6360 |
+
"loss": 0.1198,
|
| 6361 |
+
"step": 8320
|
| 6362 |
+
},
|
| 6363 |
+
{
|
| 6364 |
+
"epoch": 3.9034676663542642,
|
| 6365 |
+
"grad_norm": 1.764858603477478,
|
| 6366 |
+
"learning_rate": 1.7560332708528586e-05,
|
| 6367 |
+
"loss": 0.1294,
|
| 6368 |
+
"step": 8330
|
| 6369 |
+
},
|
| 6370 |
+
{
|
| 6371 |
+
"epoch": 3.9081537019681347,
|
| 6372 |
+
"grad_norm": 2.3997533321380615,
|
| 6373 |
+
"learning_rate": 1.7557403936269915e-05,
|
| 6374 |
+
"loss": 0.1288,
|
| 6375 |
+
"step": 8340
|
| 6376 |
+
},
|
| 6377 |
+
{
|
| 6378 |
+
"epoch": 3.9128397375820057,
|
| 6379 |
+
"grad_norm": 2.302992820739746,
|
| 6380 |
+
"learning_rate": 1.7554475164011248e-05,
|
| 6381 |
+
"loss": 0.1482,
|
| 6382 |
+
"step": 8350
|
| 6383 |
+
},
|
| 6384 |
+
{
|
| 6385 |
+
"epoch": 3.917525773195876,
|
| 6386 |
+
"grad_norm": 1.8705153465270996,
|
| 6387 |
+
"learning_rate": 1.7551546391752578e-05,
|
| 6388 |
+
"loss": 0.1203,
|
| 6389 |
+
"step": 8360
|
| 6390 |
+
},
|
| 6391 |
+
{
|
| 6392 |
+
"epoch": 3.922211808809747,
|
| 6393 |
+
"grad_norm": 2.1490349769592285,
|
| 6394 |
+
"learning_rate": 1.754861761949391e-05,
|
| 6395 |
+
"loss": 0.1074,
|
| 6396 |
+
"step": 8370
|
| 6397 |
+
},
|
| 6398 |
+
{
|
| 6399 |
+
"epoch": 3.9268978444236176,
|
| 6400 |
+
"grad_norm": 1.5012431144714355,
|
| 6401 |
+
"learning_rate": 1.754568884723524e-05,
|
| 6402 |
+
"loss": 0.1202,
|
| 6403 |
+
"step": 8380
|
| 6404 |
+
},
|
| 6405 |
+
{
|
| 6406 |
+
"epoch": 3.931583880037488,
|
| 6407 |
+
"grad_norm": 1.2775022983551025,
|
| 6408 |
+
"learning_rate": 1.754276007497657e-05,
|
| 6409 |
+
"loss": 0.1241,
|
| 6410 |
+
"step": 8390
|
| 6411 |
+
},
|
| 6412 |
+
{
|
| 6413 |
+
"epoch": 3.936269915651359,
|
| 6414 |
+
"grad_norm": 2.361064910888672,
|
| 6415 |
+
"learning_rate": 1.7539831302717902e-05,
|
| 6416 |
+
"loss": 0.1393,
|
| 6417 |
+
"step": 8400
|
| 6418 |
+
},
|
| 6419 |
+
{
|
| 6420 |
+
"epoch": 3.9409559512652295,
|
| 6421 |
+
"grad_norm": 1.8726896047592163,
|
| 6422 |
+
"learning_rate": 1.7536902530459232e-05,
|
| 6423 |
+
"loss": 0.1191,
|
| 6424 |
+
"step": 8410
|
| 6425 |
+
},
|
| 6426 |
+
{
|
| 6427 |
+
"epoch": 3.9456419868791004,
|
| 6428 |
+
"grad_norm": 1.8277250528335571,
|
| 6429 |
+
"learning_rate": 1.7533973758200565e-05,
|
| 6430 |
+
"loss": 0.1265,
|
| 6431 |
+
"step": 8420
|
| 6432 |
+
},
|
| 6433 |
+
{
|
| 6434 |
+
"epoch": 3.950328022492971,
|
| 6435 |
+
"grad_norm": 1.534006118774414,
|
| 6436 |
+
"learning_rate": 1.7531044985941894e-05,
|
| 6437 |
+
"loss": 0.1153,
|
| 6438 |
+
"step": 8430
|
| 6439 |
+
},
|
| 6440 |
+
{
|
| 6441 |
+
"epoch": 3.9550140581068414,
|
| 6442 |
+
"grad_norm": 1.9085185527801514,
|
| 6443 |
+
"learning_rate": 1.7528116213683227e-05,
|
| 6444 |
+
"loss": 0.1216,
|
| 6445 |
+
"step": 8440
|
| 6446 |
+
},
|
| 6447 |
+
{
|
| 6448 |
+
"epoch": 3.9597000937207123,
|
| 6449 |
+
"grad_norm": 2.059544563293457,
|
| 6450 |
+
"learning_rate": 1.7525187441424557e-05,
|
| 6451 |
+
"loss": 0.1185,
|
| 6452 |
+
"step": 8450
|
| 6453 |
+
},
|
| 6454 |
+
{
|
| 6455 |
+
"epoch": 3.964386129334583,
|
| 6456 |
+
"grad_norm": 2.6308438777923584,
|
| 6457 |
+
"learning_rate": 1.752225866916589e-05,
|
| 6458 |
+
"loss": 0.1137,
|
| 6459 |
+
"step": 8460
|
| 6460 |
+
},
|
| 6461 |
+
{
|
| 6462 |
+
"epoch": 3.9690721649484537,
|
| 6463 |
+
"grad_norm": 1.2617682218551636,
|
| 6464 |
+
"learning_rate": 1.751932989690722e-05,
|
| 6465 |
+
"loss": 0.143,
|
| 6466 |
+
"step": 8470
|
| 6467 |
+
},
|
| 6468 |
+
{
|
| 6469 |
+
"epoch": 3.973758200562324,
|
| 6470 |
+
"grad_norm": 2.1921794414520264,
|
| 6471 |
+
"learning_rate": 1.751640112464855e-05,
|
| 6472 |
+
"loss": 0.1086,
|
| 6473 |
+
"step": 8480
|
| 6474 |
+
},
|
| 6475 |
+
{
|
| 6476 |
+
"epoch": 3.9784442361761947,
|
| 6477 |
+
"grad_norm": 1.7036564350128174,
|
| 6478 |
+
"learning_rate": 1.751347235238988e-05,
|
| 6479 |
+
"loss": 0.127,
|
| 6480 |
+
"step": 8490
|
| 6481 |
+
},
|
| 6482 |
+
{
|
| 6483 |
+
"epoch": 3.9831302717900656,
|
| 6484 |
+
"grad_norm": 1.6190659999847412,
|
| 6485 |
+
"learning_rate": 1.751054358013121e-05,
|
| 6486 |
+
"loss": 0.1232,
|
| 6487 |
+
"step": 8500
|
| 6488 |
+
},
|
| 6489 |
+
{
|
| 6490 |
+
"epoch": 3.9831302717900656,
|
| 6491 |
+
"eval_loss": 0.04382430762052536,
|
| 6492 |
+
"eval_pearson_cosine": 0.7946660833476784,
|
| 6493 |
+
"eval_pearson_dot": 0.6408322008008298,
|
| 6494 |
+
"eval_pearson_euclidean": 0.7506581872281402,
|
| 6495 |
+
"eval_pearson_manhattan": 0.7492952268314212,
|
| 6496 |
+
"eval_runtime": 3.2595,
|
| 6497 |
+
"eval_samples_per_second": 460.2,
|
| 6498 |
+
"eval_spearman_cosine": 0.7991836067006112,
|
| 6499 |
+
"eval_spearman_dot": 0.6471776019534929,
|
| 6500 |
+
"eval_spearman_euclidean": 0.7665561093361127,
|
| 6501 |
+
"eval_spearman_manhattan": 0.7655283844207366,
|
| 6502 |
+
"eval_steps_per_second": 28.839,
|
| 6503 |
+
"step": 8500
|
| 6504 |
+
},
|
| 6505 |
+
{
|
| 6506 |
+
"epoch": 3.987816307403936,
|
| 6507 |
+
"grad_norm": 1.5553314685821533,
|
| 6508 |
+
"learning_rate": 1.750761480787254e-05,
|
| 6509 |
+
"loss": 0.1199,
|
| 6510 |
+
"step": 8510
|
| 6511 |
+
},
|
| 6512 |
+
{
|
| 6513 |
+
"epoch": 3.992502343017807,
|
| 6514 |
+
"grad_norm": 1.8198939561843872,
|
| 6515 |
+
"learning_rate": 1.750468603561387e-05,
|
| 6516 |
+
"loss": 0.1387,
|
| 6517 |
+
"step": 8520
|
| 6518 |
+
},
|
| 6519 |
+
{
|
| 6520 |
+
"epoch": 3.9971883786316775,
|
| 6521 |
+
"grad_norm": 1.5737895965576172,
|
| 6522 |
+
"learning_rate": 1.7501757263355203e-05,
|
| 6523 |
+
"loss": 0.1291,
|
| 6524 |
+
"step": 8530
|
| 6525 |
}
|
| 6526 |
],
|
| 6527 |
"logging_steps": 10,
|
|
|
|
| 6536 |
"should_evaluate": false,
|
| 6537 |
"should_log": false,
|
| 6538 |
"should_save": true,
|
| 6539 |
+
"should_training_stop": true
|
| 6540 |
},
|
| 6541 |
"attributes": {}
|
| 6542 |
}
|