Training in progress, step 8536, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 737580392
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:703b75465ed45e4a47b755f4dbf7613f34e8cb9a9c6557491a46a67bc25a57ef
|
| 3 |
size 737580392
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1475248442
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:943fd807c0afa2eb74111f9b3a9e2bfca879f4ad22f91c7601145761a7d127c8
|
| 3 |
size 1475248442
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:774612f2d4811ca7da639cec8d3b509c1f87b5ffc57761546af6466447a0619a
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bece9809bf70ad158471014e9f0407932e462ce7f7864e1800e151872b48ca7
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -6119,6 +6119,409 @@
|
|
| 6119 |
"eval_spearman_manhattan": 0.8023527525471515,
|
| 6120 |
"eval_steps_per_second": 29.406,
|
| 6121 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6122 |
}
|
| 6123 |
],
|
| 6124 |
"logging_steps": 10,
|
|
@@ -6133,7 +6536,7 @@
|
|
| 6133 |
"should_evaluate": false,
|
| 6134 |
"should_log": false,
|
| 6135 |
"should_save": true,
|
| 6136 |
-
"should_training_stop":
|
| 6137 |
},
|
| 6138 |
"attributes": {}
|
| 6139 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.0,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 8536,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 6119 |
"eval_spearman_manhattan": 0.8023527525471515,
|
| 6120 |
"eval_steps_per_second": 29.406,
|
| 6121 |
"step": 8000
|
| 6122 |
+
},
|
| 6123 |
+
{
|
| 6124 |
+
"epoch": 3.753514526710403,
|
| 6125 |
+
"grad_norm": 1.525901198387146,
|
| 6126 |
+
"learning_rate": 1.7654053420805998e-05,
|
| 6127 |
+
"loss": 0.1211,
|
| 6128 |
+
"step": 8010
|
| 6129 |
+
},
|
| 6130 |
+
{
|
| 6131 |
+
"epoch": 3.758200562324274,
|
| 6132 |
+
"grad_norm": 2.8532297611236572,
|
| 6133 |
+
"learning_rate": 1.765112464854733e-05,
|
| 6134 |
+
"loss": 0.1165,
|
| 6135 |
+
"step": 8020
|
| 6136 |
+
},
|
| 6137 |
+
{
|
| 6138 |
+
"epoch": 3.7628865979381443,
|
| 6139 |
+
"grad_norm": 1.453282117843628,
|
| 6140 |
+
"learning_rate": 1.764819587628866e-05,
|
| 6141 |
+
"loss": 0.1293,
|
| 6142 |
+
"step": 8030
|
| 6143 |
+
},
|
| 6144 |
+
{
|
| 6145 |
+
"epoch": 3.7675726335520148,
|
| 6146 |
+
"grad_norm": 1.6476629972457886,
|
| 6147 |
+
"learning_rate": 1.7645267104029993e-05,
|
| 6148 |
+
"loss": 0.1104,
|
| 6149 |
+
"step": 8040
|
| 6150 |
+
},
|
| 6151 |
+
{
|
| 6152 |
+
"epoch": 3.7722586691658857,
|
| 6153 |
+
"grad_norm": 1.583380103111267,
|
| 6154 |
+
"learning_rate": 1.7642338331771322e-05,
|
| 6155 |
+
"loss": 0.1191,
|
| 6156 |
+
"step": 8050
|
| 6157 |
+
},
|
| 6158 |
+
{
|
| 6159 |
+
"epoch": 3.776944704779756,
|
| 6160 |
+
"grad_norm": 1.4234002828598022,
|
| 6161 |
+
"learning_rate": 1.7639409559512655e-05,
|
| 6162 |
+
"loss": 0.1201,
|
| 6163 |
+
"step": 8060
|
| 6164 |
+
},
|
| 6165 |
+
{
|
| 6166 |
+
"epoch": 3.781630740393627,
|
| 6167 |
+
"grad_norm": 2.0201187133789062,
|
| 6168 |
+
"learning_rate": 1.7636480787253985e-05,
|
| 6169 |
+
"loss": 0.1398,
|
| 6170 |
+
"step": 8070
|
| 6171 |
+
},
|
| 6172 |
+
{
|
| 6173 |
+
"epoch": 3.7863167760074976,
|
| 6174 |
+
"grad_norm": 1.8647639751434326,
|
| 6175 |
+
"learning_rate": 1.7633552014995314e-05,
|
| 6176 |
+
"loss": 0.1344,
|
| 6177 |
+
"step": 8080
|
| 6178 |
+
},
|
| 6179 |
+
{
|
| 6180 |
+
"epoch": 3.791002811621368,
|
| 6181 |
+
"grad_norm": 1.5310838222503662,
|
| 6182 |
+
"learning_rate": 1.7630623242736647e-05,
|
| 6183 |
+
"loss": 0.118,
|
| 6184 |
+
"step": 8090
|
| 6185 |
+
},
|
| 6186 |
+
{
|
| 6187 |
+
"epoch": 3.795688847235239,
|
| 6188 |
+
"grad_norm": 1.740401268005371,
|
| 6189 |
+
"learning_rate": 1.7627694470477977e-05,
|
| 6190 |
+
"loss": 0.1187,
|
| 6191 |
+
"step": 8100
|
| 6192 |
+
},
|
| 6193 |
+
{
|
| 6194 |
+
"epoch": 3.8003748828491095,
|
| 6195 |
+
"grad_norm": 1.0978221893310547,
|
| 6196 |
+
"learning_rate": 1.762476569821931e-05,
|
| 6197 |
+
"loss": 0.1218,
|
| 6198 |
+
"step": 8110
|
| 6199 |
+
},
|
| 6200 |
+
{
|
| 6201 |
+
"epoch": 3.8050609184629804,
|
| 6202 |
+
"grad_norm": 1.8838212490081787,
|
| 6203 |
+
"learning_rate": 1.762183692596064e-05,
|
| 6204 |
+
"loss": 0.1349,
|
| 6205 |
+
"step": 8120
|
| 6206 |
+
},
|
| 6207 |
+
{
|
| 6208 |
+
"epoch": 3.809746954076851,
|
| 6209 |
+
"grad_norm": 1.8535819053649902,
|
| 6210 |
+
"learning_rate": 1.761890815370197e-05,
|
| 6211 |
+
"loss": 0.118,
|
| 6212 |
+
"step": 8130
|
| 6213 |
+
},
|
| 6214 |
+
{
|
| 6215 |
+
"epoch": 3.8144329896907214,
|
| 6216 |
+
"grad_norm": 1.482851266860962,
|
| 6217 |
+
"learning_rate": 1.76159793814433e-05,
|
| 6218 |
+
"loss": 0.1136,
|
| 6219 |
+
"step": 8140
|
| 6220 |
+
},
|
| 6221 |
+
{
|
| 6222 |
+
"epoch": 3.8191190253045924,
|
| 6223 |
+
"grad_norm": 1.4817432165145874,
|
| 6224 |
+
"learning_rate": 1.761305060918463e-05,
|
| 6225 |
+
"loss": 0.1144,
|
| 6226 |
+
"step": 8150
|
| 6227 |
+
},
|
| 6228 |
+
{
|
| 6229 |
+
"epoch": 3.823805060918463,
|
| 6230 |
+
"grad_norm": 1.8795218467712402,
|
| 6231 |
+
"learning_rate": 1.761012183692596e-05,
|
| 6232 |
+
"loss": 0.1337,
|
| 6233 |
+
"step": 8160
|
| 6234 |
+
},
|
| 6235 |
+
{
|
| 6236 |
+
"epoch": 3.8284910965323338,
|
| 6237 |
+
"grad_norm": 1.5762320756912231,
|
| 6238 |
+
"learning_rate": 1.7607193064667293e-05,
|
| 6239 |
+
"loss": 0.1186,
|
| 6240 |
+
"step": 8170
|
| 6241 |
+
},
|
| 6242 |
+
{
|
| 6243 |
+
"epoch": 3.8331771321462043,
|
| 6244 |
+
"grad_norm": 1.3855458498001099,
|
| 6245 |
+
"learning_rate": 1.7604264292408623e-05,
|
| 6246 |
+
"loss": 0.1213,
|
| 6247 |
+
"step": 8180
|
| 6248 |
+
},
|
| 6249 |
+
{
|
| 6250 |
+
"epoch": 3.8378631677600747,
|
| 6251 |
+
"grad_norm": 1.619994044303894,
|
| 6252 |
+
"learning_rate": 1.7601335520149952e-05,
|
| 6253 |
+
"loss": 0.1484,
|
| 6254 |
+
"step": 8190
|
| 6255 |
+
},
|
| 6256 |
+
{
|
| 6257 |
+
"epoch": 3.8425492033739457,
|
| 6258 |
+
"grad_norm": 1.3682477474212646,
|
| 6259 |
+
"learning_rate": 1.7598406747891285e-05,
|
| 6260 |
+
"loss": 0.0907,
|
| 6261 |
+
"step": 8200
|
| 6262 |
+
},
|
| 6263 |
+
{
|
| 6264 |
+
"epoch": 3.847235238987816,
|
| 6265 |
+
"grad_norm": 1.5766955614089966,
|
| 6266 |
+
"learning_rate": 1.7595477975632615e-05,
|
| 6267 |
+
"loss": 0.1081,
|
| 6268 |
+
"step": 8210
|
| 6269 |
+
},
|
| 6270 |
+
{
|
| 6271 |
+
"epoch": 3.851921274601687,
|
| 6272 |
+
"grad_norm": 1.5448287725448608,
|
| 6273 |
+
"learning_rate": 1.7592549203373948e-05,
|
| 6274 |
+
"loss": 0.1296,
|
| 6275 |
+
"step": 8220
|
| 6276 |
+
},
|
| 6277 |
+
{
|
| 6278 |
+
"epoch": 3.8566073102155576,
|
| 6279 |
+
"grad_norm": 1.2130484580993652,
|
| 6280 |
+
"learning_rate": 1.7589620431115277e-05,
|
| 6281 |
+
"loss": 0.1141,
|
| 6282 |
+
"step": 8230
|
| 6283 |
+
},
|
| 6284 |
+
{
|
| 6285 |
+
"epoch": 3.861293345829428,
|
| 6286 |
+
"grad_norm": 1.5601420402526855,
|
| 6287 |
+
"learning_rate": 1.758669165885661e-05,
|
| 6288 |
+
"loss": 0.1303,
|
| 6289 |
+
"step": 8240
|
| 6290 |
+
},
|
| 6291 |
+
{
|
| 6292 |
+
"epoch": 3.865979381443299,
|
| 6293 |
+
"grad_norm": 1.26397705078125,
|
| 6294 |
+
"learning_rate": 1.758376288659794e-05,
|
| 6295 |
+
"loss": 0.1017,
|
| 6296 |
+
"step": 8250
|
| 6297 |
+
},
|
| 6298 |
+
{
|
| 6299 |
+
"epoch": 3.865979381443299,
|
| 6300 |
+
"eval_loss": 0.031061464920639992,
|
| 6301 |
+
"eval_pearson_cosine": 0.8181771715322625,
|
| 6302 |
+
"eval_pearson_dot": 0.753218304404399,
|
| 6303 |
+
"eval_pearson_euclidean": 0.7900298439690836,
|
| 6304 |
+
"eval_pearson_manhattan": 0.792536151039883,
|
| 6305 |
+
"eval_runtime": 3.1857,
|
| 6306 |
+
"eval_samples_per_second": 470.856,
|
| 6307 |
+
"eval_spearman_cosine": 0.8174158550444287,
|
| 6308 |
+
"eval_spearman_dot": 0.7522993308222343,
|
| 6309 |
+
"eval_spearman_euclidean": 0.798562890702385,
|
| 6310 |
+
"eval_spearman_manhattan": 0.8006887717421057,
|
| 6311 |
+
"eval_steps_per_second": 29.507,
|
| 6312 |
+
"step": 8250
|
| 6313 |
+
},
|
| 6314 |
+
{
|
| 6315 |
+
"epoch": 3.8706654170571695,
|
| 6316 |
+
"grad_norm": 1.8911367654800415,
|
| 6317 |
+
"learning_rate": 1.7580834114339272e-05,
|
| 6318 |
+
"loss": 0.1197,
|
| 6319 |
+
"step": 8260
|
| 6320 |
+
},
|
| 6321 |
+
{
|
| 6322 |
+
"epoch": 3.8753514526710404,
|
| 6323 |
+
"grad_norm": 1.5515751838684082,
|
| 6324 |
+
"learning_rate": 1.7577905342080602e-05,
|
| 6325 |
+
"loss": 0.1278,
|
| 6326 |
+
"step": 8270
|
| 6327 |
+
},
|
| 6328 |
+
{
|
| 6329 |
+
"epoch": 3.880037488284911,
|
| 6330 |
+
"grad_norm": 2.1667306423187256,
|
| 6331 |
+
"learning_rate": 1.757497656982193e-05,
|
| 6332 |
+
"loss": 0.1387,
|
| 6333 |
+
"step": 8280
|
| 6334 |
+
},
|
| 6335 |
+
{
|
| 6336 |
+
"epoch": 3.8847235238987814,
|
| 6337 |
+
"grad_norm": 1.149591326713562,
|
| 6338 |
+
"learning_rate": 1.7572047797563264e-05,
|
| 6339 |
+
"loss": 0.1231,
|
| 6340 |
+
"step": 8290
|
| 6341 |
+
},
|
| 6342 |
+
{
|
| 6343 |
+
"epoch": 3.8894095595126523,
|
| 6344 |
+
"grad_norm": 1.848067283630371,
|
| 6345 |
+
"learning_rate": 1.7569119025304594e-05,
|
| 6346 |
+
"loss": 0.1228,
|
| 6347 |
+
"step": 8300
|
| 6348 |
+
},
|
| 6349 |
+
{
|
| 6350 |
+
"epoch": 3.894095595126523,
|
| 6351 |
+
"grad_norm": 1.451674222946167,
|
| 6352 |
+
"learning_rate": 1.7566190253045923e-05,
|
| 6353 |
+
"loss": 0.1161,
|
| 6354 |
+
"step": 8310
|
| 6355 |
+
},
|
| 6356 |
+
{
|
| 6357 |
+
"epoch": 3.8987816307403937,
|
| 6358 |
+
"grad_norm": 1.7280783653259277,
|
| 6359 |
+
"learning_rate": 1.7563261480787256e-05,
|
| 6360 |
+
"loss": 0.1191,
|
| 6361 |
+
"step": 8320
|
| 6362 |
+
},
|
| 6363 |
+
{
|
| 6364 |
+
"epoch": 3.9034676663542642,
|
| 6365 |
+
"grad_norm": 1.5939549207687378,
|
| 6366 |
+
"learning_rate": 1.7560332708528586e-05,
|
| 6367 |
+
"loss": 0.1272,
|
| 6368 |
+
"step": 8330
|
| 6369 |
+
},
|
| 6370 |
+
{
|
| 6371 |
+
"epoch": 3.9081537019681347,
|
| 6372 |
+
"grad_norm": 1.6509348154067993,
|
| 6373 |
+
"learning_rate": 1.7557403936269915e-05,
|
| 6374 |
+
"loss": 0.127,
|
| 6375 |
+
"step": 8340
|
| 6376 |
+
},
|
| 6377 |
+
{
|
| 6378 |
+
"epoch": 3.9128397375820057,
|
| 6379 |
+
"grad_norm": 1.8915349245071411,
|
| 6380 |
+
"learning_rate": 1.7554475164011248e-05,
|
| 6381 |
+
"loss": 0.1352,
|
| 6382 |
+
"step": 8350
|
| 6383 |
+
},
|
| 6384 |
+
{
|
| 6385 |
+
"epoch": 3.917525773195876,
|
| 6386 |
+
"grad_norm": 2.188493490219116,
|
| 6387 |
+
"learning_rate": 1.7551546391752578e-05,
|
| 6388 |
+
"loss": 0.1105,
|
| 6389 |
+
"step": 8360
|
| 6390 |
+
},
|
| 6391 |
+
{
|
| 6392 |
+
"epoch": 3.922211808809747,
|
| 6393 |
+
"grad_norm": 1.8589377403259277,
|
| 6394 |
+
"learning_rate": 1.754861761949391e-05,
|
| 6395 |
+
"loss": 0.1031,
|
| 6396 |
+
"step": 8370
|
| 6397 |
+
},
|
| 6398 |
+
{
|
| 6399 |
+
"epoch": 3.9268978444236176,
|
| 6400 |
+
"grad_norm": 1.7054208517074585,
|
| 6401 |
+
"learning_rate": 1.754568884723524e-05,
|
| 6402 |
+
"loss": 0.1165,
|
| 6403 |
+
"step": 8380
|
| 6404 |
+
},
|
| 6405 |
+
{
|
| 6406 |
+
"epoch": 3.931583880037488,
|
| 6407 |
+
"grad_norm": 1.2826303243637085,
|
| 6408 |
+
"learning_rate": 1.754276007497657e-05,
|
| 6409 |
+
"loss": 0.0994,
|
| 6410 |
+
"step": 8390
|
| 6411 |
+
},
|
| 6412 |
+
{
|
| 6413 |
+
"epoch": 3.936269915651359,
|
| 6414 |
+
"grad_norm": 2.087935209274292,
|
| 6415 |
+
"learning_rate": 1.7539831302717902e-05,
|
| 6416 |
+
"loss": 0.1493,
|
| 6417 |
+
"step": 8400
|
| 6418 |
+
},
|
| 6419 |
+
{
|
| 6420 |
+
"epoch": 3.9409559512652295,
|
| 6421 |
+
"grad_norm": 1.4399867057800293,
|
| 6422 |
+
"learning_rate": 1.7536902530459232e-05,
|
| 6423 |
+
"loss": 0.1126,
|
| 6424 |
+
"step": 8410
|
| 6425 |
+
},
|
| 6426 |
+
{
|
| 6427 |
+
"epoch": 3.9456419868791004,
|
| 6428 |
+
"grad_norm": 2.081295967102051,
|
| 6429 |
+
"learning_rate": 1.7533973758200565e-05,
|
| 6430 |
+
"loss": 0.1149,
|
| 6431 |
+
"step": 8420
|
| 6432 |
+
},
|
| 6433 |
+
{
|
| 6434 |
+
"epoch": 3.950328022492971,
|
| 6435 |
+
"grad_norm": 1.6477272510528564,
|
| 6436 |
+
"learning_rate": 1.7531044985941894e-05,
|
| 6437 |
+
"loss": 0.124,
|
| 6438 |
+
"step": 8430
|
| 6439 |
+
},
|
| 6440 |
+
{
|
| 6441 |
+
"epoch": 3.9550140581068414,
|
| 6442 |
+
"grad_norm": 1.43690025806427,
|
| 6443 |
+
"learning_rate": 1.7528116213683227e-05,
|
| 6444 |
+
"loss": 0.1175,
|
| 6445 |
+
"step": 8440
|
| 6446 |
+
},
|
| 6447 |
+
{
|
| 6448 |
+
"epoch": 3.9597000937207123,
|
| 6449 |
+
"grad_norm": 2.231391429901123,
|
| 6450 |
+
"learning_rate": 1.7525187441424557e-05,
|
| 6451 |
+
"loss": 0.1039,
|
| 6452 |
+
"step": 8450
|
| 6453 |
+
},
|
| 6454 |
+
{
|
| 6455 |
+
"epoch": 3.964386129334583,
|
| 6456 |
+
"grad_norm": 1.699771761894226,
|
| 6457 |
+
"learning_rate": 1.752225866916589e-05,
|
| 6458 |
+
"loss": 0.096,
|
| 6459 |
+
"step": 8460
|
| 6460 |
+
},
|
| 6461 |
+
{
|
| 6462 |
+
"epoch": 3.9690721649484537,
|
| 6463 |
+
"grad_norm": 0.9869770407676697,
|
| 6464 |
+
"learning_rate": 1.751932989690722e-05,
|
| 6465 |
+
"loss": 0.1318,
|
| 6466 |
+
"step": 8470
|
| 6467 |
+
},
|
| 6468 |
+
{
|
| 6469 |
+
"epoch": 3.973758200562324,
|
| 6470 |
+
"grad_norm": 1.2464418411254883,
|
| 6471 |
+
"learning_rate": 1.751640112464855e-05,
|
| 6472 |
+
"loss": 0.1025,
|
| 6473 |
+
"step": 8480
|
| 6474 |
+
},
|
| 6475 |
+
{
|
| 6476 |
+
"epoch": 3.9784442361761947,
|
| 6477 |
+
"grad_norm": 1.8724063634872437,
|
| 6478 |
+
"learning_rate": 1.751347235238988e-05,
|
| 6479 |
+
"loss": 0.1154,
|
| 6480 |
+
"step": 8490
|
| 6481 |
+
},
|
| 6482 |
+
{
|
| 6483 |
+
"epoch": 3.9831302717900656,
|
| 6484 |
+
"grad_norm": 1.6470191478729248,
|
| 6485 |
+
"learning_rate": 1.751054358013121e-05,
|
| 6486 |
+
"loss": 0.1132,
|
| 6487 |
+
"step": 8500
|
| 6488 |
+
},
|
| 6489 |
+
{
|
| 6490 |
+
"epoch": 3.9831302717900656,
|
| 6491 |
+
"eval_loss": 0.03063393384218216,
|
| 6492 |
+
"eval_pearson_cosine": 0.821106317003462,
|
| 6493 |
+
"eval_pearson_dot": 0.7578136492444401,
|
| 6494 |
+
"eval_pearson_euclidean": 0.7882531341441634,
|
| 6495 |
+
"eval_pearson_manhattan": 0.790852878268538,
|
| 6496 |
+
"eval_runtime": 3.174,
|
| 6497 |
+
"eval_samples_per_second": 472.588,
|
| 6498 |
+
"eval_spearman_cosine": 0.8198241690509209,
|
| 6499 |
+
"eval_spearman_dot": 0.7578266102334076,
|
| 6500 |
+
"eval_spearman_euclidean": 0.7967830640080272,
|
| 6501 |
+
"eval_spearman_manhattan": 0.7991467507473939,
|
| 6502 |
+
"eval_steps_per_second": 29.616,
|
| 6503 |
+
"step": 8500
|
| 6504 |
+
},
|
| 6505 |
+
{
|
| 6506 |
+
"epoch": 3.987816307403936,
|
| 6507 |
+
"grad_norm": 1.676721453666687,
|
| 6508 |
+
"learning_rate": 1.750761480787254e-05,
|
| 6509 |
+
"loss": 0.1188,
|
| 6510 |
+
"step": 8510
|
| 6511 |
+
},
|
| 6512 |
+
{
|
| 6513 |
+
"epoch": 3.992502343017807,
|
| 6514 |
+
"grad_norm": 1.187525987625122,
|
| 6515 |
+
"learning_rate": 1.750468603561387e-05,
|
| 6516 |
+
"loss": 0.132,
|
| 6517 |
+
"step": 8520
|
| 6518 |
+
},
|
| 6519 |
+
{
|
| 6520 |
+
"epoch": 3.9971883786316775,
|
| 6521 |
+
"grad_norm": 1.2841644287109375,
|
| 6522 |
+
"learning_rate": 1.7501757263355203e-05,
|
| 6523 |
+
"loss": 0.1379,
|
| 6524 |
+
"step": 8530
|
| 6525 |
}
|
| 6526 |
],
|
| 6527 |
"logging_steps": 10,
|
|
|
|
| 6536 |
"should_evaluate": false,
|
| 6537 |
"should_log": false,
|
| 6538 |
"should_save": true,
|
| 6539 |
+
"should_training_stop": true
|
| 6540 |
},
|
| 6541 |
"attributes": {}
|
| 6542 |
}
|