Training in progress, step 17000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:723de6fd746cbba66015f7a7da153864465a825d5f4e24435edd8645a25ac837
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dafb37725ed184eeb99653a88821c53652c298a048e783bcf251a3b487c248c8
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c551e87aa3f069465eab6343f6462d0da8c27e46770ad44ff0400698bec95cda
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64e32232d2d68b6508947dd18795fe7ac8dd583abb7b016b68d853036e32fd4b
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 7.
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -12231,6 +12231,770 @@
|
|
| 12231 |
"eval_spearman_manhattan": 0.7524283280152466,
|
| 12232 |
"eval_steps_per_second": 37.268,
|
| 12233 |
"step": 16000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12234 |
}
|
| 12235 |
],
|
| 12236 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 7.966260543580131,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 17000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 12231 |
"eval_spearman_manhattan": 0.7524283280152466,
|
| 12232 |
"eval_steps_per_second": 37.268,
|
| 12233 |
"step": 16000
|
| 12234 |
+
},
|
| 12235 |
+
{
|
| 12236 |
+
"epoch": 7.502343017806935,
|
| 12237 |
+
"grad_norm": 2.1431262493133545,
|
| 12238 |
+
"learning_rate": 9.062207122774134e-06,
|
| 12239 |
+
"loss": 0.0768,
|
| 12240 |
+
"step": 16010
|
| 12241 |
+
},
|
| 12242 |
+
{
|
| 12243 |
+
"epoch": 7.507029053420806,
|
| 12244 |
+
"grad_norm": 1.5847636461257935,
|
| 12245 |
+
"learning_rate": 9.0616213683224e-06,
|
| 12246 |
+
"loss": 0.0772,
|
| 12247 |
+
"step": 16020
|
| 12248 |
+
},
|
| 12249 |
+
{
|
| 12250 |
+
"epoch": 7.511715089034677,
|
| 12251 |
+
"grad_norm": 1.5291898250579834,
|
| 12252 |
+
"learning_rate": 9.061035613870666e-06,
|
| 12253 |
+
"loss": 0.0642,
|
| 12254 |
+
"step": 16030
|
| 12255 |
+
},
|
| 12256 |
+
{
|
| 12257 |
+
"epoch": 7.516401124648548,
|
| 12258 |
+
"grad_norm": 1.497979998588562,
|
| 12259 |
+
"learning_rate": 9.060449859418933e-06,
|
| 12260 |
+
"loss": 0.0846,
|
| 12261 |
+
"step": 16040
|
| 12262 |
+
},
|
| 12263 |
+
{
|
| 12264 |
+
"epoch": 7.521087160262418,
|
| 12265 |
+
"grad_norm": 2.9073336124420166,
|
| 12266 |
+
"learning_rate": 9.059864104967199e-06,
|
| 12267 |
+
"loss": 0.0735,
|
| 12268 |
+
"step": 16050
|
| 12269 |
+
},
|
| 12270 |
+
{
|
| 12271 |
+
"epoch": 7.525773195876289,
|
| 12272 |
+
"grad_norm": 2.264319896697998,
|
| 12273 |
+
"learning_rate": 9.059278350515464e-06,
|
| 12274 |
+
"loss": 0.0752,
|
| 12275 |
+
"step": 16060
|
| 12276 |
+
},
|
| 12277 |
+
{
|
| 12278 |
+
"epoch": 7.530459231490159,
|
| 12279 |
+
"grad_norm": 1.6372432708740234,
|
| 12280 |
+
"learning_rate": 9.05869259606373e-06,
|
| 12281 |
+
"loss": 0.0842,
|
| 12282 |
+
"step": 16070
|
| 12283 |
+
},
|
| 12284 |
+
{
|
| 12285 |
+
"epoch": 7.5351452671040295,
|
| 12286 |
+
"grad_norm": 1.1619336605072021,
|
| 12287 |
+
"learning_rate": 9.058106841611996e-06,
|
| 12288 |
+
"loss": 0.0687,
|
| 12289 |
+
"step": 16080
|
| 12290 |
+
},
|
| 12291 |
+
{
|
| 12292 |
+
"epoch": 7.539831302717901,
|
| 12293 |
+
"grad_norm": 0.9320247173309326,
|
| 12294 |
+
"learning_rate": 9.057521087160263e-06,
|
| 12295 |
+
"loss": 0.0709,
|
| 12296 |
+
"step": 16090
|
| 12297 |
+
},
|
| 12298 |
+
{
|
| 12299 |
+
"epoch": 7.544517338331771,
|
| 12300 |
+
"grad_norm": 1.3003836870193481,
|
| 12301 |
+
"learning_rate": 9.05693533270853e-06,
|
| 12302 |
+
"loss": 0.062,
|
| 12303 |
+
"step": 16100
|
| 12304 |
+
},
|
| 12305 |
+
{
|
| 12306 |
+
"epoch": 7.549203373945642,
|
| 12307 |
+
"grad_norm": 1.8614999055862427,
|
| 12308 |
+
"learning_rate": 9.056349578256795e-06,
|
| 12309 |
+
"loss": 0.0713,
|
| 12310 |
+
"step": 16110
|
| 12311 |
+
},
|
| 12312 |
+
{
|
| 12313 |
+
"epoch": 7.553889409559512,
|
| 12314 |
+
"grad_norm": 2.049309492111206,
|
| 12315 |
+
"learning_rate": 9.055763823805062e-06,
|
| 12316 |
+
"loss": 0.0763,
|
| 12317 |
+
"step": 16120
|
| 12318 |
+
},
|
| 12319 |
+
{
|
| 12320 |
+
"epoch": 7.558575445173384,
|
| 12321 |
+
"grad_norm": 1.3978779315948486,
|
| 12322 |
+
"learning_rate": 9.055178069353327e-06,
|
| 12323 |
+
"loss": 0.0778,
|
| 12324 |
+
"step": 16130
|
| 12325 |
+
},
|
| 12326 |
+
{
|
| 12327 |
+
"epoch": 7.563261480787254,
|
| 12328 |
+
"grad_norm": 1.9440947771072388,
|
| 12329 |
+
"learning_rate": 9.054592314901594e-06,
|
| 12330 |
+
"loss": 0.0637,
|
| 12331 |
+
"step": 16140
|
| 12332 |
+
},
|
| 12333 |
+
{
|
| 12334 |
+
"epoch": 7.567947516401125,
|
| 12335 |
+
"grad_norm": 1.5653728246688843,
|
| 12336 |
+
"learning_rate": 9.054006560449861e-06,
|
| 12337 |
+
"loss": 0.0791,
|
| 12338 |
+
"step": 16150
|
| 12339 |
+
},
|
| 12340 |
+
{
|
| 12341 |
+
"epoch": 7.572633552014995,
|
| 12342 |
+
"grad_norm": 1.3674747943878174,
|
| 12343 |
+
"learning_rate": 9.053420805998126e-06,
|
| 12344 |
+
"loss": 0.0662,
|
| 12345 |
+
"step": 16160
|
| 12346 |
+
},
|
| 12347 |
+
{
|
| 12348 |
+
"epoch": 7.577319587628866,
|
| 12349 |
+
"grad_norm": 1.2067365646362305,
|
| 12350 |
+
"learning_rate": 9.052835051546393e-06,
|
| 12351 |
+
"loss": 0.0677,
|
| 12352 |
+
"step": 16170
|
| 12353 |
+
},
|
| 12354 |
+
{
|
| 12355 |
+
"epoch": 7.582005623242736,
|
| 12356 |
+
"grad_norm": 1.9453731775283813,
|
| 12357 |
+
"learning_rate": 9.052249297094658e-06,
|
| 12358 |
+
"loss": 0.0776,
|
| 12359 |
+
"step": 16180
|
| 12360 |
+
},
|
| 12361 |
+
{
|
| 12362 |
+
"epoch": 7.586691658856608,
|
| 12363 |
+
"grad_norm": 1.6629338264465332,
|
| 12364 |
+
"learning_rate": 9.051663542642925e-06,
|
| 12365 |
+
"loss": 0.0615,
|
| 12366 |
+
"step": 16190
|
| 12367 |
+
},
|
| 12368 |
+
{
|
| 12369 |
+
"epoch": 7.591377694470478,
|
| 12370 |
+
"grad_norm": 2.192781925201416,
|
| 12371 |
+
"learning_rate": 9.051077788191192e-06,
|
| 12372 |
+
"loss": 0.0762,
|
| 12373 |
+
"step": 16200
|
| 12374 |
+
},
|
| 12375 |
+
{
|
| 12376 |
+
"epoch": 7.5960637300843485,
|
| 12377 |
+
"grad_norm": 1.7695443630218506,
|
| 12378 |
+
"learning_rate": 9.050492033739457e-06,
|
| 12379 |
+
"loss": 0.0798,
|
| 12380 |
+
"step": 16210
|
| 12381 |
+
},
|
| 12382 |
+
{
|
| 12383 |
+
"epoch": 7.600749765698219,
|
| 12384 |
+
"grad_norm": 2.5343542098999023,
|
| 12385 |
+
"learning_rate": 9.049906279287723e-06,
|
| 12386 |
+
"loss": 0.0642,
|
| 12387 |
+
"step": 16220
|
| 12388 |
+
},
|
| 12389 |
+
{
|
| 12390 |
+
"epoch": 7.60543580131209,
|
| 12391 |
+
"grad_norm": 2.3569960594177246,
|
| 12392 |
+
"learning_rate": 9.04932052483599e-06,
|
| 12393 |
+
"loss": 0.0791,
|
| 12394 |
+
"step": 16230
|
| 12395 |
+
},
|
| 12396 |
+
{
|
| 12397 |
+
"epoch": 7.610121836925961,
|
| 12398 |
+
"grad_norm": 1.6627905368804932,
|
| 12399 |
+
"learning_rate": 9.048734770384255e-06,
|
| 12400 |
+
"loss": 0.0751,
|
| 12401 |
+
"step": 16240
|
| 12402 |
+
},
|
| 12403 |
+
{
|
| 12404 |
+
"epoch": 7.614807872539831,
|
| 12405 |
+
"grad_norm": 2.732750177383423,
|
| 12406 |
+
"learning_rate": 9.048149015932522e-06,
|
| 12407 |
+
"loss": 0.0779,
|
| 12408 |
+
"step": 16250
|
| 12409 |
+
},
|
| 12410 |
+
{
|
| 12411 |
+
"epoch": 7.614807872539831,
|
| 12412 |
+
"eval_loss": 0.03914293646812439,
|
| 12413 |
+
"eval_pearson_cosine": 0.7825741148617453,
|
| 12414 |
+
"eval_pearson_dot": 0.6371994676784709,
|
| 12415 |
+
"eval_pearson_euclidean": 0.7333148039136894,
|
| 12416 |
+
"eval_pearson_manhattan": 0.7325802747620713,
|
| 12417 |
+
"eval_runtime": 39.7805,
|
| 12418 |
+
"eval_samples_per_second": 37.707,
|
| 12419 |
+
"eval_spearman_cosine": 0.7845511113526636,
|
| 12420 |
+
"eval_spearman_dot": 0.6532379504600656,
|
| 12421 |
+
"eval_spearman_euclidean": 0.746661992951968,
|
| 12422 |
+
"eval_spearman_manhattan": 0.7461572363651041,
|
| 12423 |
+
"eval_steps_per_second": 37.707,
|
| 12424 |
+
"step": 16250
|
| 12425 |
+
},
|
| 12426 |
+
{
|
| 12427 |
+
"epoch": 7.619493908153702,
|
| 12428 |
+
"grad_norm": 1.1624847650527954,
|
| 12429 |
+
"learning_rate": 9.047563261480789e-06,
|
| 12430 |
+
"loss": 0.0677,
|
| 12431 |
+
"step": 16260
|
| 12432 |
+
},
|
| 12433 |
+
{
|
| 12434 |
+
"epoch": 7.624179943767572,
|
| 12435 |
+
"grad_norm": 2.173877477645874,
|
| 12436 |
+
"learning_rate": 9.046977507029054e-06,
|
| 12437 |
+
"loss": 0.0697,
|
| 12438 |
+
"step": 16270
|
| 12439 |
+
},
|
| 12440 |
+
{
|
| 12441 |
+
"epoch": 7.628865979381443,
|
| 12442 |
+
"grad_norm": 2.1271920204162598,
|
| 12443 |
+
"learning_rate": 9.04639175257732e-06,
|
| 12444 |
+
"loss": 0.0736,
|
| 12445 |
+
"step": 16280
|
| 12446 |
+
},
|
| 12447 |
+
{
|
| 12448 |
+
"epoch": 7.633552014995314,
|
| 12449 |
+
"grad_norm": 1.3459683656692505,
|
| 12450 |
+
"learning_rate": 9.045805998125586e-06,
|
| 12451 |
+
"loss": 0.082,
|
| 12452 |
+
"step": 16290
|
| 12453 |
+
},
|
| 12454 |
+
{
|
| 12455 |
+
"epoch": 7.638238050609185,
|
| 12456 |
+
"grad_norm": 2.0023584365844727,
|
| 12457 |
+
"learning_rate": 9.045220243673853e-06,
|
| 12458 |
+
"loss": 0.0662,
|
| 12459 |
+
"step": 16300
|
| 12460 |
+
},
|
| 12461 |
+
{
|
| 12462 |
+
"epoch": 7.642924086223055,
|
| 12463 |
+
"grad_norm": 1.8559486865997314,
|
| 12464 |
+
"learning_rate": 9.044634489222118e-06,
|
| 12465 |
+
"loss": 0.0685,
|
| 12466 |
+
"step": 16310
|
| 12467 |
+
},
|
| 12468 |
+
{
|
| 12469 |
+
"epoch": 7.647610121836926,
|
| 12470 |
+
"grad_norm": 2.1703007221221924,
|
| 12471 |
+
"learning_rate": 9.044048734770385e-06,
|
| 12472 |
+
"loss": 0.0659,
|
| 12473 |
+
"step": 16320
|
| 12474 |
+
},
|
| 12475 |
+
{
|
| 12476 |
+
"epoch": 7.652296157450797,
|
| 12477 |
+
"grad_norm": 2.0675439834594727,
|
| 12478 |
+
"learning_rate": 9.043462980318652e-06,
|
| 12479 |
+
"loss": 0.0708,
|
| 12480 |
+
"step": 16330
|
| 12481 |
+
},
|
| 12482 |
+
{
|
| 12483 |
+
"epoch": 7.6569821930646675,
|
| 12484 |
+
"grad_norm": 1.600040316581726,
|
| 12485 |
+
"learning_rate": 9.042877225866917e-06,
|
| 12486 |
+
"loss": 0.0682,
|
| 12487 |
+
"step": 16340
|
| 12488 |
+
},
|
| 12489 |
+
{
|
| 12490 |
+
"epoch": 7.661668228678538,
|
| 12491 |
+
"grad_norm": 2.5737037658691406,
|
| 12492 |
+
"learning_rate": 9.042291471415184e-06,
|
| 12493 |
+
"loss": 0.077,
|
| 12494 |
+
"step": 16350
|
| 12495 |
+
},
|
| 12496 |
+
{
|
| 12497 |
+
"epoch": 7.6663542642924085,
|
| 12498 |
+
"grad_norm": 1.909056544303894,
|
| 12499 |
+
"learning_rate": 9.04170571696345e-06,
|
| 12500 |
+
"loss": 0.076,
|
| 12501 |
+
"step": 16360
|
| 12502 |
+
},
|
| 12503 |
+
{
|
| 12504 |
+
"epoch": 7.671040299906279,
|
| 12505 |
+
"grad_norm": 2.5772509574890137,
|
| 12506 |
+
"learning_rate": 9.041119962511716e-06,
|
| 12507 |
+
"loss": 0.0743,
|
| 12508 |
+
"step": 16370
|
| 12509 |
+
},
|
| 12510 |
+
{
|
| 12511 |
+
"epoch": 7.6757263355201495,
|
| 12512 |
+
"grad_norm": 1.6890363693237305,
|
| 12513 |
+
"learning_rate": 9.040534208059981e-06,
|
| 12514 |
+
"loss": 0.0696,
|
| 12515 |
+
"step": 16380
|
| 12516 |
+
},
|
| 12517 |
+
{
|
| 12518 |
+
"epoch": 7.680412371134021,
|
| 12519 |
+
"grad_norm": 1.6115903854370117,
|
| 12520 |
+
"learning_rate": 9.039948453608248e-06,
|
| 12521 |
+
"loss": 0.0849,
|
| 12522 |
+
"step": 16390
|
| 12523 |
+
},
|
| 12524 |
+
{
|
| 12525 |
+
"epoch": 7.685098406747891,
|
| 12526 |
+
"grad_norm": 2.3779239654541016,
|
| 12527 |
+
"learning_rate": 9.039362699156514e-06,
|
| 12528 |
+
"loss": 0.0819,
|
| 12529 |
+
"step": 16400
|
| 12530 |
+
},
|
| 12531 |
+
{
|
| 12532 |
+
"epoch": 7.689784442361762,
|
| 12533 |
+
"grad_norm": 1.5444949865341187,
|
| 12534 |
+
"learning_rate": 9.03877694470478e-06,
|
| 12535 |
+
"loss": 0.0718,
|
| 12536 |
+
"step": 16410
|
| 12537 |
+
},
|
| 12538 |
+
{
|
| 12539 |
+
"epoch": 7.694470477975632,
|
| 12540 |
+
"grad_norm": 2.221595525741577,
|
| 12541 |
+
"learning_rate": 9.038191190253046e-06,
|
| 12542 |
+
"loss": 0.084,
|
| 12543 |
+
"step": 16420
|
| 12544 |
+
},
|
| 12545 |
+
{
|
| 12546 |
+
"epoch": 7.699156513589504,
|
| 12547 |
+
"grad_norm": 2.68977427482605,
|
| 12548 |
+
"learning_rate": 9.037605435801313e-06,
|
| 12549 |
+
"loss": 0.0828,
|
| 12550 |
+
"step": 16430
|
| 12551 |
+
},
|
| 12552 |
+
{
|
| 12553 |
+
"epoch": 7.703842549203374,
|
| 12554 |
+
"grad_norm": 1.9625297784805298,
|
| 12555 |
+
"learning_rate": 9.03701968134958e-06,
|
| 12556 |
+
"loss": 0.0645,
|
| 12557 |
+
"step": 16440
|
| 12558 |
+
},
|
| 12559 |
+
{
|
| 12560 |
+
"epoch": 7.708528584817245,
|
| 12561 |
+
"grad_norm": 1.4848051071166992,
|
| 12562 |
+
"learning_rate": 9.036433926897845e-06,
|
| 12563 |
+
"loss": 0.0775,
|
| 12564 |
+
"step": 16450
|
| 12565 |
+
},
|
| 12566 |
+
{
|
| 12567 |
+
"epoch": 7.713214620431115,
|
| 12568 |
+
"grad_norm": 1.2312238216400146,
|
| 12569 |
+
"learning_rate": 9.035848172446112e-06,
|
| 12570 |
+
"loss": 0.0654,
|
| 12571 |
+
"step": 16460
|
| 12572 |
+
},
|
| 12573 |
+
{
|
| 12574 |
+
"epoch": 7.717900656044986,
|
| 12575 |
+
"grad_norm": 2.353621006011963,
|
| 12576 |
+
"learning_rate": 9.035262417994377e-06,
|
| 12577 |
+
"loss": 0.0798,
|
| 12578 |
+
"step": 16470
|
| 12579 |
+
},
|
| 12580 |
+
{
|
| 12581 |
+
"epoch": 7.722586691658856,
|
| 12582 |
+
"grad_norm": 2.2561025619506836,
|
| 12583 |
+
"learning_rate": 9.034676663542644e-06,
|
| 12584 |
+
"loss": 0.0679,
|
| 12585 |
+
"step": 16480
|
| 12586 |
+
},
|
| 12587 |
+
{
|
| 12588 |
+
"epoch": 7.7272727272727275,
|
| 12589 |
+
"grad_norm": 1.7776751518249512,
|
| 12590 |
+
"learning_rate": 9.03409090909091e-06,
|
| 12591 |
+
"loss": 0.0619,
|
| 12592 |
+
"step": 16490
|
| 12593 |
+
},
|
| 12594 |
+
{
|
| 12595 |
+
"epoch": 7.731958762886598,
|
| 12596 |
+
"grad_norm": 1.9327503442764282,
|
| 12597 |
+
"learning_rate": 9.033505154639176e-06,
|
| 12598 |
+
"loss": 0.078,
|
| 12599 |
+
"step": 16500
|
| 12600 |
+
},
|
| 12601 |
+
{
|
| 12602 |
+
"epoch": 7.731958762886598,
|
| 12603 |
+
"eval_loss": 0.039704494178295135,
|
| 12604 |
+
"eval_pearson_cosine": 0.7809507850262349,
|
| 12605 |
+
"eval_pearson_dot": 0.636416760886064,
|
| 12606 |
+
"eval_pearson_euclidean": 0.7299738247708838,
|
| 12607 |
+
"eval_pearson_manhattan": 0.7298765220753651,
|
| 12608 |
+
"eval_runtime": 40.0275,
|
| 12609 |
+
"eval_samples_per_second": 37.474,
|
| 12610 |
+
"eval_spearman_cosine": 0.782649086170428,
|
| 12611 |
+
"eval_spearman_dot": 0.6554585356303039,
|
| 12612 |
+
"eval_spearman_euclidean": 0.7456788267720733,
|
| 12613 |
+
"eval_spearman_manhattan": 0.7461425779916862,
|
| 12614 |
+
"eval_steps_per_second": 37.474,
|
| 12615 |
+
"step": 16500
|
| 12616 |
+
},
|
| 12617 |
+
{
|
| 12618 |
+
"epoch": 7.7366447985004685,
|
| 12619 |
+
"grad_norm": 1.9791489839553833,
|
| 12620 |
+
"learning_rate": 9.032919400187443e-06,
|
| 12621 |
+
"loss": 0.08,
|
| 12622 |
+
"step": 16510
|
| 12623 |
+
},
|
| 12624 |
+
{
|
| 12625 |
+
"epoch": 7.741330834114339,
|
| 12626 |
+
"grad_norm": 2.5181267261505127,
|
| 12627 |
+
"learning_rate": 9.032333645735708e-06,
|
| 12628 |
+
"loss": 0.0822,
|
| 12629 |
+
"step": 16520
|
| 12630 |
+
},
|
| 12631 |
+
{
|
| 12632 |
+
"epoch": 7.74601686972821,
|
| 12633 |
+
"grad_norm": 1.2553796768188477,
|
| 12634 |
+
"learning_rate": 9.031747891283973e-06,
|
| 12635 |
+
"loss": 0.0713,
|
| 12636 |
+
"step": 16530
|
| 12637 |
+
},
|
| 12638 |
+
{
|
| 12639 |
+
"epoch": 7.750702905342081,
|
| 12640 |
+
"grad_norm": 2.394421100616455,
|
| 12641 |
+
"learning_rate": 9.03116213683224e-06,
|
| 12642 |
+
"loss": 0.0852,
|
| 12643 |
+
"step": 16540
|
| 12644 |
+
},
|
| 12645 |
+
{
|
| 12646 |
+
"epoch": 7.755388940955951,
|
| 12647 |
+
"grad_norm": 2.388476848602295,
|
| 12648 |
+
"learning_rate": 9.030576382380507e-06,
|
| 12649 |
+
"loss": 0.0703,
|
| 12650 |
+
"step": 16550
|
| 12651 |
+
},
|
| 12652 |
+
{
|
| 12653 |
+
"epoch": 7.760074976569822,
|
| 12654 |
+
"grad_norm": 1.3286539316177368,
|
| 12655 |
+
"learning_rate": 9.029990627928772e-06,
|
| 12656 |
+
"loss": 0.0606,
|
| 12657 |
+
"step": 16560
|
| 12658 |
+
},
|
| 12659 |
+
{
|
| 12660 |
+
"epoch": 7.764761012183692,
|
| 12661 |
+
"grad_norm": 2.0466766357421875,
|
| 12662 |
+
"learning_rate": 9.02940487347704e-06,
|
| 12663 |
+
"loss": 0.0729,
|
| 12664 |
+
"step": 16570
|
| 12665 |
+
},
|
| 12666 |
+
{
|
| 12667 |
+
"epoch": 7.769447047797563,
|
| 12668 |
+
"grad_norm": 1.3759188652038574,
|
| 12669 |
+
"learning_rate": 9.028819119025305e-06,
|
| 12670 |
+
"loss": 0.0776,
|
| 12671 |
+
"step": 16580
|
| 12672 |
+
},
|
| 12673 |
+
{
|
| 12674 |
+
"epoch": 7.774133083411434,
|
| 12675 |
+
"grad_norm": 1.6511011123657227,
|
| 12676 |
+
"learning_rate": 9.028233364573571e-06,
|
| 12677 |
+
"loss": 0.0728,
|
| 12678 |
+
"step": 16590
|
| 12679 |
+
},
|
| 12680 |
+
{
|
| 12681 |
+
"epoch": 7.778819119025305,
|
| 12682 |
+
"grad_norm": 2.05136775970459,
|
| 12683 |
+
"learning_rate": 9.027647610121838e-06,
|
| 12684 |
+
"loss": 0.0719,
|
| 12685 |
+
"step": 16600
|
| 12686 |
+
},
|
| 12687 |
+
{
|
| 12688 |
+
"epoch": 7.783505154639175,
|
| 12689 |
+
"grad_norm": 2.3014705181121826,
|
| 12690 |
+
"learning_rate": 9.027061855670104e-06,
|
| 12691 |
+
"loss": 0.0682,
|
| 12692 |
+
"step": 16610
|
| 12693 |
+
},
|
| 12694 |
+
{
|
| 12695 |
+
"epoch": 7.788191190253046,
|
| 12696 |
+
"grad_norm": 2.6752190589904785,
|
| 12697 |
+
"learning_rate": 9.02647610121837e-06,
|
| 12698 |
+
"loss": 0.0856,
|
| 12699 |
+
"step": 16620
|
| 12700 |
+
},
|
| 12701 |
+
{
|
| 12702 |
+
"epoch": 7.792877225866917,
|
| 12703 |
+
"grad_norm": 1.7644881010055542,
|
| 12704 |
+
"learning_rate": 9.025890346766636e-06,
|
| 12705 |
+
"loss": 0.0705,
|
| 12706 |
+
"step": 16630
|
| 12707 |
+
},
|
| 12708 |
+
{
|
| 12709 |
+
"epoch": 7.7975632614807875,
|
| 12710 |
+
"grad_norm": 2.1563751697540283,
|
| 12711 |
+
"learning_rate": 9.025304592314903e-06,
|
| 12712 |
+
"loss": 0.0842,
|
| 12713 |
+
"step": 16640
|
| 12714 |
+
},
|
| 12715 |
+
{
|
| 12716 |
+
"epoch": 7.802249297094658,
|
| 12717 |
+
"grad_norm": 1.4930392503738403,
|
| 12718 |
+
"learning_rate": 9.02471883786317e-06,
|
| 12719 |
+
"loss": 0.0759,
|
| 12720 |
+
"step": 16650
|
| 12721 |
+
},
|
| 12722 |
+
{
|
| 12723 |
+
"epoch": 7.8069353327085285,
|
| 12724 |
+
"grad_norm": 2.3332340717315674,
|
| 12725 |
+
"learning_rate": 9.024133083411435e-06,
|
| 12726 |
+
"loss": 0.0668,
|
| 12727 |
+
"step": 16660
|
| 12728 |
+
},
|
| 12729 |
+
{
|
| 12730 |
+
"epoch": 7.811621368322399,
|
| 12731 |
+
"grad_norm": 2.424914836883545,
|
| 12732 |
+
"learning_rate": 9.023547328959702e-06,
|
| 12733 |
+
"loss": 0.0619,
|
| 12734 |
+
"step": 16670
|
| 12735 |
+
},
|
| 12736 |
+
{
|
| 12737 |
+
"epoch": 7.816307403936269,
|
| 12738 |
+
"grad_norm": 2.246410369873047,
|
| 12739 |
+
"learning_rate": 9.022961574507967e-06,
|
| 12740 |
+
"loss": 0.0772,
|
| 12741 |
+
"step": 16680
|
| 12742 |
+
},
|
| 12743 |
+
{
|
| 12744 |
+
"epoch": 7.820993439550141,
|
| 12745 |
+
"grad_norm": 1.8411740064620972,
|
| 12746 |
+
"learning_rate": 9.022375820056232e-06,
|
| 12747 |
+
"loss": 0.0685,
|
| 12748 |
+
"step": 16690
|
| 12749 |
+
},
|
| 12750 |
+
{
|
| 12751 |
+
"epoch": 7.825679475164011,
|
| 12752 |
+
"grad_norm": 1.6910183429718018,
|
| 12753 |
+
"learning_rate": 9.021790065604499e-06,
|
| 12754 |
+
"loss": 0.0666,
|
| 12755 |
+
"step": 16700
|
| 12756 |
+
},
|
| 12757 |
+
{
|
| 12758 |
+
"epoch": 7.830365510777882,
|
| 12759 |
+
"grad_norm": 1.7055261135101318,
|
| 12760 |
+
"learning_rate": 9.021204311152766e-06,
|
| 12761 |
+
"loss": 0.0555,
|
| 12762 |
+
"step": 16710
|
| 12763 |
+
},
|
| 12764 |
+
{
|
| 12765 |
+
"epoch": 7.835051546391752,
|
| 12766 |
+
"grad_norm": 1.0138518810272217,
|
| 12767 |
+
"learning_rate": 9.020618556701031e-06,
|
| 12768 |
+
"loss": 0.0709,
|
| 12769 |
+
"step": 16720
|
| 12770 |
+
},
|
| 12771 |
+
{
|
| 12772 |
+
"epoch": 7.839737582005624,
|
| 12773 |
+
"grad_norm": 1.5108051300048828,
|
| 12774 |
+
"learning_rate": 9.020032802249298e-06,
|
| 12775 |
+
"loss": 0.0563,
|
| 12776 |
+
"step": 16730
|
| 12777 |
+
},
|
| 12778 |
+
{
|
| 12779 |
+
"epoch": 7.844423617619494,
|
| 12780 |
+
"grad_norm": 1.4900165796279907,
|
| 12781 |
+
"learning_rate": 9.019447047797563e-06,
|
| 12782 |
+
"loss": 0.0725,
|
| 12783 |
+
"step": 16740
|
| 12784 |
+
},
|
| 12785 |
+
{
|
| 12786 |
+
"epoch": 7.849109653233365,
|
| 12787 |
+
"grad_norm": 2.11224627494812,
|
| 12788 |
+
"learning_rate": 9.01886129334583e-06,
|
| 12789 |
+
"loss": 0.0699,
|
| 12790 |
+
"step": 16750
|
| 12791 |
+
},
|
| 12792 |
+
{
|
| 12793 |
+
"epoch": 7.849109653233365,
|
| 12794 |
+
"eval_loss": 0.04045228287577629,
|
| 12795 |
+
"eval_pearson_cosine": 0.7810519865633125,
|
| 12796 |
+
"eval_pearson_dot": 0.6315366018290618,
|
| 12797 |
+
"eval_pearson_euclidean": 0.7311610922445455,
|
| 12798 |
+
"eval_pearson_manhattan": 0.7308498943499657,
|
| 12799 |
+
"eval_runtime": 40.5903,
|
| 12800 |
+
"eval_samples_per_second": 36.955,
|
| 12801 |
+
"eval_spearman_cosine": 0.7836878662688926,
|
| 12802 |
+
"eval_spearman_dot": 0.642582986344888,
|
| 12803 |
+
"eval_spearman_euclidean": 0.7470082334118219,
|
| 12804 |
+
"eval_spearman_manhattan": 0.7467779627853639,
|
| 12805 |
+
"eval_steps_per_second": 36.955,
|
| 12806 |
+
"step": 16750
|
| 12807 |
+
},
|
| 12808 |
+
{
|
| 12809 |
+
"epoch": 7.853795688847235,
|
| 12810 |
+
"grad_norm": 1.3252798318862915,
|
| 12811 |
+
"learning_rate": 9.018275538894097e-06,
|
| 12812 |
+
"loss": 0.0691,
|
| 12813 |
+
"step": 16760
|
| 12814 |
+
},
|
| 12815 |
+
{
|
| 12816 |
+
"epoch": 7.858481724461106,
|
| 12817 |
+
"grad_norm": 0.9918208122253418,
|
| 12818 |
+
"learning_rate": 9.017689784442362e-06,
|
| 12819 |
+
"loss": 0.0723,
|
| 12820 |
+
"step": 16770
|
| 12821 |
+
},
|
| 12822 |
+
{
|
| 12823 |
+
"epoch": 7.863167760074976,
|
| 12824 |
+
"grad_norm": 2.2344889640808105,
|
| 12825 |
+
"learning_rate": 9.01710402999063e-06,
|
| 12826 |
+
"loss": 0.0739,
|
| 12827 |
+
"step": 16780
|
| 12828 |
+
},
|
| 12829 |
+
{
|
| 12830 |
+
"epoch": 7.8678537956888475,
|
| 12831 |
+
"grad_norm": 1.8005706071853638,
|
| 12832 |
+
"learning_rate": 9.016518275538895e-06,
|
| 12833 |
+
"loss": 0.0831,
|
| 12834 |
+
"step": 16790
|
| 12835 |
+
},
|
| 12836 |
+
{
|
| 12837 |
+
"epoch": 7.872539831302718,
|
| 12838 |
+
"grad_norm": 1.365945816040039,
|
| 12839 |
+
"learning_rate": 9.015932521087161e-06,
|
| 12840 |
+
"loss": 0.0759,
|
| 12841 |
+
"step": 16800
|
| 12842 |
+
},
|
| 12843 |
+
{
|
| 12844 |
+
"epoch": 7.877225866916588,
|
| 12845 |
+
"grad_norm": 1.3977360725402832,
|
| 12846 |
+
"learning_rate": 9.015346766635427e-06,
|
| 12847 |
+
"loss": 0.0806,
|
| 12848 |
+
"step": 16810
|
| 12849 |
+
},
|
| 12850 |
+
{
|
| 12851 |
+
"epoch": 7.881911902530459,
|
| 12852 |
+
"grad_norm": 1.3826375007629395,
|
| 12853 |
+
"learning_rate": 9.014761012183694e-06,
|
| 12854 |
+
"loss": 0.0744,
|
| 12855 |
+
"step": 16820
|
| 12856 |
+
},
|
| 12857 |
+
{
|
| 12858 |
+
"epoch": 7.88659793814433,
|
| 12859 |
+
"grad_norm": 2.0823261737823486,
|
| 12860 |
+
"learning_rate": 9.01417525773196e-06,
|
| 12861 |
+
"loss": 0.0731,
|
| 12862 |
+
"step": 16830
|
| 12863 |
+
},
|
| 12864 |
+
{
|
| 12865 |
+
"epoch": 7.891283973758201,
|
| 12866 |
+
"grad_norm": 1.4947584867477417,
|
| 12867 |
+
"learning_rate": 9.013589503280226e-06,
|
| 12868 |
+
"loss": 0.0788,
|
| 12869 |
+
"step": 16840
|
| 12870 |
+
},
|
| 12871 |
+
{
|
| 12872 |
+
"epoch": 7.895970009372071,
|
| 12873 |
+
"grad_norm": 1.659224033355713,
|
| 12874 |
+
"learning_rate": 9.013003748828491e-06,
|
| 12875 |
+
"loss": 0.0733,
|
| 12876 |
+
"step": 16850
|
| 12877 |
+
},
|
| 12878 |
+
{
|
| 12879 |
+
"epoch": 7.900656044985942,
|
| 12880 |
+
"grad_norm": 1.4698199033737183,
|
| 12881 |
+
"learning_rate": 9.012417994376758e-06,
|
| 12882 |
+
"loss": 0.0789,
|
| 12883 |
+
"step": 16860
|
| 12884 |
+
},
|
| 12885 |
+
{
|
| 12886 |
+
"epoch": 7.905342080599812,
|
| 12887 |
+
"grad_norm": 1.6106451749801636,
|
| 12888 |
+
"learning_rate": 9.011832239925025e-06,
|
| 12889 |
+
"loss": 0.0656,
|
| 12890 |
+
"step": 16870
|
| 12891 |
+
},
|
| 12892 |
+
{
|
| 12893 |
+
"epoch": 7.910028116213683,
|
| 12894 |
+
"grad_norm": 1.2820615768432617,
|
| 12895 |
+
"learning_rate": 9.01124648547329e-06,
|
| 12896 |
+
"loss": 0.0648,
|
| 12897 |
+
"step": 16880
|
| 12898 |
+
},
|
| 12899 |
+
{
|
| 12900 |
+
"epoch": 7.914714151827554,
|
| 12901 |
+
"grad_norm": 2.3736705780029297,
|
| 12902 |
+
"learning_rate": 9.010660731021557e-06,
|
| 12903 |
+
"loss": 0.0884,
|
| 12904 |
+
"step": 16890
|
| 12905 |
+
},
|
| 12906 |
+
{
|
| 12907 |
+
"epoch": 7.919400187441425,
|
| 12908 |
+
"grad_norm": 1.1591442823410034,
|
| 12909 |
+
"learning_rate": 9.010074976569822e-06,
|
| 12910 |
+
"loss": 0.0657,
|
| 12911 |
+
"step": 16900
|
| 12912 |
+
},
|
| 12913 |
+
{
|
| 12914 |
+
"epoch": 7.924086223055295,
|
| 12915 |
+
"grad_norm": 1.9707759618759155,
|
| 12916 |
+
"learning_rate": 9.009489222118089e-06,
|
| 12917 |
+
"loss": 0.0833,
|
| 12918 |
+
"step": 16910
|
| 12919 |
+
},
|
| 12920 |
+
{
|
| 12921 |
+
"epoch": 7.928772258669166,
|
| 12922 |
+
"grad_norm": 2.5806972980499268,
|
| 12923 |
+
"learning_rate": 9.008903467666354e-06,
|
| 12924 |
+
"loss": 0.073,
|
| 12925 |
+
"step": 16920
|
| 12926 |
+
},
|
| 12927 |
+
{
|
| 12928 |
+
"epoch": 7.933458294283037,
|
| 12929 |
+
"grad_norm": 0.8301031589508057,
|
| 12930 |
+
"learning_rate": 9.008317713214621e-06,
|
| 12931 |
+
"loss": 0.0694,
|
| 12932 |
+
"step": 16930
|
| 12933 |
+
},
|
| 12934 |
+
{
|
| 12935 |
+
"epoch": 7.938144329896907,
|
| 12936 |
+
"grad_norm": 2.491325855255127,
|
| 12937 |
+
"learning_rate": 9.007731958762888e-06,
|
| 12938 |
+
"loss": 0.07,
|
| 12939 |
+
"step": 16940
|
| 12940 |
+
},
|
| 12941 |
+
{
|
| 12942 |
+
"epoch": 7.942830365510778,
|
| 12943 |
+
"grad_norm": 1.3585147857666016,
|
| 12944 |
+
"learning_rate": 9.007146204311153e-06,
|
| 12945 |
+
"loss": 0.0844,
|
| 12946 |
+
"step": 16950
|
| 12947 |
+
},
|
| 12948 |
+
{
|
| 12949 |
+
"epoch": 7.947516401124648,
|
| 12950 |
+
"grad_norm": 0.8648898601531982,
|
| 12951 |
+
"learning_rate": 9.00656044985942e-06,
|
| 12952 |
+
"loss": 0.0706,
|
| 12953 |
+
"step": 16960
|
| 12954 |
+
},
|
| 12955 |
+
{
|
| 12956 |
+
"epoch": 7.952202436738519,
|
| 12957 |
+
"grad_norm": 1.6157063245773315,
|
| 12958 |
+
"learning_rate": 9.005974695407685e-06,
|
| 12959 |
+
"loss": 0.0849,
|
| 12960 |
+
"step": 16970
|
| 12961 |
+
},
|
| 12962 |
+
{
|
| 12963 |
+
"epoch": 7.956888472352389,
|
| 12964 |
+
"grad_norm": 2.6578354835510254,
|
| 12965 |
+
"learning_rate": 9.005388940955952e-06,
|
| 12966 |
+
"loss": 0.0785,
|
| 12967 |
+
"step": 16980
|
| 12968 |
+
},
|
| 12969 |
+
{
|
| 12970 |
+
"epoch": 7.961574507966261,
|
| 12971 |
+
"grad_norm": 2.8184850215911865,
|
| 12972 |
+
"learning_rate": 9.00480318650422e-06,
|
| 12973 |
+
"loss": 0.0769,
|
| 12974 |
+
"step": 16990
|
| 12975 |
+
},
|
| 12976 |
+
{
|
| 12977 |
+
"epoch": 7.966260543580131,
|
| 12978 |
+
"grad_norm": 1.8346798419952393,
|
| 12979 |
+
"learning_rate": 9.004217432052485e-06,
|
| 12980 |
+
"loss": 0.0735,
|
| 12981 |
+
"step": 17000
|
| 12982 |
+
},
|
| 12983 |
+
{
|
| 12984 |
+
"epoch": 7.966260543580131,
|
| 12985 |
+
"eval_loss": 0.03939095139503479,
|
| 12986 |
+
"eval_pearson_cosine": 0.780422600052205,
|
| 12987 |
+
"eval_pearson_dot": 0.646788551622171,
|
| 12988 |
+
"eval_pearson_euclidean": 0.7325980054422985,
|
| 12989 |
+
"eval_pearson_manhattan": 0.731991687137608,
|
| 12990 |
+
"eval_runtime": 39.8363,
|
| 12991 |
+
"eval_samples_per_second": 37.654,
|
| 12992 |
+
"eval_spearman_cosine": 0.7823114033515521,
|
| 12993 |
+
"eval_spearman_dot": 0.6607344073150395,
|
| 12994 |
+
"eval_spearman_euclidean": 0.7461718651526544,
|
| 12995 |
+
"eval_spearman_manhattan": 0.745468210963869,
|
| 12996 |
+
"eval_steps_per_second": 37.654,
|
| 12997 |
+
"step": 17000
|
| 12998 |
}
|
| 12999 |
],
|
| 13000 |
"logging_steps": 10,
|