Training in progress, step 4390, checkpoint
Browse files
last-checkpoint/2_Dense/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3149984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98502da6c4dbee1502fa8ebc31ff356b5762eb792a899d4e5339d3cd3a7c0ae4
|
| 3 |
size 3149984
|
last-checkpoint/README.md
CHANGED
|
@@ -830,6 +830,20 @@ You can finetune this model on your own dataset.
|
|
| 830 |
| 9.6146 | 4230 | 0.061 | - | - |
|
| 831 |
| 9.6374 | 4240 | 0.0616 | - | - |
|
| 832 |
| 9.6601 | 4250 | 0.0613 | 0.0418 | 0.8282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
|
| 834 |
</details>
|
| 835 |
|
|
|
|
| 830 |
| 9.6146 | 4230 | 0.061 | - | - |
|
| 831 |
| 9.6374 | 4240 | 0.0616 | - | - |
|
| 832 |
| 9.6601 | 4250 | 0.0613 | 0.0418 | 0.8282 |
|
| 833 |
+
| 9.6829 | 4260 | 0.0623 | - | - |
|
| 834 |
+
| 9.7056 | 4270 | 0.0605 | - | - |
|
| 835 |
+
| 9.7284 | 4280 | 0.0637 | - | - |
|
| 836 |
+
| 9.7512 | 4290 | 0.0604 | - | - |
|
| 837 |
+
| 9.7739 | 4300 | 0.0606 | - | - |
|
| 838 |
+
| 9.7967 | 4310 | 0.0622 | - | - |
|
| 839 |
+
| 9.8195 | 4320 | 0.0598 | - | - |
|
| 840 |
+
| 9.8422 | 4330 | 0.0611 | - | - |
|
| 841 |
+
| 9.8650 | 4340 | 0.0604 | - | - |
|
| 842 |
+
| 9.8878 | 4350 | 0.0598 | - | - |
|
| 843 |
+
| 9.9105 | 4360 | 0.0626 | - | - |
|
| 844 |
+
| 9.9333 | 4370 | 0.0624 | - | - |
|
| 845 |
+
| 9.9560 | 4380 | 0.0617 | - | - |
|
| 846 |
+
| 9.9788 | 4390 | 0.0603 | - | - |
|
| 847 |
|
| 848 |
</details>
|
| 849 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735216376
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4533a4e396a4cedfa433a333dfd7f93b95b00042cf7dc09f5854fa0650746841
|
| 3 |
size 735216376
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1476823354
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03f40734fc193c019c97cbc28b1ae04414cdc745d19240313f29ceea320dd5c0
|
| 3 |
size 1476823354
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbcc6d0c1acd705e5ef3f7c1bdfb510617f9ad5f9bd9d641c051fe36f40e8b31
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3288,6 +3288,104 @@
|
|
| 3288 |
"eval_sts_dev_spearman_manhattan": 0.7996541111809876,
|
| 3289 |
"eval_sts_dev_spearman_max": 0.8282368218808581,
|
| 3290 |
"step": 4250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3291 |
}
|
| 3292 |
],
|
| 3293 |
"logging_steps": 10,
|
|
@@ -3302,7 +3400,7 @@
|
|
| 3302 |
"should_evaluate": false,
|
| 3303 |
"should_log": false,
|
| 3304 |
"should_save": true,
|
| 3305 |
-
"should_training_stop":
|
| 3306 |
},
|
| 3307 |
"attributes": {}
|
| 3308 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.97880210556267,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 4390,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3288 |
"eval_sts_dev_spearman_manhattan": 0.7996541111809876,
|
| 3289 |
"eval_sts_dev_spearman_max": 0.8282368218808581,
|
| 3290 |
"step": 4250
|
| 3291 |
+
},
|
| 3292 |
+
{
|
| 3293 |
+
"epoch": 9.682885189927443,
|
| 3294 |
+
"grad_norm": 0.2565889060497284,
|
| 3295 |
+
"learning_rate": 3.0299797290088556e-06,
|
| 3296 |
+
"loss": 0.0623,
|
| 3297 |
+
"step": 4260
|
| 3298 |
+
},
|
| 3299 |
+
{
|
| 3300 |
+
"epoch": 9.705648029591691,
|
| 3301 |
+
"grad_norm": 0.2263515293598175,
|
| 3302 |
+
"learning_rate": 3.037092357480707e-06,
|
| 3303 |
+
"loss": 0.0605,
|
| 3304 |
+
"step": 4270
|
| 3305 |
+
},
|
| 3306 |
+
{
|
| 3307 |
+
"epoch": 9.72841086925594,
|
| 3308 |
+
"grad_norm": 0.21705535054206848,
|
| 3309 |
+
"learning_rate": 3.044204985952559e-06,
|
| 3310 |
+
"loss": 0.0637,
|
| 3311 |
+
"step": 4280
|
| 3312 |
+
},
|
| 3313 |
+
{
|
| 3314 |
+
"epoch": 9.751173708920188,
|
| 3315 |
+
"grad_norm": 0.21649038791656494,
|
| 3316 |
+
"learning_rate": 3.0513176144244106e-06,
|
| 3317 |
+
"loss": 0.0604,
|
| 3318 |
+
"step": 4290
|
| 3319 |
+
},
|
| 3320 |
+
{
|
| 3321 |
+
"epoch": 9.773936548584436,
|
| 3322 |
+
"grad_norm": 0.22717022895812988,
|
| 3323 |
+
"learning_rate": 3.0584302428962625e-06,
|
| 3324 |
+
"loss": 0.0606,
|
| 3325 |
+
"step": 4300
|
| 3326 |
+
},
|
| 3327 |
+
{
|
| 3328 |
+
"epoch": 9.796699388248683,
|
| 3329 |
+
"grad_norm": 0.23610946536064148,
|
| 3330 |
+
"learning_rate": 3.0655428713681145e-06,
|
| 3331 |
+
"loss": 0.0622,
|
| 3332 |
+
"step": 4310
|
| 3333 |
+
},
|
| 3334 |
+
{
|
| 3335 |
+
"epoch": 9.819462227912933,
|
| 3336 |
+
"grad_norm": 0.2080880105495453,
|
| 3337 |
+
"learning_rate": 3.072655499839966e-06,
|
| 3338 |
+
"loss": 0.0598,
|
| 3339 |
+
"step": 4320
|
| 3340 |
+
},
|
| 3341 |
+
{
|
| 3342 |
+
"epoch": 9.84222506757718,
|
| 3343 |
+
"grad_norm": 0.2862449884414673,
|
| 3344 |
+
"learning_rate": 3.079768128311818e-06,
|
| 3345 |
+
"loss": 0.0611,
|
| 3346 |
+
"step": 4330
|
| 3347 |
+
},
|
| 3348 |
+
{
|
| 3349 |
+
"epoch": 9.864987907241428,
|
| 3350 |
+
"grad_norm": 0.2211073935031891,
|
| 3351 |
+
"learning_rate": 3.0868807567836695e-06,
|
| 3352 |
+
"loss": 0.0604,
|
| 3353 |
+
"step": 4340
|
| 3354 |
+
},
|
| 3355 |
+
{
|
| 3356 |
+
"epoch": 9.887750746905677,
|
| 3357 |
+
"grad_norm": 0.2399899959564209,
|
| 3358 |
+
"learning_rate": 3.0939933852555214e-06,
|
| 3359 |
+
"loss": 0.0598,
|
| 3360 |
+
"step": 4350
|
| 3361 |
+
},
|
| 3362 |
+
{
|
| 3363 |
+
"epoch": 9.910513586569925,
|
| 3364 |
+
"grad_norm": 0.2330579161643982,
|
| 3365 |
+
"learning_rate": 3.101106013727373e-06,
|
| 3366 |
+
"loss": 0.0626,
|
| 3367 |
+
"step": 4360
|
| 3368 |
+
},
|
| 3369 |
+
{
|
| 3370 |
+
"epoch": 9.933276426234173,
|
| 3371 |
+
"grad_norm": 0.23163940012454987,
|
| 3372 |
+
"learning_rate": 3.108218642199225e-06,
|
| 3373 |
+
"loss": 0.0624,
|
| 3374 |
+
"step": 4370
|
| 3375 |
+
},
|
| 3376 |
+
{
|
| 3377 |
+
"epoch": 9.95603926589842,
|
| 3378 |
+
"grad_norm": 0.2087012380361557,
|
| 3379 |
+
"learning_rate": 3.115331270671077e-06,
|
| 3380 |
+
"loss": 0.0617,
|
| 3381 |
+
"step": 4380
|
| 3382 |
+
},
|
| 3383 |
+
{
|
| 3384 |
+
"epoch": 9.97880210556267,
|
| 3385 |
+
"grad_norm": 0.24286577105522156,
|
| 3386 |
+
"learning_rate": 3.1224438991429284e-06,
|
| 3387 |
+
"loss": 0.0603,
|
| 3388 |
+
"step": 4390
|
| 3389 |
}
|
| 3390 |
],
|
| 3391 |
"logging_steps": 10,
|
|
|
|
| 3400 |
"should_evaluate": false,
|
| 3401 |
"should_log": false,
|
| 3402 |
"should_save": true,
|
| 3403 |
+
"should_training_stop": true
|
| 3404 |
},
|
| 3405 |
"attributes": {}
|
| 3406 |
}
|