Training in progress, step 4268, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 737580392
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6655c6985e492e8d6919382548a7effa0be42aa4ce41de6a3afb623371f715b6
|
| 3 |
size 737580392
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1475248442
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4d24494f3685341ff655be78dd4d6b804adf234b56e7ca404f9eb06e1b340b6
|
| 3 |
size 1475248442
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a736126bf032a05408714e9a6309ebf595d5e8e36aa317f7cb41422c442e7ab
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdfd054ee415a43775ee6882ba10b5080791cd5c9e7e77c4915c3e4fc9fe5d58
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3447,6 +3447,220 @@
|
|
| 3447 |
"eval_spearman_manhattan": 0.8198041702608989,
|
| 3448 |
"eval_steps_per_second": 13.426,
|
| 3449 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3450 |
}
|
| 3451 |
],
|
| 3452 |
"logging_steps": 10,
|
|
@@ -3461,7 +3675,7 @@
|
|
| 3461 |
"should_evaluate": false,
|
| 3462 |
"should_log": false,
|
| 3463 |
"should_save": true,
|
| 3464 |
-
"should_training_stop":
|
| 3465 |
},
|
| 3466 |
"attributes": {}
|
| 3467 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 4268,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3447 |
"eval_spearman_manhattan": 0.8198041702608989,
|
| 3448 |
"eval_steps_per_second": 13.426,
|
| 3449 |
"step": 4000
|
| 3450 |
+
},
|
| 3451 |
+
{
|
| 3452 |
+
"epoch": 1.879100281162137,
|
| 3453 |
+
"grad_norm": 1.8194371461868286,
|
| 3454 |
+
"learning_rate": 4.4127811621368325e-05,
|
| 3455 |
+
"loss": 0.1598,
|
| 3456 |
+
"step": 4010
|
| 3457 |
+
},
|
| 3458 |
+
{
|
| 3459 |
+
"epoch": 1.8837863167760074,
|
| 3460 |
+
"grad_norm": 1.2515980005264282,
|
| 3461 |
+
"learning_rate": 4.411316776007498e-05,
|
| 3462 |
+
"loss": 0.1872,
|
| 3463 |
+
"step": 4020
|
| 3464 |
+
},
|
| 3465 |
+
{
|
| 3466 |
+
"epoch": 1.8884723523898783,
|
| 3467 |
+
"grad_norm": 1.4522411823272705,
|
| 3468 |
+
"learning_rate": 4.409852389878163e-05,
|
| 3469 |
+
"loss": 0.1906,
|
| 3470 |
+
"step": 4030
|
| 3471 |
+
},
|
| 3472 |
+
{
|
| 3473 |
+
"epoch": 1.8931583880037488,
|
| 3474 |
+
"grad_norm": 1.3392481803894043,
|
| 3475 |
+
"learning_rate": 4.408388003748829e-05,
|
| 3476 |
+
"loss": 0.1947,
|
| 3477 |
+
"step": 4040
|
| 3478 |
+
},
|
| 3479 |
+
{
|
| 3480 |
+
"epoch": 1.8978444236176195,
|
| 3481 |
+
"grad_norm": 1.1387908458709717,
|
| 3482 |
+
"learning_rate": 4.406923617619494e-05,
|
| 3483 |
+
"loss": 0.1719,
|
| 3484 |
+
"step": 4050
|
| 3485 |
+
},
|
| 3486 |
+
{
|
| 3487 |
+
"epoch": 1.9025304592314902,
|
| 3488 |
+
"grad_norm": 1.7648086547851562,
|
| 3489 |
+
"learning_rate": 4.4054592314901596e-05,
|
| 3490 |
+
"loss": 0.2144,
|
| 3491 |
+
"step": 4060
|
| 3492 |
+
},
|
| 3493 |
+
{
|
| 3494 |
+
"epoch": 1.9072164948453607,
|
| 3495 |
+
"grad_norm": 1.774842381477356,
|
| 3496 |
+
"learning_rate": 4.403994845360825e-05,
|
| 3497 |
+
"loss": 0.1973,
|
| 3498 |
+
"step": 4070
|
| 3499 |
+
},
|
| 3500 |
+
{
|
| 3501 |
+
"epoch": 1.9119025304592316,
|
| 3502 |
+
"grad_norm": 1.3129111528396606,
|
| 3503 |
+
"learning_rate": 4.4025304592314905e-05,
|
| 3504 |
+
"loss": 0.1723,
|
| 3505 |
+
"step": 4080
|
| 3506 |
+
},
|
| 3507 |
+
{
|
| 3508 |
+
"epoch": 1.9165885660731021,
|
| 3509 |
+
"grad_norm": 1.311933159828186,
|
| 3510 |
+
"learning_rate": 4.401066073102156e-05,
|
| 3511 |
+
"loss": 0.1768,
|
| 3512 |
+
"step": 4090
|
| 3513 |
+
},
|
| 3514 |
+
{
|
| 3515 |
+
"epoch": 1.9212746016869728,
|
| 3516 |
+
"grad_norm": 1.510150671005249,
|
| 3517 |
+
"learning_rate": 4.399601686972821e-05,
|
| 3518 |
+
"loss": 0.1679,
|
| 3519 |
+
"step": 4100
|
| 3520 |
+
},
|
| 3521 |
+
{
|
| 3522 |
+
"epoch": 1.9212746016869728,
|
| 3523 |
+
"eval_loss": 0.034906383603811264,
|
| 3524 |
+
"eval_pearson_cosine": 0.8238323682543012,
|
| 3525 |
+
"eval_pearson_dot": 0.75607099937789,
|
| 3526 |
+
"eval_pearson_euclidean": 0.8097449943516324,
|
| 3527 |
+
"eval_pearson_manhattan": 0.8109217792674599,
|
| 3528 |
+
"eval_runtime": 7.5129,
|
| 3529 |
+
"eval_samples_per_second": 199.657,
|
| 3530 |
+
"eval_spearman_cosine": 0.824907875327154,
|
| 3531 |
+
"eval_spearman_dot": 0.7550909354777231,
|
| 3532 |
+
"eval_spearman_euclidean": 0.8187348127942441,
|
| 3533 |
+
"eval_spearman_manhattan": 0.8200370653936264,
|
| 3534 |
+
"eval_steps_per_second": 12.512,
|
| 3535 |
+
"step": 4100
|
| 3536 |
+
},
|
| 3537 |
+
{
|
| 3538 |
+
"epoch": 1.9259606373008435,
|
| 3539 |
+
"grad_norm": 1.644677996635437,
|
| 3540 |
+
"learning_rate": 4.398137300843487e-05,
|
| 3541 |
+
"loss": 0.1961,
|
| 3542 |
+
"step": 4110
|
| 3543 |
+
},
|
| 3544 |
+
{
|
| 3545 |
+
"epoch": 1.930646672914714,
|
| 3546 |
+
"grad_norm": 1.508178472518921,
|
| 3547 |
+
"learning_rate": 4.396672914714152e-05,
|
| 3548 |
+
"loss": 0.1841,
|
| 3549 |
+
"step": 4120
|
| 3550 |
+
},
|
| 3551 |
+
{
|
| 3552 |
+
"epoch": 1.935332708528585,
|
| 3553 |
+
"grad_norm": 1.5336145162582397,
|
| 3554 |
+
"learning_rate": 4.3952085285848176e-05,
|
| 3555 |
+
"loss": 0.1637,
|
| 3556 |
+
"step": 4130
|
| 3557 |
+
},
|
| 3558 |
+
{
|
| 3559 |
+
"epoch": 1.9400187441424555,
|
| 3560 |
+
"grad_norm": 1.7044395208358765,
|
| 3561 |
+
"learning_rate": 4.3937441424554824e-05,
|
| 3562 |
+
"loss": 0.2063,
|
| 3563 |
+
"step": 4140
|
| 3564 |
+
},
|
| 3565 |
+
{
|
| 3566 |
+
"epoch": 1.9447047797563262,
|
| 3567 |
+
"grad_norm": 1.6980154514312744,
|
| 3568 |
+
"learning_rate": 4.392279756326148e-05,
|
| 3569 |
+
"loss": 0.2118,
|
| 3570 |
+
"step": 4150
|
| 3571 |
+
},
|
| 3572 |
+
{
|
| 3573 |
+
"epoch": 1.9493908153701969,
|
| 3574 |
+
"grad_norm": 1.833633542060852,
|
| 3575 |
+
"learning_rate": 4.390815370196814e-05,
|
| 3576 |
+
"loss": 0.171,
|
| 3577 |
+
"step": 4160
|
| 3578 |
+
},
|
| 3579 |
+
{
|
| 3580 |
+
"epoch": 1.9540768509840674,
|
| 3581 |
+
"grad_norm": 1.7349201440811157,
|
| 3582 |
+
"learning_rate": 4.389350984067479e-05,
|
| 3583 |
+
"loss": 0.1885,
|
| 3584 |
+
"step": 4170
|
| 3585 |
+
},
|
| 3586 |
+
{
|
| 3587 |
+
"epoch": 1.9587628865979383,
|
| 3588 |
+
"grad_norm": 2.3254284858703613,
|
| 3589 |
+
"learning_rate": 4.387886597938145e-05,
|
| 3590 |
+
"loss": 0.1843,
|
| 3591 |
+
"step": 4180
|
| 3592 |
+
},
|
| 3593 |
+
{
|
| 3594 |
+
"epoch": 1.9634489222118088,
|
| 3595 |
+
"grad_norm": 1.1924229860305786,
|
| 3596 |
+
"learning_rate": 4.38642221180881e-05,
|
| 3597 |
+
"loss": 0.1862,
|
| 3598 |
+
"step": 4190
|
| 3599 |
+
},
|
| 3600 |
+
{
|
| 3601 |
+
"epoch": 1.9681349578256795,
|
| 3602 |
+
"grad_norm": 1.5753990411758423,
|
| 3603 |
+
"learning_rate": 4.3849578256794756e-05,
|
| 3604 |
+
"loss": 0.1699,
|
| 3605 |
+
"step": 4200
|
| 3606 |
+
},
|
| 3607 |
+
{
|
| 3608 |
+
"epoch": 1.9681349578256795,
|
| 3609 |
+
"eval_loss": 0.03551472723484039,
|
| 3610 |
+
"eval_pearson_cosine": 0.8273878707711191,
|
| 3611 |
+
"eval_pearson_dot": 0.7646820898603437,
|
| 3612 |
+
"eval_pearson_euclidean": 0.8112987734110177,
|
| 3613 |
+
"eval_pearson_manhattan": 0.8125188338482303,
|
| 3614 |
+
"eval_runtime": 5.9715,
|
| 3615 |
+
"eval_samples_per_second": 251.194,
|
| 3616 |
+
"eval_spearman_cosine": 0.8298080691919564,
|
| 3617 |
+
"eval_spearman_dot": 0.7648333772102188,
|
| 3618 |
+
"eval_spearman_euclidean": 0.8214596205940881,
|
| 3619 |
+
"eval_spearman_manhattan": 0.8226861322419045,
|
| 3620 |
+
"eval_steps_per_second": 15.742,
|
| 3621 |
+
"step": 4200
|
| 3622 |
+
},
|
| 3623 |
+
{
|
| 3624 |
+
"epoch": 1.9728209934395502,
|
| 3625 |
+
"grad_norm": 1.7450155019760132,
|
| 3626 |
+
"learning_rate": 4.383493439550141e-05,
|
| 3627 |
+
"loss": 0.2132,
|
| 3628 |
+
"step": 4210
|
| 3629 |
+
},
|
| 3630 |
+
{
|
| 3631 |
+
"epoch": 1.9775070290534207,
|
| 3632 |
+
"grad_norm": 2.049828290939331,
|
| 3633 |
+
"learning_rate": 4.3820290534208064e-05,
|
| 3634 |
+
"loss": 0.2,
|
| 3635 |
+
"step": 4220
|
| 3636 |
+
},
|
| 3637 |
+
{
|
| 3638 |
+
"epoch": 1.9821930646672916,
|
| 3639 |
+
"grad_norm": 1.8437615633010864,
|
| 3640 |
+
"learning_rate": 4.380564667291472e-05,
|
| 3641 |
+
"loss": 0.1787,
|
| 3642 |
+
"step": 4230
|
| 3643 |
+
},
|
| 3644 |
+
{
|
| 3645 |
+
"epoch": 1.986879100281162,
|
| 3646 |
+
"grad_norm": 1.3667303323745728,
|
| 3647 |
+
"learning_rate": 4.3791002811621366e-05,
|
| 3648 |
+
"loss": 0.1995,
|
| 3649 |
+
"step": 4240
|
| 3650 |
+
},
|
| 3651 |
+
{
|
| 3652 |
+
"epoch": 1.9915651358950328,
|
| 3653 |
+
"grad_norm": 1.3837028741836548,
|
| 3654 |
+
"learning_rate": 4.377635895032802e-05,
|
| 3655 |
+
"loss": 0.2021,
|
| 3656 |
+
"step": 4250
|
| 3657 |
+
},
|
| 3658 |
+
{
|
| 3659 |
+
"epoch": 1.9962511715089035,
|
| 3660 |
+
"grad_norm": 1.6766111850738525,
|
| 3661 |
+
"learning_rate": 4.3761715089034675e-05,
|
| 3662 |
+
"loss": 0.1918,
|
| 3663 |
+
"step": 4260
|
| 3664 |
}
|
| 3665 |
],
|
| 3666 |
"logging_steps": 10,
|
|
|
|
| 3675 |
"should_evaluate": false,
|
| 3676 |
"should_log": false,
|
| 3677 |
"should_save": true,
|
| 3678 |
+
"should_training_stop": true
|
| 3679 |
},
|
| 3680 |
"attributes": {}
|
| 3681 |
}
|