Training in progress, step 3750, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7586d4afd9fadcaa6083460f8f6841b5a702dba00cbd480cc156933cd79c41b2
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:474241a6101d37df838210445f01853e23b11101c165e76bd69cf2cda41699a2
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da0e93581e91c352d5ee493f505f8757c94a31fb5b16f71a9d85577535431525
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cea37f66b9478389c6f1b54e20d4b883ef028f78a1a1497fe4ee340f7d291f09
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3487,6 +3487,151 @@
|
|
| 3487 |
"EMA_steps_per_second": 24.982,
|
| 3488 |
"epoch": 156.52173913043478,
|
| 3489 |
"step": 3600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3490 |
}
|
| 3491 |
],
|
| 3492 |
"logging_steps": 10,
|
|
@@ -3506,7 +3651,7 @@
|
|
| 3506 |
"attributes": {}
|
| 3507 |
}
|
| 3508 |
},
|
| 3509 |
-
"total_flos": 9.
|
| 3510 |
"train_batch_size": 4,
|
| 3511 |
"trial_name": null,
|
| 3512 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 163.04347826086956,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 3750,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3487 |
"EMA_steps_per_second": 24.982,
|
| 3488 |
"epoch": 156.52173913043478,
|
| 3489 |
"step": 3600
|
| 3490 |
+
},
|
| 3491 |
+
{
|
| 3492 |
+
"epoch": 156.95652173913044,
|
| 3493 |
+
"grad_norm": 2.5946836471557617,
|
| 3494 |
+
"learning_rate": 2.513936770131954e-06,
|
| 3495 |
+
"loss": 0.1973,
|
| 3496 |
+
"step": 3610
|
| 3497 |
+
},
|
| 3498 |
+
{
|
| 3499 |
+
"epoch": 157.3913043478261,
|
| 3500 |
+
"grad_norm": 1.8816180229187012,
|
| 3501 |
+
"learning_rate": 2.5139335316856892e-06,
|
| 3502 |
+
"loss": 0.2551,
|
| 3503 |
+
"step": 3620
|
| 3504 |
+
},
|
| 3505 |
+
{
|
| 3506 |
+
"epoch": 157.82608695652175,
|
| 3507 |
+
"grad_norm": 1.969436764717102,
|
| 3508 |
+
"learning_rate": 2.5139297950203775e-06,
|
| 3509 |
+
"loss": 0.2349,
|
| 3510 |
+
"step": 3630
|
| 3511 |
+
},
|
| 3512 |
+
{
|
| 3513 |
+
"epoch": 158.2608695652174,
|
| 3514 |
+
"grad_norm": 2.1921560764312744,
|
| 3515 |
+
"learning_rate": 2.5139255601375007e-06,
|
| 3516 |
+
"loss": 0.2243,
|
| 3517 |
+
"step": 3640
|
| 3518 |
+
},
|
| 3519 |
+
{
|
| 3520 |
+
"epoch": 158.69565217391303,
|
| 3521 |
+
"grad_norm": 3.598989725112915,
|
| 3522 |
+
"learning_rate": 2.513920827038737e-06,
|
| 3523 |
+
"loss": 0.2276,
|
| 3524 |
+
"step": 3650
|
| 3525 |
+
},
|
| 3526 |
+
{
|
| 3527 |
+
"epoch": 159.1304347826087,
|
| 3528 |
+
"grad_norm": 2.583705186843872,
|
| 3529 |
+
"learning_rate": 2.513915595725963e-06,
|
| 3530 |
+
"loss": 0.2528,
|
| 3531 |
+
"step": 3660
|
| 3532 |
+
},
|
| 3533 |
+
{
|
| 3534 |
+
"epoch": 159.56521739130434,
|
| 3535 |
+
"grad_norm": 1.8946772813796997,
|
| 3536 |
+
"learning_rate": 2.5139098662012514e-06,
|
| 3537 |
+
"loss": 0.2368,
|
| 3538 |
+
"step": 3670
|
| 3539 |
+
},
|
| 3540 |
+
{
|
| 3541 |
+
"epoch": 160.0,
|
| 3542 |
+
"grad_norm": 2.685317039489746,
|
| 3543 |
+
"learning_rate": 2.513903638466874e-06,
|
| 3544 |
+
"loss": 0.2026,
|
| 3545 |
+
"step": 3680
|
| 3546 |
+
},
|
| 3547 |
+
{
|
| 3548 |
+
"epoch": 160.43478260869566,
|
| 3549 |
+
"grad_norm": 1.9969098567962646,
|
| 3550 |
+
"learning_rate": 2.5138969125252985e-06,
|
| 3551 |
+
"loss": 0.228,
|
| 3552 |
+
"step": 3690
|
| 3553 |
+
},
|
| 3554 |
+
{
|
| 3555 |
+
"epoch": 160.8695652173913,
|
| 3556 |
+
"grad_norm": 1.5398179292678833,
|
| 3557 |
+
"learning_rate": 2.5138896883791913e-06,
|
| 3558 |
+
"loss": 0.2437,
|
| 3559 |
+
"step": 3700
|
| 3560 |
+
},
|
| 3561 |
+
{
|
| 3562 |
+
"epoch": 161.30434782608697,
|
| 3563 |
+
"grad_norm": 1.6144198179244995,
|
| 3564 |
+
"learning_rate": 2.5138819660314154e-06,
|
| 3565 |
+
"loss": 0.2764,
|
| 3566 |
+
"step": 3710
|
| 3567 |
+
},
|
| 3568 |
+
{
|
| 3569 |
+
"epoch": 161.7391304347826,
|
| 3570 |
+
"grad_norm": 2.053276777267456,
|
| 3571 |
+
"learning_rate": 2.513873745485033e-06,
|
| 3572 |
+
"loss": 0.2278,
|
| 3573 |
+
"step": 3720
|
| 3574 |
+
},
|
| 3575 |
+
{
|
| 3576 |
+
"epoch": 162.17391304347825,
|
| 3577 |
+
"grad_norm": 2.3131282329559326,
|
| 3578 |
+
"learning_rate": 2.513865026743301e-06,
|
| 3579 |
+
"loss": 0.2157,
|
| 3580 |
+
"step": 3730
|
| 3581 |
+
},
|
| 3582 |
+
{
|
| 3583 |
+
"epoch": 162.6086956521739,
|
| 3584 |
+
"grad_norm": 2.0463197231292725,
|
| 3585 |
+
"learning_rate": 2.5138558098096753e-06,
|
| 3586 |
+
"loss": 0.2233,
|
| 3587 |
+
"step": 3740
|
| 3588 |
+
},
|
| 3589 |
+
{
|
| 3590 |
+
"epoch": 163.04347826086956,
|
| 3591 |
+
"grad_norm": 2.3754689693450928,
|
| 3592 |
+
"learning_rate": 2.51384609468781e-06,
|
| 3593 |
+
"loss": 0.2231,
|
| 3594 |
+
"step": 3750
|
| 3595 |
+
},
|
| 3596 |
+
{
|
| 3597 |
+
"epoch": 163.04347826086956,
|
| 3598 |
+
"eval_loss": 0.9596047401428223,
|
| 3599 |
+
"eval_runtime": 0.4563,
|
| 3600 |
+
"eval_samples_per_second": 21.916,
|
| 3601 |
+
"eval_steps_per_second": 21.916,
|
| 3602 |
+
"step": 3750
|
| 3603 |
+
},
|
| 3604 |
+
{
|
| 3605 |
+
"Start_State_loss": 0.861186683177948,
|
| 3606 |
+
"Start_State_runtime": 0.456,
|
| 3607 |
+
"Start_State_samples_per_second": 21.93,
|
| 3608 |
+
"Start_State_steps_per_second": 21.93,
|
| 3609 |
+
"epoch": 163.04347826086956,
|
| 3610 |
+
"step": 3750
|
| 3611 |
+
},
|
| 3612 |
+
{
|
| 3613 |
+
"Raw_Model_loss": 0.9596047401428223,
|
| 3614 |
+
"Raw_Model_runtime": 0.4822,
|
| 3615 |
+
"Raw_Model_samples_per_second": 20.737,
|
| 3616 |
+
"Raw_Model_steps_per_second": 20.737,
|
| 3617 |
+
"epoch": 163.04347826086956,
|
| 3618 |
+
"step": 3750
|
| 3619 |
+
},
|
| 3620 |
+
{
|
| 3621 |
+
"SWA_loss": 0.7939289808273315,
|
| 3622 |
+
"SWA_runtime": 0.4295,
|
| 3623 |
+
"SWA_samples_per_second": 23.281,
|
| 3624 |
+
"SWA_steps_per_second": 23.281,
|
| 3625 |
+
"epoch": 163.04347826086956,
|
| 3626 |
+
"step": 3750
|
| 3627 |
+
},
|
| 3628 |
+
{
|
| 3629 |
+
"EMA_loss": 0.8596266508102417,
|
| 3630 |
+
"EMA_runtime": 0.4196,
|
| 3631 |
+
"EMA_samples_per_second": 23.833,
|
| 3632 |
+
"EMA_steps_per_second": 23.833,
|
| 3633 |
+
"epoch": 163.04347826086956,
|
| 3634 |
+
"step": 3750
|
| 3635 |
}
|
| 3636 |
],
|
| 3637 |
"logging_steps": 10,
|
|
|
|
| 3651 |
"attributes": {}
|
| 3652 |
}
|
| 3653 |
},
|
| 3654 |
+
"total_flos": 9.668631592798618e+16,
|
| 3655 |
"train_batch_size": 4,
|
| 3656 |
"trial_name": null,
|
| 3657 |
"trial_params": null
|