Training in progress, step 5493, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2718107304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2807387c3f7c038eca212dca41a58ecfff1755585862e7e2318b6286dd29cb8f
|
| 3 |
size 2718107304
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 145486330
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71dd57ecc32f710a2531c2b41f6cbb162801c9ad0f9bb31b277daffb7fe2f9b4
|
| 3 |
size 145486330
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ad39c2fd71a09f27709f37c0a489f4c2b0a997a89343f75cb61234192319689
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2455594c5b90eff022ef3ec1c714caddd8dcf4c8dacec82303fc8c5605b9f1d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0
|
| 5 |
"eval_steps": 1000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3547,6 +3547,349 @@
|
|
| 3547 |
"eval_samples_per_second": 9.646,
|
| 3548 |
"eval_steps_per_second": 1.206,
|
| 3549 |
"step": 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3550 |
}
|
| 3551 |
],
|
| 3552 |
"logging_steps": 10,
|
|
@@ -3561,12 +3904,12 @@
|
|
| 3561 |
"should_evaluate": false,
|
| 3562 |
"should_log": false,
|
| 3563 |
"should_save": true,
|
| 3564 |
-
"should_training_stop":
|
| 3565 |
},
|
| 3566 |
"attributes": {}
|
| 3567 |
}
|
| 3568 |
},
|
| 3569 |
-
"total_flos": 1.
|
| 3570 |
"train_batch_size": 4,
|
| 3571 |
"trial_name": null,
|
| 3572 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
"eval_steps": 1000,
|
| 6 |
+
"global_step": 5493,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3547 |
"eval_samples_per_second": 9.646,
|
| 3548 |
"eval_steps_per_second": 1.206,
|
| 3549 |
"step": 5000
|
| 3550 |
+
},
|
| 3551 |
+
{
|
| 3552 |
+
"epoch": 0.9120699071545604,
|
| 3553 |
+
"grad_norm": 4.849244117736816,
|
| 3554 |
+
"learning_rate": 2.291214703757982e-06,
|
| 3555 |
+
"loss": 2.3958,
|
| 3556 |
+
"step": 5010
|
| 3557 |
+
},
|
| 3558 |
+
{
|
| 3559 |
+
"epoch": 0.9138904059712362,
|
| 3560 |
+
"grad_norm": 4.8128204345703125,
|
| 3561 |
+
"learning_rate": 2.1980167035280163e-06,
|
| 3562 |
+
"loss": 2.4288,
|
| 3563 |
+
"step": 5020
|
| 3564 |
+
},
|
| 3565 |
+
{
|
| 3566 |
+
"epoch": 0.9157109047879118,
|
| 3567 |
+
"grad_norm": 5.573403835296631,
|
| 3568 |
+
"learning_rate": 2.1067111388414163e-06,
|
| 3569 |
+
"loss": 2.4134,
|
| 3570 |
+
"step": 5030
|
| 3571 |
+
},
|
| 3572 |
+
{
|
| 3573 |
+
"epoch": 0.9175314036045876,
|
| 3574 |
+
"grad_norm": 4.9487504959106445,
|
| 3575 |
+
"learning_rate": 2.0173016243995866e-06,
|
| 3576 |
+
"loss": 2.4095,
|
| 3577 |
+
"step": 5040
|
| 3578 |
+
},
|
| 3579 |
+
{
|
| 3580 |
+
"epoch": 0.9193519024212634,
|
| 3581 |
+
"grad_norm": 4.933927536010742,
|
| 3582 |
+
"learning_rate": 1.929791699841066e-06,
|
| 3583 |
+
"loss": 2.4014,
|
| 3584 |
+
"step": 5050
|
| 3585 |
+
},
|
| 3586 |
+
{
|
| 3587 |
+
"epoch": 0.9211724012379392,
|
| 3588 |
+
"grad_norm": 5.116062641143799,
|
| 3589 |
+
"learning_rate": 1.844184829601453e-06,
|
| 3590 |
+
"loss": 2.4196,
|
| 3591 |
+
"step": 5060
|
| 3592 |
+
},
|
| 3593 |
+
{
|
| 3594 |
+
"epoch": 0.922992900054615,
|
| 3595 |
+
"grad_norm": 4.888516902923584,
|
| 3596 |
+
"learning_rate": 1.7604844027761802e-06,
|
| 3597 |
+
"loss": 2.4418,
|
| 3598 |
+
"step": 5070
|
| 3599 |
+
},
|
| 3600 |
+
{
|
| 3601 |
+
"epoch": 0.9248133988712908,
|
| 3602 |
+
"grad_norm": 4.990447998046875,
|
| 3603 |
+
"learning_rate": 1.6786937329864027e-06,
|
| 3604 |
+
"loss": 2.4049,
|
| 3605 |
+
"step": 5080
|
| 3606 |
+
},
|
| 3607 |
+
{
|
| 3608 |
+
"epoch": 0.9266338976879666,
|
| 3609 |
+
"grad_norm": 4.672518253326416,
|
| 3610 |
+
"learning_rate": 1.5988160582477818e-06,
|
| 3611 |
+
"loss": 2.3873,
|
| 3612 |
+
"step": 5090
|
| 3613 |
+
},
|
| 3614 |
+
{
|
| 3615 |
+
"epoch": 0.9284543965046422,
|
| 3616 |
+
"grad_norm": 5.029353618621826,
|
| 3617 |
+
"learning_rate": 1.5208545408423092e-06,
|
| 3618 |
+
"loss": 2.4754,
|
| 3619 |
+
"step": 5100
|
| 3620 |
+
},
|
| 3621 |
+
{
|
| 3622 |
+
"epoch": 0.930274895321318,
|
| 3623 |
+
"grad_norm": 4.660059928894043,
|
| 3624 |
+
"learning_rate": 1.444812267193102e-06,
|
| 3625 |
+
"loss": 2.4081,
|
| 3626 |
+
"step": 5110
|
| 3627 |
+
},
|
| 3628 |
+
{
|
| 3629 |
+
"epoch": 0.9320953941379938,
|
| 3630 |
+
"grad_norm": 5.001034259796143,
|
| 3631 |
+
"learning_rate": 1.3706922477422336e-06,
|
| 3632 |
+
"loss": 2.4014,
|
| 3633 |
+
"step": 5120
|
| 3634 |
+
},
|
| 3635 |
+
{
|
| 3636 |
+
"epoch": 0.9339158929546696,
|
| 3637 |
+
"grad_norm": 5.1275858879089355,
|
| 3638 |
+
"learning_rate": 1.2984974168315234e-06,
|
| 3639 |
+
"loss": 2.4251,
|
| 3640 |
+
"step": 5130
|
| 3641 |
+
},
|
| 3642 |
+
{
|
| 3643 |
+
"epoch": 0.9357363917713454,
|
| 3644 |
+
"grad_norm": 4.893324375152588,
|
| 3645 |
+
"learning_rate": 1.2282306325864135e-06,
|
| 3646 |
+
"loss": 2.4196,
|
| 3647 |
+
"step": 5140
|
| 3648 |
+
},
|
| 3649 |
+
{
|
| 3650 |
+
"epoch": 0.9375568905880212,
|
| 3651 |
+
"grad_norm": 4.734968662261963,
|
| 3652 |
+
"learning_rate": 1.1598946768027863e-06,
|
| 3653 |
+
"loss": 2.401,
|
| 3654 |
+
"step": 5150
|
| 3655 |
+
},
|
| 3656 |
+
{
|
| 3657 |
+
"epoch": 0.9393773894046968,
|
| 3658 |
+
"grad_norm": 4.66255521774292,
|
| 3659 |
+
"learning_rate": 1.0934922548368254e-06,
|
| 3660 |
+
"loss": 2.3846,
|
| 3661 |
+
"step": 5160
|
| 3662 |
+
},
|
| 3663 |
+
{
|
| 3664 |
+
"epoch": 0.9411978882213726,
|
| 3665 |
+
"grad_norm": 4.771427631378174,
|
| 3666 |
+
"learning_rate": 1.0290259954979397e-06,
|
| 3667 |
+
"loss": 2.3953,
|
| 3668 |
+
"step": 5170
|
| 3669 |
+
},
|
| 3670 |
+
{
|
| 3671 |
+
"epoch": 0.9430183870380484,
|
| 3672 |
+
"grad_norm": 4.673166275024414,
|
| 3673 |
+
"learning_rate": 9.664984509446917e-07,
|
| 3674 |
+
"loss": 2.3694,
|
| 3675 |
+
"step": 5180
|
| 3676 |
+
},
|
| 3677 |
+
{
|
| 3678 |
+
"epoch": 0.9448388858547242,
|
| 3679 |
+
"grad_norm": 4.778134346008301,
|
| 3680 |
+
"learning_rate": 9.059120965837331e-07,
|
| 3681 |
+
"loss": 2.3948,
|
| 3682 |
+
"step": 5190
|
| 3683 |
+
},
|
| 3684 |
+
{
|
| 3685 |
+
"epoch": 0.9466593846714,
|
| 3686 |
+
"grad_norm": 4.706231594085693,
|
| 3687 |
+
"learning_rate": 8.472693309718283e-07,
|
| 3688 |
+
"loss": 2.4153,
|
| 3689 |
+
"step": 5200
|
| 3690 |
+
},
|
| 3691 |
+
{
|
| 3692 |
+
"epoch": 0.9484798834880758,
|
| 3693 |
+
"grad_norm": 4.645259380340576,
|
| 3694 |
+
"learning_rate": 7.905724757208965e-07,
|
| 3695 |
+
"loss": 2.3806,
|
| 3696 |
+
"step": 5210
|
| 3697 |
+
},
|
| 3698 |
+
{
|
| 3699 |
+
"epoch": 0.9503003823047516,
|
| 3700 |
+
"grad_norm": 5.04796838760376,
|
| 3701 |
+
"learning_rate": 7.358237754060915e-07,
|
| 3702 |
+
"loss": 2.454,
|
| 3703 |
+
"step": 5220
|
| 3704 |
+
},
|
| 3705 |
+
{
|
| 3706 |
+
"epoch": 0.9521208811214272,
|
| 3707 |
+
"grad_norm": 4.7881646156311035,
|
| 3708 |
+
"learning_rate": 6.830253974769496e-07,
|
| 3709 |
+
"loss": 2.4161,
|
| 3710 |
+
"step": 5230
|
| 3711 |
+
},
|
| 3712 |
+
{
|
| 3713 |
+
"epoch": 0.953941379938103,
|
| 3714 |
+
"grad_norm": 4.7254743576049805,
|
| 3715 |
+
"learning_rate": 6.321794321715757e-07,
|
| 3716 |
+
"loss": 2.4715,
|
| 3717 |
+
"step": 5240
|
| 3718 |
+
},
|
| 3719 |
+
{
|
| 3720 |
+
"epoch": 0.9557618787547788,
|
| 3721 |
+
"grad_norm": 5.13754415512085,
|
| 3722 |
+
"learning_rate": 5.832878924338869e-07,
|
| 3723 |
+
"loss": 2.4191,
|
| 3724 |
+
"step": 5250
|
| 3725 |
+
},
|
| 3726 |
+
{
|
| 3727 |
+
"epoch": 0.9575823775714546,
|
| 3728 |
+
"grad_norm": 4.781599998474121,
|
| 3729 |
+
"learning_rate": 5.363527138339597e-07,
|
| 3730 |
+
"loss": 2.4127,
|
| 3731 |
+
"step": 5260
|
| 3732 |
+
},
|
| 3733 |
+
{
|
| 3734 |
+
"epoch": 0.9594028763881304,
|
| 3735 |
+
"grad_norm": 4.541421413421631,
|
| 3736 |
+
"learning_rate": 4.913757544913355e-07,
|
| 3737 |
+
"loss": 2.3908,
|
| 3738 |
+
"step": 5270
|
| 3739 |
+
},
|
| 3740 |
+
{
|
| 3741 |
+
"epoch": 0.9612233752048062,
|
| 3742 |
+
"grad_norm": 5.078845500946045,
|
| 3743 |
+
"learning_rate": 4.4835879500153556e-07,
|
| 3744 |
+
"loss": 2.4303,
|
| 3745 |
+
"step": 5280
|
| 3746 |
+
},
|
| 3747 |
+
{
|
| 3748 |
+
"epoch": 0.9630438740214818,
|
| 3749 |
+
"grad_norm": 4.745322227478027,
|
| 3750 |
+
"learning_rate": 4.0730353836549993e-07,
|
| 3751 |
+
"loss": 2.4046,
|
| 3752 |
+
"step": 5290
|
| 3753 |
+
},
|
| 3754 |
+
{
|
| 3755 |
+
"epoch": 0.9648643728381576,
|
| 3756 |
+
"grad_norm": 4.688536643981934,
|
| 3757 |
+
"learning_rate": 3.6821160992221993e-07,
|
| 3758 |
+
"loss": 2.4456,
|
| 3759 |
+
"step": 5300
|
| 3760 |
+
},
|
| 3761 |
+
{
|
| 3762 |
+
"epoch": 0.9666848716548334,
|
| 3763 |
+
"grad_norm": 4.9088592529296875,
|
| 3764 |
+
"learning_rate": 3.310845572843557e-07,
|
| 3765 |
+
"loss": 2.3846,
|
| 3766 |
+
"step": 5310
|
| 3767 |
+
},
|
| 3768 |
+
{
|
| 3769 |
+
"epoch": 0.9685053704715092,
|
| 3770 |
+
"grad_norm": 5.126766681671143,
|
| 3771 |
+
"learning_rate": 2.959238502769912e-07,
|
| 3772 |
+
"loss": 2.4093,
|
| 3773 |
+
"step": 5320
|
| 3774 |
+
},
|
| 3775 |
+
{
|
| 3776 |
+
"epoch": 0.970325869288185,
|
| 3777 |
+
"grad_norm": 4.49152946472168,
|
| 3778 |
+
"learning_rate": 2.6273088087943597e-07,
|
| 3779 |
+
"loss": 2.3837,
|
| 3780 |
+
"step": 5330
|
| 3781 |
+
},
|
| 3782 |
+
{
|
| 3783 |
+
"epoch": 0.9721463681048608,
|
| 3784 |
+
"grad_norm": 4.944559097290039,
|
| 3785 |
+
"learning_rate": 2.315069631701139e-07,
|
| 3786 |
+
"loss": 2.3791,
|
| 3787 |
+
"step": 5340
|
| 3788 |
+
},
|
| 3789 |
+
{
|
| 3790 |
+
"epoch": 0.9739668669215366,
|
| 3791 |
+
"grad_norm": 4.91040563583374,
|
| 3792 |
+
"learning_rate": 2.022533332745602e-07,
|
| 3793 |
+
"loss": 2.4035,
|
| 3794 |
+
"step": 5350
|
| 3795 |
+
},
|
| 3796 |
+
{
|
| 3797 |
+
"epoch": 0.9757873657382122,
|
| 3798 |
+
"grad_norm": 4.91538143157959,
|
| 3799 |
+
"learning_rate": 1.7497114931644965e-07,
|
| 3800 |
+
"loss": 2.4057,
|
| 3801 |
+
"step": 5360
|
| 3802 |
+
},
|
| 3803 |
+
{
|
| 3804 |
+
"epoch": 0.977607864554888,
|
| 3805 |
+
"grad_norm": 5.63076114654541,
|
| 3806 |
+
"learning_rate": 1.496614913717831e-07,
|
| 3807 |
+
"loss": 2.3627,
|
| 3808 |
+
"step": 5370
|
| 3809 |
+
},
|
| 3810 |
+
{
|
| 3811 |
+
"epoch": 0.9794283633715638,
|
| 3812 |
+
"grad_norm": 4.944591045379639,
|
| 3813 |
+
"learning_rate": 1.2632536142609397e-07,
|
| 3814 |
+
"loss": 2.3662,
|
| 3815 |
+
"step": 5380
|
| 3816 |
+
},
|
| 3817 |
+
{
|
| 3818 |
+
"epoch": 0.9812488621882396,
|
| 3819 |
+
"grad_norm": 4.864638328552246,
|
| 3820 |
+
"learning_rate": 1.0496368333482442e-07,
|
| 3821 |
+
"loss": 2.3704,
|
| 3822 |
+
"step": 5390
|
| 3823 |
+
},
|
| 3824 |
+
{
|
| 3825 |
+
"epoch": 0.9830693610049154,
|
| 3826 |
+
"grad_norm": 4.991931438446045,
|
| 3827 |
+
"learning_rate": 8.557730278669906e-08,
|
| 3828 |
+
"loss": 2.3767,
|
| 3829 |
+
"step": 5400
|
| 3830 |
+
},
|
| 3831 |
+
{
|
| 3832 |
+
"epoch": 0.9848898598215912,
|
| 3833 |
+
"grad_norm": 4.382468223571777,
|
| 3834 |
+
"learning_rate": 6.816698727029614e-08,
|
| 3835 |
+
"loss": 2.4112,
|
| 3836 |
+
"step": 5410
|
| 3837 |
+
},
|
| 3838 |
+
{
|
| 3839 |
+
"epoch": 0.9867103586382668,
|
| 3840 |
+
"grad_norm": 44.841453552246094,
|
| 3841 |
+
"learning_rate": 5.273342604361631e-08,
|
| 3842 |
+
"loss": 2.4092,
|
| 3843 |
+
"step": 5420
|
| 3844 |
+
},
|
| 3845 |
+
{
|
| 3846 |
+
"epoch": 0.9885308574549426,
|
| 3847 |
+
"grad_norm": 4.815988063812256,
|
| 3848 |
+
"learning_rate": 3.9277230106832264e-08,
|
| 3849 |
+
"loss": 2.4256,
|
| 3850 |
+
"step": 5430
|
| 3851 |
+
},
|
| 3852 |
+
{
|
| 3853 |
+
"epoch": 0.9903513562716184,
|
| 3854 |
+
"grad_norm": 4.87392520904541,
|
| 3855 |
+
"learning_rate": 2.7798932178080274e-08,
|
| 3856 |
+
"loss": 2.3936,
|
| 3857 |
+
"step": 5440
|
| 3858 |
+
},
|
| 3859 |
+
{
|
| 3860 |
+
"epoch": 0.9921718550882942,
|
| 3861 |
+
"grad_norm": 5.1465559005737305,
|
| 3862 |
+
"learning_rate": 1.829898667237151e-08,
|
| 3863 |
+
"loss": 2.3805,
|
| 3864 |
+
"step": 5450
|
| 3865 |
+
},
|
| 3866 |
+
{
|
| 3867 |
+
"epoch": 0.99399235390497,
|
| 3868 |
+
"grad_norm": 4.486802101135254,
|
| 3869 |
+
"learning_rate": 1.0777769683617544e-08,
|
| 3870 |
+
"loss": 2.3492,
|
| 3871 |
+
"step": 5460
|
| 3872 |
+
},
|
| 3873 |
+
{
|
| 3874 |
+
"epoch": 0.9958128527216458,
|
| 3875 |
+
"grad_norm": 5.0049614906311035,
|
| 3876 |
+
"learning_rate": 5.2355789697144945e-09,
|
| 3877 |
+
"loss": 2.4414,
|
| 3878 |
+
"step": 5470
|
| 3879 |
+
},
|
| 3880 |
+
{
|
| 3881 |
+
"epoch": 0.9976333515383216,
|
| 3882 |
+
"grad_norm": 4.7070441246032715,
|
| 3883 |
+
"learning_rate": 1.6726339407857616e-09,
|
| 3884 |
+
"loss": 2.4294,
|
| 3885 |
+
"step": 5480
|
| 3886 |
+
},
|
| 3887 |
+
{
|
| 3888 |
+
"epoch": 0.9994538503549972,
|
| 3889 |
+
"grad_norm": 4.9832539558410645,
|
| 3890 |
+
"learning_rate": 8.907565046678557e-11,
|
| 3891 |
+
"loss": 2.3724,
|
| 3892 |
+
"step": 5490
|
| 3893 |
}
|
| 3894 |
],
|
| 3895 |
"logging_steps": 10,
|
|
|
|
| 3904 |
"should_evaluate": false,
|
| 3905 |
"should_log": false,
|
| 3906 |
"should_save": true,
|
| 3907 |
+
"should_training_stop": true
|
| 3908 |
},
|
| 3909 |
"attributes": {}
|
| 3910 |
}
|
| 3911 |
},
|
| 3912 |
+
"total_flos": 1.2843428615741768e+18,
|
| 3913 |
"train_batch_size": 4,
|
| 3914 |
"trial_name": null,
|
| 3915 |
"trial_params": null
|