Training in progress, step 62500
Browse files- last-checkpoint/config.json +2 -2
- last-checkpoint/generation_config.json +3 -4
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/rng_state.pth +2 -2
- last-checkpoint/scheduler.pt +2 -2
- last-checkpoint/trainer_state.json +6 -1058
- last-checkpoint/training_args.bin +2 -2
- model.safetensors +1 -1
last-checkpoint/config.json
CHANGED
|
@@ -9,7 +9,6 @@
|
|
| 9 |
"decoder_start_token_id": 0,
|
| 10 |
"dense_act_fn": "gelu_new",
|
| 11 |
"dropout_rate": 0.1,
|
| 12 |
-
"dtype": "float32",
|
| 13 |
"eos_token_id": 1,
|
| 14 |
"feed_forward_proj": "gated-gelu",
|
| 15 |
"gradient_checkpointing": false,
|
|
@@ -26,7 +25,8 @@
|
|
| 26 |
"relative_attention_max_distance": 128,
|
| 27 |
"relative_attention_num_buckets": 32,
|
| 28 |
"tie_word_embeddings": false,
|
| 29 |
-
"
|
|
|
|
| 30 |
"use_cache": true,
|
| 31 |
"vocab_size": 32102
|
| 32 |
}
|
|
|
|
| 9 |
"decoder_start_token_id": 0,
|
| 10 |
"dense_act_fn": "gelu_new",
|
| 11 |
"dropout_rate": 0.1,
|
|
|
|
| 12 |
"eos_token_id": 1,
|
| 13 |
"feed_forward_proj": "gated-gelu",
|
| 14 |
"gradient_checkpointing": false,
|
|
|
|
| 25 |
"relative_attention_max_distance": 128,
|
| 26 |
"relative_attention_num_buckets": 32,
|
| 27 |
"tie_word_embeddings": false,
|
| 28 |
+
"torch_dtype": "float32",
|
| 29 |
+
"transformers_version": "4.54.1",
|
| 30 |
"use_cache": true,
|
| 31 |
"vocab_size": 32102
|
| 32 |
}
|
last-checkpoint/generation_config.json
CHANGED
|
@@ -1,8 +1,7 @@
|
|
| 1 |
{
|
|
|
|
| 2 |
"decoder_start_token_id": 0,
|
| 3 |
-
"eos_token_id":
|
| 4 |
-
1
|
| 5 |
-
],
|
| 6 |
"pad_token_id": 0,
|
| 7 |
-
"transformers_version": "4.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
"decoder_start_token_id": 0,
|
| 4 |
+
"eos_token_id": 1,
|
|
|
|
|
|
|
| 5 |
"pad_token_id": 0,
|
| 6 |
+
"transformers_version": "4.54.1"
|
| 7 |
}
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 990185320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7d9005889552e029ae0f4ad6c88f14926c5f00dadfc9d159f3073b9bd1ed7e5
|
| 3 |
size 990185320
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbc2f647bccc068bd4031b56c17c84cc33de7c9cbadfbc3408e9aad88ac8b8cf
|
| 3 |
+
size 1980540922
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab3a3008648501d24764fe333294c85c928d239dc3b0530f6dceccd81d60bc59
|
| 3 |
+
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25f0268140834c71b91899b9158c0b00cd62b9c624ee206b9aa4ff7a0e9ff469
|
| 3 |
+
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3696,1064 +3696,12 @@
|
|
| 3696 |
"eval_samples_per_second": 47.97,
|
| 3697 |
"eval_steps_per_second": 6.004,
|
| 3698 |
"step": 50420
|
| 3699 |
-
},
|
| 3700 |
-
{
|
| 3701 |
-
"epoch": 18.634686346863468,
|
| 3702 |
-
"grad_norm": 6.383838176727295,
|
| 3703 |
-
"learning_rate": 2.3379546652609387e-05,
|
| 3704 |
-
"loss": 1.0453,
|
| 3705 |
-
"step": 50500
|
| 3706 |
-
},
|
| 3707 |
-
{
|
| 3708 |
-
"epoch": 18.634686346863468,
|
| 3709 |
-
"eval_bleu": 45.041038890307775,
|
| 3710 |
-
"eval_chrf": 69.41451182032911,
|
| 3711 |
-
"eval_loss": 1.023424744606018,
|
| 3712 |
-
"eval_runtime": 93.238,
|
| 3713 |
-
"eval_samples_per_second": 8.752,
|
| 3714 |
-
"eval_steps_per_second": 1.094,
|
| 3715 |
-
"step": 50500
|
| 3716 |
-
},
|
| 3717 |
-
{
|
| 3718 |
-
"epoch": 18.671586715867157,
|
| 3719 |
-
"grad_norm": 3.4755825996398926,
|
| 3720 |
-
"learning_rate": 2.332683183974697e-05,
|
| 3721 |
-
"loss": 1.0585,
|
| 3722 |
-
"step": 50600
|
| 3723 |
-
},
|
| 3724 |
-
{
|
| 3725 |
-
"epoch": 18.70848708487085,
|
| 3726 |
-
"grad_norm": 4.731332302093506,
|
| 3727 |
-
"learning_rate": 2.3274117026884556e-05,
|
| 3728 |
-
"loss": 1.0681,
|
| 3729 |
-
"step": 50700
|
| 3730 |
-
},
|
| 3731 |
-
{
|
| 3732 |
-
"epoch": 18.74538745387454,
|
| 3733 |
-
"grad_norm": 3.3240673542022705,
|
| 3734 |
-
"learning_rate": 2.3221402214022142e-05,
|
| 3735 |
-
"loss": 0.9811,
|
| 3736 |
-
"step": 50800
|
| 3737 |
-
},
|
| 3738 |
-
{
|
| 3739 |
-
"epoch": 18.782287822878228,
|
| 3740 |
-
"grad_norm": 4.01174783706665,
|
| 3741 |
-
"learning_rate": 2.316868740115973e-05,
|
| 3742 |
-
"loss": 0.9955,
|
| 3743 |
-
"step": 50900
|
| 3744 |
-
},
|
| 3745 |
-
{
|
| 3746 |
-
"epoch": 18.81918819188192,
|
| 3747 |
-
"grad_norm": 3.45139741897583,
|
| 3748 |
-
"learning_rate": 2.3115972588297315e-05,
|
| 3749 |
-
"loss": 1.0291,
|
| 3750 |
-
"step": 51000
|
| 3751 |
-
},
|
| 3752 |
-
{
|
| 3753 |
-
"epoch": 18.81918819188192,
|
| 3754 |
-
"eval_bleu": 45.05869645905028,
|
| 3755 |
-
"eval_chrf": 69.40206039350805,
|
| 3756 |
-
"eval_loss": 1.0173133611679077,
|
| 3757 |
-
"eval_runtime": 93.4838,
|
| 3758 |
-
"eval_samples_per_second": 8.729,
|
| 3759 |
-
"eval_steps_per_second": 1.091,
|
| 3760 |
-
"step": 51000
|
| 3761 |
-
},
|
| 3762 |
-
{
|
| 3763 |
-
"epoch": 18.85608856088561,
|
| 3764 |
-
"grad_norm": 5.509322643280029,
|
| 3765 |
-
"learning_rate": 2.3063257775434898e-05,
|
| 3766 |
-
"loss": 1.0201,
|
| 3767 |
-
"step": 51100
|
| 3768 |
-
},
|
| 3769 |
-
{
|
| 3770 |
-
"epoch": 18.8929889298893,
|
| 3771 |
-
"grad_norm": 3.0491347312927246,
|
| 3772 |
-
"learning_rate": 2.3010542962572484e-05,
|
| 3773 |
-
"loss": 1.0014,
|
| 3774 |
-
"step": 51200
|
| 3775 |
-
},
|
| 3776 |
-
{
|
| 3777 |
-
"epoch": 18.929889298892988,
|
| 3778 |
-
"grad_norm": 2.939685821533203,
|
| 3779 |
-
"learning_rate": 2.295782814971007e-05,
|
| 3780 |
-
"loss": 1.0372,
|
| 3781 |
-
"step": 51300
|
| 3782 |
-
},
|
| 3783 |
-
{
|
| 3784 |
-
"epoch": 18.96678966789668,
|
| 3785 |
-
"grad_norm": 6.572051525115967,
|
| 3786 |
-
"learning_rate": 2.2905113336847657e-05,
|
| 3787 |
-
"loss": 1.0221,
|
| 3788 |
-
"step": 51400
|
| 3789 |
-
},
|
| 3790 |
-
{
|
| 3791 |
-
"epoch": 19.00369003690037,
|
| 3792 |
-
"grad_norm": 6.473498821258545,
|
| 3793 |
-
"learning_rate": 2.285239852398524e-05,
|
| 3794 |
-
"loss": 1.0437,
|
| 3795 |
-
"step": 51500
|
| 3796 |
-
},
|
| 3797 |
-
{
|
| 3798 |
-
"epoch": 19.00369003690037,
|
| 3799 |
-
"eval_bleu": 45.377464139800466,
|
| 3800 |
-
"eval_chrf": 69.57987345624291,
|
| 3801 |
-
"eval_loss": 1.0092017650604248,
|
| 3802 |
-
"eval_runtime": 92.7751,
|
| 3803 |
-
"eval_samples_per_second": 8.795,
|
| 3804 |
-
"eval_steps_per_second": 1.099,
|
| 3805 |
-
"step": 51500
|
| 3806 |
-
},
|
| 3807 |
-
{
|
| 3808 |
-
"epoch": 19.04059040590406,
|
| 3809 |
-
"grad_norm": 5.248044967651367,
|
| 3810 |
-
"learning_rate": 2.2799683711122826e-05,
|
| 3811 |
-
"loss": 1.0291,
|
| 3812 |
-
"step": 51600
|
| 3813 |
-
},
|
| 3814 |
-
{
|
| 3815 |
-
"epoch": 19.077490774907748,
|
| 3816 |
-
"grad_norm": 3.91925311088562,
|
| 3817 |
-
"learning_rate": 2.2746968898260412e-05,
|
| 3818 |
-
"loss": 1.0228,
|
| 3819 |
-
"step": 51700
|
| 3820 |
-
},
|
| 3821 |
-
{
|
| 3822 |
-
"epoch": 19.11439114391144,
|
| 3823 |
-
"grad_norm": 4.581681728363037,
|
| 3824 |
-
"learning_rate": 2.2694254085398e-05,
|
| 3825 |
-
"loss": 0.9896,
|
| 3826 |
-
"step": 51800
|
| 3827 |
-
},
|
| 3828 |
-
{
|
| 3829 |
-
"epoch": 19.15129151291513,
|
| 3830 |
-
"grad_norm": 3.2478437423706055,
|
| 3831 |
-
"learning_rate": 2.2641539272535585e-05,
|
| 3832 |
-
"loss": 0.9898,
|
| 3833 |
-
"step": 51900
|
| 3834 |
-
},
|
| 3835 |
-
{
|
| 3836 |
-
"epoch": 19.18819188191882,
|
| 3837 |
-
"grad_norm": 4.589653015136719,
|
| 3838 |
-
"learning_rate": 2.2588824459673168e-05,
|
| 3839 |
-
"loss": 0.9883,
|
| 3840 |
-
"step": 52000
|
| 3841 |
-
},
|
| 3842 |
-
{
|
| 3843 |
-
"epoch": 19.18819188191882,
|
| 3844 |
-
"eval_bleu": 45.182278437943154,
|
| 3845 |
-
"eval_chrf": 69.60795584033274,
|
| 3846 |
-
"eval_loss": 0.9994527101516724,
|
| 3847 |
-
"eval_runtime": 93.0344,
|
| 3848 |
-
"eval_samples_per_second": 8.771,
|
| 3849 |
-
"eval_steps_per_second": 1.096,
|
| 3850 |
-
"step": 52000
|
| 3851 |
-
},
|
| 3852 |
-
{
|
| 3853 |
-
"epoch": 19.225092250922508,
|
| 3854 |
-
"grad_norm": 3.865722179412842,
|
| 3855 |
-
"learning_rate": 2.2536109646810754e-05,
|
| 3856 |
-
"loss": 0.9701,
|
| 3857 |
-
"step": 52100
|
| 3858 |
-
},
|
| 3859 |
-
{
|
| 3860 |
-
"epoch": 19.2619926199262,
|
| 3861 |
-
"grad_norm": 3.5048811435699463,
|
| 3862 |
-
"learning_rate": 2.248339483394834e-05,
|
| 3863 |
-
"loss": 0.986,
|
| 3864 |
-
"step": 52200
|
| 3865 |
-
},
|
| 3866 |
-
{
|
| 3867 |
-
"epoch": 19.29889298892989,
|
| 3868 |
-
"grad_norm": 4.171955585479736,
|
| 3869 |
-
"learning_rate": 2.2430680021085927e-05,
|
| 3870 |
-
"loss": 1.0095,
|
| 3871 |
-
"step": 52300
|
| 3872 |
-
},
|
| 3873 |
-
{
|
| 3874 |
-
"epoch": 19.33579335793358,
|
| 3875 |
-
"grad_norm": 2.502441644668579,
|
| 3876 |
-
"learning_rate": 2.237796520822351e-05,
|
| 3877 |
-
"loss": 0.9996,
|
| 3878 |
-
"step": 52400
|
| 3879 |
-
},
|
| 3880 |
-
{
|
| 3881 |
-
"epoch": 19.372693726937268,
|
| 3882 |
-
"grad_norm": 4.4848737716674805,
|
| 3883 |
-
"learning_rate": 2.2325250395361096e-05,
|
| 3884 |
-
"loss": 0.9496,
|
| 3885 |
-
"step": 52500
|
| 3886 |
-
},
|
| 3887 |
-
{
|
| 3888 |
-
"epoch": 19.372693726937268,
|
| 3889 |
-
"eval_bleu": 45.2363393703102,
|
| 3890 |
-
"eval_chrf": 69.37454521026567,
|
| 3891 |
-
"eval_loss": 0.9982830882072449,
|
| 3892 |
-
"eval_runtime": 93.5422,
|
| 3893 |
-
"eval_samples_per_second": 8.723,
|
| 3894 |
-
"eval_steps_per_second": 1.09,
|
| 3895 |
-
"step": 52500
|
| 3896 |
-
},
|
| 3897 |
-
{
|
| 3898 |
-
"epoch": 19.40959409594096,
|
| 3899 |
-
"grad_norm": 4.602016925811768,
|
| 3900 |
-
"learning_rate": 2.2272535582498682e-05,
|
| 3901 |
-
"loss": 0.9874,
|
| 3902 |
-
"step": 52600
|
| 3903 |
-
},
|
| 3904 |
-
{
|
| 3905 |
-
"epoch": 19.44649446494465,
|
| 3906 |
-
"grad_norm": 3.7375121116638184,
|
| 3907 |
-
"learning_rate": 2.221982076963627e-05,
|
| 3908 |
-
"loss": 0.9843,
|
| 3909 |
-
"step": 52700
|
| 3910 |
-
},
|
| 3911 |
-
{
|
| 3912 |
-
"epoch": 19.48339483394834,
|
| 3913 |
-
"grad_norm": 3.5808184146881104,
|
| 3914 |
-
"learning_rate": 2.2167105956773855e-05,
|
| 3915 |
-
"loss": 1.0236,
|
| 3916 |
-
"step": 52800
|
| 3917 |
-
},
|
| 3918 |
-
{
|
| 3919 |
-
"epoch": 19.52029520295203,
|
| 3920 |
-
"grad_norm": 1.8931940793991089,
|
| 3921 |
-
"learning_rate": 2.2114391143911438e-05,
|
| 3922 |
-
"loss": 1.0546,
|
| 3923 |
-
"step": 52900
|
| 3924 |
-
},
|
| 3925 |
-
{
|
| 3926 |
-
"epoch": 19.55719557195572,
|
| 3927 |
-
"grad_norm": 3.6316375732421875,
|
| 3928 |
-
"learning_rate": 2.2061676331049024e-05,
|
| 3929 |
-
"loss": 0.989,
|
| 3930 |
-
"step": 53000
|
| 3931 |
-
},
|
| 3932 |
-
{
|
| 3933 |
-
"epoch": 19.55719557195572,
|
| 3934 |
-
"eval_bleu": 45.49473361839618,
|
| 3935 |
-
"eval_chrf": 69.6430207436413,
|
| 3936 |
-
"eval_loss": 0.9908942580223083,
|
| 3937 |
-
"eval_runtime": 94.2497,
|
| 3938 |
-
"eval_samples_per_second": 8.658,
|
| 3939 |
-
"eval_steps_per_second": 1.082,
|
| 3940 |
-
"step": 53000
|
| 3941 |
-
},
|
| 3942 |
-
{
|
| 3943 |
-
"epoch": 19.59409594095941,
|
| 3944 |
-
"grad_norm": 4.76518440246582,
|
| 3945 |
-
"learning_rate": 2.200896151818661e-05,
|
| 3946 |
-
"loss": 1.0023,
|
| 3947 |
-
"step": 53100
|
| 3948 |
-
},
|
| 3949 |
-
{
|
| 3950 |
-
"epoch": 19.6309963099631,
|
| 3951 |
-
"grad_norm": 3.2376883029937744,
|
| 3952 |
-
"learning_rate": 2.1956246705324197e-05,
|
| 3953 |
-
"loss": 1.0358,
|
| 3954 |
-
"step": 53200
|
| 3955 |
-
},
|
| 3956 |
-
{
|
| 3957 |
-
"epoch": 19.66789667896679,
|
| 3958 |
-
"grad_norm": 4.3444318771362305,
|
| 3959 |
-
"learning_rate": 2.190353189246178e-05,
|
| 3960 |
-
"loss": 1.0315,
|
| 3961 |
-
"step": 53300
|
| 3962 |
-
},
|
| 3963 |
-
{
|
| 3964 |
-
"epoch": 19.70479704797048,
|
| 3965 |
-
"grad_norm": 5.0184102058410645,
|
| 3966 |
-
"learning_rate": 2.1850817079599366e-05,
|
| 3967 |
-
"loss": 1.0637,
|
| 3968 |
-
"step": 53400
|
| 3969 |
-
},
|
| 3970 |
-
{
|
| 3971 |
-
"epoch": 19.74169741697417,
|
| 3972 |
-
"grad_norm": 3.515033483505249,
|
| 3973 |
-
"learning_rate": 2.1798102266736953e-05,
|
| 3974 |
-
"loss": 1.0379,
|
| 3975 |
-
"step": 53500
|
| 3976 |
-
},
|
| 3977 |
-
{
|
| 3978 |
-
"epoch": 19.74169741697417,
|
| 3979 |
-
"eval_bleu": 45.418676252884964,
|
| 3980 |
-
"eval_chrf": 69.78137735109217,
|
| 3981 |
-
"eval_loss": 0.9870654940605164,
|
| 3982 |
-
"eval_runtime": 93.9848,
|
| 3983 |
-
"eval_samples_per_second": 8.682,
|
| 3984 |
-
"eval_steps_per_second": 1.085,
|
| 3985 |
-
"step": 53500
|
| 3986 |
-
},
|
| 3987 |
-
{
|
| 3988 |
-
"epoch": 19.77859778597786,
|
| 3989 |
-
"grad_norm": 3.5330288410186768,
|
| 3990 |
-
"learning_rate": 2.1745387453874542e-05,
|
| 3991 |
-
"loss": 0.9752,
|
| 3992 |
-
"step": 53600
|
| 3993 |
-
},
|
| 3994 |
-
{
|
| 3995 |
-
"epoch": 19.81549815498155,
|
| 3996 |
-
"grad_norm": 4.465782642364502,
|
| 3997 |
-
"learning_rate": 2.1692672641012125e-05,
|
| 3998 |
-
"loss": 1.0044,
|
| 3999 |
-
"step": 53700
|
| 4000 |
-
},
|
| 4001 |
-
{
|
| 4002 |
-
"epoch": 19.85239852398524,
|
| 4003 |
-
"grad_norm": 5.873017311096191,
|
| 4004 |
-
"learning_rate": 2.163995782814971e-05,
|
| 4005 |
-
"loss": 1.0744,
|
| 4006 |
-
"step": 53800
|
| 4007 |
-
},
|
| 4008 |
-
{
|
| 4009 |
-
"epoch": 19.88929889298893,
|
| 4010 |
-
"grad_norm": 3.305344581604004,
|
| 4011 |
-
"learning_rate": 2.1587243015287298e-05,
|
| 4012 |
-
"loss": 0.8986,
|
| 4013 |
-
"step": 53900
|
| 4014 |
-
},
|
| 4015 |
-
{
|
| 4016 |
-
"epoch": 19.92619926199262,
|
| 4017 |
-
"grad_norm": 3.4520583152770996,
|
| 4018 |
-
"learning_rate": 2.1534528202424884e-05,
|
| 4019 |
-
"loss": 1.0354,
|
| 4020 |
-
"step": 54000
|
| 4021 |
-
},
|
| 4022 |
-
{
|
| 4023 |
-
"epoch": 19.92619926199262,
|
| 4024 |
-
"eval_bleu": 45.40658414024313,
|
| 4025 |
-
"eval_chrf": 69.79824864043482,
|
| 4026 |
-
"eval_loss": 0.9898651242256165,
|
| 4027 |
-
"eval_runtime": 93.077,
|
| 4028 |
-
"eval_samples_per_second": 8.767,
|
| 4029 |
-
"eval_steps_per_second": 1.096,
|
| 4030 |
-
"step": 54000
|
| 4031 |
-
},
|
| 4032 |
-
{
|
| 4033 |
-
"epoch": 19.96309963099631,
|
| 4034 |
-
"grad_norm": 4.757823944091797,
|
| 4035 |
-
"learning_rate": 2.148181338956247e-05,
|
| 4036 |
-
"loss": 1.0031,
|
| 4037 |
-
"step": 54100
|
| 4038 |
-
},
|
| 4039 |
-
{
|
| 4040 |
-
"epoch": 20.0,
|
| 4041 |
-
"grad_norm": 3.4500021934509277,
|
| 4042 |
-
"learning_rate": 2.1429098576700054e-05,
|
| 4043 |
-
"loss": 1.0126,
|
| 4044 |
-
"step": 54200
|
| 4045 |
-
},
|
| 4046 |
-
{
|
| 4047 |
-
"epoch": 20.03690036900369,
|
| 4048 |
-
"grad_norm": 3.2020909786224365,
|
| 4049 |
-
"learning_rate": 2.137638376383764e-05,
|
| 4050 |
-
"loss": 0.96,
|
| 4051 |
-
"step": 54300
|
| 4052 |
-
},
|
| 4053 |
-
{
|
| 4054 |
-
"epoch": 20.07380073800738,
|
| 4055 |
-
"grad_norm": 4.481809616088867,
|
| 4056 |
-
"learning_rate": 2.1323668950975226e-05,
|
| 4057 |
-
"loss": 0.9614,
|
| 4058 |
-
"step": 54400
|
| 4059 |
-
},
|
| 4060 |
-
{
|
| 4061 |
-
"epoch": 20.11070110701107,
|
| 4062 |
-
"grad_norm": 3.2812538146972656,
|
| 4063 |
-
"learning_rate": 2.1270954138112813e-05,
|
| 4064 |
-
"loss": 0.9636,
|
| 4065 |
-
"step": 54500
|
| 4066 |
-
},
|
| 4067 |
-
{
|
| 4068 |
-
"epoch": 20.11070110701107,
|
| 4069 |
-
"eval_bleu": 45.41666654146133,
|
| 4070 |
-
"eval_chrf": 69.96897005750033,
|
| 4071 |
-
"eval_loss": 0.9812939167022705,
|
| 4072 |
-
"eval_runtime": 93.5969,
|
| 4073 |
-
"eval_samples_per_second": 8.718,
|
| 4074 |
-
"eval_steps_per_second": 1.09,
|
| 4075 |
-
"step": 54500
|
| 4076 |
-
},
|
| 4077 |
-
{
|
| 4078 |
-
"epoch": 20.14760147601476,
|
| 4079 |
-
"grad_norm": 4.628011703491211,
|
| 4080 |
-
"learning_rate": 2.1218239325250395e-05,
|
| 4081 |
-
"loss": 0.9792,
|
| 4082 |
-
"step": 54600
|
| 4083 |
-
},
|
| 4084 |
-
{
|
| 4085 |
-
"epoch": 20.18450184501845,
|
| 4086 |
-
"grad_norm": 4.964925765991211,
|
| 4087 |
-
"learning_rate": 2.1165524512387982e-05,
|
| 4088 |
-
"loss": 0.9477,
|
| 4089 |
-
"step": 54700
|
| 4090 |
-
},
|
| 4091 |
-
{
|
| 4092 |
-
"epoch": 20.22140221402214,
|
| 4093 |
-
"grad_norm": 3.1874473094940186,
|
| 4094 |
-
"learning_rate": 2.1112809699525568e-05,
|
| 4095 |
-
"loss": 0.9928,
|
| 4096 |
-
"step": 54800
|
| 4097 |
-
},
|
| 4098 |
-
{
|
| 4099 |
-
"epoch": 20.25830258302583,
|
| 4100 |
-
"grad_norm": 3.8035147190093994,
|
| 4101 |
-
"learning_rate": 2.1060094886663154e-05,
|
| 4102 |
-
"loss": 0.9688,
|
| 4103 |
-
"step": 54900
|
| 4104 |
-
},
|
| 4105 |
-
{
|
| 4106 |
-
"epoch": 20.29520295202952,
|
| 4107 |
-
"grad_norm": 4.595950603485107,
|
| 4108 |
-
"learning_rate": 2.100738007380074e-05,
|
| 4109 |
-
"loss": 0.9575,
|
| 4110 |
-
"step": 55000
|
| 4111 |
-
},
|
| 4112 |
-
{
|
| 4113 |
-
"epoch": 20.29520295202952,
|
| 4114 |
-
"eval_bleu": 45.26791746653591,
|
| 4115 |
-
"eval_chrf": 69.59072341748964,
|
| 4116 |
-
"eval_loss": 0.9762688279151917,
|
| 4117 |
-
"eval_runtime": 93.4589,
|
| 4118 |
-
"eval_samples_per_second": 8.731,
|
| 4119 |
-
"eval_steps_per_second": 1.091,
|
| 4120 |
-
"step": 55000
|
| 4121 |
-
},
|
| 4122 |
-
{
|
| 4123 |
-
"epoch": 20.33210332103321,
|
| 4124 |
-
"grad_norm": 4.018571853637695,
|
| 4125 |
-
"learning_rate": 2.0954665260938324e-05,
|
| 4126 |
-
"loss": 0.9425,
|
| 4127 |
-
"step": 55100
|
| 4128 |
-
},
|
| 4129 |
-
{
|
| 4130 |
-
"epoch": 20.3690036900369,
|
| 4131 |
-
"grad_norm": 5.99881649017334,
|
| 4132 |
-
"learning_rate": 2.090195044807591e-05,
|
| 4133 |
-
"loss": 0.9874,
|
| 4134 |
-
"step": 55200
|
| 4135 |
-
},
|
| 4136 |
-
{
|
| 4137 |
-
"epoch": 20.40590405904059,
|
| 4138 |
-
"grad_norm": 3.563143253326416,
|
| 4139 |
-
"learning_rate": 2.0849235635213496e-05,
|
| 4140 |
-
"loss": 0.9795,
|
| 4141 |
-
"step": 55300
|
| 4142 |
-
},
|
| 4143 |
-
{
|
| 4144 |
-
"epoch": 20.44280442804428,
|
| 4145 |
-
"grad_norm": 5.876094341278076,
|
| 4146 |
-
"learning_rate": 2.0796520822351083e-05,
|
| 4147 |
-
"loss": 0.9751,
|
| 4148 |
-
"step": 55400
|
| 4149 |
-
},
|
| 4150 |
-
{
|
| 4151 |
-
"epoch": 20.47970479704797,
|
| 4152 |
-
"grad_norm": 2.8638973236083984,
|
| 4153 |
-
"learning_rate": 2.0743806009488666e-05,
|
| 4154 |
-
"loss": 0.9223,
|
| 4155 |
-
"step": 55500
|
| 4156 |
-
},
|
| 4157 |
-
{
|
| 4158 |
-
"epoch": 20.47970479704797,
|
| 4159 |
-
"eval_bleu": 45.916257953363164,
|
| 4160 |
-
"eval_chrf": 69.76962902573563,
|
| 4161 |
-
"eval_loss": 0.9757766127586365,
|
| 4162 |
-
"eval_runtime": 92.5471,
|
| 4163 |
-
"eval_samples_per_second": 8.817,
|
| 4164 |
-
"eval_steps_per_second": 1.102,
|
| 4165 |
-
"step": 55500
|
| 4166 |
-
},
|
| 4167 |
-
{
|
| 4168 |
-
"epoch": 20.51660516605166,
|
| 4169 |
-
"grad_norm": 6.471508502960205,
|
| 4170 |
-
"learning_rate": 2.0691091196626252e-05,
|
| 4171 |
-
"loss": 0.9933,
|
| 4172 |
-
"step": 55600
|
| 4173 |
-
},
|
| 4174 |
-
{
|
| 4175 |
-
"epoch": 20.55350553505535,
|
| 4176 |
-
"grad_norm": 3.317532539367676,
|
| 4177 |
-
"learning_rate": 2.0638376383763838e-05,
|
| 4178 |
-
"loss": 0.9708,
|
| 4179 |
-
"step": 55700
|
| 4180 |
-
},
|
| 4181 |
-
{
|
| 4182 |
-
"epoch": 20.59040590405904,
|
| 4183 |
-
"grad_norm": 3.4863741397857666,
|
| 4184 |
-
"learning_rate": 2.0585661570901425e-05,
|
| 4185 |
-
"loss": 0.9717,
|
| 4186 |
-
"step": 55800
|
| 4187 |
-
},
|
| 4188 |
-
{
|
| 4189 |
-
"epoch": 20.627306273062732,
|
| 4190 |
-
"grad_norm": 3.6253013610839844,
|
| 4191 |
-
"learning_rate": 2.053294675803901e-05,
|
| 4192 |
-
"loss": 0.9628,
|
| 4193 |
-
"step": 55900
|
| 4194 |
-
},
|
| 4195 |
-
{
|
| 4196 |
-
"epoch": 20.66420664206642,
|
| 4197 |
-
"grad_norm": 4.725039958953857,
|
| 4198 |
-
"learning_rate": 2.0480231945176594e-05,
|
| 4199 |
-
"loss": 0.9179,
|
| 4200 |
-
"step": 56000
|
| 4201 |
-
},
|
| 4202 |
-
{
|
| 4203 |
-
"epoch": 20.66420664206642,
|
| 4204 |
-
"eval_bleu": 46.28172619461589,
|
| 4205 |
-
"eval_chrf": 70.11693794292695,
|
| 4206 |
-
"eval_loss": 0.968273401260376,
|
| 4207 |
-
"eval_runtime": 93.3175,
|
| 4208 |
-
"eval_samples_per_second": 8.744,
|
| 4209 |
-
"eval_steps_per_second": 1.093,
|
| 4210 |
-
"step": 56000
|
| 4211 |
-
},
|
| 4212 |
-
{
|
| 4213 |
-
"epoch": 20.70110701107011,
|
| 4214 |
-
"grad_norm": 2.480011463165283,
|
| 4215 |
-
"learning_rate": 2.042751713231418e-05,
|
| 4216 |
-
"loss": 0.9707,
|
| 4217 |
-
"step": 56100
|
| 4218 |
-
},
|
| 4219 |
-
{
|
| 4220 |
-
"epoch": 20.7380073800738,
|
| 4221 |
-
"grad_norm": 4.387946605682373,
|
| 4222 |
-
"learning_rate": 2.0374802319451766e-05,
|
| 4223 |
-
"loss": 0.9964,
|
| 4224 |
-
"step": 56200
|
| 4225 |
-
},
|
| 4226 |
-
{
|
| 4227 |
-
"epoch": 20.774907749077492,
|
| 4228 |
-
"grad_norm": 4.427938461303711,
|
| 4229 |
-
"learning_rate": 2.0322087506589353e-05,
|
| 4230 |
-
"loss": 0.947,
|
| 4231 |
-
"step": 56300
|
| 4232 |
-
},
|
| 4233 |
-
{
|
| 4234 |
-
"epoch": 20.81180811808118,
|
| 4235 |
-
"grad_norm": 3.5348544120788574,
|
| 4236 |
-
"learning_rate": 2.0269372693726936e-05,
|
| 4237 |
-
"loss": 0.9539,
|
| 4238 |
-
"step": 56400
|
| 4239 |
-
},
|
| 4240 |
-
{
|
| 4241 |
-
"epoch": 20.84870848708487,
|
| 4242 |
-
"grad_norm": 3.754854440689087,
|
| 4243 |
-
"learning_rate": 2.0216657880864522e-05,
|
| 4244 |
-
"loss": 0.9696,
|
| 4245 |
-
"step": 56500
|
| 4246 |
-
},
|
| 4247 |
-
{
|
| 4248 |
-
"epoch": 20.84870848708487,
|
| 4249 |
-
"eval_bleu": 46.21577470581903,
|
| 4250 |
-
"eval_chrf": 70.26264135399155,
|
| 4251 |
-
"eval_loss": 0.9621157646179199,
|
| 4252 |
-
"eval_runtime": 92.5871,
|
| 4253 |
-
"eval_samples_per_second": 8.813,
|
| 4254 |
-
"eval_steps_per_second": 1.102,
|
| 4255 |
-
"step": 56500
|
| 4256 |
-
},
|
| 4257 |
-
{
|
| 4258 |
-
"epoch": 20.88560885608856,
|
| 4259 |
-
"grad_norm": 6.947758197784424,
|
| 4260 |
-
"learning_rate": 2.016394306800211e-05,
|
| 4261 |
-
"loss": 1.0204,
|
| 4262 |
-
"step": 56600
|
| 4263 |
-
},
|
| 4264 |
-
{
|
| 4265 |
-
"epoch": 20.922509225092252,
|
| 4266 |
-
"grad_norm": 4.733431339263916,
|
| 4267 |
-
"learning_rate": 2.0111228255139695e-05,
|
| 4268 |
-
"loss": 1.0111,
|
| 4269 |
-
"step": 56700
|
| 4270 |
-
},
|
| 4271 |
-
{
|
| 4272 |
-
"epoch": 20.95940959409594,
|
| 4273 |
-
"grad_norm": 4.140303134918213,
|
| 4274 |
-
"learning_rate": 2.005851344227728e-05,
|
| 4275 |
-
"loss": 0.9599,
|
| 4276 |
-
"step": 56800
|
| 4277 |
-
},
|
| 4278 |
-
{
|
| 4279 |
-
"epoch": 20.99630996309963,
|
| 4280 |
-
"grad_norm": 6.344222068786621,
|
| 4281 |
-
"learning_rate": 2.0005798629414864e-05,
|
| 4282 |
-
"loss": 0.9768,
|
| 4283 |
-
"step": 56900
|
| 4284 |
-
},
|
| 4285 |
-
{
|
| 4286 |
-
"epoch": 21.03321033210332,
|
| 4287 |
-
"grad_norm": 4.12951135635376,
|
| 4288 |
-
"learning_rate": 1.995308381655245e-05,
|
| 4289 |
-
"loss": 1.002,
|
| 4290 |
-
"step": 57000
|
| 4291 |
-
},
|
| 4292 |
-
{
|
| 4293 |
-
"epoch": 21.03321033210332,
|
| 4294 |
-
"eval_bleu": 45.90122957003376,
|
| 4295 |
-
"eval_chrf": 70.13557288840417,
|
| 4296 |
-
"eval_loss": 0.9641706943511963,
|
| 4297 |
-
"eval_runtime": 92.0161,
|
| 4298 |
-
"eval_samples_per_second": 8.868,
|
| 4299 |
-
"eval_steps_per_second": 1.109,
|
| 4300 |
-
"step": 57000
|
| 4301 |
-
},
|
| 4302 |
-
{
|
| 4303 |
-
"epoch": 21.070110701107012,
|
| 4304 |
-
"grad_norm": 2.7685956954956055,
|
| 4305 |
-
"learning_rate": 1.990036900369004e-05,
|
| 4306 |
-
"loss": 0.9171,
|
| 4307 |
-
"step": 57100
|
| 4308 |
-
},
|
| 4309 |
-
{
|
| 4310 |
-
"epoch": 21.1070110701107,
|
| 4311 |
-
"grad_norm": 3.8239712715148926,
|
| 4312 |
-
"learning_rate": 1.9847654190827626e-05,
|
| 4313 |
-
"loss": 0.935,
|
| 4314 |
-
"step": 57200
|
| 4315 |
-
},
|
| 4316 |
-
{
|
| 4317 |
-
"epoch": 21.14391143911439,
|
| 4318 |
-
"grad_norm": 3.2187681198120117,
|
| 4319 |
-
"learning_rate": 1.979493937796521e-05,
|
| 4320 |
-
"loss": 0.9205,
|
| 4321 |
-
"step": 57300
|
| 4322 |
-
},
|
| 4323 |
-
{
|
| 4324 |
-
"epoch": 21.18081180811808,
|
| 4325 |
-
"grad_norm": 3.0021488666534424,
|
| 4326 |
-
"learning_rate": 1.9742224565102796e-05,
|
| 4327 |
-
"loss": 0.9241,
|
| 4328 |
-
"step": 57400
|
| 4329 |
-
},
|
| 4330 |
-
{
|
| 4331 |
-
"epoch": 21.217712177121772,
|
| 4332 |
-
"grad_norm": 2.5294923782348633,
|
| 4333 |
-
"learning_rate": 1.9689509752240382e-05,
|
| 4334 |
-
"loss": 0.9327,
|
| 4335 |
-
"step": 57500
|
| 4336 |
-
},
|
| 4337 |
-
{
|
| 4338 |
-
"epoch": 21.217712177121772,
|
| 4339 |
-
"eval_bleu": 46.11265350722499,
|
| 4340 |
-
"eval_chrf": 70.36260746480464,
|
| 4341 |
-
"eval_loss": 0.9620640873908997,
|
| 4342 |
-
"eval_runtime": 93.0763,
|
| 4343 |
-
"eval_samples_per_second": 8.767,
|
| 4344 |
-
"eval_steps_per_second": 1.096,
|
| 4345 |
-
"step": 57500
|
| 4346 |
-
},
|
| 4347 |
-
{
|
| 4348 |
-
"epoch": 21.25461254612546,
|
| 4349 |
-
"grad_norm": 3.535879135131836,
|
| 4350 |
-
"learning_rate": 1.9636794939377968e-05,
|
| 4351 |
-
"loss": 0.9289,
|
| 4352 |
-
"step": 57600
|
| 4353 |
-
},
|
| 4354 |
-
{
|
| 4355 |
-
"epoch": 21.29151291512915,
|
| 4356 |
-
"grad_norm": 4.959736347198486,
|
| 4357 |
-
"learning_rate": 1.958408012651555e-05,
|
| 4358 |
-
"loss": 0.9379,
|
| 4359 |
-
"step": 57700
|
| 4360 |
-
},
|
| 4361 |
-
{
|
| 4362 |
-
"epoch": 21.328413284132843,
|
| 4363 |
-
"grad_norm": 4.482137203216553,
|
| 4364 |
-
"learning_rate": 1.9531365313653138e-05,
|
| 4365 |
-
"loss": 0.9173,
|
| 4366 |
-
"step": 57800
|
| 4367 |
-
},
|
| 4368 |
-
{
|
| 4369 |
-
"epoch": 21.365313653136532,
|
| 4370 |
-
"grad_norm": 4.553799629211426,
|
| 4371 |
-
"learning_rate": 1.9478650500790724e-05,
|
| 4372 |
-
"loss": 0.8799,
|
| 4373 |
-
"step": 57900
|
| 4374 |
-
},
|
| 4375 |
-
{
|
| 4376 |
-
"epoch": 21.40221402214022,
|
| 4377 |
-
"grad_norm": 5.203136444091797,
|
| 4378 |
-
"learning_rate": 1.942593568792831e-05,
|
| 4379 |
-
"loss": 0.8895,
|
| 4380 |
-
"step": 58000
|
| 4381 |
-
},
|
| 4382 |
-
{
|
| 4383 |
-
"epoch": 21.40221402214022,
|
| 4384 |
-
"eval_bleu": 46.48714823641416,
|
| 4385 |
-
"eval_chrf": 70.45381756168473,
|
| 4386 |
-
"eval_loss": 0.9568957090377808,
|
| 4387 |
-
"eval_runtime": 92.5969,
|
| 4388 |
-
"eval_samples_per_second": 8.812,
|
| 4389 |
-
"eval_steps_per_second": 1.102,
|
| 4390 |
-
"step": 58000
|
| 4391 |
-
},
|
| 4392 |
-
{
|
| 4393 |
-
"epoch": 21.43911439114391,
|
| 4394 |
-
"grad_norm": 4.262024879455566,
|
| 4395 |
-
"learning_rate": 1.9373220875065897e-05,
|
| 4396 |
-
"loss": 0.959,
|
| 4397 |
-
"step": 58100
|
| 4398 |
-
},
|
| 4399 |
-
{
|
| 4400 |
-
"epoch": 21.476014760147603,
|
| 4401 |
-
"grad_norm": 3.5024478435516357,
|
| 4402 |
-
"learning_rate": 1.932050606220348e-05,
|
| 4403 |
-
"loss": 0.9557,
|
| 4404 |
-
"step": 58200
|
| 4405 |
-
},
|
| 4406 |
-
{
|
| 4407 |
-
"epoch": 21.512915129151292,
|
| 4408 |
-
"grad_norm": 5.715458393096924,
|
| 4409 |
-
"learning_rate": 1.9267791249341066e-05,
|
| 4410 |
-
"loss": 0.9356,
|
| 4411 |
-
"step": 58300
|
| 4412 |
-
},
|
| 4413 |
-
{
|
| 4414 |
-
"epoch": 21.54981549815498,
|
| 4415 |
-
"grad_norm": 3.2510526180267334,
|
| 4416 |
-
"learning_rate": 1.9215076436478652e-05,
|
| 4417 |
-
"loss": 0.9177,
|
| 4418 |
-
"step": 58400
|
| 4419 |
-
},
|
| 4420 |
-
{
|
| 4421 |
-
"epoch": 21.58671586715867,
|
| 4422 |
-
"grad_norm": 3.2294719219207764,
|
| 4423 |
-
"learning_rate": 1.916236162361624e-05,
|
| 4424 |
-
"loss": 0.955,
|
| 4425 |
-
"step": 58500
|
| 4426 |
-
},
|
| 4427 |
-
{
|
| 4428 |
-
"epoch": 21.58671586715867,
|
| 4429 |
-
"eval_bleu": 46.45543563139011,
|
| 4430 |
-
"eval_chrf": 70.41267683825878,
|
| 4431 |
-
"eval_loss": 0.9556043148040771,
|
| 4432 |
-
"eval_runtime": 93.5764,
|
| 4433 |
-
"eval_samples_per_second": 8.72,
|
| 4434 |
-
"eval_steps_per_second": 1.09,
|
| 4435 |
-
"step": 58500
|
| 4436 |
-
},
|
| 4437 |
-
{
|
| 4438 |
-
"epoch": 21.623616236162363,
|
| 4439 |
-
"grad_norm": 4.483983039855957,
|
| 4440 |
-
"learning_rate": 1.910964681075382e-05,
|
| 4441 |
-
"loss": 0.9296,
|
| 4442 |
-
"step": 58600
|
| 4443 |
-
},
|
| 4444 |
-
{
|
| 4445 |
-
"epoch": 21.660516605166052,
|
| 4446 |
-
"grad_norm": 5.226687431335449,
|
| 4447 |
-
"learning_rate": 1.9056931997891408e-05,
|
| 4448 |
-
"loss": 0.9644,
|
| 4449 |
-
"step": 58700
|
| 4450 |
-
},
|
| 4451 |
-
{
|
| 4452 |
-
"epoch": 21.69741697416974,
|
| 4453 |
-
"grad_norm": 4.515336036682129,
|
| 4454 |
-
"learning_rate": 1.9004217185028994e-05,
|
| 4455 |
-
"loss": 0.9485,
|
| 4456 |
-
"step": 58800
|
| 4457 |
-
},
|
| 4458 |
-
{
|
| 4459 |
-
"epoch": 21.73431734317343,
|
| 4460 |
-
"grad_norm": 4.825827121734619,
|
| 4461 |
-
"learning_rate": 1.895150237216658e-05,
|
| 4462 |
-
"loss": 0.9554,
|
| 4463 |
-
"step": 58900
|
| 4464 |
-
},
|
| 4465 |
-
{
|
| 4466 |
-
"epoch": 21.771217712177123,
|
| 4467 |
-
"grad_norm": 3.3719112873077393,
|
| 4468 |
-
"learning_rate": 1.8898787559304167e-05,
|
| 4469 |
-
"loss": 0.937,
|
| 4470 |
-
"step": 59000
|
| 4471 |
-
},
|
| 4472 |
-
{
|
| 4473 |
-
"epoch": 21.771217712177123,
|
| 4474 |
-
"eval_bleu": 46.87817019053264,
|
| 4475 |
-
"eval_chrf": 70.79154520929303,
|
| 4476 |
-
"eval_loss": 0.9485617280006409,
|
| 4477 |
-
"eval_runtime": 92.225,
|
| 4478 |
-
"eval_samples_per_second": 8.848,
|
| 4479 |
-
"eval_steps_per_second": 1.106,
|
| 4480 |
-
"step": 59000
|
| 4481 |
-
},
|
| 4482 |
-
{
|
| 4483 |
-
"epoch": 21.80811808118081,
|
| 4484 |
-
"grad_norm": 3.7261431217193604,
|
| 4485 |
-
"learning_rate": 1.884607274644175e-05,
|
| 4486 |
-
"loss": 0.983,
|
| 4487 |
-
"step": 59100
|
| 4488 |
-
},
|
| 4489 |
-
{
|
| 4490 |
-
"epoch": 21.8450184501845,
|
| 4491 |
-
"grad_norm": 5.664323329925537,
|
| 4492 |
-
"learning_rate": 1.8793357933579336e-05,
|
| 4493 |
-
"loss": 0.9468,
|
| 4494 |
-
"step": 59200
|
| 4495 |
-
},
|
| 4496 |
-
{
|
| 4497 |
-
"epoch": 21.881918819188193,
|
| 4498 |
-
"grad_norm": 3.194990873336792,
|
| 4499 |
-
"learning_rate": 1.8740643120716922e-05,
|
| 4500 |
-
"loss": 0.9457,
|
| 4501 |
-
"step": 59300
|
| 4502 |
-
},
|
| 4503 |
-
{
|
| 4504 |
-
"epoch": 21.918819188191883,
|
| 4505 |
-
"grad_norm": 6.040603160858154,
|
| 4506 |
-
"learning_rate": 1.868792830785451e-05,
|
| 4507 |
-
"loss": 0.8814,
|
| 4508 |
-
"step": 59400
|
| 4509 |
-
},
|
| 4510 |
-
{
|
| 4511 |
-
"epoch": 21.95571955719557,
|
| 4512 |
-
"grad_norm": 3.8153860569000244,
|
| 4513 |
-
"learning_rate": 1.863521349499209e-05,
|
| 4514 |
-
"loss": 0.8858,
|
| 4515 |
-
"step": 59500
|
| 4516 |
-
},
|
| 4517 |
-
{
|
| 4518 |
-
"epoch": 21.95571955719557,
|
| 4519 |
-
"eval_bleu": 47.37172520377558,
|
| 4520 |
-
"eval_chrf": 71.22515832818081,
|
| 4521 |
-
"eval_loss": 0.9359919428825378,
|
| 4522 |
-
"eval_runtime": 92.8284,
|
| 4523 |
-
"eval_samples_per_second": 8.79,
|
| 4524 |
-
"eval_steps_per_second": 1.099,
|
| 4525 |
-
"step": 59500
|
| 4526 |
-
},
|
| 4527 |
-
{
|
| 4528 |
-
"epoch": 21.99261992619926,
|
| 4529 |
-
"grad_norm": 6.741827487945557,
|
| 4530 |
-
"learning_rate": 1.8582498682129678e-05,
|
| 4531 |
-
"loss": 0.9004,
|
| 4532 |
-
"step": 59600
|
| 4533 |
-
},
|
| 4534 |
-
{
|
| 4535 |
-
"epoch": 22.029520295202953,
|
| 4536 |
-
"grad_norm": 3.961733818054199,
|
| 4537 |
-
"learning_rate": 1.8529783869267264e-05,
|
| 4538 |
-
"loss": 0.9092,
|
| 4539 |
-
"step": 59700
|
| 4540 |
-
},
|
| 4541 |
-
{
|
| 4542 |
-
"epoch": 22.066420664206642,
|
| 4543 |
-
"grad_norm": 3.397977352142334,
|
| 4544 |
-
"learning_rate": 1.847706905640485e-05,
|
| 4545 |
-
"loss": 0.9474,
|
| 4546 |
-
"step": 59800
|
| 4547 |
-
},
|
| 4548 |
-
{
|
| 4549 |
-
"epoch": 22.10332103321033,
|
| 4550 |
-
"grad_norm": 4.655407428741455,
|
| 4551 |
-
"learning_rate": 1.8424354243542437e-05,
|
| 4552 |
-
"loss": 0.9012,
|
| 4553 |
-
"step": 59900
|
| 4554 |
-
},
|
| 4555 |
-
{
|
| 4556 |
-
"epoch": 22.14022140221402,
|
| 4557 |
-
"grad_norm": 5.649438381195068,
|
| 4558 |
-
"learning_rate": 1.837163943068002e-05,
|
| 4559 |
-
"loss": 0.8825,
|
| 4560 |
-
"step": 60000
|
| 4561 |
-
},
|
| 4562 |
-
{
|
| 4563 |
-
"epoch": 22.14022140221402,
|
| 4564 |
-
"eval_bleu": 46.5005676140855,
|
| 4565 |
-
"eval_chrf": 70.5285416310898,
|
| 4566 |
-
"eval_loss": 0.9432012438774109,
|
| 4567 |
-
"eval_runtime": 93.2807,
|
| 4568 |
-
"eval_samples_per_second": 8.748,
|
| 4569 |
-
"eval_steps_per_second": 1.093,
|
| 4570 |
-
"step": 60000
|
| 4571 |
-
},
|
| 4572 |
-
{
|
| 4573 |
-
"epoch": 22.177121771217713,
|
| 4574 |
-
"grad_norm": 4.629878520965576,
|
| 4575 |
-
"learning_rate": 1.8318924617817606e-05,
|
| 4576 |
-
"loss": 0.942,
|
| 4577 |
-
"step": 60100
|
| 4578 |
-
},
|
| 4579 |
-
{
|
| 4580 |
-
"epoch": 22.214022140221402,
|
| 4581 |
-
"grad_norm": 3.1749112606048584,
|
| 4582 |
-
"learning_rate": 1.8266209804955192e-05,
|
| 4583 |
-
"loss": 0.8875,
|
| 4584 |
-
"step": 60200
|
| 4585 |
-
},
|
| 4586 |
-
{
|
| 4587 |
-
"epoch": 22.25092250922509,
|
| 4588 |
-
"grad_norm": 4.075937271118164,
|
| 4589 |
-
"learning_rate": 1.821349499209278e-05,
|
| 4590 |
-
"loss": 0.8974,
|
| 4591 |
-
"step": 60300
|
| 4592 |
-
},
|
| 4593 |
-
{
|
| 4594 |
-
"epoch": 22.28782287822878,
|
| 4595 |
-
"grad_norm": 4.900486946105957,
|
| 4596 |
-
"learning_rate": 1.816078017923036e-05,
|
| 4597 |
-
"loss": 0.8756,
|
| 4598 |
-
"step": 60400
|
| 4599 |
-
},
|
| 4600 |
-
{
|
| 4601 |
-
"epoch": 22.324723247232473,
|
| 4602 |
-
"grad_norm": 5.85085391998291,
|
| 4603 |
-
"learning_rate": 1.8108065366367948e-05,
|
| 4604 |
-
"loss": 0.9037,
|
| 4605 |
-
"step": 60500
|
| 4606 |
-
},
|
| 4607 |
-
{
|
| 4608 |
-
"epoch": 22.324723247232473,
|
| 4609 |
-
"eval_bleu": 46.80960057807192,
|
| 4610 |
-
"eval_chrf": 70.97934673749698,
|
| 4611 |
-
"eval_loss": 0.9416308403015137,
|
| 4612 |
-
"eval_runtime": 93.7729,
|
| 4613 |
-
"eval_samples_per_second": 8.702,
|
| 4614 |
-
"eval_steps_per_second": 1.088,
|
| 4615 |
-
"step": 60500
|
| 4616 |
-
},
|
| 4617 |
-
{
|
| 4618 |
-
"epoch": 22.361623616236162,
|
| 4619 |
-
"grad_norm": 3.0147836208343506,
|
| 4620 |
-
"learning_rate": 1.8055350553505538e-05,
|
| 4621 |
-
"loss": 0.9062,
|
| 4622 |
-
"step": 60600
|
| 4623 |
-
},
|
| 4624 |
-
{
|
| 4625 |
-
"epoch": 22.39852398523985,
|
| 4626 |
-
"grad_norm": 4.866212368011475,
|
| 4627 |
-
"learning_rate": 1.8002635740643124e-05,
|
| 4628 |
-
"loss": 0.8591,
|
| 4629 |
-
"step": 60700
|
| 4630 |
-
},
|
| 4631 |
-
{
|
| 4632 |
-
"epoch": 22.435424354243544,
|
| 4633 |
-
"grad_norm": 5.024223804473877,
|
| 4634 |
-
"learning_rate": 1.7949920927780707e-05,
|
| 4635 |
-
"loss": 0.9472,
|
| 4636 |
-
"step": 60800
|
| 4637 |
-
},
|
| 4638 |
-
{
|
| 4639 |
-
"epoch": 22.472324723247233,
|
| 4640 |
-
"grad_norm": 4.542778968811035,
|
| 4641 |
-
"learning_rate": 1.7897206114918293e-05,
|
| 4642 |
-
"loss": 0.8968,
|
| 4643 |
-
"step": 60900
|
| 4644 |
-
},
|
| 4645 |
-
{
|
| 4646 |
-
"epoch": 22.509225092250922,
|
| 4647 |
-
"grad_norm": 4.99670934677124,
|
| 4648 |
-
"learning_rate": 1.784449130205588e-05,
|
| 4649 |
-
"loss": 0.8718,
|
| 4650 |
-
"step": 61000
|
| 4651 |
-
},
|
| 4652 |
-
{
|
| 4653 |
-
"epoch": 22.509225092250922,
|
| 4654 |
-
"eval_bleu": 47.081381775180894,
|
| 4655 |
-
"eval_chrf": 70.98627232269999,
|
| 4656 |
-
"eval_loss": 0.9361989498138428,
|
| 4657 |
-
"eval_runtime": 92.7139,
|
| 4658 |
-
"eval_samples_per_second": 8.801,
|
| 4659 |
-
"eval_steps_per_second": 1.1,
|
| 4660 |
-
"step": 61000
|
| 4661 |
-
},
|
| 4662 |
-
{
|
| 4663 |
-
"epoch": 22.54612546125461,
|
| 4664 |
-
"grad_norm": 4.439650535583496,
|
| 4665 |
-
"learning_rate": 1.7791776489193466e-05,
|
| 4666 |
-
"loss": 0.9003,
|
| 4667 |
-
"step": 61100
|
| 4668 |
-
},
|
| 4669 |
-
{
|
| 4670 |
-
"epoch": 22.583025830258304,
|
| 4671 |
-
"grad_norm": 3.1378018856048584,
|
| 4672 |
-
"learning_rate": 1.7739061676331052e-05,
|
| 4673 |
-
"loss": 0.8969,
|
| 4674 |
-
"step": 61200
|
| 4675 |
-
},
|
| 4676 |
-
{
|
| 4677 |
-
"epoch": 22.619926199261993,
|
| 4678 |
-
"grad_norm": 4.780954837799072,
|
| 4679 |
-
"learning_rate": 1.7686346863468635e-05,
|
| 4680 |
-
"loss": 0.923,
|
| 4681 |
-
"step": 61300
|
| 4682 |
-
},
|
| 4683 |
-
{
|
| 4684 |
-
"epoch": 22.656826568265682,
|
| 4685 |
-
"grad_norm": 3.4595189094543457,
|
| 4686 |
-
"learning_rate": 1.763363205060622e-05,
|
| 4687 |
-
"loss": 0.8594,
|
| 4688 |
-
"step": 61400
|
| 4689 |
-
},
|
| 4690 |
-
{
|
| 4691 |
-
"epoch": 22.69372693726937,
|
| 4692 |
-
"grad_norm": 3.2997348308563232,
|
| 4693 |
-
"learning_rate": 1.7580917237743808e-05,
|
| 4694 |
-
"loss": 0.9022,
|
| 4695 |
-
"step": 61500
|
| 4696 |
-
},
|
| 4697 |
-
{
|
| 4698 |
-
"epoch": 22.69372693726937,
|
| 4699 |
-
"eval_bleu": 47.1028817880757,
|
| 4700 |
-
"eval_chrf": 71.06093863096652,
|
| 4701 |
-
"eval_loss": 0.9276468753814697,
|
| 4702 |
-
"eval_runtime": 93.0419,
|
| 4703 |
-
"eval_samples_per_second": 8.77,
|
| 4704 |
-
"eval_steps_per_second": 1.096,
|
| 4705 |
-
"step": 61500
|
| 4706 |
-
},
|
| 4707 |
-
{
|
| 4708 |
-
"epoch": 22.730627306273064,
|
| 4709 |
-
"grad_norm": 3.217003583908081,
|
| 4710 |
-
"learning_rate": 1.7528202424881394e-05,
|
| 4711 |
-
"loss": 0.9786,
|
| 4712 |
-
"step": 61600
|
| 4713 |
-
},
|
| 4714 |
-
{
|
| 4715 |
-
"epoch": 22.767527675276753,
|
| 4716 |
-
"grad_norm": 3.641460418701172,
|
| 4717 |
-
"learning_rate": 1.7475487612018977e-05,
|
| 4718 |
-
"loss": 0.9548,
|
| 4719 |
-
"step": 61700
|
| 4720 |
-
},
|
| 4721 |
-
{
|
| 4722 |
-
"epoch": 22.804428044280442,
|
| 4723 |
-
"grad_norm": 4.382227420806885,
|
| 4724 |
-
"learning_rate": 1.7422772799156563e-05,
|
| 4725 |
-
"loss": 0.885,
|
| 4726 |
-
"step": 61800
|
| 4727 |
-
},
|
| 4728 |
-
{
|
| 4729 |
-
"epoch": 22.84132841328413,
|
| 4730 |
-
"grad_norm": 4.368484973907471,
|
| 4731 |
-
"learning_rate": 1.737005798629415e-05,
|
| 4732 |
-
"loss": 0.8851,
|
| 4733 |
-
"step": 61900
|
| 4734 |
-
},
|
| 4735 |
-
{
|
| 4736 |
-
"epoch": 22.878228782287824,
|
| 4737 |
-
"grad_norm": 3.0234997272491455,
|
| 4738 |
-
"learning_rate": 1.7317343173431736e-05,
|
| 4739 |
-
"loss": 0.8995,
|
| 4740 |
-
"step": 62000
|
| 4741 |
-
},
|
| 4742 |
-
{
|
| 4743 |
-
"epoch": 22.878228782287824,
|
| 4744 |
-
"eval_bleu": 46.956009820086116,
|
| 4745 |
-
"eval_chrf": 71.03846001924904,
|
| 4746 |
-
"eval_loss": 0.9235355257987976,
|
| 4747 |
-
"eval_runtime": 92.5602,
|
| 4748 |
-
"eval_samples_per_second": 8.816,
|
| 4749 |
-
"eval_steps_per_second": 1.102,
|
| 4750 |
-
"step": 62000
|
| 4751 |
}
|
| 4752 |
],
|
| 4753 |
"logging_steps": 100,
|
| 4754 |
-
"max_steps":
|
| 4755 |
"num_input_tokens_seen": 0,
|
| 4756 |
-
"num_train_epochs":
|
| 4757 |
"save_steps": 500,
|
| 4758 |
"stateful_callbacks": {
|
| 4759 |
"TrainerControl": {
|
|
@@ -4762,12 +3710,12 @@
|
|
| 4762 |
"should_evaluate": false,
|
| 4763 |
"should_log": false,
|
| 4764 |
"should_save": true,
|
| 4765 |
-
"should_training_stop":
|
| 4766 |
},
|
| 4767 |
"attributes": {}
|
| 4768 |
}
|
| 4769 |
},
|
| 4770 |
-
"total_flos":
|
| 4771 |
"train_batch_size": 8,
|
| 4772 |
"trial_name": null,
|
| 4773 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 20.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 50420,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3696 |
"eval_samples_per_second": 47.97,
|
| 3697 |
"eval_steps_per_second": 6.004,
|
| 3698 |
"step": 50420
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3699 |
}
|
| 3700 |
],
|
| 3701 |
"logging_steps": 100,
|
| 3702 |
+
"max_steps": 50420,
|
| 3703 |
"num_input_tokens_seen": 0,
|
| 3704 |
+
"num_train_epochs": 20,
|
| 3705 |
"save_steps": 500,
|
| 3706 |
"stateful_callbacks": {
|
| 3707 |
"TrainerControl": {
|
|
|
|
| 3710 |
"should_evaluate": false,
|
| 3711 |
"should_log": false,
|
| 3712 |
"should_save": true,
|
| 3713 |
+
"should_training_stop": true
|
| 3714 |
},
|
| 3715 |
"attributes": {}
|
| 3716 |
}
|
| 3717 |
},
|
| 3718 |
+
"total_flos": 6.902420484390912e+16,
|
| 3719 |
"train_batch_size": 8,
|
| 3720 |
"trial_name": null,
|
| 3721 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75188bd643b1477939c9a6819e9a5ad3a7a388c5c13a37c2fcbd210bd1b5fc26
|
| 3 |
+
size 5496
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 990185320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a29e0afbdf7dacbda2541986ccaba1c2707ac1b4969c8e3a880d544bb15612e
|
| 3 |
size 990185320
|