mushkil / trainer_state.json
riotu-lab's picture
upload file
8f40f24 verified
{
"best_metric": 0.4629,
"best_model_checkpoint": "AraT5_FT_AraT5V2_Transaltion/checkpoint-41500",
"epoch": 21.020408163265305,
"eval_steps": 500,
"global_step": 51500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 4.953617810760668e-05,
"loss": 1.9101,
"step": 500
},
{
"epoch": 0.2,
"eval_bleu": 0.3871,
"eval_gen_len": 19.0,
"eval_loss": 0.29340660572052,
"eval_runtime": 130.614,
"eval_samples_per_second": 200.002,
"eval_steps_per_second": 1.049,
"step": 500
},
{
"epoch": 0.41,
"learning_rate": 4.907235621521336e-05,
"loss": 0.4711,
"step": 1000
},
{
"epoch": 0.41,
"eval_bleu": 0.4212,
"eval_gen_len": 19.0,
"eval_loss": 0.20559465885162354,
"eval_runtime": 130.3147,
"eval_samples_per_second": 200.461,
"eval_steps_per_second": 1.051,
"step": 1000
},
{
"epoch": 0.61,
"learning_rate": 4.860853432282004e-05,
"loss": 0.3416,
"step": 1500
},
{
"epoch": 0.61,
"eval_bleu": 0.4334,
"eval_gen_len": 19.0,
"eval_loss": 0.16219736635684967,
"eval_runtime": 129.566,
"eval_samples_per_second": 201.619,
"eval_steps_per_second": 1.057,
"step": 1500
},
{
"epoch": 0.82,
"learning_rate": 4.814471243042672e-05,
"loss": 0.2762,
"step": 2000
},
{
"epoch": 0.82,
"eval_bleu": 0.4406,
"eval_gen_len": 19.0,
"eval_loss": 0.15120063722133636,
"eval_runtime": 129.7775,
"eval_samples_per_second": 201.291,
"eval_steps_per_second": 1.056,
"step": 2000
},
{
"epoch": 1.02,
"learning_rate": 4.7680890538033396e-05,
"loss": 0.2357,
"step": 2500
},
{
"epoch": 1.02,
"eval_bleu": 0.4453,
"eval_gen_len": 19.0,
"eval_loss": 0.1276528686285019,
"eval_runtime": 130.1173,
"eval_samples_per_second": 200.765,
"eval_steps_per_second": 1.053,
"step": 2500
},
{
"epoch": 1.22,
"learning_rate": 4.721706864564008e-05,
"loss": 0.2052,
"step": 3000
},
{
"epoch": 1.22,
"eval_bleu": 0.4471,
"eval_gen_len": 19.0,
"eval_loss": 0.11476743966341019,
"eval_runtime": 129.921,
"eval_samples_per_second": 201.068,
"eval_steps_per_second": 1.054,
"step": 3000
},
{
"epoch": 1.43,
"learning_rate": 4.675324675324675e-05,
"loss": 0.1844,
"step": 3500
},
{
"epoch": 1.43,
"eval_bleu": 0.4491,
"eval_gen_len": 19.0,
"eval_loss": 0.10187335312366486,
"eval_runtime": 130.0031,
"eval_samples_per_second": 200.941,
"eval_steps_per_second": 1.054,
"step": 3500
},
{
"epoch": 1.63,
"learning_rate": 4.628942486085344e-05,
"loss": 0.1694,
"step": 4000
},
{
"epoch": 1.63,
"eval_bleu": 0.4508,
"eval_gen_len": 19.0,
"eval_loss": 0.09269851446151733,
"eval_runtime": 129.8071,
"eval_samples_per_second": 201.245,
"eval_steps_per_second": 1.055,
"step": 4000
},
{
"epoch": 1.84,
"learning_rate": 4.582560296846011e-05,
"loss": 0.1562,
"step": 4500
},
{
"epoch": 1.84,
"eval_bleu": 0.4515,
"eval_gen_len": 19.0,
"eval_loss": 0.08425677567720413,
"eval_runtime": 129.5856,
"eval_samples_per_second": 201.589,
"eval_steps_per_second": 1.057,
"step": 4500
},
{
"epoch": 2.04,
"learning_rate": 4.5361781076066796e-05,
"loss": 0.1461,
"step": 5000
},
{
"epoch": 2.04,
"eval_bleu": 0.4531,
"eval_gen_len": 19.0,
"eval_loss": 0.08317717164754868,
"eval_runtime": 130.0012,
"eval_samples_per_second": 200.944,
"eval_steps_per_second": 1.054,
"step": 5000
},
{
"epoch": 2.24,
"learning_rate": 4.4897959183673474e-05,
"loss": 0.1358,
"step": 5500
},
{
"epoch": 2.24,
"eval_bleu": 0.4535,
"eval_gen_len": 19.0,
"eval_loss": 0.07654570788145065,
"eval_runtime": 129.977,
"eval_samples_per_second": 200.982,
"eval_steps_per_second": 1.054,
"step": 5500
},
{
"epoch": 2.45,
"learning_rate": 4.4434137291280146e-05,
"loss": 0.1279,
"step": 6000
},
{
"epoch": 2.45,
"eval_bleu": 0.4547,
"eval_gen_len": 19.0,
"eval_loss": 0.07310701906681061,
"eval_runtime": 129.9269,
"eval_samples_per_second": 201.059,
"eval_steps_per_second": 1.054,
"step": 6000
},
{
"epoch": 2.65,
"learning_rate": 4.397031539888683e-05,
"loss": 0.122,
"step": 6500
},
{
"epoch": 2.65,
"eval_bleu": 0.4549,
"eval_gen_len": 19.0,
"eval_loss": 0.07025206089019775,
"eval_runtime": 130.0454,
"eval_samples_per_second": 200.876,
"eval_steps_per_second": 1.053,
"step": 6500
},
{
"epoch": 2.86,
"learning_rate": 4.3506493506493503e-05,
"loss": 0.1166,
"step": 7000
},
{
"epoch": 2.86,
"eval_bleu": 0.4563,
"eval_gen_len": 19.0,
"eval_loss": 0.06883203238248825,
"eval_runtime": 129.7879,
"eval_samples_per_second": 201.275,
"eval_steps_per_second": 1.056,
"step": 7000
},
{
"epoch": 3.06,
"learning_rate": 4.304267161410019e-05,
"loss": 0.1113,
"step": 7500
},
{
"epoch": 3.06,
"eval_bleu": 0.456,
"eval_gen_len": 19.0,
"eval_loss": 0.06709539890289307,
"eval_runtime": 130.5194,
"eval_samples_per_second": 200.146,
"eval_steps_per_second": 1.05,
"step": 7500
},
{
"epoch": 3.27,
"learning_rate": 4.257884972170687e-05,
"loss": 0.1063,
"step": 8000
},
{
"epoch": 3.27,
"eval_bleu": 0.4566,
"eval_gen_len": 19.0,
"eval_loss": 0.06505035609006882,
"eval_runtime": 130.1863,
"eval_samples_per_second": 200.659,
"eval_steps_per_second": 1.052,
"step": 8000
},
{
"epoch": 3.47,
"learning_rate": 4.2115027829313546e-05,
"loss": 0.1023,
"step": 8500
},
{
"epoch": 3.47,
"eval_bleu": 0.4573,
"eval_gen_len": 19.0,
"eval_loss": 0.0633101835846901,
"eval_runtime": 130.238,
"eval_samples_per_second": 200.579,
"eval_steps_per_second": 1.052,
"step": 8500
},
{
"epoch": 3.67,
"learning_rate": 4.1651205936920225e-05,
"loss": 0.0996,
"step": 9000
},
{
"epoch": 3.67,
"eval_bleu": 0.4572,
"eval_gen_len": 19.0,
"eval_loss": 0.06185136362910271,
"eval_runtime": 130.2134,
"eval_samples_per_second": 200.617,
"eval_steps_per_second": 1.052,
"step": 9000
},
{
"epoch": 3.88,
"learning_rate": 4.1187384044526903e-05,
"loss": 0.0963,
"step": 9500
},
{
"epoch": 3.88,
"eval_bleu": 0.458,
"eval_gen_len": 19.0,
"eval_loss": 0.061708223074674606,
"eval_runtime": 137.0848,
"eval_samples_per_second": 190.561,
"eval_steps_per_second": 0.999,
"step": 9500
},
{
"epoch": 4.08,
"learning_rate": 4.072356215213358e-05,
"loss": 0.0927,
"step": 10000
},
{
"epoch": 4.08,
"eval_bleu": 0.4588,
"eval_gen_len": 19.0,
"eval_loss": 0.0601879358291626,
"eval_runtime": 139.7185,
"eval_samples_per_second": 186.969,
"eval_steps_per_second": 0.981,
"step": 10000
},
{
"epoch": 4.29,
"learning_rate": 4.025974025974026e-05,
"loss": 0.0902,
"step": 10500
},
{
"epoch": 4.29,
"eval_bleu": 0.459,
"eval_gen_len": 19.0,
"eval_loss": 0.05851432681083679,
"eval_runtime": 138.3043,
"eval_samples_per_second": 188.881,
"eval_steps_per_second": 0.991,
"step": 10500
},
{
"epoch": 4.49,
"learning_rate": 3.979591836734694e-05,
"loss": 0.0877,
"step": 11000
},
{
"epoch": 4.49,
"eval_bleu": 0.4594,
"eval_gen_len": 19.0,
"eval_loss": 0.05787012353539467,
"eval_runtime": 137.0383,
"eval_samples_per_second": 190.626,
"eval_steps_per_second": 1.0,
"step": 11000
},
{
"epoch": 4.69,
"learning_rate": 3.933209647495362e-05,
"loss": 0.0857,
"step": 11500
},
{
"epoch": 4.69,
"eval_bleu": 0.459,
"eval_gen_len": 19.0,
"eval_loss": 0.05705270916223526,
"eval_runtime": 137.1566,
"eval_samples_per_second": 190.461,
"eval_steps_per_second": 0.999,
"step": 11500
},
{
"epoch": 4.9,
"learning_rate": 3.88682745825603e-05,
"loss": 0.0844,
"step": 12000
},
{
"epoch": 4.9,
"eval_bleu": 0.4591,
"eval_gen_len": 19.0,
"eval_loss": 0.0567244328558445,
"eval_runtime": 136.9999,
"eval_samples_per_second": 190.679,
"eval_steps_per_second": 1.0,
"step": 12000
},
{
"epoch": 5.1,
"learning_rate": 3.8404452690166975e-05,
"loss": 0.0822,
"step": 12500
},
{
"epoch": 5.1,
"eval_bleu": 0.4588,
"eval_gen_len": 19.0,
"eval_loss": 0.055685680359601974,
"eval_runtime": 136.8888,
"eval_samples_per_second": 190.834,
"eval_steps_per_second": 1.001,
"step": 12500
},
{
"epoch": 5.31,
"learning_rate": 3.794063079777366e-05,
"loss": 0.0797,
"step": 13000
},
{
"epoch": 5.31,
"eval_bleu": 0.4593,
"eval_gen_len": 19.0,
"eval_loss": 0.05490221455693245,
"eval_runtime": 136.7754,
"eval_samples_per_second": 190.992,
"eval_steps_per_second": 1.002,
"step": 13000
},
{
"epoch": 5.51,
"learning_rate": 3.747680890538033e-05,
"loss": 0.0783,
"step": 13500
},
{
"epoch": 5.51,
"eval_bleu": 0.4592,
"eval_gen_len": 19.0,
"eval_loss": 0.05454099550843239,
"eval_runtime": 136.6541,
"eval_samples_per_second": 191.161,
"eval_steps_per_second": 1.003,
"step": 13500
},
{
"epoch": 5.71,
"learning_rate": 3.701298701298702e-05,
"loss": 0.0773,
"step": 14000
},
{
"epoch": 5.71,
"eval_bleu": 0.4601,
"eval_gen_len": 19.0,
"eval_loss": 0.054198332130908966,
"eval_runtime": 136.7896,
"eval_samples_per_second": 190.972,
"eval_steps_per_second": 1.002,
"step": 14000
},
{
"epoch": 5.92,
"learning_rate": 3.654916512059369e-05,
"loss": 0.0759,
"step": 14500
},
{
"epoch": 5.92,
"eval_bleu": 0.46,
"eval_gen_len": 19.0,
"eval_loss": 0.05303099378943443,
"eval_runtime": 137.0308,
"eval_samples_per_second": 190.636,
"eval_steps_per_second": 1.0,
"step": 14500
},
{
"epoch": 6.12,
"learning_rate": 3.6085343228200375e-05,
"loss": 0.0742,
"step": 15000
},
{
"epoch": 6.12,
"eval_bleu": 0.4596,
"eval_gen_len": 19.0,
"eval_loss": 0.053217481821775436,
"eval_runtime": 136.5228,
"eval_samples_per_second": 191.345,
"eval_steps_per_second": 1.003,
"step": 15000
},
{
"epoch": 6.33,
"learning_rate": 3.5621521335807054e-05,
"loss": 0.0723,
"step": 15500
},
{
"epoch": 6.33,
"eval_bleu": 0.461,
"eval_gen_len": 19.0,
"eval_loss": 0.052737053483724594,
"eval_runtime": 136.5843,
"eval_samples_per_second": 191.259,
"eval_steps_per_second": 1.003,
"step": 15500
},
{
"epoch": 6.53,
"learning_rate": 3.515769944341373e-05,
"loss": 0.0717,
"step": 16000
},
{
"epoch": 6.53,
"eval_bleu": 0.4609,
"eval_gen_len": 19.0,
"eval_loss": 0.052782874554395676,
"eval_runtime": 136.5496,
"eval_samples_per_second": 191.308,
"eval_steps_per_second": 1.003,
"step": 16000
},
{
"epoch": 6.73,
"learning_rate": 3.469387755102041e-05,
"loss": 0.0711,
"step": 16500
},
{
"epoch": 6.73,
"eval_bleu": 0.4605,
"eval_gen_len": 19.0,
"eval_loss": 0.05168753117322922,
"eval_runtime": 136.5216,
"eval_samples_per_second": 191.347,
"eval_steps_per_second": 1.004,
"step": 16500
},
{
"epoch": 6.94,
"learning_rate": 3.423005565862709e-05,
"loss": 0.0701,
"step": 17000
},
{
"epoch": 6.94,
"eval_bleu": 0.461,
"eval_gen_len": 19.0,
"eval_loss": 0.05145658180117607,
"eval_runtime": 136.9443,
"eval_samples_per_second": 190.756,
"eval_steps_per_second": 1.0,
"step": 17000
},
{
"epoch": 7.14,
"learning_rate": 3.376623376623377e-05,
"loss": 0.0686,
"step": 17500
},
{
"epoch": 7.14,
"eval_bleu": 0.4615,
"eval_gen_len": 19.0,
"eval_loss": 0.051403045654296875,
"eval_runtime": 137.1116,
"eval_samples_per_second": 190.524,
"eval_steps_per_second": 0.999,
"step": 17500
},
{
"epoch": 7.35,
"learning_rate": 3.330241187384045e-05,
"loss": 0.0673,
"step": 18000
},
{
"epoch": 7.35,
"eval_bleu": 0.4609,
"eval_gen_len": 19.0,
"eval_loss": 0.05048130825161934,
"eval_runtime": 136.7447,
"eval_samples_per_second": 191.035,
"eval_steps_per_second": 1.002,
"step": 18000
},
{
"epoch": 7.55,
"learning_rate": 3.2838589981447126e-05,
"loss": 0.0669,
"step": 18500
},
{
"epoch": 7.55,
"eval_bleu": 0.4608,
"eval_gen_len": 19.0,
"eval_loss": 0.05009736865758896,
"eval_runtime": 136.3353,
"eval_samples_per_second": 191.608,
"eval_steps_per_second": 1.005,
"step": 18500
},
{
"epoch": 7.76,
"learning_rate": 3.2374768089053805e-05,
"loss": 0.0658,
"step": 19000
},
{
"epoch": 7.76,
"eval_bleu": 0.461,
"eval_gen_len": 19.0,
"eval_loss": 0.050111617892980576,
"eval_runtime": 136.6576,
"eval_samples_per_second": 191.157,
"eval_steps_per_second": 1.003,
"step": 19000
},
{
"epoch": 7.96,
"learning_rate": 3.191094619666048e-05,
"loss": 0.0656,
"step": 19500
},
{
"epoch": 7.96,
"eval_bleu": 0.4614,
"eval_gen_len": 19.0,
"eval_loss": 0.04940654709935188,
"eval_runtime": 136.917,
"eval_samples_per_second": 190.794,
"eval_steps_per_second": 1.001,
"step": 19500
},
{
"epoch": 8.16,
"learning_rate": 3.144712430426716e-05,
"loss": 0.0639,
"step": 20000
},
{
"epoch": 8.16,
"eval_bleu": 0.4613,
"eval_gen_len": 19.0,
"eval_loss": 0.049783505499362946,
"eval_runtime": 136.756,
"eval_samples_per_second": 191.019,
"eval_steps_per_second": 1.002,
"step": 20000
},
{
"epoch": 8.37,
"learning_rate": 3.098330241187384e-05,
"loss": 0.0627,
"step": 20500
},
{
"epoch": 8.37,
"eval_bleu": 0.4615,
"eval_gen_len": 19.0,
"eval_loss": 0.049393024295568466,
"eval_runtime": 136.8452,
"eval_samples_per_second": 190.895,
"eval_steps_per_second": 1.001,
"step": 20500
},
{
"epoch": 8.57,
"learning_rate": 3.051948051948052e-05,
"loss": 0.063,
"step": 21000
},
{
"epoch": 8.57,
"eval_bleu": 0.4614,
"eval_gen_len": 19.0,
"eval_loss": 0.0488428920507431,
"eval_runtime": 130.0772,
"eval_samples_per_second": 200.827,
"eval_steps_per_second": 1.053,
"step": 21000
},
{
"epoch": 8.78,
"learning_rate": 3.00556586270872e-05,
"loss": 0.0626,
"step": 21500
},
{
"epoch": 8.78,
"eval_bleu": 0.4612,
"eval_gen_len": 19.0,
"eval_loss": 0.048447445034980774,
"eval_runtime": 130.2418,
"eval_samples_per_second": 200.573,
"eval_steps_per_second": 1.052,
"step": 21500
},
{
"epoch": 8.98,
"learning_rate": 2.959183673469388e-05,
"loss": 0.062,
"step": 22000
},
{
"epoch": 8.98,
"eval_bleu": 0.4616,
"eval_gen_len": 19.0,
"eval_loss": 0.04808622598648071,
"eval_runtime": 130.5977,
"eval_samples_per_second": 200.027,
"eval_steps_per_second": 1.049,
"step": 22000
},
{
"epoch": 9.18,
"learning_rate": 2.9128014842300562e-05,
"loss": 0.0603,
"step": 22500
},
{
"epoch": 9.18,
"eval_bleu": 0.4619,
"eval_gen_len": 19.0,
"eval_loss": 0.04833626002073288,
"eval_runtime": 130.0816,
"eval_samples_per_second": 200.82,
"eval_steps_per_second": 1.053,
"step": 22500
},
{
"epoch": 9.39,
"learning_rate": 2.8664192949907237e-05,
"loss": 0.0598,
"step": 23000
},
{
"epoch": 9.39,
"eval_bleu": 0.4616,
"eval_gen_len": 19.0,
"eval_loss": 0.048464007675647736,
"eval_runtime": 129.8908,
"eval_samples_per_second": 201.115,
"eval_steps_per_second": 1.055,
"step": 23000
},
{
"epoch": 9.59,
"learning_rate": 2.8200371057513912e-05,
"loss": 0.0595,
"step": 23500
},
{
"epoch": 9.59,
"eval_bleu": 0.4617,
"eval_gen_len": 19.0,
"eval_loss": 0.048219963908195496,
"eval_runtime": 130.5366,
"eval_samples_per_second": 200.12,
"eval_steps_per_second": 1.05,
"step": 23500
},
{
"epoch": 9.8,
"learning_rate": 2.7736549165120594e-05,
"loss": 0.0592,
"step": 24000
},
{
"epoch": 9.8,
"eval_bleu": 0.4612,
"eval_gen_len": 19.0,
"eval_loss": 0.04728322476148605,
"eval_runtime": 130.4335,
"eval_samples_per_second": 200.278,
"eval_steps_per_second": 1.05,
"step": 24000
},
{
"epoch": 10.0,
"learning_rate": 2.7272727272727273e-05,
"loss": 0.0591,
"step": 24500
},
{
"epoch": 10.0,
"eval_bleu": 0.4618,
"eval_gen_len": 19.0,
"eval_loss": 0.04734385386109352,
"eval_runtime": 130.0364,
"eval_samples_per_second": 200.89,
"eval_steps_per_second": 1.054,
"step": 24500
},
{
"epoch": 10.2,
"learning_rate": 2.6808905380333955e-05,
"loss": 0.0574,
"step": 25000
},
{
"epoch": 10.2,
"eval_bleu": 0.4617,
"eval_gen_len": 19.0,
"eval_loss": 0.04747864603996277,
"eval_runtime": 130.0909,
"eval_samples_per_second": 200.806,
"eval_steps_per_second": 1.053,
"step": 25000
},
{
"epoch": 10.41,
"learning_rate": 2.634508348794063e-05,
"loss": 0.0573,
"step": 25500
},
{
"epoch": 10.41,
"eval_bleu": 0.462,
"eval_gen_len": 19.0,
"eval_loss": 0.04742683470249176,
"eval_runtime": 129.9305,
"eval_samples_per_second": 201.054,
"eval_steps_per_second": 1.054,
"step": 25500
},
{
"epoch": 10.61,
"learning_rate": 2.5881261595547312e-05,
"loss": 0.0565,
"step": 26000
},
{
"epoch": 10.61,
"eval_bleu": 0.4618,
"eval_gen_len": 19.0,
"eval_loss": 0.046983037143945694,
"eval_runtime": 130.4351,
"eval_samples_per_second": 200.276,
"eval_steps_per_second": 1.05,
"step": 26000
},
{
"epoch": 10.82,
"learning_rate": 2.5417439703153988e-05,
"loss": 0.0569,
"step": 26500
},
{
"epoch": 10.82,
"eval_bleu": 0.4619,
"eval_gen_len": 19.0,
"eval_loss": 0.04676728695631027,
"eval_runtime": 130.1051,
"eval_samples_per_second": 200.784,
"eval_steps_per_second": 1.053,
"step": 26500
},
{
"epoch": 11.02,
"learning_rate": 2.495361781076067e-05,
"loss": 0.0566,
"step": 27000
},
{
"epoch": 11.02,
"eval_bleu": 0.4619,
"eval_gen_len": 19.0,
"eval_loss": 0.0468904972076416,
"eval_runtime": 130.194,
"eval_samples_per_second": 200.647,
"eval_steps_per_second": 1.052,
"step": 27000
},
{
"epoch": 11.22,
"learning_rate": 2.448979591836735e-05,
"loss": 0.0552,
"step": 27500
},
{
"epoch": 11.22,
"eval_bleu": 0.462,
"eval_gen_len": 19.0,
"eval_loss": 0.04681343212723732,
"eval_runtime": 130.2625,
"eval_samples_per_second": 200.541,
"eval_steps_per_second": 1.052,
"step": 27500
},
{
"epoch": 11.43,
"learning_rate": 2.4025974025974027e-05,
"loss": 0.0549,
"step": 28000
},
{
"epoch": 11.43,
"eval_bleu": 0.462,
"eval_gen_len": 19.0,
"eval_loss": 0.04649100452661514,
"eval_runtime": 130.2691,
"eval_samples_per_second": 200.531,
"eval_steps_per_second": 1.052,
"step": 28000
},
{
"epoch": 11.63,
"learning_rate": 2.3562152133580706e-05,
"loss": 0.055,
"step": 28500
},
{
"epoch": 11.63,
"eval_bleu": 0.4621,
"eval_gen_len": 19.0,
"eval_loss": 0.04652680456638336,
"eval_runtime": 130.1537,
"eval_samples_per_second": 200.709,
"eval_steps_per_second": 1.053,
"step": 28500
},
{
"epoch": 11.84,
"learning_rate": 2.3098330241187384e-05,
"loss": 0.0547,
"step": 29000
},
{
"epoch": 11.84,
"eval_bleu": 0.4623,
"eval_gen_len": 19.0,
"eval_loss": 0.045862022787332535,
"eval_runtime": 130.0719,
"eval_samples_per_second": 200.835,
"eval_steps_per_second": 1.053,
"step": 29000
},
{
"epoch": 12.04,
"learning_rate": 2.2634508348794063e-05,
"loss": 0.0545,
"step": 29500
},
{
"epoch": 12.04,
"eval_bleu": 0.4626,
"eval_gen_len": 19.0,
"eval_loss": 0.04637685418128967,
"eval_runtime": 130.2373,
"eval_samples_per_second": 200.58,
"eval_steps_per_second": 1.052,
"step": 29500
},
{
"epoch": 12.24,
"learning_rate": 2.2170686456400745e-05,
"loss": 0.0533,
"step": 30000
},
{
"epoch": 12.24,
"eval_bleu": 0.4622,
"eval_gen_len": 19.0,
"eval_loss": 0.045851390808820724,
"eval_runtime": 130.0575,
"eval_samples_per_second": 200.857,
"eval_steps_per_second": 1.053,
"step": 30000
},
{
"epoch": 12.45,
"learning_rate": 2.1706864564007424e-05,
"loss": 0.0533,
"step": 30500
},
{
"epoch": 12.45,
"eval_bleu": 0.4618,
"eval_gen_len": 19.0,
"eval_loss": 0.045971017330884933,
"eval_runtime": 130.0582,
"eval_samples_per_second": 200.856,
"eval_steps_per_second": 1.053,
"step": 30500
},
{
"epoch": 12.65,
"learning_rate": 2.1243042671614102e-05,
"loss": 0.053,
"step": 31000
},
{
"epoch": 12.65,
"eval_bleu": 0.462,
"eval_gen_len": 19.0,
"eval_loss": 0.04581404849886894,
"eval_runtime": 130.0729,
"eval_samples_per_second": 200.833,
"eval_steps_per_second": 1.053,
"step": 31000
},
{
"epoch": 12.86,
"learning_rate": 2.077922077922078e-05,
"loss": 0.0527,
"step": 31500
},
{
"epoch": 12.86,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.046112846583127975,
"eval_runtime": 130.0922,
"eval_samples_per_second": 200.804,
"eval_steps_per_second": 1.053,
"step": 31500
},
{
"epoch": 13.06,
"learning_rate": 2.031539888682746e-05,
"loss": 0.0523,
"step": 32000
},
{
"epoch": 13.06,
"eval_bleu": 0.4621,
"eval_gen_len": 19.0,
"eval_loss": 0.04600910842418671,
"eval_runtime": 129.8407,
"eval_samples_per_second": 201.193,
"eval_steps_per_second": 1.055,
"step": 32000
},
{
"epoch": 13.27,
"learning_rate": 1.9851576994434138e-05,
"loss": 0.0516,
"step": 32500
},
{
"epoch": 13.27,
"eval_bleu": 0.4623,
"eval_gen_len": 19.0,
"eval_loss": 0.04571113362908363,
"eval_runtime": 129.9111,
"eval_samples_per_second": 201.084,
"eval_steps_per_second": 1.055,
"step": 32500
},
{
"epoch": 13.47,
"learning_rate": 1.9387755102040817e-05,
"loss": 0.0515,
"step": 33000
},
{
"epoch": 13.47,
"eval_bleu": 0.4621,
"eval_gen_len": 19.0,
"eval_loss": 0.0457097552716732,
"eval_runtime": 130.0314,
"eval_samples_per_second": 200.898,
"eval_steps_per_second": 1.054,
"step": 33000
},
{
"epoch": 13.67,
"learning_rate": 1.8923933209647496e-05,
"loss": 0.0517,
"step": 33500
},
{
"epoch": 13.67,
"eval_bleu": 0.4621,
"eval_gen_len": 19.0,
"eval_loss": 0.04519006237387657,
"eval_runtime": 130.4174,
"eval_samples_per_second": 200.303,
"eval_steps_per_second": 1.05,
"step": 33500
},
{
"epoch": 13.88,
"learning_rate": 1.8460111317254174e-05,
"loss": 0.0512,
"step": 34000
},
{
"epoch": 13.88,
"eval_bleu": 0.4626,
"eval_gen_len": 19.0,
"eval_loss": 0.045346666127443314,
"eval_runtime": 130.758,
"eval_samples_per_second": 199.781,
"eval_steps_per_second": 1.048,
"step": 34000
},
{
"epoch": 14.08,
"learning_rate": 1.7996289424860853e-05,
"loss": 0.0513,
"step": 34500
},
{
"epoch": 14.08,
"eval_bleu": 0.4621,
"eval_gen_len": 19.0,
"eval_loss": 0.04493604227900505,
"eval_runtime": 130.5867,
"eval_samples_per_second": 200.043,
"eval_steps_per_second": 1.049,
"step": 34500
},
{
"epoch": 14.29,
"learning_rate": 1.7532467532467535e-05,
"loss": 0.0502,
"step": 35000
},
{
"epoch": 14.29,
"eval_bleu": 0.462,
"eval_gen_len": 19.0,
"eval_loss": 0.044917818158864975,
"eval_runtime": 130.8497,
"eval_samples_per_second": 199.641,
"eval_steps_per_second": 1.047,
"step": 35000
},
{
"epoch": 14.49,
"learning_rate": 1.7068645640074214e-05,
"loss": 0.0501,
"step": 35500
},
{
"epoch": 14.49,
"eval_bleu": 0.4621,
"eval_gen_len": 19.0,
"eval_loss": 0.045045047998428345,
"eval_runtime": 130.6038,
"eval_samples_per_second": 200.017,
"eval_steps_per_second": 1.049,
"step": 35500
},
{
"epoch": 14.69,
"learning_rate": 1.6604823747680892e-05,
"loss": 0.0504,
"step": 36000
},
{
"epoch": 14.69,
"eval_bleu": 0.4623,
"eval_gen_len": 19.0,
"eval_loss": 0.04437680542469025,
"eval_runtime": 134.994,
"eval_samples_per_second": 193.512,
"eval_steps_per_second": 1.015,
"step": 36000
},
{
"epoch": 14.9,
"learning_rate": 1.614100185528757e-05,
"loss": 0.0499,
"step": 36500
},
{
"epoch": 14.9,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.04487466439604759,
"eval_runtime": 135.3356,
"eval_samples_per_second": 193.024,
"eval_steps_per_second": 1.012,
"step": 36500
},
{
"epoch": 15.1,
"learning_rate": 1.567717996289425e-05,
"loss": 0.0496,
"step": 37000
},
{
"epoch": 15.1,
"eval_bleu": 0.4622,
"eval_gen_len": 19.0,
"eval_loss": 0.04475782439112663,
"eval_runtime": 131.0438,
"eval_samples_per_second": 199.346,
"eval_steps_per_second": 1.045,
"step": 37000
},
{
"epoch": 15.31,
"learning_rate": 1.5213358070500926e-05,
"loss": 0.0491,
"step": 37500
},
{
"epoch": 15.31,
"eval_bleu": 0.4621,
"eval_gen_len": 19.0,
"eval_loss": 0.0445503406226635,
"eval_runtime": 130.6545,
"eval_samples_per_second": 199.94,
"eval_steps_per_second": 1.049,
"step": 37500
},
{
"epoch": 15.51,
"learning_rate": 1.4749536178107607e-05,
"loss": 0.0488,
"step": 38000
},
{
"epoch": 15.51,
"eval_bleu": 0.4624,
"eval_gen_len": 19.0,
"eval_loss": 0.044734835624694824,
"eval_runtime": 131.2382,
"eval_samples_per_second": 199.05,
"eval_steps_per_second": 1.044,
"step": 38000
},
{
"epoch": 15.71,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.0491,
"step": 38500
},
{
"epoch": 15.71,
"eval_bleu": 0.4627,
"eval_gen_len": 19.0,
"eval_loss": 0.044320594519376755,
"eval_runtime": 130.7903,
"eval_samples_per_second": 199.732,
"eval_steps_per_second": 1.047,
"step": 38500
},
{
"epoch": 15.92,
"learning_rate": 1.3821892393320964e-05,
"loss": 0.0487,
"step": 39000
},
{
"epoch": 15.92,
"eval_bleu": 0.4623,
"eval_gen_len": 19.0,
"eval_loss": 0.04423439875245094,
"eval_runtime": 130.6316,
"eval_samples_per_second": 199.975,
"eval_steps_per_second": 1.049,
"step": 39000
},
{
"epoch": 16.12,
"learning_rate": 1.3358070500927644e-05,
"loss": 0.0485,
"step": 39500
},
{
"epoch": 16.12,
"eval_bleu": 0.4624,
"eval_gen_len": 19.0,
"eval_loss": 0.04461168125271797,
"eval_runtime": 131.1245,
"eval_samples_per_second": 199.223,
"eval_steps_per_second": 1.045,
"step": 39500
},
{
"epoch": 16.33,
"learning_rate": 1.2894248608534323e-05,
"loss": 0.0479,
"step": 40000
},
{
"epoch": 16.33,
"eval_bleu": 0.4627,
"eval_gen_len": 19.0,
"eval_loss": 0.04443284496665001,
"eval_runtime": 138.8413,
"eval_samples_per_second": 188.15,
"eval_steps_per_second": 0.987,
"step": 40000
},
{
"epoch": 16.53,
"learning_rate": 1.2430426716141003e-05,
"loss": 0.0481,
"step": 40500
},
{
"epoch": 16.53,
"eval_bleu": 0.4628,
"eval_gen_len": 19.0,
"eval_loss": 0.044452179223299026,
"eval_runtime": 130.3846,
"eval_samples_per_second": 200.353,
"eval_steps_per_second": 1.051,
"step": 40500
},
{
"epoch": 16.73,
"learning_rate": 1.1966604823747682e-05,
"loss": 0.0481,
"step": 41000
},
{
"epoch": 16.73,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.04454488679766655,
"eval_runtime": 131.2398,
"eval_samples_per_second": 199.048,
"eval_steps_per_second": 1.044,
"step": 41000
},
{
"epoch": 16.94,
"learning_rate": 1.150278293135436e-05,
"loss": 0.0481,
"step": 41500
},
{
"epoch": 16.94,
"eval_bleu": 0.4629,
"eval_gen_len": 19.0,
"eval_loss": 0.0442616231739521,
"eval_runtime": 131.0927,
"eval_samples_per_second": 199.271,
"eval_steps_per_second": 1.045,
"step": 41500
},
{
"epoch": 17.14,
"learning_rate": 1.103896103896104e-05,
"loss": 0.0476,
"step": 42000
},
{
"epoch": 17.14,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.04419331252574921,
"eval_runtime": 130.0403,
"eval_samples_per_second": 200.884,
"eval_steps_per_second": 1.054,
"step": 42000
},
{
"epoch": 17.35,
"learning_rate": 1.0575139146567718e-05,
"loss": 0.0472,
"step": 42500
},
{
"epoch": 17.35,
"eval_bleu": 0.4622,
"eval_gen_len": 19.0,
"eval_loss": 0.04414073005318642,
"eval_runtime": 130.0275,
"eval_samples_per_second": 200.904,
"eval_steps_per_second": 1.054,
"step": 42500
},
{
"epoch": 17.55,
"learning_rate": 1.0111317254174398e-05,
"loss": 0.0473,
"step": 43000
},
{
"epoch": 17.55,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.04418780282139778,
"eval_runtime": 130.0799,
"eval_samples_per_second": 200.823,
"eval_steps_per_second": 1.053,
"step": 43000
},
{
"epoch": 17.76,
"learning_rate": 9.647495361781077e-06,
"loss": 0.047,
"step": 43500
},
{
"epoch": 17.76,
"eval_bleu": 0.4627,
"eval_gen_len": 19.0,
"eval_loss": 0.044407520443201065,
"eval_runtime": 130.0385,
"eval_samples_per_second": 200.887,
"eval_steps_per_second": 1.054,
"step": 43500
},
{
"epoch": 17.96,
"learning_rate": 9.183673469387756e-06,
"loss": 0.0473,
"step": 44000
},
{
"epoch": 17.96,
"eval_bleu": 0.4626,
"eval_gen_len": 19.0,
"eval_loss": 0.04396551474928856,
"eval_runtime": 130.0298,
"eval_samples_per_second": 200.9,
"eval_steps_per_second": 1.054,
"step": 44000
},
{
"epoch": 18.16,
"learning_rate": 8.719851576994434e-06,
"loss": 0.0471,
"step": 44500
},
{
"epoch": 18.16,
"eval_bleu": 0.4626,
"eval_gen_len": 19.0,
"eval_loss": 0.04414234310388565,
"eval_runtime": 130.1342,
"eval_samples_per_second": 200.739,
"eval_steps_per_second": 1.053,
"step": 44500
},
{
"epoch": 18.37,
"learning_rate": 8.256029684601113e-06,
"loss": 0.0465,
"step": 45000
},
{
"epoch": 18.37,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.04401896893978119,
"eval_runtime": 130.0762,
"eval_samples_per_second": 200.828,
"eval_steps_per_second": 1.053,
"step": 45000
},
{
"epoch": 18.57,
"learning_rate": 7.792207792207792e-06,
"loss": 0.0464,
"step": 45500
},
{
"epoch": 18.57,
"eval_bleu": 0.4626,
"eval_gen_len": 19.0,
"eval_loss": 0.044011421501636505,
"eval_runtime": 130.2257,
"eval_samples_per_second": 200.598,
"eval_steps_per_second": 1.052,
"step": 45500
},
{
"epoch": 18.78,
"learning_rate": 7.328385899814472e-06,
"loss": 0.0467,
"step": 46000
},
{
"epoch": 18.78,
"eval_bleu": 0.4624,
"eval_gen_len": 19.0,
"eval_loss": 0.04399794712662697,
"eval_runtime": 129.9749,
"eval_samples_per_second": 200.985,
"eval_steps_per_second": 1.054,
"step": 46000
},
{
"epoch": 18.98,
"learning_rate": 6.864564007421151e-06,
"loss": 0.0464,
"step": 46500
},
{
"epoch": 18.98,
"eval_bleu": 0.4626,
"eval_gen_len": 19.0,
"eval_loss": 0.043924346566200256,
"eval_runtime": 130.377,
"eval_samples_per_second": 200.365,
"eval_steps_per_second": 1.051,
"step": 46500
},
{
"epoch": 19.18,
"learning_rate": 6.40074211502783e-06,
"loss": 0.0459,
"step": 47000
},
{
"epoch": 19.18,
"eval_bleu": 0.4627,
"eval_gen_len": 19.0,
"eval_loss": 0.04413146525621414,
"eval_runtime": 130.3898,
"eval_samples_per_second": 200.345,
"eval_steps_per_second": 1.051,
"step": 47000
},
{
"epoch": 19.39,
"learning_rate": 5.936920222634509e-06,
"loss": 0.0459,
"step": 47500
},
{
"epoch": 19.39,
"eval_bleu": 0.4628,
"eval_gen_len": 19.0,
"eval_loss": 0.044018395245075226,
"eval_runtime": 130.4511,
"eval_samples_per_second": 200.251,
"eval_steps_per_second": 1.05,
"step": 47500
},
{
"epoch": 19.59,
"learning_rate": 5.473098330241188e-06,
"loss": 0.0462,
"step": 48000
},
{
"epoch": 19.59,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.04403121769428253,
"eval_runtime": 130.6104,
"eval_samples_per_second": 200.007,
"eval_steps_per_second": 1.049,
"step": 48000
},
{
"epoch": 19.8,
"learning_rate": 5.009276437847867e-06,
"loss": 0.0459,
"step": 48500
},
{
"epoch": 19.8,
"eval_bleu": 0.4625,
"eval_gen_len": 19.0,
"eval_loss": 0.043812066316604614,
"eval_runtime": 130.4073,
"eval_samples_per_second": 200.319,
"eval_steps_per_second": 1.051,
"step": 48500
},
{
"epoch": 20.0,
"learning_rate": 4.5454545454545455e-06,
"loss": 0.0461,
"step": 49000
},
{
"epoch": 20.0,
"eval_bleu": 0.4629,
"eval_gen_len": 19.0,
"eval_loss": 0.043816644698381424,
"eval_runtime": 130.4125,
"eval_samples_per_second": 200.311,
"eval_steps_per_second": 1.051,
"step": 49000
},
{
"epoch": 20.2,
"learning_rate": 4.081632653061224e-06,
"loss": 0.0459,
"step": 49500
},
{
"epoch": 20.2,
"eval_bleu": 0.4628,
"eval_gen_len": 19.0,
"eval_loss": 0.043995313346385956,
"eval_runtime": 130.1674,
"eval_samples_per_second": 200.688,
"eval_steps_per_second": 1.052,
"step": 49500
},
{
"epoch": 20.41,
"learning_rate": 3.6178107606679037e-06,
"loss": 0.0454,
"step": 50000
},
{
"epoch": 20.41,
"eval_bleu": 0.4628,
"eval_gen_len": 19.0,
"eval_loss": 0.0440911240875721,
"eval_runtime": 130.4665,
"eval_samples_per_second": 200.228,
"eval_steps_per_second": 1.05,
"step": 50000
},
{
"epoch": 20.61,
"learning_rate": 3.1539888682745827e-06,
"loss": 0.0456,
"step": 50500
},
{
"epoch": 20.61,
"eval_bleu": 0.4626,
"eval_gen_len": 19.0,
"eval_loss": 0.04386087507009506,
"eval_runtime": 129.8427,
"eval_samples_per_second": 201.19,
"eval_steps_per_second": 1.055,
"step": 50500
},
{
"epoch": 20.82,
"learning_rate": 2.690166975881262e-06,
"loss": 0.0455,
"step": 51000
},
{
"epoch": 20.82,
"eval_bleu": 0.4627,
"eval_gen_len": 19.0,
"eval_loss": 0.04383744299411774,
"eval_runtime": 130.4933,
"eval_samples_per_second": 200.187,
"eval_steps_per_second": 1.05,
"step": 51000
},
{
"epoch": 21.02,
"learning_rate": 2.226345083487941e-06,
"loss": 0.0454,
"step": 51500
},
{
"epoch": 21.02,
"eval_bleu": 0.4628,
"eval_gen_len": 19.0,
"eval_loss": 0.044012218713760376,
"eval_runtime": 130.1662,
"eval_samples_per_second": 200.69,
"eval_steps_per_second": 1.053,
"step": 51500
}
],
"logging_steps": 500,
"max_steps": 53900,
"num_input_tokens_seen": 0,
"num_train_epochs": 22,
"save_steps": 500,
"total_flos": 1.5131790882663137e+18,
"train_batch_size": 192,
"trial_name": null,
"trial_params": null
}