| { |
| "best_metric": 0.4629, |
| "best_model_checkpoint": "AraT5_FT_AraT5V2_Transaltion/checkpoint-41500", |
| "epoch": 21.020408163265305, |
| "eval_steps": 500, |
| "global_step": 51500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.953617810760668e-05, |
| "loss": 1.9101, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_bleu": 0.3871, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.29340660572052, |
| "eval_runtime": 130.614, |
| "eval_samples_per_second": 200.002, |
| "eval_steps_per_second": 1.049, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.907235621521336e-05, |
| "loss": 0.4711, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_bleu": 0.4212, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.20559465885162354, |
| "eval_runtime": 130.3147, |
| "eval_samples_per_second": 200.461, |
| "eval_steps_per_second": 1.051, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.860853432282004e-05, |
| "loss": 0.3416, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.61, |
| "eval_bleu": 0.4334, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.16219736635684967, |
| "eval_runtime": 129.566, |
| "eval_samples_per_second": 201.619, |
| "eval_steps_per_second": 1.057, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 4.814471243042672e-05, |
| "loss": 0.2762, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.82, |
| "eval_bleu": 0.4406, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.15120063722133636, |
| "eval_runtime": 129.7775, |
| "eval_samples_per_second": 201.291, |
| "eval_steps_per_second": 1.056, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7680890538033396e-05, |
| "loss": 0.2357, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_bleu": 0.4453, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.1276528686285019, |
| "eval_runtime": 130.1173, |
| "eval_samples_per_second": 200.765, |
| "eval_steps_per_second": 1.053, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 4.721706864564008e-05, |
| "loss": 0.2052, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_bleu": 0.4471, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.11476743966341019, |
| "eval_runtime": 129.921, |
| "eval_samples_per_second": 201.068, |
| "eval_steps_per_second": 1.054, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 4.675324675324675e-05, |
| "loss": 0.1844, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_bleu": 0.4491, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.10187335312366486, |
| "eval_runtime": 130.0031, |
| "eval_samples_per_second": 200.941, |
| "eval_steps_per_second": 1.054, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.628942486085344e-05, |
| "loss": 0.1694, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.63, |
| "eval_bleu": 0.4508, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.09269851446151733, |
| "eval_runtime": 129.8071, |
| "eval_samples_per_second": 201.245, |
| "eval_steps_per_second": 1.055, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 4.582560296846011e-05, |
| "loss": 0.1562, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_bleu": 0.4515, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.08425677567720413, |
| "eval_runtime": 129.5856, |
| "eval_samples_per_second": 201.589, |
| "eval_steps_per_second": 1.057, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 4.5361781076066796e-05, |
| "loss": 0.1461, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_bleu": 0.4531, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.08317717164754868, |
| "eval_runtime": 130.0012, |
| "eval_samples_per_second": 200.944, |
| "eval_steps_per_second": 1.054, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 4.4897959183673474e-05, |
| "loss": 0.1358, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_bleu": 0.4535, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.07654570788145065, |
| "eval_runtime": 129.977, |
| "eval_samples_per_second": 200.982, |
| "eval_steps_per_second": 1.054, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 4.4434137291280146e-05, |
| "loss": 0.1279, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.45, |
| "eval_bleu": 0.4547, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.07310701906681061, |
| "eval_runtime": 129.9269, |
| "eval_samples_per_second": 201.059, |
| "eval_steps_per_second": 1.054, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 4.397031539888683e-05, |
| "loss": 0.122, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.65, |
| "eval_bleu": 0.4549, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.07025206089019775, |
| "eval_runtime": 130.0454, |
| "eval_samples_per_second": 200.876, |
| "eval_steps_per_second": 1.053, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 4.3506493506493503e-05, |
| "loss": 0.1166, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.86, |
| "eval_bleu": 0.4563, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.06883203238248825, |
| "eval_runtime": 129.7879, |
| "eval_samples_per_second": 201.275, |
| "eval_steps_per_second": 1.056, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 4.304267161410019e-05, |
| "loss": 0.1113, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.06, |
| "eval_bleu": 0.456, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.06709539890289307, |
| "eval_runtime": 130.5194, |
| "eval_samples_per_second": 200.146, |
| "eval_steps_per_second": 1.05, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 4.257884972170687e-05, |
| "loss": 0.1063, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.27, |
| "eval_bleu": 0.4566, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.06505035609006882, |
| "eval_runtime": 130.1863, |
| "eval_samples_per_second": 200.659, |
| "eval_steps_per_second": 1.052, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 4.2115027829313546e-05, |
| "loss": 0.1023, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.47, |
| "eval_bleu": 0.4573, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0633101835846901, |
| "eval_runtime": 130.238, |
| "eval_samples_per_second": 200.579, |
| "eval_steps_per_second": 1.052, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 4.1651205936920225e-05, |
| "loss": 0.0996, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.67, |
| "eval_bleu": 0.4572, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.06185136362910271, |
| "eval_runtime": 130.2134, |
| "eval_samples_per_second": 200.617, |
| "eval_steps_per_second": 1.052, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 4.1187384044526903e-05, |
| "loss": 0.0963, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.88, |
| "eval_bleu": 0.458, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.061708223074674606, |
| "eval_runtime": 137.0848, |
| "eval_samples_per_second": 190.561, |
| "eval_steps_per_second": 0.999, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 4.072356215213358e-05, |
| "loss": 0.0927, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.08, |
| "eval_bleu": 0.4588, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0601879358291626, |
| "eval_runtime": 139.7185, |
| "eval_samples_per_second": 186.969, |
| "eval_steps_per_second": 0.981, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 4.025974025974026e-05, |
| "loss": 0.0902, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.29, |
| "eval_bleu": 0.459, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05851432681083679, |
| "eval_runtime": 138.3043, |
| "eval_samples_per_second": 188.881, |
| "eval_steps_per_second": 0.991, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 3.979591836734694e-05, |
| "loss": 0.0877, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.49, |
| "eval_bleu": 0.4594, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05787012353539467, |
| "eval_runtime": 137.0383, |
| "eval_samples_per_second": 190.626, |
| "eval_steps_per_second": 1.0, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 3.933209647495362e-05, |
| "loss": 0.0857, |
| "step": 11500 |
| }, |
| { |
| "epoch": 4.69, |
| "eval_bleu": 0.459, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05705270916223526, |
| "eval_runtime": 137.1566, |
| "eval_samples_per_second": 190.461, |
| "eval_steps_per_second": 0.999, |
| "step": 11500 |
| }, |
| { |
| "epoch": 4.9, |
| "learning_rate": 3.88682745825603e-05, |
| "loss": 0.0844, |
| "step": 12000 |
| }, |
| { |
| "epoch": 4.9, |
| "eval_bleu": 0.4591, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0567244328558445, |
| "eval_runtime": 136.9999, |
| "eval_samples_per_second": 190.679, |
| "eval_steps_per_second": 1.0, |
| "step": 12000 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 3.8404452690166975e-05, |
| "loss": 0.0822, |
| "step": 12500 |
| }, |
| { |
| "epoch": 5.1, |
| "eval_bleu": 0.4588, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.055685680359601974, |
| "eval_runtime": 136.8888, |
| "eval_samples_per_second": 190.834, |
| "eval_steps_per_second": 1.001, |
| "step": 12500 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 3.794063079777366e-05, |
| "loss": 0.0797, |
| "step": 13000 |
| }, |
| { |
| "epoch": 5.31, |
| "eval_bleu": 0.4593, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05490221455693245, |
| "eval_runtime": 136.7754, |
| "eval_samples_per_second": 190.992, |
| "eval_steps_per_second": 1.002, |
| "step": 13000 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 3.747680890538033e-05, |
| "loss": 0.0783, |
| "step": 13500 |
| }, |
| { |
| "epoch": 5.51, |
| "eval_bleu": 0.4592, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05454099550843239, |
| "eval_runtime": 136.6541, |
| "eval_samples_per_second": 191.161, |
| "eval_steps_per_second": 1.003, |
| "step": 13500 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 3.701298701298702e-05, |
| "loss": 0.0773, |
| "step": 14000 |
| }, |
| { |
| "epoch": 5.71, |
| "eval_bleu": 0.4601, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.054198332130908966, |
| "eval_runtime": 136.7896, |
| "eval_samples_per_second": 190.972, |
| "eval_steps_per_second": 1.002, |
| "step": 14000 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 3.654916512059369e-05, |
| "loss": 0.0759, |
| "step": 14500 |
| }, |
| { |
| "epoch": 5.92, |
| "eval_bleu": 0.46, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05303099378943443, |
| "eval_runtime": 137.0308, |
| "eval_samples_per_second": 190.636, |
| "eval_steps_per_second": 1.0, |
| "step": 14500 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 3.6085343228200375e-05, |
| "loss": 0.0742, |
| "step": 15000 |
| }, |
| { |
| "epoch": 6.12, |
| "eval_bleu": 0.4596, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.053217481821775436, |
| "eval_runtime": 136.5228, |
| "eval_samples_per_second": 191.345, |
| "eval_steps_per_second": 1.003, |
| "step": 15000 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 3.5621521335807054e-05, |
| "loss": 0.0723, |
| "step": 15500 |
| }, |
| { |
| "epoch": 6.33, |
| "eval_bleu": 0.461, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.052737053483724594, |
| "eval_runtime": 136.5843, |
| "eval_samples_per_second": 191.259, |
| "eval_steps_per_second": 1.003, |
| "step": 15500 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 3.515769944341373e-05, |
| "loss": 0.0717, |
| "step": 16000 |
| }, |
| { |
| "epoch": 6.53, |
| "eval_bleu": 0.4609, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.052782874554395676, |
| "eval_runtime": 136.5496, |
| "eval_samples_per_second": 191.308, |
| "eval_steps_per_second": 1.003, |
| "step": 16000 |
| }, |
| { |
| "epoch": 6.73, |
| "learning_rate": 3.469387755102041e-05, |
| "loss": 0.0711, |
| "step": 16500 |
| }, |
| { |
| "epoch": 6.73, |
| "eval_bleu": 0.4605, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05168753117322922, |
| "eval_runtime": 136.5216, |
| "eval_samples_per_second": 191.347, |
| "eval_steps_per_second": 1.004, |
| "step": 16500 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 3.423005565862709e-05, |
| "loss": 0.0701, |
| "step": 17000 |
| }, |
| { |
| "epoch": 6.94, |
| "eval_bleu": 0.461, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05145658180117607, |
| "eval_runtime": 136.9443, |
| "eval_samples_per_second": 190.756, |
| "eval_steps_per_second": 1.0, |
| "step": 17000 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 3.376623376623377e-05, |
| "loss": 0.0686, |
| "step": 17500 |
| }, |
| { |
| "epoch": 7.14, |
| "eval_bleu": 0.4615, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.051403045654296875, |
| "eval_runtime": 137.1116, |
| "eval_samples_per_second": 190.524, |
| "eval_steps_per_second": 0.999, |
| "step": 17500 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 3.330241187384045e-05, |
| "loss": 0.0673, |
| "step": 18000 |
| }, |
| { |
| "epoch": 7.35, |
| "eval_bleu": 0.4609, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05048130825161934, |
| "eval_runtime": 136.7447, |
| "eval_samples_per_second": 191.035, |
| "eval_steps_per_second": 1.002, |
| "step": 18000 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 3.2838589981447126e-05, |
| "loss": 0.0669, |
| "step": 18500 |
| }, |
| { |
| "epoch": 7.55, |
| "eval_bleu": 0.4608, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.05009736865758896, |
| "eval_runtime": 136.3353, |
| "eval_samples_per_second": 191.608, |
| "eval_steps_per_second": 1.005, |
| "step": 18500 |
| }, |
| { |
| "epoch": 7.76, |
| "learning_rate": 3.2374768089053805e-05, |
| "loss": 0.0658, |
| "step": 19000 |
| }, |
| { |
| "epoch": 7.76, |
| "eval_bleu": 0.461, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.050111617892980576, |
| "eval_runtime": 136.6576, |
| "eval_samples_per_second": 191.157, |
| "eval_steps_per_second": 1.003, |
| "step": 19000 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 3.191094619666048e-05, |
| "loss": 0.0656, |
| "step": 19500 |
| }, |
| { |
| "epoch": 7.96, |
| "eval_bleu": 0.4614, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04940654709935188, |
| "eval_runtime": 136.917, |
| "eval_samples_per_second": 190.794, |
| "eval_steps_per_second": 1.001, |
| "step": 19500 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 3.144712430426716e-05, |
| "loss": 0.0639, |
| "step": 20000 |
| }, |
| { |
| "epoch": 8.16, |
| "eval_bleu": 0.4613, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.049783505499362946, |
| "eval_runtime": 136.756, |
| "eval_samples_per_second": 191.019, |
| "eval_steps_per_second": 1.002, |
| "step": 20000 |
| }, |
| { |
| "epoch": 8.37, |
| "learning_rate": 3.098330241187384e-05, |
| "loss": 0.0627, |
| "step": 20500 |
| }, |
| { |
| "epoch": 8.37, |
| "eval_bleu": 0.4615, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.049393024295568466, |
| "eval_runtime": 136.8452, |
| "eval_samples_per_second": 190.895, |
| "eval_steps_per_second": 1.001, |
| "step": 20500 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 3.051948051948052e-05, |
| "loss": 0.063, |
| "step": 21000 |
| }, |
| { |
| "epoch": 8.57, |
| "eval_bleu": 0.4614, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0488428920507431, |
| "eval_runtime": 130.0772, |
| "eval_samples_per_second": 200.827, |
| "eval_steps_per_second": 1.053, |
| "step": 21000 |
| }, |
| { |
| "epoch": 8.78, |
| "learning_rate": 3.00556586270872e-05, |
| "loss": 0.0626, |
| "step": 21500 |
| }, |
| { |
| "epoch": 8.78, |
| "eval_bleu": 0.4612, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.048447445034980774, |
| "eval_runtime": 130.2418, |
| "eval_samples_per_second": 200.573, |
| "eval_steps_per_second": 1.052, |
| "step": 21500 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 2.959183673469388e-05, |
| "loss": 0.062, |
| "step": 22000 |
| }, |
| { |
| "epoch": 8.98, |
| "eval_bleu": 0.4616, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04808622598648071, |
| "eval_runtime": 130.5977, |
| "eval_samples_per_second": 200.027, |
| "eval_steps_per_second": 1.049, |
| "step": 22000 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 2.9128014842300562e-05, |
| "loss": 0.0603, |
| "step": 22500 |
| }, |
| { |
| "epoch": 9.18, |
| "eval_bleu": 0.4619, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04833626002073288, |
| "eval_runtime": 130.0816, |
| "eval_samples_per_second": 200.82, |
| "eval_steps_per_second": 1.053, |
| "step": 22500 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 2.8664192949907237e-05, |
| "loss": 0.0598, |
| "step": 23000 |
| }, |
| { |
| "epoch": 9.39, |
| "eval_bleu": 0.4616, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.048464007675647736, |
| "eval_runtime": 129.8908, |
| "eval_samples_per_second": 201.115, |
| "eval_steps_per_second": 1.055, |
| "step": 23000 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 2.8200371057513912e-05, |
| "loss": 0.0595, |
| "step": 23500 |
| }, |
| { |
| "epoch": 9.59, |
| "eval_bleu": 0.4617, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.048219963908195496, |
| "eval_runtime": 130.5366, |
| "eval_samples_per_second": 200.12, |
| "eval_steps_per_second": 1.05, |
| "step": 23500 |
| }, |
| { |
| "epoch": 9.8, |
| "learning_rate": 2.7736549165120594e-05, |
| "loss": 0.0592, |
| "step": 24000 |
| }, |
| { |
| "epoch": 9.8, |
| "eval_bleu": 0.4612, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04728322476148605, |
| "eval_runtime": 130.4335, |
| "eval_samples_per_second": 200.278, |
| "eval_steps_per_second": 1.05, |
| "step": 24000 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 0.0591, |
| "step": 24500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_bleu": 0.4618, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04734385386109352, |
| "eval_runtime": 130.0364, |
| "eval_samples_per_second": 200.89, |
| "eval_steps_per_second": 1.054, |
| "step": 24500 |
| }, |
| { |
| "epoch": 10.2, |
| "learning_rate": 2.6808905380333955e-05, |
| "loss": 0.0574, |
| "step": 25000 |
| }, |
| { |
| "epoch": 10.2, |
| "eval_bleu": 0.4617, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04747864603996277, |
| "eval_runtime": 130.0909, |
| "eval_samples_per_second": 200.806, |
| "eval_steps_per_second": 1.053, |
| "step": 25000 |
| }, |
| { |
| "epoch": 10.41, |
| "learning_rate": 2.634508348794063e-05, |
| "loss": 0.0573, |
| "step": 25500 |
| }, |
| { |
| "epoch": 10.41, |
| "eval_bleu": 0.462, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04742683470249176, |
| "eval_runtime": 129.9305, |
| "eval_samples_per_second": 201.054, |
| "eval_steps_per_second": 1.054, |
| "step": 25500 |
| }, |
| { |
| "epoch": 10.61, |
| "learning_rate": 2.5881261595547312e-05, |
| "loss": 0.0565, |
| "step": 26000 |
| }, |
| { |
| "epoch": 10.61, |
| "eval_bleu": 0.4618, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.046983037143945694, |
| "eval_runtime": 130.4351, |
| "eval_samples_per_second": 200.276, |
| "eval_steps_per_second": 1.05, |
| "step": 26000 |
| }, |
| { |
| "epoch": 10.82, |
| "learning_rate": 2.5417439703153988e-05, |
| "loss": 0.0569, |
| "step": 26500 |
| }, |
| { |
| "epoch": 10.82, |
| "eval_bleu": 0.4619, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04676728695631027, |
| "eval_runtime": 130.1051, |
| "eval_samples_per_second": 200.784, |
| "eval_steps_per_second": 1.053, |
| "step": 26500 |
| }, |
| { |
| "epoch": 11.02, |
| "learning_rate": 2.495361781076067e-05, |
| "loss": 0.0566, |
| "step": 27000 |
| }, |
| { |
| "epoch": 11.02, |
| "eval_bleu": 0.4619, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0468904972076416, |
| "eval_runtime": 130.194, |
| "eval_samples_per_second": 200.647, |
| "eval_steps_per_second": 1.052, |
| "step": 27000 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 2.448979591836735e-05, |
| "loss": 0.0552, |
| "step": 27500 |
| }, |
| { |
| "epoch": 11.22, |
| "eval_bleu": 0.462, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04681343212723732, |
| "eval_runtime": 130.2625, |
| "eval_samples_per_second": 200.541, |
| "eval_steps_per_second": 1.052, |
| "step": 27500 |
| }, |
| { |
| "epoch": 11.43, |
| "learning_rate": 2.4025974025974027e-05, |
| "loss": 0.0549, |
| "step": 28000 |
| }, |
| { |
| "epoch": 11.43, |
| "eval_bleu": 0.462, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04649100452661514, |
| "eval_runtime": 130.2691, |
| "eval_samples_per_second": 200.531, |
| "eval_steps_per_second": 1.052, |
| "step": 28000 |
| }, |
| { |
| "epoch": 11.63, |
| "learning_rate": 2.3562152133580706e-05, |
| "loss": 0.055, |
| "step": 28500 |
| }, |
| { |
| "epoch": 11.63, |
| "eval_bleu": 0.4621, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04652680456638336, |
| "eval_runtime": 130.1537, |
| "eval_samples_per_second": 200.709, |
| "eval_steps_per_second": 1.053, |
| "step": 28500 |
| }, |
| { |
| "epoch": 11.84, |
| "learning_rate": 2.3098330241187384e-05, |
| "loss": 0.0547, |
| "step": 29000 |
| }, |
| { |
| "epoch": 11.84, |
| "eval_bleu": 0.4623, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.045862022787332535, |
| "eval_runtime": 130.0719, |
| "eval_samples_per_second": 200.835, |
| "eval_steps_per_second": 1.053, |
| "step": 29000 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 2.2634508348794063e-05, |
| "loss": 0.0545, |
| "step": 29500 |
| }, |
| { |
| "epoch": 12.04, |
| "eval_bleu": 0.4626, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04637685418128967, |
| "eval_runtime": 130.2373, |
| "eval_samples_per_second": 200.58, |
| "eval_steps_per_second": 1.052, |
| "step": 29500 |
| }, |
| { |
| "epoch": 12.24, |
| "learning_rate": 2.2170686456400745e-05, |
| "loss": 0.0533, |
| "step": 30000 |
| }, |
| { |
| "epoch": 12.24, |
| "eval_bleu": 0.4622, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.045851390808820724, |
| "eval_runtime": 130.0575, |
| "eval_samples_per_second": 200.857, |
| "eval_steps_per_second": 1.053, |
| "step": 30000 |
| }, |
| { |
| "epoch": 12.45, |
| "learning_rate": 2.1706864564007424e-05, |
| "loss": 0.0533, |
| "step": 30500 |
| }, |
| { |
| "epoch": 12.45, |
| "eval_bleu": 0.4618, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.045971017330884933, |
| "eval_runtime": 130.0582, |
| "eval_samples_per_second": 200.856, |
| "eval_steps_per_second": 1.053, |
| "step": 30500 |
| }, |
| { |
| "epoch": 12.65, |
| "learning_rate": 2.1243042671614102e-05, |
| "loss": 0.053, |
| "step": 31000 |
| }, |
| { |
| "epoch": 12.65, |
| "eval_bleu": 0.462, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04581404849886894, |
| "eval_runtime": 130.0729, |
| "eval_samples_per_second": 200.833, |
| "eval_steps_per_second": 1.053, |
| "step": 31000 |
| }, |
| { |
| "epoch": 12.86, |
| "learning_rate": 2.077922077922078e-05, |
| "loss": 0.0527, |
| "step": 31500 |
| }, |
| { |
| "epoch": 12.86, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.046112846583127975, |
| "eval_runtime": 130.0922, |
| "eval_samples_per_second": 200.804, |
| "eval_steps_per_second": 1.053, |
| "step": 31500 |
| }, |
| { |
| "epoch": 13.06, |
| "learning_rate": 2.031539888682746e-05, |
| "loss": 0.0523, |
| "step": 32000 |
| }, |
| { |
| "epoch": 13.06, |
| "eval_bleu": 0.4621, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04600910842418671, |
| "eval_runtime": 129.8407, |
| "eval_samples_per_second": 201.193, |
| "eval_steps_per_second": 1.055, |
| "step": 32000 |
| }, |
| { |
| "epoch": 13.27, |
| "learning_rate": 1.9851576994434138e-05, |
| "loss": 0.0516, |
| "step": 32500 |
| }, |
| { |
| "epoch": 13.27, |
| "eval_bleu": 0.4623, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04571113362908363, |
| "eval_runtime": 129.9111, |
| "eval_samples_per_second": 201.084, |
| "eval_steps_per_second": 1.055, |
| "step": 32500 |
| }, |
| { |
| "epoch": 13.47, |
| "learning_rate": 1.9387755102040817e-05, |
| "loss": 0.0515, |
| "step": 33000 |
| }, |
| { |
| "epoch": 13.47, |
| "eval_bleu": 0.4621, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0457097552716732, |
| "eval_runtime": 130.0314, |
| "eval_samples_per_second": 200.898, |
| "eval_steps_per_second": 1.054, |
| "step": 33000 |
| }, |
| { |
| "epoch": 13.67, |
| "learning_rate": 1.8923933209647496e-05, |
| "loss": 0.0517, |
| "step": 33500 |
| }, |
| { |
| "epoch": 13.67, |
| "eval_bleu": 0.4621, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04519006237387657, |
| "eval_runtime": 130.4174, |
| "eval_samples_per_second": 200.303, |
| "eval_steps_per_second": 1.05, |
| "step": 33500 |
| }, |
| { |
| "epoch": 13.88, |
| "learning_rate": 1.8460111317254174e-05, |
| "loss": 0.0512, |
| "step": 34000 |
| }, |
| { |
| "epoch": 13.88, |
| "eval_bleu": 0.4626, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.045346666127443314, |
| "eval_runtime": 130.758, |
| "eval_samples_per_second": 199.781, |
| "eval_steps_per_second": 1.048, |
| "step": 34000 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 1.7996289424860853e-05, |
| "loss": 0.0513, |
| "step": 34500 |
| }, |
| { |
| "epoch": 14.08, |
| "eval_bleu": 0.4621, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04493604227900505, |
| "eval_runtime": 130.5867, |
| "eval_samples_per_second": 200.043, |
| "eval_steps_per_second": 1.049, |
| "step": 34500 |
| }, |
| { |
| "epoch": 14.29, |
| "learning_rate": 1.7532467532467535e-05, |
| "loss": 0.0502, |
| "step": 35000 |
| }, |
| { |
| "epoch": 14.29, |
| "eval_bleu": 0.462, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044917818158864975, |
| "eval_runtime": 130.8497, |
| "eval_samples_per_second": 199.641, |
| "eval_steps_per_second": 1.047, |
| "step": 35000 |
| }, |
| { |
| "epoch": 14.49, |
| "learning_rate": 1.7068645640074214e-05, |
| "loss": 0.0501, |
| "step": 35500 |
| }, |
| { |
| "epoch": 14.49, |
| "eval_bleu": 0.4621, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.045045047998428345, |
| "eval_runtime": 130.6038, |
| "eval_samples_per_second": 200.017, |
| "eval_steps_per_second": 1.049, |
| "step": 35500 |
| }, |
| { |
| "epoch": 14.69, |
| "learning_rate": 1.6604823747680892e-05, |
| "loss": 0.0504, |
| "step": 36000 |
| }, |
| { |
| "epoch": 14.69, |
| "eval_bleu": 0.4623, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04437680542469025, |
| "eval_runtime": 134.994, |
| "eval_samples_per_second": 193.512, |
| "eval_steps_per_second": 1.015, |
| "step": 36000 |
| }, |
| { |
| "epoch": 14.9, |
| "learning_rate": 1.614100185528757e-05, |
| "loss": 0.0499, |
| "step": 36500 |
| }, |
| { |
| "epoch": 14.9, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04487466439604759, |
| "eval_runtime": 135.3356, |
| "eval_samples_per_second": 193.024, |
| "eval_steps_per_second": 1.012, |
| "step": 36500 |
| }, |
| { |
| "epoch": 15.1, |
| "learning_rate": 1.567717996289425e-05, |
| "loss": 0.0496, |
| "step": 37000 |
| }, |
| { |
| "epoch": 15.1, |
| "eval_bleu": 0.4622, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04475782439112663, |
| "eval_runtime": 131.0438, |
| "eval_samples_per_second": 199.346, |
| "eval_steps_per_second": 1.045, |
| "step": 37000 |
| }, |
| { |
| "epoch": 15.31, |
| "learning_rate": 1.5213358070500926e-05, |
| "loss": 0.0491, |
| "step": 37500 |
| }, |
| { |
| "epoch": 15.31, |
| "eval_bleu": 0.4621, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0445503406226635, |
| "eval_runtime": 130.6545, |
| "eval_samples_per_second": 199.94, |
| "eval_steps_per_second": 1.049, |
| "step": 37500 |
| }, |
| { |
| "epoch": 15.51, |
| "learning_rate": 1.4749536178107607e-05, |
| "loss": 0.0488, |
| "step": 38000 |
| }, |
| { |
| "epoch": 15.51, |
| "eval_bleu": 0.4624, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044734835624694824, |
| "eval_runtime": 131.2382, |
| "eval_samples_per_second": 199.05, |
| "eval_steps_per_second": 1.044, |
| "step": 38000 |
| }, |
| { |
| "epoch": 15.71, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 0.0491, |
| "step": 38500 |
| }, |
| { |
| "epoch": 15.71, |
| "eval_bleu": 0.4627, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044320594519376755, |
| "eval_runtime": 130.7903, |
| "eval_samples_per_second": 199.732, |
| "eval_steps_per_second": 1.047, |
| "step": 38500 |
| }, |
| { |
| "epoch": 15.92, |
| "learning_rate": 1.3821892393320964e-05, |
| "loss": 0.0487, |
| "step": 39000 |
| }, |
| { |
| "epoch": 15.92, |
| "eval_bleu": 0.4623, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04423439875245094, |
| "eval_runtime": 130.6316, |
| "eval_samples_per_second": 199.975, |
| "eval_steps_per_second": 1.049, |
| "step": 39000 |
| }, |
| { |
| "epoch": 16.12, |
| "learning_rate": 1.3358070500927644e-05, |
| "loss": 0.0485, |
| "step": 39500 |
| }, |
| { |
| "epoch": 16.12, |
| "eval_bleu": 0.4624, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04461168125271797, |
| "eval_runtime": 131.1245, |
| "eval_samples_per_second": 199.223, |
| "eval_steps_per_second": 1.045, |
| "step": 39500 |
| }, |
| { |
| "epoch": 16.33, |
| "learning_rate": 1.2894248608534323e-05, |
| "loss": 0.0479, |
| "step": 40000 |
| }, |
| { |
| "epoch": 16.33, |
| "eval_bleu": 0.4627, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04443284496665001, |
| "eval_runtime": 138.8413, |
| "eval_samples_per_second": 188.15, |
| "eval_steps_per_second": 0.987, |
| "step": 40000 |
| }, |
| { |
| "epoch": 16.53, |
| "learning_rate": 1.2430426716141003e-05, |
| "loss": 0.0481, |
| "step": 40500 |
| }, |
| { |
| "epoch": 16.53, |
| "eval_bleu": 0.4628, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044452179223299026, |
| "eval_runtime": 130.3846, |
| "eval_samples_per_second": 200.353, |
| "eval_steps_per_second": 1.051, |
| "step": 40500 |
| }, |
| { |
| "epoch": 16.73, |
| "learning_rate": 1.1966604823747682e-05, |
| "loss": 0.0481, |
| "step": 41000 |
| }, |
| { |
| "epoch": 16.73, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04454488679766655, |
| "eval_runtime": 131.2398, |
| "eval_samples_per_second": 199.048, |
| "eval_steps_per_second": 1.044, |
| "step": 41000 |
| }, |
| { |
| "epoch": 16.94, |
| "learning_rate": 1.150278293135436e-05, |
| "loss": 0.0481, |
| "step": 41500 |
| }, |
| { |
| "epoch": 16.94, |
| "eval_bleu": 0.4629, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0442616231739521, |
| "eval_runtime": 131.0927, |
| "eval_samples_per_second": 199.271, |
| "eval_steps_per_second": 1.045, |
| "step": 41500 |
| }, |
| { |
| "epoch": 17.14, |
| "learning_rate": 1.103896103896104e-05, |
| "loss": 0.0476, |
| "step": 42000 |
| }, |
| { |
| "epoch": 17.14, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04419331252574921, |
| "eval_runtime": 130.0403, |
| "eval_samples_per_second": 200.884, |
| "eval_steps_per_second": 1.054, |
| "step": 42000 |
| }, |
| { |
| "epoch": 17.35, |
| "learning_rate": 1.0575139146567718e-05, |
| "loss": 0.0472, |
| "step": 42500 |
| }, |
| { |
| "epoch": 17.35, |
| "eval_bleu": 0.4622, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04414073005318642, |
| "eval_runtime": 130.0275, |
| "eval_samples_per_second": 200.904, |
| "eval_steps_per_second": 1.054, |
| "step": 42500 |
| }, |
| { |
| "epoch": 17.55, |
| "learning_rate": 1.0111317254174398e-05, |
| "loss": 0.0473, |
| "step": 43000 |
| }, |
| { |
| "epoch": 17.55, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04418780282139778, |
| "eval_runtime": 130.0799, |
| "eval_samples_per_second": 200.823, |
| "eval_steps_per_second": 1.053, |
| "step": 43000 |
| }, |
| { |
| "epoch": 17.76, |
| "learning_rate": 9.647495361781077e-06, |
| "loss": 0.047, |
| "step": 43500 |
| }, |
| { |
| "epoch": 17.76, |
| "eval_bleu": 0.4627, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044407520443201065, |
| "eval_runtime": 130.0385, |
| "eval_samples_per_second": 200.887, |
| "eval_steps_per_second": 1.054, |
| "step": 43500 |
| }, |
| { |
| "epoch": 17.96, |
| "learning_rate": 9.183673469387756e-06, |
| "loss": 0.0473, |
| "step": 44000 |
| }, |
| { |
| "epoch": 17.96, |
| "eval_bleu": 0.4626, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04396551474928856, |
| "eval_runtime": 130.0298, |
| "eval_samples_per_second": 200.9, |
| "eval_steps_per_second": 1.054, |
| "step": 44000 |
| }, |
| { |
| "epoch": 18.16, |
| "learning_rate": 8.719851576994434e-06, |
| "loss": 0.0471, |
| "step": 44500 |
| }, |
| { |
| "epoch": 18.16, |
| "eval_bleu": 0.4626, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04414234310388565, |
| "eval_runtime": 130.1342, |
| "eval_samples_per_second": 200.739, |
| "eval_steps_per_second": 1.053, |
| "step": 44500 |
| }, |
| { |
| "epoch": 18.37, |
| "learning_rate": 8.256029684601113e-06, |
| "loss": 0.0465, |
| "step": 45000 |
| }, |
| { |
| "epoch": 18.37, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04401896893978119, |
| "eval_runtime": 130.0762, |
| "eval_samples_per_second": 200.828, |
| "eval_steps_per_second": 1.053, |
| "step": 45000 |
| }, |
| { |
| "epoch": 18.57, |
| "learning_rate": 7.792207792207792e-06, |
| "loss": 0.0464, |
| "step": 45500 |
| }, |
| { |
| "epoch": 18.57, |
| "eval_bleu": 0.4626, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044011421501636505, |
| "eval_runtime": 130.2257, |
| "eval_samples_per_second": 200.598, |
| "eval_steps_per_second": 1.052, |
| "step": 45500 |
| }, |
| { |
| "epoch": 18.78, |
| "learning_rate": 7.328385899814472e-06, |
| "loss": 0.0467, |
| "step": 46000 |
| }, |
| { |
| "epoch": 18.78, |
| "eval_bleu": 0.4624, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04399794712662697, |
| "eval_runtime": 129.9749, |
| "eval_samples_per_second": 200.985, |
| "eval_steps_per_second": 1.054, |
| "step": 46000 |
| }, |
| { |
| "epoch": 18.98, |
| "learning_rate": 6.864564007421151e-06, |
| "loss": 0.0464, |
| "step": 46500 |
| }, |
| { |
| "epoch": 18.98, |
| "eval_bleu": 0.4626, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.043924346566200256, |
| "eval_runtime": 130.377, |
| "eval_samples_per_second": 200.365, |
| "eval_steps_per_second": 1.051, |
| "step": 46500 |
| }, |
| { |
| "epoch": 19.18, |
| "learning_rate": 6.40074211502783e-06, |
| "loss": 0.0459, |
| "step": 47000 |
| }, |
| { |
| "epoch": 19.18, |
| "eval_bleu": 0.4627, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04413146525621414, |
| "eval_runtime": 130.3898, |
| "eval_samples_per_second": 200.345, |
| "eval_steps_per_second": 1.051, |
| "step": 47000 |
| }, |
| { |
| "epoch": 19.39, |
| "learning_rate": 5.936920222634509e-06, |
| "loss": 0.0459, |
| "step": 47500 |
| }, |
| { |
| "epoch": 19.39, |
| "eval_bleu": 0.4628, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044018395245075226, |
| "eval_runtime": 130.4511, |
| "eval_samples_per_second": 200.251, |
| "eval_steps_per_second": 1.05, |
| "step": 47500 |
| }, |
| { |
| "epoch": 19.59, |
| "learning_rate": 5.473098330241188e-06, |
| "loss": 0.0462, |
| "step": 48000 |
| }, |
| { |
| "epoch": 19.59, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04403121769428253, |
| "eval_runtime": 130.6104, |
| "eval_samples_per_second": 200.007, |
| "eval_steps_per_second": 1.049, |
| "step": 48000 |
| }, |
| { |
| "epoch": 19.8, |
| "learning_rate": 5.009276437847867e-06, |
| "loss": 0.0459, |
| "step": 48500 |
| }, |
| { |
| "epoch": 19.8, |
| "eval_bleu": 0.4625, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.043812066316604614, |
| "eval_runtime": 130.4073, |
| "eval_samples_per_second": 200.319, |
| "eval_steps_per_second": 1.051, |
| "step": 48500 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.0461, |
| "step": 49000 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_bleu": 0.4629, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.043816644698381424, |
| "eval_runtime": 130.4125, |
| "eval_samples_per_second": 200.311, |
| "eval_steps_per_second": 1.051, |
| "step": 49000 |
| }, |
| { |
| "epoch": 20.2, |
| "learning_rate": 4.081632653061224e-06, |
| "loss": 0.0459, |
| "step": 49500 |
| }, |
| { |
| "epoch": 20.2, |
| "eval_bleu": 0.4628, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.043995313346385956, |
| "eval_runtime": 130.1674, |
| "eval_samples_per_second": 200.688, |
| "eval_steps_per_second": 1.052, |
| "step": 49500 |
| }, |
| { |
| "epoch": 20.41, |
| "learning_rate": 3.6178107606679037e-06, |
| "loss": 0.0454, |
| "step": 50000 |
| }, |
| { |
| "epoch": 20.41, |
| "eval_bleu": 0.4628, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.0440911240875721, |
| "eval_runtime": 130.4665, |
| "eval_samples_per_second": 200.228, |
| "eval_steps_per_second": 1.05, |
| "step": 50000 |
| }, |
| { |
| "epoch": 20.61, |
| "learning_rate": 3.1539888682745827e-06, |
| "loss": 0.0456, |
| "step": 50500 |
| }, |
| { |
| "epoch": 20.61, |
| "eval_bleu": 0.4626, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04386087507009506, |
| "eval_runtime": 129.8427, |
| "eval_samples_per_second": 201.19, |
| "eval_steps_per_second": 1.055, |
| "step": 50500 |
| }, |
| { |
| "epoch": 20.82, |
| "learning_rate": 2.690166975881262e-06, |
| "loss": 0.0455, |
| "step": 51000 |
| }, |
| { |
| "epoch": 20.82, |
| "eval_bleu": 0.4627, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.04383744299411774, |
| "eval_runtime": 130.4933, |
| "eval_samples_per_second": 200.187, |
| "eval_steps_per_second": 1.05, |
| "step": 51000 |
| }, |
| { |
| "epoch": 21.02, |
| "learning_rate": 2.226345083487941e-06, |
| "loss": 0.0454, |
| "step": 51500 |
| }, |
| { |
| "epoch": 21.02, |
| "eval_bleu": 0.4628, |
| "eval_gen_len": 19.0, |
| "eval_loss": 0.044012218713760376, |
| "eval_runtime": 130.1662, |
| "eval_samples_per_second": 200.69, |
| "eval_steps_per_second": 1.053, |
| "step": 51500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 53900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 22, |
| "save_steps": 500, |
| "total_flos": 1.5131790882663137e+18, |
| "train_batch_size": 192, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|