| { |
| "best_metric": 0.40765267610549927, |
| "best_model_checkpoint": "m2m100_418M_finetuned_fr_to_sw/checkpoint-32000", |
| "epoch": 5.977956286194657, |
| "eval_steps": 1000, |
| "global_step": 32000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.992527554642257e-05, |
| "loss": 2.3549, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9850551092845137e-05, |
| "loss": 0.8843, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_bleu": 10.6171, |
| "eval_gen_len": 60.1469, |
| "eval_loss": 0.76387619972229, |
| "eval_runtime": 3073.3727, |
| "eval_samples_per_second": 3.483, |
| "eval_steps_per_second": 0.436, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.97758266392677e-05, |
| "loss": 0.7804, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9701102185690268e-05, |
| "loss": 0.7269, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_bleu": 14.3741, |
| "eval_gen_len": 61.2398, |
| "eval_loss": 0.6497731804847717, |
| "eval_runtime": 3071.4883, |
| "eval_samples_per_second": 3.486, |
| "eval_steps_per_second": 0.436, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.9626377732112836e-05, |
| "loss": 0.6685, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.9551653278535403e-05, |
| "loss": 0.6504, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_bleu": 19.1778, |
| "eval_gen_len": 54.184, |
| "eval_loss": 0.5995421409606934, |
| "eval_runtime": 2745.8255, |
| "eval_samples_per_second": 3.899, |
| "eval_steps_per_second": 0.488, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.9476928824957967e-05, |
| "loss": 0.6243, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.9402204371380535e-05, |
| "loss": 0.6093, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_bleu": 20.3586, |
| "eval_gen_len": 56.1132, |
| "eval_loss": 0.5621405243873596, |
| "eval_runtime": 2804.8241, |
| "eval_samples_per_second": 3.817, |
| "eval_steps_per_second": 0.477, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.9327479917803102e-05, |
| "loss": 0.5779, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.925275546422567e-05, |
| "loss": 0.58, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_bleu": 22.497, |
| "eval_gen_len": 53.262, |
| "eval_loss": 0.5317678451538086, |
| "eval_runtime": 2510.0734, |
| "eval_samples_per_second": 4.265, |
| "eval_steps_per_second": 0.533, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 1.9178031010648237e-05, |
| "loss": 0.5499, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.91033065570708e-05, |
| "loss": 0.5067, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_bleu": 24.1584, |
| "eval_gen_len": 56.1712, |
| "eval_loss": 0.5155890583992004, |
| "eval_runtime": 2540.4899, |
| "eval_samples_per_second": 4.214, |
| "eval_steps_per_second": 0.527, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.902858210349337e-05, |
| "loss": 0.5104, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.8953857649915936e-05, |
| "loss": 0.4985, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_bleu": 24.902, |
| "eval_gen_len": 55.1034, |
| "eval_loss": 0.5012524127960205, |
| "eval_runtime": 2433.6643, |
| "eval_samples_per_second": 4.399, |
| "eval_steps_per_second": 0.55, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.8879133196338504e-05, |
| "loss": 0.4949, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.880440874276107e-05, |
| "loss": 0.4861, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.49, |
| "eval_bleu": 25.8945, |
| "eval_gen_len": 55.7148, |
| "eval_loss": 0.48973962664604187, |
| "eval_runtime": 2476.4919, |
| "eval_samples_per_second": 4.323, |
| "eval_steps_per_second": 0.541, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 1.8729684289183636e-05, |
| "loss": 0.4827, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1.8654959835606203e-05, |
| "loss": 0.4789, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.68, |
| "eval_bleu": 26.2593, |
| "eval_gen_len": 54.9688, |
| "eval_loss": 0.4776358902454376, |
| "eval_runtime": 2500.3532, |
| "eval_samples_per_second": 4.282, |
| "eval_steps_per_second": 0.536, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.858023538202877e-05, |
| "loss": 0.4757, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.8505510928451338e-05, |
| "loss": 0.4748, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_bleu": 26.8308, |
| "eval_gen_len": 53.234, |
| "eval_loss": 0.4675232470035553, |
| "eval_runtime": 2354.2254, |
| "eval_samples_per_second": 4.548, |
| "eval_steps_per_second": 0.569, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.8430786474873902e-05, |
| "loss": 0.4721, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.835606202129647e-05, |
| "loss": 0.4365, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_bleu": 27.8127, |
| "eval_gen_len": 53.7894, |
| "eval_loss": 0.46269142627716064, |
| "eval_runtime": 2320.7276, |
| "eval_samples_per_second": 4.613, |
| "eval_steps_per_second": 0.577, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.8281337567719037e-05, |
| "loss": 0.4124, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.8206613114141605e-05, |
| "loss": 0.4065, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_bleu": 28.1334, |
| "eval_gen_len": 52.4625, |
| "eval_loss": 0.4552680253982544, |
| "eval_runtime": 2260.3502, |
| "eval_samples_per_second": 4.736, |
| "eval_steps_per_second": 0.592, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.8131888660564172e-05, |
| "loss": 0.4236, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.8057164206986736e-05, |
| "loss": 0.4159, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_bleu": 28.5473, |
| "eval_gen_len": 53.1084, |
| "eval_loss": 0.4502773582935333, |
| "eval_runtime": 2305.3584, |
| "eval_samples_per_second": 4.644, |
| "eval_steps_per_second": 0.581, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.7982439753409304e-05, |
| "loss": 0.4037, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 1.790771529983187e-05, |
| "loss": 0.4078, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.62, |
| "eval_bleu": 28.522, |
| "eval_gen_len": 53.9566, |
| "eval_loss": 0.44360852241516113, |
| "eval_runtime": 2357.175, |
| "eval_samples_per_second": 4.542, |
| "eval_steps_per_second": 0.568, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 1.783299084625444e-05, |
| "loss": 0.4016, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 1.7758266392677006e-05, |
| "loss": 0.4088, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_bleu": 29.6642, |
| "eval_gen_len": 54.4689, |
| "eval_loss": 0.439211368560791, |
| "eval_runtime": 2379.609, |
| "eval_samples_per_second": 4.499, |
| "eval_steps_per_second": 0.563, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.768354193909957e-05, |
| "loss": 0.4046, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.7608817485522138e-05, |
| "loss": 0.4039, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_bleu": 29.8929, |
| "eval_gen_len": 55.4612, |
| "eval_loss": 0.4344358444213867, |
| "eval_runtime": 2401.3922, |
| "eval_samples_per_second": 4.458, |
| "eval_steps_per_second": 0.558, |
| "step": 16000 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 1.7534093031944705e-05, |
| "loss": 0.3635, |
| "step": 16500 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 1.7459368578367273e-05, |
| "loss": 0.3537, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.18, |
| "eval_bleu": 30.2302, |
| "eval_gen_len": 54.3727, |
| "eval_loss": 0.43423154950141907, |
| "eval_runtime": 2351.6946, |
| "eval_samples_per_second": 4.552, |
| "eval_steps_per_second": 0.569, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 1.7384644124789837e-05, |
| "loss": 0.3575, |
| "step": 17500 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 1.7309919671212404e-05, |
| "loss": 0.3569, |
| "step": 18000 |
| }, |
| { |
| "epoch": 3.36, |
| "eval_bleu": 30.1139, |
| "eval_gen_len": 54.6381, |
| "eval_loss": 0.4319211542606354, |
| "eval_runtime": 2402.5651, |
| "eval_samples_per_second": 4.456, |
| "eval_steps_per_second": 0.557, |
| "step": 18000 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 1.7235195217634972e-05, |
| "loss": 0.3564, |
| "step": 18500 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 1.716047076405754e-05, |
| "loss": 0.3564, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.55, |
| "eval_bleu": 30.8007, |
| "eval_gen_len": 53.8819, |
| "eval_loss": 0.42764702439308167, |
| "eval_runtime": 2333.0447, |
| "eval_samples_per_second": 4.589, |
| "eval_steps_per_second": 0.574, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 1.7085746310480107e-05, |
| "loss": 0.3576, |
| "step": 19500 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 1.701102185690267e-05, |
| "loss": 0.3637, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.74, |
| "eval_bleu": 30.8698, |
| "eval_gen_len": 53.7231, |
| "eval_loss": 0.422607421875, |
| "eval_runtime": 2331.5045, |
| "eval_samples_per_second": 4.592, |
| "eval_steps_per_second": 0.574, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 1.693629740332524e-05, |
| "loss": 0.3601, |
| "step": 20500 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 1.6861572949747806e-05, |
| "loss": 0.3571, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.92, |
| "eval_bleu": 31.1343, |
| "eval_gen_len": 53.5349, |
| "eval_loss": 0.41751930117607117, |
| "eval_runtime": 2304.4971, |
| "eval_samples_per_second": 4.646, |
| "eval_steps_per_second": 0.581, |
| "step": 21000 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 1.6786848496170374e-05, |
| "loss": 0.3441, |
| "step": 21500 |
| }, |
| { |
| "epoch": 4.11, |
| "learning_rate": 1.671212404259294e-05, |
| "loss": 0.3099, |
| "step": 22000 |
| }, |
| { |
| "epoch": 4.11, |
| "eval_bleu": 31.3026, |
| "eval_gen_len": 53.4483, |
| "eval_loss": 0.421342134475708, |
| "eval_runtime": 2298.8454, |
| "eval_samples_per_second": 4.657, |
| "eval_steps_per_second": 0.582, |
| "step": 22000 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 1.6637399589015505e-05, |
| "loss": 0.3175, |
| "step": 22500 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 1.6562675135438073e-05, |
| "loss": 0.3104, |
| "step": 23000 |
| }, |
| { |
| "epoch": 4.3, |
| "eval_bleu": 31.1261, |
| "eval_gen_len": 51.5196, |
| "eval_loss": 0.4227532744407654, |
| "eval_runtime": 2198.9363, |
| "eval_samples_per_second": 4.869, |
| "eval_steps_per_second": 0.609, |
| "step": 23000 |
| }, |
| { |
| "epoch": 4.39, |
| "learning_rate": 1.648795068186064e-05, |
| "loss": 0.3169, |
| "step": 23500 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 1.6413226228283208e-05, |
| "loss": 0.3162, |
| "step": 24000 |
| }, |
| { |
| "epoch": 4.48, |
| "eval_bleu": 31.9091, |
| "eval_gen_len": 53.0626, |
| "eval_loss": 0.4195193946361542, |
| "eval_runtime": 2270.3312, |
| "eval_samples_per_second": 4.716, |
| "eval_steps_per_second": 0.59, |
| "step": 24000 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 1.6338501774705772e-05, |
| "loss": 0.3128, |
| "step": 24500 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 1.626377732112834e-05, |
| "loss": 0.3177, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.67, |
| "eval_bleu": 31.5561, |
| "eval_gen_len": 52.3463, |
| "eval_loss": 0.4158227741718292, |
| "eval_runtime": 2237.9742, |
| "eval_samples_per_second": 4.784, |
| "eval_steps_per_second": 0.598, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 1.6189052867550907e-05, |
| "loss": 0.3216, |
| "step": 25500 |
| }, |
| { |
| "epoch": 4.86, |
| "learning_rate": 1.6114328413973474e-05, |
| "loss": 0.3181, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.86, |
| "eval_bleu": 32.1029, |
| "eval_gen_len": 53.8831, |
| "eval_loss": 0.4130856692790985, |
| "eval_runtime": 2288.7247, |
| "eval_samples_per_second": 4.678, |
| "eval_steps_per_second": 0.585, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.95, |
| "learning_rate": 1.6039603960396042e-05, |
| "loss": 0.3176, |
| "step": 26500 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 1.5964879506818606e-05, |
| "loss": 0.2941, |
| "step": 27000 |
| }, |
| { |
| "epoch": 5.04, |
| "eval_bleu": 32.1061, |
| "eval_gen_len": 52.7448, |
| "eval_loss": 0.4150530993938446, |
| "eval_runtime": 2247.1776, |
| "eval_samples_per_second": 4.764, |
| "eval_steps_per_second": 0.596, |
| "step": 27000 |
| }, |
| { |
| "epoch": 5.14, |
| "learning_rate": 1.5890155053241173e-05, |
| "loss": 0.2752, |
| "step": 27500 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 1.581543059966374e-05, |
| "loss": 0.274, |
| "step": 28000 |
| }, |
| { |
| "epoch": 5.23, |
| "eval_bleu": 31.9128, |
| "eval_gen_len": 52.9394, |
| "eval_loss": 0.4146653711795807, |
| "eval_runtime": 2267.9887, |
| "eval_samples_per_second": 4.72, |
| "eval_steps_per_second": 0.59, |
| "step": 28000 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 1.574070614608631e-05, |
| "loss": 0.2852, |
| "step": 28500 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 1.5665981692508876e-05, |
| "loss": 0.2713, |
| "step": 29000 |
| }, |
| { |
| "epoch": 5.42, |
| "eval_bleu": 32.452, |
| "eval_gen_len": 52.881, |
| "eval_loss": 0.41379648447036743, |
| "eval_runtime": 2262.9812, |
| "eval_samples_per_second": 4.731, |
| "eval_steps_per_second": 0.592, |
| "step": 29000 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 1.559125723893144e-05, |
| "loss": 0.2791, |
| "step": 29500 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 1.5516532785354008e-05, |
| "loss": 0.283, |
| "step": 30000 |
| }, |
| { |
| "epoch": 5.6, |
| "eval_bleu": 32.6103, |
| "eval_gen_len": 53.2173, |
| "eval_loss": 0.4103504717350006, |
| "eval_runtime": 2279.2685, |
| "eval_samples_per_second": 4.697, |
| "eval_steps_per_second": 0.587, |
| "step": 30000 |
| }, |
| { |
| "epoch": 5.7, |
| "learning_rate": 1.5441808331776575e-05, |
| "loss": 0.2835, |
| "step": 30500 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 1.5367083878199142e-05, |
| "loss": 0.2866, |
| "step": 31000 |
| }, |
| { |
| "epoch": 5.79, |
| "eval_bleu": 32.5888, |
| "eval_gen_len": 52.9638, |
| "eval_loss": 0.41094180941581726, |
| "eval_runtime": 2257.2287, |
| "eval_samples_per_second": 4.743, |
| "eval_steps_per_second": 0.593, |
| "step": 31000 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 1.5292359424621707e-05, |
| "loss": 0.2851, |
| "step": 31500 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 1.5217634971044276e-05, |
| "loss": 0.2865, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.98, |
| "eval_bleu": 32.6545, |
| "eval_gen_len": 52.4693, |
| "eval_loss": 0.40765267610549927, |
| "eval_runtime": 2233.0208, |
| "eval_samples_per_second": 4.794, |
| "eval_steps_per_second": 0.6, |
| "step": 32000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 133825, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 25, |
| "save_steps": 1000, |
| "total_flos": 1.0234506177547469e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|