{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.98235965988409, "global_step": 70500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.965241056446268e-05, "loss": 5.7812, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.9299885781970474e-05, "loss": 3.1232, "step": 1000 }, { "epoch": 0.06, "learning_rate": 4.894736099947827e-05, "loss": 2.7376, "step": 1500 }, { "epoch": 0.08, "learning_rate": 4.859483621698606e-05, "loss": 2.4571, "step": 2000 }, { "epoch": 0.11, "learning_rate": 4.8242311434493845e-05, "loss": 2.2177, "step": 2500 }, { "epoch": 0.13, "learning_rate": 4.788978665200164e-05, "loss": 1.9238, "step": 3000 }, { "epoch": 0.15, "learning_rate": 4.753726186950943e-05, "loss": 1.7636, "step": 3500 }, { "epoch": 0.17, "learning_rate": 4.718473708701722e-05, "loss": 1.5908, "step": 4000 }, { "epoch": 0.19, "learning_rate": 4.683221230452501e-05, "loss": 1.4306, "step": 4500 }, { "epoch": 0.21, "learning_rate": 4.64796875220328e-05, "loss": 1.2813, "step": 5000 }, { "epoch": 0.23, "learning_rate": 4.6127162739540594e-05, "loss": 1.2258, "step": 5500 }, { "epoch": 0.25, "learning_rate": 4.577463795704838e-05, "loss": 1.1167, "step": 6000 }, { "epoch": 0.27, "learning_rate": 4.542211317455617e-05, "loss": 1.0346, "step": 6500 }, { "epoch": 0.3, "learning_rate": 4.5069588392063965e-05, "loss": 0.963, "step": 7000 }, { "epoch": 0.32, "learning_rate": 4.471706360957176e-05, "loss": 0.9412, "step": 7500 }, { "epoch": 0.34, "learning_rate": 4.4364538827079544e-05, "loss": 0.8301, "step": 8000 }, { "epoch": 0.36, "learning_rate": 4.4012014044587336e-05, "loss": 0.8389, "step": 8500 }, { "epoch": 0.38, "learning_rate": 4.366019431166011e-05, "loss": 0.8128, "step": 9000 }, { "epoch": 0.4, "learning_rate": 4.3307669529167903e-05, "loss": 0.7062, "step": 9500 }, { "epoch": 0.42, "learning_rate": 4.295514474667569e-05, "loss": 0.7305, "step": 10000 }, { "epoch": 0.44, "learning_rate": 4.260261996418348e-05, "loss": 0.7048, "step": 10500 }, { "epoch": 0.47, "learning_rate": 4.2250095181691275e-05, "loss": 0.6352, "step": 11000 }, { "epoch": 0.49, "learning_rate": 4.189757039919907e-05, "loss": 0.6491, "step": 11500 }, { "epoch": 0.51, "learning_rate": 4.154504561670686e-05, "loss": 0.6416, "step": 12000 }, { "epoch": 0.53, "learning_rate": 4.1192520834214646e-05, "loss": 0.618, "step": 12500 }, { "epoch": 0.55, "learning_rate": 4.083999605172244e-05, "loss": 0.5734, "step": 13000 }, { "epoch": 0.57, "learning_rate": 4.048747126923023e-05, "loss": 0.5764, "step": 13500 }, { "epoch": 0.59, "learning_rate": 4.0135651536303005e-05, "loss": 0.6122, "step": 14000 }, { "epoch": 0.61, "learning_rate": 3.97831267538108e-05, "loss": 0.4962, "step": 14500 }, { "epoch": 0.63, "learning_rate": 3.9430601971318584e-05, "loss": 0.5173, "step": 15000 }, { "epoch": 0.66, "learning_rate": 3.907878223839136e-05, "loss": 0.5122, "step": 15500 }, { "epoch": 0.68, "learning_rate": 3.872625745589915e-05, "loss": 0.4673, "step": 16000 }, { "epoch": 0.7, "learning_rate": 3.8373732673406944e-05, "loss": 0.4879, "step": 16500 }, { "epoch": 0.72, "learning_rate": 3.8021207890914736e-05, "loss": 0.4461, "step": 17000 }, { "epoch": 0.74, "learning_rate": 3.766868310842253e-05, "loss": 0.4864, "step": 17500 }, { "epoch": 0.76, "learning_rate": 3.7316158325930315e-05, "loss": 0.4934, "step": 18000 }, { "epoch": 0.78, "learning_rate": 3.69636335434381e-05, "loss": 0.465, "step": 18500 }, { "epoch": 0.8, "learning_rate": 3.661110876094589e-05, "loss": 0.4444, "step": 19000 }, { "epoch": 0.82, "learning_rate": 3.6258583978453686e-05, "loss": 0.4787, "step": 19500 }, { "epoch": 0.85, "learning_rate": 3.590605919596148e-05, "loss": 0.4358, "step": 20000 }, { "epoch": 0.87, "learning_rate": 3.555353441346927e-05, "loss": 0.3978, "step": 20500 }, { "epoch": 0.89, "learning_rate": 3.5201009630977064e-05, "loss": 0.4472, "step": 21000 }, { "epoch": 0.91, "learning_rate": 3.484918989804983e-05, "loss": 0.4205, "step": 21500 }, { "epoch": 0.93, "learning_rate": 3.4496665115557624e-05, "loss": 0.394, "step": 22000 }, { "epoch": 0.95, "learning_rate": 3.4144845382630405e-05, "loss": 0.3727, "step": 22500 }, { "epoch": 0.97, "learning_rate": 3.379232060013819e-05, "loss": 0.4089, "step": 23000 }, { "epoch": 0.99, "learning_rate": 3.343979581764598e-05, "loss": 0.3507, "step": 23500 }, { "epoch": 1.0, "eval_bleu": 68.2888, "eval_gen_len": 25.384, "eval_loss": 0.06593693792819977, "eval_runtime": 35.6722, "eval_samples_per_second": 14.017, "eval_steps_per_second": 3.504, "step": 23639 }, { "epoch": 1.02, "learning_rate": 3.308727103515377e-05, "loss": 0.2763, "step": 24000 }, { "epoch": 1.04, "learning_rate": 3.273474625266156e-05, "loss": 0.2929, "step": 24500 }, { "epoch": 1.06, "learning_rate": 3.2383631569299325e-05, "loss": 0.3075, "step": 25000 }, { "epoch": 1.08, "learning_rate": 3.203110678680711e-05, "loss": 0.262, "step": 25500 }, { "epoch": 1.1, "learning_rate": 3.1678582004314904e-05, "loss": 0.2456, "step": 26000 }, { "epoch": 1.12, "learning_rate": 3.1326057221822696e-05, "loss": 0.2446, "step": 26500 }, { "epoch": 1.14, "learning_rate": 3.097353243933049e-05, "loss": 0.2347, "step": 27000 }, { "epoch": 1.16, "learning_rate": 3.062100765683828e-05, "loss": 0.2831, "step": 27500 }, { "epoch": 1.18, "learning_rate": 3.0269187923911053e-05, "loss": 0.2593, "step": 28000 }, { "epoch": 1.21, "learning_rate": 2.9916663141418842e-05, "loss": 0.2421, "step": 28500 }, { "epoch": 1.23, "learning_rate": 2.9564843408491616e-05, "loss": 0.2507, "step": 29000 }, { "epoch": 1.25, "learning_rate": 2.921231862599941e-05, "loss": 0.2614, "step": 29500 }, { "epoch": 1.27, "learning_rate": 2.8859793843507198e-05, "loss": 0.2224, "step": 30000 }, { "epoch": 1.29, "learning_rate": 2.850726906101499e-05, "loss": 0.277, "step": 30500 }, { "epoch": 1.31, "learning_rate": 2.8154744278522784e-05, "loss": 0.2291, "step": 31000 }, { "epoch": 1.33, "learning_rate": 2.7802219496030573e-05, "loss": 0.2347, "step": 31500 }, { "epoch": 1.35, "learning_rate": 2.7450399763103347e-05, "loss": 0.2425, "step": 32000 }, { "epoch": 1.37, "learning_rate": 2.7098580030176125e-05, "loss": 0.2481, "step": 32500 }, { "epoch": 1.4, "learning_rate": 2.6746055247683914e-05, "loss": 0.2437, "step": 33000 }, { "epoch": 1.42, "learning_rate": 2.63935304651917e-05, "loss": 0.2438, "step": 33500 }, { "epoch": 1.44, "learning_rate": 2.6041005682699493e-05, "loss": 0.2456, "step": 34000 }, { "epoch": 1.46, "learning_rate": 2.5688480900207285e-05, "loss": 0.2239, "step": 34500 }, { "epoch": 1.48, "learning_rate": 2.5335956117715075e-05, "loss": 0.2165, "step": 35000 }, { "epoch": 1.5, "learning_rate": 2.4983431335222867e-05, "loss": 0.1948, "step": 35500 }, { "epoch": 1.52, "learning_rate": 2.463090655273066e-05, "loss": 0.2161, "step": 36000 }, { "epoch": 1.54, "learning_rate": 2.427838177023845e-05, "loss": 0.2112, "step": 36500 }, { "epoch": 1.57, "learning_rate": 2.392585698774624e-05, "loss": 0.2025, "step": 37000 }, { "epoch": 1.59, "learning_rate": 2.357333220525403e-05, "loss": 0.2208, "step": 37500 }, { "epoch": 1.61, "learning_rate": 2.322080742276182e-05, "loss": 0.2175, "step": 38000 }, { "epoch": 1.63, "learning_rate": 2.2868282640269613e-05, "loss": 0.2222, "step": 38500 }, { "epoch": 1.65, "learning_rate": 2.2515757857777402e-05, "loss": 0.2122, "step": 39000 }, { "epoch": 1.67, "learning_rate": 2.2163233075285195e-05, "loss": 0.2034, "step": 39500 }, { "epoch": 1.69, "learning_rate": 2.1810708292792984e-05, "loss": 0.1905, "step": 40000 }, { "epoch": 1.71, "learning_rate": 2.1458183510300777e-05, "loss": 0.1869, "step": 40500 }, { "epoch": 1.73, "learning_rate": 2.1105658727808566e-05, "loss": 0.2258, "step": 41000 }, { "epoch": 1.76, "learning_rate": 2.0753133945316355e-05, "loss": 0.1938, "step": 41500 }, { "epoch": 1.78, "learning_rate": 2.0400609162824148e-05, "loss": 0.1924, "step": 42000 }, { "epoch": 1.8, "learning_rate": 2.0048789429896922e-05, "loss": 0.1651, "step": 42500 }, { "epoch": 1.82, "learning_rate": 1.9696969696969697e-05, "loss": 0.2262, "step": 43000 }, { "epoch": 1.84, "learning_rate": 1.934444491447749e-05, "loss": 0.1959, "step": 43500 }, { "epoch": 1.86, "learning_rate": 1.899192013198528e-05, "loss": 0.1973, "step": 44000 }, { "epoch": 1.88, "learning_rate": 1.8639395349493068e-05, "loss": 0.1683, "step": 44500 }, { "epoch": 1.9, "learning_rate": 1.828687056700086e-05, "loss": 0.2026, "step": 45000 }, { "epoch": 1.92, "learning_rate": 1.7934345784508653e-05, "loss": 0.1885, "step": 45500 }, { "epoch": 1.95, "learning_rate": 1.7581821002016442e-05, "loss": 0.1737, "step": 46000 }, { "epoch": 1.97, "learning_rate": 1.722929621952423e-05, "loss": 0.1895, "step": 46500 }, { "epoch": 1.99, "learning_rate": 1.6878181536161995e-05, "loss": 0.1645, "step": 47000 }, { "epoch": 2.0, "eval_bleu": 0.0, "eval_gen_len": 1.0, "eval_loss": 0.00799082312732935, "eval_runtime": 41.137, "eval_samples_per_second": 12.155, "eval_steps_per_second": 3.039, "step": 47278 }, { "epoch": 2.01, "learning_rate": 1.6525656753669784e-05, "loss": 0.1458, "step": 47500 }, { "epoch": 2.03, "learning_rate": 1.6173131971177573e-05, "loss": 0.088, "step": 48000 }, { "epoch": 2.05, "learning_rate": 1.5820607188685366e-05, "loss": 0.0819, "step": 48500 }, { "epoch": 2.07, "learning_rate": 1.546808240619316e-05, "loss": 0.0885, "step": 49000 }, { "epoch": 2.09, "learning_rate": 1.5115557623700946e-05, "loss": 0.099, "step": 49500 }, { "epoch": 2.12, "learning_rate": 1.4763032841208737e-05, "loss": 0.0994, "step": 50000 }, { "epoch": 2.14, "learning_rate": 1.441050805871653e-05, "loss": 0.0941, "step": 50500 }, { "epoch": 2.16, "learning_rate": 1.4057983276224319e-05, "loss": 0.0977, "step": 51000 }, { "epoch": 2.18, "learning_rate": 1.370545849373211e-05, "loss": 0.0892, "step": 51500 }, { "epoch": 2.2, "learning_rate": 1.33529337112399e-05, "loss": 0.0935, "step": 52000 }, { "epoch": 2.22, "learning_rate": 1.3000408928747693e-05, "loss": 0.1018, "step": 52500 }, { "epoch": 2.24, "learning_rate": 1.264788414625548e-05, "loss": 0.078, "step": 53000 }, { "epoch": 2.26, "learning_rate": 1.2295359363763273e-05, "loss": 0.0866, "step": 53500 }, { "epoch": 2.28, "learning_rate": 1.1942834581271064e-05, "loss": 0.0956, "step": 54000 }, { "epoch": 2.31, "learning_rate": 1.1590309798778855e-05, "loss": 0.0853, "step": 54500 }, { "epoch": 2.33, "learning_rate": 1.1237785016286644e-05, "loss": 0.0882, "step": 55000 }, { "epoch": 2.35, "learning_rate": 1.0885260233794437e-05, "loss": 0.0994, "step": 55500 }, { "epoch": 2.37, "learning_rate": 1.0534145550432197e-05, "loss": 0.087, "step": 56000 }, { "epoch": 2.39, "learning_rate": 1.0181620767939986e-05, "loss": 0.0754, "step": 56500 }, { "epoch": 2.41, "learning_rate": 9.829095985447777e-06, "loss": 0.1035, "step": 57000 }, { "epoch": 2.43, "learning_rate": 9.476571202955568e-06, "loss": 0.0855, "step": 57500 }, { "epoch": 2.45, "learning_rate": 9.124046420463359e-06, "loss": 0.0868, "step": 58000 }, { "epoch": 2.47, "learning_rate": 8.772226687536133e-06, "loss": 0.0873, "step": 58500 }, { "epoch": 2.5, "learning_rate": 8.419701905043926e-06, "loss": 0.0849, "step": 59000 }, { "epoch": 2.52, "learning_rate": 8.0678821721167e-06, "loss": 0.0863, "step": 59500 }, { "epoch": 2.54, "learning_rate": 7.71535738962449e-06, "loss": 0.0833, "step": 60000 }, { "epoch": 2.56, "learning_rate": 7.362832607132282e-06, "loss": 0.0988, "step": 60500 }, { "epoch": 2.58, "learning_rate": 7.010307824640072e-06, "loss": 0.0712, "step": 61000 }, { "epoch": 2.6, "learning_rate": 6.657783042147864e-06, "loss": 0.0784, "step": 61500 }, { "epoch": 2.62, "learning_rate": 6.305258259655654e-06, "loss": 0.0871, "step": 62000 }, { "epoch": 2.64, "learning_rate": 5.952733477163445e-06, "loss": 0.0697, "step": 62500 }, { "epoch": 2.67, "learning_rate": 5.600208694671235e-06, "loss": 0.0681, "step": 63000 }, { "epoch": 2.69, "learning_rate": 5.247683912179026e-06, "loss": 0.0763, "step": 63500 }, { "epoch": 2.71, "learning_rate": 4.895159129686817e-06, "loss": 0.0844, "step": 64000 }, { "epoch": 2.73, "learning_rate": 4.542634347194608e-06, "loss": 0.0803, "step": 64500 }, { "epoch": 2.75, "learning_rate": 4.190109564702399e-06, "loss": 0.0743, "step": 65000 }, { "epoch": 2.77, "learning_rate": 3.837584782210189e-06, "loss": 0.079, "step": 65500 }, { "epoch": 2.79, "learning_rate": 3.48505999971798e-06, "loss": 0.0857, "step": 66000 }, { "epoch": 2.81, "learning_rate": 3.1332402667907553e-06, "loss": 0.0706, "step": 66500 }, { "epoch": 2.83, "learning_rate": 2.7807154842985467e-06, "loss": 0.0666, "step": 67000 }, { "epoch": 2.86, "learning_rate": 2.428190701806337e-06, "loss": 0.0659, "step": 67500 }, { "epoch": 2.88, "learning_rate": 2.0756659193141277e-06, "loss": 0.0793, "step": 68000 }, { "epoch": 2.9, "learning_rate": 1.723846186386903e-06, "loss": 0.0578, "step": 68500 }, { "epoch": 2.92, "learning_rate": 1.3713214038946937e-06, "loss": 0.0788, "step": 69000 }, { "epoch": 2.94, "learning_rate": 1.0187966214024847e-06, "loss": 0.0711, "step": 69500 }, { "epoch": 2.96, "learning_rate": 6.662718389102755e-07, "loss": 0.0622, "step": 70000 }, { "epoch": 2.98, "learning_rate": 3.1445210598305064e-07, "loss": 0.0716, "step": 70500 } ], "max_steps": 70917, "num_train_epochs": 3, "total_flos": 1.3984054543269888e+16, "trial_name": null, "trial_params": null }