{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 5000, "global_step": 107877, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.634779384501298e-08, "loss": 14.0889, "step": 100 }, { "epoch": 0.01, "learning_rate": 9.269558769002596e-08, "loss": 13.3753, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.3904338153503894e-07, "loss": 12.3675, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.8539117538005193e-07, "loss": 11.1815, "step": 400 }, { "epoch": 0.01, "learning_rate": 2.317389692250649e-07, "loss": 9.3244, "step": 500 }, { "epoch": 0.02, "learning_rate": 2.780867630700779e-07, "loss": 7.8289, "step": 600 }, { "epoch": 0.02, "learning_rate": 3.2443455691509084e-07, "loss": 6.5179, "step": 700 }, { "epoch": 0.02, "learning_rate": 3.7078235076010385e-07, "loss": 5.6367, "step": 800 }, { "epoch": 0.03, "learning_rate": 4.171301446051168e-07, "loss": 5.0302, "step": 900 }, { "epoch": 0.03, "learning_rate": 4.634779384501298e-07, "loss": 4.4714, "step": 1000 }, { "epoch": 0.03, "learning_rate": 5.098257322951427e-07, "loss": 4.0915, "step": 1100 }, { "epoch": 0.03, "learning_rate": 5.561735261401558e-07, "loss": 3.7279, "step": 1200 }, { "epoch": 0.04, "learning_rate": 6.025213199851688e-07, "loss": 3.4629, "step": 1300 }, { "epoch": 0.04, "learning_rate": 6.488691138301817e-07, "loss": 3.2831, "step": 1400 }, { "epoch": 0.04, "learning_rate": 6.952169076751947e-07, "loss": 3.0667, "step": 1500 }, { "epoch": 0.04, "learning_rate": 7.415647015202077e-07, "loss": 2.9512, "step": 1600 }, { "epoch": 0.05, "learning_rate": 7.879124953652206e-07, "loss": 2.7697, "step": 1700 }, { "epoch": 0.05, "learning_rate": 8.342602892102336e-07, "loss": 2.6436, "step": 1800 }, { "epoch": 0.05, "learning_rate": 8.806080830552465e-07, "loss": 2.5721, "step": 1900 }, { "epoch": 0.06, "learning_rate": 9.269558769002596e-07, "loss": 2.547, "step": 2000 }, { "epoch": 0.06, "learning_rate": 9.733036707452726e-07, "loss": 2.4468, "step": 2100 }, { "epoch": 0.06, "learning_rate": 1.0196514645902855e-06, "loss": 2.4144, "step": 2200 }, { "epoch": 0.06, "learning_rate": 1.0659992584352986e-06, "loss": 2.3397, "step": 2300 }, { "epoch": 0.07, "learning_rate": 1.1123470522803115e-06, "loss": 2.2857, "step": 2400 }, { "epoch": 0.07, "learning_rate": 1.1586948461253246e-06, "loss": 2.2327, "step": 2500 }, { "epoch": 0.07, "learning_rate": 1.2050426399703375e-06, "loss": 2.1722, "step": 2600 }, { "epoch": 0.08, "learning_rate": 1.2513904338153504e-06, "loss": 2.1463, "step": 2700 }, { "epoch": 0.08, "learning_rate": 1.2977382276603633e-06, "loss": 2.1144, "step": 2800 }, { "epoch": 0.08, "learning_rate": 1.3440860215053765e-06, "loss": 2.0433, "step": 2900 }, { "epoch": 0.08, "learning_rate": 1.3904338153503894e-06, "loss": 2.0107, "step": 3000 }, { "epoch": 0.09, "learning_rate": 1.4367816091954023e-06, "loss": 2.0057, "step": 3100 }, { "epoch": 0.09, "learning_rate": 1.4831294030404154e-06, "loss": 1.986, "step": 3200 }, { "epoch": 0.09, "learning_rate": 1.5294771968854283e-06, "loss": 1.9158, "step": 3300 }, { "epoch": 0.09, "learning_rate": 1.5758249907304412e-06, "loss": 1.9486, "step": 3400 }, { "epoch": 0.1, "learning_rate": 1.6221727845754543e-06, "loss": 1.85, "step": 3500 }, { "epoch": 0.1, "learning_rate": 1.6685205784204673e-06, "loss": 1.8479, "step": 3600 }, { "epoch": 0.1, "learning_rate": 1.7148683722654802e-06, "loss": 1.8052, "step": 3700 }, { "epoch": 0.11, "learning_rate": 1.761216166110493e-06, "loss": 1.7931, "step": 3800 }, { "epoch": 0.11, "learning_rate": 1.8075639599555062e-06, "loss": 1.7661, "step": 3900 }, { "epoch": 0.11, "learning_rate": 1.853911753800519e-06, "loss": 1.7307, "step": 4000 }, { "epoch": 0.11, "learning_rate": 1.900259547645532e-06, "loss": 1.6742, "step": 4100 }, { "epoch": 0.12, "learning_rate": 1.946607341490545e-06, "loss": 1.629, "step": 4200 }, { "epoch": 0.12, "learning_rate": 1.9929551353355583e-06, "loss": 1.6147, "step": 4300 }, { "epoch": 0.12, "learning_rate": 2.039302929180571e-06, "loss": 1.6556, "step": 4400 }, { "epoch": 0.13, "learning_rate": 2.085650723025584e-06, "loss": 1.5819, "step": 4500 }, { "epoch": 0.13, "learning_rate": 2.131998516870597e-06, "loss": 1.6211, "step": 4600 }, { "epoch": 0.13, "learning_rate": 2.17834631071561e-06, "loss": 1.5386, "step": 4700 }, { "epoch": 0.13, "learning_rate": 2.224694104560623e-06, "loss": 1.5553, "step": 4800 }, { "epoch": 0.14, "learning_rate": 2.271041898405636e-06, "loss": 1.5572, "step": 4900 }, { "epoch": 0.14, "learning_rate": 2.3173896922506492e-06, "loss": 1.5671, "step": 5000 }, { "epoch": 0.14, "eval_bleu": 6.5196, "eval_gen_len": 18.9699, "eval_loss": 1.1690529584884644, "eval_runtime": 968.3873, "eval_samples_per_second": 2.061, "eval_steps_per_second": 1.031, "step": 5000 }, { "epoch": 0.14, "learning_rate": 2.3637374860956624e-06, "loss": 1.4969, "step": 5100 }, { "epoch": 0.14, "learning_rate": 2.410085279940675e-06, "loss": 1.5547, "step": 5200 }, { "epoch": 0.15, "learning_rate": 2.456433073785688e-06, "loss": 1.5369, "step": 5300 }, { "epoch": 0.15, "learning_rate": 2.502780867630701e-06, "loss": 1.4743, "step": 5400 }, { "epoch": 0.15, "learning_rate": 2.549128661475714e-06, "loss": 1.5065, "step": 5500 }, { "epoch": 0.16, "learning_rate": 2.5954764553207267e-06, "loss": 1.4637, "step": 5600 }, { "epoch": 0.16, "learning_rate": 2.64182424916574e-06, "loss": 1.4976, "step": 5700 }, { "epoch": 0.16, "learning_rate": 2.688172043010753e-06, "loss": 1.482, "step": 5800 }, { "epoch": 0.16, "learning_rate": 2.7345198368557656e-06, "loss": 1.4703, "step": 5900 }, { "epoch": 0.17, "learning_rate": 2.7808676307007788e-06, "loss": 1.4279, "step": 6000 }, { "epoch": 0.17, "learning_rate": 2.827215424545792e-06, "loss": 1.4673, "step": 6100 }, { "epoch": 0.17, "learning_rate": 2.8735632183908046e-06, "loss": 1.3763, "step": 6200 }, { "epoch": 0.18, "learning_rate": 2.9199110122358177e-06, "loss": 1.4375, "step": 6300 }, { "epoch": 0.18, "learning_rate": 2.966258806080831e-06, "loss": 1.3887, "step": 6400 }, { "epoch": 0.18, "learning_rate": 3.0126065999258435e-06, "loss": 1.3819, "step": 6500 }, { "epoch": 0.18, "learning_rate": 3.0589543937708566e-06, "loss": 1.4471, "step": 6600 }, { "epoch": 0.19, "learning_rate": 3.1053021876158698e-06, "loss": 1.3955, "step": 6700 }, { "epoch": 0.19, "learning_rate": 3.1516499814608825e-06, "loss": 1.4317, "step": 6800 }, { "epoch": 0.19, "learning_rate": 3.1979977753058956e-06, "loss": 1.3863, "step": 6900 }, { "epoch": 0.19, "learning_rate": 3.2443455691509087e-06, "loss": 1.3759, "step": 7000 }, { "epoch": 0.2, "learning_rate": 3.2906933629959214e-06, "loss": 1.3599, "step": 7100 }, { "epoch": 0.2, "learning_rate": 3.3370411568409345e-06, "loss": 1.4061, "step": 7200 }, { "epoch": 0.2, "learning_rate": 3.383388950685947e-06, "loss": 1.339, "step": 7300 }, { "epoch": 0.21, "learning_rate": 3.4297367445309603e-06, "loss": 1.377, "step": 7400 }, { "epoch": 0.21, "learning_rate": 3.4760845383759734e-06, "loss": 1.3454, "step": 7500 }, { "epoch": 0.21, "learning_rate": 3.522432332220986e-06, "loss": 1.3565, "step": 7600 }, { "epoch": 0.21, "learning_rate": 3.5687801260659993e-06, "loss": 1.3381, "step": 7700 }, { "epoch": 0.22, "learning_rate": 3.6151279199110124e-06, "loss": 1.3326, "step": 7800 }, { "epoch": 0.22, "learning_rate": 3.661475713756025e-06, "loss": 1.3442, "step": 7900 }, { "epoch": 0.22, "learning_rate": 3.707823507601038e-06, "loss": 1.3053, "step": 8000 }, { "epoch": 0.23, "learning_rate": 3.7541713014460513e-06, "loss": 1.3071, "step": 8100 }, { "epoch": 0.23, "learning_rate": 3.800519095291064e-06, "loss": 1.3401, "step": 8200 }, { "epoch": 0.23, "learning_rate": 3.8468668891360776e-06, "loss": 1.3061, "step": 8300 }, { "epoch": 0.23, "learning_rate": 3.89321468298109e-06, "loss": 1.3304, "step": 8400 }, { "epoch": 0.24, "learning_rate": 3.939562476826103e-06, "loss": 1.3354, "step": 8500 }, { "epoch": 0.24, "learning_rate": 3.9859102706711165e-06, "loss": 1.3253, "step": 8600 }, { "epoch": 0.24, "learning_rate": 4.032258064516129e-06, "loss": 1.3006, "step": 8700 }, { "epoch": 0.24, "learning_rate": 4.078605858361142e-06, "loss": 1.2852, "step": 8800 }, { "epoch": 0.25, "learning_rate": 4.1249536522061554e-06, "loss": 1.2775, "step": 8900 }, { "epoch": 0.25, "learning_rate": 4.171301446051168e-06, "loss": 1.2842, "step": 9000 }, { "epoch": 0.25, "learning_rate": 4.217649239896181e-06, "loss": 1.2935, "step": 9100 }, { "epoch": 0.26, "learning_rate": 4.263997033741194e-06, "loss": 1.2656, "step": 9200 }, { "epoch": 0.26, "learning_rate": 4.310344827586207e-06, "loss": 1.27, "step": 9300 }, { "epoch": 0.26, "learning_rate": 4.35669262143122e-06, "loss": 1.2983, "step": 9400 }, { "epoch": 0.26, "learning_rate": 4.403040415276233e-06, "loss": 1.2317, "step": 9500 }, { "epoch": 0.27, "learning_rate": 4.449388209121246e-06, "loss": 1.2491, "step": 9600 }, { "epoch": 0.27, "learning_rate": 4.4957360029662596e-06, "loss": 1.2438, "step": 9700 }, { "epoch": 0.27, "learning_rate": 4.542083796811272e-06, "loss": 1.2608, "step": 9800 }, { "epoch": 0.28, "learning_rate": 4.588431590656286e-06, "loss": 1.2656, "step": 9900 }, { "epoch": 0.28, "learning_rate": 4.6347793845012985e-06, "loss": 1.2277, "step": 10000 }, { "epoch": 0.28, "eval_bleu": 7.082, "eval_gen_len": 18.9724, "eval_loss": 1.0592412948608398, "eval_runtime": 961.2611, "eval_samples_per_second": 2.076, "eval_steps_per_second": 1.038, "step": 10000 }, { "epoch": 0.28, "learning_rate": 4.681127178346311e-06, "loss": 1.3006, "step": 10100 }, { "epoch": 0.28, "learning_rate": 4.727474972191325e-06, "loss": 1.2365, "step": 10200 }, { "epoch": 0.29, "learning_rate": 4.7738227660363374e-06, "loss": 1.252, "step": 10300 }, { "epoch": 0.29, "learning_rate": 4.82017055988135e-06, "loss": 1.2351, "step": 10400 }, { "epoch": 0.29, "learning_rate": 4.866518353726364e-06, "loss": 1.2442, "step": 10500 }, { "epoch": 0.29, "learning_rate": 4.912866147571376e-06, "loss": 1.2158, "step": 10600 }, { "epoch": 0.3, "learning_rate": 4.959213941416389e-06, "loss": 1.2123, "step": 10700 }, { "epoch": 0.3, "learning_rate": 4.999999811534358e-06, "loss": 1.1988, "step": 10800 }, { "epoch": 0.3, "learning_rate": 4.999983582566248e-06, "loss": 1.2438, "step": 10900 }, { "epoch": 0.31, "learning_rate": 4.999941178008878e-06, "loss": 1.2145, "step": 11000 }, { "epoch": 0.31, "learning_rate": 4.999872598306237e-06, "loss": 1.1855, "step": 11100 }, { "epoch": 0.31, "learning_rate": 4.999777844176376e-06, "loss": 1.2148, "step": 11200 }, { "epoch": 0.31, "learning_rate": 4.999656916611398e-06, "loss": 1.2128, "step": 11300 }, { "epoch": 0.32, "learning_rate": 4.999509816877453e-06, "loss": 1.2222, "step": 11400 }, { "epoch": 0.32, "learning_rate": 4.999336546514719e-06, "loss": 1.1951, "step": 11500 }, { "epoch": 0.32, "learning_rate": 4.9991371073373895e-06, "loss": 1.1656, "step": 11600 }, { "epoch": 0.33, "learning_rate": 4.998911501433653e-06, "loss": 1.1783, "step": 11700 }, { "epoch": 0.33, "learning_rate": 4.9986597311656735e-06, "loss": 1.182, "step": 11800 }, { "epoch": 0.33, "learning_rate": 4.998381799169562e-06, "loss": 1.1869, "step": 11900 }, { "epoch": 0.33, "learning_rate": 4.99807770835535e-06, "loss": 1.2242, "step": 12000 }, { "epoch": 0.34, "learning_rate": 4.997747461906961e-06, "loss": 1.1884, "step": 12100 }, { "epoch": 0.34, "learning_rate": 4.997391063282177e-06, "loss": 1.2025, "step": 12200 }, { "epoch": 0.34, "learning_rate": 4.997008516212599e-06, "loss": 1.207, "step": 12300 }, { "epoch": 0.34, "learning_rate": 4.996599824703613e-06, "loss": 1.1898, "step": 12400 }, { "epoch": 0.35, "learning_rate": 4.996164993034341e-06, "loss": 1.1942, "step": 12500 }, { "epoch": 0.35, "learning_rate": 4.995704025757605e-06, "loss": 1.1647, "step": 12600 }, { "epoch": 0.35, "learning_rate": 4.995216927699872e-06, "loss": 1.1961, "step": 12700 }, { "epoch": 0.36, "learning_rate": 4.994703703961206e-06, "loss": 1.1728, "step": 12800 }, { "epoch": 0.36, "learning_rate": 4.994164359915219e-06, "loss": 1.1801, "step": 12900 }, { "epoch": 0.36, "learning_rate": 4.993598901209003e-06, "loss": 1.1607, "step": 13000 }, { "epoch": 0.36, "learning_rate": 4.993007333763086e-06, "loss": 1.1674, "step": 13100 }, { "epoch": 0.37, "learning_rate": 4.9923896637713575e-06, "loss": 1.1837, "step": 13200 }, { "epoch": 0.37, "learning_rate": 4.991745897701012e-06, "loss": 1.1709, "step": 13300 }, { "epoch": 0.37, "learning_rate": 4.991076042292475e-06, "loss": 1.1672, "step": 13400 }, { "epoch": 0.38, "learning_rate": 4.990380104559337e-06, "loss": 1.1649, "step": 13500 }, { "epoch": 0.38, "learning_rate": 4.989658091788277e-06, "loss": 1.1582, "step": 13600 }, { "epoch": 0.38, "learning_rate": 4.988910011538991e-06, "loss": 1.1485, "step": 13700 }, { "epoch": 0.38, "learning_rate": 4.988135871644105e-06, "loss": 1.1887, "step": 13800 }, { "epoch": 0.39, "learning_rate": 4.987335680209099e-06, "loss": 1.1656, "step": 13900 }, { "epoch": 0.39, "learning_rate": 4.986509445612223e-06, "loss": 1.1182, "step": 14000 }, { "epoch": 0.39, "learning_rate": 4.985657176504402e-06, "loss": 1.0969, "step": 14100 }, { "epoch": 0.39, "learning_rate": 4.984778881809156e-06, "loss": 1.1648, "step": 14200 }, { "epoch": 0.4, "learning_rate": 4.983874570722496e-06, "loss": 1.1689, "step": 14300 }, { "epoch": 0.4, "learning_rate": 4.982944252712834e-06, "loss": 1.1127, "step": 14400 }, { "epoch": 0.4, "learning_rate": 4.981987937520884e-06, "loss": 1.1529, "step": 14500 }, { "epoch": 0.41, "learning_rate": 4.981005635159558e-06, "loss": 1.1279, "step": 14600 }, { "epoch": 0.41, "learning_rate": 4.979997355913859e-06, "loss": 1.1099, "step": 14700 }, { "epoch": 0.41, "learning_rate": 4.978963110340778e-06, "loss": 1.1671, "step": 14800 }, { "epoch": 0.41, "learning_rate": 4.9779029092691825e-06, "loss": 1.167, "step": 14900 }, { "epoch": 0.42, "learning_rate": 4.976816763799698e-06, "loss": 1.1316, "step": 15000 }, { "epoch": 0.42, "eval_bleu": 7.3283, "eval_gen_len": 18.9825, "eval_loss": 1.0111815929412842, "eval_runtime": 963.287, "eval_samples_per_second": 2.072, "eval_steps_per_second": 1.036, "step": 15000 }, { "epoch": 0.42, "learning_rate": 4.975704685304601e-06, "loss": 1.1448, "step": 15100 }, { "epoch": 0.42, "learning_rate": 4.97456668542769e-06, "loss": 1.1221, "step": 15200 }, { "epoch": 0.43, "learning_rate": 4.97340277608417e-06, "loss": 1.1549, "step": 15300 }, { "epoch": 0.43, "learning_rate": 4.972212969460528e-06, "loss": 1.1493, "step": 15400 }, { "epoch": 0.43, "learning_rate": 4.9709972780144e-06, "loss": 1.1595, "step": 15500 }, { "epoch": 0.43, "learning_rate": 4.969755714474447e-06, "loss": 1.1123, "step": 15600 }, { "epoch": 0.44, "learning_rate": 4.968488291840215e-06, "loss": 1.123, "step": 15700 }, { "epoch": 0.44, "learning_rate": 4.9671950233820075e-06, "loss": 1.1096, "step": 15800 }, { "epoch": 0.44, "learning_rate": 4.965875922640738e-06, "loss": 1.1296, "step": 15900 }, { "epoch": 0.44, "learning_rate": 4.964531003427792e-06, "loss": 1.1528, "step": 16000 }, { "epoch": 0.45, "learning_rate": 4.9631602798248845e-06, "loss": 1.1315, "step": 16100 }, { "epoch": 0.45, "learning_rate": 4.961763766183908e-06, "loss": 1.0704, "step": 16200 }, { "epoch": 0.45, "learning_rate": 4.960341477126786e-06, "loss": 1.1107, "step": 16300 }, { "epoch": 0.46, "learning_rate": 4.9588934275453165e-06, "loss": 1.1351, "step": 16400 }, { "epoch": 0.46, "learning_rate": 4.957419632601022e-06, "loss": 1.1132, "step": 16500 }, { "epoch": 0.46, "learning_rate": 4.955920107724982e-06, "loss": 1.0942, "step": 16600 }, { "epoch": 0.46, "learning_rate": 4.95439486861768e-06, "loss": 1.1466, "step": 16700 }, { "epoch": 0.47, "learning_rate": 4.952843931248834e-06, "loss": 1.1164, "step": 16800 }, { "epoch": 0.47, "learning_rate": 4.951267311857229e-06, "loss": 1.1336, "step": 16900 }, { "epoch": 0.47, "learning_rate": 4.949665026950551e-06, "loss": 1.0946, "step": 17000 }, { "epoch": 0.48, "learning_rate": 4.948037093305211e-06, "loss": 1.0768, "step": 17100 }, { "epoch": 0.48, "learning_rate": 4.94638352796617e-06, "loss": 1.1048, "step": 17200 }, { "epoch": 0.48, "learning_rate": 4.944704348246759e-06, "loss": 1.0964, "step": 17300 }, { "epoch": 0.48, "learning_rate": 4.942999571728503e-06, "loss": 1.115, "step": 17400 }, { "epoch": 0.49, "learning_rate": 4.941269216260929e-06, "loss": 1.0912, "step": 17500 }, { "epoch": 0.49, "learning_rate": 4.9395132999613874e-06, "loss": 1.1039, "step": 17600 }, { "epoch": 0.49, "learning_rate": 4.937731841214856e-06, "loss": 1.1001, "step": 17700 }, { "epoch": 0.5, "learning_rate": 4.935924858673751e-06, "loss": 1.1137, "step": 17800 }, { "epoch": 0.5, "learning_rate": 4.934092371257727e-06, "loss": 1.1154, "step": 17900 }, { "epoch": 0.5, "learning_rate": 4.932234398153488e-06, "loss": 1.1103, "step": 18000 }, { "epoch": 0.5, "learning_rate": 4.930350958814578e-06, "loss": 1.0984, "step": 18100 }, { "epoch": 0.51, "learning_rate": 4.9284420729611785e-06, "loss": 1.1006, "step": 18200 }, { "epoch": 0.51, "learning_rate": 4.926507760579906e-06, "loss": 1.1028, "step": 18300 }, { "epoch": 0.51, "learning_rate": 4.9245480419236015e-06, "loss": 1.0915, "step": 18400 }, { "epoch": 0.51, "learning_rate": 4.922562937511115e-06, "loss": 1.0716, "step": 18500 }, { "epoch": 0.52, "learning_rate": 4.920552468127093e-06, "loss": 1.0817, "step": 18600 }, { "epoch": 0.52, "learning_rate": 4.918516654821765e-06, "loss": 1.113, "step": 18700 }, { "epoch": 0.52, "learning_rate": 4.916455518910713e-06, "loss": 1.1086, "step": 18800 }, { "epoch": 0.53, "learning_rate": 4.9143690819746595e-06, "loss": 1.0853, "step": 18900 }, { "epoch": 0.53, "learning_rate": 4.912257365859234e-06, "loss": 1.1003, "step": 19000 }, { "epoch": 0.53, "learning_rate": 4.9101203926747465e-06, "loss": 1.074, "step": 19100 }, { "epoch": 0.53, "learning_rate": 4.907958184795958e-06, "loss": 1.069, "step": 19200 }, { "epoch": 0.54, "learning_rate": 4.905770764861842e-06, "loss": 1.0779, "step": 19300 }, { "epoch": 0.54, "learning_rate": 4.903558155775352e-06, "loss": 1.0746, "step": 19400 }, { "epoch": 0.54, "learning_rate": 4.901320380703179e-06, "loss": 1.0855, "step": 19500 }, { "epoch": 0.55, "learning_rate": 4.8990574630755085e-06, "loss": 1.0499, "step": 19600 }, { "epoch": 0.55, "learning_rate": 4.896769426585778e-06, "loss": 1.0826, "step": 19700 }, { "epoch": 0.55, "learning_rate": 4.8944562951904256e-06, "loss": 1.0936, "step": 19800 }, { "epoch": 0.55, "learning_rate": 4.892118093108641e-06, "loss": 1.0893, "step": 19900 }, { "epoch": 0.56, "learning_rate": 4.889754844822113e-06, "loss": 1.0833, "step": 20000 }, { "epoch": 0.56, "eval_bleu": 7.4462, "eval_gen_len": 18.977, "eval_loss": 0.972794771194458, "eval_runtime": 963.5918, "eval_samples_per_second": 2.071, "eval_steps_per_second": 1.036, "step": 20000 }, { "epoch": 0.56, "learning_rate": 4.887366575074769e-06, "loss": 1.1106, "step": 20100 }, { "epoch": 0.56, "learning_rate": 4.884953308872522e-06, "loss": 1.0961, "step": 20200 }, { "epoch": 0.56, "learning_rate": 4.882515071483003e-06, "loss": 1.0668, "step": 20300 }, { "epoch": 0.57, "learning_rate": 4.8800518884353e-06, "loss": 1.0548, "step": 20400 }, { "epoch": 0.57, "learning_rate": 4.8775637855196885e-06, "loss": 1.1031, "step": 20500 }, { "epoch": 0.57, "learning_rate": 4.875050788787367e-06, "loss": 1.0564, "step": 20600 }, { "epoch": 0.58, "learning_rate": 4.872512924550172e-06, "loss": 1.0837, "step": 20700 }, { "epoch": 0.58, "learning_rate": 4.869950219380317e-06, "loss": 1.0816, "step": 20800 }, { "epoch": 0.58, "learning_rate": 4.867362700110105e-06, "loss": 1.0352, "step": 20900 }, { "epoch": 0.58, "learning_rate": 4.864750393831651e-06, "loss": 1.068, "step": 21000 }, { "epoch": 0.59, "learning_rate": 4.8621133278965956e-06, "loss": 1.0525, "step": 21100 }, { "epoch": 0.59, "learning_rate": 4.859451529915825e-06, "loss": 1.0867, "step": 21200 }, { "epoch": 0.59, "learning_rate": 4.856765027759171e-06, "loss": 1.0544, "step": 21300 }, { "epoch": 0.6, "learning_rate": 4.8540538495551314e-06, "loss": 1.0805, "step": 21400 }, { "epoch": 0.6, "learning_rate": 4.851318023690567e-06, "loss": 1.0598, "step": 21500 }, { "epoch": 0.6, "learning_rate": 4.848557578810407e-06, "loss": 1.0418, "step": 21600 }, { "epoch": 0.6, "learning_rate": 4.845772543817351e-06, "loss": 1.0486, "step": 21700 }, { "epoch": 0.61, "learning_rate": 4.842962947871561e-06, "loss": 1.0618, "step": 21800 }, { "epoch": 0.61, "learning_rate": 4.840128820390364e-06, "loss": 1.0583, "step": 21900 }, { "epoch": 0.61, "learning_rate": 4.837270191047937e-06, "loss": 1.0703, "step": 22000 }, { "epoch": 0.61, "learning_rate": 4.834387089774999e-06, "loss": 1.039, "step": 22100 }, { "epoch": 0.62, "learning_rate": 4.8314795467585e-06, "loss": 1.0505, "step": 22200 }, { "epoch": 0.62, "learning_rate": 4.828547592441298e-06, "loss": 1.0539, "step": 22300 }, { "epoch": 0.62, "learning_rate": 4.8255912575218485e-06, "loss": 1.0375, "step": 22400 }, { "epoch": 0.63, "learning_rate": 4.8226105729538786e-06, "loss": 1.0378, "step": 22500 }, { "epoch": 0.63, "learning_rate": 4.8196055699460636e-06, "loss": 1.0304, "step": 22600 }, { "epoch": 0.63, "learning_rate": 4.816576279961699e-06, "loss": 1.065, "step": 22700 }, { "epoch": 0.63, "learning_rate": 4.813522734718373e-06, "loss": 1.0445, "step": 22800 }, { "epoch": 0.64, "learning_rate": 4.810444966187635e-06, "loss": 1.0327, "step": 22900 }, { "epoch": 0.64, "learning_rate": 4.807343006594658e-06, "loss": 1.0751, "step": 23000 }, { "epoch": 0.64, "learning_rate": 4.804216888417904e-06, "loss": 1.0444, "step": 23100 }, { "epoch": 0.65, "learning_rate": 4.801066644388781e-06, "loss": 1.005, "step": 23200 }, { "epoch": 0.65, "learning_rate": 4.797892307491303e-06, "loss": 1.0401, "step": 23300 }, { "epoch": 0.65, "learning_rate": 4.794693910961745e-06, "loss": 1.0566, "step": 23400 }, { "epoch": 0.65, "learning_rate": 4.7914714882882924e-06, "loss": 1.0478, "step": 23500 }, { "epoch": 0.66, "learning_rate": 4.788225073210691e-06, "loss": 1.0656, "step": 23600 }, { "epoch": 0.66, "learning_rate": 4.784954699719895e-06, "loss": 1.0244, "step": 23700 }, { "epoch": 0.66, "learning_rate": 4.7816604020577105e-06, "loss": 1.0288, "step": 23800 }, { "epoch": 0.66, "learning_rate": 4.778342214716438e-06, "loss": 1.0515, "step": 23900 }, { "epoch": 0.67, "learning_rate": 4.775000172438508e-06, "loss": 1.0391, "step": 24000 }, { "epoch": 0.67, "learning_rate": 4.771634310216122e-06, "loss": 1.0481, "step": 24100 }, { "epoch": 0.67, "learning_rate": 4.768244663290881e-06, "loss": 1.0502, "step": 24200 }, { "epoch": 0.68, "learning_rate": 4.76483126715342e-06, "loss": 1.0855, "step": 24300 }, { "epoch": 0.68, "learning_rate": 4.761394157543038e-06, "loss": 1.0305, "step": 24400 }, { "epoch": 0.68, "learning_rate": 4.757933370447317e-06, "loss": 1.0609, "step": 24500 }, { "epoch": 0.68, "learning_rate": 4.754448942101753e-06, "loss": 1.0152, "step": 24600 }, { "epoch": 0.69, "learning_rate": 4.7509409089893695e-06, "loss": 1.0449, "step": 24700 }, { "epoch": 0.69, "learning_rate": 4.7474093078403436e-06, "loss": 1.0152, "step": 24800 }, { "epoch": 0.69, "learning_rate": 4.743854175631614e-06, "loss": 1.0, "step": 24900 }, { "epoch": 0.7, "learning_rate": 4.740275549586496e-06, "loss": 1.0339, "step": 25000 }, { "epoch": 0.7, "eval_bleu": 8.0126, "eval_gen_len": 18.982, "eval_loss": 0.9545806646347046, "eval_runtime": 963.2133, "eval_samples_per_second": 2.072, "eval_steps_per_second": 1.036, "step": 25000 }, { "epoch": 0.7, "learning_rate": 4.736673467174295e-06, "loss": 1.0262, "step": 25100 }, { "epoch": 0.7, "learning_rate": 4.733047966109911e-06, "loss": 1.054, "step": 25200 }, { "epoch": 0.7, "learning_rate": 4.729399084353444e-06, "loss": 0.9769, "step": 25300 }, { "epoch": 0.71, "learning_rate": 4.725726860109794e-06, "loss": 1.0007, "step": 25400 }, { "epoch": 0.71, "learning_rate": 4.7220313318282704e-06, "loss": 1.0438, "step": 25500 }, { "epoch": 0.71, "learning_rate": 4.718312538202179e-06, "loss": 1.0347, "step": 25600 }, { "epoch": 0.71, "learning_rate": 4.7145705181684195e-06, "loss": 1.04, "step": 25700 }, { "epoch": 0.72, "learning_rate": 4.710805310907083e-06, "loss": 1.0099, "step": 25800 }, { "epoch": 0.72, "learning_rate": 4.707016955841034e-06, "loss": 1.0447, "step": 25900 }, { "epoch": 0.72, "learning_rate": 4.703205492635504e-06, "loss": 0.9819, "step": 26000 }, { "epoch": 0.73, "learning_rate": 4.699370961197675e-06, "loss": 1.0066, "step": 26100 }, { "epoch": 0.73, "learning_rate": 4.695513401676256e-06, "loss": 1.007, "step": 26200 }, { "epoch": 0.73, "learning_rate": 4.691632854461071e-06, "loss": 1.037, "step": 26300 }, { "epoch": 0.73, "learning_rate": 4.68772936018263e-06, "loss": 1.0054, "step": 26400 }, { "epoch": 0.74, "learning_rate": 4.683802959711709e-06, "loss": 1.0078, "step": 26500 }, { "epoch": 0.74, "learning_rate": 4.6798536941589125e-06, "loss": 1.0187, "step": 26600 }, { "epoch": 0.74, "learning_rate": 4.675881604874257e-06, "loss": 1.0209, "step": 26700 }, { "epoch": 0.75, "learning_rate": 4.6718867334467245e-06, "loss": 0.9908, "step": 26800 }, { "epoch": 0.75, "learning_rate": 4.667869121703835e-06, "loss": 0.9868, "step": 26900 }, { "epoch": 0.75, "learning_rate": 4.6638288117112075e-06, "loss": 0.985, "step": 27000 }, { "epoch": 0.75, "learning_rate": 4.6597658457721175e-06, "loss": 1.0143, "step": 27100 }, { "epoch": 0.76, "learning_rate": 4.655680266427057e-06, "loss": 0.9698, "step": 27200 }, { "epoch": 0.76, "learning_rate": 4.651572116453281e-06, "loss": 1.0149, "step": 27300 }, { "epoch": 0.76, "learning_rate": 4.6474414388643755e-06, "loss": 1.0212, "step": 27400 }, { "epoch": 0.76, "learning_rate": 4.643288276909791e-06, "loss": 1.0126, "step": 27500 }, { "epoch": 0.77, "learning_rate": 4.639112674074396e-06, "loss": 1.0163, "step": 27600 }, { "epoch": 0.77, "learning_rate": 4.634914674078025e-06, "loss": 0.9814, "step": 27700 }, { "epoch": 0.77, "learning_rate": 4.630694320875016e-06, "loss": 1.039, "step": 27800 }, { "epoch": 0.78, "learning_rate": 4.626451658653752e-06, "loss": 1.0087, "step": 27900 }, { "epoch": 0.78, "learning_rate": 4.6221867318361975e-06, "loss": 0.9795, "step": 28000 }, { "epoch": 0.78, "learning_rate": 4.617899585077436e-06, "loss": 1.0499, "step": 28100 }, { "epoch": 0.78, "learning_rate": 4.613590263265198e-06, "loss": 1.0109, "step": 28200 }, { "epoch": 0.79, "learning_rate": 4.6092588115193945e-06, "loss": 1.0347, "step": 28300 }, { "epoch": 0.79, "learning_rate": 4.604905275191647e-06, "loss": 1.0123, "step": 28400 }, { "epoch": 0.79, "learning_rate": 4.600529699864803e-06, "loss": 1.0216, "step": 28500 }, { "epoch": 0.8, "learning_rate": 4.59613213135247e-06, "loss": 0.9955, "step": 28600 }, { "epoch": 0.8, "learning_rate": 4.59171261569853e-06, "loss": 0.9794, "step": 28700 }, { "epoch": 0.8, "learning_rate": 4.587271199176654e-06, "loss": 1.0098, "step": 28800 }, { "epoch": 0.8, "learning_rate": 4.58280792828983e-06, "loss": 1.0176, "step": 28900 }, { "epoch": 0.81, "learning_rate": 4.57832284976986e-06, "loss": 1.0104, "step": 29000 }, { "epoch": 0.81, "learning_rate": 4.5738160105768815e-06, "loss": 1.0023, "step": 29100 }, { "epoch": 0.81, "learning_rate": 4.569287457898874e-06, "loss": 1.0333, "step": 29200 }, { "epoch": 0.81, "learning_rate": 4.564737239151164e-06, "loss": 0.9876, "step": 29300 }, { "epoch": 0.82, "learning_rate": 4.560165401975925e-06, "loss": 0.9966, "step": 29400 }, { "epoch": 0.82, "learning_rate": 4.555571994241685e-06, "loss": 1.0267, "step": 29500 }, { "epoch": 0.82, "learning_rate": 4.550957064042821e-06, "loss": 1.0371, "step": 29600 }, { "epoch": 0.83, "learning_rate": 4.546320659699059e-06, "loss": 0.9998, "step": 29700 }, { "epoch": 0.83, "learning_rate": 4.541662829754963e-06, "loss": 1.0174, "step": 29800 }, { "epoch": 0.83, "learning_rate": 4.536983622979429e-06, "loss": 0.99, "step": 29900 }, { "epoch": 0.83, "learning_rate": 4.532283088365179e-06, "loss": 1.025, "step": 30000 }, { "epoch": 0.83, "eval_bleu": 7.7648, "eval_gen_len": 18.9805, "eval_loss": 0.9337242245674133, "eval_runtime": 963.8615, "eval_samples_per_second": 2.071, "eval_steps_per_second": 1.035, "step": 30000 }, { "epoch": 0.84, "learning_rate": 4.527561275128241e-06, "loss": 1.03, "step": 30100 }, { "epoch": 0.84, "learning_rate": 4.522818232707435e-06, "loss": 1.0015, "step": 30200 }, { "epoch": 0.84, "learning_rate": 4.5180540107638634e-06, "loss": 0.9622, "step": 30300 }, { "epoch": 0.85, "learning_rate": 4.513268659180377e-06, "loss": 1.0272, "step": 30400 }, { "epoch": 0.85, "learning_rate": 4.508462228061065e-06, "loss": 0.994, "step": 30500 }, { "epoch": 0.85, "learning_rate": 4.503634767730724e-06, "loss": 0.9826, "step": 30600 }, { "epoch": 0.85, "learning_rate": 4.498786328734336e-06, "loss": 0.9823, "step": 30700 }, { "epoch": 0.86, "learning_rate": 4.49391696183653e-06, "loss": 0.9975, "step": 30800 }, { "epoch": 0.86, "learning_rate": 4.489026718021061e-06, "loss": 0.9776, "step": 30900 }, { "epoch": 0.86, "learning_rate": 4.484115648490271e-06, "loss": 0.9798, "step": 31000 }, { "epoch": 0.86, "learning_rate": 4.4791838046645545e-06, "loss": 0.9634, "step": 31100 }, { "epoch": 0.87, "learning_rate": 4.474231238181817e-06, "loss": 1.0174, "step": 31200 }, { "epoch": 0.87, "learning_rate": 4.469258000896936e-06, "loss": 0.9762, "step": 31300 }, { "epoch": 0.87, "learning_rate": 4.464264144881221e-06, "loss": 1.0287, "step": 31400 }, { "epoch": 0.88, "learning_rate": 4.459249722421866e-06, "loss": 0.977, "step": 31500 }, { "epoch": 0.88, "learning_rate": 4.454214786021399e-06, "loss": 0.9885, "step": 31600 }, { "epoch": 0.88, "learning_rate": 4.449159388397138e-06, "loss": 1.0286, "step": 31700 }, { "epoch": 0.88, "learning_rate": 4.4440835824806364e-06, "loss": 0.9907, "step": 31800 }, { "epoch": 0.89, "learning_rate": 4.438987421417126e-06, "loss": 0.9827, "step": 31900 }, { "epoch": 0.89, "learning_rate": 4.433870958564965e-06, "loss": 0.9911, "step": 32000 }, { "epoch": 0.89, "learning_rate": 4.428734247495077e-06, "loss": 1.0034, "step": 32100 }, { "epoch": 0.9, "learning_rate": 4.423577341990392e-06, "loss": 0.9539, "step": 32200 }, { "epoch": 0.9, "learning_rate": 4.41840029604528e-06, "loss": 1.0161, "step": 32300 }, { "epoch": 0.9, "learning_rate": 4.413203163864988e-06, "loss": 1.0044, "step": 32400 }, { "epoch": 0.9, "learning_rate": 4.407985999865072e-06, "loss": 0.9767, "step": 32500 }, { "epoch": 0.91, "learning_rate": 4.4027488586708274e-06, "loss": 0.9824, "step": 32600 }, { "epoch": 0.91, "learning_rate": 4.397491795116719e-06, "loss": 0.9905, "step": 32700 }, { "epoch": 0.91, "learning_rate": 4.392214864245801e-06, "loss": 0.9632, "step": 32800 }, { "epoch": 0.91, "learning_rate": 4.386918121309147e-06, "loss": 0.9724, "step": 32900 }, { "epoch": 0.92, "learning_rate": 4.381601621765267e-06, "loss": 1.0322, "step": 33000 }, { "epoch": 0.92, "learning_rate": 4.376265421279532e-06, "loss": 1.0209, "step": 33100 }, { "epoch": 0.92, "learning_rate": 4.3709095757235835e-06, "loss": 0.9452, "step": 33200 }, { "epoch": 0.93, "learning_rate": 4.365534141174756e-06, "loss": 0.9999, "step": 33300 }, { "epoch": 0.93, "learning_rate": 4.360139173915486e-06, "loss": 0.9786, "step": 33400 }, { "epoch": 0.93, "learning_rate": 4.3547247304327234e-06, "loss": 1.0074, "step": 33500 }, { "epoch": 0.93, "learning_rate": 4.34929086741734e-06, "loss": 0.9756, "step": 33600 }, { "epoch": 0.94, "learning_rate": 4.343837641763535e-06, "loss": 0.9844, "step": 33700 }, { "epoch": 0.94, "learning_rate": 4.338365110568242e-06, "loss": 1.0181, "step": 33800 }, { "epoch": 0.94, "learning_rate": 4.332873331130531e-06, "loss": 0.9706, "step": 33900 }, { "epoch": 0.95, "learning_rate": 4.3273623609509996e-06, "loss": 0.9749, "step": 34000 }, { "epoch": 0.95, "learning_rate": 4.321832257731189e-06, "loss": 0.969, "step": 34100 }, { "epoch": 0.95, "learning_rate": 4.316283079372959e-06, "loss": 1.0083, "step": 34200 }, { "epoch": 0.95, "learning_rate": 4.3107148839779e-06, "loss": 0.9969, "step": 34300 }, { "epoch": 0.96, "learning_rate": 4.305127729846711e-06, "loss": 0.9863, "step": 34400 }, { "epoch": 0.96, "learning_rate": 4.299521675478598e-06, "loss": 0.9646, "step": 34500 }, { "epoch": 0.96, "learning_rate": 4.293896779570656e-06, "loss": 0.9704, "step": 34600 }, { "epoch": 0.96, "learning_rate": 4.288253101017259e-06, "loss": 0.949, "step": 34700 }, { "epoch": 0.97, "learning_rate": 4.282590698909439e-06, "loss": 0.9667, "step": 34800 }, { "epoch": 0.97, "learning_rate": 4.276909632534269e-06, "loss": 0.9565, "step": 34900 }, { "epoch": 0.97, "learning_rate": 4.271209961374246e-06, "loss": 0.9733, "step": 35000 }, { "epoch": 0.97, "eval_bleu": 7.9496, "eval_gen_len": 18.9815, "eval_loss": 0.9227670431137085, "eval_runtime": 961.6825, "eval_samples_per_second": 2.076, "eval_steps_per_second": 1.038, "step": 35000 }, { "epoch": 0.98, "learning_rate": 4.26549174510666e-06, "loss": 0.9857, "step": 35100 }, { "epoch": 0.98, "learning_rate": 4.259755043602978e-06, "loss": 0.9431, "step": 35200 }, { "epoch": 0.98, "learning_rate": 4.253999916928211e-06, "loss": 0.9731, "step": 35300 }, { "epoch": 0.98, "learning_rate": 4.248226425340288e-06, "loss": 0.9833, "step": 35400 }, { "epoch": 0.99, "learning_rate": 4.242434629289421e-06, "loss": 0.9582, "step": 35500 }, { "epoch": 0.99, "learning_rate": 4.236624589417482e-06, "loss": 1.002, "step": 35600 }, { "epoch": 0.99, "learning_rate": 4.230796366557354e-06, "loss": 0.9598, "step": 35700 }, { "epoch": 1.0, "learning_rate": 4.224950021732307e-06, "loss": 0.9576, "step": 35800 }, { "epoch": 1.0, "learning_rate": 4.21908561615535e-06, "loss": 1.0025, "step": 35900 }, { "epoch": 1.0, "learning_rate": 4.213203211228596e-06, "loss": 0.9746, "step": 36000 }, { "epoch": 1.0, "learning_rate": 4.2073028685426146e-06, "loss": 0.9419, "step": 36100 }, { "epoch": 1.01, "learning_rate": 4.20138464987579e-06, "loss": 0.9092, "step": 36200 }, { "epoch": 1.01, "learning_rate": 4.195448617193676e-06, "loss": 0.9345, "step": 36300 }, { "epoch": 1.01, "learning_rate": 4.189494832648339e-06, "loss": 0.9613, "step": 36400 }, { "epoch": 1.02, "learning_rate": 4.183523358577716e-06, "loss": 0.9596, "step": 36500 }, { "epoch": 1.02, "learning_rate": 4.177534257504961e-06, "loss": 0.9412, "step": 36600 }, { "epoch": 1.02, "learning_rate": 4.171527592137783e-06, "loss": 0.9418, "step": 36700 }, { "epoch": 1.02, "learning_rate": 4.1655034253678e-06, "loss": 0.9166, "step": 36800 }, { "epoch": 1.03, "learning_rate": 4.15946182026987e-06, "loss": 0.9483, "step": 36900 }, { "epoch": 1.03, "learning_rate": 4.153402840101438e-06, "loss": 0.9604, "step": 37000 }, { "epoch": 1.03, "learning_rate": 4.1473265483018735e-06, "loss": 0.928, "step": 37100 }, { "epoch": 1.03, "learning_rate": 4.141233008491797e-06, "loss": 0.953, "step": 37200 }, { "epoch": 1.04, "learning_rate": 4.1351222844724305e-06, "loss": 0.935, "step": 37300 }, { "epoch": 1.04, "learning_rate": 4.128994440224912e-06, "loss": 0.9269, "step": 37400 }, { "epoch": 1.04, "learning_rate": 4.122849539909637e-06, "loss": 0.9423, "step": 37500 }, { "epoch": 1.05, "learning_rate": 4.1166876478655835e-06, "loss": 0.9402, "step": 37600 }, { "epoch": 1.05, "learning_rate": 4.110508828609638e-06, "loss": 0.9472, "step": 37700 }, { "epoch": 1.05, "learning_rate": 4.1043131468359155e-06, "loss": 0.9457, "step": 37800 }, { "epoch": 1.05, "learning_rate": 4.098100667415095e-06, "loss": 0.8999, "step": 37900 }, { "epoch": 1.06, "learning_rate": 4.091871455393725e-06, "loss": 0.9388, "step": 38000 }, { "epoch": 1.06, "learning_rate": 4.0856255759935515e-06, "loss": 0.9416, "step": 38100 }, { "epoch": 1.06, "learning_rate": 4.079363094610836e-06, "loss": 0.98, "step": 38200 }, { "epoch": 1.07, "learning_rate": 4.0730840768156625e-06, "loss": 0.9424, "step": 38300 }, { "epoch": 1.07, "learning_rate": 4.066788588351261e-06, "loss": 0.9192, "step": 38400 }, { "epoch": 1.07, "learning_rate": 4.0604766951333105e-06, "loss": 0.9188, "step": 38500 }, { "epoch": 1.07, "learning_rate": 4.054148463249257e-06, "loss": 0.9636, "step": 38600 }, { "epoch": 1.08, "learning_rate": 4.0478039589576146e-06, "loss": 0.9716, "step": 38700 }, { "epoch": 1.08, "learning_rate": 4.041443248687273e-06, "loss": 0.955, "step": 38800 }, { "epoch": 1.08, "learning_rate": 4.035066399036807e-06, "loss": 0.9519, "step": 38900 }, { "epoch": 1.08, "learning_rate": 4.028673476773774e-06, "loss": 0.9069, "step": 39000 }, { "epoch": 1.09, "learning_rate": 4.022264548834016e-06, "loss": 0.9085, "step": 39100 }, { "epoch": 1.09, "learning_rate": 4.015839682320959e-06, "loss": 0.9313, "step": 39200 }, { "epoch": 1.09, "learning_rate": 4.0093989445049135e-06, "loss": 0.9648, "step": 39300 }, { "epoch": 1.1, "learning_rate": 4.002942402822364e-06, "loss": 0.9398, "step": 39400 }, { "epoch": 1.1, "learning_rate": 3.9964701248752665e-06, "loss": 0.9305, "step": 39500 }, { "epoch": 1.1, "learning_rate": 3.989982178430345e-06, "loss": 0.9475, "step": 39600 }, { "epoch": 1.1, "learning_rate": 3.983478631418372e-06, "loss": 0.9448, "step": 39700 }, { "epoch": 1.11, "learning_rate": 3.976959551933464e-06, "loss": 0.9754, "step": 39800 }, { "epoch": 1.11, "learning_rate": 3.970425008232369e-06, "loss": 0.9564, "step": 39900 }, { "epoch": 1.11, "learning_rate": 3.96387506873375e-06, "loss": 0.9035, "step": 40000 }, { "epoch": 1.11, "eval_bleu": 7.689, "eval_gen_len": 18.9795, "eval_loss": 0.9161636233329773, "eval_runtime": 963.6243, "eval_samples_per_second": 2.071, "eval_steps_per_second": 1.036, "step": 40000 }, { "epoch": 1.12, "learning_rate": 3.957309802017466e-06, "loss": 0.9898, "step": 40100 }, { "epoch": 1.12, "learning_rate": 3.95072927682386e-06, "loss": 0.8963, "step": 40200 }, { "epoch": 1.12, "learning_rate": 3.944133562053033e-06, "loss": 0.9179, "step": 40300 }, { "epoch": 1.12, "learning_rate": 3.937522726764128e-06, "loss": 0.9368, "step": 40400 }, { "epoch": 1.13, "learning_rate": 3.930896840174603e-06, "loss": 0.9281, "step": 40500 }, { "epoch": 1.13, "learning_rate": 3.924255971659506e-06, "loss": 0.9706, "step": 40600 }, { "epoch": 1.13, "learning_rate": 3.9176001907507546e-06, "loss": 0.9401, "step": 40700 }, { "epoch": 1.13, "learning_rate": 3.910929567136401e-06, "loss": 0.9185, "step": 40800 }, { "epoch": 1.14, "learning_rate": 3.904244170659904e-06, "loss": 0.9406, "step": 40900 }, { "epoch": 1.14, "learning_rate": 3.8975440713194016e-06, "loss": 0.923, "step": 41000 }, { "epoch": 1.14, "learning_rate": 3.890829339266973e-06, "loss": 0.9427, "step": 41100 }, { "epoch": 1.15, "learning_rate": 3.884100044807907e-06, "loss": 0.9471, "step": 41200 }, { "epoch": 1.15, "learning_rate": 3.877356258399967e-06, "loss": 0.9367, "step": 41300 }, { "epoch": 1.15, "learning_rate": 3.870598050652648e-06, "loss": 0.9063, "step": 41400 }, { "epoch": 1.15, "learning_rate": 3.863825492326444e-06, "loss": 0.9604, "step": 41500 }, { "epoch": 1.16, "learning_rate": 3.857038654332102e-06, "loss": 0.8888, "step": 41600 }, { "epoch": 1.16, "learning_rate": 3.850237607729882e-06, "loss": 0.9494, "step": 41700 }, { "epoch": 1.16, "learning_rate": 3.8434224237288134e-06, "loss": 0.8925, "step": 41800 }, { "epoch": 1.17, "learning_rate": 3.836593173685946e-06, "loss": 0.9473, "step": 41900 }, { "epoch": 1.17, "learning_rate": 3.829749929105609e-06, "loss": 0.9236, "step": 42000 }, { "epoch": 1.17, "learning_rate": 3.822892761638656e-06, "loss": 0.9083, "step": 42100 }, { "epoch": 1.17, "learning_rate": 3.816021743081717e-06, "loss": 0.9135, "step": 42200 }, { "epoch": 1.18, "learning_rate": 3.8091369453764504e-06, "loss": 0.9526, "step": 42300 }, { "epoch": 1.18, "learning_rate": 3.8022384406087824e-06, "loss": 0.9345, "step": 42400 }, { "epoch": 1.18, "learning_rate": 3.79532630100816e-06, "loss": 0.9176, "step": 42500 }, { "epoch": 1.18, "learning_rate": 3.7884005989467866e-06, "loss": 0.9467, "step": 42600 }, { "epoch": 1.19, "learning_rate": 3.781461406938874e-06, "loss": 0.9271, "step": 42700 }, { "epoch": 1.19, "learning_rate": 3.774508797639874e-06, "loss": 0.9588, "step": 42800 }, { "epoch": 1.19, "learning_rate": 3.7675428438457234e-06, "loss": 0.9201, "step": 42900 }, { "epoch": 1.2, "learning_rate": 3.760563618492079e-06, "loss": 0.9285, "step": 43000 }, { "epoch": 1.2, "learning_rate": 3.7535711946535552e-06, "loss": 0.9484, "step": 43100 }, { "epoch": 1.2, "learning_rate": 3.746565645542958e-06, "loss": 0.9253, "step": 43200 }, { "epoch": 1.2, "learning_rate": 3.739547044510521e-06, "loss": 0.9179, "step": 43300 }, { "epoch": 1.21, "learning_rate": 3.7325154650431317e-06, "loss": 0.9143, "step": 43400 }, { "epoch": 1.21, "learning_rate": 3.725470980763569e-06, "loss": 0.9382, "step": 43500 }, { "epoch": 1.21, "learning_rate": 3.718413665429729e-06, "loss": 0.9533, "step": 43600 }, { "epoch": 1.22, "learning_rate": 3.711343592933851e-06, "loss": 0.9214, "step": 43700 }, { "epoch": 1.22, "learning_rate": 3.704260837301746e-06, "loss": 0.8943, "step": 43800 }, { "epoch": 1.22, "learning_rate": 3.6971654726920243e-06, "loss": 0.9063, "step": 43900 }, { "epoch": 1.22, "learning_rate": 3.690057573395311e-06, "loss": 0.8965, "step": 44000 }, { "epoch": 1.23, "learning_rate": 3.6829372138334763e-06, "loss": 0.9241, "step": 44100 }, { "epoch": 1.23, "learning_rate": 3.6758044685588547e-06, "loss": 0.9644, "step": 44200 }, { "epoch": 1.23, "learning_rate": 3.668659412253458e-06, "loss": 0.9391, "step": 44300 }, { "epoch": 1.23, "learning_rate": 3.661502119728203e-06, "loss": 0.9273, "step": 44400 }, { "epoch": 1.24, "learning_rate": 3.6543326659221213e-06, "loss": 0.9354, "step": 44500 }, { "epoch": 1.24, "learning_rate": 3.6471511259015764e-06, "loss": 0.9162, "step": 44600 }, { "epoch": 1.24, "learning_rate": 3.6399575748594796e-06, "loss": 0.9281, "step": 44700 }, { "epoch": 1.25, "learning_rate": 3.6327520881145002e-06, "loss": 0.8999, "step": 44800 }, { "epoch": 1.25, "learning_rate": 3.6255347411102777e-06, "loss": 0.909, "step": 44900 }, { "epoch": 1.25, "learning_rate": 3.6183056094146333e-06, "loss": 0.9386, "step": 45000 }, { "epoch": 1.25, "eval_bleu": 7.6781, "eval_gen_len": 18.9825, "eval_loss": 0.9038894176483154, "eval_runtime": 963.1893, "eval_samples_per_second": 2.072, "eval_steps_per_second": 1.036, "step": 45000 }, { "epoch": 1.25, "learning_rate": 3.611064768718777e-06, "loss": 0.8965, "step": 45100 }, { "epoch": 1.26, "learning_rate": 3.603812294836515e-06, "loss": 0.9717, "step": 45200 }, { "epoch": 1.26, "learning_rate": 3.5965482637034567e-06, "loss": 0.8826, "step": 45300 }, { "epoch": 1.26, "learning_rate": 3.58927275137622e-06, "loss": 0.9187, "step": 45400 }, { "epoch": 1.27, "learning_rate": 3.581985834031635e-06, "loss": 0.888, "step": 45500 }, { "epoch": 1.27, "learning_rate": 3.5746875879659426e-06, "loss": 0.9339, "step": 45600 }, { "epoch": 1.27, "learning_rate": 3.5673780895940034e-06, "loss": 0.9291, "step": 45700 }, { "epoch": 1.27, "learning_rate": 3.56005741544849e-06, "loss": 0.9303, "step": 45800 }, { "epoch": 1.28, "learning_rate": 3.5527256421790902e-06, "loss": 0.9123, "step": 45900 }, { "epoch": 1.28, "learning_rate": 3.5453828465517e-06, "loss": 0.9037, "step": 46000 }, { "epoch": 1.28, "learning_rate": 3.538029105447628e-06, "loss": 0.9447, "step": 46100 }, { "epoch": 1.28, "learning_rate": 3.530664495862782e-06, "loss": 0.9011, "step": 46200 }, { "epoch": 1.29, "learning_rate": 3.523289094906865e-06, "loss": 0.8897, "step": 46300 }, { "epoch": 1.29, "learning_rate": 3.5159029798025717e-06, "loss": 0.9278, "step": 46400 }, { "epoch": 1.29, "learning_rate": 3.5085062278847765e-06, "loss": 0.9204, "step": 46500 }, { "epoch": 1.3, "learning_rate": 3.5010989165997227e-06, "loss": 0.9185, "step": 46600 }, { "epoch": 1.3, "learning_rate": 3.4936811235042158e-06, "loss": 0.9067, "step": 46700 }, { "epoch": 1.3, "learning_rate": 3.4862529262648076e-06, "loss": 0.9469, "step": 46800 }, { "epoch": 1.3, "learning_rate": 3.4788144026569846e-06, "loss": 0.942, "step": 46900 }, { "epoch": 1.31, "learning_rate": 3.4713656305643543e-06, "loss": 0.9493, "step": 47000 }, { "epoch": 1.31, "learning_rate": 3.4639066879778278e-06, "loss": 0.9285, "step": 47100 }, { "epoch": 1.31, "learning_rate": 3.4564376529948045e-06, "loss": 0.9242, "step": 47200 }, { "epoch": 1.32, "learning_rate": 3.4489586038183564e-06, "loss": 0.9045, "step": 47300 }, { "epoch": 1.32, "learning_rate": 3.4414696187564035e-06, "loss": 0.9233, "step": 47400 }, { "epoch": 1.32, "learning_rate": 3.4339707762209006e-06, "loss": 0.896, "step": 47500 }, { "epoch": 1.32, "learning_rate": 3.426462154727012e-06, "loss": 0.9072, "step": 47600 }, { "epoch": 1.33, "learning_rate": 3.418943832892291e-06, "loss": 0.9419, "step": 47700 }, { "epoch": 1.33, "learning_rate": 3.411415889435856e-06, "loss": 0.8977, "step": 47800 }, { "epoch": 1.33, "learning_rate": 3.403878403177567e-06, "loss": 0.8837, "step": 47900 }, { "epoch": 1.33, "learning_rate": 3.3963314530372e-06, "loss": 0.9148, "step": 48000 }, { "epoch": 1.34, "learning_rate": 3.388775118033621e-06, "loss": 0.9005, "step": 48100 }, { "epoch": 1.34, "learning_rate": 3.381209477283957e-06, "loss": 0.8907, "step": 48200 }, { "epoch": 1.34, "learning_rate": 3.3736346100027717e-06, "loss": 0.9064, "step": 48300 }, { "epoch": 1.35, "learning_rate": 3.3660505955012308e-06, "loss": 0.9026, "step": 48400 }, { "epoch": 1.35, "learning_rate": 3.3584575131862757e-06, "loss": 0.9501, "step": 48500 }, { "epoch": 1.35, "learning_rate": 3.3508554425597896e-06, "loss": 0.9016, "step": 48600 }, { "epoch": 1.35, "learning_rate": 3.3432444632177663e-06, "loss": 0.9159, "step": 48700 }, { "epoch": 1.36, "learning_rate": 3.335624654849477e-06, "loss": 0.8766, "step": 48800 }, { "epoch": 1.36, "learning_rate": 3.327996097236636e-06, "loss": 0.9125, "step": 48900 }, { "epoch": 1.36, "learning_rate": 3.320358870252563e-06, "loss": 0.8931, "step": 49000 }, { "epoch": 1.37, "learning_rate": 3.3127130538613506e-06, "loss": 0.9088, "step": 49100 }, { "epoch": 1.37, "learning_rate": 3.3050587281170245e-06, "loss": 0.9427, "step": 49200 }, { "epoch": 1.37, "learning_rate": 3.297395973162705e-06, "loss": 0.9042, "step": 49300 }, { "epoch": 1.37, "learning_rate": 3.2897248692297678e-06, "loss": 0.9094, "step": 49400 }, { "epoch": 1.38, "learning_rate": 3.2820454966370102e-06, "loss": 0.9384, "step": 49500 }, { "epoch": 1.38, "learning_rate": 3.2743579357897997e-06, "loss": 0.888, "step": 49600 }, { "epoch": 1.38, "learning_rate": 3.266662267179238e-06, "loss": 0.9208, "step": 49700 }, { "epoch": 1.38, "learning_rate": 3.25895857138132e-06, "loss": 0.9304, "step": 49800 }, { "epoch": 1.39, "learning_rate": 3.2512469290560848e-06, "loss": 0.9346, "step": 49900 }, { "epoch": 1.39, "learning_rate": 3.2435274209467765e-06, "loss": 0.9073, "step": 50000 }, { "epoch": 1.39, "eval_bleu": 7.8607, "eval_gen_len": 18.9805, "eval_loss": 0.8985511064529419, "eval_runtime": 966.2977, "eval_samples_per_second": 2.066, "eval_steps_per_second": 1.033, "step": 50000 }, { "epoch": 1.39, "learning_rate": 3.235800127878995e-06, "loss": 0.8804, "step": 50100 }, { "epoch": 1.4, "learning_rate": 3.228065130759852e-06, "loss": 0.9301, "step": 50200 }, { "epoch": 1.4, "learning_rate": 3.220322510577121e-06, "loss": 0.9109, "step": 50300 }, { "epoch": 1.4, "learning_rate": 3.2125723483983935e-06, "loss": 0.9029, "step": 50400 }, { "epoch": 1.4, "learning_rate": 3.204814725370227e-06, "loss": 0.9089, "step": 50500 }, { "epoch": 1.41, "learning_rate": 3.1970497227172957e-06, "loss": 0.9184, "step": 50600 }, { "epoch": 1.41, "learning_rate": 3.1892774217415433e-06, "loss": 0.894, "step": 50700 }, { "epoch": 1.41, "learning_rate": 3.181497903821326e-06, "loss": 0.905, "step": 50800 }, { "epoch": 1.42, "learning_rate": 3.1737112504105655e-06, "loss": 0.889, "step": 50900 }, { "epoch": 1.42, "learning_rate": 3.165917543037894e-06, "loss": 0.8977, "step": 51000 }, { "epoch": 1.42, "learning_rate": 3.1581168633058002e-06, "loss": 0.9111, "step": 51100 }, { "epoch": 1.42, "learning_rate": 3.150309292889776e-06, "loss": 0.9073, "step": 51200 }, { "epoch": 1.43, "learning_rate": 3.14249491353746e-06, "loss": 0.9229, "step": 51300 }, { "epoch": 1.43, "learning_rate": 3.134673807067784e-06, "loss": 0.8961, "step": 51400 }, { "epoch": 1.43, "learning_rate": 3.1268460553701146e-06, "loss": 0.8976, "step": 51500 }, { "epoch": 1.43, "learning_rate": 3.1190117404033943e-06, "loss": 0.9186, "step": 51600 }, { "epoch": 1.44, "learning_rate": 3.111170944195286e-06, "loss": 0.8826, "step": 51700 }, { "epoch": 1.44, "learning_rate": 3.1033237488413144e-06, "loss": 0.9097, "step": 51800 }, { "epoch": 1.44, "learning_rate": 3.095470236504003e-06, "loss": 0.8805, "step": 51900 }, { "epoch": 1.45, "learning_rate": 3.0876104894120164e-06, "loss": 0.8718, "step": 52000 }, { "epoch": 1.45, "learning_rate": 3.0797445898593007e-06, "loss": 0.9029, "step": 52100 }, { "epoch": 1.45, "learning_rate": 3.07187262020422e-06, "loss": 0.9297, "step": 52200 }, { "epoch": 1.45, "learning_rate": 3.0639946628686913e-06, "loss": 0.8916, "step": 52300 }, { "epoch": 1.46, "learning_rate": 3.0561108003373275e-06, "loss": 0.9609, "step": 52400 }, { "epoch": 1.46, "learning_rate": 3.0482211151565693e-06, "loss": 0.8926, "step": 52500 }, { "epoch": 1.46, "learning_rate": 3.0403256899338236e-06, "loss": 0.8811, "step": 52600 }, { "epoch": 1.47, "learning_rate": 3.032424607336595e-06, "loss": 0.8748, "step": 52700 }, { "epoch": 1.47, "learning_rate": 3.0245179500916245e-06, "loss": 0.9036, "step": 52800 }, { "epoch": 1.47, "learning_rate": 3.016605800984021e-06, "loss": 0.9, "step": 52900 }, { "epoch": 1.47, "learning_rate": 3.0086882428563948e-06, "loss": 0.8919, "step": 53000 }, { "epoch": 1.48, "learning_rate": 3.0007653586079884e-06, "loss": 0.924, "step": 53100 }, { "epoch": 1.48, "learning_rate": 2.9928372311938134e-06, "loss": 0.9289, "step": 53200 }, { "epoch": 1.48, "learning_rate": 2.984903943623779e-06, "loss": 0.8922, "step": 53300 }, { "epoch": 1.49, "learning_rate": 2.9769655789618185e-06, "loss": 0.904, "step": 53400 }, { "epoch": 1.49, "learning_rate": 2.9690222203250286e-06, "loss": 0.9239, "step": 53500 }, { "epoch": 1.49, "learning_rate": 2.961073950882793e-06, "loss": 0.9248, "step": 53600 }, { "epoch": 1.49, "learning_rate": 2.9531208538559114e-06, "loss": 0.8867, "step": 53700 }, { "epoch": 1.5, "learning_rate": 2.945163012515732e-06, "loss": 0.8872, "step": 53800 }, { "epoch": 1.5, "learning_rate": 2.9372005101832767e-06, "loss": 0.9082, "step": 53900 }, { "epoch": 1.5, "learning_rate": 2.9292334302283683e-06, "loss": 0.8745, "step": 54000 }, { "epoch": 1.5, "learning_rate": 2.9212618560687604e-06, "loss": 0.8917, "step": 54100 }, { "epoch": 1.51, "learning_rate": 2.9132858711692607e-06, "loss": 0.8771, "step": 54200 }, { "epoch": 1.51, "learning_rate": 2.9053055590408603e-06, "loss": 0.8999, "step": 54300 }, { "epoch": 1.51, "learning_rate": 2.8973210032398567e-06, "loss": 0.8902, "step": 54400 }, { "epoch": 1.52, "learning_rate": 2.88933228736698e-06, "loss": 0.8912, "step": 54500 }, { "epoch": 1.52, "learning_rate": 2.881339495066518e-06, "loss": 0.8482, "step": 54600 }, { "epoch": 1.52, "learning_rate": 2.8733427100254383e-06, "loss": 0.9028, "step": 54700 }, { "epoch": 1.52, "learning_rate": 2.8653420159725166e-06, "loss": 0.8881, "step": 54800 }, { "epoch": 1.53, "learning_rate": 2.8573374966774546e-06, "loss": 0.8913, "step": 54900 }, { "epoch": 1.53, "learning_rate": 2.849329235950007e-06, "loss": 0.8928, "step": 55000 }, { "epoch": 1.53, "eval_bleu": 8.0666, "eval_gen_len": 18.981, "eval_loss": 0.8941593170166016, "eval_runtime": 968.3576, "eval_samples_per_second": 2.061, "eval_steps_per_second": 1.031, "step": 55000 }, { "epoch": 1.53, "learning_rate": 2.8413173176391006e-06, "loss": 0.8992, "step": 55100 }, { "epoch": 1.54, "learning_rate": 2.8333018256319617e-06, "loss": 0.8777, "step": 55200 }, { "epoch": 1.54, "learning_rate": 2.82528284385323e-06, "loss": 0.8709, "step": 55300 }, { "epoch": 1.54, "learning_rate": 2.817260456264086e-06, "loss": 0.9326, "step": 55400 }, { "epoch": 1.54, "learning_rate": 2.809234746861372e-06, "loss": 0.9006, "step": 55500 }, { "epoch": 1.55, "learning_rate": 2.801205799676709e-06, "loss": 0.929, "step": 55600 }, { "epoch": 1.55, "learning_rate": 2.7931736987756165e-06, "loss": 0.9161, "step": 55700 }, { "epoch": 1.55, "learning_rate": 2.7851385282566372e-06, "loss": 0.908, "step": 55800 }, { "epoch": 1.55, "learning_rate": 2.7771003722504534e-06, "loss": 0.9008, "step": 55900 }, { "epoch": 1.56, "learning_rate": 2.769059314919006e-06, "loss": 0.9195, "step": 56000 }, { "epoch": 1.56, "learning_rate": 2.7610154404546136e-06, "loss": 0.8826, "step": 56100 }, { "epoch": 1.56, "learning_rate": 2.752968833079089e-06, "loss": 0.858, "step": 56200 }, { "epoch": 1.57, "learning_rate": 2.744919577042863e-06, "loss": 0.9305, "step": 56300 }, { "epoch": 1.57, "learning_rate": 2.7368677566240976e-06, "loss": 0.9289, "step": 56400 }, { "epoch": 1.57, "learning_rate": 2.7288134561278017e-06, "loss": 0.9168, "step": 56500 }, { "epoch": 1.57, "learning_rate": 2.720756759884956e-06, "loss": 0.8958, "step": 56600 }, { "epoch": 1.58, "learning_rate": 2.7126977522516223e-06, "loss": 0.8924, "step": 56700 }, { "epoch": 1.58, "learning_rate": 2.7046365176080635e-06, "loss": 0.9235, "step": 56800 }, { "epoch": 1.58, "learning_rate": 2.6965731403578614e-06, "loss": 0.9171, "step": 56900 }, { "epoch": 1.59, "learning_rate": 2.6885077049270316e-06, "loss": 0.9159, "step": 57000 }, { "epoch": 1.59, "learning_rate": 2.6804402957631364e-06, "loss": 0.8949, "step": 57100 }, { "epoch": 1.59, "learning_rate": 2.6723709973344088e-06, "loss": 0.8796, "step": 57200 }, { "epoch": 1.59, "learning_rate": 2.6642998941288573e-06, "loss": 0.8943, "step": 57300 }, { "epoch": 1.6, "learning_rate": 2.6562270706533917e-06, "loss": 0.8714, "step": 57400 }, { "epoch": 1.6, "learning_rate": 2.6481526114329313e-06, "loss": 0.8647, "step": 57500 }, { "epoch": 1.6, "learning_rate": 2.640076601009522e-06, "loss": 0.9151, "step": 57600 }, { "epoch": 1.6, "learning_rate": 2.631999123941452e-06, "loss": 0.8942, "step": 57700 }, { "epoch": 1.61, "learning_rate": 2.6239202648023666e-06, "loss": 0.8863, "step": 57800 }, { "epoch": 1.61, "learning_rate": 2.6158401081803784e-06, "loss": 0.9138, "step": 57900 }, { "epoch": 1.61, "learning_rate": 2.6077587386771896e-06, "loss": 0.9095, "step": 58000 }, { "epoch": 1.62, "learning_rate": 2.5996762409071978e-06, "loss": 0.9049, "step": 58100 }, { "epoch": 1.62, "learning_rate": 2.591592699496616e-06, "loss": 0.8793, "step": 58200 }, { "epoch": 1.62, "learning_rate": 2.583508199082585e-06, "loss": 0.864, "step": 58300 }, { "epoch": 1.62, "learning_rate": 2.575422824312284e-06, "loss": 0.9173, "step": 58400 }, { "epoch": 1.63, "learning_rate": 2.5673366598420487e-06, "loss": 0.8799, "step": 58500 }, { "epoch": 1.63, "learning_rate": 2.5592497903364834e-06, "loss": 0.8751, "step": 58600 }, { "epoch": 1.63, "learning_rate": 2.5511623004675743e-06, "loss": 0.8871, "step": 58700 }, { "epoch": 1.64, "learning_rate": 2.5430742749138015e-06, "loss": 0.9121, "step": 58800 }, { "epoch": 1.64, "learning_rate": 2.534985798359257e-06, "loss": 0.8947, "step": 58900 }, { "epoch": 1.64, "learning_rate": 2.5268969554927512e-06, "loss": 0.8911, "step": 59000 }, { "epoch": 1.64, "learning_rate": 2.5188078310069326e-06, "loss": 0.8626, "step": 59100 }, { "epoch": 1.65, "learning_rate": 2.5107185095973967e-06, "loss": 0.8943, "step": 59200 }, { "epoch": 1.65, "learning_rate": 2.5026290759618026e-06, "loss": 0.9163, "step": 59300 }, { "epoch": 1.65, "learning_rate": 2.494539614798982e-06, "loss": 0.8599, "step": 59400 }, { "epoch": 1.65, "learning_rate": 2.486450210808057e-06, "loss": 0.8907, "step": 59500 }, { "epoch": 1.66, "learning_rate": 2.4783609486875507e-06, "loss": 0.889, "step": 59600 }, { "epoch": 1.66, "learning_rate": 2.4702719131345003e-06, "loss": 0.8872, "step": 59700 }, { "epoch": 1.66, "learning_rate": 2.462183188843569e-06, "loss": 0.8836, "step": 59800 }, { "epoch": 1.67, "learning_rate": 2.4540948605061652e-06, "loss": 0.8837, "step": 59900 }, { "epoch": 1.67, "learning_rate": 2.446007012809548e-06, "loss": 0.884, "step": 60000 }, { "epoch": 1.67, "eval_bleu": 8.1679, "eval_gen_len": 18.9785, "eval_loss": 0.8873680830001831, "eval_runtime": 968.5261, "eval_samples_per_second": 2.061, "eval_steps_per_second": 1.03, "step": 60000 }, { "epoch": 1.67, "learning_rate": 2.437919730435946e-06, "loss": 0.8902, "step": 60100 }, { "epoch": 1.67, "learning_rate": 2.4298330980616674e-06, "loss": 0.8499, "step": 60200 }, { "epoch": 1.68, "learning_rate": 2.4217472003562144e-06, "loss": 0.8927, "step": 60300 }, { "epoch": 1.68, "learning_rate": 2.413662121981399e-06, "loss": 0.9325, "step": 60400 }, { "epoch": 1.68, "learning_rate": 2.4055779475904536e-06, "loss": 0.8586, "step": 60500 }, { "epoch": 1.69, "learning_rate": 2.397494761827145e-06, "loss": 0.8788, "step": 60600 }, { "epoch": 1.69, "learning_rate": 2.3894126493248884e-06, "loss": 0.9161, "step": 60700 }, { "epoch": 1.69, "learning_rate": 2.3813316947058634e-06, "loss": 0.9058, "step": 60800 }, { "epoch": 1.69, "learning_rate": 2.373251982580124e-06, "loss": 0.8881, "step": 60900 }, { "epoch": 1.7, "learning_rate": 2.365173597544718e-06, "loss": 0.9027, "step": 61000 }, { "epoch": 1.7, "learning_rate": 2.3570966241827947e-06, "loss": 0.914, "step": 61100 }, { "epoch": 1.7, "learning_rate": 2.3490211470627254e-06, "loss": 0.8989, "step": 61200 }, { "epoch": 1.7, "learning_rate": 2.3409472507372134e-06, "loss": 0.8792, "step": 61300 }, { "epoch": 1.71, "learning_rate": 2.3328750197424115e-06, "loss": 0.8651, "step": 61400 }, { "epoch": 1.71, "learning_rate": 2.3248045385970357e-06, "loss": 0.9071, "step": 61500 }, { "epoch": 1.71, "learning_rate": 2.316735891801482e-06, "loss": 0.8676, "step": 61600 }, { "epoch": 1.72, "learning_rate": 2.308669163836938e-06, "loss": 0.9145, "step": 61700 }, { "epoch": 1.72, "learning_rate": 2.300604439164501e-06, "loss": 0.9072, "step": 61800 }, { "epoch": 1.72, "learning_rate": 2.2925418022242955e-06, "loss": 0.8916, "step": 61900 }, { "epoch": 1.72, "learning_rate": 2.2844813374345837e-06, "loss": 0.872, "step": 62000 }, { "epoch": 1.73, "learning_rate": 2.2764231291908847e-06, "loss": 0.8983, "step": 62100 }, { "epoch": 1.73, "learning_rate": 2.2683672618650945e-06, "loss": 0.8939, "step": 62200 }, { "epoch": 1.73, "learning_rate": 2.2603138198045966e-06, "loss": 0.9226, "step": 62300 }, { "epoch": 1.74, "learning_rate": 2.2522628873313806e-06, "loss": 0.8937, "step": 62400 }, { "epoch": 1.74, "learning_rate": 2.2442145487411605e-06, "loss": 0.8331, "step": 62500 }, { "epoch": 1.74, "learning_rate": 2.2361688883024912e-06, "loss": 0.8817, "step": 62600 }, { "epoch": 1.74, "learning_rate": 2.228125990255889e-06, "loss": 0.875, "step": 62700 }, { "epoch": 1.75, "learning_rate": 2.2200859388129447e-06, "loss": 0.9098, "step": 62800 }, { "epoch": 1.75, "learning_rate": 2.2120488181554433e-06, "loss": 0.8639, "step": 62900 }, { "epoch": 1.75, "learning_rate": 2.2040147124344864e-06, "loss": 0.89, "step": 63000 }, { "epoch": 1.75, "learning_rate": 2.195983705769607e-06, "loss": 0.8963, "step": 63100 }, { "epoch": 1.76, "learning_rate": 2.1879558822478883e-06, "loss": 0.9029, "step": 63200 }, { "epoch": 1.76, "learning_rate": 2.1799313259230894e-06, "loss": 0.8764, "step": 63300 }, { "epoch": 1.76, "learning_rate": 2.1719101208147557e-06, "loss": 0.8964, "step": 63400 }, { "epoch": 1.77, "learning_rate": 2.163892350907349e-06, "loss": 0.9076, "step": 63500 }, { "epoch": 1.77, "learning_rate": 2.1558781001493604e-06, "loss": 0.8936, "step": 63600 }, { "epoch": 1.77, "learning_rate": 2.147867452452435e-06, "loss": 0.8659, "step": 63700 }, { "epoch": 1.77, "learning_rate": 2.1398604916904923e-06, "loss": 0.897, "step": 63800 }, { "epoch": 1.78, "learning_rate": 2.131857301698852e-06, "loss": 0.8884, "step": 63900 }, { "epoch": 1.78, "learning_rate": 2.123857966273348e-06, "loss": 0.8893, "step": 64000 }, { "epoch": 1.78, "learning_rate": 2.115862569169458e-06, "loss": 0.9308, "step": 64100 }, { "epoch": 1.79, "learning_rate": 2.1078711941014242e-06, "loss": 0.8899, "step": 64200 }, { "epoch": 1.79, "learning_rate": 2.099883924741376e-06, "loss": 0.8964, "step": 64300 }, { "epoch": 1.79, "learning_rate": 2.0919008447184562e-06, "loss": 0.9132, "step": 64400 }, { "epoch": 1.79, "learning_rate": 2.083922037617943e-06, "loss": 0.9075, "step": 64500 }, { "epoch": 1.8, "learning_rate": 2.075947586980376e-06, "loss": 0.8968, "step": 64600 }, { "epoch": 1.8, "learning_rate": 2.067977576300682e-06, "loss": 0.8683, "step": 64700 }, { "epoch": 1.8, "learning_rate": 2.0600120890272976e-06, "loss": 0.8596, "step": 64800 }, { "epoch": 1.8, "learning_rate": 2.0520512085613e-06, "loss": 0.8863, "step": 64900 }, { "epoch": 1.81, "learning_rate": 2.0440950182555337e-06, "loss": 0.8786, "step": 65000 }, { "epoch": 1.81, "eval_bleu": 7.8516, "eval_gen_len": 18.9805, "eval_loss": 0.8830544948577881, "eval_runtime": 968.5663, "eval_samples_per_second": 2.061, "eval_steps_per_second": 1.03, "step": 65000 }, { "epoch": 1.81, "learning_rate": 2.0361436014137315e-06, "loss": 0.8911, "step": 65100 }, { "epoch": 1.81, "learning_rate": 2.028197041289649e-06, "loss": 0.9172, "step": 65200 }, { "epoch": 1.82, "learning_rate": 2.0202554210861906e-06, "loss": 0.8468, "step": 65300 }, { "epoch": 1.82, "learning_rate": 2.0123188239545375e-06, "loss": 0.8445, "step": 65400 }, { "epoch": 1.82, "learning_rate": 2.0043873329932774e-06, "loss": 0.8917, "step": 65500 }, { "epoch": 1.82, "learning_rate": 1.996461031247536e-06, "loss": 0.8759, "step": 65600 }, { "epoch": 1.83, "learning_rate": 1.9885400017081075e-06, "loss": 0.9045, "step": 65700 }, { "epoch": 1.83, "learning_rate": 1.9806243273105807e-06, "loss": 0.8706, "step": 65800 }, { "epoch": 1.83, "learning_rate": 1.9727140909344767e-06, "loss": 0.8973, "step": 65900 }, { "epoch": 1.84, "learning_rate": 1.9648093754023784e-06, "loss": 0.8648, "step": 66000 }, { "epoch": 1.84, "learning_rate": 1.956910263479066e-06, "loss": 0.8774, "step": 66100 }, { "epoch": 1.84, "learning_rate": 1.9490168378706456e-06, "loss": 0.9013, "step": 66200 }, { "epoch": 1.84, "learning_rate": 1.9411291812236855e-06, "loss": 0.9092, "step": 66300 }, { "epoch": 1.85, "learning_rate": 1.9332473761243532e-06, "loss": 0.8628, "step": 66400 }, { "epoch": 1.85, "learning_rate": 1.925371505097548e-06, "loss": 0.8867, "step": 66500 }, { "epoch": 1.85, "learning_rate": 1.9175016506060357e-06, "loss": 0.8694, "step": 66600 }, { "epoch": 1.85, "learning_rate": 1.9096378950495915e-06, "loss": 0.8613, "step": 66700 }, { "epoch": 1.86, "learning_rate": 1.9017803207641282e-06, "loss": 0.8966, "step": 66800 }, { "epoch": 1.86, "learning_rate": 1.8939290100208425e-06, "loss": 0.8381, "step": 66900 }, { "epoch": 1.86, "learning_rate": 1.8860840450253467e-06, "loss": 0.866, "step": 67000 }, { "epoch": 1.87, "learning_rate": 1.8782455079168144e-06, "loss": 0.9077, "step": 67100 }, { "epoch": 1.87, "learning_rate": 1.8704134807671138e-06, "loss": 0.908, "step": 67200 }, { "epoch": 1.87, "learning_rate": 1.8625880455799562e-06, "loss": 0.8655, "step": 67300 }, { "epoch": 1.87, "learning_rate": 1.8547692842900283e-06, "loss": 0.8985, "step": 67400 }, { "epoch": 1.88, "learning_rate": 1.8469572787621426e-06, "loss": 0.8852, "step": 67500 }, { "epoch": 1.88, "learning_rate": 1.8391521107903747e-06, "loss": 0.8918, "step": 67600 }, { "epoch": 1.88, "learning_rate": 1.8313538620972094e-06, "loss": 0.8765, "step": 67700 }, { "epoch": 1.89, "learning_rate": 1.8235626143326865e-06, "loss": 0.8762, "step": 67800 }, { "epoch": 1.89, "learning_rate": 1.8157784490735404e-06, "loss": 0.9053, "step": 67900 }, { "epoch": 1.89, "learning_rate": 1.8080014478223523e-06, "loss": 0.8811, "step": 68000 }, { "epoch": 1.89, "learning_rate": 1.8002316920066932e-06, "loss": 0.8794, "step": 68100 }, { "epoch": 1.9, "learning_rate": 1.792469262978271e-06, "loss": 0.8885, "step": 68200 }, { "epoch": 1.9, "learning_rate": 1.7847142420120815e-06, "loss": 0.8312, "step": 68300 }, { "epoch": 1.9, "learning_rate": 1.7769667103055564e-06, "loss": 0.856, "step": 68400 }, { "epoch": 1.9, "learning_rate": 1.7692267489777104e-06, "loss": 0.8706, "step": 68500 }, { "epoch": 1.91, "learning_rate": 1.761494439068295e-06, "loss": 0.873, "step": 68600 }, { "epoch": 1.91, "learning_rate": 1.7537698615369504e-06, "loss": 0.8709, "step": 68700 }, { "epoch": 1.91, "learning_rate": 1.7460530972623537e-06, "loss": 0.8237, "step": 68800 }, { "epoch": 1.92, "learning_rate": 1.738344227041376e-06, "loss": 0.8571, "step": 68900 }, { "epoch": 1.92, "learning_rate": 1.7306433315882372e-06, "loss": 0.8635, "step": 69000 }, { "epoch": 1.92, "learning_rate": 1.7229504915336574e-06, "loss": 0.8814, "step": 69100 }, { "epoch": 1.92, "learning_rate": 1.715265787424013e-06, "loss": 0.8667, "step": 69200 }, { "epoch": 1.93, "learning_rate": 1.7075892997204958e-06, "loss": 0.8739, "step": 69300 }, { "epoch": 1.93, "learning_rate": 1.6999211087982686e-06, "loss": 0.8479, "step": 69400 }, { "epoch": 1.93, "learning_rate": 1.6922612949456274e-06, "loss": 0.8878, "step": 69500 }, { "epoch": 1.94, "learning_rate": 1.6846099383631537e-06, "loss": 0.8956, "step": 69600 }, { "epoch": 1.94, "learning_rate": 1.6769671191628807e-06, "loss": 0.8428, "step": 69700 }, { "epoch": 1.94, "learning_rate": 1.6693329173674521e-06, "loss": 0.8344, "step": 69800 }, { "epoch": 1.94, "learning_rate": 1.6617074129092857e-06, "loss": 0.8504, "step": 69900 }, { "epoch": 1.95, "learning_rate": 1.6540906856297336e-06, "loss": 0.8899, "step": 70000 }, { "epoch": 1.95, "eval_bleu": 7.9392, "eval_gen_len": 18.9785, "eval_loss": 0.8788951635360718, "eval_runtime": 967.0825, "eval_samples_per_second": 2.064, "eval_steps_per_second": 1.032, "step": 70000 }, { "epoch": 1.95, "learning_rate": 1.6464828152782508e-06, "loss": 0.8444, "step": 70100 }, { "epoch": 1.95, "learning_rate": 1.638883881511556e-06, "loss": 0.8604, "step": 70200 }, { "epoch": 1.96, "learning_rate": 1.6312939638927994e-06, "loss": 0.8953, "step": 70300 }, { "epoch": 1.96, "learning_rate": 1.623713141890728e-06, "loss": 0.8298, "step": 70400 }, { "epoch": 1.96, "learning_rate": 1.6161414948788575e-06, "loss": 0.8403, "step": 70500 }, { "epoch": 1.96, "learning_rate": 1.6085791021346365e-06, "loss": 0.8284, "step": 70600 }, { "epoch": 1.97, "learning_rate": 1.6010260428386205e-06, "loss": 0.8546, "step": 70700 }, { "epoch": 1.97, "learning_rate": 1.5934823960736402e-06, "loss": 0.8742, "step": 70800 }, { "epoch": 1.97, "learning_rate": 1.5859482408239718e-06, "loss": 0.8856, "step": 70900 }, { "epoch": 1.97, "learning_rate": 1.5784236559745175e-06, "loss": 0.8231, "step": 71000 }, { "epoch": 1.98, "learning_rate": 1.5709087203099687e-06, "loss": 0.8579, "step": 71100 }, { "epoch": 1.98, "learning_rate": 1.5634035125139923e-06, "loss": 0.8614, "step": 71200 }, { "epoch": 1.98, "learning_rate": 1.5559081111683977e-06, "loss": 0.8646, "step": 71300 }, { "epoch": 1.99, "learning_rate": 1.5484225947523201e-06, "loss": 0.8503, "step": 71400 }, { "epoch": 1.99, "learning_rate": 1.5409470416413943e-06, "loss": 0.8683, "step": 71500 }, { "epoch": 1.99, "learning_rate": 1.5334815301069374e-06, "loss": 0.8306, "step": 71600 }, { "epoch": 1.99, "learning_rate": 1.526026138315128e-06, "loss": 0.8103, "step": 71700 }, { "epoch": 2.0, "learning_rate": 1.5185809443261897e-06, "loss": 0.8605, "step": 71800 }, { "epoch": 2.0, "learning_rate": 1.5111460260935695e-06, "loss": 0.833, "step": 71900 }, { "epoch": 2.0, "learning_rate": 1.5037214614631234e-06, "loss": 0.9071, "step": 72000 }, { "epoch": 2.01, "learning_rate": 1.496307328172306e-06, "loss": 0.854, "step": 72100 }, { "epoch": 2.01, "learning_rate": 1.4889037038493488e-06, "loss": 0.8733, "step": 72200 }, { "epoch": 2.01, "learning_rate": 1.4815106660124517e-06, "loss": 0.8441, "step": 72300 }, { "epoch": 2.01, "learning_rate": 1.4741282920689736e-06, "loss": 0.874, "step": 72400 }, { "epoch": 2.02, "learning_rate": 1.4667566593146167e-06, "loss": 0.8514, "step": 72500 }, { "epoch": 2.02, "learning_rate": 1.45939584493262e-06, "loss": 0.8502, "step": 72600 }, { "epoch": 2.02, "learning_rate": 1.4520459259929527e-06, "loss": 0.8563, "step": 72700 }, { "epoch": 2.02, "learning_rate": 1.4447069794515e-06, "loss": 0.8676, "step": 72800 }, { "epoch": 2.03, "learning_rate": 1.437379082149271e-06, "loss": 0.8202, "step": 72900 }, { "epoch": 2.03, "learning_rate": 1.4300623108115793e-06, "loss": 0.8378, "step": 73000 }, { "epoch": 2.03, "learning_rate": 1.4227567420472487e-06, "loss": 0.8805, "step": 73100 }, { "epoch": 2.04, "learning_rate": 1.4154624523478095e-06, "loss": 0.8568, "step": 73200 }, { "epoch": 2.04, "learning_rate": 1.408179518086694e-06, "loss": 0.8617, "step": 73300 }, { "epoch": 2.04, "learning_rate": 1.4009080155184407e-06, "loss": 0.8427, "step": 73400 }, { "epoch": 2.04, "learning_rate": 1.393648020777899e-06, "loss": 0.8675, "step": 73500 }, { "epoch": 2.05, "learning_rate": 1.3863996098794213e-06, "loss": 0.8523, "step": 73600 }, { "epoch": 2.05, "learning_rate": 1.3791628587160768e-06, "loss": 0.8456, "step": 73700 }, { "epoch": 2.05, "learning_rate": 1.3719378430588553e-06, "loss": 0.8492, "step": 73800 }, { "epoch": 2.06, "learning_rate": 1.3647246385558682e-06, "loss": 0.8459, "step": 73900 }, { "epoch": 2.06, "learning_rate": 1.3575233207315635e-06, "loss": 0.8667, "step": 74000 }, { "epoch": 2.06, "learning_rate": 1.3503339649859315e-06, "loss": 0.8342, "step": 74100 }, { "epoch": 2.06, "learning_rate": 1.3431566465937163e-06, "loss": 0.8684, "step": 74200 }, { "epoch": 2.07, "learning_rate": 1.3359914407036267e-06, "loss": 0.8315, "step": 74300 }, { "epoch": 2.07, "learning_rate": 1.3288384223375487e-06, "loss": 0.8773, "step": 74400 }, { "epoch": 2.07, "learning_rate": 1.3216976663897622e-06, "loss": 0.8355, "step": 74500 }, { "epoch": 2.07, "learning_rate": 1.3145692476261587e-06, "loss": 0.8641, "step": 74600 }, { "epoch": 2.08, "learning_rate": 1.3074532406834505e-06, "loss": 0.8655, "step": 74700 }, { "epoch": 2.08, "learning_rate": 1.300349720068398e-06, "loss": 0.8502, "step": 74800 }, { "epoch": 2.08, "learning_rate": 1.2932587601570245e-06, "loss": 0.8529, "step": 74900 }, { "epoch": 2.09, "learning_rate": 1.286180435193839e-06, "loss": 0.8638, "step": 75000 }, { "epoch": 2.09, "eval_bleu": 8.1623, "eval_gen_len": 18.979, "eval_loss": 0.878086507320404, "eval_runtime": 964.1689, "eval_samples_per_second": 2.07, "eval_steps_per_second": 1.035, "step": 75000 }, { "epoch": 2.09, "learning_rate": 1.2791148192910586e-06, "loss": 0.8571, "step": 75100 }, { "epoch": 2.09, "learning_rate": 1.2720619864278338e-06, "loss": 0.8588, "step": 75200 }, { "epoch": 2.09, "learning_rate": 1.2650220104494714e-06, "loss": 0.8413, "step": 75300 }, { "epoch": 2.1, "learning_rate": 1.2579949650666648e-06, "loss": 0.8627, "step": 75400 }, { "epoch": 2.1, "learning_rate": 1.2509809238547165e-06, "loss": 0.8522, "step": 75500 }, { "epoch": 2.1, "learning_rate": 1.2439799602527741e-06, "loss": 0.8369, "step": 75600 }, { "epoch": 2.11, "learning_rate": 1.2369921475630586e-06, "loss": 0.8676, "step": 75700 }, { "epoch": 2.11, "learning_rate": 1.230017558950096e-06, "loss": 0.8744, "step": 75800 }, { "epoch": 2.11, "learning_rate": 1.2230562674399538e-06, "loss": 0.8681, "step": 75900 }, { "epoch": 2.11, "learning_rate": 1.2161083459194714e-06, "loss": 0.8504, "step": 76000 }, { "epoch": 2.12, "learning_rate": 1.2091738671355039e-06, "loss": 0.8568, "step": 76100 }, { "epoch": 2.12, "learning_rate": 1.2022529036941546e-06, "loss": 0.862, "step": 76200 }, { "epoch": 2.12, "learning_rate": 1.1953455280600188e-06, "loss": 0.8509, "step": 76300 }, { "epoch": 2.12, "learning_rate": 1.188451812555422e-06, "loss": 0.8718, "step": 76400 }, { "epoch": 2.13, "learning_rate": 1.1815718293596653e-06, "loss": 0.877, "step": 76500 }, { "epoch": 2.13, "learning_rate": 1.1747056505082658e-06, "loss": 0.8373, "step": 76600 }, { "epoch": 2.13, "learning_rate": 1.1678533478922075e-06, "loss": 0.8603, "step": 76700 }, { "epoch": 2.14, "learning_rate": 1.1610149932571847e-06, "loss": 0.8459, "step": 76800 }, { "epoch": 2.14, "learning_rate": 1.1541906582028526e-06, "loss": 0.8692, "step": 76900 }, { "epoch": 2.14, "learning_rate": 1.1473804141820783e-06, "loss": 0.8508, "step": 77000 }, { "epoch": 2.14, "learning_rate": 1.1405843325001878e-06, "loss": 0.8799, "step": 77100 }, { "epoch": 2.15, "learning_rate": 1.1338024843142265e-06, "loss": 0.8758, "step": 77200 }, { "epoch": 2.15, "learning_rate": 1.1270349406322109e-06, "loss": 0.8531, "step": 77300 }, { "epoch": 2.15, "learning_rate": 1.1202817723123807e-06, "loss": 0.8452, "step": 77400 }, { "epoch": 2.16, "learning_rate": 1.1135430500624675e-06, "loss": 0.9195, "step": 77500 }, { "epoch": 2.16, "learning_rate": 1.1068188444389444e-06, "loss": 0.8824, "step": 77600 }, { "epoch": 2.16, "learning_rate": 1.10010922584629e-06, "loss": 0.8379, "step": 77700 }, { "epoch": 2.16, "learning_rate": 1.0934142645362547e-06, "loss": 0.8654, "step": 77800 }, { "epoch": 2.17, "learning_rate": 1.0867340306071228e-06, "loss": 0.8671, "step": 77900 }, { "epoch": 2.17, "learning_rate": 1.0800685940029742e-06, "loss": 0.8641, "step": 78000 }, { "epoch": 2.17, "learning_rate": 1.073418024512964e-06, "loss": 0.8477, "step": 78100 }, { "epoch": 2.17, "learning_rate": 1.066782391770576e-06, "loss": 0.8477, "step": 78200 }, { "epoch": 2.18, "learning_rate": 1.0601617652529066e-06, "loss": 0.8356, "step": 78300 }, { "epoch": 2.18, "learning_rate": 1.0535562142799325e-06, "loss": 0.8803, "step": 78400 }, { "epoch": 2.18, "learning_rate": 1.0469658080137801e-06, "loss": 0.8187, "step": 78500 }, { "epoch": 2.19, "learning_rate": 1.0403906154580127e-06, "loss": 0.8548, "step": 78600 }, { "epoch": 2.19, "learning_rate": 1.0338307054568977e-06, "loss": 0.8409, "step": 78700 }, { "epoch": 2.19, "learning_rate": 1.027286146694689e-06, "loss": 0.8734, "step": 78800 }, { "epoch": 2.19, "learning_rate": 1.0207570076949099e-06, "loss": 0.858, "step": 78900 }, { "epoch": 2.2, "learning_rate": 1.0142433568196347e-06, "loss": 0.8373, "step": 79000 }, { "epoch": 2.2, "learning_rate": 1.007745262268768e-06, "loss": 0.8376, "step": 79100 }, { "epoch": 2.2, "learning_rate": 1.0012627920793424e-06, "loss": 0.8204, "step": 79200 }, { "epoch": 2.21, "learning_rate": 9.947960141247918e-07, "loss": 0.8247, "step": 79300 }, { "epoch": 2.21, "learning_rate": 9.883449961142504e-07, "loss": 0.8753, "step": 79400 }, { "epoch": 2.21, "learning_rate": 9.819098055918424e-07, "loss": 0.8703, "step": 79500 }, { "epoch": 2.21, "learning_rate": 9.7549050993597e-07, "loss": 0.823, "step": 79600 }, { "epoch": 2.22, "learning_rate": 9.690871763586137e-07, "loss": 0.8877, "step": 79700 }, { "epoch": 2.22, "learning_rate": 9.626998719046263e-07, "loss": 0.8174, "step": 79800 }, { "epoch": 2.22, "learning_rate": 9.563286634510293e-07, "loss": 0.8523, "step": 79900 }, { "epoch": 2.22, "learning_rate": 9.499736177063165e-07, "loss": 0.8293, "step": 80000 }, { "epoch": 2.22, "eval_bleu": 8.0989, "eval_gen_len": 18.98, "eval_loss": 0.8752478361129761, "eval_runtime": 964.0098, "eval_samples_per_second": 2.071, "eval_steps_per_second": 1.035, "step": 80000 }, { "epoch": 2.23, "learning_rate": 9.436348012097496e-07, "loss": 0.8633, "step": 80100 }, { "epoch": 2.23, "learning_rate": 9.373122803306672e-07, "loss": 0.8656, "step": 80200 }, { "epoch": 2.23, "learning_rate": 9.31006121267791e-07, "loss": 0.8289, "step": 80300 }, { "epoch": 2.24, "learning_rate": 9.247163900485232e-07, "loss": 0.8325, "step": 80400 }, { "epoch": 2.24, "learning_rate": 9.184431525282659e-07, "loss": 0.8779, "step": 80500 }, { "epoch": 2.24, "learning_rate": 9.121864743897266e-07, "loss": 0.8456, "step": 80600 }, { "epoch": 2.24, "learning_rate": 9.059464211422286e-07, "loss": 0.8431, "step": 80700 }, { "epoch": 2.25, "learning_rate": 8.9972305812103e-07, "loss": 0.8287, "step": 80800 }, { "epoch": 2.25, "learning_rate": 8.935164504866367e-07, "loss": 0.842, "step": 80900 }, { "epoch": 2.25, "learning_rate": 8.873266632241201e-07, "loss": 0.8748, "step": 81000 }, { "epoch": 2.26, "learning_rate": 8.811537611424383e-07, "loss": 0.8652, "step": 81100 }, { "epoch": 2.26, "learning_rate": 8.749978088737541e-07, "loss": 0.8327, "step": 81200 }, { "epoch": 2.26, "learning_rate": 8.688588708727621e-07, "loss": 0.8433, "step": 81300 }, { "epoch": 2.26, "learning_rate": 8.627370114160133e-07, "loss": 0.861, "step": 81400 }, { "epoch": 2.27, "learning_rate": 8.566322946012389e-07, "loss": 0.852, "step": 81500 }, { "epoch": 2.27, "learning_rate": 8.505447843466836e-07, "loss": 0.8334, "step": 81600 }, { "epoch": 2.27, "learning_rate": 8.444745443904337e-07, "loss": 0.8509, "step": 81700 }, { "epoch": 2.27, "learning_rate": 8.384216382897476e-07, "loss": 0.8272, "step": 81800 }, { "epoch": 2.28, "learning_rate": 8.323861294203964e-07, "loss": 0.7921, "step": 81900 }, { "epoch": 2.28, "learning_rate": 8.263680809759955e-07, "loss": 0.8153, "step": 82000 }, { "epoch": 2.28, "learning_rate": 8.203675559673441e-07, "loss": 0.8168, "step": 82100 }, { "epoch": 2.29, "learning_rate": 8.143846172217671e-07, "loss": 0.872, "step": 82200 }, { "epoch": 2.29, "learning_rate": 8.084193273824531e-07, "loss": 0.8519, "step": 82300 }, { "epoch": 2.29, "learning_rate": 8.024717489078032e-07, "loss": 0.8594, "step": 82400 }, { "epoch": 2.29, "learning_rate": 7.965419440707756e-07, "loss": 0.8445, "step": 82500 }, { "epoch": 2.3, "learning_rate": 7.90629974958232e-07, "loss": 0.8429, "step": 82600 }, { "epoch": 2.3, "learning_rate": 7.84735903470289e-07, "loss": 0.819, "step": 82700 }, { "epoch": 2.3, "learning_rate": 7.788597913196702e-07, "loss": 0.829, "step": 82800 }, { "epoch": 2.31, "learning_rate": 7.730017000310575e-07, "loss": 0.8163, "step": 82900 }, { "epoch": 2.31, "learning_rate": 7.671616909404508e-07, "loss": 0.8699, "step": 83000 }, { "epoch": 2.31, "learning_rate": 7.613398251945239e-07, "loss": 0.8303, "step": 83100 }, { "epoch": 2.31, "learning_rate": 7.555361637499833e-07, "loss": 0.8012, "step": 83200 }, { "epoch": 2.32, "learning_rate": 7.49750767372932e-07, "loss": 0.8178, "step": 83300 }, { "epoch": 2.32, "learning_rate": 7.439836966382303e-07, "loss": 0.847, "step": 83400 }, { "epoch": 2.32, "learning_rate": 7.382350119288647e-07, "loss": 0.8623, "step": 83500 }, { "epoch": 2.32, "learning_rate": 7.325047734353155e-07, "loss": 0.8079, "step": 83600 }, { "epoch": 2.33, "learning_rate": 7.267930411549234e-07, "loss": 0.8337, "step": 83700 }, { "epoch": 2.33, "learning_rate": 7.210998748912657e-07, "loss": 0.8399, "step": 83800 }, { "epoch": 2.33, "learning_rate": 7.154253342535248e-07, "loss": 0.851, "step": 83900 }, { "epoch": 2.34, "learning_rate": 7.097694786558693e-07, "loss": 0.8304, "step": 84000 }, { "epoch": 2.34, "learning_rate": 7.041323673168307e-07, "loss": 0.846, "step": 84100 }, { "epoch": 2.34, "learning_rate": 6.985140592586781e-07, "loss": 0.8835, "step": 84200 }, { "epoch": 2.34, "learning_rate": 6.929146133068102e-07, "loss": 0.8588, "step": 84300 }, { "epoch": 2.35, "learning_rate": 6.873340880891308e-07, "loss": 0.8275, "step": 84400 }, { "epoch": 2.35, "learning_rate": 6.817725420354365e-07, "loss": 0.8529, "step": 84500 }, { "epoch": 2.35, "learning_rate": 6.762300333768082e-07, "loss": 0.8705, "step": 84600 }, { "epoch": 2.36, "learning_rate": 6.707066201450003e-07, "loss": 0.8591, "step": 84700 }, { "epoch": 2.36, "learning_rate": 6.652023601718282e-07, "loss": 0.8235, "step": 84800 }, { "epoch": 2.36, "learning_rate": 6.597173110885732e-07, "loss": 0.8575, "step": 84900 }, { "epoch": 2.36, "learning_rate": 6.542515303253666e-07, "loss": 0.8625, "step": 85000 }, { "epoch": 2.36, "eval_bleu": 8.176, "eval_gen_len": 18.979, "eval_loss": 0.8743442296981812, "eval_runtime": 965.6565, "eval_samples_per_second": 2.067, "eval_steps_per_second": 1.033, "step": 85000 }, { "epoch": 2.37, "learning_rate": 6.488050751105979e-07, "loss": 0.8735, "step": 85100 }, { "epoch": 2.37, "learning_rate": 6.433780024703124e-07, "loss": 0.8721, "step": 85200 }, { "epoch": 2.37, "learning_rate": 6.379703692276104e-07, "loss": 0.8554, "step": 85300 }, { "epoch": 2.37, "learning_rate": 6.325822320020608e-07, "loss": 0.8569, "step": 85400 }, { "epoch": 2.38, "learning_rate": 6.272136472091008e-07, "loss": 0.8107, "step": 85500 }, { "epoch": 2.38, "learning_rate": 6.218646710594465e-07, "loss": 0.8671, "step": 85600 }, { "epoch": 2.38, "learning_rate": 6.165353595585069e-07, "loss": 0.8392, "step": 85700 }, { "epoch": 2.39, "learning_rate": 6.112257685057973e-07, "loss": 0.8353, "step": 85800 }, { "epoch": 2.39, "learning_rate": 6.059359534943501e-07, "loss": 0.8244, "step": 85900 }, { "epoch": 2.39, "learning_rate": 6.006659699101419e-07, "loss": 0.8328, "step": 86000 }, { "epoch": 2.39, "learning_rate": 5.954158729315032e-07, "loss": 0.8333, "step": 86100 }, { "epoch": 2.4, "learning_rate": 5.901857175285488e-07, "loss": 0.8651, "step": 86200 }, { "epoch": 2.4, "learning_rate": 5.849755584625985e-07, "loss": 0.8671, "step": 86300 }, { "epoch": 2.4, "learning_rate": 5.797854502856029e-07, "loss": 0.8453, "step": 86400 }, { "epoch": 2.41, "learning_rate": 5.746154473395752e-07, "loss": 0.8358, "step": 86500 }, { "epoch": 2.41, "learning_rate": 5.694656037560206e-07, "loss": 0.8291, "step": 86600 }, { "epoch": 2.41, "learning_rate": 5.643359734553693e-07, "loss": 0.8541, "step": 86700 }, { "epoch": 2.41, "learning_rate": 5.592266101464122e-07, "loss": 0.8466, "step": 86800 }, { "epoch": 2.42, "learning_rate": 5.541375673257394e-07, "loss": 0.8221, "step": 86900 }, { "epoch": 2.42, "learning_rate": 5.490688982771769e-07, "loss": 0.8347, "step": 87000 }, { "epoch": 2.42, "learning_rate": 5.440206560712352e-07, "loss": 0.8715, "step": 87100 }, { "epoch": 2.42, "learning_rate": 5.389928935645452e-07, "loss": 0.854, "step": 87200 }, { "epoch": 2.43, "learning_rate": 5.339856633993124e-07, "loss": 0.8747, "step": 87300 }, { "epoch": 2.43, "learning_rate": 5.28999018002761e-07, "loss": 0.8586, "step": 87400 }, { "epoch": 2.43, "learning_rate": 5.240330095865856e-07, "loss": 0.8247, "step": 87500 }, { "epoch": 2.44, "learning_rate": 5.190876901464067e-07, "loss": 0.8357, "step": 87600 }, { "epoch": 2.44, "learning_rate": 5.14163111461225e-07, "loss": 0.847, "step": 87700 }, { "epoch": 2.44, "learning_rate": 5.092593250928782e-07, "loss": 0.8241, "step": 87800 }, { "epoch": 2.44, "learning_rate": 5.043763823855036e-07, "loss": 0.8706, "step": 87900 }, { "epoch": 2.45, "learning_rate": 4.995143344649964e-07, "loss": 0.8625, "step": 88000 }, { "epoch": 2.45, "learning_rate": 4.946732322384795e-07, "loss": 0.8476, "step": 88100 }, { "epoch": 2.45, "learning_rate": 4.89853126393767e-07, "loss": 0.8344, "step": 88200 }, { "epoch": 2.46, "learning_rate": 4.850540673988346e-07, "loss": 0.8456, "step": 88300 }, { "epoch": 2.46, "learning_rate": 4.802761055012914e-07, "loss": 0.8175, "step": 88400 }, { "epoch": 2.46, "learning_rate": 4.755192907278536e-07, "loss": 0.848, "step": 88500 }, { "epoch": 2.46, "learning_rate": 4.7078367288381886e-07, "loss": 0.8592, "step": 88600 }, { "epoch": 2.47, "learning_rate": 4.660693015525486e-07, "loss": 0.82, "step": 88700 }, { "epoch": 2.47, "learning_rate": 4.613762260949456e-07, "loss": 0.8745, "step": 88800 }, { "epoch": 2.47, "learning_rate": 4.567044956489394e-07, "loss": 0.8827, "step": 88900 }, { "epoch": 2.48, "learning_rate": 4.520541591289701e-07, "loss": 0.862, "step": 89000 }, { "epoch": 2.48, "learning_rate": 4.4742526522547626e-07, "loss": 0.8684, "step": 89100 }, { "epoch": 2.48, "learning_rate": 4.428178624043866e-07, "loss": 0.8537, "step": 89200 }, { "epoch": 2.48, "learning_rate": 4.382319989066117e-07, "loss": 0.8451, "step": 89300 }, { "epoch": 2.49, "learning_rate": 4.336677227475383e-07, "loss": 0.8589, "step": 89400 }, { "epoch": 2.49, "learning_rate": 4.2912508171652765e-07, "loss": 0.8102, "step": 89500 }, { "epoch": 2.49, "learning_rate": 4.2460412337641504e-07, "loss": 0.8662, "step": 89600 }, { "epoch": 2.49, "learning_rate": 4.2010489506300933e-07, "loss": 0.8566, "step": 89700 }, { "epoch": 2.5, "learning_rate": 4.156274438846017e-07, "loss": 0.8663, "step": 89800 }, { "epoch": 2.5, "learning_rate": 4.11171816721469e-07, "loss": 0.8125, "step": 89900 }, { "epoch": 2.5, "learning_rate": 4.0673806022538425e-07, "loss": 0.8605, "step": 90000 }, { "epoch": 2.5, "eval_bleu": 8.0117, "eval_gen_len": 18.9805, "eval_loss": 0.8721033334732056, "eval_runtime": 963.8746, "eval_samples_per_second": 2.071, "eval_steps_per_second": 1.035, "step": 90000 }, { "epoch": 2.51, "learning_rate": 4.023262208191284e-07, "loss": 0.8296, "step": 90100 }, { "epoch": 2.51, "learning_rate": 3.9793634469600216e-07, "loss": 0.83, "step": 90200 }, { "epoch": 2.51, "learning_rate": 3.9356847781934575e-07, "loss": 0.8432, "step": 90300 }, { "epoch": 2.51, "learning_rate": 3.892226659220552e-07, "loss": 0.8396, "step": 90400 }, { "epoch": 2.52, "learning_rate": 3.8489895450610407e-07, "loss": 0.8742, "step": 90500 }, { "epoch": 2.52, "learning_rate": 3.8059738884206775e-07, "loss": 0.8124, "step": 90600 }, { "epoch": 2.52, "learning_rate": 3.7631801396864757e-07, "loss": 0.8238, "step": 90700 }, { "epoch": 2.53, "learning_rate": 3.7206087469220195e-07, "loss": 0.8443, "step": 90800 }, { "epoch": 2.53, "learning_rate": 3.6782601558627563e-07, "loss": 0.8567, "step": 90900 }, { "epoch": 2.53, "learning_rate": 3.6361348099113123e-07, "loss": 0.8406, "step": 91000 }, { "epoch": 2.53, "learning_rate": 3.5942331501329003e-07, "loss": 0.8667, "step": 91100 }, { "epoch": 2.54, "learning_rate": 3.552555615250658e-07, "loss": 0.8206, "step": 91200 }, { "epoch": 2.54, "learning_rate": 3.511102641641051e-07, "loss": 0.8545, "step": 91300 }, { "epoch": 2.54, "learning_rate": 3.469874663329342e-07, "loss": 0.8578, "step": 91400 }, { "epoch": 2.54, "learning_rate": 3.428872111985021e-07, "loss": 0.8555, "step": 91500 }, { "epoch": 2.55, "learning_rate": 3.388095416917267e-07, "loss": 0.8161, "step": 91600 }, { "epoch": 2.55, "learning_rate": 3.3475450050705125e-07, "loss": 0.8234, "step": 91700 }, { "epoch": 2.55, "learning_rate": 3.3072213010199053e-07, "loss": 0.8342, "step": 91800 }, { "epoch": 2.56, "learning_rate": 3.267124726966903e-07, "loss": 0.8458, "step": 91900 }, { "epoch": 2.56, "learning_rate": 3.2272557027348524e-07, "loss": 0.8573, "step": 92000 }, { "epoch": 2.56, "learning_rate": 3.187614645764564e-07, "loss": 0.8544, "step": 92100 }, { "epoch": 2.56, "learning_rate": 3.1482019711099735e-07, "loss": 0.8455, "step": 92200 }, { "epoch": 2.57, "learning_rate": 3.109018091433802e-07, "loss": 0.847, "step": 92300 }, { "epoch": 2.57, "learning_rate": 3.07006341700318e-07, "loss": 0.8277, "step": 92400 }, { "epoch": 2.57, "learning_rate": 3.031338355685418e-07, "loss": 0.8297, "step": 92500 }, { "epoch": 2.58, "learning_rate": 2.992843312943702e-07, "loss": 0.8648, "step": 92600 }, { "epoch": 2.58, "learning_rate": 2.954578691832835e-07, "loss": 0.8469, "step": 92700 }, { "epoch": 2.58, "learning_rate": 2.9165448929950685e-07, "loss": 0.8757, "step": 92800 }, { "epoch": 2.58, "learning_rate": 2.878742314655844e-07, "loss": 0.8526, "step": 92900 }, { "epoch": 2.59, "learning_rate": 2.8411713526196677e-07, "loss": 0.8369, "step": 93000 }, { "epoch": 2.59, "learning_rate": 2.80383240026596e-07, "loss": 0.8428, "step": 93100 }, { "epoch": 2.59, "learning_rate": 2.766725848544907e-07, "loss": 0.8428, "step": 93200 }, { "epoch": 2.59, "learning_rate": 2.7298520859734054e-07, "loss": 0.8695, "step": 93300 }, { "epoch": 2.6, "learning_rate": 2.6932114986309874e-07, "loss": 0.8534, "step": 93400 }, { "epoch": 2.6, "learning_rate": 2.6568044701557494e-07, "loss": 0.8515, "step": 93500 }, { "epoch": 2.6, "learning_rate": 2.6206313817403627e-07, "loss": 0.8642, "step": 93600 }, { "epoch": 2.61, "learning_rate": 2.5846926121280843e-07, "loss": 0.8184, "step": 93700 }, { "epoch": 2.61, "learning_rate": 2.5489885376087626e-07, "loss": 0.8173, "step": 93800 }, { "epoch": 2.61, "learning_rate": 2.5135195320149355e-07, "loss": 0.85, "step": 93900 }, { "epoch": 2.61, "learning_rate": 2.478285966717889e-07, "loss": 0.858, "step": 94000 }, { "epoch": 2.62, "learning_rate": 2.4432882106237786e-07, "loss": 0.8104, "step": 94100 }, { "epoch": 2.62, "learning_rate": 2.408526630169772e-07, "loss": 0.8469, "step": 94200 }, { "epoch": 2.62, "learning_rate": 2.3740015893201906e-07, "loss": 0.8599, "step": 94300 }, { "epoch": 2.63, "learning_rate": 2.3397134495627278e-07, "loss": 0.874, "step": 94400 }, { "epoch": 2.63, "learning_rate": 2.305662569904646e-07, "loss": 0.8251, "step": 94500 }, { "epoch": 2.63, "learning_rate": 2.2718493068690229e-07, "loss": 0.86, "step": 94600 }, { "epoch": 2.63, "learning_rate": 2.2382740144910236e-07, "loss": 0.8766, "step": 94700 }, { "epoch": 2.64, "learning_rate": 2.2049370443141704e-07, "loss": 0.8541, "step": 94800 }, { "epoch": 2.64, "learning_rate": 2.171838745386695e-07, "loss": 0.8276, "step": 94900 }, { "epoch": 2.64, "learning_rate": 2.1389794642578649e-07, "loss": 0.8479, "step": 95000 }, { "epoch": 2.64, "eval_bleu": 8.1008, "eval_gen_len": 18.978, "eval_loss": 0.8710653781890869, "eval_runtime": 966.0685, "eval_samples_per_second": 2.066, "eval_steps_per_second": 1.033, "step": 95000 }, { "epoch": 2.64, "learning_rate": 2.1063595449743595e-07, "loss": 0.8244, "step": 95100 }, { "epoch": 2.65, "learning_rate": 2.073979329076664e-07, "loss": 0.8897, "step": 95200 }, { "epoch": 2.65, "learning_rate": 2.0418391555955042e-07, "loss": 0.8269, "step": 95300 }, { "epoch": 2.65, "learning_rate": 2.0099393610482688e-07, "loss": 0.8353, "step": 95400 }, { "epoch": 2.66, "learning_rate": 1.9782802794355239e-07, "loss": 0.8267, "step": 95500 }, { "epoch": 2.66, "learning_rate": 1.9468622422374962e-07, "loss": 0.8484, "step": 95600 }, { "epoch": 2.66, "learning_rate": 1.9156855784106004e-07, "loss": 0.8665, "step": 95700 }, { "epoch": 2.66, "learning_rate": 1.8847506143839983e-07, "loss": 0.8377, "step": 95800 }, { "epoch": 2.67, "learning_rate": 1.854057674056181e-07, "loss": 0.859, "step": 95900 }, { "epoch": 2.67, "learning_rate": 1.8236070787915754e-07, "loss": 0.8135, "step": 96000 }, { "epoch": 2.67, "learning_rate": 1.793399147417188e-07, "loss": 0.8111, "step": 96100 }, { "epoch": 2.68, "learning_rate": 1.7634341962192602e-07, "loss": 0.8239, "step": 96200 }, { "epoch": 2.68, "learning_rate": 1.7337125389399496e-07, "loss": 0.8728, "step": 96300 }, { "epoch": 2.68, "learning_rate": 1.7042344867740645e-07, "loss": 0.8582, "step": 96400 }, { "epoch": 2.68, "learning_rate": 1.6750003483657739e-07, "loss": 0.8271, "step": 96500 }, { "epoch": 2.69, "learning_rate": 1.646010429805417e-07, "loss": 0.8376, "step": 96600 }, { "epoch": 2.69, "learning_rate": 1.6172650346262642e-07, "loss": 0.867, "step": 96700 }, { "epoch": 2.69, "learning_rate": 1.5887644638013566e-07, "loss": 0.8924, "step": 96800 }, { "epoch": 2.69, "learning_rate": 1.5605090157403495e-07, "loss": 0.8466, "step": 96900 }, { "epoch": 2.7, "learning_rate": 1.5324989862863792e-07, "loss": 0.8462, "step": 97000 }, { "epoch": 2.7, "learning_rate": 1.5047346687129898e-07, "loss": 0.8679, "step": 97100 }, { "epoch": 2.7, "learning_rate": 1.4772163537210393e-07, "loss": 0.8395, "step": 97200 }, { "epoch": 2.71, "learning_rate": 1.4499443294356541e-07, "loss": 0.8514, "step": 97300 }, { "epoch": 2.71, "learning_rate": 1.422918881403243e-07, "loss": 0.8532, "step": 97400 }, { "epoch": 2.71, "learning_rate": 1.3961402925884744e-07, "loss": 0.8172, "step": 97500 }, { "epoch": 2.71, "learning_rate": 1.36960884337132e-07, "loss": 0.7917, "step": 97600 }, { "epoch": 2.72, "learning_rate": 1.3433248115441362e-07, "loss": 0.8305, "step": 97700 }, { "epoch": 2.72, "learning_rate": 1.3172884723087336e-07, "loss": 0.8496, "step": 97800 }, { "epoch": 2.72, "learning_rate": 1.2915000982735155e-07, "loss": 0.8685, "step": 97900 }, { "epoch": 2.73, "learning_rate": 1.2659599594506106e-07, "loss": 0.8706, "step": 98000 }, { "epoch": 2.73, "learning_rate": 1.2406683232530414e-07, "loss": 0.8015, "step": 98100 }, { "epoch": 2.73, "learning_rate": 1.2156254544919476e-07, "loss": 0.8639, "step": 98200 }, { "epoch": 2.73, "learning_rate": 1.1908316153737858e-07, "loss": 0.8624, "step": 98300 }, { "epoch": 2.74, "learning_rate": 1.1662870654975955e-07, "loss": 0.8271, "step": 98400 }, { "epoch": 2.74, "learning_rate": 1.1419920618522984e-07, "loss": 0.8196, "step": 98500 }, { "epoch": 2.74, "learning_rate": 1.1179468588139702e-07, "loss": 0.8528, "step": 98600 }, { "epoch": 2.74, "learning_rate": 1.0941517081432096e-07, "loss": 0.8642, "step": 98700 }, { "epoch": 2.75, "learning_rate": 1.0706068589824925e-07, "loss": 0.8515, "step": 98800 }, { "epoch": 2.75, "learning_rate": 1.0473125578535526e-07, "loss": 0.8398, "step": 98900 }, { "epoch": 2.75, "learning_rate": 1.0242690486548134e-07, "loss": 0.8797, "step": 99000 }, { "epoch": 2.76, "learning_rate": 1.0014765726588437e-07, "loss": 0.8537, "step": 99100 }, { "epoch": 2.76, "learning_rate": 9.789353685097953e-08, "loss": 0.812, "step": 99200 }, { "epoch": 2.76, "learning_rate": 9.566456722209432e-08, "loss": 0.8588, "step": 99300 }, { "epoch": 2.76, "learning_rate": 9.34607717172195e-08, "loss": 0.8394, "step": 99400 }, { "epoch": 2.77, "learning_rate": 9.12821734107644e-08, "loss": 0.8493, "step": 99500 }, { "epoch": 2.77, "learning_rate": 8.91287951133174e-08, "loss": 0.8376, "step": 99600 }, { "epoch": 2.77, "learning_rate": 8.700065937140401e-08, "loss": 0.832, "step": 99700 }, { "epoch": 2.78, "learning_rate": 8.489778846725417e-08, "loss": 0.8802, "step": 99800 }, { "epoch": 2.78, "learning_rate": 8.282020441856637e-08, "loss": 0.8084, "step": 99900 }, { "epoch": 2.78, "learning_rate": 8.076792897827757e-08, "loss": 0.8391, "step": 100000 }, { "epoch": 2.78, "eval_bleu": 8.2041, "eval_gen_len": 18.9795, "eval_loss": 0.8708174228668213, "eval_runtime": 963.8453, "eval_samples_per_second": 2.071, "eval_steps_per_second": 1.035, "step": 100000 }, { "epoch": 2.78, "learning_rate": 7.874098363433668e-08, "loss": 0.8315, "step": 100100 }, { "epoch": 2.79, "learning_rate": 7.673938960947924e-08, "loss": 0.8792, "step": 100200 }, { "epoch": 2.79, "learning_rate": 7.476316786100336e-08, "loss": 0.8167, "step": 100300 }, { "epoch": 2.79, "learning_rate": 7.281233908055269e-08, "loss": 0.8536, "step": 100400 }, { "epoch": 2.79, "learning_rate": 7.088692369389888e-08, "loss": 0.8216, "step": 100500 }, { "epoch": 2.8, "learning_rate": 6.89869418607264e-08, "loss": 0.8033, "step": 100600 }, { "epoch": 2.8, "learning_rate": 6.711241347442415e-08, "loss": 0.8184, "step": 100700 }, { "epoch": 2.8, "learning_rate": 6.526335816187474e-08, "loss": 0.847, "step": 100800 }, { "epoch": 2.81, "learning_rate": 6.34397952832505e-08, "loss": 0.8446, "step": 100900 }, { "epoch": 2.81, "learning_rate": 6.164174393181038e-08, "loss": 0.869, "step": 101000 }, { "epoch": 2.81, "learning_rate": 5.986922293369834e-08, "loss": 0.8355, "step": 101100 }, { "epoch": 2.81, "learning_rate": 5.812225084774969e-08, "loss": 0.8527, "step": 101200 }, { "epoch": 2.82, "learning_rate": 5.640084596529399e-08, "loss": 0.8716, "step": 101300 }, { "epoch": 2.82, "learning_rate": 5.4705026309964104e-08, "loss": 0.8185, "step": 101400 }, { "epoch": 2.82, "learning_rate": 5.3034809637508846e-08, "loss": 0.8322, "step": 101500 }, { "epoch": 2.83, "learning_rate": 5.139021343560452e-08, "loss": 0.81, "step": 101600 }, { "epoch": 2.83, "learning_rate": 4.977125492367452e-08, "loss": 0.8391, "step": 101700 }, { "epoch": 2.83, "learning_rate": 4.817795105270723e-08, "loss": 0.8142, "step": 101800 }, { "epoch": 2.83, "learning_rate": 4.661031850507924e-08, "loss": 0.8039, "step": 101900 }, { "epoch": 2.84, "learning_rate": 4.5068373694380775e-08, "loss": 0.828, "step": 102000 }, { "epoch": 2.84, "learning_rate": 4.355213276524356e-08, "loss": 0.8512, "step": 102100 }, { "epoch": 2.84, "learning_rate": 4.206161159317129e-08, "loss": 0.839, "step": 102200 }, { "epoch": 2.84, "learning_rate": 4.059682578437474e-08, "loss": 0.8535, "step": 102300 }, { "epoch": 2.85, "learning_rate": 3.915779067560743e-08, "loss": 0.8548, "step": 102400 }, { "epoch": 2.85, "learning_rate": 3.774452133400469e-08, "loss": 0.8437, "step": 102500 }, { "epoch": 2.85, "learning_rate": 3.635703255692735e-08, "loss": 0.8258, "step": 102600 }, { "epoch": 2.86, "learning_rate": 3.4995338871804954e-08, "loss": 0.8327, "step": 102700 }, { "epoch": 2.86, "learning_rate": 3.3659454535985015e-08, "loss": 0.8453, "step": 102800 }, { "epoch": 2.86, "learning_rate": 3.234939353658345e-08, "loss": 0.8719, "step": 102900 }, { "epoch": 2.86, "learning_rate": 3.1065169590337453e-08, "loss": 0.8516, "step": 103000 }, { "epoch": 2.87, "learning_rate": 2.98067961434631e-08, "loss": 0.8355, "step": 103100 }, { "epoch": 2.87, "learning_rate": 2.857428637151327e-08, "loss": 0.8453, "step": 103200 }, { "epoch": 2.87, "learning_rate": 2.7367653179240783e-08, "loss": 0.8211, "step": 103300 }, { "epoch": 2.88, "learning_rate": 2.6186909200462128e-08, "loss": 0.8567, "step": 103400 }, { "epoch": 2.88, "learning_rate": 2.503206679792647e-08, "loss": 0.8693, "step": 103500 }, { "epoch": 2.88, "learning_rate": 2.390313806318545e-08, "loss": 0.8263, "step": 103600 }, { "epoch": 2.88, "learning_rate": 2.2800134816466647e-08, "loss": 0.8622, "step": 103700 }, { "epoch": 2.89, "learning_rate": 2.172306860654977e-08, "loss": 0.8541, "step": 103800 }, { "epoch": 2.89, "learning_rate": 2.0671950710645928e-08, "loss": 0.8408, "step": 103900 }, { "epoch": 2.89, "learning_rate": 1.9646792134279667e-08, "loss": 0.8284, "step": 104000 }, { "epoch": 2.89, "learning_rate": 1.864760361117296e-08, "loss": 0.8354, "step": 104100 }, { "epoch": 2.9, "learning_rate": 1.7674395603134442e-08, "loss": 0.8895, "step": 104200 }, { "epoch": 2.9, "learning_rate": 1.6727178299948133e-08, "loss": 0.8532, "step": 104300 }, { "epoch": 2.9, "learning_rate": 1.5805961619267396e-08, "loss": 0.8279, "step": 104400 }, { "epoch": 2.91, "learning_rate": 1.491075520651142e-08, "loss": 0.8415, "step": 104500 }, { "epoch": 2.91, "learning_rate": 1.4041568434764175e-08, "loss": 0.8381, "step": 104600 }, { "epoch": 2.91, "learning_rate": 1.3198410404675066e-08, "loss": 0.8357, "step": 104700 }, { "epoch": 2.91, "learning_rate": 1.2381289944366492e-08, "loss": 0.8405, "step": 104800 }, { "epoch": 2.92, "learning_rate": 1.1590215609337264e-08, "loss": 0.8565, "step": 104900 }, { "epoch": 2.92, "learning_rate": 1.0825195682377387e-08, "loss": 0.8649, "step": 105000 }, { "epoch": 2.92, "eval_bleu": 8.1488, "eval_gen_len": 18.9785, "eval_loss": 0.8710347414016724, "eval_runtime": 973.1045, "eval_samples_per_second": 2.051, "eval_steps_per_second": 1.026, "step": 105000 }, { "epoch": 2.92, "learning_rate": 1.0086238173478146e-08, "loss": 0.8551, "step": 105100 }, { "epoch": 2.93, "learning_rate": 9.373350819749382e-09, "loss": 0.8313, "step": 105200 }, { "epoch": 2.93, "learning_rate": 8.686541085339006e-09, "loss": 0.8401, "step": 105300 }, { "epoch": 2.93, "learning_rate": 8.025816161353895e-09, "loss": 0.8806, "step": 105400 }, { "epoch": 2.93, "learning_rate": 7.391182965785504e-09, "loss": 0.8271, "step": 105500 }, { "epoch": 2.94, "learning_rate": 6.782648143436321e-09, "loss": 0.8348, "step": 105600 }, { "epoch": 2.94, "learning_rate": 6.200218065851304e-09, "loss": 0.8564, "step": 105700 }, { "epoch": 2.94, "learning_rate": 5.6438988312504385e-09, "loss": 0.8111, "step": 105800 }, { "epoch": 2.95, "learning_rate": 5.113696264466006e-09, "loss": 0.856, "step": 105900 }, { "epoch": 2.95, "learning_rate": 4.6096159168798616e-09, "loss": 0.814, "step": 106000 }, { "epoch": 2.95, "learning_rate": 4.1316630663670864e-09, "loss": 0.887, "step": 106100 }, { "epoch": 2.95, "learning_rate": 3.6798427172390904e-09, "loss": 0.8207, "step": 106200 }, { "epoch": 2.96, "learning_rate": 3.254159600192819e-09, "loss": 0.8367, "step": 106300 }, { "epoch": 2.96, "learning_rate": 2.8546181722599597e-09, "loss": 0.881, "step": 106400 }, { "epoch": 2.96, "learning_rate": 2.48122261676087e-09, "loss": 0.8333, "step": 106500 }, { "epoch": 2.96, "learning_rate": 2.1339768432609988e-09, "loss": 0.8364, "step": 106600 }, { "epoch": 2.97, "learning_rate": 1.8128844875289764e-09, "loss": 0.8306, "step": 106700 }, { "epoch": 2.97, "learning_rate": 1.517948911499978e-09, "loss": 0.8722, "step": 106800 }, { "epoch": 2.97, "learning_rate": 1.2491732032385295e-09, "loss": 0.8287, "step": 106900 }, { "epoch": 2.98, "learning_rate": 1.0065601769088106e-09, "loss": 0.8401, "step": 107000 }, { "epoch": 2.98, "learning_rate": 7.901123727427351e-10, "loss": 0.8713, "step": 107100 }, { "epoch": 2.98, "learning_rate": 5.998320570149708e-10, "loss": 0.8514, "step": 107200 }, { "epoch": 2.98, "learning_rate": 4.357212220182372e-10, "loss": 0.8462, "step": 107300 }, { "epoch": 2.99, "learning_rate": 2.9778158604276684e-10, "loss": 0.8381, "step": 107400 }, { "epoch": 2.99, "learning_rate": 1.860145933585411e-10, "loss": 0.8647, "step": 107500 }, { "epoch": 2.99, "learning_rate": 1.0042141420030238e-10, "loss": 0.8551, "step": 107600 }, { "epoch": 3.0, "learning_rate": 4.1002944754509055e-11, "loss": 0.8085, "step": 107700 }, { "epoch": 3.0, "learning_rate": 7.759807150731214e-12, "loss": 0.8357, "step": 107800 }, { "epoch": 3.0, "step": 107877, "total_flos": 3.211633985499169e+18, "train_loss": 0.3135226284782316, "train_runtime": 125809.459, "train_samples_per_second": 3.43, "train_steps_per_second": 0.857 } ], "logging_steps": 100, "max_steps": 107877, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2000, "total_flos": 3.211633985499169e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }