{ "best_metric": 0.7786535620689392, "best_model_checkpoint": "./enko_mbartLarge_100p_run1/checkpoint-128123", "epoch": 4.999992195000117, "eval_steps": 500, "global_step": 320307, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.940000000000001e-06, "loss": 1.6027, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.994e-05, "loss": 1.3159, "step": 1000 }, { "epoch": 0.02, "learning_rate": 2.994e-05, "loss": 1.2703, "step": 1500 }, { "epoch": 0.03, "learning_rate": 3.994e-05, "loss": 1.2587, "step": 2000 }, { "epoch": 0.04, "learning_rate": 4.9940000000000006e-05, "loss": 1.2526, "step": 2500 }, { "epoch": 0.05, "learning_rate": 4.997407177475311e-05, "loss": 1.2571, "step": 3000 }, { "epoch": 0.05, "learning_rate": 4.994803921057162e-05, "loss": 1.2336, "step": 3500 }, { "epoch": 0.06, "learning_rate": 4.992195447692284e-05, "loss": 1.2062, "step": 4000 }, { "epoch": 0.07, "learning_rate": 4.9895869743274054e-05, "loss": 1.1793, "step": 4500 }, { "epoch": 0.08, "learning_rate": 4.986983717909256e-05, "loss": 1.1679, "step": 5000 }, { "epoch": 0.09, "learning_rate": 4.984375244544378e-05, "loss": 1.148, "step": 5500 }, { "epoch": 0.09, "learning_rate": 4.9817667711794995e-05, "loss": 1.1374, "step": 6000 }, { "epoch": 0.1, "learning_rate": 4.979158297814621e-05, "loss": 1.1147, "step": 6500 }, { "epoch": 0.11, "learning_rate": 4.976549824449743e-05, "loss": 1.1143, "step": 7000 }, { "epoch": 0.12, "learning_rate": 4.9739465680315936e-05, "loss": 1.1026, "step": 7500 }, { "epoch": 0.12, "learning_rate": 4.971338094666715e-05, "loss": 1.1118, "step": 8000 }, { "epoch": 0.13, "learning_rate": 4.968729621301837e-05, "loss": 1.0842, "step": 8500 }, { "epoch": 0.14, "learning_rate": 4.9661211479369584e-05, "loss": 1.0774, "step": 9000 }, { "epoch": 0.15, "learning_rate": 4.96351267457208e-05, "loss": 1.0735, "step": 9500 }, { "epoch": 0.16, "learning_rate": 4.9609042012072016e-05, "loss": 1.0637, "step": 10000 }, { "epoch": 0.16, "learning_rate": 4.958300944789053e-05, "loss": 1.0474, "step": 10500 }, { "epoch": 0.17, "learning_rate": 4.955692471424174e-05, "loss": 1.0447, "step": 11000 }, { "epoch": 0.18, "learning_rate": 4.953089215006026e-05, "loss": 1.048, "step": 11500 }, { "epoch": 0.19, "learning_rate": 4.950480741641147e-05, "loss": 1.0178, "step": 12000 }, { "epoch": 0.2, "learning_rate": 4.947872268276269e-05, "loss": 1.0335, "step": 12500 }, { "epoch": 0.2, "learning_rate": 4.9452637949113905e-05, "loss": 1.031, "step": 13000 }, { "epoch": 0.21, "learning_rate": 4.9426605384932414e-05, "loss": 1.0121, "step": 13500 }, { "epoch": 0.22, "learning_rate": 4.9400624990218224e-05, "loss": 1.0188, "step": 14000 }, { "epoch": 0.23, "learning_rate": 4.937454025656944e-05, "loss": 0.9988, "step": 14500 }, { "epoch": 0.23, "learning_rate": 4.9348455522920656e-05, "loss": 1.0051, "step": 15000 }, { "epoch": 0.24, "learning_rate": 4.932237078927187e-05, "loss": 0.9967, "step": 15500 }, { "epoch": 0.25, "learning_rate": 4.929628605562309e-05, "loss": 0.9931, "step": 16000 }, { "epoch": 0.26, "learning_rate": 4.9270201321974304e-05, "loss": 0.9833, "step": 16500 }, { "epoch": 0.27, "learning_rate": 4.924411658832552e-05, "loss": 0.9801, "step": 17000 }, { "epoch": 0.27, "learning_rate": 4.9218031854676735e-05, "loss": 0.9848, "step": 17500 }, { "epoch": 0.28, "learning_rate": 4.919194712102795e-05, "loss": 0.9774, "step": 18000 }, { "epoch": 0.29, "learning_rate": 4.916586238737916e-05, "loss": 0.9867, "step": 18500 }, { "epoch": 0.3, "learning_rate": 4.9139777653730376e-05, "loss": 0.9738, "step": 19000 }, { "epoch": 0.3, "learning_rate": 4.911369292008159e-05, "loss": 0.9593, "step": 19500 }, { "epoch": 0.31, "learning_rate": 4.908760818643281e-05, "loss": 0.9804, "step": 20000 }, { "epoch": 0.32, "learning_rate": 4.9061523452784024e-05, "loss": 0.9654, "step": 20500 }, { "epoch": 0.33, "learning_rate": 4.903543871913524e-05, "loss": 0.9641, "step": 21000 }, { "epoch": 0.34, "learning_rate": 4.9009353985486456e-05, "loss": 0.955, "step": 21500 }, { "epoch": 0.34, "learning_rate": 4.8983321421304965e-05, "loss": 0.953, "step": 22000 }, { "epoch": 0.35, "learning_rate": 4.895723668765618e-05, "loss": 0.9585, "step": 22500 }, { "epoch": 0.36, "learning_rate": 4.89311519540074e-05, "loss": 0.9453, "step": 23000 }, { "epoch": 0.37, "learning_rate": 4.890506722035861e-05, "loss": 0.9519, "step": 23500 }, { "epoch": 0.37, "learning_rate": 4.887898248670983e-05, "loss": 0.9512, "step": 24000 }, { "epoch": 0.38, "learning_rate": 4.8852897753061045e-05, "loss": 0.9388, "step": 24500 }, { "epoch": 0.39, "learning_rate": 4.882686518887956e-05, "loss": 0.9257, "step": 25000 }, { "epoch": 0.4, "learning_rate": 4.880078045523077e-05, "loss": 0.9346, "step": 25500 }, { "epoch": 0.41, "learning_rate": 4.8774695721581986e-05, "loss": 0.9399, "step": 26000 }, { "epoch": 0.41, "learning_rate": 4.87486109879332e-05, "loss": 0.9269, "step": 26500 }, { "epoch": 0.42, "learning_rate": 4.872252625428442e-05, "loss": 0.9239, "step": 27000 }, { "epoch": 0.43, "learning_rate": 4.8696441520635634e-05, "loss": 0.9355, "step": 27500 }, { "epoch": 0.44, "learning_rate": 4.867035678698685e-05, "loss": 0.9256, "step": 28000 }, { "epoch": 0.44, "learning_rate": 4.864437639227266e-05, "loss": 0.9278, "step": 28500 }, { "epoch": 0.45, "learning_rate": 4.8618343828091176e-05, "loss": 0.9259, "step": 29000 }, { "epoch": 0.46, "learning_rate": 4.859225909444239e-05, "loss": 0.9181, "step": 29500 }, { "epoch": 0.47, "learning_rate": 4.85661743607936e-05, "loss": 0.9189, "step": 30000 }, { "epoch": 0.48, "learning_rate": 4.854008962714482e-05, "loss": 0.9094, "step": 30500 }, { "epoch": 0.48, "learning_rate": 4.851400489349603e-05, "loss": 0.9121, "step": 31000 }, { "epoch": 0.49, "learning_rate": 4.848792015984725e-05, "loss": 0.8966, "step": 31500 }, { "epoch": 0.5, "learning_rate": 4.8461835426198464e-05, "loss": 0.9144, "step": 32000 }, { "epoch": 0.51, "learning_rate": 4.843575069254968e-05, "loss": 0.9067, "step": 32500 }, { "epoch": 0.52, "learning_rate": 4.8409665958900896e-05, "loss": 0.9144, "step": 33000 }, { "epoch": 0.52, "learning_rate": 4.838358122525211e-05, "loss": 0.9011, "step": 33500 }, { "epoch": 0.53, "learning_rate": 4.835749649160333e-05, "loss": 0.8886, "step": 34000 }, { "epoch": 0.54, "learning_rate": 4.8331411757954544e-05, "loss": 0.9009, "step": 34500 }, { "epoch": 0.55, "learning_rate": 4.830532702430576e-05, "loss": 0.8945, "step": 35000 }, { "epoch": 0.55, "learning_rate": 4.8279242290656976e-05, "loss": 0.893, "step": 35500 }, { "epoch": 0.56, "learning_rate": 4.825315755700819e-05, "loss": 0.9099, "step": 36000 }, { "epoch": 0.57, "learning_rate": 4.82270728233594e-05, "loss": 0.8861, "step": 36500 }, { "epoch": 0.58, "learning_rate": 4.820098808971062e-05, "loss": 0.8945, "step": 37000 }, { "epoch": 0.59, "learning_rate": 4.817490335606183e-05, "loss": 0.8851, "step": 37500 }, { "epoch": 0.59, "learning_rate": 4.814881862241305e-05, "loss": 0.8827, "step": 38000 }, { "epoch": 0.6, "learning_rate": 4.8122733888764264e-05, "loss": 0.8887, "step": 38500 }, { "epoch": 0.61, "learning_rate": 4.809664915511548e-05, "loss": 0.8785, "step": 39000 }, { "epoch": 0.62, "learning_rate": 4.8070564421466696e-05, "loss": 0.8792, "step": 39500 }, { "epoch": 0.62, "learning_rate": 4.804447968781791e-05, "loss": 0.8724, "step": 40000 }, { "epoch": 0.63, "learning_rate": 4.801839495416913e-05, "loss": 0.8725, "step": 40500 }, { "epoch": 0.64, "learning_rate": 4.799236238998764e-05, "loss": 0.8745, "step": 41000 }, { "epoch": 0.65, "learning_rate": 4.7966277656338853e-05, "loss": 0.8696, "step": 41500 }, { "epoch": 0.66, "learning_rate": 4.794029726162466e-05, "loss": 0.8795, "step": 42000 }, { "epoch": 0.66, "learning_rate": 4.791421252797588e-05, "loss": 0.8744, "step": 42500 }, { "epoch": 0.67, "learning_rate": 4.7888127794327095e-05, "loss": 0.8728, "step": 43000 }, { "epoch": 0.68, "learning_rate": 4.786204306067831e-05, "loss": 0.8772, "step": 43500 }, { "epoch": 0.69, "learning_rate": 4.783595832702952e-05, "loss": 0.8685, "step": 44000 }, { "epoch": 0.69, "learning_rate": 4.7809873593380736e-05, "loss": 0.8698, "step": 44500 }, { "epoch": 0.7, "learning_rate": 4.778378885973195e-05, "loss": 0.8696, "step": 45000 }, { "epoch": 0.71, "learning_rate": 4.775770412608317e-05, "loss": 0.8573, "step": 45500 }, { "epoch": 0.72, "learning_rate": 4.7731619392434384e-05, "loss": 0.8623, "step": 46000 }, { "epoch": 0.73, "learning_rate": 4.77055346587856e-05, "loss": 0.8529, "step": 46500 }, { "epoch": 0.73, "learning_rate": 4.7679449925136815e-05, "loss": 0.8704, "step": 47000 }, { "epoch": 0.74, "learning_rate": 4.7653417360955325e-05, "loss": 0.8549, "step": 47500 }, { "epoch": 0.75, "learning_rate": 4.762733262730654e-05, "loss": 0.8648, "step": 48000 }, { "epoch": 0.76, "learning_rate": 4.760124789365776e-05, "loss": 0.8603, "step": 48500 }, { "epoch": 0.76, "learning_rate": 4.757516316000897e-05, "loss": 0.876, "step": 49000 }, { "epoch": 0.77, "learning_rate": 4.754913059582749e-05, "loss": 0.8657, "step": 49500 }, { "epoch": 0.78, "learning_rate": 4.7523045862178705e-05, "loss": 0.8569, "step": 50000 }, { "epoch": 0.79, "learning_rate": 4.749696112852992e-05, "loss": 0.8561, "step": 50500 }, { "epoch": 0.8, "learning_rate": 4.747092856434843e-05, "loss": 0.8585, "step": 51000 }, { "epoch": 0.8, "learning_rate": 4.7444843830699646e-05, "loss": 0.8592, "step": 51500 }, { "epoch": 0.81, "learning_rate": 4.741875909705086e-05, "loss": 0.859, "step": 52000 }, { "epoch": 0.82, "learning_rate": 4.739267436340208e-05, "loss": 0.844, "step": 52500 }, { "epoch": 0.83, "learning_rate": 4.7366589629753294e-05, "loss": 0.8492, "step": 53000 }, { "epoch": 0.84, "learning_rate": 4.734050489610451e-05, "loss": 0.8473, "step": 53500 }, { "epoch": 0.84, "learning_rate": 4.7314420162455726e-05, "loss": 0.8435, "step": 54000 }, { "epoch": 0.85, "learning_rate": 4.7288387598274235e-05, "loss": 0.8472, "step": 54500 }, { "epoch": 0.86, "learning_rate": 4.726230286462545e-05, "loss": 0.8356, "step": 55000 }, { "epoch": 0.87, "learning_rate": 4.723621813097667e-05, "loss": 0.8386, "step": 55500 }, { "epoch": 0.87, "learning_rate": 4.721013339732788e-05, "loss": 0.8414, "step": 56000 }, { "epoch": 0.88, "learning_rate": 4.71840486636791e-05, "loss": 0.8404, "step": 56500 }, { "epoch": 0.89, "learning_rate": 4.7157963930030314e-05, "loss": 0.8419, "step": 57000 }, { "epoch": 0.9, "learning_rate": 4.7131931365848824e-05, "loss": 0.8479, "step": 57500 }, { "epoch": 0.91, "learning_rate": 4.710584663220004e-05, "loss": 0.8432, "step": 58000 }, { "epoch": 0.91, "learning_rate": 4.7079761898551256e-05, "loss": 0.8391, "step": 58500 }, { "epoch": 0.92, "learning_rate": 4.705367716490247e-05, "loss": 0.8262, "step": 59000 }, { "epoch": 0.93, "learning_rate": 4.702759243125369e-05, "loss": 0.8401, "step": 59500 }, { "epoch": 0.94, "learning_rate": 4.7001507697604903e-05, "loss": 0.8386, "step": 60000 }, { "epoch": 0.94, "learning_rate": 4.697542296395612e-05, "loss": 0.831, "step": 60500 }, { "epoch": 0.95, "learning_rate": 4.694939039977463e-05, "loss": 0.8343, "step": 61000 }, { "epoch": 0.96, "learning_rate": 4.6923305666125845e-05, "loss": 0.8212, "step": 61500 }, { "epoch": 0.97, "learning_rate": 4.689722093247706e-05, "loss": 0.826, "step": 62000 }, { "epoch": 0.98, "learning_rate": 4.6871136198828277e-05, "loss": 0.833, "step": 62500 }, { "epoch": 0.98, "learning_rate": 4.684505146517949e-05, "loss": 0.8231, "step": 63000 }, { "epoch": 0.99, "learning_rate": 4.681896673153071e-05, "loss": 0.8402, "step": 63500 }, { "epoch": 1.0, "learning_rate": 4.679293416734922e-05, "loss": 0.8286, "step": 64000 }, { "epoch": 1.0, "eval_bleu": 50.2043, "eval_gen_len": 15.0703, "eval_loss": 0.8096853494644165, "eval_runtime": 9598.1646, "eval_samples_per_second": 13.325, "eval_steps_per_second": 1.666, "step": 64061 }, { "epoch": 1.01, "learning_rate": 4.6766849433700434e-05, "loss": 0.8088, "step": 64500 }, { "epoch": 1.01, "learning_rate": 4.674076470005165e-05, "loss": 0.8023, "step": 65000 }, { "epoch": 1.02, "learning_rate": 4.6714679966402866e-05, "loss": 0.8022, "step": 65500 }, { "epoch": 1.03, "learning_rate": 4.668859523275408e-05, "loss": 0.7841, "step": 66000 }, { "epoch": 1.04, "learning_rate": 4.66625104991053e-05, "loss": 0.7722, "step": 66500 }, { "epoch": 1.05, "learning_rate": 4.6636477934923814e-05, "loss": 0.7627, "step": 67000 }, { "epoch": 1.05, "learning_rate": 4.661039320127503e-05, "loss": 0.7635, "step": 67500 }, { "epoch": 1.06, "learning_rate": 4.6584308467626245e-05, "loss": 0.7612, "step": 68000 }, { "epoch": 1.07, "learning_rate": 4.6558223733977454e-05, "loss": 0.7563, "step": 68500 }, { "epoch": 1.08, "learning_rate": 4.653213900032867e-05, "loss": 0.7468, "step": 69000 }, { "epoch": 1.08, "learning_rate": 4.650605426667988e-05, "loss": 0.7534, "step": 69500 }, { "epoch": 1.09, "learning_rate": 4.6479969533031095e-05, "loss": 0.7457, "step": 70000 }, { "epoch": 1.1, "learning_rate": 4.645388479938231e-05, "loss": 0.7376, "step": 70500 }, { "epoch": 1.11, "learning_rate": 4.642780006573353e-05, "loss": 0.7377, "step": 71000 }, { "epoch": 1.12, "learning_rate": 4.640171533208474e-05, "loss": 0.7385, "step": 71500 }, { "epoch": 1.12, "learning_rate": 4.637563059843596e-05, "loss": 0.7483, "step": 72000 }, { "epoch": 1.13, "learning_rate": 4.6349545864787175e-05, "loss": 0.7356, "step": 72500 }, { "epoch": 1.14, "learning_rate": 4.632346113113839e-05, "loss": 0.7303, "step": 73000 }, { "epoch": 1.15, "learning_rate": 4.629742856695691e-05, "loss": 0.7383, "step": 73500 }, { "epoch": 1.16, "learning_rate": 4.627134383330812e-05, "loss": 0.7295, "step": 74000 }, { "epoch": 1.16, "learning_rate": 4.624531126912663e-05, "loss": 0.7284, "step": 74500 }, { "epoch": 1.17, "learning_rate": 4.621922653547785e-05, "loss": 0.72, "step": 75000 }, { "epoch": 1.18, "learning_rate": 4.6193141801829064e-05, "loss": 0.7246, "step": 75500 }, { "epoch": 1.19, "learning_rate": 4.616705706818028e-05, "loss": 0.7091, "step": 76000 }, { "epoch": 1.19, "learning_rate": 4.6140972334531496e-05, "loss": 0.7227, "step": 76500 }, { "epoch": 1.2, "learning_rate": 4.611488760088271e-05, "loss": 0.722, "step": 77000 }, { "epoch": 1.21, "learning_rate": 4.608880286723393e-05, "loss": 0.7167, "step": 77500 }, { "epoch": 1.22, "learning_rate": 4.6062718133585144e-05, "loss": 0.7211, "step": 78000 }, { "epoch": 1.23, "learning_rate": 4.603673773887095e-05, "loss": 0.7076, "step": 78500 }, { "epoch": 1.23, "learning_rate": 4.601065300522216e-05, "loss": 0.7132, "step": 79000 }, { "epoch": 1.24, "learning_rate": 4.598456827157338e-05, "loss": 0.7142, "step": 79500 }, { "epoch": 1.25, "learning_rate": 4.5958483537924594e-05, "loss": 0.7061, "step": 80000 }, { "epoch": 1.26, "learning_rate": 4.593239880427581e-05, "loss": 0.704, "step": 80500 }, { "epoch": 1.26, "learning_rate": 4.5906314070627026e-05, "loss": 0.6912, "step": 81000 }, { "epoch": 1.27, "learning_rate": 4.588022933697824e-05, "loss": 0.7105, "step": 81500 }, { "epoch": 1.28, "learning_rate": 4.585414460332946e-05, "loss": 0.7057, "step": 82000 }, { "epoch": 1.29, "learning_rate": 4.5828059869680674e-05, "loss": 0.7093, "step": 82500 }, { "epoch": 1.3, "learning_rate": 4.580197513603189e-05, "loss": 0.7089, "step": 83000 }, { "epoch": 1.3, "learning_rate": 4.5775890402383106e-05, "loss": 0.6892, "step": 83500 }, { "epoch": 1.31, "learning_rate": 4.574980566873432e-05, "loss": 0.7142, "step": 84000 }, { "epoch": 1.32, "learning_rate": 4.572377310455283e-05, "loss": 0.7039, "step": 84500 }, { "epoch": 1.33, "learning_rate": 4.569768837090405e-05, "loss": 0.6999, "step": 85000 }, { "epoch": 1.33, "learning_rate": 4.5671655806722557e-05, "loss": 0.698, "step": 85500 }, { "epoch": 1.34, "learning_rate": 4.564557107307377e-05, "loss": 0.6956, "step": 86000 }, { "epoch": 1.35, "learning_rate": 4.561948633942499e-05, "loss": 0.7012, "step": 86500 }, { "epoch": 1.36, "learning_rate": 4.5593401605776204e-05, "loss": 0.6933, "step": 87000 }, { "epoch": 1.37, "learning_rate": 4.556736904159472e-05, "loss": 0.6986, "step": 87500 }, { "epoch": 1.37, "learning_rate": 4.5541284307945936e-05, "loss": 0.6973, "step": 88000 }, { "epoch": 1.38, "learning_rate": 4.5515251743764446e-05, "loss": 0.6912, "step": 88500 }, { "epoch": 1.39, "learning_rate": 4.548916701011566e-05, "loss": 0.6842, "step": 89000 }, { "epoch": 1.4, "learning_rate": 4.546313444593418e-05, "loss": 0.6836, "step": 89500 }, { "epoch": 1.4, "learning_rate": 4.543704971228539e-05, "loss": 0.6926, "step": 90000 }, { "epoch": 1.41, "learning_rate": 4.54109649786366e-05, "loss": 0.6785, "step": 90500 }, { "epoch": 1.42, "learning_rate": 4.538488024498782e-05, "loss": 0.6801, "step": 91000 }, { "epoch": 1.43, "learning_rate": 4.5358795511339035e-05, "loss": 0.6837, "step": 91500 }, { "epoch": 1.44, "learning_rate": 4.533271077769025e-05, "loss": 0.6895, "step": 92000 }, { "epoch": 1.44, "learning_rate": 4.5306626044041467e-05, "loss": 0.688, "step": 92500 }, { "epoch": 1.45, "learning_rate": 4.528054131039268e-05, "loss": 0.6849, "step": 93000 }, { "epoch": 1.46, "learning_rate": 4.52544565767439e-05, "loss": 0.6814, "step": 93500 }, { "epoch": 1.47, "learning_rate": 4.5228371843095114e-05, "loss": 0.6862, "step": 94000 }, { "epoch": 1.48, "learning_rate": 4.5202339278913624e-05, "loss": 0.6727, "step": 94500 }, { "epoch": 1.48, "learning_rate": 4.517625454526484e-05, "loss": 0.6807, "step": 95000 }, { "epoch": 1.49, "learning_rate": 4.5150169811616056e-05, "loss": 0.6669, "step": 95500 }, { "epoch": 1.5, "learning_rate": 4.512408507796727e-05, "loss": 0.6765, "step": 96000 }, { "epoch": 1.51, "learning_rate": 4.509800034431849e-05, "loss": 0.6792, "step": 96500 }, { "epoch": 1.51, "learning_rate": 4.50719156106697e-05, "loss": 0.6803, "step": 97000 }, { "epoch": 1.52, "learning_rate": 4.504583087702092e-05, "loss": 0.6692, "step": 97500 }, { "epoch": 1.53, "learning_rate": 4.5019746143372135e-05, "loss": 0.6671, "step": 98000 }, { "epoch": 1.54, "learning_rate": 4.499366140972335e-05, "loss": 0.6712, "step": 98500 }, { "epoch": 1.55, "learning_rate": 4.496757667607456e-05, "loss": 0.6729, "step": 99000 }, { "epoch": 1.55, "learning_rate": 4.4941544111893076e-05, "loss": 0.6715, "step": 99500 }, { "epoch": 1.56, "learning_rate": 4.491545937824429e-05, "loss": 0.6821, "step": 100000 }, { "epoch": 1.57, "learning_rate": 4.488937464459551e-05, "loss": 0.6641, "step": 100500 }, { "epoch": 1.58, "learning_rate": 4.4863289910946724e-05, "loss": 0.673, "step": 101000 }, { "epoch": 1.58, "learning_rate": 4.483720517729793e-05, "loss": 0.6686, "step": 101500 }, { "epoch": 1.59, "learning_rate": 4.481112044364915e-05, "loss": 0.6614, "step": 102000 }, { "epoch": 1.6, "learning_rate": 4.4785035710000365e-05, "loss": 0.6708, "step": 102500 }, { "epoch": 1.61, "learning_rate": 4.475895097635158e-05, "loss": 0.6621, "step": 103000 }, { "epoch": 1.62, "learning_rate": 4.47328662427028e-05, "loss": 0.6618, "step": 103500 }, { "epoch": 1.62, "learning_rate": 4.4706833678521306e-05, "loss": 0.6647, "step": 104000 }, { "epoch": 1.63, "learning_rate": 4.468074894487252e-05, "loss": 0.6587, "step": 104500 }, { "epoch": 1.64, "learning_rate": 4.465466421122374e-05, "loss": 0.6589, "step": 105000 }, { "epoch": 1.65, "learning_rate": 4.4628579477574954e-05, "loss": 0.654, "step": 105500 }, { "epoch": 1.65, "learning_rate": 4.460249474392617e-05, "loss": 0.6614, "step": 106000 }, { "epoch": 1.66, "learning_rate": 4.4576410010277386e-05, "loss": 0.6663, "step": 106500 }, { "epoch": 1.67, "learning_rate": 4.45503252766286e-05, "loss": 0.6612, "step": 107000 }, { "epoch": 1.68, "learning_rate": 4.452424054297982e-05, "loss": 0.6594, "step": 107500 }, { "epoch": 1.69, "learning_rate": 4.449820797879833e-05, "loss": 0.6584, "step": 108000 }, { "epoch": 1.69, "learning_rate": 4.447217541461684e-05, "loss": 0.6592, "step": 108500 }, { "epoch": 1.7, "learning_rate": 4.444609068096806e-05, "loss": 0.6595, "step": 109000 }, { "epoch": 1.71, "learning_rate": 4.4420005947319275e-05, "loss": 0.6545, "step": 109500 }, { "epoch": 1.72, "learning_rate": 4.439392121367049e-05, "loss": 0.6543, "step": 110000 }, { "epoch": 1.72, "learning_rate": 4.4367888649489e-05, "loss": 0.6509, "step": 110500 }, { "epoch": 1.73, "learning_rate": 4.4341803915840216e-05, "loss": 0.6641, "step": 111000 }, { "epoch": 1.74, "learning_rate": 4.431577135165873e-05, "loss": 0.6514, "step": 111500 }, { "epoch": 1.75, "learning_rate": 4.428968661800995e-05, "loss": 0.6588, "step": 112000 }, { "epoch": 1.76, "learning_rate": 4.4263601884361164e-05, "loss": 0.6565, "step": 112500 }, { "epoch": 1.76, "learning_rate": 4.4237517150712373e-05, "loss": 0.6654, "step": 113000 }, { "epoch": 1.77, "learning_rate": 4.421143241706359e-05, "loss": 0.6642, "step": 113500 }, { "epoch": 1.78, "learning_rate": 4.4185347683414805e-05, "loss": 0.6505, "step": 114000 }, { "epoch": 1.79, "learning_rate": 4.415926294976602e-05, "loss": 0.6561, "step": 114500 }, { "epoch": 1.8, "learning_rate": 4.413317821611724e-05, "loss": 0.6555, "step": 115000 }, { "epoch": 1.8, "learning_rate": 4.410709348246845e-05, "loss": 0.6597, "step": 115500 }, { "epoch": 1.81, "learning_rate": 4.408100874881967e-05, "loss": 0.6635, "step": 116000 }, { "epoch": 1.82, "learning_rate": 4.405497618463818e-05, "loss": 0.6469, "step": 116500 }, { "epoch": 1.83, "learning_rate": 4.4028891450989394e-05, "loss": 0.6466, "step": 117000 }, { "epoch": 1.83, "learning_rate": 4.400280671734061e-05, "loss": 0.6504, "step": 117500 }, { "epoch": 1.84, "learning_rate": 4.3976721983691826e-05, "loss": 0.6463, "step": 118000 }, { "epoch": 1.85, "learning_rate": 4.395063725004304e-05, "loss": 0.6505, "step": 118500 }, { "epoch": 1.86, "learning_rate": 4.392455251639426e-05, "loss": 0.6392, "step": 119000 }, { "epoch": 1.87, "learning_rate": 4.3898467782745474e-05, "loss": 0.6446, "step": 119500 }, { "epoch": 1.87, "learning_rate": 4.387238304909669e-05, "loss": 0.6431, "step": 120000 }, { "epoch": 1.88, "learning_rate": 4.38463504849152e-05, "loss": 0.6452, "step": 120500 }, { "epoch": 1.89, "learning_rate": 4.3820265751266415e-05, "loss": 0.6505, "step": 121000 }, { "epoch": 1.9, "learning_rate": 4.379418101761763e-05, "loss": 0.6487, "step": 121500 }, { "epoch": 1.9, "learning_rate": 4.376809628396885e-05, "loss": 0.6481, "step": 122000 }, { "epoch": 1.91, "learning_rate": 4.3742063719787356e-05, "loss": 0.648, "step": 122500 }, { "epoch": 1.92, "learning_rate": 4.371597898613857e-05, "loss": 0.6378, "step": 123000 }, { "epoch": 1.93, "learning_rate": 4.368989425248979e-05, "loss": 0.6457, "step": 123500 }, { "epoch": 1.94, "learning_rate": 4.3663809518841004e-05, "loss": 0.6468, "step": 124000 }, { "epoch": 1.94, "learning_rate": 4.363772478519222e-05, "loss": 0.6456, "step": 124500 }, { "epoch": 1.95, "learning_rate": 4.3611692221010736e-05, "loss": 0.6392, "step": 125000 }, { "epoch": 1.96, "learning_rate": 4.358560748736195e-05, "loss": 0.6388, "step": 125500 }, { "epoch": 1.97, "learning_rate": 4.355952275371316e-05, "loss": 0.6382, "step": 126000 }, { "epoch": 1.97, "learning_rate": 4.353343802006438e-05, "loss": 0.6397, "step": 126500 }, { "epoch": 1.98, "learning_rate": 4.350735328641559e-05, "loss": 0.6368, "step": 127000 }, { "epoch": 1.99, "learning_rate": 4.348126855276681e-05, "loss": 0.6516, "step": 127500 }, { "epoch": 2.0, "learning_rate": 4.3455235988585325e-05, "loss": 0.6429, "step": 128000 }, { "epoch": 2.0, "eval_bleu": 51.8273, "eval_gen_len": 15.0439, "eval_loss": 0.7786535620689392, "eval_runtime": 9521.3959, "eval_samples_per_second": 13.433, "eval_steps_per_second": 1.679, "step": 128123 }, { "epoch": 2.01, "learning_rate": 4.342915125493654e-05, "loss": 0.6315, "step": 128500 }, { "epoch": 2.01, "learning_rate": 4.340306652128776e-05, "loss": 0.6169, "step": 129000 }, { "epoch": 2.02, "learning_rate": 4.337698178763897e-05, "loss": 0.6218, "step": 129500 }, { "epoch": 2.03, "learning_rate": 4.335089705399019e-05, "loss": 0.6108, "step": 130000 }, { "epoch": 2.04, "learning_rate": 4.33248644898087e-05, "loss": 0.6101, "step": 130500 }, { "epoch": 2.04, "learning_rate": 4.3298779756159914e-05, "loss": 0.5922, "step": 131000 }, { "epoch": 2.05, "learning_rate": 4.327269502251113e-05, "loss": 0.6026, "step": 131500 }, { "epoch": 2.06, "learning_rate": 4.3246610288862346e-05, "loss": 0.591, "step": 132000 }, { "epoch": 2.07, "learning_rate": 4.322052555521356e-05, "loss": 0.5985, "step": 132500 }, { "epoch": 2.08, "learning_rate": 4.319444082156478e-05, "loss": 0.5816, "step": 133000 }, { "epoch": 2.08, "learning_rate": 4.316840825738329e-05, "loss": 0.5942, "step": 133500 }, { "epoch": 2.09, "learning_rate": 4.31423235237345e-05, "loss": 0.5905, "step": 134000 }, { "epoch": 2.1, "learning_rate": 4.311623879008572e-05, "loss": 0.5845, "step": 134500 }, { "epoch": 2.11, "learning_rate": 4.3090154056436935e-05, "loss": 0.5799, "step": 135000 }, { "epoch": 2.12, "learning_rate": 4.306406932278815e-05, "loss": 0.5847, "step": 135500 }, { "epoch": 2.12, "learning_rate": 4.303798458913936e-05, "loss": 0.5866, "step": 136000 }, { "epoch": 2.13, "learning_rate": 4.3011899855490576e-05, "loss": 0.5867, "step": 136500 }, { "epoch": 2.14, "learning_rate": 4.298581512184179e-05, "loss": 0.5739, "step": 137000 }, { "epoch": 2.15, "learning_rate": 4.295973038819301e-05, "loss": 0.5882, "step": 137500 }, { "epoch": 2.15, "learning_rate": 4.2933697824011524e-05, "loss": 0.576, "step": 138000 }, { "epoch": 2.16, "learning_rate": 4.290761309036273e-05, "loss": 0.5654, "step": 138500 }, { "epoch": 2.17, "learning_rate": 4.288152835671395e-05, "loss": 0.5708, "step": 139000 }, { "epoch": 2.18, "learning_rate": 4.2855443623065165e-05, "loss": 0.5725, "step": 139500 }, { "epoch": 2.19, "learning_rate": 4.282935888941638e-05, "loss": 0.5687, "step": 140000 }, { "epoch": 2.19, "learning_rate": 4.28033263252349e-05, "loss": 0.5627, "step": 140500 }, { "epoch": 2.2, "learning_rate": 4.2777293761053406e-05, "loss": 0.5745, "step": 141000 }, { "epoch": 2.21, "learning_rate": 4.275120902740462e-05, "loss": 0.5617, "step": 141500 }, { "epoch": 2.22, "learning_rate": 4.272512429375584e-05, "loss": 0.5687, "step": 142000 }, { "epoch": 2.22, "learning_rate": 4.2699039560107054e-05, "loss": 0.5682, "step": 142500 }, { "epoch": 2.23, "learning_rate": 4.267295482645827e-05, "loss": 0.5573, "step": 143000 }, { "epoch": 2.24, "learning_rate": 4.264687009280948e-05, "loss": 0.5729, "step": 143500 }, { "epoch": 2.25, "learning_rate": 4.2620785359160695e-05, "loss": 0.5602, "step": 144000 }, { "epoch": 2.26, "learning_rate": 4.259470062551191e-05, "loss": 0.553, "step": 144500 }, { "epoch": 2.26, "learning_rate": 4.256861589186313e-05, "loss": 0.5501, "step": 145000 }, { "epoch": 2.27, "learning_rate": 4.254253115821434e-05, "loss": 0.5646, "step": 145500 }, { "epoch": 2.28, "learning_rate": 4.251644642456556e-05, "loss": 0.5609, "step": 146000 }, { "epoch": 2.29, "learning_rate": 4.2490361690916775e-05, "loss": 0.5575, "step": 146500 }, { "epoch": 2.29, "learning_rate": 4.246432912673529e-05, "loss": 0.5687, "step": 147000 }, { "epoch": 2.3, "learning_rate": 4.24382443930865e-05, "loss": 0.5425, "step": 147500 }, { "epoch": 2.31, "learning_rate": 4.2412159659437716e-05, "loss": 0.5689, "step": 148000 }, { "epoch": 2.32, "learning_rate": 4.238607492578893e-05, "loss": 0.557, "step": 148500 }, { "epoch": 2.33, "learning_rate": 4.235999019214015e-05, "loss": 0.5557, "step": 149000 }, { "epoch": 2.33, "learning_rate": 4.2333905458491364e-05, "loss": 0.5629, "step": 149500 }, { "epoch": 2.34, "learning_rate": 4.230787289430988e-05, "loss": 0.556, "step": 150000 }, { "epoch": 2.35, "learning_rate": 4.228184033012839e-05, "loss": 0.5521, "step": 150500 }, { "epoch": 2.36, "learning_rate": 4.2255807765946905e-05, "loss": 0.5508, "step": 151000 }, { "epoch": 2.36, "learning_rate": 4.2229775201765415e-05, "loss": 0.5539, "step": 151500 }, { "epoch": 2.37, "learning_rate": 4.220369046811663e-05, "loss": 0.5601, "step": 152000 }, { "epoch": 2.38, "learning_rate": 4.217760573446785e-05, "loss": 0.5504, "step": 152500 }, { "epoch": 2.39, "learning_rate": 4.215152100081906e-05, "loss": 0.5475, "step": 153000 }, { "epoch": 2.4, "learning_rate": 4.212543626717028e-05, "loss": 0.5477, "step": 153500 }, { "epoch": 2.4, "learning_rate": 4.2099351533521494e-05, "loss": 0.5421, "step": 154000 }, { "epoch": 2.41, "learning_rate": 4.207326679987271e-05, "loss": 0.5374, "step": 154500 }, { "epoch": 2.42, "learning_rate": 4.204718206622392e-05, "loss": 0.5443, "step": 155000 }, { "epoch": 2.43, "learning_rate": 4.2021097332575135e-05, "loss": 0.547, "step": 155500 }, { "epoch": 2.44, "learning_rate": 4.199501259892635e-05, "loss": 0.5496, "step": 156000 }, { "epoch": 2.44, "learning_rate": 4.196892786527757e-05, "loss": 0.5472, "step": 156500 }, { "epoch": 2.45, "learning_rate": 4.194284313162878e-05, "loss": 0.5458, "step": 157000 }, { "epoch": 2.46, "learning_rate": 4.191675839798e-05, "loss": 0.5505, "step": 157500 }, { "epoch": 2.47, "learning_rate": 4.1890673664331215e-05, "loss": 0.5454, "step": 158000 }, { "epoch": 2.47, "learning_rate": 4.186458893068243e-05, "loss": 0.5341, "step": 158500 }, { "epoch": 2.48, "learning_rate": 4.183850419703365e-05, "loss": 0.5402, "step": 159000 }, { "epoch": 2.49, "learning_rate": 4.1812471632852156e-05, "loss": 0.5363, "step": 159500 }, { "epoch": 2.5, "learning_rate": 4.178643906867067e-05, "loss": 0.5358, "step": 160000 }, { "epoch": 2.51, "learning_rate": 4.176035433502189e-05, "loss": 0.5388, "step": 160500 }, { "epoch": 2.51, "learning_rate": 4.1734269601373104e-05, "loss": 0.5427, "step": 161000 }, { "epoch": 2.52, "learning_rate": 4.170818486772432e-05, "loss": 0.5325, "step": 161500 }, { "epoch": 2.53, "learning_rate": 4.1682100134075536e-05, "loss": 0.5375, "step": 162000 }, { "epoch": 2.54, "learning_rate": 4.1656015400426745e-05, "loss": 0.535, "step": 162500 }, { "epoch": 2.54, "learning_rate": 4.162993066677796e-05, "loss": 0.5287, "step": 163000 }, { "epoch": 2.55, "learning_rate": 4.160384593312918e-05, "loss": 0.5345, "step": 163500 }, { "epoch": 2.56, "learning_rate": 4.157776119948039e-05, "loss": 0.5411, "step": 164000 }, { "epoch": 2.57, "learning_rate": 4.155167646583161e-05, "loss": 0.5366, "step": 164500 }, { "epoch": 2.58, "learning_rate": 4.152569607111742e-05, "loss": 0.5378, "step": 165000 }, { "epoch": 2.58, "learning_rate": 4.1499611337468634e-05, "loss": 0.5349, "step": 165500 }, { "epoch": 2.59, "learning_rate": 4.147352660381985e-05, "loss": 0.5279, "step": 166000 }, { "epoch": 2.6, "learning_rate": 4.1447441870171066e-05, "loss": 0.5324, "step": 166500 }, { "epoch": 2.61, "learning_rate": 4.142135713652228e-05, "loss": 0.5326, "step": 167000 }, { "epoch": 2.61, "learning_rate": 4.139532457234079e-05, "loss": 0.5254, "step": 167500 }, { "epoch": 2.62, "learning_rate": 4.136923983869201e-05, "loss": 0.5317, "step": 168000 }, { "epoch": 2.63, "learning_rate": 4.134315510504322e-05, "loss": 0.5278, "step": 168500 }, { "epoch": 2.64, "learning_rate": 4.131707037139444e-05, "loss": 0.5223, "step": 169000 }, { "epoch": 2.65, "learning_rate": 4.129103780721295e-05, "loss": 0.5237, "step": 169500 }, { "epoch": 2.65, "learning_rate": 4.1265005243031465e-05, "loss": 0.5251, "step": 170000 }, { "epoch": 2.66, "learning_rate": 4.123892050938268e-05, "loss": 0.5345, "step": 170500 }, { "epoch": 2.67, "learning_rate": 4.12128357757339e-05, "loss": 0.5302, "step": 171000 }, { "epoch": 2.68, "learning_rate": 4.1186751042085106e-05, "loss": 0.5256, "step": 171500 }, { "epoch": 2.68, "learning_rate": 4.116066630843632e-05, "loss": 0.5231, "step": 172000 }, { "epoch": 2.69, "learning_rate": 4.113458157478754e-05, "loss": 0.5322, "step": 172500 }, { "epoch": 2.7, "learning_rate": 4.1108549010606054e-05, "loss": 0.5268, "step": 173000 }, { "epoch": 2.71, "learning_rate": 4.108246427695727e-05, "loss": 0.521, "step": 173500 }, { "epoch": 2.72, "learning_rate": 4.1056379543308486e-05, "loss": 0.5179, "step": 174000 }, { "epoch": 2.72, "learning_rate": 4.10302948096597e-05, "loss": 0.5249, "step": 174500 }, { "epoch": 2.73, "learning_rate": 4.100421007601092e-05, "loss": 0.5267, "step": 175000 }, { "epoch": 2.74, "learning_rate": 4.0978125342362133e-05, "loss": 0.5198, "step": 175500 }, { "epoch": 2.75, "learning_rate": 4.095204060871335e-05, "loss": 0.525, "step": 176000 }, { "epoch": 2.76, "learning_rate": 4.0925955875064565e-05, "loss": 0.5236, "step": 176500 }, { "epoch": 2.76, "learning_rate": 4.0899871141415774e-05, "loss": 0.5345, "step": 177000 }, { "epoch": 2.77, "learning_rate": 4.087378640776699e-05, "loss": 0.5307, "step": 177500 }, { "epoch": 2.78, "learning_rate": 4.0847701674118206e-05, "loss": 0.5227, "step": 178000 }, { "epoch": 2.79, "learning_rate": 4.082161694046942e-05, "loss": 0.523, "step": 178500 }, { "epoch": 2.79, "learning_rate": 4.079553220682064e-05, "loss": 0.5221, "step": 179000 }, { "epoch": 2.8, "learning_rate": 4.0769447473171854e-05, "loss": 0.5195, "step": 179500 }, { "epoch": 2.81, "learning_rate": 4.074336273952307e-05, "loss": 0.5366, "step": 180000 }, { "epoch": 2.82, "learning_rate": 4.071727800587428e-05, "loss": 0.5173, "step": 180500 }, { "epoch": 2.83, "learning_rate": 4.0691193272225495e-05, "loss": 0.5168, "step": 181000 }, { "epoch": 2.83, "learning_rate": 4.066516070804402e-05, "loss": 0.5227, "step": 181500 }, { "epoch": 2.84, "learning_rate": 4.063912814386253e-05, "loss": 0.5145, "step": 182000 }, { "epoch": 2.85, "learning_rate": 4.061304341021374e-05, "loss": 0.5162, "step": 182500 }, { "epoch": 2.86, "learning_rate": 4.058695867656496e-05, "loss": 0.5155, "step": 183000 }, { "epoch": 2.86, "learning_rate": 4.0560873942916175e-05, "loss": 0.5142, "step": 183500 }, { "epoch": 2.87, "learning_rate": 4.053478920926739e-05, "loss": 0.5168, "step": 184000 }, { "epoch": 2.88, "learning_rate": 4.05087044756186e-05, "loss": 0.5152, "step": 184500 }, { "epoch": 2.89, "learning_rate": 4.0482619741969816e-05, "loss": 0.5196, "step": 185000 }, { "epoch": 2.9, "learning_rate": 4.045653500832103e-05, "loss": 0.5173, "step": 185500 }, { "epoch": 2.9, "learning_rate": 4.043045027467225e-05, "loss": 0.5178, "step": 186000 }, { "epoch": 2.91, "learning_rate": 4.040446987995806e-05, "loss": 0.5186, "step": 186500 }, { "epoch": 2.92, "learning_rate": 4.037838514630927e-05, "loss": 0.5108, "step": 187000 }, { "epoch": 2.93, "learning_rate": 4.035230041266049e-05, "loss": 0.5198, "step": 187500 }, { "epoch": 2.93, "learning_rate": 4.0326215679011705e-05, "loss": 0.5184, "step": 188000 }, { "epoch": 2.94, "learning_rate": 4.030013094536292e-05, "loss": 0.5184, "step": 188500 }, { "epoch": 2.95, "learning_rate": 4.027409838118143e-05, "loss": 0.5123, "step": 189000 }, { "epoch": 2.96, "learning_rate": 4.0248013647532646e-05, "loss": 0.5031, "step": 189500 }, { "epoch": 2.97, "learning_rate": 4.022192891388386e-05, "loss": 0.5148, "step": 190000 }, { "epoch": 2.97, "learning_rate": 4.019584418023508e-05, "loss": 0.5079, "step": 190500 }, { "epoch": 2.98, "learning_rate": 4.0169759446586294e-05, "loss": 0.5117, "step": 191000 }, { "epoch": 2.99, "learning_rate": 4.014367471293751e-05, "loss": 0.523, "step": 191500 }, { "epoch": 3.0, "learning_rate": 4.011758997928872e-05, "loss": 0.517, "step": 192000 }, { "epoch": 3.0, "eval_bleu": 52.0502, "eval_gen_len": 14.9976, "eval_loss": 0.8042312264442444, "eval_runtime": 9500.4244, "eval_samples_per_second": 13.462, "eval_steps_per_second": 1.683, "step": 192184 }, { "epoch": 3.0, "learning_rate": 4.0091557415107235e-05, "loss": 0.5007, "step": 192500 }, { "epoch": 3.01, "learning_rate": 4.006547268145845e-05, "loss": 0.4984, "step": 193000 }, { "epoch": 3.02, "learning_rate": 4.003938794780967e-05, "loss": 0.4934, "step": 193500 }, { "epoch": 3.03, "learning_rate": 4.001330321416088e-05, "loss": 0.4892, "step": 194000 }, { "epoch": 3.04, "learning_rate": 3.998721848051209e-05, "loss": 0.4925, "step": 194500 }, { "epoch": 3.04, "learning_rate": 3.9961185916330615e-05, "loss": 0.4777, "step": 195000 }, { "epoch": 3.05, "learning_rate": 3.9935153352149125e-05, "loss": 0.4794, "step": 195500 }, { "epoch": 3.06, "learning_rate": 3.990906861850034e-05, "loss": 0.4773, "step": 196000 }, { "epoch": 3.07, "learning_rate": 3.9882983884851556e-05, "loss": 0.4829, "step": 196500 }, { "epoch": 3.08, "learning_rate": 3.9856951320670066e-05, "loss": 0.4692, "step": 197000 }, { "epoch": 3.08, "learning_rate": 3.983086658702128e-05, "loss": 0.4776, "step": 197500 }, { "epoch": 3.09, "learning_rate": 3.98047818533725e-05, "loss": 0.472, "step": 198000 }, { "epoch": 3.1, "learning_rate": 3.9778697119723714e-05, "loss": 0.4704, "step": 198500 }, { "epoch": 3.11, "learning_rate": 3.975261238607493e-05, "loss": 0.4599, "step": 199000 }, { "epoch": 3.11, "learning_rate": 3.9726527652426145e-05, "loss": 0.4698, "step": 199500 }, { "epoch": 3.12, "learning_rate": 3.970044291877736e-05, "loss": 0.4742, "step": 200000 }, { "epoch": 3.13, "learning_rate": 3.967435818512858e-05, "loss": 0.4715, "step": 200500 }, { "epoch": 3.14, "learning_rate": 3.9648273451479786e-05, "loss": 0.4599, "step": 201000 }, { "epoch": 3.15, "learning_rate": 3.9622188717831e-05, "loss": 0.4671, "step": 201500 }, { "epoch": 3.15, "learning_rate": 3.959610398418222e-05, "loss": 0.4655, "step": 202000 }, { "epoch": 3.16, "learning_rate": 3.9570019250533434e-05, "loss": 0.4529, "step": 202500 }, { "epoch": 3.17, "learning_rate": 3.954393451688465e-05, "loss": 0.4639, "step": 203000 }, { "epoch": 3.18, "learning_rate": 3.9517849783235866e-05, "loss": 0.451, "step": 203500 }, { "epoch": 3.18, "learning_rate": 3.949176504958708e-05, "loss": 0.4584, "step": 204000 }, { "epoch": 3.19, "learning_rate": 3.946573248540559e-05, "loss": 0.4503, "step": 204500 }, { "epoch": 3.2, "learning_rate": 3.943964775175681e-05, "loss": 0.4578, "step": 205000 }, { "epoch": 3.21, "learning_rate": 3.9413615187575323e-05, "loss": 0.4563, "step": 205500 }, { "epoch": 3.22, "learning_rate": 3.938753045392653e-05, "loss": 0.455, "step": 206000 }, { "epoch": 3.22, "learning_rate": 3.936144572027775e-05, "loss": 0.4595, "step": 206500 }, { "epoch": 3.23, "learning_rate": 3.9335360986628964e-05, "loss": 0.4473, "step": 207000 }, { "epoch": 3.24, "learning_rate": 3.930927625298018e-05, "loss": 0.4613, "step": 207500 }, { "epoch": 3.25, "learning_rate": 3.9283191519331396e-05, "loss": 0.4529, "step": 208000 }, { "epoch": 3.25, "learning_rate": 3.9257158955149906e-05, "loss": 0.4433, "step": 208500 }, { "epoch": 3.26, "learning_rate": 3.923107422150112e-05, "loss": 0.4402, "step": 209000 }, { "epoch": 3.27, "learning_rate": 3.920498948785234e-05, "loss": 0.4506, "step": 209500 }, { "epoch": 3.28, "learning_rate": 3.917890475420355e-05, "loss": 0.4544, "step": 210000 }, { "epoch": 3.29, "learning_rate": 3.915282002055477e-05, "loss": 0.4503, "step": 210500 }, { "epoch": 3.29, "learning_rate": 3.9126787456373285e-05, "loss": 0.4585, "step": 211000 }, { "epoch": 3.3, "learning_rate": 3.91007027227245e-05, "loss": 0.432, "step": 211500 }, { "epoch": 3.31, "learning_rate": 3.907461798907572e-05, "loss": 0.4579, "step": 212000 }, { "epoch": 3.32, "learning_rate": 3.904853325542693e-05, "loss": 0.4488, "step": 212500 }, { "epoch": 3.32, "learning_rate": 3.902244852177814e-05, "loss": 0.4514, "step": 213000 }, { "epoch": 3.33, "learning_rate": 3.899641595759666e-05, "loss": 0.4534, "step": 213500 }, { "epoch": 3.34, "learning_rate": 3.8970331223947874e-05, "loss": 0.4451, "step": 214000 }, { "epoch": 3.35, "learning_rate": 3.894424649029909e-05, "loss": 0.4453, "step": 214500 }, { "epoch": 3.36, "learning_rate": 3.8918161756650306e-05, "loss": 0.4422, "step": 215000 }, { "epoch": 3.36, "learning_rate": 3.8892129192468816e-05, "loss": 0.447, "step": 215500 }, { "epoch": 3.37, "learning_rate": 3.886604445882003e-05, "loss": 0.4494, "step": 216000 }, { "epoch": 3.38, "learning_rate": 3.883995972517125e-05, "loss": 0.4442, "step": 216500 }, { "epoch": 3.39, "learning_rate": 3.8813874991522463e-05, "loss": 0.4367, "step": 217000 }, { "epoch": 3.4, "learning_rate": 3.878779025787368e-05, "loss": 0.4418, "step": 217500 }, { "epoch": 3.4, "learning_rate": 3.8761705524224895e-05, "loss": 0.4374, "step": 218000 }, { "epoch": 3.41, "learning_rate": 3.873562079057611e-05, "loss": 0.4372, "step": 218500 }, { "epoch": 3.42, "learning_rate": 3.870953605692733e-05, "loss": 0.434, "step": 219000 }, { "epoch": 3.43, "learning_rate": 3.868345132327854e-05, "loss": 0.4363, "step": 219500 }, { "epoch": 3.43, "learning_rate": 3.865736658962976e-05, "loss": 0.4446, "step": 220000 }, { "epoch": 3.44, "learning_rate": 3.8631281855980975e-05, "loss": 0.4362, "step": 220500 }, { "epoch": 3.45, "learning_rate": 3.860519712233219e-05, "loss": 0.4402, "step": 221000 }, { "epoch": 3.46, "learning_rate": 3.8579216727617994e-05, "loss": 0.4426, "step": 221500 }, { "epoch": 3.47, "learning_rate": 3.855313199396921e-05, "loss": 0.4387, "step": 222000 }, { "epoch": 3.47, "learning_rate": 3.8527047260320425e-05, "loss": 0.4312, "step": 222500 }, { "epoch": 3.48, "learning_rate": 3.850096252667164e-05, "loss": 0.4336, "step": 223000 }, { "epoch": 3.49, "learning_rate": 3.847487779302286e-05, "loss": 0.4339, "step": 223500 }, { "epoch": 3.5, "learning_rate": 3.844879305937407e-05, "loss": 0.4244, "step": 224000 }, { "epoch": 3.5, "learning_rate": 3.842270832572529e-05, "loss": 0.4393, "step": 224500 }, { "epoch": 3.51, "learning_rate": 3.83966757615438e-05, "loss": 0.4311, "step": 225000 }, { "epoch": 3.52, "learning_rate": 3.8370591027895014e-05, "loss": 0.434, "step": 225500 }, { "epoch": 3.53, "learning_rate": 3.834450629424623e-05, "loss": 0.4313, "step": 226000 }, { "epoch": 3.54, "learning_rate": 3.8318421560597446e-05, "loss": 0.4307, "step": 226500 }, { "epoch": 3.54, "learning_rate": 3.829233682694866e-05, "loss": 0.4251, "step": 227000 }, { "epoch": 3.55, "learning_rate": 3.826625209329988e-05, "loss": 0.4315, "step": 227500 }, { "epoch": 3.56, "learning_rate": 3.8240167359651094e-05, "loss": 0.4372, "step": 228000 }, { "epoch": 3.57, "learning_rate": 3.821408262600231e-05, "loss": 0.4343, "step": 228500 }, { "epoch": 3.57, "learning_rate": 3.818799789235352e-05, "loss": 0.4331, "step": 229000 }, { "epoch": 3.58, "learning_rate": 3.8161913158704735e-05, "loss": 0.4299, "step": 229500 }, { "epoch": 3.59, "learning_rate": 3.813582842505595e-05, "loss": 0.4238, "step": 230000 }, { "epoch": 3.6, "learning_rate": 3.810974369140717e-05, "loss": 0.4228, "step": 230500 }, { "epoch": 3.61, "learning_rate": 3.808371112722568e-05, "loss": 0.425, "step": 231000 }, { "epoch": 3.61, "learning_rate": 3.805762639357689e-05, "loss": 0.4243, "step": 231500 }, { "epoch": 3.62, "learning_rate": 3.803154165992811e-05, "loss": 0.4273, "step": 232000 }, { "epoch": 3.63, "learning_rate": 3.8005509095746624e-05, "loss": 0.4274, "step": 232500 }, { "epoch": 3.64, "learning_rate": 3.797942436209784e-05, "loss": 0.4191, "step": 233000 }, { "epoch": 3.64, "learning_rate": 3.7953339628449056e-05, "loss": 0.4225, "step": 233500 }, { "epoch": 3.65, "learning_rate": 3.792725489480027e-05, "loss": 0.4218, "step": 234000 }, { "epoch": 3.66, "learning_rate": 3.790117016115148e-05, "loss": 0.4279, "step": 234500 }, { "epoch": 3.67, "learning_rate": 3.78750854275027e-05, "loss": 0.4233, "step": 235000 }, { "epoch": 3.68, "learning_rate": 3.784900069385391e-05, "loss": 0.4232, "step": 235500 }, { "epoch": 3.68, "learning_rate": 3.782291596020513e-05, "loss": 0.4216, "step": 236000 }, { "epoch": 3.69, "learning_rate": 3.7796831226556345e-05, "loss": 0.4251, "step": 236500 }, { "epoch": 3.7, "learning_rate": 3.777074649290756e-05, "loss": 0.4321, "step": 237000 }, { "epoch": 3.71, "learning_rate": 3.7744661759258777e-05, "loss": 0.4179, "step": 237500 }, { "epoch": 3.72, "learning_rate": 3.771857702560999e-05, "loss": 0.4137, "step": 238000 }, { "epoch": 3.72, "learning_rate": 3.769254446142851e-05, "loss": 0.4162, "step": 238500 }, { "epoch": 3.73, "learning_rate": 3.7666459727779725e-05, "loss": 0.431, "step": 239000 }, { "epoch": 3.74, "learning_rate": 3.7640427163598234e-05, "loss": 0.4126, "step": 239500 }, { "epoch": 3.75, "learning_rate": 3.761439459941675e-05, "loss": 0.4238, "step": 240000 }, { "epoch": 3.75, "learning_rate": 3.758836203523526e-05, "loss": 0.4239, "step": 240500 }, { "epoch": 3.76, "learning_rate": 3.7562277301586476e-05, "loss": 0.4297, "step": 241000 }, { "epoch": 3.77, "learning_rate": 3.753619256793769e-05, "loss": 0.4225, "step": 241500 }, { "epoch": 3.78, "learning_rate": 3.751010783428891e-05, "loss": 0.4201, "step": 242000 }, { "epoch": 3.79, "learning_rate": 3.748402310064012e-05, "loss": 0.4215, "step": 242500 }, { "epoch": 3.79, "learning_rate": 3.745793836699133e-05, "loss": 0.4182, "step": 243000 }, { "epoch": 3.8, "learning_rate": 3.743185363334255e-05, "loss": 0.4184, "step": 243500 }, { "epoch": 3.81, "learning_rate": 3.7405768899693764e-05, "loss": 0.4304, "step": 244000 }, { "epoch": 3.82, "learning_rate": 3.737968416604498e-05, "loss": 0.4175, "step": 244500 }, { "epoch": 3.82, "learning_rate": 3.7353599432396196e-05, "loss": 0.4144, "step": 245000 }, { "epoch": 3.83, "learning_rate": 3.732751469874741e-05, "loss": 0.4197, "step": 245500 }, { "epoch": 3.84, "learning_rate": 3.730142996509863e-05, "loss": 0.4157, "step": 246000 }, { "epoch": 3.85, "learning_rate": 3.7275345231449844e-05, "loss": 0.4127, "step": 246500 }, { "epoch": 3.86, "learning_rate": 3.724926049780106e-05, "loss": 0.4139, "step": 247000 }, { "epoch": 3.86, "learning_rate": 3.722322793361957e-05, "loss": 0.4154, "step": 247500 }, { "epoch": 3.87, "learning_rate": 3.7197143199970785e-05, "loss": 0.4172, "step": 248000 }, { "epoch": 3.88, "learning_rate": 3.7171058466322e-05, "loss": 0.4129, "step": 248500 }, { "epoch": 3.89, "learning_rate": 3.714497373267322e-05, "loss": 0.4162, "step": 249000 }, { "epoch": 3.89, "learning_rate": 3.7118941168491726e-05, "loss": 0.42, "step": 249500 }, { "epoch": 3.9, "learning_rate": 3.709285643484294e-05, "loss": 0.4146, "step": 250000 }, { "epoch": 3.91, "learning_rate": 3.706687604012876e-05, "loss": 0.4211, "step": 250500 }, { "epoch": 3.92, "learning_rate": 3.7040791306479975e-05, "loss": 0.4134, "step": 251000 }, { "epoch": 3.93, "learning_rate": 3.7014706572831184e-05, "loss": 0.4171, "step": 251500 }, { "epoch": 3.93, "learning_rate": 3.69886218391824e-05, "loss": 0.42, "step": 252000 }, { "epoch": 3.94, "learning_rate": 3.6962537105533615e-05, "loss": 0.4074, "step": 252500 }, { "epoch": 3.95, "learning_rate": 3.693650454135213e-05, "loss": 0.4119, "step": 253000 }, { "epoch": 3.96, "learning_rate": 3.691047197717064e-05, "loss": 0.4126, "step": 253500 }, { "epoch": 3.96, "learning_rate": 3.688438724352186e-05, "loss": 0.4086, "step": 254000 }, { "epoch": 3.97, "learning_rate": 3.685830250987307e-05, "loss": 0.4074, "step": 254500 }, { "epoch": 3.98, "learning_rate": 3.683221777622429e-05, "loss": 0.4154, "step": 255000 }, { "epoch": 3.99, "learning_rate": 3.6806133042575505e-05, "loss": 0.4223, "step": 255500 }, { "epoch": 4.0, "learning_rate": 3.678004830892672e-05, "loss": 0.415, "step": 256000 }, { "epoch": 4.0, "eval_bleu": 52.0616, "eval_gen_len": 15.0215, "eval_loss": 0.8553618788719177, "eval_runtime": 9536.6894, "eval_samples_per_second": 13.411, "eval_steps_per_second": 1.676, "step": 256246 }, { "epoch": 4.0, "learning_rate": 3.6753963575277937e-05, "loss": 0.3994, "step": 256500 }, { "epoch": 4.01, "learning_rate": 3.6727878841629146e-05, "loss": 0.4028, "step": 257000 }, { "epoch": 4.02, "learning_rate": 3.670184627744766e-05, "loss": 0.3918, "step": 257500 }, { "epoch": 4.03, "learning_rate": 3.667576154379888e-05, "loss": 0.3958, "step": 258000 }, { "epoch": 4.04, "learning_rate": 3.6649676810150094e-05, "loss": 0.396, "step": 258500 }, { "epoch": 4.04, "learning_rate": 3.662359207650131e-05, "loss": 0.3874, "step": 259000 }, { "epoch": 4.05, "learning_rate": 3.659750734285252e-05, "loss": 0.3863, "step": 259500 }, { "epoch": 4.06, "learning_rate": 3.657147477867104e-05, "loss": 0.3821, "step": 260000 }, { "epoch": 4.07, "learning_rate": 3.654544221448955e-05, "loss": 0.3895, "step": 260500 }, { "epoch": 4.07, "learning_rate": 3.651935748084077e-05, "loss": 0.3819, "step": 261000 }, { "epoch": 4.08, "learning_rate": 3.649327274719198e-05, "loss": 0.3798, "step": 261500 }, { "epoch": 4.09, "learning_rate": 3.646724018301049e-05, "loss": 0.3812, "step": 262000 }, { "epoch": 4.1, "learning_rate": 3.644115544936171e-05, "loss": 0.3796, "step": 262500 }, { "epoch": 4.11, "learning_rate": 3.6415070715712924e-05, "loss": 0.3743, "step": 263000 }, { "epoch": 4.11, "learning_rate": 3.638898598206414e-05, "loss": 0.374, "step": 263500 }, { "epoch": 4.12, "learning_rate": 3.6362901248415356e-05, "loss": 0.3849, "step": 264000 }, { "epoch": 4.13, "learning_rate": 3.633681651476657e-05, "loss": 0.3815, "step": 264500 }, { "epoch": 4.14, "learning_rate": 3.631073178111779e-05, "loss": 0.3717, "step": 265000 }, { "epoch": 4.14, "learning_rate": 3.6284647047469e-05, "loss": 0.3727, "step": 265500 }, { "epoch": 4.15, "learning_rate": 3.625856231382021e-05, "loss": 0.3789, "step": 266000 }, { "epoch": 4.16, "learning_rate": 3.623247758017143e-05, "loss": 0.3658, "step": 266500 }, { "epoch": 4.17, "learning_rate": 3.6206392846522645e-05, "loss": 0.3717, "step": 267000 }, { "epoch": 4.18, "learning_rate": 3.618030811287386e-05, "loss": 0.3661, "step": 267500 }, { "epoch": 4.18, "learning_rate": 3.6154223379225077e-05, "loss": 0.3732, "step": 268000 }, { "epoch": 4.19, "learning_rate": 3.612813864557629e-05, "loss": 0.361, "step": 268500 }, { "epoch": 4.2, "learning_rate": 3.610205391192751e-05, "loss": 0.3657, "step": 269000 }, { "epoch": 4.21, "learning_rate": 3.6075969178278724e-05, "loss": 0.37, "step": 269500 }, { "epoch": 4.21, "learning_rate": 3.604988444462994e-05, "loss": 0.3647, "step": 270000 }, { "epoch": 4.22, "learning_rate": 3.6023799710981156e-05, "loss": 0.3665, "step": 270500 }, { "epoch": 4.23, "learning_rate": 3.599781931626696e-05, "loss": 0.3597, "step": 271000 }, { "epoch": 4.24, "learning_rate": 3.5971734582618175e-05, "loss": 0.3711, "step": 271500 }, { "epoch": 4.25, "learning_rate": 3.594564984896939e-05, "loss": 0.3582, "step": 272000 }, { "epoch": 4.25, "learning_rate": 3.591956511532061e-05, "loss": 0.358, "step": 272500 }, { "epoch": 4.26, "learning_rate": 3.589348038167182e-05, "loss": 0.3548, "step": 273000 }, { "epoch": 4.27, "learning_rate": 3.586744781749033e-05, "loss": 0.3592, "step": 273500 }, { "epoch": 4.28, "learning_rate": 3.584136308384155e-05, "loss": 0.3683, "step": 274000 }, { "epoch": 4.28, "learning_rate": 3.5815278350192764e-05, "loss": 0.3615, "step": 274500 }, { "epoch": 4.29, "learning_rate": 3.578919361654398e-05, "loss": 0.3691, "step": 275000 }, { "epoch": 4.3, "learning_rate": 3.5763161052362496e-05, "loss": 0.3503, "step": 275500 }, { "epoch": 4.31, "learning_rate": 3.573707631871371e-05, "loss": 0.3686, "step": 276000 }, { "epoch": 4.32, "learning_rate": 3.571099158506493e-05, "loss": 0.3617, "step": 276500 }, { "epoch": 4.32, "learning_rate": 3.5684906851416144e-05, "loss": 0.3621, "step": 277000 }, { "epoch": 4.33, "learning_rate": 3.565882211776736e-05, "loss": 0.3653, "step": 277500 }, { "epoch": 4.34, "learning_rate": 3.5632737384118576e-05, "loss": 0.3594, "step": 278000 }, { "epoch": 4.35, "learning_rate": 3.5606652650469785e-05, "loss": 0.358, "step": 278500 }, { "epoch": 4.36, "learning_rate": 3.55806200862883e-05, "loss": 0.3565, "step": 279000 }, { "epoch": 4.36, "learning_rate": 3.555453535263952e-05, "loss": 0.3576, "step": 279500 }, { "epoch": 4.37, "learning_rate": 3.552845061899073e-05, "loss": 0.3646, "step": 280000 }, { "epoch": 4.38, "learning_rate": 3.550236588534195e-05, "loss": 0.3541, "step": 280500 }, { "epoch": 4.39, "learning_rate": 3.5476281151693165e-05, "loss": 0.3531, "step": 281000 }, { "epoch": 4.39, "learning_rate": 3.545019641804438e-05, "loss": 0.3547, "step": 281500 }, { "epoch": 4.4, "learning_rate": 3.5424111684395596e-05, "loss": 0.3496, "step": 282000 }, { "epoch": 4.41, "learning_rate": 3.539802695074681e-05, "loss": 0.3525, "step": 282500 }, { "epoch": 4.42, "learning_rate": 3.537199438656532e-05, "loss": 0.3485, "step": 283000 }, { "epoch": 4.43, "learning_rate": 3.534590965291654e-05, "loss": 0.352, "step": 283500 }, { "epoch": 4.43, "learning_rate": 3.5319824919267754e-05, "loss": 0.3585, "step": 284000 }, { "epoch": 4.44, "learning_rate": 3.529374018561897e-05, "loss": 0.3549, "step": 284500 }, { "epoch": 4.45, "learning_rate": 3.5267655451970185e-05, "loss": 0.3503, "step": 285000 }, { "epoch": 4.46, "learning_rate": 3.52415707183214e-05, "loss": 0.3567, "step": 285500 }, { "epoch": 4.46, "learning_rate": 3.521548598467261e-05, "loss": 0.3526, "step": 286000 }, { "epoch": 4.47, "learning_rate": 3.5189401251023826e-05, "loss": 0.3458, "step": 286500 }, { "epoch": 4.48, "learning_rate": 3.5163420856309636e-05, "loss": 0.3486, "step": 287000 }, { "epoch": 4.49, "learning_rate": 3.513733612266085e-05, "loss": 0.3476, "step": 287500 }, { "epoch": 4.5, "learning_rate": 3.511125138901207e-05, "loss": 0.346, "step": 288000 }, { "epoch": 4.5, "learning_rate": 3.508521882483058e-05, "loss": 0.3547, "step": 288500 }, { "epoch": 4.51, "learning_rate": 3.505913409118179e-05, "loss": 0.3442, "step": 289000 }, { "epoch": 4.52, "learning_rate": 3.503304935753301e-05, "loss": 0.3488, "step": 289500 }, { "epoch": 4.53, "learning_rate": 3.5006964623884225e-05, "loss": 0.3464, "step": 290000 }, { "epoch": 4.53, "learning_rate": 3.498087989023544e-05, "loss": 0.3482, "step": 290500 }, { "epoch": 4.54, "learning_rate": 3.495479515658666e-05, "loss": 0.342, "step": 291000 }, { "epoch": 4.55, "learning_rate": 3.492871042293787e-05, "loss": 0.3462, "step": 291500 }, { "epoch": 4.56, "learning_rate": 3.490262568928909e-05, "loss": 0.3514, "step": 292000 }, { "epoch": 4.57, "learning_rate": 3.4876540955640305e-05, "loss": 0.352, "step": 292500 }, { "epoch": 4.57, "learning_rate": 3.485045622199152e-05, "loss": 0.3479, "step": 293000 }, { "epoch": 4.58, "learning_rate": 3.4824371488342736e-05, "loss": 0.3489, "step": 293500 }, { "epoch": 4.59, "learning_rate": 3.4798286754693946e-05, "loss": 0.3403, "step": 294000 }, { "epoch": 4.6, "learning_rate": 3.477220202104516e-05, "loss": 0.337, "step": 294500 }, { "epoch": 4.6, "learning_rate": 3.474611728739638e-05, "loss": 0.346, "step": 295000 }, { "epoch": 4.61, "learning_rate": 3.472003255374759e-05, "loss": 0.3395, "step": 295500 }, { "epoch": 4.62, "learning_rate": 3.469394782009881e-05, "loss": 0.3379, "step": 296000 }, { "epoch": 4.63, "learning_rate": 3.4667863086450025e-05, "loss": 0.3447, "step": 296500 }, { "epoch": 4.64, "learning_rate": 3.464177835280124e-05, "loss": 0.3392, "step": 297000 }, { "epoch": 4.64, "learning_rate": 3.461569361915246e-05, "loss": 0.3439, "step": 297500 }, { "epoch": 4.65, "learning_rate": 3.458960888550367e-05, "loss": 0.3341, "step": 298000 }, { "epoch": 4.66, "learning_rate": 3.456352415185489e-05, "loss": 0.3435, "step": 298500 }, { "epoch": 4.67, "learning_rate": 3.4537439418206105e-05, "loss": 0.3459, "step": 299000 }, { "epoch": 4.68, "learning_rate": 3.4511406854024614e-05, "loss": 0.3394, "step": 299500 }, { "epoch": 4.68, "learning_rate": 3.448532212037583e-05, "loss": 0.3338, "step": 300000 }, { "epoch": 4.69, "learning_rate": 3.4459237386727046e-05, "loss": 0.3435, "step": 300500 }, { "epoch": 4.7, "learning_rate": 3.443315265307826e-05, "loss": 0.3512, "step": 301000 }, { "epoch": 4.71, "learning_rate": 3.440706791942948e-05, "loss": 0.3327, "step": 301500 }, { "epoch": 4.71, "learning_rate": 3.438113969418259e-05, "loss": 0.3351, "step": 302000 }, { "epoch": 4.72, "learning_rate": 3.43550549605338e-05, "loss": 0.3341, "step": 302500 }, { "epoch": 4.73, "learning_rate": 3.432897022688501e-05, "loss": 0.3392, "step": 303000 }, { "epoch": 4.74, "learning_rate": 3.430288549323623e-05, "loss": 0.3324, "step": 303500 }, { "epoch": 4.75, "learning_rate": 3.4276800759587445e-05, "loss": 0.3373, "step": 304000 }, { "epoch": 4.75, "learning_rate": 3.425071602593866e-05, "loss": 0.3402, "step": 304500 }, { "epoch": 4.76, "learning_rate": 3.4224631292289876e-05, "loss": 0.3451, "step": 305000 }, { "epoch": 4.77, "learning_rate": 3.419854655864109e-05, "loss": 0.3417, "step": 305500 }, { "epoch": 4.78, "learning_rate": 3.417246182499231e-05, "loss": 0.3386, "step": 306000 }, { "epoch": 4.78, "learning_rate": 3.4146377091343524e-05, "loss": 0.3384, "step": 306500 }, { "epoch": 4.79, "learning_rate": 3.412029235769474e-05, "loss": 0.3388, "step": 307000 }, { "epoch": 4.8, "learning_rate": 3.4094207624045956e-05, "loss": 0.3357, "step": 307500 }, { "epoch": 4.81, "learning_rate": 3.4068175059864465e-05, "loss": 0.3482, "step": 308000 }, { "epoch": 4.82, "learning_rate": 3.404209032621568e-05, "loss": 0.334, "step": 308500 }, { "epoch": 4.82, "learning_rate": 3.40160055925669e-05, "loss": 0.3334, "step": 309000 }, { "epoch": 4.83, "learning_rate": 3.398992085891811e-05, "loss": 0.3379, "step": 309500 }, { "epoch": 4.84, "learning_rate": 3.396383612526933e-05, "loss": 0.3321, "step": 310000 }, { "epoch": 4.85, "learning_rate": 3.393780356108784e-05, "loss": 0.3291, "step": 310500 }, { "epoch": 4.85, "learning_rate": 3.391177099690635e-05, "loss": 0.3314, "step": 311000 }, { "epoch": 4.86, "learning_rate": 3.3885686263257564e-05, "loss": 0.3365, "step": 311500 }, { "epoch": 4.87, "learning_rate": 3.385960152960878e-05, "loss": 0.3352, "step": 312000 }, { "epoch": 4.88, "learning_rate": 3.3833516795959996e-05, "loss": 0.3276, "step": 312500 }, { "epoch": 4.89, "learning_rate": 3.380743206231121e-05, "loss": 0.3353, "step": 313000 }, { "epoch": 4.89, "learning_rate": 3.378134732866243e-05, "loss": 0.3404, "step": 313500 }, { "epoch": 4.9, "learning_rate": 3.375526259501364e-05, "loss": 0.331, "step": 314000 }, { "epoch": 4.91, "learning_rate": 3.372923003083215e-05, "loss": 0.3357, "step": 314500 }, { "epoch": 4.92, "learning_rate": 3.370314529718337e-05, "loss": 0.3313, "step": 315000 }, { "epoch": 4.92, "learning_rate": 3.3677060563534585e-05, "loss": 0.3392, "step": 315500 }, { "epoch": 4.93, "learning_rate": 3.36509758298858e-05, "loss": 0.3325, "step": 316000 }, { "epoch": 4.94, "learning_rate": 3.3624891096237016e-05, "loss": 0.3276, "step": 316500 }, { "epoch": 4.95, "learning_rate": 3.359880636258823e-05, "loss": 0.3323, "step": 317000 }, { "epoch": 4.96, "learning_rate": 3.357272162893945e-05, "loss": 0.3269, "step": 317500 }, { "epoch": 4.96, "learning_rate": 3.3546636895290664e-05, "loss": 0.3339, "step": 318000 }, { "epoch": 4.97, "learning_rate": 3.352055216164188e-05, "loss": 0.3211, "step": 318500 }, { "epoch": 4.98, "learning_rate": 3.3494467427993096e-05, "loss": 0.3357, "step": 319000 }, { "epoch": 4.99, "learning_rate": 3.3468382694344305e-05, "loss": 0.3379, "step": 319500 }, { "epoch": 5.0, "learning_rate": 3.344229796069552e-05, "loss": 0.3348, "step": 320000 }, { "epoch": 5.0, "eval_bleu": 51.995, "eval_gen_len": 14.9859, "eval_loss": 0.9135012626647949, "eval_runtime": 9511.5501, "eval_samples_per_second": 13.446, "eval_steps_per_second": 1.681, "step": 320307 }, { "epoch": 5.0, "step": 320307, "total_flos": 1.1106366691632742e+19, "train_loss": 0.5943015630864049, "train_runtime": 296139.8154, "train_samples_per_second": 51.917, "train_steps_per_second": 3.245 } ], "logging_steps": 500, "max_steps": 960915, "num_train_epochs": 15, "save_steps": 500, "total_flos": 1.1106366691632742e+19, "trial_name": null, "trial_params": null }