{ "best_metric": null, "best_model_checkpoint": null, "epoch": 44.123389301054274, "eval_steps": 500, "global_step": 4181000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.999736167248858e-05, "loss": 5.8079, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.999472334497716e-05, "loss": 4.48, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.999208501746573e-05, "loss": 4.1412, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.9989446689954304e-05, "loss": 3.8521, "step": 2000 }, { "epoch": 0.03, "learning_rate": 4.998680836244288e-05, "loss": 3.7444, "step": 2500 }, { "epoch": 0.03, "learning_rate": 4.998417003493146e-05, "loss": 3.547, "step": 3000 }, { "epoch": 0.04, "learning_rate": 4.998153170742004e-05, "loss": 3.3731, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.9978893379908606e-05, "loss": 3.2642, "step": 4000 }, { "epoch": 0.05, "learning_rate": 4.997625505239719e-05, "loss": 3.1457, "step": 4500 }, { "epoch": 0.05, "learning_rate": 4.9973616724885764e-05, "loss": 3.0872, "step": 5000 }, { "epoch": 0.06, "learning_rate": 4.997097839737434e-05, "loss": 2.9484, "step": 5500 }, { "epoch": 0.06, "learning_rate": 4.9968340069862915e-05, "loss": 2.8687, "step": 6000 }, { "epoch": 0.07, "learning_rate": 4.996570174235149e-05, "loss": 2.8459, "step": 6500 }, { "epoch": 0.07, "learning_rate": 4.9963063414840066e-05, "loss": 2.8518, "step": 7000 }, { "epoch": 0.08, "learning_rate": 4.996042508732864e-05, "loss": 2.7294, "step": 7500 }, { "epoch": 0.08, "learning_rate": 4.9957786759817224e-05, "loss": 2.6635, "step": 8000 }, { "epoch": 0.09, "learning_rate": 4.995514843230579e-05, "loss": 2.6407, "step": 8500 }, { "epoch": 0.09, "learning_rate": 4.995251010479437e-05, "loss": 2.6268, "step": 9000 }, { "epoch": 0.1, "learning_rate": 4.994987177728295e-05, "loss": 2.6036, "step": 9500 }, { "epoch": 0.11, "learning_rate": 4.9947233449771526e-05, "loss": 2.5329, "step": 10000 }, { "epoch": 0.11, "learning_rate": 4.99445951222601e-05, "loss": 2.6029, "step": 10500 }, { "epoch": 0.12, "learning_rate": 4.994195679474868e-05, "loss": 2.506, "step": 11000 }, { "epoch": 0.12, "learning_rate": 4.993931846723725e-05, "loss": 2.4599, "step": 11500 }, { "epoch": 0.13, "learning_rate": 4.993668013972583e-05, "loss": 2.5108, "step": 12000 }, { "epoch": 0.13, "learning_rate": 4.99340418122144e-05, "loss": 2.4693, "step": 12500 }, { "epoch": 0.14, "learning_rate": 4.9931403484702985e-05, "loss": 2.4383, "step": 13000 }, { "epoch": 0.14, "learning_rate": 4.9928765157191554e-05, "loss": 2.4124, "step": 13500 }, { "epoch": 0.15, "learning_rate": 4.992612682968013e-05, "loss": 2.3671, "step": 14000 }, { "epoch": 0.15, "learning_rate": 4.9923488502168705e-05, "loss": 2.3858, "step": 14500 }, { "epoch": 0.16, "learning_rate": 4.992085017465729e-05, "loss": 2.3952, "step": 15000 }, { "epoch": 0.16, "learning_rate": 4.9918211847145856e-05, "loss": 2.3724, "step": 15500 }, { "epoch": 0.17, "learning_rate": 4.991557351963443e-05, "loss": 2.4513, "step": 16000 }, { "epoch": 0.17, "learning_rate": 4.9912935192123014e-05, "loss": 2.2696, "step": 16500 }, { "epoch": 0.18, "learning_rate": 4.991029686461159e-05, "loss": 2.3472, "step": 17000 }, { "epoch": 0.18, "learning_rate": 4.9907658537100165e-05, "loss": 2.3475, "step": 17500 }, { "epoch": 0.19, "learning_rate": 4.990502020958874e-05, "loss": 2.3289, "step": 18000 }, { "epoch": 0.2, "learning_rate": 4.9902381882077316e-05, "loss": 2.2573, "step": 18500 }, { "epoch": 0.2, "learning_rate": 4.989974355456589e-05, "loss": 2.2714, "step": 19000 }, { "epoch": 0.21, "learning_rate": 4.989710522705447e-05, "loss": 2.3415, "step": 19500 }, { "epoch": 0.21, "learning_rate": 4.989446689954305e-05, "loss": 2.2987, "step": 20000 }, { "epoch": 0.22, "learning_rate": 4.989182857203162e-05, "loss": 2.285, "step": 20500 }, { "epoch": 0.22, "learning_rate": 4.988919024452019e-05, "loss": 2.2427, "step": 21000 }, { "epoch": 0.23, "learning_rate": 4.9886551917008775e-05, "loss": 2.2315, "step": 21500 }, { "epoch": 0.23, "learning_rate": 4.988391358949735e-05, "loss": 2.2075, "step": 22000 }, { "epoch": 0.24, "learning_rate": 4.9881275261985926e-05, "loss": 2.2379, "step": 22500 }, { "epoch": 0.24, "learning_rate": 4.98786369344745e-05, "loss": 2.2669, "step": 23000 }, { "epoch": 0.25, "learning_rate": 4.987599860696308e-05, "loss": 2.1572, "step": 23500 }, { "epoch": 0.25, "learning_rate": 4.987336027945165e-05, "loss": 2.2218, "step": 24000 }, { "epoch": 0.26, "learning_rate": 4.987072195194023e-05, "loss": 2.2017, "step": 24500 }, { "epoch": 0.26, "learning_rate": 4.9868083624428804e-05, "loss": 2.1509, "step": 25000 }, { "epoch": 0.27, "learning_rate": 4.986544529691738e-05, "loss": 2.2906, "step": 25500 }, { "epoch": 0.27, "learning_rate": 4.9862806969405955e-05, "loss": 2.1208, "step": 26000 }, { "epoch": 0.28, "learning_rate": 4.986016864189453e-05, "loss": 2.2166, "step": 26500 }, { "epoch": 0.28, "learning_rate": 4.985753031438311e-05, "loss": 2.1473, "step": 27000 }, { "epoch": 0.29, "learning_rate": 4.985489198687168e-05, "loss": 2.1183, "step": 27500 }, { "epoch": 0.3, "learning_rate": 4.985225365936026e-05, "loss": 2.1408, "step": 28000 }, { "epoch": 0.3, "learning_rate": 4.984961533184884e-05, "loss": 2.1855, "step": 28500 }, { "epoch": 0.31, "learning_rate": 4.9846977004337415e-05, "loss": 2.1993, "step": 29000 }, { "epoch": 0.31, "learning_rate": 4.984433867682599e-05, "loss": 2.1275, "step": 29500 }, { "epoch": 0.32, "learning_rate": 4.9841700349314566e-05, "loss": 2.0987, "step": 30000 }, { "epoch": 0.32, "learning_rate": 4.983906202180314e-05, "loss": 2.1306, "step": 30500 }, { "epoch": 0.33, "learning_rate": 4.9836423694291716e-05, "loss": 2.1015, "step": 31000 }, { "epoch": 0.33, "learning_rate": 4.983378536678029e-05, "loss": 2.1981, "step": 31500 }, { "epoch": 0.34, "learning_rate": 4.9831147039268874e-05, "loss": 2.1527, "step": 32000 }, { "epoch": 0.34, "learning_rate": 4.982850871175744e-05, "loss": 2.1114, "step": 32500 }, { "epoch": 0.35, "learning_rate": 4.982587038424602e-05, "loss": 2.0971, "step": 33000 }, { "epoch": 0.35, "learning_rate": 4.98232320567346e-05, "loss": 2.0855, "step": 33500 }, { "epoch": 0.36, "learning_rate": 4.9820593729223176e-05, "loss": 2.102, "step": 34000 }, { "epoch": 0.36, "learning_rate": 4.9817955401711745e-05, "loss": 2.1159, "step": 34500 }, { "epoch": 0.37, "learning_rate": 4.981531707420033e-05, "loss": 2.0279, "step": 35000 }, { "epoch": 0.37, "learning_rate": 4.98126787466889e-05, "loss": 2.0742, "step": 35500 }, { "epoch": 0.38, "learning_rate": 4.981004041917748e-05, "loss": 2.1082, "step": 36000 }, { "epoch": 0.39, "learning_rate": 4.9807402091666054e-05, "loss": 2.1118, "step": 36500 }, { "epoch": 0.39, "learning_rate": 4.980476376415463e-05, "loss": 2.0661, "step": 37000 }, { "epoch": 0.4, "learning_rate": 4.9802125436643205e-05, "loss": 2.0396, "step": 37500 }, { "epoch": 0.4, "learning_rate": 4.979948710913178e-05, "loss": 2.0375, "step": 38000 }, { "epoch": 0.41, "learning_rate": 4.9796848781620356e-05, "loss": 2.053, "step": 38500 }, { "epoch": 0.41, "learning_rate": 4.979421045410894e-05, "loss": 2.0394, "step": 39000 }, { "epoch": 0.42, "learning_rate": 4.9791572126597507e-05, "loss": 2.0003, "step": 39500 }, { "epoch": 0.42, "learning_rate": 4.978893379908608e-05, "loss": 2.0158, "step": 40000 }, { "epoch": 0.43, "learning_rate": 4.9786295471574664e-05, "loss": 2.0421, "step": 40500 }, { "epoch": 0.43, "learning_rate": 4.978365714406324e-05, "loss": 2.1111, "step": 41000 }, { "epoch": 0.44, "learning_rate": 4.9781018816551815e-05, "loss": 2.0702, "step": 41500 }, { "epoch": 0.44, "learning_rate": 4.977838048904039e-05, "loss": 2.0634, "step": 42000 }, { "epoch": 0.45, "learning_rate": 4.9775742161528966e-05, "loss": 2.0276, "step": 42500 }, { "epoch": 0.45, "learning_rate": 4.977310383401754e-05, "loss": 2.0368, "step": 43000 }, { "epoch": 0.46, "learning_rate": 4.977046550650612e-05, "loss": 1.9591, "step": 43500 }, { "epoch": 0.46, "learning_rate": 4.976782717899469e-05, "loss": 2.0182, "step": 44000 }, { "epoch": 0.47, "learning_rate": 4.976518885148327e-05, "loss": 2.0129, "step": 44500 }, { "epoch": 0.47, "learning_rate": 4.9762550523971844e-05, "loss": 2.0446, "step": 45000 }, { "epoch": 0.48, "learning_rate": 4.9759912196460426e-05, "loss": 2.0224, "step": 45500 }, { "epoch": 0.49, "learning_rate": 4.9757273868949e-05, "loss": 1.9851, "step": 46000 }, { "epoch": 0.49, "learning_rate": 4.975463554143757e-05, "loss": 1.9947, "step": 46500 }, { "epoch": 0.5, "learning_rate": 4.975199721392615e-05, "loss": 1.9868, "step": 47000 }, { "epoch": 0.5, "learning_rate": 4.974935888641473e-05, "loss": 1.9678, "step": 47500 }, { "epoch": 0.51, "learning_rate": 4.97467205589033e-05, "loss": 1.9906, "step": 48000 }, { "epoch": 0.51, "learning_rate": 4.974408223139188e-05, "loss": 1.9736, "step": 48500 }, { "epoch": 0.52, "learning_rate": 4.9741443903880454e-05, "loss": 1.9457, "step": 49000 }, { "epoch": 0.52, "learning_rate": 4.973880557636903e-05, "loss": 1.9758, "step": 49500 }, { "epoch": 0.53, "learning_rate": 4.9736167248857605e-05, "loss": 2.031, "step": 50000 }, { "epoch": 0.53, "learning_rate": 4.973352892134618e-05, "loss": 1.9567, "step": 50500 }, { "epoch": 0.54, "learning_rate": 4.973089059383476e-05, "loss": 2.0051, "step": 51000 }, { "epoch": 0.54, "learning_rate": 4.972825226632333e-05, "loss": 1.9309, "step": 51500 }, { "epoch": 0.55, "learning_rate": 4.972561393881191e-05, "loss": 1.9041, "step": 52000 }, { "epoch": 0.55, "learning_rate": 4.972297561130049e-05, "loss": 1.9507, "step": 52500 }, { "epoch": 0.56, "learning_rate": 4.9720337283789065e-05, "loss": 1.935, "step": 53000 }, { "epoch": 0.56, "learning_rate": 4.9717698956277634e-05, "loss": 1.938, "step": 53500 }, { "epoch": 0.57, "learning_rate": 4.9715060628766216e-05, "loss": 2.0293, "step": 54000 }, { "epoch": 0.58, "learning_rate": 4.971242230125479e-05, "loss": 1.9879, "step": 54500 }, { "epoch": 0.58, "learning_rate": 4.970978397374337e-05, "loss": 1.9516, "step": 55000 }, { "epoch": 0.59, "learning_rate": 4.970714564623194e-05, "loss": 1.986, "step": 55500 }, { "epoch": 0.59, "learning_rate": 4.970450731872052e-05, "loss": 1.9432, "step": 56000 }, { "epoch": 0.6, "learning_rate": 4.9701868991209093e-05, "loss": 1.964, "step": 56500 }, { "epoch": 0.6, "learning_rate": 4.969923066369767e-05, "loss": 1.9411, "step": 57000 }, { "epoch": 0.61, "learning_rate": 4.969659233618625e-05, "loss": 1.924, "step": 57500 }, { "epoch": 0.61, "learning_rate": 4.969395400867483e-05, "loss": 1.929, "step": 58000 }, { "epoch": 0.62, "learning_rate": 4.9691315681163395e-05, "loss": 1.9136, "step": 58500 }, { "epoch": 0.62, "learning_rate": 4.968867735365198e-05, "loss": 1.9355, "step": 59000 }, { "epoch": 0.63, "learning_rate": 4.968603902614055e-05, "loss": 1.9159, "step": 59500 }, { "epoch": 0.63, "learning_rate": 4.968340069862913e-05, "loss": 1.9798, "step": 60000 }, { "epoch": 0.64, "learning_rate": 4.9680762371117704e-05, "loss": 1.8963, "step": 60500 }, { "epoch": 0.64, "learning_rate": 4.967812404360628e-05, "loss": 1.9586, "step": 61000 }, { "epoch": 0.65, "learning_rate": 4.9675485716094855e-05, "loss": 1.8789, "step": 61500 }, { "epoch": 0.65, "learning_rate": 4.967284738858343e-05, "loss": 1.955, "step": 62000 }, { "epoch": 0.66, "learning_rate": 4.9670209061072006e-05, "loss": 1.9076, "step": 62500 }, { "epoch": 0.66, "learning_rate": 4.966757073356058e-05, "loss": 1.8856, "step": 63000 }, { "epoch": 0.67, "learning_rate": 4.966493240604916e-05, "loss": 1.9284, "step": 63500 }, { "epoch": 0.68, "learning_rate": 4.966229407853773e-05, "loss": 1.9121, "step": 64000 }, { "epoch": 0.68, "learning_rate": 4.9659655751026315e-05, "loss": 1.899, "step": 64500 }, { "epoch": 0.69, "learning_rate": 4.965701742351489e-05, "loss": 1.9383, "step": 65000 }, { "epoch": 0.69, "learning_rate": 4.965437909600346e-05, "loss": 1.9472, "step": 65500 }, { "epoch": 0.7, "learning_rate": 4.965174076849204e-05, "loss": 2.0012, "step": 66000 }, { "epoch": 0.7, "learning_rate": 4.964910244098062e-05, "loss": 1.9008, "step": 66500 }, { "epoch": 0.71, "learning_rate": 4.964646411346919e-05, "loss": 1.968, "step": 67000 }, { "epoch": 0.71, "learning_rate": 4.964382578595777e-05, "loss": 1.8978, "step": 67500 }, { "epoch": 0.72, "learning_rate": 4.964118745844634e-05, "loss": 1.9119, "step": 68000 }, { "epoch": 0.72, "learning_rate": 4.963854913093492e-05, "loss": 1.9171, "step": 68500 }, { "epoch": 0.73, "learning_rate": 4.9635910803423494e-05, "loss": 1.86, "step": 69000 }, { "epoch": 0.73, "learning_rate": 4.9633272475912076e-05, "loss": 1.9395, "step": 69500 }, { "epoch": 0.74, "learning_rate": 4.963063414840065e-05, "loss": 1.9457, "step": 70000 }, { "epoch": 0.74, "learning_rate": 4.962799582088922e-05, "loss": 1.9378, "step": 70500 }, { "epoch": 0.75, "learning_rate": 4.96253574933778e-05, "loss": 1.9074, "step": 71000 }, { "epoch": 0.75, "learning_rate": 4.962271916586638e-05, "loss": 1.8827, "step": 71500 }, { "epoch": 0.76, "learning_rate": 4.9620080838354954e-05, "loss": 1.8431, "step": 72000 }, { "epoch": 0.77, "learning_rate": 4.961744251084352e-05, "loss": 1.8881, "step": 72500 }, { "epoch": 0.77, "learning_rate": 4.9614804183332105e-05, "loss": 1.9483, "step": 73000 }, { "epoch": 0.78, "learning_rate": 4.961216585582068e-05, "loss": 1.8593, "step": 73500 }, { "epoch": 0.78, "learning_rate": 4.9609527528309256e-05, "loss": 1.9339, "step": 74000 }, { "epoch": 0.79, "learning_rate": 4.960688920079784e-05, "loss": 1.8562, "step": 74500 }, { "epoch": 0.79, "learning_rate": 4.960425087328641e-05, "loss": 1.8673, "step": 75000 }, { "epoch": 0.8, "learning_rate": 4.960161254577498e-05, "loss": 1.8887, "step": 75500 }, { "epoch": 0.8, "learning_rate": 4.959897421826356e-05, "loss": 1.9011, "step": 76000 }, { "epoch": 0.81, "learning_rate": 4.959633589075214e-05, "loss": 1.931, "step": 76500 }, { "epoch": 0.81, "learning_rate": 4.9593697563240716e-05, "loss": 1.8714, "step": 77000 }, { "epoch": 0.82, "learning_rate": 4.9591059235729284e-05, "loss": 1.929, "step": 77500 }, { "epoch": 0.82, "learning_rate": 4.9588420908217866e-05, "loss": 1.8629, "step": 78000 }, { "epoch": 0.83, "learning_rate": 4.958578258070644e-05, "loss": 1.9195, "step": 78500 }, { "epoch": 0.83, "learning_rate": 4.958314425319502e-05, "loss": 1.8173, "step": 79000 }, { "epoch": 0.84, "learning_rate": 4.958050592568359e-05, "loss": 1.8651, "step": 79500 }, { "epoch": 0.84, "learning_rate": 4.957786759817217e-05, "loss": 1.8549, "step": 80000 }, { "epoch": 0.85, "learning_rate": 4.9575229270660744e-05, "loss": 1.8392, "step": 80500 }, { "epoch": 0.85, "learning_rate": 4.957259094314932e-05, "loss": 1.8546, "step": 81000 }, { "epoch": 0.86, "learning_rate": 4.95699526156379e-05, "loss": 1.9343, "step": 81500 }, { "epoch": 0.87, "learning_rate": 4.956731428812647e-05, "loss": 1.851, "step": 82000 }, { "epoch": 0.87, "learning_rate": 4.9564675960615046e-05, "loss": 1.8565, "step": 82500 }, { "epoch": 0.88, "learning_rate": 4.956203763310363e-05, "loss": 1.831, "step": 83000 }, { "epoch": 0.88, "learning_rate": 4.9559399305592204e-05, "loss": 1.8616, "step": 83500 }, { "epoch": 0.89, "learning_rate": 4.955676097808078e-05, "loss": 1.8053, "step": 84000 }, { "epoch": 0.89, "learning_rate": 4.955412265056935e-05, "loss": 1.8284, "step": 84500 }, { "epoch": 0.9, "learning_rate": 4.955148432305793e-05, "loss": 1.8462, "step": 85000 }, { "epoch": 0.9, "learning_rate": 4.9548845995546506e-05, "loss": 1.797, "step": 85500 }, { "epoch": 0.91, "learning_rate": 4.954620766803508e-05, "loss": 1.8318, "step": 86000 }, { "epoch": 0.91, "learning_rate": 4.954356934052366e-05, "loss": 1.8532, "step": 86500 }, { "epoch": 0.92, "learning_rate": 4.954093101301223e-05, "loss": 1.9005, "step": 87000 }, { "epoch": 0.92, "learning_rate": 4.953829268550081e-05, "loss": 1.8816, "step": 87500 }, { "epoch": 0.93, "learning_rate": 4.953565435798938e-05, "loss": 1.826, "step": 88000 }, { "epoch": 0.93, "learning_rate": 4.9533016030477965e-05, "loss": 1.8047, "step": 88500 }, { "epoch": 0.94, "learning_rate": 4.953037770296654e-05, "loss": 1.8725, "step": 89000 }, { "epoch": 0.94, "learning_rate": 4.952773937545511e-05, "loss": 1.9063, "step": 89500 }, { "epoch": 0.95, "learning_rate": 4.952510104794369e-05, "loss": 1.8768, "step": 90000 }, { "epoch": 0.96, "learning_rate": 4.952246272043227e-05, "loss": 1.8589, "step": 90500 }, { "epoch": 0.96, "learning_rate": 4.951982439292084e-05, "loss": 1.9075, "step": 91000 }, { "epoch": 0.97, "learning_rate": 4.951718606540942e-05, "loss": 1.7886, "step": 91500 }, { "epoch": 0.97, "learning_rate": 4.9514547737897994e-05, "loss": 1.8383, "step": 92000 }, { "epoch": 0.98, "learning_rate": 4.951190941038657e-05, "loss": 1.8656, "step": 92500 }, { "epoch": 0.98, "learning_rate": 4.9509271082875145e-05, "loss": 1.8166, "step": 93000 }, { "epoch": 0.99, "learning_rate": 4.950663275536373e-05, "loss": 1.8027, "step": 93500 }, { "epoch": 0.99, "learning_rate": 4.9503994427852296e-05, "loss": 1.8244, "step": 94000 }, { "epoch": 1.0, "learning_rate": 4.950135610034087e-05, "loss": 1.8403, "step": 94500 }, { "epoch": 1.0, "learning_rate": 4.9498717772829453e-05, "loss": 1.7837, "step": 95000 }, { "epoch": 1.01, "learning_rate": 4.949607944531803e-05, "loss": 1.8076, "step": 95500 }, { "epoch": 1.01, "learning_rate": 4.9493441117806604e-05, "loss": 1.753, "step": 96000 }, { "epoch": 1.02, "learning_rate": 4.949080279029518e-05, "loss": 1.7972, "step": 96500 }, { "epoch": 1.02, "learning_rate": 4.9488164462783755e-05, "loss": 1.802, "step": 97000 }, { "epoch": 1.03, "learning_rate": 4.948552613527233e-05, "loss": 1.8633, "step": 97500 }, { "epoch": 1.03, "learning_rate": 4.9482887807760906e-05, "loss": 1.828, "step": 98000 }, { "epoch": 1.04, "learning_rate": 4.948024948024949e-05, "loss": 1.801, "step": 98500 }, { "epoch": 1.04, "learning_rate": 4.947761115273806e-05, "loss": 1.8275, "step": 99000 }, { "epoch": 1.05, "learning_rate": 4.947497282522663e-05, "loss": 1.6852, "step": 99500 }, { "epoch": 1.06, "learning_rate": 4.947233449771521e-05, "loss": 1.8134, "step": 100000 }, { "epoch": 1.06, "learning_rate": 4.946969617020379e-05, "loss": 1.8473, "step": 100500 }, { "epoch": 1.07, "learning_rate": 4.946705784269236e-05, "loss": 1.7716, "step": 101000 }, { "epoch": 1.07, "learning_rate": 4.9464419515180935e-05, "loss": 1.8282, "step": 101500 }, { "epoch": 1.08, "learning_rate": 4.946178118766952e-05, "loss": 1.8222, "step": 102000 }, { "epoch": 1.08, "learning_rate": 4.945914286015809e-05, "loss": 1.7972, "step": 102500 }, { "epoch": 1.09, "learning_rate": 4.945650453264667e-05, "loss": 1.7718, "step": 103000 }, { "epoch": 1.09, "learning_rate": 4.9453866205135243e-05, "loss": 1.7732, "step": 103500 }, { "epoch": 1.1, "learning_rate": 4.945122787762382e-05, "loss": 1.8121, "step": 104000 }, { "epoch": 1.1, "learning_rate": 4.9448589550112394e-05, "loss": 1.838, "step": 104500 }, { "epoch": 1.11, "learning_rate": 4.944595122260097e-05, "loss": 1.745, "step": 105000 }, { "epoch": 1.11, "learning_rate": 4.944331289508955e-05, "loss": 1.8298, "step": 105500 }, { "epoch": 1.12, "learning_rate": 4.944067456757812e-05, "loss": 1.7644, "step": 106000 }, { "epoch": 1.12, "learning_rate": 4.9438036240066696e-05, "loss": 1.7628, "step": 106500 }, { "epoch": 1.13, "learning_rate": 4.943539791255528e-05, "loss": 1.7666, "step": 107000 }, { "epoch": 1.13, "learning_rate": 4.9432759585043854e-05, "loss": 1.8087, "step": 107500 }, { "epoch": 1.14, "learning_rate": 4.943012125753243e-05, "loss": 1.8171, "step": 108000 }, { "epoch": 1.15, "learning_rate": 4.9427482930021005e-05, "loss": 1.7637, "step": 108500 }, { "epoch": 1.15, "learning_rate": 4.942484460250958e-05, "loss": 1.8473, "step": 109000 }, { "epoch": 1.16, "learning_rate": 4.9422206274998156e-05, "loss": 1.7809, "step": 109500 }, { "epoch": 1.16, "learning_rate": 4.941956794748673e-05, "loss": 1.8197, "step": 110000 }, { "epoch": 1.17, "learning_rate": 4.941692961997531e-05, "loss": 1.8043, "step": 110500 }, { "epoch": 1.17, "learning_rate": 4.941429129246388e-05, "loss": 1.7968, "step": 111000 }, { "epoch": 1.18, "learning_rate": 4.941165296495246e-05, "loss": 1.7791, "step": 111500 }, { "epoch": 1.18, "learning_rate": 4.9409014637441034e-05, "loss": 1.8318, "step": 112000 }, { "epoch": 1.19, "learning_rate": 4.9406376309929616e-05, "loss": 1.8194, "step": 112500 }, { "epoch": 1.19, "learning_rate": 4.9403737982418184e-05, "loss": 1.8207, "step": 113000 }, { "epoch": 1.2, "learning_rate": 4.940109965490676e-05, "loss": 1.7426, "step": 113500 }, { "epoch": 1.2, "learning_rate": 4.939846132739534e-05, "loss": 1.8495, "step": 114000 }, { "epoch": 1.21, "learning_rate": 4.939582299988392e-05, "loss": 1.7921, "step": 114500 }, { "epoch": 1.21, "learning_rate": 4.939318467237249e-05, "loss": 1.855, "step": 115000 }, { "epoch": 1.22, "learning_rate": 4.939054634486107e-05, "loss": 1.7774, "step": 115500 }, { "epoch": 1.22, "learning_rate": 4.9387908017349644e-05, "loss": 1.7221, "step": 116000 }, { "epoch": 1.23, "learning_rate": 4.938526968983822e-05, "loss": 1.7624, "step": 116500 }, { "epoch": 1.23, "learning_rate": 4.9382631362326795e-05, "loss": 1.7415, "step": 117000 }, { "epoch": 1.24, "learning_rate": 4.937999303481538e-05, "loss": 1.7585, "step": 117500 }, { "epoch": 1.25, "learning_rate": 4.9377354707303946e-05, "loss": 1.7917, "step": 118000 }, { "epoch": 1.25, "learning_rate": 4.937471637979252e-05, "loss": 1.7487, "step": 118500 }, { "epoch": 1.26, "learning_rate": 4.9372078052281104e-05, "loss": 1.8211, "step": 119000 }, { "epoch": 1.26, "learning_rate": 4.936943972476968e-05, "loss": 1.783, "step": 119500 }, { "epoch": 1.27, "learning_rate": 4.936680139725825e-05, "loss": 1.7488, "step": 120000 }, { "epoch": 1.27, "learning_rate": 4.936416306974683e-05, "loss": 1.8472, "step": 120500 }, { "epoch": 1.28, "learning_rate": 4.9361524742235406e-05, "loss": 1.7733, "step": 121000 }, { "epoch": 1.28, "learning_rate": 4.935888641472398e-05, "loss": 1.772, "step": 121500 }, { "epoch": 1.29, "learning_rate": 4.935624808721256e-05, "loss": 1.7718, "step": 122000 }, { "epoch": 1.29, "learning_rate": 4.935360975970113e-05, "loss": 1.7641, "step": 122500 }, { "epoch": 1.3, "learning_rate": 4.935097143218971e-05, "loss": 1.7613, "step": 123000 }, { "epoch": 1.3, "learning_rate": 4.934833310467828e-05, "loss": 1.7668, "step": 123500 }, { "epoch": 1.31, "learning_rate": 4.934569477716686e-05, "loss": 1.766, "step": 124000 }, { "epoch": 1.31, "learning_rate": 4.934305644965544e-05, "loss": 1.8101, "step": 124500 }, { "epoch": 1.32, "learning_rate": 4.934041812214401e-05, "loss": 1.7556, "step": 125000 }, { "epoch": 1.32, "learning_rate": 4.9337779794632585e-05, "loss": 1.8068, "step": 125500 }, { "epoch": 1.33, "learning_rate": 4.933514146712117e-05, "loss": 1.7675, "step": 126000 }, { "epoch": 1.33, "learning_rate": 4.933250313960974e-05, "loss": 1.7478, "step": 126500 }, { "epoch": 1.34, "learning_rate": 4.932986481209831e-05, "loss": 1.7629, "step": 127000 }, { "epoch": 1.35, "learning_rate": 4.9327226484586894e-05, "loss": 1.7598, "step": 127500 }, { "epoch": 1.35, "learning_rate": 4.932458815707547e-05, "loss": 1.7768, "step": 128000 }, { "epoch": 1.36, "learning_rate": 4.9321949829564045e-05, "loss": 1.7503, "step": 128500 }, { "epoch": 1.36, "learning_rate": 4.931931150205262e-05, "loss": 1.8354, "step": 129000 }, { "epoch": 1.37, "learning_rate": 4.9316673174541196e-05, "loss": 1.7779, "step": 129500 }, { "epoch": 1.37, "learning_rate": 4.931403484702977e-05, "loss": 1.8496, "step": 130000 }, { "epoch": 1.38, "learning_rate": 4.931139651951835e-05, "loss": 1.7351, "step": 130500 }, { "epoch": 1.38, "learning_rate": 4.930875819200693e-05, "loss": 1.7485, "step": 131000 }, { "epoch": 1.39, "learning_rate": 4.9306119864495505e-05, "loss": 1.7645, "step": 131500 }, { "epoch": 1.39, "learning_rate": 4.930348153698407e-05, "loss": 1.7865, "step": 132000 }, { "epoch": 1.4, "learning_rate": 4.9300843209472656e-05, "loss": 1.8111, "step": 132500 }, { "epoch": 1.4, "learning_rate": 4.929820488196123e-05, "loss": 1.8053, "step": 133000 }, { "epoch": 1.41, "learning_rate": 4.9295566554449807e-05, "loss": 1.7417, "step": 133500 }, { "epoch": 1.41, "learning_rate": 4.929292822693838e-05, "loss": 1.7713, "step": 134000 }, { "epoch": 1.42, "learning_rate": 4.929028989942696e-05, "loss": 1.7865, "step": 134500 }, { "epoch": 1.42, "learning_rate": 4.928765157191553e-05, "loss": 1.7825, "step": 135000 }, { "epoch": 1.43, "learning_rate": 4.928501324440411e-05, "loss": 1.7241, "step": 135500 }, { "epoch": 1.44, "learning_rate": 4.9282374916892684e-05, "loss": 1.7344, "step": 136000 }, { "epoch": 1.44, "learning_rate": 4.927973658938126e-05, "loss": 1.7488, "step": 136500 }, { "epoch": 1.45, "learning_rate": 4.9277098261869835e-05, "loss": 1.7841, "step": 137000 }, { "epoch": 1.45, "learning_rate": 4.927445993435841e-05, "loss": 1.7839, "step": 137500 }, { "epoch": 1.46, "learning_rate": 4.927182160684699e-05, "loss": 1.7906, "step": 138000 }, { "epoch": 1.46, "learning_rate": 4.926918327933557e-05, "loss": 1.6839, "step": 138500 }, { "epoch": 1.47, "learning_rate": 4.926654495182414e-05, "loss": 1.8357, "step": 139000 }, { "epoch": 1.47, "learning_rate": 4.926390662431272e-05, "loss": 1.7853, "step": 139500 }, { "epoch": 1.48, "learning_rate": 4.9261268296801295e-05, "loss": 1.7617, "step": 140000 }, { "epoch": 1.48, "learning_rate": 4.925862996928987e-05, "loss": 1.7382, "step": 140500 }, { "epoch": 1.49, "learning_rate": 4.9255991641778446e-05, "loss": 1.7745, "step": 141000 }, { "epoch": 1.49, "learning_rate": 4.925335331426702e-05, "loss": 1.8369, "step": 141500 }, { "epoch": 1.5, "learning_rate": 4.92507149867556e-05, "loss": 1.8001, "step": 142000 }, { "epoch": 1.5, "learning_rate": 4.924807665924417e-05, "loss": 1.752, "step": 142500 }, { "epoch": 1.51, "learning_rate": 4.9245438331732754e-05, "loss": 1.7505, "step": 143000 }, { "epoch": 1.51, "learning_rate": 4.924280000422133e-05, "loss": 1.7562, "step": 143500 }, { "epoch": 1.52, "learning_rate": 4.92401616767099e-05, "loss": 1.7219, "step": 144000 }, { "epoch": 1.52, "learning_rate": 4.923752334919848e-05, "loss": 1.8104, "step": 144500 }, { "epoch": 1.53, "learning_rate": 4.9234885021687056e-05, "loss": 1.7915, "step": 145000 }, { "epoch": 1.54, "learning_rate": 4.923224669417563e-05, "loss": 1.6853, "step": 145500 }, { "epoch": 1.54, "learning_rate": 4.92296083666642e-05, "loss": 1.7633, "step": 146000 }, { "epoch": 1.55, "learning_rate": 4.922697003915278e-05, "loss": 1.7715, "step": 146500 }, { "epoch": 1.55, "learning_rate": 4.922433171164136e-05, "loss": 1.7615, "step": 147000 }, { "epoch": 1.56, "learning_rate": 4.9221693384129934e-05, "loss": 1.764, "step": 147500 }, { "epoch": 1.56, "learning_rate": 4.9219055056618516e-05, "loss": 1.6518, "step": 148000 }, { "epoch": 1.57, "learning_rate": 4.9216416729107085e-05, "loss": 1.7363, "step": 148500 }, { "epoch": 1.57, "learning_rate": 4.921377840159566e-05, "loss": 1.7355, "step": 149000 }, { "epoch": 1.58, "learning_rate": 4.9211140074084236e-05, "loss": 1.7463, "step": 149500 }, { "epoch": 1.58, "learning_rate": 4.920850174657282e-05, "loss": 1.7872, "step": 150000 }, { "epoch": 1.59, "learning_rate": 4.9205863419061393e-05, "loss": 1.7646, "step": 150500 }, { "epoch": 1.59, "learning_rate": 4.920322509154996e-05, "loss": 1.8, "step": 151000 }, { "epoch": 1.6, "learning_rate": 4.9200586764038544e-05, "loss": 1.7694, "step": 151500 }, { "epoch": 1.6, "learning_rate": 4.919794843652712e-05, "loss": 1.6817, "step": 152000 }, { "epoch": 1.61, "learning_rate": 4.9195310109015695e-05, "loss": 1.696, "step": 152500 }, { "epoch": 1.61, "learning_rate": 4.919267178150427e-05, "loss": 1.655, "step": 153000 }, { "epoch": 1.62, "learning_rate": 4.9190033453992846e-05, "loss": 1.739, "step": 153500 }, { "epoch": 1.63, "learning_rate": 4.918739512648142e-05, "loss": 1.7529, "step": 154000 }, { "epoch": 1.63, "learning_rate": 4.918475679897e-05, "loss": 1.7408, "step": 154500 }, { "epoch": 1.64, "learning_rate": 4.918211847145858e-05, "loss": 1.7453, "step": 155000 }, { "epoch": 1.64, "learning_rate": 4.917948014394715e-05, "loss": 1.6618, "step": 155500 }, { "epoch": 1.65, "learning_rate": 4.9176841816435724e-05, "loss": 1.7424, "step": 156000 }, { "epoch": 1.65, "learning_rate": 4.9174203488924306e-05, "loss": 1.6764, "step": 156500 }, { "epoch": 1.66, "learning_rate": 4.917156516141288e-05, "loss": 1.7322, "step": 157000 }, { "epoch": 1.66, "learning_rate": 4.916892683390146e-05, "loss": 1.729, "step": 157500 }, { "epoch": 1.67, "learning_rate": 4.9166288506390026e-05, "loss": 1.7404, "step": 158000 }, { "epoch": 1.67, "learning_rate": 4.916365017887861e-05, "loss": 1.7711, "step": 158500 }, { "epoch": 1.68, "learning_rate": 4.9161011851367184e-05, "loss": 1.7441, "step": 159000 }, { "epoch": 1.68, "learning_rate": 4.915837352385576e-05, "loss": 1.7461, "step": 159500 }, { "epoch": 1.69, "learning_rate": 4.915573519634434e-05, "loss": 1.8016, "step": 160000 }, { "epoch": 1.69, "learning_rate": 4.915309686883291e-05, "loss": 1.8012, "step": 160500 }, { "epoch": 1.7, "learning_rate": 4.9150458541321485e-05, "loss": 1.7191, "step": 161000 }, { "epoch": 1.7, "learning_rate": 4.914782021381006e-05, "loss": 1.7023, "step": 161500 }, { "epoch": 1.71, "learning_rate": 4.914518188629864e-05, "loss": 1.7468, "step": 162000 }, { "epoch": 1.71, "learning_rate": 4.914254355878722e-05, "loss": 1.7401, "step": 162500 }, { "epoch": 1.72, "learning_rate": 4.913990523127579e-05, "loss": 1.6992, "step": 163000 }, { "epoch": 1.73, "learning_rate": 4.913726690376437e-05, "loss": 1.7466, "step": 163500 }, { "epoch": 1.73, "learning_rate": 4.9134628576252945e-05, "loss": 1.7413, "step": 164000 }, { "epoch": 1.74, "learning_rate": 4.913199024874152e-05, "loss": 1.6929, "step": 164500 }, { "epoch": 1.74, "learning_rate": 4.9129351921230096e-05, "loss": 1.7348, "step": 165000 }, { "epoch": 1.75, "learning_rate": 4.912671359371867e-05, "loss": 1.7522, "step": 165500 }, { "epoch": 1.75, "learning_rate": 4.912407526620725e-05, "loss": 1.7337, "step": 166000 }, { "epoch": 1.76, "learning_rate": 4.912143693869582e-05, "loss": 1.7149, "step": 166500 }, { "epoch": 1.76, "learning_rate": 4.9118798611184405e-05, "loss": 1.7649, "step": 167000 }, { "epoch": 1.77, "learning_rate": 4.9116160283672974e-05, "loss": 1.7287, "step": 167500 }, { "epoch": 1.77, "learning_rate": 4.911352195616155e-05, "loss": 1.7685, "step": 168000 }, { "epoch": 1.78, "learning_rate": 4.911088362865013e-05, "loss": 1.7232, "step": 168500 }, { "epoch": 1.78, "learning_rate": 4.910824530113871e-05, "loss": 1.6898, "step": 169000 }, { "epoch": 1.79, "learning_rate": 4.910560697362728e-05, "loss": 1.7203, "step": 169500 }, { "epoch": 1.79, "learning_rate": 4.910296864611586e-05, "loss": 1.704, "step": 170000 }, { "epoch": 1.8, "learning_rate": 4.910033031860443e-05, "loss": 1.7518, "step": 170500 }, { "epoch": 1.8, "learning_rate": 4.909769199109301e-05, "loss": 1.7617, "step": 171000 }, { "epoch": 1.81, "learning_rate": 4.9095053663581584e-05, "loss": 1.7215, "step": 171500 }, { "epoch": 1.82, "learning_rate": 4.9092415336070167e-05, "loss": 1.686, "step": 172000 }, { "epoch": 1.82, "learning_rate": 4.9089777008558735e-05, "loss": 1.697, "step": 172500 }, { "epoch": 1.83, "learning_rate": 4.908713868104731e-05, "loss": 1.7945, "step": 173000 }, { "epoch": 1.83, "learning_rate": 4.9084500353535886e-05, "loss": 1.7217, "step": 173500 }, { "epoch": 1.84, "learning_rate": 4.908186202602447e-05, "loss": 1.7539, "step": 174000 }, { "epoch": 1.84, "learning_rate": 4.907922369851304e-05, "loss": 1.7645, "step": 174500 }, { "epoch": 1.85, "learning_rate": 4.907658537100161e-05, "loss": 1.7937, "step": 175000 }, { "epoch": 1.85, "learning_rate": 4.9073947043490195e-05, "loss": 1.75, "step": 175500 }, { "epoch": 1.86, "learning_rate": 4.907130871597877e-05, "loss": 1.7107, "step": 176000 }, { "epoch": 1.86, "learning_rate": 4.9068670388467346e-05, "loss": 1.7503, "step": 176500 }, { "epoch": 1.87, "learning_rate": 4.906603206095592e-05, "loss": 1.7788, "step": 177000 }, { "epoch": 1.87, "learning_rate": 4.90633937334445e-05, "loss": 1.7617, "step": 177500 }, { "epoch": 1.88, "learning_rate": 4.906075540593307e-05, "loss": 1.7401, "step": 178000 }, { "epoch": 1.88, "learning_rate": 4.905811707842165e-05, "loss": 1.712, "step": 178500 }, { "epoch": 1.89, "learning_rate": 4.905547875091023e-05, "loss": 1.6923, "step": 179000 }, { "epoch": 1.89, "learning_rate": 4.90528404233988e-05, "loss": 1.674, "step": 179500 }, { "epoch": 1.9, "learning_rate": 4.9050202095887374e-05, "loss": 1.7781, "step": 180000 }, { "epoch": 1.9, "learning_rate": 4.9047563768375957e-05, "loss": 1.7068, "step": 180500 }, { "epoch": 1.91, "learning_rate": 4.904492544086453e-05, "loss": 1.7466, "step": 181000 }, { "epoch": 1.92, "learning_rate": 4.904228711335311e-05, "loss": 1.7442, "step": 181500 }, { "epoch": 1.92, "learning_rate": 4.903964878584168e-05, "loss": 1.7456, "step": 182000 }, { "epoch": 1.93, "learning_rate": 4.903701045833026e-05, "loss": 1.6894, "step": 182500 }, { "epoch": 1.93, "learning_rate": 4.9034372130818834e-05, "loss": 1.7349, "step": 183000 }, { "epoch": 1.94, "learning_rate": 4.903173380330741e-05, "loss": 1.7361, "step": 183500 }, { "epoch": 1.94, "learning_rate": 4.9029095475795985e-05, "loss": 1.7298, "step": 184000 }, { "epoch": 1.95, "learning_rate": 4.902645714828456e-05, "loss": 1.709, "step": 184500 }, { "epoch": 1.95, "learning_rate": 4.9023818820773136e-05, "loss": 1.7666, "step": 185000 }, { "epoch": 1.96, "learning_rate": 4.902118049326171e-05, "loss": 1.6987, "step": 185500 }, { "epoch": 1.96, "learning_rate": 4.9018542165750294e-05, "loss": 1.6872, "step": 186000 }, { "epoch": 1.97, "learning_rate": 4.901590383823886e-05, "loss": 1.7145, "step": 186500 }, { "epoch": 1.97, "learning_rate": 4.901326551072744e-05, "loss": 1.7244, "step": 187000 }, { "epoch": 1.98, "learning_rate": 4.901062718321602e-05, "loss": 1.7201, "step": 187500 }, { "epoch": 1.98, "learning_rate": 4.9007988855704596e-05, "loss": 1.7063, "step": 188000 }, { "epoch": 1.99, "learning_rate": 4.900535052819317e-05, "loss": 1.7023, "step": 188500 }, { "epoch": 1.99, "learning_rate": 4.900271220068175e-05, "loss": 1.6943, "step": 189000 }, { "epoch": 2.0, "learning_rate": 4.900007387317032e-05, "loss": 1.7301, "step": 189500 }, { "epoch": 2.01, "learning_rate": 4.89974355456589e-05, "loss": 1.7021, "step": 190000 }, { "epoch": 2.01, "learning_rate": 4.899479721814747e-05, "loss": 1.7012, "step": 190500 }, { "epoch": 2.02, "learning_rate": 4.8992158890636055e-05, "loss": 1.7209, "step": 191000 }, { "epoch": 2.02, "learning_rate": 4.8989520563124624e-05, "loss": 1.7103, "step": 191500 }, { "epoch": 2.03, "learning_rate": 4.89868822356132e-05, "loss": 1.769, "step": 192000 }, { "epoch": 2.03, "learning_rate": 4.898424390810178e-05, "loss": 1.724, "step": 192500 }, { "epoch": 2.04, "learning_rate": 4.898160558059036e-05, "loss": 1.7372, "step": 193000 }, { "epoch": 2.04, "learning_rate": 4.8978967253078926e-05, "loss": 1.6769, "step": 193500 }, { "epoch": 2.05, "learning_rate": 4.897632892556751e-05, "loss": 1.7192, "step": 194000 }, { "epoch": 2.05, "learning_rate": 4.8973690598056084e-05, "loss": 1.6837, "step": 194500 }, { "epoch": 2.06, "learning_rate": 4.897105227054466e-05, "loss": 1.7341, "step": 195000 }, { "epoch": 2.06, "learning_rate": 4.8968413943033235e-05, "loss": 1.6831, "step": 195500 }, { "epoch": 2.07, "learning_rate": 4.896577561552181e-05, "loss": 1.6526, "step": 196000 }, { "epoch": 2.07, "learning_rate": 4.8963137288010386e-05, "loss": 1.7261, "step": 196500 }, { "epoch": 2.08, "learning_rate": 4.896049896049896e-05, "loss": 1.7118, "step": 197000 }, { "epoch": 2.08, "learning_rate": 4.895786063298754e-05, "loss": 1.6494, "step": 197500 }, { "epoch": 2.09, "learning_rate": 4.895522230547612e-05, "loss": 1.6473, "step": 198000 }, { "epoch": 2.09, "learning_rate": 4.895258397796469e-05, "loss": 1.7002, "step": 198500 }, { "epoch": 2.1, "learning_rate": 4.894994565045326e-05, "loss": 1.7142, "step": 199000 }, { "epoch": 2.11, "learning_rate": 4.8947307322941845e-05, "loss": 1.6836, "step": 199500 }, { "epoch": 2.11, "learning_rate": 4.894466899543042e-05, "loss": 1.7262, "step": 200000 }, { "epoch": 2.12, "learning_rate": 4.8942030667918996e-05, "loss": 1.6797, "step": 200500 }, { "epoch": 2.12, "learning_rate": 4.893939234040757e-05, "loss": 1.757, "step": 201000 }, { "epoch": 2.13, "learning_rate": 4.893675401289615e-05, "loss": 1.6898, "step": 201500 }, { "epoch": 2.13, "learning_rate": 4.893411568538472e-05, "loss": 1.7077, "step": 202000 }, { "epoch": 2.14, "learning_rate": 4.89314773578733e-05, "loss": 1.7184, "step": 202500 }, { "epoch": 2.14, "learning_rate": 4.8928839030361874e-05, "loss": 1.7181, "step": 203000 }, { "epoch": 2.15, "learning_rate": 4.892620070285045e-05, "loss": 1.7161, "step": 203500 }, { "epoch": 2.15, "learning_rate": 4.8923562375339025e-05, "loss": 1.6628, "step": 204000 }, { "epoch": 2.16, "learning_rate": 4.892092404782761e-05, "loss": 1.6527, "step": 204500 }, { "epoch": 2.16, "learning_rate": 4.891828572031618e-05, "loss": 1.7463, "step": 205000 }, { "epoch": 2.17, "learning_rate": 4.891564739280475e-05, "loss": 1.743, "step": 205500 }, { "epoch": 2.17, "learning_rate": 4.8913009065293334e-05, "loss": 1.6885, "step": 206000 }, { "epoch": 2.18, "learning_rate": 4.891037073778191e-05, "loss": 1.6991, "step": 206500 }, { "epoch": 2.18, "learning_rate": 4.8907732410270485e-05, "loss": 1.6554, "step": 207000 }, { "epoch": 2.19, "learning_rate": 4.890509408275906e-05, "loss": 1.7406, "step": 207500 }, { "epoch": 2.2, "learning_rate": 4.8902455755247635e-05, "loss": 1.6958, "step": 208000 }, { "epoch": 2.2, "learning_rate": 4.889981742773621e-05, "loss": 1.7102, "step": 208500 }, { "epoch": 2.21, "learning_rate": 4.8897179100224786e-05, "loss": 1.6647, "step": 209000 }, { "epoch": 2.21, "learning_rate": 4.889454077271336e-05, "loss": 1.6753, "step": 209500 }, { "epoch": 2.22, "learning_rate": 4.8891902445201944e-05, "loss": 1.6846, "step": 210000 }, { "epoch": 2.22, "learning_rate": 4.888926411769051e-05, "loss": 1.6655, "step": 210500 }, { "epoch": 2.23, "learning_rate": 4.888662579017909e-05, "loss": 1.7068, "step": 211000 }, { "epoch": 2.23, "learning_rate": 4.888398746266767e-05, "loss": 1.666, "step": 211500 }, { "epoch": 2.24, "learning_rate": 4.8881349135156246e-05, "loss": 1.7239, "step": 212000 }, { "epoch": 2.24, "learning_rate": 4.8878710807644815e-05, "loss": 1.6347, "step": 212500 }, { "epoch": 2.25, "learning_rate": 4.88760724801334e-05, "loss": 1.693, "step": 213000 }, { "epoch": 2.25, "learning_rate": 4.887343415262197e-05, "loss": 1.693, "step": 213500 }, { "epoch": 2.26, "learning_rate": 4.887079582511055e-05, "loss": 1.698, "step": 214000 }, { "epoch": 2.26, "learning_rate": 4.8868157497599124e-05, "loss": 1.7336, "step": 214500 }, { "epoch": 2.27, "learning_rate": 4.88655191700877e-05, "loss": 1.7195, "step": 215000 }, { "epoch": 2.27, "learning_rate": 4.8862880842576275e-05, "loss": 1.6858, "step": 215500 }, { "epoch": 2.28, "learning_rate": 4.886024251506485e-05, "loss": 1.6581, "step": 216000 }, { "epoch": 2.28, "learning_rate": 4.885760418755343e-05, "loss": 1.7193, "step": 216500 }, { "epoch": 2.29, "learning_rate": 4.885496586004201e-05, "loss": 1.7095, "step": 217000 }, { "epoch": 2.3, "learning_rate": 4.8852327532530577e-05, "loss": 1.7284, "step": 217500 }, { "epoch": 2.3, "learning_rate": 4.884968920501916e-05, "loss": 1.6482, "step": 218000 }, { "epoch": 2.31, "learning_rate": 4.8847050877507734e-05, "loss": 1.7369, "step": 218500 }, { "epoch": 2.31, "learning_rate": 4.884441254999631e-05, "loss": 1.7034, "step": 219000 }, { "epoch": 2.32, "learning_rate": 4.8841774222484885e-05, "loss": 1.6569, "step": 219500 }, { "epoch": 2.32, "learning_rate": 4.883913589497346e-05, "loss": 1.68, "step": 220000 }, { "epoch": 2.33, "learning_rate": 4.8836497567462036e-05, "loss": 1.7285, "step": 220500 }, { "epoch": 2.33, "learning_rate": 4.883385923995061e-05, "loss": 1.688, "step": 221000 }, { "epoch": 2.34, "learning_rate": 4.8831220912439194e-05, "loss": 1.7384, "step": 221500 }, { "epoch": 2.34, "learning_rate": 4.882858258492776e-05, "loss": 1.7264, "step": 222000 }, { "epoch": 2.35, "learning_rate": 4.882594425741634e-05, "loss": 1.7002, "step": 222500 }, { "epoch": 2.35, "learning_rate": 4.8823305929904914e-05, "loss": 1.672, "step": 223000 }, { "epoch": 2.36, "learning_rate": 4.8820667602393496e-05, "loss": 1.6912, "step": 223500 }, { "epoch": 2.36, "learning_rate": 4.881802927488207e-05, "loss": 1.739, "step": 224000 }, { "epoch": 2.37, "learning_rate": 4.881539094737064e-05, "loss": 1.7253, "step": 224500 }, { "epoch": 2.37, "learning_rate": 4.881275261985922e-05, "loss": 1.7489, "step": 225000 }, { "epoch": 2.38, "learning_rate": 4.88101142923478e-05, "loss": 1.6896, "step": 225500 }, { "epoch": 2.39, "learning_rate": 4.880747596483637e-05, "loss": 1.7235, "step": 226000 }, { "epoch": 2.39, "learning_rate": 4.880483763732495e-05, "loss": 1.7274, "step": 226500 }, { "epoch": 2.4, "learning_rate": 4.8802199309813524e-05, "loss": 1.6662, "step": 227000 }, { "epoch": 2.4, "learning_rate": 4.87995609823021e-05, "loss": 1.6915, "step": 227500 }, { "epoch": 2.41, "learning_rate": 4.8796922654790675e-05, "loss": 1.7037, "step": 228000 }, { "epoch": 2.41, "learning_rate": 4.879428432727926e-05, "loss": 1.6538, "step": 228500 }, { "epoch": 2.42, "learning_rate": 4.879164599976783e-05, "loss": 1.7442, "step": 229000 }, { "epoch": 2.42, "learning_rate": 4.87890076722564e-05, "loss": 1.7138, "step": 229500 }, { "epoch": 2.43, "learning_rate": 4.8786369344744984e-05, "loss": 1.6833, "step": 230000 }, { "epoch": 2.43, "learning_rate": 4.878373101723356e-05, "loss": 1.7533, "step": 230500 }, { "epoch": 2.44, "learning_rate": 4.8781092689722135e-05, "loss": 1.6502, "step": 231000 }, { "epoch": 2.44, "learning_rate": 4.8778454362210704e-05, "loss": 1.6832, "step": 231500 }, { "epoch": 2.45, "learning_rate": 4.8775816034699286e-05, "loss": 1.7059, "step": 232000 }, { "epoch": 2.45, "learning_rate": 4.877317770718786e-05, "loss": 1.7332, "step": 232500 }, { "epoch": 2.46, "learning_rate": 4.877053937967644e-05, "loss": 1.6688, "step": 233000 }, { "epoch": 2.46, "learning_rate": 4.876790105216502e-05, "loss": 1.7518, "step": 233500 }, { "epoch": 2.47, "learning_rate": 4.876526272465359e-05, "loss": 1.7212, "step": 234000 }, { "epoch": 2.47, "learning_rate": 4.8762624397142163e-05, "loss": 1.6932, "step": 234500 }, { "epoch": 2.48, "learning_rate": 4.875998606963074e-05, "loss": 1.7022, "step": 235000 }, { "epoch": 2.49, "learning_rate": 4.875734774211932e-05, "loss": 1.6285, "step": 235500 }, { "epoch": 2.49, "learning_rate": 4.87547094146079e-05, "loss": 1.7442, "step": 236000 }, { "epoch": 2.5, "learning_rate": 4.8752071087096465e-05, "loss": 1.6937, "step": 236500 }, { "epoch": 2.5, "learning_rate": 4.874943275958505e-05, "loss": 1.7169, "step": 237000 }, { "epoch": 2.51, "learning_rate": 4.874679443207362e-05, "loss": 1.7334, "step": 237500 }, { "epoch": 2.51, "learning_rate": 4.87441561045622e-05, "loss": 1.6249, "step": 238000 }, { "epoch": 2.52, "learning_rate": 4.8741517777050774e-05, "loss": 1.7606, "step": 238500 }, { "epoch": 2.52, "learning_rate": 4.873887944953935e-05, "loss": 1.703, "step": 239000 }, { "epoch": 2.53, "learning_rate": 4.8736241122027925e-05, "loss": 1.7377, "step": 239500 }, { "epoch": 2.53, "learning_rate": 4.87336027945165e-05, "loss": 1.6979, "step": 240000 }, { "epoch": 2.54, "learning_rate": 4.873096446700508e-05, "loss": 1.6847, "step": 240500 }, { "epoch": 2.54, "learning_rate": 4.872832613949365e-05, "loss": 1.6939, "step": 241000 }, { "epoch": 2.55, "learning_rate": 4.872568781198223e-05, "loss": 1.7648, "step": 241500 }, { "epoch": 2.55, "learning_rate": 4.872304948447081e-05, "loss": 1.6578, "step": 242000 }, { "epoch": 2.56, "learning_rate": 4.8720411156959385e-05, "loss": 1.7277, "step": 242500 }, { "epoch": 2.56, "learning_rate": 4.871777282944796e-05, "loss": 1.7014, "step": 243000 }, { "epoch": 2.57, "learning_rate": 4.8715134501936536e-05, "loss": 1.8071, "step": 243500 }, { "epoch": 2.58, "learning_rate": 4.871249617442511e-05, "loss": 1.6826, "step": 244000 }, { "epoch": 2.58, "learning_rate": 4.870985784691369e-05, "loss": 1.721, "step": 244500 }, { "epoch": 2.59, "learning_rate": 4.870721951940226e-05, "loss": 1.6965, "step": 245000 }, { "epoch": 2.59, "learning_rate": 4.8704581191890844e-05, "loss": 1.7585, "step": 245500 }, { "epoch": 2.6, "learning_rate": 4.870194286437941e-05, "loss": 1.6792, "step": 246000 }, { "epoch": 2.6, "learning_rate": 4.869930453686799e-05, "loss": 1.6543, "step": 246500 }, { "epoch": 2.61, "learning_rate": 4.8696666209356564e-05, "loss": 1.7448, "step": 247000 }, { "epoch": 2.61, "learning_rate": 4.8694027881845146e-05, "loss": 1.691, "step": 247500 }, { "epoch": 2.62, "learning_rate": 4.869138955433372e-05, "loss": 1.6794, "step": 248000 }, { "epoch": 2.62, "learning_rate": 4.868875122682229e-05, "loss": 1.7113, "step": 248500 }, { "epoch": 2.63, "learning_rate": 4.868611289931087e-05, "loss": 1.7411, "step": 249000 }, { "epoch": 2.63, "learning_rate": 4.868347457179945e-05, "loss": 1.7102, "step": 249500 }, { "epoch": 2.64, "learning_rate": 4.8680836244288024e-05, "loss": 1.6756, "step": 250000 }, { "epoch": 2.64, "learning_rate": 4.86781979167766e-05, "loss": 1.6735, "step": 250500 }, { "epoch": 2.65, "learning_rate": 4.8675559589265175e-05, "loss": 1.7092, "step": 251000 }, { "epoch": 2.65, "learning_rate": 4.867292126175375e-05, "loss": 1.7197, "step": 251500 }, { "epoch": 2.66, "learning_rate": 4.8670282934242326e-05, "loss": 1.7064, "step": 252000 }, { "epoch": 2.66, "learning_rate": 4.866764460673091e-05, "loss": 1.7369, "step": 252500 }, { "epoch": 2.67, "learning_rate": 4.866500627921948e-05, "loss": 1.7038, "step": 253000 }, { "epoch": 2.68, "learning_rate": 4.866236795170805e-05, "loss": 1.7508, "step": 253500 }, { "epoch": 2.68, "learning_rate": 4.8659729624196635e-05, "loss": 1.7227, "step": 254000 }, { "epoch": 2.69, "learning_rate": 4.865709129668521e-05, "loss": 1.6917, "step": 254500 }, { "epoch": 2.69, "learning_rate": 4.8654452969173785e-05, "loss": 1.7066, "step": 255000 }, { "epoch": 2.7, "learning_rate": 4.865181464166236e-05, "loss": 1.7158, "step": 255500 }, { "epoch": 2.7, "learning_rate": 4.8649176314150936e-05, "loss": 1.6913, "step": 256000 }, { "epoch": 2.71, "learning_rate": 4.864653798663951e-05, "loss": 1.7248, "step": 256500 }, { "epoch": 2.71, "learning_rate": 4.864389965912809e-05, "loss": 1.6812, "step": 257000 }, { "epoch": 2.72, "learning_rate": 4.864126133161667e-05, "loss": 1.7347, "step": 257500 }, { "epoch": 2.72, "learning_rate": 4.863862300410524e-05, "loss": 1.7114, "step": 258000 }, { "epoch": 2.73, "learning_rate": 4.8635984676593814e-05, "loss": 1.7453, "step": 258500 }, { "epoch": 2.73, "learning_rate": 4.863334634908239e-05, "loss": 1.7243, "step": 259000 }, { "epoch": 2.74, "learning_rate": 4.863070802157097e-05, "loss": 1.7015, "step": 259500 }, { "epoch": 2.74, "learning_rate": 4.862806969405954e-05, "loss": 1.7205, "step": 260000 }, { "epoch": 2.75, "learning_rate": 4.8625431366548116e-05, "loss": 1.7205, "step": 260500 }, { "epoch": 2.75, "learning_rate": 4.86227930390367e-05, "loss": 1.7309, "step": 261000 }, { "epoch": 2.76, "learning_rate": 4.8620154711525274e-05, "loss": 1.7358, "step": 261500 }, { "epoch": 2.76, "learning_rate": 4.861751638401385e-05, "loss": 1.7191, "step": 262000 }, { "epoch": 2.77, "learning_rate": 4.8614878056502425e-05, "loss": 1.6687, "step": 262500 }, { "epoch": 2.78, "learning_rate": 4.8612239728991e-05, "loss": 1.6839, "step": 263000 }, { "epoch": 2.78, "learning_rate": 4.8609601401479576e-05, "loss": 1.7014, "step": 263500 }, { "epoch": 2.79, "learning_rate": 4.860696307396815e-05, "loss": 1.749, "step": 264000 }, { "epoch": 2.79, "learning_rate": 4.860432474645673e-05, "loss": 1.7306, "step": 264500 }, { "epoch": 2.8, "learning_rate": 4.86016864189453e-05, "loss": 1.6477, "step": 265000 }, { "epoch": 2.8, "learning_rate": 4.859904809143388e-05, "loss": 1.7868, "step": 265500 }, { "epoch": 2.81, "learning_rate": 4.859640976392246e-05, "loss": 1.7169, "step": 266000 }, { "epoch": 2.81, "learning_rate": 4.8593771436411035e-05, "loss": 1.7109, "step": 266500 }, { "epoch": 2.82, "learning_rate": 4.859113310889961e-05, "loss": 1.7196, "step": 267000 }, { "epoch": 2.82, "learning_rate": 4.8588494781388186e-05, "loss": 1.6625, "step": 267500 }, { "epoch": 2.83, "learning_rate": 4.858585645387676e-05, "loss": 1.6963, "step": 268000 }, { "epoch": 2.83, "learning_rate": 4.858321812636534e-05, "loss": 1.7503, "step": 268500 }, { "epoch": 2.84, "learning_rate": 4.858057979885391e-05, "loss": 1.7355, "step": 269000 }, { "epoch": 2.84, "learning_rate": 4.857794147134249e-05, "loss": 1.655, "step": 269500 }, { "epoch": 2.85, "learning_rate": 4.8575303143831064e-05, "loss": 1.7101, "step": 270000 }, { "epoch": 2.85, "learning_rate": 4.857266481631964e-05, "loss": 1.7334, "step": 270500 }, { "epoch": 2.86, "learning_rate": 4.8570026488808215e-05, "loss": 1.6939, "step": 271000 }, { "epoch": 2.87, "learning_rate": 4.85673881612968e-05, "loss": 1.7089, "step": 271500 }, { "epoch": 2.87, "learning_rate": 4.8564749833785366e-05, "loss": 1.6987, "step": 272000 }, { "epoch": 2.88, "learning_rate": 4.856211150627394e-05, "loss": 1.6851, "step": 272500 }, { "epoch": 2.88, "learning_rate": 4.855947317876252e-05, "loss": 1.688, "step": 273000 }, { "epoch": 2.89, "learning_rate": 4.85568348512511e-05, "loss": 1.6986, "step": 273500 }, { "epoch": 2.89, "learning_rate": 4.8554196523739674e-05, "loss": 1.6905, "step": 274000 }, { "epoch": 2.9, "learning_rate": 4.855155819622825e-05, "loss": 1.7356, "step": 274500 }, { "epoch": 2.9, "learning_rate": 4.8548919868716825e-05, "loss": 1.6631, "step": 275000 }, { "epoch": 2.91, "learning_rate": 4.85462815412054e-05, "loss": 1.7263, "step": 275500 }, { "epoch": 2.91, "learning_rate": 4.8543643213693976e-05, "loss": 1.6505, "step": 276000 }, { "epoch": 2.92, "learning_rate": 4.854100488618256e-05, "loss": 1.6988, "step": 276500 }, { "epoch": 2.92, "learning_rate": 4.853836655867113e-05, "loss": 1.6654, "step": 277000 }, { "epoch": 2.93, "learning_rate": 4.85357282311597e-05, "loss": 1.6675, "step": 277500 }, { "epoch": 2.93, "learning_rate": 4.8533089903648285e-05, "loss": 1.6286, "step": 278000 }, { "epoch": 2.94, "learning_rate": 4.853045157613686e-05, "loss": 1.6634, "step": 278500 }, { "epoch": 2.94, "learning_rate": 4.852781324862543e-05, "loss": 1.7349, "step": 279000 }, { "epoch": 2.95, "learning_rate": 4.852517492111401e-05, "loss": 1.6749, "step": 279500 }, { "epoch": 2.95, "learning_rate": 4.852253659360259e-05, "loss": 1.6642, "step": 280000 }, { "epoch": 2.96, "learning_rate": 4.851989826609116e-05, "loss": 1.6811, "step": 280500 }, { "epoch": 2.97, "learning_rate": 4.851725993857974e-05, "loss": 1.7147, "step": 281000 }, { "epoch": 2.97, "learning_rate": 4.8514621611068313e-05, "loss": 1.6932, "step": 281500 }, { "epoch": 2.98, "learning_rate": 4.851198328355689e-05, "loss": 1.6603, "step": 282000 }, { "epoch": 2.98, "learning_rate": 4.8509344956045464e-05, "loss": 1.7445, "step": 282500 }, { "epoch": 2.99, "learning_rate": 4.850670662853404e-05, "loss": 1.7289, "step": 283000 }, { "epoch": 2.99, "learning_rate": 4.850406830102262e-05, "loss": 1.7037, "step": 283500 }, { "epoch": 3.0, "learning_rate": 4.850142997351119e-05, "loss": 1.7119, "step": 284000 }, { "epoch": 3.0, "learning_rate": 4.8498791645999766e-05, "loss": 1.7227, "step": 284500 }, { "epoch": 3.01, "learning_rate": 4.849615331848835e-05, "loss": 1.7518, "step": 285000 }, { "epoch": 3.01, "learning_rate": 4.8493514990976924e-05, "loss": 1.7073, "step": 285500 }, { "epoch": 3.02, "learning_rate": 4.84908766634655e-05, "loss": 1.631, "step": 286000 }, { "epoch": 3.02, "learning_rate": 4.8488238335954075e-05, "loss": 1.7416, "step": 286500 }, { "epoch": 3.03, "learning_rate": 4.848560000844265e-05, "loss": 1.6357, "step": 287000 }, { "epoch": 3.03, "learning_rate": 4.8482961680931226e-05, "loss": 1.6751, "step": 287500 }, { "epoch": 3.04, "learning_rate": 4.84803233534198e-05, "loss": 1.6612, "step": 288000 }, { "epoch": 3.04, "learning_rate": 4.847768502590838e-05, "loss": 1.6609, "step": 288500 }, { "epoch": 3.05, "learning_rate": 4.847504669839695e-05, "loss": 1.6773, "step": 289000 }, { "epoch": 3.06, "learning_rate": 4.847240837088553e-05, "loss": 1.6682, "step": 289500 }, { "epoch": 3.06, "learning_rate": 4.846977004337411e-05, "loss": 1.7214, "step": 290000 }, { "epoch": 3.07, "learning_rate": 4.8467131715862686e-05, "loss": 1.6761, "step": 290500 }, { "epoch": 3.07, "learning_rate": 4.8464493388351254e-05, "loss": 1.6674, "step": 291000 }, { "epoch": 3.08, "learning_rate": 4.846185506083984e-05, "loss": 1.6683, "step": 291500 }, { "epoch": 3.08, "learning_rate": 4.845921673332841e-05, "loss": 1.7131, "step": 292000 }, { "epoch": 3.09, "learning_rate": 4.845657840581699e-05, "loss": 1.7381, "step": 292500 }, { "epoch": 3.09, "learning_rate": 4.845394007830556e-05, "loss": 1.6669, "step": 293000 }, { "epoch": 3.1, "learning_rate": 4.845130175079414e-05, "loss": 1.6997, "step": 293500 }, { "epoch": 3.1, "learning_rate": 4.8448663423282714e-05, "loss": 1.6871, "step": 294000 }, { "epoch": 3.11, "learning_rate": 4.844602509577129e-05, "loss": 1.6939, "step": 294500 }, { "epoch": 3.11, "learning_rate": 4.844338676825987e-05, "loss": 1.6947, "step": 295000 }, { "epoch": 3.12, "learning_rate": 4.844074844074845e-05, "loss": 1.6562, "step": 295500 }, { "epoch": 3.12, "learning_rate": 4.8438110113237016e-05, "loss": 1.6445, "step": 296000 }, { "epoch": 3.13, "learning_rate": 4.843547178572559e-05, "loss": 1.7058, "step": 296500 }, { "epoch": 3.13, "learning_rate": 4.8432833458214174e-05, "loss": 1.7239, "step": 297000 }, { "epoch": 3.14, "learning_rate": 4.843019513070275e-05, "loss": 1.6551, "step": 297500 }, { "epoch": 3.14, "learning_rate": 4.842755680319132e-05, "loss": 1.7141, "step": 298000 }, { "epoch": 3.15, "learning_rate": 4.84249184756799e-05, "loss": 1.6708, "step": 298500 }, { "epoch": 3.16, "learning_rate": 4.8422280148168476e-05, "loss": 1.6953, "step": 299000 }, { "epoch": 3.16, "learning_rate": 4.841964182065705e-05, "loss": 1.671, "step": 299500 }, { "epoch": 3.17, "learning_rate": 4.841700349314563e-05, "loss": 1.6486, "step": 300000 }, { "epoch": 3.17, "learning_rate": 4.84143651656342e-05, "loss": 1.706, "step": 300500 }, { "epoch": 3.18, "learning_rate": 4.841172683812278e-05, "loss": 1.6429, "step": 301000 }, { "epoch": 3.18, "learning_rate": 4.840908851061135e-05, "loss": 1.7043, "step": 301500 }, { "epoch": 3.19, "learning_rate": 4.8406450183099936e-05, "loss": 1.6538, "step": 302000 }, { "epoch": 3.19, "learning_rate": 4.840381185558851e-05, "loss": 1.7288, "step": 302500 }, { "epoch": 3.2, "learning_rate": 4.840117352807708e-05, "loss": 1.7037, "step": 303000 }, { "epoch": 3.2, "learning_rate": 4.839853520056566e-05, "loss": 1.6921, "step": 303500 }, { "epoch": 3.21, "learning_rate": 4.839589687305424e-05, "loss": 1.6823, "step": 304000 }, { "epoch": 3.21, "learning_rate": 4.839325854554281e-05, "loss": 1.6628, "step": 304500 }, { "epoch": 3.22, "learning_rate": 4.839062021803139e-05, "loss": 1.663, "step": 305000 }, { "epoch": 3.22, "learning_rate": 4.8387981890519964e-05, "loss": 1.6735, "step": 305500 }, { "epoch": 3.23, "learning_rate": 4.838534356300854e-05, "loss": 1.6844, "step": 306000 }, { "epoch": 3.23, "learning_rate": 4.8382705235497115e-05, "loss": 1.6927, "step": 306500 }, { "epoch": 3.24, "learning_rate": 4.83800669079857e-05, "loss": 1.6823, "step": 307000 }, { "epoch": 3.25, "learning_rate": 4.8377428580474266e-05, "loss": 1.6781, "step": 307500 }, { "epoch": 3.25, "learning_rate": 4.837479025296284e-05, "loss": 1.7053, "step": 308000 }, { "epoch": 3.26, "learning_rate": 4.837215192545142e-05, "loss": 1.6733, "step": 308500 }, { "epoch": 3.26, "learning_rate": 4.836951359794e-05, "loss": 1.6161, "step": 309000 }, { "epoch": 3.27, "learning_rate": 4.8366875270428575e-05, "loss": 1.6848, "step": 309500 }, { "epoch": 3.27, "learning_rate": 4.836423694291714e-05, "loss": 1.6697, "step": 310000 }, { "epoch": 3.28, "learning_rate": 4.8361598615405726e-05, "loss": 1.6078, "step": 310500 }, { "epoch": 3.28, "learning_rate": 4.83589602878943e-05, "loss": 1.6819, "step": 311000 }, { "epoch": 3.29, "learning_rate": 4.8356321960382877e-05, "loss": 1.6648, "step": 311500 }, { "epoch": 3.29, "learning_rate": 4.835368363287145e-05, "loss": 1.6675, "step": 312000 }, { "epoch": 3.3, "learning_rate": 4.835104530536003e-05, "loss": 1.6451, "step": 312500 }, { "epoch": 3.3, "learning_rate": 4.83484069778486e-05, "loss": 1.751, "step": 313000 }, { "epoch": 3.31, "learning_rate": 4.834576865033718e-05, "loss": 1.6985, "step": 313500 }, { "epoch": 3.31, "learning_rate": 4.834313032282576e-05, "loss": 1.6553, "step": 314000 }, { "epoch": 3.32, "learning_rate": 4.8340491995314336e-05, "loss": 1.676, "step": 314500 }, { "epoch": 3.32, "learning_rate": 4.8337853667802905e-05, "loss": 1.7431, "step": 315000 }, { "epoch": 3.33, "learning_rate": 4.833521534029149e-05, "loss": 1.6853, "step": 315500 }, { "epoch": 3.33, "learning_rate": 4.833257701278006e-05, "loss": 1.6736, "step": 316000 }, { "epoch": 3.34, "learning_rate": 4.832993868526864e-05, "loss": 1.6953, "step": 316500 }, { "epoch": 3.35, "learning_rate": 4.8327300357757214e-05, "loss": 1.7401, "step": 317000 }, { "epoch": 3.35, "learning_rate": 4.832466203024579e-05, "loss": 1.6518, "step": 317500 }, { "epoch": 3.36, "learning_rate": 4.8322023702734365e-05, "loss": 1.7008, "step": 318000 }, { "epoch": 3.36, "learning_rate": 4.831938537522294e-05, "loss": 1.6562, "step": 318500 }, { "epoch": 3.37, "learning_rate": 4.831674704771152e-05, "loss": 1.6616, "step": 319000 }, { "epoch": 3.37, "learning_rate": 4.831410872020009e-05, "loss": 1.6658, "step": 319500 }, { "epoch": 3.38, "learning_rate": 4.8311470392688667e-05, "loss": 1.7119, "step": 320000 }, { "epoch": 3.38, "learning_rate": 4.830883206517724e-05, "loss": 1.7166, "step": 320500 }, { "epoch": 3.39, "learning_rate": 4.8306193737665824e-05, "loss": 1.6877, "step": 321000 }, { "epoch": 3.39, "learning_rate": 4.83035554101544e-05, "loss": 1.7004, "step": 321500 }, { "epoch": 3.4, "learning_rate": 4.830091708264297e-05, "loss": 1.6745, "step": 322000 }, { "epoch": 3.4, "learning_rate": 4.829827875513155e-05, "loss": 1.7037, "step": 322500 }, { "epoch": 3.41, "learning_rate": 4.8295640427620126e-05, "loss": 1.6333, "step": 323000 }, { "epoch": 3.41, "learning_rate": 4.82930021001087e-05, "loss": 1.6723, "step": 323500 }, { "epoch": 3.42, "learning_rate": 4.829036377259728e-05, "loss": 1.6591, "step": 324000 }, { "epoch": 3.42, "learning_rate": 4.828772544508585e-05, "loss": 1.6465, "step": 324500 }, { "epoch": 3.43, "learning_rate": 4.828508711757443e-05, "loss": 1.7052, "step": 325000 }, { "epoch": 3.44, "learning_rate": 4.8282448790063004e-05, "loss": 1.7242, "step": 325500 }, { "epoch": 3.44, "learning_rate": 4.8279810462551586e-05, "loss": 1.6665, "step": 326000 }, { "epoch": 3.45, "learning_rate": 4.8277172135040155e-05, "loss": 1.7022, "step": 326500 }, { "epoch": 3.45, "learning_rate": 4.827453380752873e-05, "loss": 1.6468, "step": 327000 }, { "epoch": 3.46, "learning_rate": 4.827189548001731e-05, "loss": 1.6509, "step": 327500 }, { "epoch": 3.46, "learning_rate": 4.826925715250589e-05, "loss": 1.6806, "step": 328000 }, { "epoch": 3.47, "learning_rate": 4.8266618824994463e-05, "loss": 1.6762, "step": 328500 }, { "epoch": 3.47, "learning_rate": 4.826398049748304e-05, "loss": 1.6799, "step": 329000 }, { "epoch": 3.48, "learning_rate": 4.8261342169971614e-05, "loss": 1.6667, "step": 329500 }, { "epoch": 3.48, "learning_rate": 4.825870384246019e-05, "loss": 1.717, "step": 330000 }, { "epoch": 3.49, "learning_rate": 4.8256065514948765e-05, "loss": 1.6508, "step": 330500 }, { "epoch": 3.49, "learning_rate": 4.825342718743735e-05, "loss": 1.6641, "step": 331000 }, { "epoch": 3.5, "learning_rate": 4.8250788859925916e-05, "loss": 1.6673, "step": 331500 }, { "epoch": 3.5, "learning_rate": 4.824815053241449e-05, "loss": 1.6973, "step": 332000 }, { "epoch": 3.51, "learning_rate": 4.824551220490307e-05, "loss": 1.6409, "step": 332500 }, { "epoch": 3.51, "learning_rate": 4.824287387739165e-05, "loss": 1.6891, "step": 333000 }, { "epoch": 3.52, "learning_rate": 4.8240235549880225e-05, "loss": 1.6648, "step": 333500 }, { "epoch": 3.52, "learning_rate": 4.8237597222368794e-05, "loss": 1.6737, "step": 334000 }, { "epoch": 3.53, "learning_rate": 4.8234958894857376e-05, "loss": 1.6215, "step": 334500 }, { "epoch": 3.54, "learning_rate": 4.823232056734595e-05, "loss": 1.6787, "step": 335000 }, { "epoch": 3.54, "learning_rate": 4.822968223983453e-05, "loss": 1.6445, "step": 335500 }, { "epoch": 3.55, "learning_rate": 4.82270439123231e-05, "loss": 1.6563, "step": 336000 }, { "epoch": 3.55, "learning_rate": 4.822440558481168e-05, "loss": 1.755, "step": 336500 }, { "epoch": 3.56, "learning_rate": 4.8221767257300253e-05, "loss": 1.6341, "step": 337000 }, { "epoch": 3.56, "learning_rate": 4.821912892978883e-05, "loss": 1.5877, "step": 337500 }, { "epoch": 3.57, "learning_rate": 4.821649060227741e-05, "loss": 1.6857, "step": 338000 }, { "epoch": 3.57, "learning_rate": 4.821385227476598e-05, "loss": 1.6568, "step": 338500 }, { "epoch": 3.58, "learning_rate": 4.8211213947254555e-05, "loss": 1.6517, "step": 339000 }, { "epoch": 3.58, "learning_rate": 4.820857561974314e-05, "loss": 1.653, "step": 339500 }, { "epoch": 3.59, "learning_rate": 4.820593729223171e-05, "loss": 1.6615, "step": 340000 }, { "epoch": 3.59, "learning_rate": 4.820329896472029e-05, "loss": 1.606, "step": 340500 }, { "epoch": 3.6, "learning_rate": 4.8200660637208864e-05, "loss": 1.6582, "step": 341000 }, { "epoch": 3.6, "learning_rate": 4.819802230969744e-05, "loss": 1.6914, "step": 341500 }, { "epoch": 3.61, "learning_rate": 4.8195383982186015e-05, "loss": 1.657, "step": 342000 }, { "epoch": 3.61, "learning_rate": 4.819274565467459e-05, "loss": 1.7014, "step": 342500 }, { "epoch": 3.62, "learning_rate": 4.819010732716317e-05, "loss": 1.665, "step": 343000 }, { "epoch": 3.63, "learning_rate": 4.818746899965174e-05, "loss": 1.5978, "step": 343500 }, { "epoch": 3.63, "learning_rate": 4.818483067214032e-05, "loss": 1.6981, "step": 344000 }, { "epoch": 3.64, "learning_rate": 4.818219234462889e-05, "loss": 1.6625, "step": 344500 }, { "epoch": 3.64, "learning_rate": 4.8179554017117475e-05, "loss": 1.6385, "step": 345000 }, { "epoch": 3.65, "learning_rate": 4.8176915689606044e-05, "loss": 1.6216, "step": 345500 }, { "epoch": 3.65, "learning_rate": 4.817427736209462e-05, "loss": 1.7118, "step": 346000 }, { "epoch": 3.66, "learning_rate": 4.81716390345832e-05, "loss": 1.6888, "step": 346500 }, { "epoch": 3.66, "learning_rate": 4.816900070707178e-05, "loss": 1.6905, "step": 347000 }, { "epoch": 3.67, "learning_rate": 4.816636237956035e-05, "loss": 1.7083, "step": 347500 }, { "epoch": 3.67, "learning_rate": 4.816372405204893e-05, "loss": 1.6536, "step": 348000 }, { "epoch": 3.68, "learning_rate": 4.81610857245375e-05, "loss": 1.648, "step": 348500 }, { "epoch": 3.68, "learning_rate": 4.815844739702608e-05, "loss": 1.7008, "step": 349000 }, { "epoch": 3.69, "learning_rate": 4.8155809069514654e-05, "loss": 1.6465, "step": 349500 }, { "epoch": 3.69, "learning_rate": 4.8153170742003236e-05, "loss": 1.6125, "step": 350000 }, { "epoch": 3.7, "learning_rate": 4.8150532414491805e-05, "loss": 1.6014, "step": 350500 }, { "epoch": 3.7, "learning_rate": 4.814789408698038e-05, "loss": 1.6055, "step": 351000 }, { "epoch": 3.71, "learning_rate": 4.814525575946896e-05, "loss": 1.7267, "step": 351500 }, { "epoch": 3.71, "learning_rate": 4.814261743195754e-05, "loss": 1.7071, "step": 352000 }, { "epoch": 3.72, "learning_rate": 4.8139979104446114e-05, "loss": 1.656, "step": 352500 }, { "epoch": 3.73, "learning_rate": 4.813734077693469e-05, "loss": 1.7256, "step": 353000 }, { "epoch": 3.73, "learning_rate": 4.8134702449423265e-05, "loss": 1.6698, "step": 353500 }, { "epoch": 3.74, "learning_rate": 4.813206412191184e-05, "loss": 1.6392, "step": 354000 }, { "epoch": 3.74, "learning_rate": 4.8129425794400416e-05, "loss": 1.6474, "step": 354500 }, { "epoch": 3.75, "learning_rate": 4.812678746688899e-05, "loss": 1.6977, "step": 355000 }, { "epoch": 3.75, "learning_rate": 4.812414913937757e-05, "loss": 1.6203, "step": 355500 }, { "epoch": 3.76, "learning_rate": 4.812151081186614e-05, "loss": 1.7067, "step": 356000 }, { "epoch": 3.76, "learning_rate": 4.811887248435472e-05, "loss": 1.6473, "step": 356500 }, { "epoch": 3.77, "learning_rate": 4.81162341568433e-05, "loss": 1.661, "step": 357000 }, { "epoch": 3.77, "learning_rate": 4.811359582933187e-05, "loss": 1.694, "step": 357500 }, { "epoch": 3.78, "learning_rate": 4.8110957501820444e-05, "loss": 1.6565, "step": 358000 }, { "epoch": 3.78, "learning_rate": 4.8108319174309027e-05, "loss": 1.6908, "step": 358500 }, { "epoch": 3.79, "learning_rate": 4.81056808467976e-05, "loss": 1.6365, "step": 359000 }, { "epoch": 3.79, "learning_rate": 4.810304251928618e-05, "loss": 1.6668, "step": 359500 }, { "epoch": 3.8, "learning_rate": 4.810040419177475e-05, "loss": 1.6407, "step": 360000 }, { "epoch": 3.8, "learning_rate": 4.809776586426333e-05, "loss": 1.7032, "step": 360500 }, { "epoch": 3.81, "learning_rate": 4.8095127536751904e-05, "loss": 1.6689, "step": 361000 }, { "epoch": 3.82, "learning_rate": 4.809248920924048e-05, "loss": 1.6702, "step": 361500 }, { "epoch": 3.82, "learning_rate": 4.808985088172906e-05, "loss": 1.6768, "step": 362000 }, { "epoch": 3.83, "learning_rate": 4.808721255421763e-05, "loss": 1.6351, "step": 362500 }, { "epoch": 3.83, "learning_rate": 4.8084574226706206e-05, "loss": 1.6318, "step": 363000 }, { "epoch": 3.84, "learning_rate": 4.808193589919479e-05, "loss": 1.6762, "step": 363500 }, { "epoch": 3.84, "learning_rate": 4.8079297571683364e-05, "loss": 1.6904, "step": 364000 }, { "epoch": 3.85, "learning_rate": 4.807665924417193e-05, "loss": 1.6479, "step": 364500 }, { "epoch": 3.85, "learning_rate": 4.8074020916660515e-05, "loss": 1.685, "step": 365000 }, { "epoch": 3.86, "learning_rate": 4.807138258914909e-05, "loss": 1.6978, "step": 365500 }, { "epoch": 3.86, "learning_rate": 4.8068744261637666e-05, "loss": 1.7268, "step": 366000 }, { "epoch": 3.87, "learning_rate": 4.806610593412624e-05, "loss": 1.5727, "step": 366500 }, { "epoch": 3.87, "learning_rate": 4.8063467606614817e-05, "loss": 1.6757, "step": 367000 }, { "epoch": 3.88, "learning_rate": 4.806082927910339e-05, "loss": 1.6584, "step": 367500 }, { "epoch": 3.88, "learning_rate": 4.805819095159197e-05, "loss": 1.6526, "step": 368000 }, { "epoch": 3.89, "learning_rate": 4.805555262408055e-05, "loss": 1.6811, "step": 368500 }, { "epoch": 3.89, "learning_rate": 4.8052914296569125e-05, "loss": 1.6566, "step": 369000 }, { "epoch": 3.9, "learning_rate": 4.8050275969057694e-05, "loss": 1.6137, "step": 369500 }, { "epoch": 3.9, "learning_rate": 4.804763764154627e-05, "loss": 1.6556, "step": 370000 }, { "epoch": 3.91, "learning_rate": 4.804499931403485e-05, "loss": 1.6395, "step": 370500 }, { "epoch": 3.92, "learning_rate": 4.804236098652343e-05, "loss": 1.7093, "step": 371000 }, { "epoch": 3.92, "learning_rate": 4.8039722659011996e-05, "loss": 1.6868, "step": 371500 }, { "epoch": 3.93, "learning_rate": 4.803708433150058e-05, "loss": 1.6831, "step": 372000 }, { "epoch": 3.93, "learning_rate": 4.8034446003989154e-05, "loss": 1.6136, "step": 372500 }, { "epoch": 3.94, "learning_rate": 4.803180767647773e-05, "loss": 1.6435, "step": 373000 }, { "epoch": 3.94, "learning_rate": 4.8029169348966305e-05, "loss": 1.6391, "step": 373500 }, { "epoch": 3.95, "learning_rate": 4.802653102145488e-05, "loss": 1.6854, "step": 374000 }, { "epoch": 3.95, "learning_rate": 4.8023892693943456e-05, "loss": 1.6383, "step": 374500 }, { "epoch": 3.96, "learning_rate": 4.802125436643203e-05, "loss": 1.6545, "step": 375000 }, { "epoch": 3.96, "learning_rate": 4.8018616038920613e-05, "loss": 1.6104, "step": 375500 }, { "epoch": 3.97, "learning_rate": 4.801597771140919e-05, "loss": 1.6323, "step": 376000 }, { "epoch": 3.97, "learning_rate": 4.801333938389776e-05, "loss": 1.623, "step": 376500 }, { "epoch": 3.98, "learning_rate": 4.801070105638634e-05, "loss": 1.7045, "step": 377000 }, { "epoch": 3.98, "learning_rate": 4.8008062728874915e-05, "loss": 1.6174, "step": 377500 }, { "epoch": 3.99, "learning_rate": 4.800542440136349e-05, "loss": 1.6552, "step": 378000 }, { "epoch": 3.99, "learning_rate": 4.8002786073852066e-05, "loss": 1.6537, "step": 378500 }, { "epoch": 4.0, "learning_rate": 4.800014774634064e-05, "loss": 1.6549, "step": 379000 }, { "epoch": 4.0, "learning_rate": 4.799750941882922e-05, "loss": 1.6834, "step": 379500 }, { "epoch": 4.01, "learning_rate": 4.799487109131779e-05, "loss": 1.6471, "step": 380000 }, { "epoch": 4.02, "learning_rate": 4.7992232763806375e-05, "loss": 1.6518, "step": 380500 }, { "epoch": 4.02, "learning_rate": 4.7989594436294944e-05, "loss": 1.6697, "step": 381000 }, { "epoch": 4.03, "learning_rate": 4.798695610878352e-05, "loss": 1.6707, "step": 381500 }, { "epoch": 4.03, "learning_rate": 4.7984317781272095e-05, "loss": 1.6655, "step": 382000 }, { "epoch": 4.04, "learning_rate": 4.798167945376068e-05, "loss": 1.6576, "step": 382500 }, { "epoch": 4.04, "learning_rate": 4.797904112624925e-05, "loss": 1.6724, "step": 383000 }, { "epoch": 4.05, "learning_rate": 4.797640279873782e-05, "loss": 1.725, "step": 383500 }, { "epoch": 4.05, "learning_rate": 4.7973764471226404e-05, "loss": 1.6289, "step": 384000 }, { "epoch": 4.06, "learning_rate": 4.797112614371498e-05, "loss": 1.6394, "step": 384500 }, { "epoch": 4.06, "learning_rate": 4.7968487816203554e-05, "loss": 1.5837, "step": 385000 }, { "epoch": 4.07, "learning_rate": 4.796584948869213e-05, "loss": 1.64, "step": 385500 }, { "epoch": 4.07, "learning_rate": 4.7963211161180705e-05, "loss": 1.6711, "step": 386000 }, { "epoch": 4.08, "learning_rate": 4.796057283366928e-05, "loss": 1.6392, "step": 386500 }, { "epoch": 4.08, "learning_rate": 4.7957934506157856e-05, "loss": 1.6572, "step": 387000 }, { "epoch": 4.09, "learning_rate": 4.795529617864644e-05, "loss": 1.6576, "step": 387500 }, { "epoch": 4.09, "learning_rate": 4.7952657851135014e-05, "loss": 1.6883, "step": 388000 }, { "epoch": 4.1, "learning_rate": 4.795001952362358e-05, "loss": 1.6942, "step": 388500 }, { "epoch": 4.11, "learning_rate": 4.7947381196112165e-05, "loss": 1.6218, "step": 389000 }, { "epoch": 4.11, "learning_rate": 4.794474286860074e-05, "loss": 1.6295, "step": 389500 }, { "epoch": 4.12, "learning_rate": 4.7942104541089316e-05, "loss": 1.6481, "step": 390000 }, { "epoch": 4.12, "learning_rate": 4.793946621357789e-05, "loss": 1.6542, "step": 390500 }, { "epoch": 4.13, "learning_rate": 4.793682788606647e-05, "loss": 1.6281, "step": 391000 }, { "epoch": 4.13, "learning_rate": 4.793418955855504e-05, "loss": 1.7108, "step": 391500 }, { "epoch": 4.14, "learning_rate": 4.793155123104362e-05, "loss": 1.596, "step": 392000 }, { "epoch": 4.14, "learning_rate": 4.79289129035322e-05, "loss": 1.5914, "step": 392500 }, { "epoch": 4.15, "learning_rate": 4.792627457602077e-05, "loss": 1.6497, "step": 393000 }, { "epoch": 4.15, "learning_rate": 4.7923636248509345e-05, "loss": 1.6767, "step": 393500 }, { "epoch": 4.16, "learning_rate": 4.792099792099792e-05, "loss": 1.6309, "step": 394000 }, { "epoch": 4.16, "learning_rate": 4.79183595934865e-05, "loss": 1.6643, "step": 394500 }, { "epoch": 4.17, "learning_rate": 4.791572126597508e-05, "loss": 1.736, "step": 395000 }, { "epoch": 4.17, "learning_rate": 4.7913082938463646e-05, "loss": 1.6474, "step": 395500 }, { "epoch": 4.18, "learning_rate": 4.791044461095223e-05, "loss": 1.6526, "step": 396000 }, { "epoch": 4.18, "learning_rate": 4.7907806283440804e-05, "loss": 1.6095, "step": 396500 }, { "epoch": 4.19, "learning_rate": 4.790516795592938e-05, "loss": 1.5997, "step": 397000 }, { "epoch": 4.19, "learning_rate": 4.7902529628417955e-05, "loss": 1.6006, "step": 397500 }, { "epoch": 4.2, "learning_rate": 4.789989130090653e-05, "loss": 1.6454, "step": 398000 }, { "epoch": 4.21, "learning_rate": 4.7897252973395106e-05, "loss": 1.6686, "step": 398500 }, { "epoch": 4.21, "learning_rate": 4.789461464588368e-05, "loss": 1.6563, "step": 399000 }, { "epoch": 4.22, "learning_rate": 4.7891976318372264e-05, "loss": 1.6452, "step": 399500 }, { "epoch": 4.22, "learning_rate": 4.788933799086083e-05, "loss": 1.5825, "step": 400000 }, { "epoch": 4.23, "learning_rate": 4.788669966334941e-05, "loss": 1.6478, "step": 400500 }, { "epoch": 4.23, "learning_rate": 4.788406133583799e-05, "loss": 1.6919, "step": 401000 }, { "epoch": 4.24, "learning_rate": 4.7881423008326566e-05, "loss": 1.6677, "step": 401500 }, { "epoch": 4.24, "learning_rate": 4.787878468081514e-05, "loss": 1.5854, "step": 402000 }, { "epoch": 4.25, "learning_rate": 4.787614635330372e-05, "loss": 1.6133, "step": 402500 }, { "epoch": 4.25, "learning_rate": 4.787350802579229e-05, "loss": 1.6278, "step": 403000 }, { "epoch": 4.26, "learning_rate": 4.787086969828087e-05, "loss": 1.6411, "step": 403500 }, { "epoch": 4.26, "learning_rate": 4.786823137076944e-05, "loss": 1.618, "step": 404000 }, { "epoch": 4.27, "learning_rate": 4.7865593043258026e-05, "loss": 1.6237, "step": 404500 }, { "epoch": 4.27, "learning_rate": 4.7862954715746594e-05, "loss": 1.6506, "step": 405000 }, { "epoch": 4.28, "learning_rate": 4.786031638823517e-05, "loss": 1.6734, "step": 405500 }, { "epoch": 4.28, "learning_rate": 4.7857678060723745e-05, "loss": 1.594, "step": 406000 }, { "epoch": 4.29, "learning_rate": 4.785503973321233e-05, "loss": 1.6166, "step": 406500 }, { "epoch": 4.3, "learning_rate": 4.78524014057009e-05, "loss": 1.6727, "step": 407000 }, { "epoch": 4.3, "learning_rate": 4.784976307818947e-05, "loss": 1.6498, "step": 407500 }, { "epoch": 4.31, "learning_rate": 4.7847124750678054e-05, "loss": 1.6661, "step": 408000 }, { "epoch": 4.31, "learning_rate": 4.784448642316663e-05, "loss": 1.6566, "step": 408500 }, { "epoch": 4.32, "learning_rate": 4.7841848095655205e-05, "loss": 1.6268, "step": 409000 }, { "epoch": 4.32, "learning_rate": 4.783920976814378e-05, "loss": 1.6362, "step": 409500 }, { "epoch": 4.33, "learning_rate": 4.7836571440632356e-05, "loss": 1.6837, "step": 410000 }, { "epoch": 4.33, "learning_rate": 4.783393311312093e-05, "loss": 1.6431, "step": 410500 }, { "epoch": 4.34, "learning_rate": 4.783129478560951e-05, "loss": 1.6104, "step": 411000 }, { "epoch": 4.34, "learning_rate": 4.782865645809809e-05, "loss": 1.6288, "step": 411500 }, { "epoch": 4.35, "learning_rate": 4.782601813058666e-05, "loss": 1.6331, "step": 412000 }, { "epoch": 4.35, "learning_rate": 4.782337980307523e-05, "loss": 1.6347, "step": 412500 }, { "epoch": 4.36, "learning_rate": 4.7820741475563816e-05, "loss": 1.6032, "step": 413000 }, { "epoch": 4.36, "learning_rate": 4.781810314805239e-05, "loss": 1.625, "step": 413500 }, { "epoch": 4.37, "learning_rate": 4.781546482054097e-05, "loss": 1.6519, "step": 414000 }, { "epoch": 4.37, "learning_rate": 4.781282649302954e-05, "loss": 1.6903, "step": 414500 }, { "epoch": 4.38, "learning_rate": 4.781018816551812e-05, "loss": 1.6407, "step": 415000 }, { "epoch": 4.38, "learning_rate": 4.780754983800669e-05, "loss": 1.5923, "step": 415500 }, { "epoch": 4.39, "learning_rate": 4.780491151049527e-05, "loss": 1.641, "step": 416000 }, { "epoch": 4.4, "learning_rate": 4.780227318298385e-05, "loss": 1.6768, "step": 416500 }, { "epoch": 4.4, "learning_rate": 4.779963485547242e-05, "loss": 1.6736, "step": 417000 }, { "epoch": 4.41, "learning_rate": 4.7796996527960995e-05, "loss": 1.6425, "step": 417500 }, { "epoch": 4.41, "learning_rate": 4.779435820044957e-05, "loss": 1.6744, "step": 418000 }, { "epoch": 4.42, "learning_rate": 4.779171987293815e-05, "loss": 1.6647, "step": 418500 }, { "epoch": 4.42, "learning_rate": 4.778908154542672e-05, "loss": 1.6718, "step": 419000 }, { "epoch": 4.43, "learning_rate": 4.77864432179153e-05, "loss": 1.6699, "step": 419500 }, { "epoch": 4.43, "learning_rate": 4.778380489040388e-05, "loss": 1.6802, "step": 420000 }, { "epoch": 4.44, "learning_rate": 4.7781166562892455e-05, "loss": 1.6153, "step": 420500 }, { "epoch": 4.44, "learning_rate": 4.777852823538103e-05, "loss": 1.6693, "step": 421000 }, { "epoch": 4.45, "learning_rate": 4.7775889907869606e-05, "loss": 1.5998, "step": 421500 }, { "epoch": 4.45, "learning_rate": 4.777325158035818e-05, "loss": 1.638, "step": 422000 }, { "epoch": 4.46, "learning_rate": 4.777061325284676e-05, "loss": 1.6581, "step": 422500 }, { "epoch": 4.46, "learning_rate": 4.776797492533533e-05, "loss": 1.6443, "step": 423000 }, { "epoch": 4.47, "learning_rate": 4.7765336597823914e-05, "loss": 1.5833, "step": 423500 }, { "epoch": 4.47, "learning_rate": 4.776269827031248e-05, "loss": 1.6991, "step": 424000 }, { "epoch": 4.48, "learning_rate": 4.776005994280106e-05, "loss": 1.6339, "step": 424500 }, { "epoch": 4.49, "learning_rate": 4.775742161528964e-05, "loss": 1.6509, "step": 425000 }, { "epoch": 4.49, "learning_rate": 4.7754783287778216e-05, "loss": 1.6765, "step": 425500 }, { "epoch": 4.5, "learning_rate": 4.775214496026679e-05, "loss": 1.5962, "step": 426000 }, { "epoch": 4.5, "learning_rate": 4.774950663275537e-05, "loss": 1.6448, "step": 426500 }, { "epoch": 4.51, "learning_rate": 4.774686830524394e-05, "loss": 1.6225, "step": 427000 }, { "epoch": 4.51, "learning_rate": 4.774422997773252e-05, "loss": 1.5847, "step": 427500 }, { "epoch": 4.52, "learning_rate": 4.7741591650221094e-05, "loss": 1.6683, "step": 428000 }, { "epoch": 4.52, "learning_rate": 4.773895332270967e-05, "loss": 1.6106, "step": 428500 }, { "epoch": 4.53, "learning_rate": 4.7736314995198245e-05, "loss": 1.6308, "step": 429000 }, { "epoch": 4.53, "learning_rate": 4.773367666768682e-05, "loss": 1.6596, "step": 429500 }, { "epoch": 4.54, "learning_rate": 4.7731038340175396e-05, "loss": 1.6046, "step": 430000 }, { "epoch": 4.54, "learning_rate": 4.772840001266398e-05, "loss": 1.7039, "step": 430500 }, { "epoch": 4.55, "learning_rate": 4.772576168515255e-05, "loss": 1.6439, "step": 431000 }, { "epoch": 4.55, "learning_rate": 4.772312335764112e-05, "loss": 1.6718, "step": 431500 }, { "epoch": 4.56, "learning_rate": 4.7720485030129704e-05, "loss": 1.6171, "step": 432000 }, { "epoch": 4.56, "learning_rate": 4.771784670261828e-05, "loss": 1.6606, "step": 432500 }, { "epoch": 4.57, "learning_rate": 4.7715208375106855e-05, "loss": 1.6203, "step": 433000 }, { "epoch": 4.57, "learning_rate": 4.771257004759543e-05, "loss": 1.6291, "step": 433500 }, { "epoch": 4.58, "learning_rate": 4.7709931720084006e-05, "loss": 1.6647, "step": 434000 }, { "epoch": 4.59, "learning_rate": 4.770729339257258e-05, "loss": 1.6363, "step": 434500 }, { "epoch": 4.59, "learning_rate": 4.770465506506116e-05, "loss": 1.6679, "step": 435000 }, { "epoch": 4.6, "learning_rate": 4.770201673754974e-05, "loss": 1.611, "step": 435500 }, { "epoch": 4.6, "learning_rate": 4.769937841003831e-05, "loss": 1.6905, "step": 436000 }, { "epoch": 4.61, "learning_rate": 4.7696740082526884e-05, "loss": 1.6785, "step": 436500 }, { "epoch": 4.61, "learning_rate": 4.7694101755015466e-05, "loss": 1.6331, "step": 437000 }, { "epoch": 4.62, "learning_rate": 4.769146342750404e-05, "loss": 1.6846, "step": 437500 }, { "epoch": 4.62, "learning_rate": 4.768882509999261e-05, "loss": 1.5959, "step": 438000 }, { "epoch": 4.63, "learning_rate": 4.768618677248119e-05, "loss": 1.622, "step": 438500 }, { "epoch": 4.63, "learning_rate": 4.768354844496977e-05, "loss": 1.6951, "step": 439000 }, { "epoch": 4.64, "learning_rate": 4.7680910117458344e-05, "loss": 1.6167, "step": 439500 }, { "epoch": 4.64, "learning_rate": 4.767827178994692e-05, "loss": 1.6469, "step": 440000 }, { "epoch": 4.65, "learning_rate": 4.7675633462435495e-05, "loss": 1.6068, "step": 440500 }, { "epoch": 4.65, "learning_rate": 4.767299513492407e-05, "loss": 1.6256, "step": 441000 }, { "epoch": 4.66, "learning_rate": 4.7670356807412646e-05, "loss": 1.6095, "step": 441500 }, { "epoch": 4.66, "learning_rate": 4.766771847990123e-05, "loss": 1.6369, "step": 442000 }, { "epoch": 4.67, "learning_rate": 4.76650801523898e-05, "loss": 1.6118, "step": 442500 }, { "epoch": 4.68, "learning_rate": 4.766244182487837e-05, "loss": 1.6545, "step": 443000 }, { "epoch": 4.68, "learning_rate": 4.765980349736695e-05, "loss": 1.6059, "step": 443500 }, { "epoch": 4.69, "learning_rate": 4.765716516985553e-05, "loss": 1.6684, "step": 444000 }, { "epoch": 4.69, "learning_rate": 4.7654526842344105e-05, "loss": 1.6138, "step": 444500 }, { "epoch": 4.7, "learning_rate": 4.765188851483268e-05, "loss": 1.6432, "step": 445000 }, { "epoch": 4.7, "learning_rate": 4.7649250187321256e-05, "loss": 1.6021, "step": 445500 }, { "epoch": 4.71, "learning_rate": 4.764661185980983e-05, "loss": 1.6009, "step": 446000 }, { "epoch": 4.71, "learning_rate": 4.764397353229841e-05, "loss": 1.6686, "step": 446500 }, { "epoch": 4.72, "learning_rate": 4.764133520478698e-05, "loss": 1.613, "step": 447000 }, { "epoch": 4.72, "learning_rate": 4.763869687727556e-05, "loss": 1.5977, "step": 447500 }, { "epoch": 4.73, "learning_rate": 4.7636058549764134e-05, "loss": 1.6445, "step": 448000 }, { "epoch": 4.73, "learning_rate": 4.763342022225271e-05, "loss": 1.6953, "step": 448500 }, { "epoch": 4.74, "learning_rate": 4.763078189474129e-05, "loss": 1.6407, "step": 449000 }, { "epoch": 4.74, "learning_rate": 4.762814356722987e-05, "loss": 1.6648, "step": 449500 }, { "epoch": 4.75, "learning_rate": 4.7625505239718436e-05, "loss": 1.6102, "step": 450000 }, { "epoch": 4.75, "learning_rate": 4.762286691220702e-05, "loss": 1.6217, "step": 450500 }, { "epoch": 4.76, "learning_rate": 4.762022858469559e-05, "loss": 1.712, "step": 451000 }, { "epoch": 4.76, "learning_rate": 4.761759025718417e-05, "loss": 1.6443, "step": 451500 }, { "epoch": 4.77, "learning_rate": 4.7614951929672744e-05, "loss": 1.5958, "step": 452000 }, { "epoch": 4.78, "learning_rate": 4.761231360216132e-05, "loss": 1.646, "step": 452500 }, { "epoch": 4.78, "learning_rate": 4.7609675274649895e-05, "loss": 1.6038, "step": 453000 }, { "epoch": 4.79, "learning_rate": 4.760703694713847e-05, "loss": 1.6476, "step": 453500 }, { "epoch": 4.79, "learning_rate": 4.760439861962705e-05, "loss": 1.5922, "step": 454000 }, { "epoch": 4.8, "learning_rate": 4.760176029211563e-05, "loss": 1.6415, "step": 454500 }, { "epoch": 4.8, "learning_rate": 4.75991219646042e-05, "loss": 1.6584, "step": 455000 }, { "epoch": 4.81, "learning_rate": 4.759648363709277e-05, "loss": 1.5969, "step": 455500 }, { "epoch": 4.81, "learning_rate": 4.7593845309581355e-05, "loss": 1.7003, "step": 456000 }, { "epoch": 4.82, "learning_rate": 4.759120698206993e-05, "loss": 1.619, "step": 456500 }, { "epoch": 4.82, "learning_rate": 4.75885686545585e-05, "loss": 1.5629, "step": 457000 }, { "epoch": 4.83, "learning_rate": 4.758593032704708e-05, "loss": 1.6749, "step": 457500 }, { "epoch": 4.83, "learning_rate": 4.758329199953566e-05, "loss": 1.6881, "step": 458000 }, { "epoch": 4.84, "learning_rate": 4.758065367202423e-05, "loss": 1.5868, "step": 458500 }, { "epoch": 4.84, "learning_rate": 4.757801534451281e-05, "loss": 1.6644, "step": 459000 }, { "epoch": 4.85, "learning_rate": 4.7575377017001383e-05, "loss": 1.5954, "step": 459500 }, { "epoch": 4.85, "learning_rate": 4.757273868948996e-05, "loss": 1.5997, "step": 460000 }, { "epoch": 4.86, "learning_rate": 4.7570100361978534e-05, "loss": 1.5938, "step": 460500 }, { "epoch": 4.87, "learning_rate": 4.756746203446712e-05, "loss": 1.6152, "step": 461000 }, { "epoch": 4.87, "learning_rate": 4.756482370695569e-05, "loss": 1.6343, "step": 461500 }, { "epoch": 4.88, "learning_rate": 4.756218537944426e-05, "loss": 1.6591, "step": 462000 }, { "epoch": 4.88, "learning_rate": 4.755954705193284e-05, "loss": 1.6547, "step": 462500 }, { "epoch": 4.89, "learning_rate": 4.755690872442142e-05, "loss": 1.6118, "step": 463000 }, { "epoch": 4.89, "learning_rate": 4.7554270396909994e-05, "loss": 1.6258, "step": 463500 }, { "epoch": 4.9, "learning_rate": 4.755163206939857e-05, "loss": 1.6307, "step": 464000 }, { "epoch": 4.9, "learning_rate": 4.7548993741887145e-05, "loss": 1.6158, "step": 464500 }, { "epoch": 4.91, "learning_rate": 4.754635541437572e-05, "loss": 1.7294, "step": 465000 }, { "epoch": 4.91, "learning_rate": 4.7543717086864296e-05, "loss": 1.6328, "step": 465500 }, { "epoch": 4.92, "learning_rate": 4.754107875935288e-05, "loss": 1.6578, "step": 466000 }, { "epoch": 4.92, "learning_rate": 4.753844043184145e-05, "loss": 1.6379, "step": 466500 }, { "epoch": 4.93, "learning_rate": 4.753580210433002e-05, "loss": 1.6438, "step": 467000 }, { "epoch": 4.93, "learning_rate": 4.75331637768186e-05, "loss": 1.6156, "step": 467500 }, { "epoch": 4.94, "learning_rate": 4.753052544930718e-05, "loss": 1.6282, "step": 468000 }, { "epoch": 4.94, "learning_rate": 4.7527887121795756e-05, "loss": 1.6226, "step": 468500 }, { "epoch": 4.95, "learning_rate": 4.7525248794284324e-05, "loss": 1.6445, "step": 469000 }, { "epoch": 4.95, "learning_rate": 4.752261046677291e-05, "loss": 1.6046, "step": 469500 }, { "epoch": 4.96, "learning_rate": 4.751997213926148e-05, "loss": 1.6112, "step": 470000 }, { "epoch": 4.97, "learning_rate": 4.751733381175006e-05, "loss": 1.6427, "step": 470500 }, { "epoch": 4.97, "learning_rate": 4.751469548423863e-05, "loss": 1.6069, "step": 471000 }, { "epoch": 4.98, "learning_rate": 4.751205715672721e-05, "loss": 1.5632, "step": 471500 }, { "epoch": 4.98, "learning_rate": 4.7509418829215784e-05, "loss": 1.5718, "step": 472000 }, { "epoch": 4.99, "learning_rate": 4.750678050170436e-05, "loss": 1.6254, "step": 472500 }, { "epoch": 4.99, "learning_rate": 4.750414217419294e-05, "loss": 1.5766, "step": 473000 }, { "epoch": 5.0, "learning_rate": 4.750150384668152e-05, "loss": 1.6458, "step": 473500 }, { "epoch": 5.0, "learning_rate": 4.7498865519170086e-05, "loss": 1.6088, "step": 474000 }, { "epoch": 5.01, "learning_rate": 4.749622719165867e-05, "loss": 1.6618, "step": 474500 }, { "epoch": 5.01, "learning_rate": 4.7493588864147244e-05, "loss": 1.6301, "step": 475000 }, { "epoch": 5.02, "learning_rate": 4.749095053663582e-05, "loss": 1.5825, "step": 475500 }, { "epoch": 5.02, "learning_rate": 4.7488312209124395e-05, "loss": 1.5824, "step": 476000 }, { "epoch": 5.03, "learning_rate": 4.748567388161297e-05, "loss": 1.6586, "step": 476500 }, { "epoch": 5.03, "learning_rate": 4.7483035554101546e-05, "loss": 1.6123, "step": 477000 }, { "epoch": 5.04, "learning_rate": 4.748039722659012e-05, "loss": 1.6036, "step": 477500 }, { "epoch": 5.04, "learning_rate": 4.7477758899078704e-05, "loss": 1.6296, "step": 478000 }, { "epoch": 5.05, "learning_rate": 4.747512057156727e-05, "loss": 1.5868, "step": 478500 }, { "epoch": 5.06, "learning_rate": 4.747248224405585e-05, "loss": 1.5925, "step": 479000 }, { "epoch": 5.06, "learning_rate": 4.746984391654442e-05, "loss": 1.6261, "step": 479500 }, { "epoch": 5.07, "learning_rate": 4.7467205589033005e-05, "loss": 1.5992, "step": 480000 }, { "epoch": 5.07, "learning_rate": 4.746456726152158e-05, "loss": 1.6335, "step": 480500 }, { "epoch": 5.08, "learning_rate": 4.746192893401015e-05, "loss": 1.6818, "step": 481000 }, { "epoch": 5.08, "learning_rate": 4.745929060649873e-05, "loss": 1.62, "step": 481500 }, { "epoch": 5.09, "learning_rate": 4.745665227898731e-05, "loss": 1.6744, "step": 482000 }, { "epoch": 5.09, "learning_rate": 4.745401395147588e-05, "loss": 1.5865, "step": 482500 }, { "epoch": 5.1, "learning_rate": 4.745137562396446e-05, "loss": 1.6044, "step": 483000 }, { "epoch": 5.1, "learning_rate": 4.7448737296453034e-05, "loss": 1.5615, "step": 483500 }, { "epoch": 5.11, "learning_rate": 4.744609896894161e-05, "loss": 1.6365, "step": 484000 }, { "epoch": 5.11, "learning_rate": 4.7443460641430185e-05, "loss": 1.6027, "step": 484500 }, { "epoch": 5.12, "learning_rate": 4.744082231391877e-05, "loss": 1.5974, "step": 485000 }, { "epoch": 5.12, "learning_rate": 4.7438183986407336e-05, "loss": 1.6319, "step": 485500 }, { "epoch": 5.13, "learning_rate": 4.743554565889591e-05, "loss": 1.6239, "step": 486000 }, { "epoch": 5.13, "learning_rate": 4.7432907331384494e-05, "loss": 1.6054, "step": 486500 }, { "epoch": 5.14, "learning_rate": 4.743026900387307e-05, "loss": 1.5867, "step": 487000 }, { "epoch": 5.14, "learning_rate": 4.7427630676361645e-05, "loss": 1.6207, "step": 487500 }, { "epoch": 5.15, "learning_rate": 4.742499234885022e-05, "loss": 1.594, "step": 488000 }, { "epoch": 5.16, "learning_rate": 4.7422354021338796e-05, "loss": 1.5996, "step": 488500 }, { "epoch": 5.16, "learning_rate": 4.741971569382737e-05, "loss": 1.658, "step": 489000 }, { "epoch": 5.17, "learning_rate": 4.7417077366315947e-05, "loss": 1.6473, "step": 489500 }, { "epoch": 5.17, "learning_rate": 4.741443903880453e-05, "loss": 1.5704, "step": 490000 }, { "epoch": 5.18, "learning_rate": 4.74118007112931e-05, "loss": 1.6053, "step": 490500 }, { "epoch": 5.18, "learning_rate": 4.740916238378167e-05, "loss": 1.611, "step": 491000 }, { "epoch": 5.19, "learning_rate": 4.740652405627025e-05, "loss": 1.5511, "step": 491500 }, { "epoch": 5.19, "learning_rate": 4.740388572875883e-05, "loss": 1.555, "step": 492000 }, { "epoch": 5.2, "learning_rate": 4.7401247401247406e-05, "loss": 1.5561, "step": 492500 }, { "epoch": 5.2, "learning_rate": 4.7398609073735975e-05, "loss": 1.6389, "step": 493000 }, { "epoch": 5.21, "learning_rate": 4.739597074622456e-05, "loss": 1.6217, "step": 493500 }, { "epoch": 5.21, "learning_rate": 4.739333241871313e-05, "loss": 1.5734, "step": 494000 }, { "epoch": 5.22, "learning_rate": 4.739069409120171e-05, "loss": 1.6123, "step": 494500 }, { "epoch": 5.22, "learning_rate": 4.7388055763690284e-05, "loss": 1.6724, "step": 495000 }, { "epoch": 5.23, "learning_rate": 4.738541743617886e-05, "loss": 1.5765, "step": 495500 }, { "epoch": 5.23, "learning_rate": 4.7382779108667435e-05, "loss": 1.5895, "step": 496000 }, { "epoch": 5.24, "learning_rate": 4.738014078115601e-05, "loss": 1.6198, "step": 496500 }, { "epoch": 5.24, "learning_rate": 4.737750245364459e-05, "loss": 1.6433, "step": 497000 }, { "epoch": 5.25, "learning_rate": 4.737486412613316e-05, "loss": 1.5787, "step": 497500 }, { "epoch": 5.26, "learning_rate": 4.7372225798621737e-05, "loss": 1.6067, "step": 498000 }, { "epoch": 5.26, "learning_rate": 4.736958747111032e-05, "loss": 1.6044, "step": 498500 }, { "epoch": 5.27, "learning_rate": 4.7366949143598894e-05, "loss": 1.5846, "step": 499000 }, { "epoch": 5.27, "learning_rate": 4.736431081608747e-05, "loss": 1.546, "step": 499500 }, { "epoch": 5.28, "learning_rate": 4.7361672488576045e-05, "loss": 1.6272, "step": 500000 }, { "epoch": 5.28, "learning_rate": 4.735903416106462e-05, "loss": 1.5858, "step": 500500 }, { "epoch": 5.29, "learning_rate": 4.7356395833553196e-05, "loss": 1.5824, "step": 501000 }, { "epoch": 5.29, "learning_rate": 4.735375750604177e-05, "loss": 1.6254, "step": 501500 }, { "epoch": 5.3, "learning_rate": 4.7351119178530354e-05, "loss": 1.6031, "step": 502000 }, { "epoch": 5.3, "learning_rate": 4.734848085101892e-05, "loss": 1.58, "step": 502500 }, { "epoch": 5.31, "learning_rate": 4.73458425235075e-05, "loss": 1.6577, "step": 503000 }, { "epoch": 5.31, "learning_rate": 4.7343204195996074e-05, "loss": 1.6133, "step": 503500 }, { "epoch": 5.32, "learning_rate": 4.7340565868484656e-05, "loss": 1.5924, "step": 504000 }, { "epoch": 5.32, "learning_rate": 4.7337927540973225e-05, "loss": 1.6244, "step": 504500 }, { "epoch": 5.33, "learning_rate": 4.73352892134618e-05, "loss": 1.566, "step": 505000 }, { "epoch": 5.33, "learning_rate": 4.733265088595038e-05, "loss": 1.5824, "step": 505500 }, { "epoch": 5.34, "learning_rate": 4.733001255843896e-05, "loss": 1.6317, "step": 506000 }, { "epoch": 5.35, "learning_rate": 4.7327374230927533e-05, "loss": 1.6222, "step": 506500 }, { "epoch": 5.35, "learning_rate": 4.732473590341611e-05, "loss": 1.6029, "step": 507000 }, { "epoch": 5.36, "learning_rate": 4.7322097575904684e-05, "loss": 1.6218, "step": 507500 }, { "epoch": 5.36, "learning_rate": 4.731945924839326e-05, "loss": 1.65, "step": 508000 }, { "epoch": 5.37, "learning_rate": 4.7316820920881835e-05, "loss": 1.6045, "step": 508500 }, { "epoch": 5.37, "learning_rate": 4.731418259337042e-05, "loss": 1.6085, "step": 509000 }, { "epoch": 5.38, "learning_rate": 4.7311544265858986e-05, "loss": 1.5934, "step": 509500 }, { "epoch": 5.38, "learning_rate": 4.730890593834756e-05, "loss": 1.6569, "step": 510000 }, { "epoch": 5.39, "learning_rate": 4.7306267610836144e-05, "loss": 1.6161, "step": 510500 }, { "epoch": 5.39, "learning_rate": 4.730362928332472e-05, "loss": 1.6312, "step": 511000 }, { "epoch": 5.4, "learning_rate": 4.7300990955813295e-05, "loss": 1.5641, "step": 511500 }, { "epoch": 5.4, "learning_rate": 4.729835262830187e-05, "loss": 1.5925, "step": 512000 }, { "epoch": 5.41, "learning_rate": 4.7295714300790446e-05, "loss": 1.5405, "step": 512500 }, { "epoch": 5.41, "learning_rate": 4.729307597327902e-05, "loss": 1.5908, "step": 513000 }, { "epoch": 5.42, "learning_rate": 4.72904376457676e-05, "loss": 1.6157, "step": 513500 }, { "epoch": 5.42, "learning_rate": 4.728779931825617e-05, "loss": 1.599, "step": 514000 }, { "epoch": 5.43, "learning_rate": 4.728516099074475e-05, "loss": 1.6312, "step": 514500 }, { "epoch": 5.43, "learning_rate": 4.7282522663233323e-05, "loss": 1.5281, "step": 515000 }, { "epoch": 5.44, "learning_rate": 4.7279884335721906e-05, "loss": 1.6089, "step": 515500 }, { "epoch": 5.45, "learning_rate": 4.727724600821048e-05, "loss": 1.598, "step": 516000 }, { "epoch": 5.45, "learning_rate": 4.727460768069905e-05, "loss": 1.5862, "step": 516500 }, { "epoch": 5.46, "learning_rate": 4.7271969353187625e-05, "loss": 1.6163, "step": 517000 }, { "epoch": 5.46, "learning_rate": 4.726933102567621e-05, "loss": 1.5934, "step": 517500 }, { "epoch": 5.47, "learning_rate": 4.726669269816478e-05, "loss": 1.5813, "step": 518000 }, { "epoch": 5.47, "learning_rate": 4.726405437065336e-05, "loss": 1.6065, "step": 518500 }, { "epoch": 5.48, "learning_rate": 4.7261416043141934e-05, "loss": 1.5972, "step": 519000 }, { "epoch": 5.48, "learning_rate": 4.725877771563051e-05, "loss": 1.5867, "step": 519500 }, { "epoch": 5.49, "learning_rate": 4.7256139388119085e-05, "loss": 1.5534, "step": 520000 }, { "epoch": 5.49, "learning_rate": 4.725350106060766e-05, "loss": 1.5987, "step": 520500 }, { "epoch": 5.5, "learning_rate": 4.725086273309624e-05, "loss": 1.6481, "step": 521000 }, { "epoch": 5.5, "learning_rate": 4.724822440558481e-05, "loss": 1.6056, "step": 521500 }, { "epoch": 5.51, "learning_rate": 4.724558607807339e-05, "loss": 1.65, "step": 522000 }, { "epoch": 5.51, "learning_rate": 4.724294775056197e-05, "loss": 1.6141, "step": 522500 }, { "epoch": 5.52, "learning_rate": 4.7240309423050545e-05, "loss": 1.6122, "step": 523000 }, { "epoch": 5.52, "learning_rate": 4.7237671095539114e-05, "loss": 1.5758, "step": 523500 }, { "epoch": 5.53, "learning_rate": 4.7235032768027696e-05, "loss": 1.6405, "step": 524000 }, { "epoch": 5.54, "learning_rate": 4.723239444051627e-05, "loss": 1.6152, "step": 524500 }, { "epoch": 5.54, "learning_rate": 4.722975611300485e-05, "loss": 1.6285, "step": 525000 }, { "epoch": 5.55, "learning_rate": 4.722711778549342e-05, "loss": 1.5927, "step": 525500 }, { "epoch": 5.55, "learning_rate": 4.7224479457982e-05, "loss": 1.6235, "step": 526000 }, { "epoch": 5.56, "learning_rate": 4.722184113047057e-05, "loss": 1.6307, "step": 526500 }, { "epoch": 5.56, "learning_rate": 4.721920280295915e-05, "loss": 1.5789, "step": 527000 }, { "epoch": 5.57, "learning_rate": 4.721656447544773e-05, "loss": 1.6399, "step": 527500 }, { "epoch": 5.57, "learning_rate": 4.7213926147936306e-05, "loss": 1.5424, "step": 528000 }, { "epoch": 5.58, "learning_rate": 4.7211287820424875e-05, "loss": 1.6026, "step": 528500 }, { "epoch": 5.58, "learning_rate": 4.720864949291345e-05, "loss": 1.6079, "step": 529000 }, { "epoch": 5.59, "learning_rate": 4.720601116540203e-05, "loss": 1.5942, "step": 529500 }, { "epoch": 5.59, "learning_rate": 4.720337283789061e-05, "loss": 1.5539, "step": 530000 }, { "epoch": 5.6, "learning_rate": 4.7200734510379184e-05, "loss": 1.5898, "step": 530500 }, { "epoch": 5.6, "learning_rate": 4.719809618286776e-05, "loss": 1.5718, "step": 531000 }, { "epoch": 5.61, "learning_rate": 4.7195457855356335e-05, "loss": 1.6116, "step": 531500 }, { "epoch": 5.61, "learning_rate": 4.719281952784491e-05, "loss": 1.5543, "step": 532000 }, { "epoch": 5.62, "learning_rate": 4.7190181200333486e-05, "loss": 1.5653, "step": 532500 }, { "epoch": 5.62, "learning_rate": 4.718754287282206e-05, "loss": 1.595, "step": 533000 }, { "epoch": 5.63, "learning_rate": 4.718490454531064e-05, "loss": 1.6734, "step": 533500 }, { "epoch": 5.64, "learning_rate": 4.718226621779921e-05, "loss": 1.6143, "step": 534000 }, { "epoch": 5.64, "learning_rate": 4.7179627890287795e-05, "loss": 1.6051, "step": 534500 }, { "epoch": 5.65, "learning_rate": 4.717698956277637e-05, "loss": 1.597, "step": 535000 }, { "epoch": 5.65, "learning_rate": 4.717435123526494e-05, "loss": 1.5881, "step": 535500 }, { "epoch": 5.66, "learning_rate": 4.717171290775352e-05, "loss": 1.6141, "step": 536000 }, { "epoch": 5.66, "learning_rate": 4.7169074580242097e-05, "loss": 1.6107, "step": 536500 }, { "epoch": 5.67, "learning_rate": 4.716643625273067e-05, "loss": 1.6004, "step": 537000 }, { "epoch": 5.67, "learning_rate": 4.716379792521925e-05, "loss": 1.5663, "step": 537500 }, { "epoch": 5.68, "learning_rate": 4.716115959770782e-05, "loss": 1.6131, "step": 538000 }, { "epoch": 5.68, "learning_rate": 4.71585212701964e-05, "loss": 1.6065, "step": 538500 }, { "epoch": 5.69, "learning_rate": 4.7155882942684974e-05, "loss": 1.5881, "step": 539000 }, { "epoch": 5.69, "learning_rate": 4.7153244615173556e-05, "loss": 1.6044, "step": 539500 }, { "epoch": 5.7, "learning_rate": 4.715060628766213e-05, "loss": 1.6107, "step": 540000 }, { "epoch": 5.7, "learning_rate": 4.71479679601507e-05, "loss": 1.5912, "step": 540500 }, { "epoch": 5.71, "learning_rate": 4.7145329632639276e-05, "loss": 1.6306, "step": 541000 }, { "epoch": 5.71, "learning_rate": 4.714269130512786e-05, "loss": 1.6232, "step": 541500 }, { "epoch": 5.72, "learning_rate": 4.7140052977616434e-05, "loss": 1.6587, "step": 542000 }, { "epoch": 5.73, "learning_rate": 4.7137414650105e-05, "loss": 1.5613, "step": 542500 }, { "epoch": 5.73, "learning_rate": 4.7134776322593585e-05, "loss": 1.6087, "step": 543000 }, { "epoch": 5.74, "learning_rate": 4.713213799508216e-05, "loss": 1.6141, "step": 543500 }, { "epoch": 5.74, "learning_rate": 4.7129499667570736e-05, "loss": 1.5854, "step": 544000 }, { "epoch": 5.75, "learning_rate": 4.712686134005931e-05, "loss": 1.6353, "step": 544500 }, { "epoch": 5.75, "learning_rate": 4.7124223012547887e-05, "loss": 1.6345, "step": 545000 }, { "epoch": 5.76, "learning_rate": 4.712158468503646e-05, "loss": 1.5875, "step": 545500 }, { "epoch": 5.76, "learning_rate": 4.711894635752504e-05, "loss": 1.5819, "step": 546000 }, { "epoch": 5.77, "learning_rate": 4.711630803001362e-05, "loss": 1.5707, "step": 546500 }, { "epoch": 5.77, "learning_rate": 4.7113669702502195e-05, "loss": 1.6411, "step": 547000 }, { "epoch": 5.78, "learning_rate": 4.7111031374990764e-05, "loss": 1.5361, "step": 547500 }, { "epoch": 5.78, "learning_rate": 4.7108393047479346e-05, "loss": 1.6249, "step": 548000 }, { "epoch": 5.79, "learning_rate": 4.710575471996792e-05, "loss": 1.5848, "step": 548500 }, { "epoch": 5.79, "learning_rate": 4.71031163924565e-05, "loss": 1.5607, "step": 549000 }, { "epoch": 5.8, "learning_rate": 4.710047806494507e-05, "loss": 1.5965, "step": 549500 }, { "epoch": 5.8, "learning_rate": 4.709783973743365e-05, "loss": 1.5437, "step": 550000 }, { "epoch": 5.81, "learning_rate": 4.7095201409922224e-05, "loss": 1.6159, "step": 550500 }, { "epoch": 5.81, "learning_rate": 4.70925630824108e-05, "loss": 1.561, "step": 551000 }, { "epoch": 5.82, "learning_rate": 4.708992475489938e-05, "loss": 1.5703, "step": 551500 }, { "epoch": 5.83, "learning_rate": 4.708728642738795e-05, "loss": 1.5424, "step": 552000 }, { "epoch": 5.83, "learning_rate": 4.7084648099876526e-05, "loss": 1.5926, "step": 552500 }, { "epoch": 5.84, "learning_rate": 4.70820097723651e-05, "loss": 1.5715, "step": 553000 }, { "epoch": 5.84, "learning_rate": 4.7079371444853683e-05, "loss": 1.6255, "step": 553500 }, { "epoch": 5.85, "learning_rate": 4.707673311734226e-05, "loss": 1.55, "step": 554000 }, { "epoch": 5.85, "learning_rate": 4.707409478983083e-05, "loss": 1.5837, "step": 554500 }, { "epoch": 5.86, "learning_rate": 4.707145646231941e-05, "loss": 1.5679, "step": 555000 }, { "epoch": 5.86, "learning_rate": 4.7068818134807985e-05, "loss": 1.5952, "step": 555500 }, { "epoch": 5.87, "learning_rate": 4.706617980729656e-05, "loss": 1.6066, "step": 556000 }, { "epoch": 5.87, "learning_rate": 4.7063541479785136e-05, "loss": 1.6008, "step": 556500 }, { "epoch": 5.88, "learning_rate": 4.706090315227371e-05, "loss": 1.5693, "step": 557000 }, { "epoch": 5.88, "learning_rate": 4.705826482476229e-05, "loss": 1.6092, "step": 557500 }, { "epoch": 5.89, "learning_rate": 4.705562649725086e-05, "loss": 1.6026, "step": 558000 }, { "epoch": 5.89, "learning_rate": 4.7052988169739445e-05, "loss": 1.5661, "step": 558500 }, { "epoch": 5.9, "learning_rate": 4.705034984222802e-05, "loss": 1.5723, "step": 559000 }, { "epoch": 5.9, "learning_rate": 4.704771151471659e-05, "loss": 1.5804, "step": 559500 }, { "epoch": 5.91, "learning_rate": 4.704507318720517e-05, "loss": 1.6379, "step": 560000 }, { "epoch": 5.92, "learning_rate": 4.704243485969375e-05, "loss": 1.5815, "step": 560500 }, { "epoch": 5.92, "learning_rate": 4.703979653218232e-05, "loss": 1.5482, "step": 561000 }, { "epoch": 5.93, "learning_rate": 4.70371582046709e-05, "loss": 1.5436, "step": 561500 }, { "epoch": 5.93, "learning_rate": 4.7034519877159473e-05, "loss": 1.5782, "step": 562000 }, { "epoch": 5.94, "learning_rate": 4.703188154964805e-05, "loss": 1.6076, "step": 562500 }, { "epoch": 5.94, "learning_rate": 4.7029243222136624e-05, "loss": 1.5135, "step": 563000 }, { "epoch": 5.95, "learning_rate": 4.702660489462521e-05, "loss": 1.571, "step": 563500 }, { "epoch": 5.95, "learning_rate": 4.7023966567113775e-05, "loss": 1.597, "step": 564000 }, { "epoch": 5.96, "learning_rate": 4.702132823960235e-05, "loss": 1.5486, "step": 564500 }, { "epoch": 5.96, "learning_rate": 4.7018689912090926e-05, "loss": 1.5737, "step": 565000 }, { "epoch": 5.97, "learning_rate": 4.701605158457951e-05, "loss": 1.6054, "step": 565500 }, { "epoch": 5.97, "learning_rate": 4.7013413257068084e-05, "loss": 1.613, "step": 566000 }, { "epoch": 5.98, "learning_rate": 4.701077492955665e-05, "loss": 1.5358, "step": 566500 }, { "epoch": 5.98, "learning_rate": 4.7008136602045235e-05, "loss": 1.555, "step": 567000 }, { "epoch": 5.99, "learning_rate": 4.700549827453381e-05, "loss": 1.5899, "step": 567500 }, { "epoch": 5.99, "learning_rate": 4.7002859947022386e-05, "loss": 1.5689, "step": 568000 }, { "epoch": 6.0, "learning_rate": 4.700022161951096e-05, "loss": 1.5863, "step": 568500 }, { "epoch": 6.0, "learning_rate": 4.699758329199954e-05, "loss": 1.5649, "step": 569000 }, { "epoch": 6.01, "learning_rate": 4.699494496448811e-05, "loss": 1.5337, "step": 569500 }, { "epoch": 6.02, "learning_rate": 4.699230663697669e-05, "loss": 1.5289, "step": 570000 }, { "epoch": 6.02, "learning_rate": 4.698966830946527e-05, "loss": 1.5854, "step": 570500 }, { "epoch": 6.03, "learning_rate": 4.698702998195384e-05, "loss": 1.5638, "step": 571000 }, { "epoch": 6.03, "learning_rate": 4.6984391654442415e-05, "loss": 1.5806, "step": 571500 }, { "epoch": 6.04, "learning_rate": 4.6981753326931e-05, "loss": 1.5922, "step": 572000 }, { "epoch": 6.04, "learning_rate": 4.697911499941957e-05, "loss": 1.5267, "step": 572500 }, { "epoch": 6.05, "learning_rate": 4.697647667190815e-05, "loss": 1.5361, "step": 573000 }, { "epoch": 6.05, "learning_rate": 4.697383834439672e-05, "loss": 1.5375, "step": 573500 }, { "epoch": 6.06, "learning_rate": 4.69712000168853e-05, "loss": 1.5971, "step": 574000 }, { "epoch": 6.06, "learning_rate": 4.6968561689373874e-05, "loss": 1.5501, "step": 574500 }, { "epoch": 6.07, "learning_rate": 4.696592336186245e-05, "loss": 1.6159, "step": 575000 }, { "epoch": 6.07, "learning_rate": 4.696328503435103e-05, "loss": 1.5343, "step": 575500 }, { "epoch": 6.08, "learning_rate": 4.69606467068396e-05, "loss": 1.5853, "step": 576000 }, { "epoch": 6.08, "learning_rate": 4.6958008379328176e-05, "loss": 1.5436, "step": 576500 }, { "epoch": 6.09, "learning_rate": 4.695537005181675e-05, "loss": 1.5941, "step": 577000 }, { "epoch": 6.09, "learning_rate": 4.6952731724305334e-05, "loss": 1.5695, "step": 577500 }, { "epoch": 6.1, "learning_rate": 4.695009339679391e-05, "loss": 1.5933, "step": 578000 }, { "epoch": 6.11, "learning_rate": 4.694745506928248e-05, "loss": 1.6093, "step": 578500 }, { "epoch": 6.11, "learning_rate": 4.694481674177106e-05, "loss": 1.5673, "step": 579000 }, { "epoch": 6.12, "learning_rate": 4.6942178414259636e-05, "loss": 1.5577, "step": 579500 }, { "epoch": 6.12, "learning_rate": 4.693954008674821e-05, "loss": 1.5665, "step": 580000 }, { "epoch": 6.13, "learning_rate": 4.693690175923679e-05, "loss": 1.5725, "step": 580500 }, { "epoch": 6.13, "learning_rate": 4.693426343172536e-05, "loss": 1.5562, "step": 581000 }, { "epoch": 6.14, "learning_rate": 4.693162510421394e-05, "loss": 1.546, "step": 581500 }, { "epoch": 6.14, "learning_rate": 4.692898677670251e-05, "loss": 1.5711, "step": 582000 }, { "epoch": 6.15, "learning_rate": 4.6926348449191096e-05, "loss": 1.5826, "step": 582500 }, { "epoch": 6.15, "learning_rate": 4.6923710121679664e-05, "loss": 1.5373, "step": 583000 }, { "epoch": 6.16, "learning_rate": 4.692107179416824e-05, "loss": 1.5748, "step": 583500 }, { "epoch": 6.16, "learning_rate": 4.691843346665682e-05, "loss": 1.5308, "step": 584000 }, { "epoch": 6.17, "learning_rate": 4.69157951391454e-05, "loss": 1.6058, "step": 584500 }, { "epoch": 6.17, "learning_rate": 4.691315681163397e-05, "loss": 1.5243, "step": 585000 }, { "epoch": 6.18, "learning_rate": 4.691051848412255e-05, "loss": 1.504, "step": 585500 }, { "epoch": 6.18, "learning_rate": 4.6907880156611124e-05, "loss": 1.5225, "step": 586000 }, { "epoch": 6.19, "learning_rate": 4.69052418290997e-05, "loss": 1.5264, "step": 586500 }, { "epoch": 6.19, "learning_rate": 4.6902603501588275e-05, "loss": 1.5486, "step": 587000 }, { "epoch": 6.2, "learning_rate": 4.689996517407686e-05, "loss": 1.5715, "step": 587500 }, { "epoch": 6.21, "learning_rate": 4.6897326846565426e-05, "loss": 1.5754, "step": 588000 }, { "epoch": 6.21, "learning_rate": 4.6894688519054e-05, "loss": 1.5968, "step": 588500 }, { "epoch": 6.22, "learning_rate": 4.689205019154258e-05, "loss": 1.5428, "step": 589000 }, { "epoch": 6.22, "learning_rate": 4.688941186403116e-05, "loss": 1.5625, "step": 589500 }, { "epoch": 6.23, "learning_rate": 4.688677353651973e-05, "loss": 1.5252, "step": 590000 }, { "epoch": 6.23, "learning_rate": 4.68841352090083e-05, "loss": 1.5933, "step": 590500 }, { "epoch": 6.24, "learning_rate": 4.6881496881496886e-05, "loss": 1.5401, "step": 591000 }, { "epoch": 6.24, "learning_rate": 4.687885855398546e-05, "loss": 1.5656, "step": 591500 }, { "epoch": 6.25, "learning_rate": 4.6876220226474037e-05, "loss": 1.6269, "step": 592000 }, { "epoch": 6.25, "learning_rate": 4.687358189896261e-05, "loss": 1.5654, "step": 592500 }, { "epoch": 6.26, "learning_rate": 4.687094357145119e-05, "loss": 1.5573, "step": 593000 }, { "epoch": 6.26, "learning_rate": 4.686830524393976e-05, "loss": 1.5655, "step": 593500 }, { "epoch": 6.27, "learning_rate": 4.686566691642834e-05, "loss": 1.5632, "step": 594000 }, { "epoch": 6.27, "learning_rate": 4.686302858891692e-05, "loss": 1.609, "step": 594500 }, { "epoch": 6.28, "learning_rate": 4.686039026140549e-05, "loss": 1.548, "step": 595000 }, { "epoch": 6.28, "learning_rate": 4.6857751933894065e-05, "loss": 1.5888, "step": 595500 }, { "epoch": 6.29, "learning_rate": 4.685511360638265e-05, "loss": 1.5434, "step": 596000 }, { "epoch": 6.3, "learning_rate": 4.685247527887122e-05, "loss": 1.5855, "step": 596500 }, { "epoch": 6.3, "learning_rate": 4.68498369513598e-05, "loss": 1.6229, "step": 597000 }, { "epoch": 6.31, "learning_rate": 4.6847198623848374e-05, "loss": 1.5858, "step": 597500 }, { "epoch": 6.31, "learning_rate": 4.684456029633695e-05, "loss": 1.5795, "step": 598000 }, { "epoch": 6.32, "learning_rate": 4.6841921968825525e-05, "loss": 1.5646, "step": 598500 }, { "epoch": 6.32, "learning_rate": 4.68392836413141e-05, "loss": 1.5194, "step": 599000 }, { "epoch": 6.33, "learning_rate": 4.6836645313802676e-05, "loss": 1.5501, "step": 599500 }, { "epoch": 6.33, "learning_rate": 4.683400698629125e-05, "loss": 1.5627, "step": 600000 }, { "epoch": 6.34, "learning_rate": 4.683136865877983e-05, "loss": 1.5277, "step": 600500 }, { "epoch": 6.34, "learning_rate": 4.682873033126841e-05, "loss": 1.6273, "step": 601000 }, { "epoch": 6.35, "learning_rate": 4.6826092003756984e-05, "loss": 1.6298, "step": 601500 }, { "epoch": 6.35, "learning_rate": 4.682345367624555e-05, "loss": 1.5655, "step": 602000 }, { "epoch": 6.36, "learning_rate": 4.682081534873413e-05, "loss": 1.583, "step": 602500 }, { "epoch": 6.36, "learning_rate": 4.681817702122271e-05, "loss": 1.5916, "step": 603000 }, { "epoch": 6.37, "learning_rate": 4.6815538693711286e-05, "loss": 1.5802, "step": 603500 }, { "epoch": 6.37, "learning_rate": 4.681290036619986e-05, "loss": 1.592, "step": 604000 }, { "epoch": 6.38, "learning_rate": 4.681026203868844e-05, "loss": 1.5806, "step": 604500 }, { "epoch": 6.38, "learning_rate": 4.680762371117701e-05, "loss": 1.5353, "step": 605000 }, { "epoch": 6.39, "learning_rate": 4.680498538366559e-05, "loss": 1.6011, "step": 605500 }, { "epoch": 6.4, "learning_rate": 4.6802347056154164e-05, "loss": 1.6063, "step": 606000 }, { "epoch": 6.4, "learning_rate": 4.6799708728642746e-05, "loss": 1.565, "step": 606500 }, { "epoch": 6.41, "learning_rate": 4.6797070401131315e-05, "loss": 1.5239, "step": 607000 }, { "epoch": 6.41, "learning_rate": 4.679443207361989e-05, "loss": 1.5482, "step": 607500 }, { "epoch": 6.42, "learning_rate": 4.679179374610847e-05, "loss": 1.5685, "step": 608000 }, { "epoch": 6.42, "learning_rate": 4.678915541859705e-05, "loss": 1.6324, "step": 608500 }, { "epoch": 6.43, "learning_rate": 4.678651709108562e-05, "loss": 1.6039, "step": 609000 }, { "epoch": 6.43, "learning_rate": 4.67838787635742e-05, "loss": 1.579, "step": 609500 }, { "epoch": 6.44, "learning_rate": 4.6781240436062774e-05, "loss": 1.5673, "step": 610000 }, { "epoch": 6.44, "learning_rate": 4.677860210855135e-05, "loss": 1.5833, "step": 610500 }, { "epoch": 6.45, "learning_rate": 4.6775963781039925e-05, "loss": 1.5358, "step": 611000 }, { "epoch": 6.45, "learning_rate": 4.67733254535285e-05, "loss": 1.5704, "step": 611500 }, { "epoch": 6.46, "learning_rate": 4.6770687126017076e-05, "loss": 1.5536, "step": 612000 }, { "epoch": 6.46, "learning_rate": 4.676804879850565e-05, "loss": 1.5397, "step": 612500 }, { "epoch": 6.47, "learning_rate": 4.6765410470994234e-05, "loss": 1.5453, "step": 613000 }, { "epoch": 6.47, "learning_rate": 4.676277214348281e-05, "loss": 1.542, "step": 613500 }, { "epoch": 6.48, "learning_rate": 4.676013381597138e-05, "loss": 1.5655, "step": 614000 }, { "epoch": 6.49, "learning_rate": 4.6757495488459954e-05, "loss": 1.5586, "step": 614500 }, { "epoch": 6.49, "learning_rate": 4.6754857160948536e-05, "loss": 1.5729, "step": 615000 }, { "epoch": 6.5, "learning_rate": 4.675221883343711e-05, "loss": 1.5618, "step": 615500 }, { "epoch": 6.5, "learning_rate": 4.674958050592568e-05, "loss": 1.5798, "step": 616000 }, { "epoch": 6.51, "learning_rate": 4.674694217841426e-05, "loss": 1.5853, "step": 616500 }, { "epoch": 6.51, "learning_rate": 4.674430385090284e-05, "loss": 1.5419, "step": 617000 }, { "epoch": 6.52, "learning_rate": 4.6741665523391414e-05, "loss": 1.527, "step": 617500 }, { "epoch": 6.52, "learning_rate": 4.673902719587999e-05, "loss": 1.5385, "step": 618000 }, { "epoch": 6.53, "learning_rate": 4.6736388868368565e-05, "loss": 1.621, "step": 618500 }, { "epoch": 6.53, "learning_rate": 4.673375054085714e-05, "loss": 1.5795, "step": 619000 }, { "epoch": 6.54, "learning_rate": 4.6731112213345715e-05, "loss": 1.5566, "step": 619500 }, { "epoch": 6.54, "learning_rate": 4.67284738858343e-05, "loss": 1.5682, "step": 620000 }, { "epoch": 6.55, "learning_rate": 4.672583555832287e-05, "loss": 1.6056, "step": 620500 }, { "epoch": 6.55, "learning_rate": 4.672319723081144e-05, "loss": 1.5953, "step": 621000 }, { "epoch": 6.56, "learning_rate": 4.6720558903300024e-05, "loss": 1.5685, "step": 621500 }, { "epoch": 6.56, "learning_rate": 4.67179205757886e-05, "loss": 1.5738, "step": 622000 }, { "epoch": 6.57, "learning_rate": 4.6715282248277175e-05, "loss": 1.5062, "step": 622500 }, { "epoch": 6.57, "learning_rate": 4.671264392076575e-05, "loss": 1.5813, "step": 623000 }, { "epoch": 6.58, "learning_rate": 4.6710005593254326e-05, "loss": 1.5521, "step": 623500 }, { "epoch": 6.59, "learning_rate": 4.67073672657429e-05, "loss": 1.5735, "step": 624000 }, { "epoch": 6.59, "learning_rate": 4.670472893823148e-05, "loss": 1.5525, "step": 624500 }, { "epoch": 6.6, "learning_rate": 4.670209061072006e-05, "loss": 1.5548, "step": 625000 }, { "epoch": 6.6, "learning_rate": 4.669945228320863e-05, "loss": 1.6289, "step": 625500 }, { "epoch": 6.61, "learning_rate": 4.6696813955697204e-05, "loss": 1.5233, "step": 626000 }, { "epoch": 6.61, "learning_rate": 4.669417562818578e-05, "loss": 1.5918, "step": 626500 }, { "epoch": 6.62, "learning_rate": 4.669153730067436e-05, "loss": 1.566, "step": 627000 }, { "epoch": 6.62, "learning_rate": 4.668889897316294e-05, "loss": 1.58, "step": 627500 }, { "epoch": 6.63, "learning_rate": 4.6686260645651506e-05, "loss": 1.5543, "step": 628000 }, { "epoch": 6.63, "learning_rate": 4.668362231814009e-05, "loss": 1.5612, "step": 628500 }, { "epoch": 6.64, "learning_rate": 4.668098399062866e-05, "loss": 1.5311, "step": 629000 }, { "epoch": 6.64, "learning_rate": 4.667834566311724e-05, "loss": 1.5294, "step": 629500 }, { "epoch": 6.65, "learning_rate": 4.6675707335605814e-05, "loss": 1.5237, "step": 630000 }, { "epoch": 6.65, "learning_rate": 4.667306900809439e-05, "loss": 1.6066, "step": 630500 }, { "epoch": 6.66, "learning_rate": 4.6670430680582965e-05, "loss": 1.5881, "step": 631000 }, { "epoch": 6.66, "learning_rate": 4.666779235307154e-05, "loss": 1.507, "step": 631500 }, { "epoch": 6.67, "learning_rate": 4.666515402556012e-05, "loss": 1.5388, "step": 632000 }, { "epoch": 6.67, "learning_rate": 4.66625156980487e-05, "loss": 1.535, "step": 632500 }, { "epoch": 6.68, "learning_rate": 4.665987737053727e-05, "loss": 1.5537, "step": 633000 }, { "epoch": 6.69, "learning_rate": 4.665723904302585e-05, "loss": 1.5126, "step": 633500 }, { "epoch": 6.69, "learning_rate": 4.6654600715514425e-05, "loss": 1.5695, "step": 634000 }, { "epoch": 6.7, "learning_rate": 4.6651962388003e-05, "loss": 1.4985, "step": 634500 }, { "epoch": 6.7, "learning_rate": 4.6649324060491576e-05, "loss": 1.5456, "step": 635000 }, { "epoch": 6.71, "learning_rate": 4.664668573298015e-05, "loss": 1.5373, "step": 635500 }, { "epoch": 6.71, "learning_rate": 4.664404740546873e-05, "loss": 1.5359, "step": 636000 }, { "epoch": 6.72, "learning_rate": 4.66414090779573e-05, "loss": 1.533, "step": 636500 }, { "epoch": 6.72, "learning_rate": 4.6638770750445885e-05, "loss": 1.5453, "step": 637000 }, { "epoch": 6.73, "learning_rate": 4.663613242293445e-05, "loss": 1.5355, "step": 637500 }, { "epoch": 6.73, "learning_rate": 4.663349409542303e-05, "loss": 1.5728, "step": 638000 }, { "epoch": 6.74, "learning_rate": 4.6630855767911604e-05, "loss": 1.6012, "step": 638500 }, { "epoch": 6.74, "learning_rate": 4.6628217440400187e-05, "loss": 1.5804, "step": 639000 }, { "epoch": 6.75, "learning_rate": 4.662557911288876e-05, "loss": 1.6098, "step": 639500 }, { "epoch": 6.75, "learning_rate": 4.662294078537733e-05, "loss": 1.5671, "step": 640000 }, { "epoch": 6.76, "learning_rate": 4.662030245786591e-05, "loss": 1.5756, "step": 640500 }, { "epoch": 6.76, "learning_rate": 4.661766413035449e-05, "loss": 1.5261, "step": 641000 }, { "epoch": 6.77, "learning_rate": 4.6615025802843064e-05, "loss": 1.5968, "step": 641500 }, { "epoch": 6.78, "learning_rate": 4.661238747533164e-05, "loss": 1.5959, "step": 642000 }, { "epoch": 6.78, "learning_rate": 4.6609749147820215e-05, "loss": 1.5485, "step": 642500 }, { "epoch": 6.79, "learning_rate": 4.660711082030879e-05, "loss": 1.4645, "step": 643000 }, { "epoch": 6.79, "learning_rate": 4.6604472492797366e-05, "loss": 1.6253, "step": 643500 }, { "epoch": 6.8, "learning_rate": 4.660183416528595e-05, "loss": 1.542, "step": 644000 }, { "epoch": 6.8, "learning_rate": 4.659919583777452e-05, "loss": 1.5557, "step": 644500 }, { "epoch": 6.81, "learning_rate": 4.659655751026309e-05, "loss": 1.5647, "step": 645000 }, { "epoch": 6.81, "learning_rate": 4.6593919182751675e-05, "loss": 1.5418, "step": 645500 }, { "epoch": 6.82, "learning_rate": 4.659128085524025e-05, "loss": 1.5518, "step": 646000 }, { "epoch": 6.82, "learning_rate": 4.6588642527728826e-05, "loss": 1.5586, "step": 646500 }, { "epoch": 6.83, "learning_rate": 4.65860042002174e-05, "loss": 1.5456, "step": 647000 }, { "epoch": 6.83, "learning_rate": 4.658336587270598e-05, "loss": 1.5916, "step": 647500 }, { "epoch": 6.84, "learning_rate": 4.658072754519455e-05, "loss": 1.5603, "step": 648000 }, { "epoch": 6.84, "learning_rate": 4.657808921768313e-05, "loss": 1.553, "step": 648500 }, { "epoch": 6.85, "learning_rate": 4.657545089017171e-05, "loss": 1.5874, "step": 649000 }, { "epoch": 6.85, "learning_rate": 4.657281256266028e-05, "loss": 1.5526, "step": 649500 }, { "epoch": 6.86, "learning_rate": 4.6570174235148854e-05, "loss": 1.5855, "step": 650000 }, { "epoch": 6.86, "learning_rate": 4.656753590763743e-05, "loss": 1.5749, "step": 650500 }, { "epoch": 6.87, "learning_rate": 4.656489758012601e-05, "loss": 1.5072, "step": 651000 }, { "epoch": 6.88, "learning_rate": 4.656225925261459e-05, "loss": 1.549, "step": 651500 }, { "epoch": 6.88, "learning_rate": 4.6559620925103156e-05, "loss": 1.5683, "step": 652000 }, { "epoch": 6.89, "learning_rate": 4.655698259759174e-05, "loss": 1.6031, "step": 652500 }, { "epoch": 6.89, "learning_rate": 4.6554344270080314e-05, "loss": 1.584, "step": 653000 }, { "epoch": 6.9, "learning_rate": 4.655170594256889e-05, "loss": 1.6017, "step": 653500 }, { "epoch": 6.9, "learning_rate": 4.6549067615057465e-05, "loss": 1.5826, "step": 654000 }, { "epoch": 6.91, "learning_rate": 4.654642928754604e-05, "loss": 1.5626, "step": 654500 }, { "epoch": 6.91, "learning_rate": 4.6543790960034616e-05, "loss": 1.5541, "step": 655000 }, { "epoch": 6.92, "learning_rate": 4.654115263252319e-05, "loss": 1.5948, "step": 655500 }, { "epoch": 6.92, "learning_rate": 4.6538514305011773e-05, "loss": 1.5998, "step": 656000 }, { "epoch": 6.93, "learning_rate": 4.653587597750034e-05, "loss": 1.5461, "step": 656500 }, { "epoch": 6.93, "learning_rate": 4.653323764998892e-05, "loss": 1.5436, "step": 657000 }, { "epoch": 6.94, "learning_rate": 4.65305993224775e-05, "loss": 1.5417, "step": 657500 }, { "epoch": 6.94, "learning_rate": 4.6527960994966075e-05, "loss": 1.5153, "step": 658000 }, { "epoch": 6.95, "learning_rate": 4.652532266745465e-05, "loss": 1.5491, "step": 658500 }, { "epoch": 6.95, "learning_rate": 4.6522684339943226e-05, "loss": 1.5606, "step": 659000 }, { "epoch": 6.96, "learning_rate": 4.65200460124318e-05, "loss": 1.5137, "step": 659500 }, { "epoch": 6.97, "learning_rate": 4.651740768492038e-05, "loss": 1.5071, "step": 660000 }, { "epoch": 6.97, "learning_rate": 4.651476935740895e-05, "loss": 1.5475, "step": 660500 }, { "epoch": 6.98, "learning_rate": 4.6512131029897535e-05, "loss": 1.6058, "step": 661000 }, { "epoch": 6.98, "learning_rate": 4.6509492702386104e-05, "loss": 1.5317, "step": 661500 }, { "epoch": 6.99, "learning_rate": 4.650685437487468e-05, "loss": 1.5655, "step": 662000 }, { "epoch": 6.99, "learning_rate": 4.6504216047363255e-05, "loss": 1.5177, "step": 662500 }, { "epoch": 7.0, "learning_rate": 4.650157771985184e-05, "loss": 1.5608, "step": 663000 }, { "epoch": 7.0, "learning_rate": 4.6498939392340406e-05, "loss": 1.5513, "step": 663500 }, { "epoch": 7.01, "learning_rate": 4.649630106482898e-05, "loss": 1.5288, "step": 664000 }, { "epoch": 7.01, "learning_rate": 4.6493662737317564e-05, "loss": 1.5554, "step": 664500 }, { "epoch": 7.02, "learning_rate": 4.649102440980614e-05, "loss": 1.5212, "step": 665000 }, { "epoch": 7.02, "learning_rate": 4.6488386082294715e-05, "loss": 1.5173, "step": 665500 }, { "epoch": 7.03, "learning_rate": 4.648574775478329e-05, "loss": 1.583, "step": 666000 }, { "epoch": 7.03, "learning_rate": 4.6483109427271866e-05, "loss": 1.4853, "step": 666500 }, { "epoch": 7.04, "learning_rate": 4.648047109976044e-05, "loss": 1.4866, "step": 667000 }, { "epoch": 7.04, "learning_rate": 4.6477832772249016e-05, "loss": 1.5072, "step": 667500 }, { "epoch": 7.05, "learning_rate": 4.64751944447376e-05, "loss": 1.5881, "step": 668000 }, { "epoch": 7.05, "learning_rate": 4.647255611722617e-05, "loss": 1.5064, "step": 668500 }, { "epoch": 7.06, "learning_rate": 4.646991778971474e-05, "loss": 1.5156, "step": 669000 }, { "epoch": 7.07, "learning_rate": 4.6467279462203325e-05, "loss": 1.4971, "step": 669500 }, { "epoch": 7.07, "learning_rate": 4.64646411346919e-05, "loss": 1.5319, "step": 670000 }, { "epoch": 7.08, "learning_rate": 4.6462002807180476e-05, "loss": 1.5321, "step": 670500 }, { "epoch": 7.08, "learning_rate": 4.645936447966905e-05, "loss": 1.5261, "step": 671000 }, { "epoch": 7.09, "learning_rate": 4.645672615215763e-05, "loss": 1.4987, "step": 671500 }, { "epoch": 7.09, "learning_rate": 4.64540878246462e-05, "loss": 1.5254, "step": 672000 }, { "epoch": 7.1, "learning_rate": 4.645144949713478e-05, "loss": 1.5679, "step": 672500 }, { "epoch": 7.1, "learning_rate": 4.6448811169623354e-05, "loss": 1.5558, "step": 673000 }, { "epoch": 7.11, "learning_rate": 4.644617284211193e-05, "loss": 1.5596, "step": 673500 }, { "epoch": 7.11, "learning_rate": 4.6443534514600505e-05, "loss": 1.5503, "step": 674000 }, { "epoch": 7.12, "learning_rate": 4.644089618708909e-05, "loss": 1.5375, "step": 674500 }, { "epoch": 7.12, "learning_rate": 4.643825785957766e-05, "loss": 1.5078, "step": 675000 }, { "epoch": 7.13, "learning_rate": 4.643561953206623e-05, "loss": 1.5153, "step": 675500 }, { "epoch": 7.13, "learning_rate": 4.6432981204554807e-05, "loss": 1.5497, "step": 676000 }, { "epoch": 7.14, "learning_rate": 4.643034287704339e-05, "loss": 1.5369, "step": 676500 }, { "epoch": 7.14, "learning_rate": 4.6427704549531964e-05, "loss": 1.4763, "step": 677000 }, { "epoch": 7.15, "learning_rate": 4.642506622202054e-05, "loss": 1.5775, "step": 677500 }, { "epoch": 7.16, "learning_rate": 4.6422427894509115e-05, "loss": 1.5444, "step": 678000 }, { "epoch": 7.16, "learning_rate": 4.641978956699769e-05, "loss": 1.5402, "step": 678500 }, { "epoch": 7.17, "learning_rate": 4.6417151239486266e-05, "loss": 1.5212, "step": 679000 }, { "epoch": 7.17, "learning_rate": 4.641451291197484e-05, "loss": 1.451, "step": 679500 }, { "epoch": 7.18, "learning_rate": 4.6411874584463424e-05, "loss": 1.5434, "step": 680000 }, { "epoch": 7.18, "learning_rate": 4.640923625695199e-05, "loss": 1.5159, "step": 680500 }, { "epoch": 7.19, "learning_rate": 4.640659792944057e-05, "loss": 1.5445, "step": 681000 }, { "epoch": 7.19, "learning_rate": 4.640395960192915e-05, "loss": 1.5545, "step": 681500 }, { "epoch": 7.2, "learning_rate": 4.6401321274417726e-05, "loss": 1.5416, "step": 682000 }, { "epoch": 7.2, "learning_rate": 4.6398682946906295e-05, "loss": 1.5086, "step": 682500 }, { "epoch": 7.21, "learning_rate": 4.639604461939488e-05, "loss": 1.5539, "step": 683000 }, { "epoch": 7.21, "learning_rate": 4.639340629188345e-05, "loss": 1.5839, "step": 683500 }, { "epoch": 7.22, "learning_rate": 4.639076796437203e-05, "loss": 1.5511, "step": 684000 }, { "epoch": 7.22, "learning_rate": 4.63881296368606e-05, "loss": 1.4778, "step": 684500 }, { "epoch": 7.23, "learning_rate": 4.638549130934918e-05, "loss": 1.5724, "step": 685000 }, { "epoch": 7.23, "learning_rate": 4.6382852981837754e-05, "loss": 1.5511, "step": 685500 }, { "epoch": 7.24, "learning_rate": 4.638021465432633e-05, "loss": 1.4857, "step": 686000 }, { "epoch": 7.24, "learning_rate": 4.637757632681491e-05, "loss": 1.5569, "step": 686500 }, { "epoch": 7.25, "learning_rate": 4.637493799930349e-05, "loss": 1.5369, "step": 687000 }, { "epoch": 7.26, "learning_rate": 4.6372299671792056e-05, "loss": 1.4937, "step": 687500 }, { "epoch": 7.26, "learning_rate": 4.636966134428063e-05, "loss": 1.5551, "step": 688000 }, { "epoch": 7.27, "learning_rate": 4.6367023016769214e-05, "loss": 1.5419, "step": 688500 }, { "epoch": 7.27, "learning_rate": 4.636438468925779e-05, "loss": 1.6045, "step": 689000 }, { "epoch": 7.28, "learning_rate": 4.6361746361746365e-05, "loss": 1.5637, "step": 689500 }, { "epoch": 7.28, "learning_rate": 4.635910803423494e-05, "loss": 1.564, "step": 690000 }, { "epoch": 7.29, "learning_rate": 4.6356469706723516e-05, "loss": 1.5386, "step": 690500 }, { "epoch": 7.29, "learning_rate": 4.635383137921209e-05, "loss": 1.5636, "step": 691000 }, { "epoch": 7.3, "learning_rate": 4.635119305170067e-05, "loss": 1.5504, "step": 691500 }, { "epoch": 7.3, "learning_rate": 4.634855472418924e-05, "loss": 1.5228, "step": 692000 }, { "epoch": 7.31, "learning_rate": 4.634591639667782e-05, "loss": 1.5299, "step": 692500 }, { "epoch": 7.31, "learning_rate": 4.6343278069166393e-05, "loss": 1.5078, "step": 693000 }, { "epoch": 7.32, "learning_rate": 4.6340639741654976e-05, "loss": 1.5406, "step": 693500 }, { "epoch": 7.32, "learning_rate": 4.633800141414355e-05, "loss": 1.5699, "step": 694000 }, { "epoch": 7.33, "learning_rate": 4.633536308663212e-05, "loss": 1.5153, "step": 694500 }, { "epoch": 7.33, "learning_rate": 4.63327247591207e-05, "loss": 1.5585, "step": 695000 }, { "epoch": 7.34, "learning_rate": 4.633008643160928e-05, "loss": 1.5119, "step": 695500 }, { "epoch": 7.35, "learning_rate": 4.632744810409785e-05, "loss": 1.525, "step": 696000 }, { "epoch": 7.35, "learning_rate": 4.632480977658643e-05, "loss": 1.4776, "step": 696500 }, { "epoch": 7.36, "learning_rate": 4.6322171449075004e-05, "loss": 1.5534, "step": 697000 }, { "epoch": 7.36, "learning_rate": 4.631953312156358e-05, "loss": 1.4884, "step": 697500 }, { "epoch": 7.37, "learning_rate": 4.6316894794052155e-05, "loss": 1.5275, "step": 698000 }, { "epoch": 7.37, "learning_rate": 4.631425646654074e-05, "loss": 1.5659, "step": 698500 }, { "epoch": 7.38, "learning_rate": 4.631161813902931e-05, "loss": 1.5743, "step": 699000 }, { "epoch": 7.38, "learning_rate": 4.630897981151788e-05, "loss": 1.5234, "step": 699500 }, { "epoch": 7.39, "learning_rate": 4.630634148400646e-05, "loss": 1.5538, "step": 700000 }, { "epoch": 7.39, "learning_rate": 4.630370315649504e-05, "loss": 1.5643, "step": 700500 }, { "epoch": 7.4, "learning_rate": 4.6301064828983615e-05, "loss": 1.5388, "step": 701000 }, { "epoch": 7.4, "learning_rate": 4.6298426501472183e-05, "loss": 1.5441, "step": 701500 }, { "epoch": 7.41, "learning_rate": 4.6295788173960766e-05, "loss": 1.5417, "step": 702000 }, { "epoch": 7.41, "learning_rate": 4.629314984644934e-05, "loss": 1.5365, "step": 702500 }, { "epoch": 7.42, "learning_rate": 4.629051151893792e-05, "loss": 1.5394, "step": 703000 }, { "epoch": 7.42, "learning_rate": 4.628787319142649e-05, "loss": 1.5298, "step": 703500 }, { "epoch": 7.43, "learning_rate": 4.628523486391507e-05, "loss": 1.5469, "step": 704000 }, { "epoch": 7.43, "learning_rate": 4.628259653640364e-05, "loss": 1.5533, "step": 704500 }, { "epoch": 7.44, "learning_rate": 4.627995820889222e-05, "loss": 1.616, "step": 705000 }, { "epoch": 7.45, "learning_rate": 4.62773198813808e-05, "loss": 1.4938, "step": 705500 }, { "epoch": 7.45, "learning_rate": 4.6274681553869376e-05, "loss": 1.5798, "step": 706000 }, { "epoch": 7.46, "learning_rate": 4.6272043226357945e-05, "loss": 1.4972, "step": 706500 }, { "epoch": 7.46, "learning_rate": 4.626940489884653e-05, "loss": 1.4848, "step": 707000 }, { "epoch": 7.47, "learning_rate": 4.62667665713351e-05, "loss": 1.5263, "step": 707500 }, { "epoch": 7.47, "learning_rate": 4.626412824382368e-05, "loss": 1.4994, "step": 708000 }, { "epoch": 7.48, "learning_rate": 4.6261489916312254e-05, "loss": 1.4977, "step": 708500 }, { "epoch": 7.48, "learning_rate": 4.625885158880083e-05, "loss": 1.5651, "step": 709000 }, { "epoch": 7.49, "learning_rate": 4.6256213261289405e-05, "loss": 1.5206, "step": 709500 }, { "epoch": 7.49, "learning_rate": 4.625357493377798e-05, "loss": 1.5526, "step": 710000 }, { "epoch": 7.5, "learning_rate": 4.625093660626656e-05, "loss": 1.5453, "step": 710500 }, { "epoch": 7.5, "learning_rate": 4.624829827875513e-05, "loss": 1.5197, "step": 711000 }, { "epoch": 7.51, "learning_rate": 4.624565995124371e-05, "loss": 1.4714, "step": 711500 }, { "epoch": 7.51, "learning_rate": 4.624302162373228e-05, "loss": 1.605, "step": 712000 }, { "epoch": 7.52, "learning_rate": 4.6240383296220865e-05, "loss": 1.5809, "step": 712500 }, { "epoch": 7.52, "learning_rate": 4.623774496870944e-05, "loss": 1.5645, "step": 713000 }, { "epoch": 7.53, "learning_rate": 4.623510664119801e-05, "loss": 1.5674, "step": 713500 }, { "epoch": 7.54, "learning_rate": 4.623246831368659e-05, "loss": 1.4627, "step": 714000 }, { "epoch": 7.54, "learning_rate": 4.6229829986175166e-05, "loss": 1.5293, "step": 714500 }, { "epoch": 7.55, "learning_rate": 4.622719165866374e-05, "loss": 1.5384, "step": 715000 }, { "epoch": 7.55, "learning_rate": 4.622455333115232e-05, "loss": 1.5014, "step": 715500 }, { "epoch": 7.56, "learning_rate": 4.622191500364089e-05, "loss": 1.5375, "step": 716000 }, { "epoch": 7.56, "learning_rate": 4.621927667612947e-05, "loss": 1.4813, "step": 716500 }, { "epoch": 7.57, "learning_rate": 4.6216638348618044e-05, "loss": 1.5681, "step": 717000 }, { "epoch": 7.57, "learning_rate": 4.6214000021106626e-05, "loss": 1.4718, "step": 717500 }, { "epoch": 7.58, "learning_rate": 4.62113616935952e-05, "loss": 1.5164, "step": 718000 }, { "epoch": 7.58, "learning_rate": 4.620872336608377e-05, "loss": 1.5006, "step": 718500 }, { "epoch": 7.59, "learning_rate": 4.620608503857235e-05, "loss": 1.4957, "step": 719000 }, { "epoch": 7.59, "learning_rate": 4.620344671106093e-05, "loss": 1.5103, "step": 719500 }, { "epoch": 7.6, "learning_rate": 4.6200808383549504e-05, "loss": 1.5042, "step": 720000 }, { "epoch": 7.6, "learning_rate": 4.619817005603808e-05, "loss": 1.5286, "step": 720500 }, { "epoch": 7.61, "learning_rate": 4.6195531728526655e-05, "loss": 1.4908, "step": 721000 }, { "epoch": 7.61, "learning_rate": 4.619289340101523e-05, "loss": 1.5493, "step": 721500 }, { "epoch": 7.62, "learning_rate": 4.6190255073503806e-05, "loss": 1.4931, "step": 722000 }, { "epoch": 7.62, "learning_rate": 4.618761674599239e-05, "loss": 1.5632, "step": 722500 }, { "epoch": 7.63, "learning_rate": 4.6184978418480957e-05, "loss": 1.5382, "step": 723000 }, { "epoch": 7.64, "learning_rate": 4.618234009096953e-05, "loss": 1.466, "step": 723500 }, { "epoch": 7.64, "learning_rate": 4.617970176345811e-05, "loss": 1.5274, "step": 724000 }, { "epoch": 7.65, "learning_rate": 4.617706343594669e-05, "loss": 1.5177, "step": 724500 }, { "epoch": 7.65, "learning_rate": 4.6174425108435265e-05, "loss": 1.5112, "step": 725000 }, { "epoch": 7.66, "learning_rate": 4.6171786780923834e-05, "loss": 1.5546, "step": 725500 }, { "epoch": 7.66, "learning_rate": 4.6169148453412416e-05, "loss": 1.5209, "step": 726000 }, { "epoch": 7.67, "learning_rate": 4.616651012590099e-05, "loss": 1.5249, "step": 726500 }, { "epoch": 7.67, "learning_rate": 4.616387179838957e-05, "loss": 1.5214, "step": 727000 }, { "epoch": 7.68, "learning_rate": 4.616123347087814e-05, "loss": 1.5383, "step": 727500 }, { "epoch": 7.68, "learning_rate": 4.615859514336672e-05, "loss": 1.5324, "step": 728000 }, { "epoch": 7.69, "learning_rate": 4.6155956815855294e-05, "loss": 1.5218, "step": 728500 }, { "epoch": 7.69, "learning_rate": 4.615331848834387e-05, "loss": 1.536, "step": 729000 }, { "epoch": 7.7, "learning_rate": 4.615068016083245e-05, "loss": 1.5354, "step": 729500 }, { "epoch": 7.7, "learning_rate": 4.614804183332102e-05, "loss": 1.515, "step": 730000 }, { "epoch": 7.71, "learning_rate": 4.6145403505809596e-05, "loss": 1.5044, "step": 730500 }, { "epoch": 7.71, "learning_rate": 4.614276517829818e-05, "loss": 1.5297, "step": 731000 }, { "epoch": 7.72, "learning_rate": 4.614012685078675e-05, "loss": 1.5114, "step": 731500 }, { "epoch": 7.73, "learning_rate": 4.613748852327533e-05, "loss": 1.5458, "step": 732000 }, { "epoch": 7.73, "learning_rate": 4.6134850195763904e-05, "loss": 1.4726, "step": 732500 }, { "epoch": 7.74, "learning_rate": 4.613221186825248e-05, "loss": 1.4841, "step": 733000 }, { "epoch": 7.74, "learning_rate": 4.6129573540741055e-05, "loss": 1.5312, "step": 733500 }, { "epoch": 7.75, "learning_rate": 4.612693521322963e-05, "loss": 1.4909, "step": 734000 }, { "epoch": 7.75, "learning_rate": 4.612429688571821e-05, "loss": 1.5605, "step": 734500 }, { "epoch": 7.76, "learning_rate": 4.612165855820678e-05, "loss": 1.5075, "step": 735000 }, { "epoch": 7.76, "learning_rate": 4.611902023069536e-05, "loss": 1.5562, "step": 735500 }, { "epoch": 7.77, "learning_rate": 4.611638190318393e-05, "loss": 1.58, "step": 736000 }, { "epoch": 7.77, "learning_rate": 4.6113743575672515e-05, "loss": 1.5121, "step": 736500 }, { "epoch": 7.78, "learning_rate": 4.611110524816109e-05, "loss": 1.5274, "step": 737000 }, { "epoch": 7.78, "learning_rate": 4.610846692064966e-05, "loss": 1.5839, "step": 737500 }, { "epoch": 7.79, "learning_rate": 4.610582859313824e-05, "loss": 1.5267, "step": 738000 }, { "epoch": 7.79, "learning_rate": 4.610319026562682e-05, "loss": 1.5886, "step": 738500 }, { "epoch": 7.8, "learning_rate": 4.610055193811539e-05, "loss": 1.5151, "step": 739000 }, { "epoch": 7.8, "learning_rate": 4.609791361060397e-05, "loss": 1.5348, "step": 739500 }, { "epoch": 7.81, "learning_rate": 4.6095275283092543e-05, "loss": 1.5032, "step": 740000 }, { "epoch": 7.81, "learning_rate": 4.609263695558112e-05, "loss": 1.5589, "step": 740500 }, { "epoch": 7.82, "learning_rate": 4.6089998628069694e-05, "loss": 1.5095, "step": 741000 }, { "epoch": 7.83, "learning_rate": 4.608736030055828e-05, "loss": 1.5131, "step": 741500 }, { "epoch": 7.83, "learning_rate": 4.6084721973046845e-05, "loss": 1.528, "step": 742000 }, { "epoch": 7.84, "learning_rate": 4.608208364553542e-05, "loss": 1.4847, "step": 742500 }, { "epoch": 7.84, "learning_rate": 4.6079445318024e-05, "loss": 1.5035, "step": 743000 }, { "epoch": 7.85, "learning_rate": 4.607680699051258e-05, "loss": 1.5074, "step": 743500 }, { "epoch": 7.85, "learning_rate": 4.6074168663001154e-05, "loss": 1.5308, "step": 744000 }, { "epoch": 7.86, "learning_rate": 4.607153033548973e-05, "loss": 1.5442, "step": 744500 }, { "epoch": 7.86, "learning_rate": 4.6068892007978305e-05, "loss": 1.5365, "step": 745000 }, { "epoch": 7.87, "learning_rate": 4.606625368046688e-05, "loss": 1.5338, "step": 745500 }, { "epoch": 7.87, "learning_rate": 4.6063615352955456e-05, "loss": 1.4638, "step": 746000 }, { "epoch": 7.88, "learning_rate": 4.606097702544404e-05, "loss": 1.5363, "step": 746500 }, { "epoch": 7.88, "learning_rate": 4.605833869793261e-05, "loss": 1.4783, "step": 747000 }, { "epoch": 7.89, "learning_rate": 4.605570037042118e-05, "loss": 1.5154, "step": 747500 }, { "epoch": 7.89, "learning_rate": 4.6053062042909765e-05, "loss": 1.5383, "step": 748000 }, { "epoch": 7.9, "learning_rate": 4.605042371539834e-05, "loss": 1.5622, "step": 748500 }, { "epoch": 7.9, "learning_rate": 4.604778538788691e-05, "loss": 1.4818, "step": 749000 }, { "epoch": 7.91, "learning_rate": 4.6045147060375484e-05, "loss": 1.5289, "step": 749500 }, { "epoch": 7.91, "learning_rate": 4.604250873286407e-05, "loss": 1.5275, "step": 750000 }, { "epoch": 7.92, "learning_rate": 4.603987040535264e-05, "loss": 1.5334, "step": 750500 }, { "epoch": 7.93, "learning_rate": 4.603723207784122e-05, "loss": 1.5717, "step": 751000 }, { "epoch": 7.93, "learning_rate": 4.603459375032979e-05, "loss": 1.5339, "step": 751500 }, { "epoch": 7.94, "learning_rate": 4.603195542281837e-05, "loss": 1.4903, "step": 752000 }, { "epoch": 7.94, "learning_rate": 4.6029317095306944e-05, "loss": 1.541, "step": 752500 }, { "epoch": 7.95, "learning_rate": 4.602667876779552e-05, "loss": 1.4812, "step": 753000 }, { "epoch": 7.95, "learning_rate": 4.60240404402841e-05, "loss": 1.5262, "step": 753500 }, { "epoch": 7.96, "learning_rate": 4.602140211277267e-05, "loss": 1.5397, "step": 754000 }, { "epoch": 7.96, "learning_rate": 4.6018763785261246e-05, "loss": 1.5148, "step": 754500 }, { "epoch": 7.97, "learning_rate": 4.601612545774983e-05, "loss": 1.5317, "step": 755000 }, { "epoch": 7.97, "learning_rate": 4.6013487130238404e-05, "loss": 1.5226, "step": 755500 }, { "epoch": 7.98, "learning_rate": 4.601084880272698e-05, "loss": 1.5377, "step": 756000 }, { "epoch": 7.98, "learning_rate": 4.6008210475215555e-05, "loss": 1.5145, "step": 756500 }, { "epoch": 7.99, "learning_rate": 4.600557214770413e-05, "loss": 1.5717, "step": 757000 }, { "epoch": 7.99, "learning_rate": 4.6002933820192706e-05, "loss": 1.4813, "step": 757500 }, { "epoch": 8.0, "learning_rate": 4.600029549268128e-05, "loss": 1.5353, "step": 758000 }, { "epoch": 8.0, "learning_rate": 4.599765716516986e-05, "loss": 1.4844, "step": 758500 }, { "epoch": 8.01, "learning_rate": 4.599501883765843e-05, "loss": 1.4703, "step": 759000 }, { "epoch": 8.02, "learning_rate": 4.599238051014701e-05, "loss": 1.5227, "step": 759500 }, { "epoch": 8.02, "learning_rate": 4.598974218263559e-05, "loss": 1.4999, "step": 760000 }, { "epoch": 8.03, "learning_rate": 4.5987103855124166e-05, "loss": 1.5236, "step": 760500 }, { "epoch": 8.03, "learning_rate": 4.5984465527612734e-05, "loss": 1.5229, "step": 761000 }, { "epoch": 8.04, "learning_rate": 4.598182720010131e-05, "loss": 1.4932, "step": 761500 }, { "epoch": 8.04, "learning_rate": 4.597918887258989e-05, "loss": 1.5068, "step": 762000 }, { "epoch": 8.05, "learning_rate": 4.597655054507847e-05, "loss": 1.5057, "step": 762500 }, { "epoch": 8.05, "learning_rate": 4.597391221756704e-05, "loss": 1.526, "step": 763000 }, { "epoch": 8.06, "learning_rate": 4.597127389005562e-05, "loss": 1.5663, "step": 763500 }, { "epoch": 8.06, "learning_rate": 4.5968635562544194e-05, "loss": 1.5341, "step": 764000 }, { "epoch": 8.07, "learning_rate": 4.596599723503277e-05, "loss": 1.5351, "step": 764500 }, { "epoch": 8.07, "learning_rate": 4.5963358907521345e-05, "loss": 1.4832, "step": 765000 }, { "epoch": 8.08, "learning_rate": 4.596072058000993e-05, "loss": 1.5022, "step": 765500 }, { "epoch": 8.08, "learning_rate": 4.5958082252498496e-05, "loss": 1.5413, "step": 766000 }, { "epoch": 8.09, "learning_rate": 4.595544392498707e-05, "loss": 1.5498, "step": 766500 }, { "epoch": 8.09, "learning_rate": 4.5952805597475654e-05, "loss": 1.525, "step": 767000 }, { "epoch": 8.1, "learning_rate": 4.595016726996423e-05, "loss": 1.5338, "step": 767500 }, { "epoch": 8.1, "learning_rate": 4.59475289424528e-05, "loss": 1.5164, "step": 768000 }, { "epoch": 8.11, "learning_rate": 4.594489061494138e-05, "loss": 1.4618, "step": 768500 }, { "epoch": 8.12, "learning_rate": 4.5942252287429956e-05, "loss": 1.5494, "step": 769000 }, { "epoch": 8.12, "learning_rate": 4.593961395991853e-05, "loss": 1.5027, "step": 769500 }, { "epoch": 8.13, "learning_rate": 4.5936975632407107e-05, "loss": 1.4816, "step": 770000 }, { "epoch": 8.13, "learning_rate": 4.593433730489568e-05, "loss": 1.4805, "step": 770500 }, { "epoch": 8.14, "learning_rate": 4.593169897738426e-05, "loss": 1.5015, "step": 771000 }, { "epoch": 8.14, "learning_rate": 4.592906064987283e-05, "loss": 1.4868, "step": 771500 }, { "epoch": 8.15, "learning_rate": 4.5926422322361415e-05, "loss": 1.53, "step": 772000 }, { "epoch": 8.15, "learning_rate": 4.592378399484999e-05, "loss": 1.5063, "step": 772500 }, { "epoch": 8.16, "learning_rate": 4.592114566733856e-05, "loss": 1.498, "step": 773000 }, { "epoch": 8.16, "learning_rate": 4.5918507339827135e-05, "loss": 1.5027, "step": 773500 }, { "epoch": 8.17, "learning_rate": 4.591586901231572e-05, "loss": 1.523, "step": 774000 }, { "epoch": 8.17, "learning_rate": 4.591323068480429e-05, "loss": 1.5508, "step": 774500 }, { "epoch": 8.18, "learning_rate": 4.591059235729287e-05, "loss": 1.4233, "step": 775000 }, { "epoch": 8.18, "learning_rate": 4.5907954029781444e-05, "loss": 1.4985, "step": 775500 }, { "epoch": 8.19, "learning_rate": 4.590531570227002e-05, "loss": 1.5265, "step": 776000 }, { "epoch": 8.19, "learning_rate": 4.5902677374758595e-05, "loss": 1.5287, "step": 776500 }, { "epoch": 8.2, "learning_rate": 4.590003904724717e-05, "loss": 1.5233, "step": 777000 }, { "epoch": 8.21, "learning_rate": 4.5897400719735746e-05, "loss": 1.5272, "step": 777500 }, { "epoch": 8.21, "learning_rate": 4.589476239222432e-05, "loss": 1.5692, "step": 778000 }, { "epoch": 8.22, "learning_rate": 4.58921240647129e-05, "loss": 1.4921, "step": 778500 }, { "epoch": 8.22, "learning_rate": 4.588948573720148e-05, "loss": 1.4837, "step": 779000 }, { "epoch": 8.23, "learning_rate": 4.5886847409690054e-05, "loss": 1.5291, "step": 779500 }, { "epoch": 8.23, "learning_rate": 4.588420908217862e-05, "loss": 1.5614, "step": 780000 }, { "epoch": 8.24, "learning_rate": 4.5881570754667205e-05, "loss": 1.5763, "step": 780500 }, { "epoch": 8.24, "learning_rate": 4.587893242715578e-05, "loss": 1.5123, "step": 781000 }, { "epoch": 8.25, "learning_rate": 4.5876294099644356e-05, "loss": 1.5518, "step": 781500 }, { "epoch": 8.25, "learning_rate": 4.587365577213293e-05, "loss": 1.4835, "step": 782000 }, { "epoch": 8.26, "learning_rate": 4.587101744462151e-05, "loss": 1.4936, "step": 782500 }, { "epoch": 8.26, "learning_rate": 4.586837911711008e-05, "loss": 1.489, "step": 783000 }, { "epoch": 8.27, "learning_rate": 4.586574078959866e-05, "loss": 1.4575, "step": 783500 }, { "epoch": 8.27, "learning_rate": 4.586310246208724e-05, "loss": 1.5163, "step": 784000 }, { "epoch": 8.28, "learning_rate": 4.5860464134575816e-05, "loss": 1.536, "step": 784500 }, { "epoch": 8.28, "learning_rate": 4.5857825807064385e-05, "loss": 1.4468, "step": 785000 }, { "epoch": 8.29, "learning_rate": 4.585518747955296e-05, "loss": 1.4916, "step": 785500 }, { "epoch": 8.29, "learning_rate": 4.585254915204154e-05, "loss": 1.5535, "step": 786000 }, { "epoch": 8.3, "learning_rate": 4.584991082453012e-05, "loss": 1.5481, "step": 786500 }, { "epoch": 8.31, "learning_rate": 4.584727249701869e-05, "loss": 1.4677, "step": 787000 }, { "epoch": 8.31, "learning_rate": 4.584463416950727e-05, "loss": 1.518, "step": 787500 }, { "epoch": 8.32, "learning_rate": 4.5841995841995844e-05, "loss": 1.55, "step": 788000 }, { "epoch": 8.32, "learning_rate": 4.583935751448442e-05, "loss": 1.5275, "step": 788500 }, { "epoch": 8.33, "learning_rate": 4.5836719186972995e-05, "loss": 1.4966, "step": 789000 }, { "epoch": 8.33, "learning_rate": 4.583408085946157e-05, "loss": 1.498, "step": 789500 }, { "epoch": 8.34, "learning_rate": 4.5831442531950146e-05, "loss": 1.4838, "step": 790000 }, { "epoch": 8.34, "learning_rate": 4.582880420443872e-05, "loss": 1.4418, "step": 790500 }, { "epoch": 8.35, "learning_rate": 4.5826165876927304e-05, "loss": 1.4914, "step": 791000 }, { "epoch": 8.35, "learning_rate": 4.582352754941588e-05, "loss": 1.5284, "step": 791500 }, { "epoch": 8.36, "learning_rate": 4.582088922190445e-05, "loss": 1.4825, "step": 792000 }, { "epoch": 8.36, "learning_rate": 4.581825089439303e-05, "loss": 1.5273, "step": 792500 }, { "epoch": 8.37, "learning_rate": 4.5815612566881606e-05, "loss": 1.5519, "step": 793000 }, { "epoch": 8.37, "learning_rate": 4.581297423937018e-05, "loss": 1.4948, "step": 793500 }, { "epoch": 8.38, "learning_rate": 4.581033591185876e-05, "loss": 1.4513, "step": 794000 }, { "epoch": 8.38, "learning_rate": 4.580769758434733e-05, "loss": 1.4713, "step": 794500 }, { "epoch": 8.39, "learning_rate": 4.580505925683591e-05, "loss": 1.4987, "step": 795000 }, { "epoch": 8.4, "learning_rate": 4.5802420929324484e-05, "loss": 1.5318, "step": 795500 }, { "epoch": 8.4, "learning_rate": 4.5799782601813066e-05, "loss": 1.5051, "step": 796000 }, { "epoch": 8.41, "learning_rate": 4.5797144274301634e-05, "loss": 1.4995, "step": 796500 }, { "epoch": 8.41, "learning_rate": 4.579450594679021e-05, "loss": 1.4304, "step": 797000 }, { "epoch": 8.42, "learning_rate": 4.5791867619278785e-05, "loss": 1.5333, "step": 797500 }, { "epoch": 8.42, "learning_rate": 4.578922929176737e-05, "loss": 1.5254, "step": 798000 }, { "epoch": 8.43, "learning_rate": 4.578659096425594e-05, "loss": 1.5061, "step": 798500 }, { "epoch": 8.43, "learning_rate": 4.578395263674451e-05, "loss": 1.4652, "step": 799000 }, { "epoch": 8.44, "learning_rate": 4.5781314309233094e-05, "loss": 1.4926, "step": 799500 }, { "epoch": 8.44, "learning_rate": 4.577867598172167e-05, "loss": 1.4471, "step": 800000 }, { "epoch": 8.45, "learning_rate": 4.5776037654210245e-05, "loss": 1.5457, "step": 800500 }, { "epoch": 8.45, "learning_rate": 4.577339932669882e-05, "loss": 1.4899, "step": 801000 }, { "epoch": 8.46, "learning_rate": 4.5770760999187396e-05, "loss": 1.5145, "step": 801500 }, { "epoch": 8.46, "learning_rate": 4.576812267167597e-05, "loss": 1.45, "step": 802000 }, { "epoch": 8.47, "learning_rate": 4.576548434416455e-05, "loss": 1.5025, "step": 802500 }, { "epoch": 8.47, "learning_rate": 4.576284601665313e-05, "loss": 1.4812, "step": 803000 }, { "epoch": 8.48, "learning_rate": 4.5760207689141705e-05, "loss": 1.4892, "step": 803500 }, { "epoch": 8.48, "learning_rate": 4.5757569361630274e-05, "loss": 1.5799, "step": 804000 }, { "epoch": 8.49, "learning_rate": 4.5754931034118856e-05, "loss": 1.4934, "step": 804500 }, { "epoch": 8.5, "learning_rate": 4.575229270660743e-05, "loss": 1.5279, "step": 805000 }, { "epoch": 8.5, "learning_rate": 4.574965437909601e-05, "loss": 1.4973, "step": 805500 }, { "epoch": 8.51, "learning_rate": 4.574701605158458e-05, "loss": 1.4551, "step": 806000 }, { "epoch": 8.51, "learning_rate": 4.574437772407316e-05, "loss": 1.5139, "step": 806500 }, { "epoch": 8.52, "learning_rate": 4.574173939656173e-05, "loss": 1.5575, "step": 807000 }, { "epoch": 8.52, "learning_rate": 4.573910106905031e-05, "loss": 1.4928, "step": 807500 }, { "epoch": 8.53, "learning_rate": 4.573646274153889e-05, "loss": 1.5196, "step": 808000 }, { "epoch": 8.53, "learning_rate": 4.573382441402746e-05, "loss": 1.4923, "step": 808500 }, { "epoch": 8.54, "learning_rate": 4.5731186086516035e-05, "loss": 1.4987, "step": 809000 }, { "epoch": 8.54, "learning_rate": 4.572854775900461e-05, "loss": 1.6131, "step": 809500 }, { "epoch": 8.55, "learning_rate": 4.572590943149319e-05, "loss": 1.4986, "step": 810000 }, { "epoch": 8.55, "learning_rate": 4.572327110398177e-05, "loss": 1.5385, "step": 810500 }, { "epoch": 8.56, "learning_rate": 4.572063277647034e-05, "loss": 1.4832, "step": 811000 }, { "epoch": 8.56, "learning_rate": 4.571799444895892e-05, "loss": 1.5388, "step": 811500 }, { "epoch": 8.57, "learning_rate": 4.5715356121447495e-05, "loss": 1.5117, "step": 812000 }, { "epoch": 8.57, "learning_rate": 4.571271779393607e-05, "loss": 1.5383, "step": 812500 }, { "epoch": 8.58, "learning_rate": 4.5710079466424646e-05, "loss": 1.476, "step": 813000 }, { "epoch": 8.59, "learning_rate": 4.570744113891322e-05, "loss": 1.4776, "step": 813500 }, { "epoch": 8.59, "learning_rate": 4.57048028114018e-05, "loss": 1.4802, "step": 814000 }, { "epoch": 8.6, "learning_rate": 4.570216448389037e-05, "loss": 1.5292, "step": 814500 }, { "epoch": 8.6, "learning_rate": 4.5699526156378955e-05, "loss": 1.5119, "step": 815000 }, { "epoch": 8.61, "learning_rate": 4.569688782886752e-05, "loss": 1.5454, "step": 815500 }, { "epoch": 8.61, "learning_rate": 4.56942495013561e-05, "loss": 1.4798, "step": 816000 }, { "epoch": 8.62, "learning_rate": 4.569161117384468e-05, "loss": 1.485, "step": 816500 }, { "epoch": 8.62, "learning_rate": 4.5688972846333257e-05, "loss": 1.4885, "step": 817000 }, { "epoch": 8.63, "learning_rate": 4.568633451882183e-05, "loss": 1.4986, "step": 817500 }, { "epoch": 8.63, "learning_rate": 4.568369619131041e-05, "loss": 1.4396, "step": 818000 }, { "epoch": 8.64, "learning_rate": 4.568105786379898e-05, "loss": 1.5205, "step": 818500 }, { "epoch": 8.64, "learning_rate": 4.567841953628756e-05, "loss": 1.5677, "step": 819000 }, { "epoch": 8.65, "learning_rate": 4.5675781208776134e-05, "loss": 1.4936, "step": 819500 }, { "epoch": 8.65, "learning_rate": 4.5673142881264716e-05, "loss": 1.4856, "step": 820000 }, { "epoch": 8.66, "learning_rate": 4.5670504553753285e-05, "loss": 1.4669, "step": 820500 }, { "epoch": 8.66, "learning_rate": 4.566786622624186e-05, "loss": 1.5214, "step": 821000 }, { "epoch": 8.67, "learning_rate": 4.566522789873044e-05, "loss": 1.5361, "step": 821500 }, { "epoch": 8.67, "learning_rate": 4.566258957121902e-05, "loss": 1.5234, "step": 822000 }, { "epoch": 8.68, "learning_rate": 4.5659951243707594e-05, "loss": 1.4918, "step": 822500 }, { "epoch": 8.69, "learning_rate": 4.565731291619616e-05, "loss": 1.4677, "step": 823000 }, { "epoch": 8.69, "learning_rate": 4.5654674588684745e-05, "loss": 1.5137, "step": 823500 }, { "epoch": 8.7, "learning_rate": 4.565203626117332e-05, "loss": 1.5054, "step": 824000 }, { "epoch": 8.7, "learning_rate": 4.5649397933661896e-05, "loss": 1.5198, "step": 824500 }, { "epoch": 8.71, "learning_rate": 4.564675960615047e-05, "loss": 1.5457, "step": 825000 }, { "epoch": 8.71, "learning_rate": 4.564412127863905e-05, "loss": 1.4964, "step": 825500 }, { "epoch": 8.72, "learning_rate": 4.564148295112762e-05, "loss": 1.5181, "step": 826000 }, { "epoch": 8.72, "learning_rate": 4.56388446236162e-05, "loss": 1.5402, "step": 826500 }, { "epoch": 8.73, "learning_rate": 4.563620629610478e-05, "loss": 1.4371, "step": 827000 }, { "epoch": 8.73, "learning_rate": 4.563356796859335e-05, "loss": 1.5457, "step": 827500 }, { "epoch": 8.74, "learning_rate": 4.5630929641081924e-05, "loss": 1.5171, "step": 828000 }, { "epoch": 8.74, "learning_rate": 4.5628291313570506e-05, "loss": 1.5143, "step": 828500 }, { "epoch": 8.75, "learning_rate": 4.562565298605908e-05, "loss": 1.4715, "step": 829000 }, { "epoch": 8.75, "learning_rate": 4.562301465854766e-05, "loss": 1.5141, "step": 829500 }, { "epoch": 8.76, "learning_rate": 4.562037633103623e-05, "loss": 1.4906, "step": 830000 }, { "epoch": 8.76, "learning_rate": 4.561773800352481e-05, "loss": 1.5788, "step": 830500 }, { "epoch": 8.77, "learning_rate": 4.5615099676013384e-05, "loss": 1.5097, "step": 831000 }, { "epoch": 8.78, "learning_rate": 4.561246134850196e-05, "loss": 1.4957, "step": 831500 }, { "epoch": 8.78, "learning_rate": 4.560982302099054e-05, "loss": 1.5315, "step": 832000 }, { "epoch": 8.79, "learning_rate": 4.560718469347911e-05, "loss": 1.4576, "step": 832500 }, { "epoch": 8.79, "learning_rate": 4.5604546365967686e-05, "loss": 1.4825, "step": 833000 }, { "epoch": 8.8, "learning_rate": 4.560190803845627e-05, "loss": 1.4553, "step": 833500 }, { "epoch": 8.8, "learning_rate": 4.5599269710944843e-05, "loss": 1.5057, "step": 834000 }, { "epoch": 8.81, "learning_rate": 4.559663138343341e-05, "loss": 1.521, "step": 834500 }, { "epoch": 8.81, "learning_rate": 4.559399305592199e-05, "loss": 1.4966, "step": 835000 }, { "epoch": 8.82, "learning_rate": 4.559135472841057e-05, "loss": 1.4842, "step": 835500 }, { "epoch": 8.82, "learning_rate": 4.5588716400899145e-05, "loss": 1.5414, "step": 836000 }, { "epoch": 8.83, "learning_rate": 4.558607807338772e-05, "loss": 1.5532, "step": 836500 }, { "epoch": 8.83, "learning_rate": 4.5583439745876296e-05, "loss": 1.457, "step": 837000 }, { "epoch": 8.84, "learning_rate": 4.558080141836487e-05, "loss": 1.5209, "step": 837500 }, { "epoch": 8.84, "learning_rate": 4.557816309085345e-05, "loss": 1.5159, "step": 838000 }, { "epoch": 8.85, "learning_rate": 4.557552476334202e-05, "loss": 1.448, "step": 838500 }, { "epoch": 8.85, "learning_rate": 4.5572886435830605e-05, "loss": 1.5197, "step": 839000 }, { "epoch": 8.86, "learning_rate": 4.5570248108319174e-05, "loss": 1.5156, "step": 839500 }, { "epoch": 8.86, "learning_rate": 4.556760978080775e-05, "loss": 1.4897, "step": 840000 }, { "epoch": 8.87, "learning_rate": 4.556497145329633e-05, "loss": 1.5486, "step": 840500 }, { "epoch": 8.88, "learning_rate": 4.556233312578491e-05, "loss": 1.5172, "step": 841000 }, { "epoch": 8.88, "learning_rate": 4.555969479827348e-05, "loss": 1.4715, "step": 841500 }, { "epoch": 8.89, "learning_rate": 4.555705647076206e-05, "loss": 1.5004, "step": 842000 }, { "epoch": 8.89, "learning_rate": 4.5554418143250634e-05, "loss": 1.513, "step": 842500 }, { "epoch": 8.9, "learning_rate": 4.555177981573921e-05, "loss": 1.5132, "step": 843000 }, { "epoch": 8.9, "learning_rate": 4.5549141488227785e-05, "loss": 1.5247, "step": 843500 }, { "epoch": 8.91, "learning_rate": 4.554650316071636e-05, "loss": 1.4759, "step": 844000 }, { "epoch": 8.91, "learning_rate": 4.5543864833204935e-05, "loss": 1.4496, "step": 844500 }, { "epoch": 8.92, "learning_rate": 4.554122650569351e-05, "loss": 1.5219, "step": 845000 }, { "epoch": 8.92, "learning_rate": 4.553858817818209e-05, "loss": 1.4615, "step": 845500 }, { "epoch": 8.93, "learning_rate": 4.553594985067067e-05, "loss": 1.5516, "step": 846000 }, { "epoch": 8.93, "learning_rate": 4.553331152315924e-05, "loss": 1.5273, "step": 846500 }, { "epoch": 8.94, "learning_rate": 4.553067319564781e-05, "loss": 1.4877, "step": 847000 }, { "epoch": 8.94, "learning_rate": 4.5528034868136395e-05, "loss": 1.4884, "step": 847500 }, { "epoch": 8.95, "learning_rate": 4.552539654062497e-05, "loss": 1.4771, "step": 848000 }, { "epoch": 8.95, "learning_rate": 4.5522758213113546e-05, "loss": 1.5481, "step": 848500 }, { "epoch": 8.96, "learning_rate": 4.552011988560212e-05, "loss": 1.5059, "step": 849000 }, { "epoch": 8.97, "learning_rate": 4.55174815580907e-05, "loss": 1.4753, "step": 849500 }, { "epoch": 8.97, "learning_rate": 4.551484323057927e-05, "loss": 1.4916, "step": 850000 }, { "epoch": 8.98, "learning_rate": 4.551220490306785e-05, "loss": 1.5208, "step": 850500 }, { "epoch": 8.98, "learning_rate": 4.550956657555643e-05, "loss": 1.4954, "step": 851000 }, { "epoch": 8.99, "learning_rate": 4.5506928248045e-05, "loss": 1.5149, "step": 851500 }, { "epoch": 8.99, "learning_rate": 4.5504289920533575e-05, "loss": 1.5112, "step": 852000 }, { "epoch": 9.0, "learning_rate": 4.550165159302216e-05, "loss": 1.5326, "step": 852500 }, { "epoch": 9.0, "learning_rate": 4.549901326551073e-05, "loss": 1.4933, "step": 853000 }, { "epoch": 9.01, "learning_rate": 4.54963749379993e-05, "loss": 1.487, "step": 853500 }, { "epoch": 9.01, "learning_rate": 4.549373661048788e-05, "loss": 1.5004, "step": 854000 }, { "epoch": 9.02, "learning_rate": 4.549109828297646e-05, "loss": 1.5143, "step": 854500 }, { "epoch": 9.02, "learning_rate": 4.5488459955465034e-05, "loss": 1.5013, "step": 855000 }, { "epoch": 9.03, "learning_rate": 4.548582162795361e-05, "loss": 1.4674, "step": 855500 }, { "epoch": 9.03, "learning_rate": 4.5483183300442185e-05, "loss": 1.4734, "step": 856000 }, { "epoch": 9.04, "learning_rate": 4.548054497293076e-05, "loss": 1.4547, "step": 856500 }, { "epoch": 9.04, "learning_rate": 4.5477906645419336e-05, "loss": 1.5009, "step": 857000 }, { "epoch": 9.05, "learning_rate": 4.547526831790792e-05, "loss": 1.5013, "step": 857500 }, { "epoch": 9.05, "learning_rate": 4.5472629990396494e-05, "loss": 1.5221, "step": 858000 }, { "epoch": 9.06, "learning_rate": 4.546999166288506e-05, "loss": 1.5439, "step": 858500 }, { "epoch": 9.07, "learning_rate": 4.546735333537364e-05, "loss": 1.4816, "step": 859000 }, { "epoch": 9.07, "learning_rate": 4.546471500786222e-05, "loss": 1.4365, "step": 859500 }, { "epoch": 9.08, "learning_rate": 4.5462076680350796e-05, "loss": 1.476, "step": 860000 }, { "epoch": 9.08, "learning_rate": 4.5459438352839365e-05, "loss": 1.5484, "step": 860500 }, { "epoch": 9.09, "learning_rate": 4.545680002532795e-05, "loss": 1.5225, "step": 861000 }, { "epoch": 9.09, "learning_rate": 4.545416169781652e-05, "loss": 1.5144, "step": 861500 }, { "epoch": 9.1, "learning_rate": 4.54515233703051e-05, "loss": 1.4876, "step": 862000 }, { "epoch": 9.1, "learning_rate": 4.544888504279367e-05, "loss": 1.488, "step": 862500 }, { "epoch": 9.11, "learning_rate": 4.544624671528225e-05, "loss": 1.5019, "step": 863000 }, { "epoch": 9.11, "learning_rate": 4.5443608387770824e-05, "loss": 1.4586, "step": 863500 }, { "epoch": 9.12, "learning_rate": 4.54409700602594e-05, "loss": 1.4882, "step": 864000 }, { "epoch": 9.12, "learning_rate": 4.543833173274798e-05, "loss": 1.4721, "step": 864500 }, { "epoch": 9.13, "learning_rate": 4.543569340523656e-05, "loss": 1.5136, "step": 865000 }, { "epoch": 9.13, "learning_rate": 4.5433055077725126e-05, "loss": 1.4903, "step": 865500 }, { "epoch": 9.14, "learning_rate": 4.543041675021371e-05, "loss": 1.5404, "step": 866000 }, { "epoch": 9.14, "learning_rate": 4.5427778422702284e-05, "loss": 1.4911, "step": 866500 }, { "epoch": 9.15, "learning_rate": 4.542514009519086e-05, "loss": 1.4516, "step": 867000 }, { "epoch": 9.15, "learning_rate": 4.5422501767679435e-05, "loss": 1.4905, "step": 867500 }, { "epoch": 9.16, "learning_rate": 4.541986344016801e-05, "loss": 1.4924, "step": 868000 }, { "epoch": 9.17, "learning_rate": 4.5417225112656586e-05, "loss": 1.5063, "step": 868500 }, { "epoch": 9.17, "learning_rate": 4.541458678514516e-05, "loss": 1.4862, "step": 869000 }, { "epoch": 9.18, "learning_rate": 4.5411948457633744e-05, "loss": 1.4828, "step": 869500 }, { "epoch": 9.18, "learning_rate": 4.540931013012231e-05, "loss": 1.4989, "step": 870000 }, { "epoch": 9.19, "learning_rate": 4.540667180261089e-05, "loss": 1.533, "step": 870500 }, { "epoch": 9.19, "learning_rate": 4.5404033475099463e-05, "loss": 1.5392, "step": 871000 }, { "epoch": 9.2, "learning_rate": 4.5401395147588046e-05, "loss": 1.5212, "step": 871500 }, { "epoch": 9.2, "learning_rate": 4.539875682007662e-05, "loss": 1.5228, "step": 872000 }, { "epoch": 9.21, "learning_rate": 4.539611849256519e-05, "loss": 1.5116, "step": 872500 }, { "epoch": 9.21, "learning_rate": 4.539348016505377e-05, "loss": 1.519, "step": 873000 }, { "epoch": 9.22, "learning_rate": 4.539084183754235e-05, "loss": 1.5352, "step": 873500 }, { "epoch": 9.22, "learning_rate": 4.538820351003092e-05, "loss": 1.5112, "step": 874000 }, { "epoch": 9.23, "learning_rate": 4.53855651825195e-05, "loss": 1.5095, "step": 874500 }, { "epoch": 9.23, "learning_rate": 4.5382926855008074e-05, "loss": 1.4485, "step": 875000 }, { "epoch": 9.24, "learning_rate": 4.538028852749665e-05, "loss": 1.4753, "step": 875500 }, { "epoch": 9.24, "learning_rate": 4.5377650199985225e-05, "loss": 1.5099, "step": 876000 }, { "epoch": 9.25, "learning_rate": 4.537501187247381e-05, "loss": 1.5292, "step": 876500 }, { "epoch": 9.26, "learning_rate": 4.537237354496238e-05, "loss": 1.4996, "step": 877000 }, { "epoch": 9.26, "learning_rate": 4.536973521745095e-05, "loss": 1.525, "step": 877500 }, { "epoch": 9.27, "learning_rate": 4.5367096889939534e-05, "loss": 1.5394, "step": 878000 }, { "epoch": 9.27, "learning_rate": 4.536445856242811e-05, "loss": 1.5309, "step": 878500 }, { "epoch": 9.28, "learning_rate": 4.5361820234916685e-05, "loss": 1.4884, "step": 879000 }, { "epoch": 9.28, "learning_rate": 4.535918190740526e-05, "loss": 1.512, "step": 879500 }, { "epoch": 9.29, "learning_rate": 4.5356543579893836e-05, "loss": 1.4659, "step": 880000 }, { "epoch": 9.29, "learning_rate": 4.535390525238241e-05, "loss": 1.4314, "step": 880500 }, { "epoch": 9.3, "learning_rate": 4.535126692487099e-05, "loss": 1.5154, "step": 881000 }, { "epoch": 9.3, "learning_rate": 4.534862859735957e-05, "loss": 1.4589, "step": 881500 }, { "epoch": 9.31, "learning_rate": 4.534599026984814e-05, "loss": 1.4929, "step": 882000 }, { "epoch": 9.31, "learning_rate": 4.534335194233671e-05, "loss": 1.4657, "step": 882500 }, { "epoch": 9.32, "learning_rate": 4.534071361482529e-05, "loss": 1.5184, "step": 883000 }, { "epoch": 9.32, "learning_rate": 4.533807528731387e-05, "loss": 1.5129, "step": 883500 }, { "epoch": 9.33, "learning_rate": 4.5335436959802446e-05, "loss": 1.4943, "step": 884000 }, { "epoch": 9.33, "learning_rate": 4.5332798632291015e-05, "loss": 1.5047, "step": 884500 }, { "epoch": 9.34, "learning_rate": 4.53301603047796e-05, "loss": 1.5267, "step": 885000 }, { "epoch": 9.34, "learning_rate": 4.532752197726817e-05, "loss": 1.4905, "step": 885500 }, { "epoch": 9.35, "learning_rate": 4.532488364975675e-05, "loss": 1.501, "step": 886000 }, { "epoch": 9.36, "learning_rate": 4.5322245322245324e-05, "loss": 1.4997, "step": 886500 }, { "epoch": 9.36, "learning_rate": 4.53196069947339e-05, "loss": 1.4772, "step": 887000 }, { "epoch": 9.37, "learning_rate": 4.5316968667222475e-05, "loss": 1.4075, "step": 887500 }, { "epoch": 9.37, "learning_rate": 4.531433033971105e-05, "loss": 1.4817, "step": 888000 }, { "epoch": 9.38, "learning_rate": 4.531169201219963e-05, "loss": 1.5309, "step": 888500 }, { "epoch": 9.38, "learning_rate": 4.53090536846882e-05, "loss": 1.4818, "step": 889000 }, { "epoch": 9.39, "learning_rate": 4.530641535717678e-05, "loss": 1.5279, "step": 889500 }, { "epoch": 9.39, "learning_rate": 4.530377702966536e-05, "loss": 1.4995, "step": 890000 }, { "epoch": 9.4, "learning_rate": 4.5301138702153935e-05, "loss": 1.5277, "step": 890500 }, { "epoch": 9.4, "learning_rate": 4.529850037464251e-05, "loss": 1.4601, "step": 891000 }, { "epoch": 9.41, "learning_rate": 4.5295862047131085e-05, "loss": 1.4825, "step": 891500 }, { "epoch": 9.41, "learning_rate": 4.529322371961966e-05, "loss": 1.5565, "step": 892000 }, { "epoch": 9.42, "learning_rate": 4.5290585392108236e-05, "loss": 1.4455, "step": 892500 }, { "epoch": 9.42, "learning_rate": 4.528794706459681e-05, "loss": 1.5124, "step": 893000 }, { "epoch": 9.43, "learning_rate": 4.5285308737085394e-05, "loss": 1.4756, "step": 893500 }, { "epoch": 9.43, "learning_rate": 4.528267040957396e-05, "loss": 1.4438, "step": 894000 }, { "epoch": 9.44, "learning_rate": 4.528003208206254e-05, "loss": 1.4551, "step": 894500 }, { "epoch": 9.45, "learning_rate": 4.527739375455112e-05, "loss": 1.4604, "step": 895000 }, { "epoch": 9.45, "learning_rate": 4.5274755427039696e-05, "loss": 1.4646, "step": 895500 }, { "epoch": 9.46, "learning_rate": 4.527211709952827e-05, "loss": 1.4837, "step": 896000 }, { "epoch": 9.46, "learning_rate": 4.526947877201684e-05, "loss": 1.4403, "step": 896500 }, { "epoch": 9.47, "learning_rate": 4.526684044450542e-05, "loss": 1.4495, "step": 897000 }, { "epoch": 9.47, "learning_rate": 4.5264202116994e-05, "loss": 1.4795, "step": 897500 }, { "epoch": 9.48, "learning_rate": 4.5261563789482574e-05, "loss": 1.4648, "step": 898000 }, { "epoch": 9.48, "learning_rate": 4.525892546197115e-05, "loss": 1.4478, "step": 898500 }, { "epoch": 9.49, "learning_rate": 4.5256287134459725e-05, "loss": 1.446, "step": 899000 }, { "epoch": 9.49, "learning_rate": 4.52536488069483e-05, "loss": 1.4738, "step": 899500 }, { "epoch": 9.5, "learning_rate": 4.5251010479436876e-05, "loss": 1.4616, "step": 900000 }, { "epoch": 9.5, "learning_rate": 4.524837215192546e-05, "loss": 1.5362, "step": 900500 }, { "epoch": 9.51, "learning_rate": 4.5245733824414027e-05, "loss": 1.4643, "step": 901000 }, { "epoch": 9.51, "learning_rate": 4.52430954969026e-05, "loss": 1.5177, "step": 901500 }, { "epoch": 9.52, "learning_rate": 4.5240457169391184e-05, "loss": 1.5416, "step": 902000 }, { "epoch": 9.52, "learning_rate": 4.523781884187976e-05, "loss": 1.4862, "step": 902500 }, { "epoch": 9.53, "learning_rate": 4.5235180514368335e-05, "loss": 1.5007, "step": 903000 }, { "epoch": 9.53, "learning_rate": 4.523254218685691e-05, "loss": 1.5211, "step": 903500 }, { "epoch": 9.54, "learning_rate": 4.5229903859345486e-05, "loss": 1.5028, "step": 904000 }, { "epoch": 9.55, "learning_rate": 4.522726553183406e-05, "loss": 1.4976, "step": 904500 }, { "epoch": 9.55, "learning_rate": 4.522462720432264e-05, "loss": 1.4339, "step": 905000 }, { "epoch": 9.56, "learning_rate": 4.522198887681122e-05, "loss": 1.4719, "step": 905500 }, { "epoch": 9.56, "learning_rate": 4.521935054929979e-05, "loss": 1.5127, "step": 906000 }, { "epoch": 9.57, "learning_rate": 4.5216712221788364e-05, "loss": 1.5213, "step": 906500 }, { "epoch": 9.57, "learning_rate": 4.5214073894276946e-05, "loss": 1.4638, "step": 907000 }, { "epoch": 9.58, "learning_rate": 4.521143556676552e-05, "loss": 1.4422, "step": 907500 }, { "epoch": 9.58, "learning_rate": 4.520879723925409e-05, "loss": 1.4453, "step": 908000 }, { "epoch": 9.59, "learning_rate": 4.5206158911742666e-05, "loss": 1.475, "step": 908500 }, { "epoch": 9.59, "learning_rate": 4.520352058423125e-05, "loss": 1.4721, "step": 909000 }, { "epoch": 9.6, "learning_rate": 4.520088225671982e-05, "loss": 1.51, "step": 909500 }, { "epoch": 9.6, "learning_rate": 4.51982439292084e-05, "loss": 1.4479, "step": 910000 }, { "epoch": 9.61, "learning_rate": 4.5195605601696974e-05, "loss": 1.5073, "step": 910500 }, { "epoch": 9.61, "learning_rate": 4.519296727418555e-05, "loss": 1.491, "step": 911000 }, { "epoch": 9.62, "learning_rate": 4.5190328946674125e-05, "loss": 1.5191, "step": 911500 }, { "epoch": 9.62, "learning_rate": 4.51876906191627e-05, "loss": 1.5183, "step": 912000 }, { "epoch": 9.63, "learning_rate": 4.518505229165128e-05, "loss": 1.5053, "step": 912500 }, { "epoch": 9.64, "learning_rate": 4.518241396413985e-05, "loss": 1.51, "step": 913000 }, { "epoch": 9.64, "learning_rate": 4.517977563662843e-05, "loss": 1.4287, "step": 913500 }, { "epoch": 9.65, "learning_rate": 4.517713730911701e-05, "loss": 1.5434, "step": 914000 }, { "epoch": 9.65, "learning_rate": 4.5174498981605585e-05, "loss": 1.4698, "step": 914500 }, { "epoch": 9.66, "learning_rate": 4.517186065409416e-05, "loss": 1.5364, "step": 915000 }, { "epoch": 9.66, "learning_rate": 4.5169222326582736e-05, "loss": 1.4931, "step": 915500 }, { "epoch": 9.67, "learning_rate": 4.516658399907131e-05, "loss": 1.4778, "step": 916000 }, { "epoch": 9.67, "learning_rate": 4.516394567155989e-05, "loss": 1.5369, "step": 916500 }, { "epoch": 9.68, "learning_rate": 4.516130734404846e-05, "loss": 1.5657, "step": 917000 }, { "epoch": 9.68, "learning_rate": 4.515866901653704e-05, "loss": 1.5032, "step": 917500 }, { "epoch": 9.69, "learning_rate": 4.5156030689025613e-05, "loss": 1.4626, "step": 918000 }, { "epoch": 9.69, "learning_rate": 4.515339236151419e-05, "loss": 1.4944, "step": 918500 }, { "epoch": 9.7, "learning_rate": 4.515075403400277e-05, "loss": 1.5235, "step": 919000 }, { "epoch": 9.7, "learning_rate": 4.514811570649135e-05, "loss": 1.4774, "step": 919500 }, { "epoch": 9.71, "learning_rate": 4.5145477378979915e-05, "loss": 1.5623, "step": 920000 }, { "epoch": 9.71, "learning_rate": 4.514283905146849e-05, "loss": 1.4622, "step": 920500 }, { "epoch": 9.72, "learning_rate": 4.514020072395707e-05, "loss": 1.4604, "step": 921000 }, { "epoch": 9.72, "learning_rate": 4.513756239644565e-05, "loss": 1.4849, "step": 921500 }, { "epoch": 9.73, "learning_rate": 4.5134924068934224e-05, "loss": 1.4958, "step": 922000 }, { "epoch": 9.74, "learning_rate": 4.51322857414228e-05, "loss": 1.4507, "step": 922500 }, { "epoch": 9.74, "learning_rate": 4.5129647413911375e-05, "loss": 1.4955, "step": 923000 }, { "epoch": 9.75, "learning_rate": 4.512700908639995e-05, "loss": 1.4541, "step": 923500 }, { "epoch": 9.75, "learning_rate": 4.5124370758888526e-05, "loss": 1.4939, "step": 924000 }, { "epoch": 9.76, "learning_rate": 4.512173243137711e-05, "loss": 1.4765, "step": 924500 }, { "epoch": 9.76, "learning_rate": 4.511909410386568e-05, "loss": 1.5421, "step": 925000 }, { "epoch": 9.77, "learning_rate": 4.511645577635425e-05, "loss": 1.4423, "step": 925500 }, { "epoch": 9.77, "learning_rate": 4.5113817448842835e-05, "loss": 1.4601, "step": 926000 }, { "epoch": 9.78, "learning_rate": 4.511117912133141e-05, "loss": 1.4704, "step": 926500 }, { "epoch": 9.78, "learning_rate": 4.510854079381998e-05, "loss": 1.5334, "step": 927000 }, { "epoch": 9.79, "learning_rate": 4.510590246630856e-05, "loss": 1.4531, "step": 927500 }, { "epoch": 9.79, "learning_rate": 4.510326413879714e-05, "loss": 1.5642, "step": 928000 }, { "epoch": 9.8, "learning_rate": 4.510062581128571e-05, "loss": 1.4868, "step": 928500 }, { "epoch": 9.8, "learning_rate": 4.509798748377429e-05, "loss": 1.4767, "step": 929000 }, { "epoch": 9.81, "learning_rate": 4.509534915626286e-05, "loss": 1.5022, "step": 929500 }, { "epoch": 9.81, "learning_rate": 4.509271082875144e-05, "loss": 1.4662, "step": 930000 }, { "epoch": 9.82, "learning_rate": 4.5090072501240014e-05, "loss": 1.5083, "step": 930500 }, { "epoch": 9.83, "learning_rate": 4.5087434173728596e-05, "loss": 1.5323, "step": 931000 }, { "epoch": 9.83, "learning_rate": 4.508479584621717e-05, "loss": 1.5259, "step": 931500 }, { "epoch": 9.84, "learning_rate": 4.508215751870574e-05, "loss": 1.4552, "step": 932000 }, { "epoch": 9.84, "learning_rate": 4.5079519191194316e-05, "loss": 1.5096, "step": 932500 }, { "epoch": 9.85, "learning_rate": 4.50768808636829e-05, "loss": 1.5349, "step": 933000 }, { "epoch": 9.85, "learning_rate": 4.5074242536171474e-05, "loss": 1.5324, "step": 933500 }, { "epoch": 9.86, "learning_rate": 4.507160420866005e-05, "loss": 1.4674, "step": 934000 }, { "epoch": 9.86, "learning_rate": 4.5068965881148625e-05, "loss": 1.5186, "step": 934500 }, { "epoch": 9.87, "learning_rate": 4.50663275536372e-05, "loss": 1.4904, "step": 935000 }, { "epoch": 9.87, "learning_rate": 4.5063689226125776e-05, "loss": 1.4751, "step": 935500 }, { "epoch": 9.88, "learning_rate": 4.506105089861435e-05, "loss": 1.4166, "step": 936000 }, { "epoch": 9.88, "learning_rate": 4.505841257110293e-05, "loss": 1.4835, "step": 936500 }, { "epoch": 9.89, "learning_rate": 4.50557742435915e-05, "loss": 1.4622, "step": 937000 }, { "epoch": 9.89, "learning_rate": 4.505313591608008e-05, "loss": 1.4672, "step": 937500 }, { "epoch": 9.9, "learning_rate": 4.505049758856866e-05, "loss": 1.4824, "step": 938000 }, { "epoch": 9.9, "learning_rate": 4.5047859261057235e-05, "loss": 1.4678, "step": 938500 }, { "epoch": 9.91, "learning_rate": 4.5045220933545804e-05, "loss": 1.4639, "step": 939000 }, { "epoch": 9.91, "learning_rate": 4.5042582606034386e-05, "loss": 1.4757, "step": 939500 }, { "epoch": 9.92, "learning_rate": 4.503994427852296e-05, "loss": 1.4152, "step": 940000 }, { "epoch": 9.93, "learning_rate": 4.503730595101154e-05, "loss": 1.5075, "step": 940500 }, { "epoch": 9.93, "learning_rate": 4.503466762350011e-05, "loss": 1.4309, "step": 941000 }, { "epoch": 9.94, "learning_rate": 4.503202929598869e-05, "loss": 1.5002, "step": 941500 }, { "epoch": 9.94, "learning_rate": 4.5029390968477264e-05, "loss": 1.4672, "step": 942000 }, { "epoch": 9.95, "learning_rate": 4.502675264096584e-05, "loss": 1.4766, "step": 942500 }, { "epoch": 9.95, "learning_rate": 4.502411431345442e-05, "loss": 1.4786, "step": 943000 }, { "epoch": 9.96, "learning_rate": 4.5021475985943e-05, "loss": 1.509, "step": 943500 }, { "epoch": 9.96, "learning_rate": 4.5018837658431566e-05, "loss": 1.4909, "step": 944000 }, { "epoch": 9.97, "learning_rate": 4.501619933092014e-05, "loss": 1.4568, "step": 944500 }, { "epoch": 9.97, "learning_rate": 4.5013561003408724e-05, "loss": 1.4634, "step": 945000 }, { "epoch": 9.98, "learning_rate": 4.50109226758973e-05, "loss": 1.498, "step": 945500 }, { "epoch": 9.98, "learning_rate": 4.500828434838587e-05, "loss": 1.4812, "step": 946000 }, { "epoch": 9.99, "learning_rate": 4.500564602087445e-05, "loss": 1.5238, "step": 946500 }, { "epoch": 9.99, "learning_rate": 4.5003007693363026e-05, "loss": 1.4968, "step": 947000 }, { "epoch": 10.0, "learning_rate": 4.50003693658516e-05, "loss": 1.5074, "step": 947500 }, { "epoch": 10.0, "learning_rate": 4.4997731038340177e-05, "loss": 1.5028, "step": 948000 }, { "epoch": 10.01, "learning_rate": 4.499509271082875e-05, "loss": 1.4614, "step": 948500 }, { "epoch": 10.02, "learning_rate": 4.499245438331733e-05, "loss": 1.5029, "step": 949000 }, { "epoch": 10.02, "learning_rate": 4.49898160558059e-05, "loss": 1.5074, "step": 949500 }, { "epoch": 10.03, "learning_rate": 4.4987177728294485e-05, "loss": 1.519, "step": 950000 }, { "epoch": 10.03, "learning_rate": 4.498453940078306e-05, "loss": 1.4205, "step": 950500 }, { "epoch": 10.04, "learning_rate": 4.498190107327163e-05, "loss": 1.4628, "step": 951000 }, { "epoch": 10.04, "learning_rate": 4.497926274576021e-05, "loss": 1.4497, "step": 951500 }, { "epoch": 10.05, "learning_rate": 4.497662441824879e-05, "loss": 1.5096, "step": 952000 }, { "epoch": 10.05, "learning_rate": 4.497398609073736e-05, "loss": 1.4989, "step": 952500 }, { "epoch": 10.06, "learning_rate": 4.497134776322594e-05, "loss": 1.4959, "step": 953000 }, { "epoch": 10.06, "learning_rate": 4.4968709435714514e-05, "loss": 1.4446, "step": 953500 }, { "epoch": 10.07, "learning_rate": 4.496607110820309e-05, "loss": 1.4954, "step": 954000 }, { "epoch": 10.07, "learning_rate": 4.4963432780691665e-05, "loss": 1.4781, "step": 954500 }, { "epoch": 10.08, "learning_rate": 4.496079445318025e-05, "loss": 1.4616, "step": 955000 }, { "epoch": 10.08, "learning_rate": 4.4958156125668816e-05, "loss": 1.4417, "step": 955500 }, { "epoch": 10.09, "learning_rate": 4.495551779815739e-05, "loss": 1.5022, "step": 956000 }, { "epoch": 10.09, "learning_rate": 4.4952879470645967e-05, "loss": 1.4272, "step": 956500 }, { "epoch": 10.1, "learning_rate": 4.495024114313455e-05, "loss": 1.4717, "step": 957000 }, { "epoch": 10.1, "learning_rate": 4.4947602815623124e-05, "loss": 1.5145, "step": 957500 }, { "epoch": 10.11, "learning_rate": 4.494496448811169e-05, "loss": 1.4569, "step": 958000 }, { "epoch": 10.12, "learning_rate": 4.4942326160600275e-05, "loss": 1.5077, "step": 958500 }, { "epoch": 10.12, "learning_rate": 4.493968783308885e-05, "loss": 1.4501, "step": 959000 }, { "epoch": 10.13, "learning_rate": 4.4937049505577426e-05, "loss": 1.4879, "step": 959500 }, { "epoch": 10.13, "learning_rate": 4.4934411178066e-05, "loss": 1.5376, "step": 960000 }, { "epoch": 10.14, "learning_rate": 4.493177285055458e-05, "loss": 1.473, "step": 960500 }, { "epoch": 10.14, "learning_rate": 4.492913452304315e-05, "loss": 1.5191, "step": 961000 }, { "epoch": 10.15, "learning_rate": 4.492649619553173e-05, "loss": 1.4843, "step": 961500 }, { "epoch": 10.15, "learning_rate": 4.492385786802031e-05, "loss": 1.4281, "step": 962000 }, { "epoch": 10.16, "learning_rate": 4.4921219540508886e-05, "loss": 1.4868, "step": 962500 }, { "epoch": 10.16, "learning_rate": 4.4918581212997455e-05, "loss": 1.4922, "step": 963000 }, { "epoch": 10.17, "learning_rate": 4.491594288548604e-05, "loss": 1.4366, "step": 963500 }, { "epoch": 10.17, "learning_rate": 4.491330455797461e-05, "loss": 1.4608, "step": 964000 }, { "epoch": 10.18, "learning_rate": 4.491066623046319e-05, "loss": 1.4923, "step": 964500 }, { "epoch": 10.18, "learning_rate": 4.4908027902951763e-05, "loss": 1.4819, "step": 965000 }, { "epoch": 10.19, "learning_rate": 4.490538957544034e-05, "loss": 1.3943, "step": 965500 }, { "epoch": 10.19, "learning_rate": 4.4902751247928914e-05, "loss": 1.4837, "step": 966000 }, { "epoch": 10.2, "learning_rate": 4.490011292041749e-05, "loss": 1.5072, "step": 966500 }, { "epoch": 10.21, "learning_rate": 4.489747459290607e-05, "loss": 1.4958, "step": 967000 }, { "epoch": 10.21, "learning_rate": 4.489483626539464e-05, "loss": 1.4854, "step": 967500 }, { "epoch": 10.22, "learning_rate": 4.4892197937883216e-05, "loss": 1.4557, "step": 968000 }, { "epoch": 10.22, "learning_rate": 4.48895596103718e-05, "loss": 1.4132, "step": 968500 }, { "epoch": 10.23, "learning_rate": 4.4886921282860374e-05, "loss": 1.4359, "step": 969000 }, { "epoch": 10.23, "learning_rate": 4.488428295534895e-05, "loss": 1.4939, "step": 969500 }, { "epoch": 10.24, "learning_rate": 4.488164462783752e-05, "loss": 1.5083, "step": 970000 }, { "epoch": 10.24, "learning_rate": 4.48790063003261e-05, "loss": 1.466, "step": 970500 }, { "epoch": 10.25, "learning_rate": 4.4876367972814676e-05, "loss": 1.4175, "step": 971000 }, { "epoch": 10.25, "learning_rate": 4.487372964530325e-05, "loss": 1.4706, "step": 971500 }, { "epoch": 10.26, "learning_rate": 4.487109131779183e-05, "loss": 1.4759, "step": 972000 }, { "epoch": 10.26, "learning_rate": 4.48684529902804e-05, "loss": 1.4587, "step": 972500 }, { "epoch": 10.27, "learning_rate": 4.486581466276898e-05, "loss": 1.4934, "step": 973000 }, { "epoch": 10.27, "learning_rate": 4.4863176335257553e-05, "loss": 1.5089, "step": 973500 }, { "epoch": 10.28, "learning_rate": 4.4860538007746136e-05, "loss": 1.4623, "step": 974000 }, { "epoch": 10.28, "learning_rate": 4.4857899680234704e-05, "loss": 1.4125, "step": 974500 }, { "epoch": 10.29, "learning_rate": 4.485526135272328e-05, "loss": 1.4723, "step": 975000 }, { "epoch": 10.29, "learning_rate": 4.485262302521186e-05, "loss": 1.512, "step": 975500 }, { "epoch": 10.3, "learning_rate": 4.484998469770044e-05, "loss": 1.4719, "step": 976000 }, { "epoch": 10.31, "learning_rate": 4.484734637018901e-05, "loss": 1.5121, "step": 976500 }, { "epoch": 10.31, "learning_rate": 4.484470804267759e-05, "loss": 1.4502, "step": 977000 }, { "epoch": 10.32, "learning_rate": 4.4842069715166164e-05, "loss": 1.4706, "step": 977500 }, { "epoch": 10.32, "learning_rate": 4.483943138765474e-05, "loss": 1.4827, "step": 978000 }, { "epoch": 10.33, "learning_rate": 4.4836793060143315e-05, "loss": 1.451, "step": 978500 }, { "epoch": 10.33, "learning_rate": 4.48341547326319e-05, "loss": 1.4546, "step": 979000 }, { "epoch": 10.34, "learning_rate": 4.4831516405120466e-05, "loss": 1.4421, "step": 979500 }, { "epoch": 10.34, "learning_rate": 4.482887807760904e-05, "loss": 1.5474, "step": 980000 }, { "epoch": 10.35, "learning_rate": 4.4826239750097624e-05, "loss": 1.4536, "step": 980500 }, { "epoch": 10.35, "learning_rate": 4.48236014225862e-05, "loss": 1.4571, "step": 981000 }, { "epoch": 10.36, "learning_rate": 4.4820963095074775e-05, "loss": 1.5258, "step": 981500 }, { "epoch": 10.36, "learning_rate": 4.4818324767563344e-05, "loss": 1.4907, "step": 982000 }, { "epoch": 10.37, "learning_rate": 4.4815686440051926e-05, "loss": 1.5235, "step": 982500 }, { "epoch": 10.37, "learning_rate": 4.48130481125405e-05, "loss": 1.5357, "step": 983000 }, { "epoch": 10.38, "learning_rate": 4.481040978502908e-05, "loss": 1.4476, "step": 983500 }, { "epoch": 10.38, "learning_rate": 4.480777145751765e-05, "loss": 1.4688, "step": 984000 }, { "epoch": 10.39, "learning_rate": 4.480513313000623e-05, "loss": 1.4813, "step": 984500 }, { "epoch": 10.4, "learning_rate": 4.48024948024948e-05, "loss": 1.5097, "step": 985000 }, { "epoch": 10.4, "learning_rate": 4.479985647498338e-05, "loss": 1.4923, "step": 985500 }, { "epoch": 10.41, "learning_rate": 4.479721814747196e-05, "loss": 1.472, "step": 986000 }, { "epoch": 10.41, "learning_rate": 4.479457981996053e-05, "loss": 1.4826, "step": 986500 }, { "epoch": 10.42, "learning_rate": 4.4791941492449105e-05, "loss": 1.4857, "step": 987000 }, { "epoch": 10.42, "learning_rate": 4.478930316493769e-05, "loss": 1.4517, "step": 987500 }, { "epoch": 10.43, "learning_rate": 4.478666483742626e-05, "loss": 1.4957, "step": 988000 }, { "epoch": 10.43, "learning_rate": 4.478402650991484e-05, "loss": 1.4694, "step": 988500 }, { "epoch": 10.44, "learning_rate": 4.4781388182403414e-05, "loss": 1.4693, "step": 989000 }, { "epoch": 10.44, "learning_rate": 4.477874985489199e-05, "loss": 1.4784, "step": 989500 }, { "epoch": 10.45, "learning_rate": 4.4776111527380565e-05, "loss": 1.4867, "step": 990000 }, { "epoch": 10.45, "learning_rate": 4.477347319986914e-05, "loss": 1.4665, "step": 990500 }, { "epoch": 10.46, "learning_rate": 4.477083487235772e-05, "loss": 1.4847, "step": 991000 }, { "epoch": 10.46, "learning_rate": 4.476819654484629e-05, "loss": 1.4726, "step": 991500 }, { "epoch": 10.47, "learning_rate": 4.476555821733487e-05, "loss": 1.4515, "step": 992000 }, { "epoch": 10.47, "learning_rate": 4.476291988982345e-05, "loss": 1.4694, "step": 992500 }, { "epoch": 10.48, "learning_rate": 4.4760281562312025e-05, "loss": 1.4844, "step": 993000 }, { "epoch": 10.48, "learning_rate": 4.475764323480059e-05, "loss": 1.4635, "step": 993500 }, { "epoch": 10.49, "learning_rate": 4.475500490728917e-05, "loss": 1.4775, "step": 994000 }, { "epoch": 10.5, "learning_rate": 4.475236657977775e-05, "loss": 1.4765, "step": 994500 }, { "epoch": 10.5, "learning_rate": 4.4749728252266327e-05, "loss": 1.4752, "step": 995000 }, { "epoch": 10.51, "learning_rate": 4.47470899247549e-05, "loss": 1.5024, "step": 995500 }, { "epoch": 10.51, "learning_rate": 4.474445159724348e-05, "loss": 1.4487, "step": 996000 }, { "epoch": 10.52, "learning_rate": 4.474181326973205e-05, "loss": 1.4695, "step": 996500 }, { "epoch": 10.52, "learning_rate": 4.473917494222063e-05, "loss": 1.4563, "step": 997000 }, { "epoch": 10.53, "learning_rate": 4.4736536614709204e-05, "loss": 1.533, "step": 997500 }, { "epoch": 10.53, "learning_rate": 4.4733898287197786e-05, "loss": 1.4621, "step": 998000 }, { "epoch": 10.54, "learning_rate": 4.4731259959686355e-05, "loss": 1.4063, "step": 998500 }, { "epoch": 10.54, "learning_rate": 4.472862163217493e-05, "loss": 1.4481, "step": 999000 }, { "epoch": 10.55, "learning_rate": 4.472598330466351e-05, "loss": 1.4998, "step": 999500 }, { "epoch": 10.55, "learning_rate": 4.472334497715209e-05, "loss": 1.4728, "step": 1000000 }, { "epoch": 10.56, "learning_rate": 4.4720706649640664e-05, "loss": 1.4496, "step": 1000500 }, { "epoch": 10.56, "learning_rate": 4.471806832212924e-05, "loss": 1.5025, "step": 1001000 }, { "epoch": 10.57, "learning_rate": 4.4715429994617815e-05, "loss": 1.4753, "step": 1001500 }, { "epoch": 10.57, "learning_rate": 4.471279166710639e-05, "loss": 1.4395, "step": 1002000 }, { "epoch": 10.58, "learning_rate": 4.4710153339594966e-05, "loss": 1.4607, "step": 1002500 }, { "epoch": 10.58, "learning_rate": 4.470751501208354e-05, "loss": 1.5016, "step": 1003000 }, { "epoch": 10.59, "learning_rate": 4.4704876684572117e-05, "loss": 1.4951, "step": 1003500 }, { "epoch": 10.6, "learning_rate": 4.470223835706069e-05, "loss": 1.4987, "step": 1004000 }, { "epoch": 10.6, "learning_rate": 4.4699600029549274e-05, "loss": 1.4285, "step": 1004500 }, { "epoch": 10.61, "learning_rate": 4.469696170203785e-05, "loss": 1.4291, "step": 1005000 }, { "epoch": 10.61, "learning_rate": 4.469432337452642e-05, "loss": 1.4817, "step": 1005500 }, { "epoch": 10.62, "learning_rate": 4.4691685047014994e-05, "loss": 1.4571, "step": 1006000 }, { "epoch": 10.62, "learning_rate": 4.4689046719503576e-05, "loss": 1.46, "step": 1006500 }, { "epoch": 10.63, "learning_rate": 4.468640839199215e-05, "loss": 1.5045, "step": 1007000 }, { "epoch": 10.63, "learning_rate": 4.468377006448073e-05, "loss": 1.5132, "step": 1007500 }, { "epoch": 10.64, "learning_rate": 4.46811317369693e-05, "loss": 1.4437, "step": 1008000 }, { "epoch": 10.64, "learning_rate": 4.467849340945788e-05, "loss": 1.4863, "step": 1008500 }, { "epoch": 10.65, "learning_rate": 4.4675855081946454e-05, "loss": 1.4778, "step": 1009000 }, { "epoch": 10.65, "learning_rate": 4.467321675443503e-05, "loss": 1.4645, "step": 1009500 }, { "epoch": 10.66, "learning_rate": 4.467057842692361e-05, "loss": 1.4974, "step": 1010000 }, { "epoch": 10.66, "learning_rate": 4.466794009941218e-05, "loss": 1.4646, "step": 1010500 }, { "epoch": 10.67, "learning_rate": 4.4665301771900756e-05, "loss": 1.5104, "step": 1011000 }, { "epoch": 10.67, "learning_rate": 4.466266344438934e-05, "loss": 1.4706, "step": 1011500 }, { "epoch": 10.68, "learning_rate": 4.4660025116877913e-05, "loss": 1.4921, "step": 1012000 }, { "epoch": 10.69, "learning_rate": 4.465738678936648e-05, "loss": 1.4999, "step": 1012500 }, { "epoch": 10.69, "learning_rate": 4.4654748461855064e-05, "loss": 1.5034, "step": 1013000 }, { "epoch": 10.7, "learning_rate": 4.465211013434364e-05, "loss": 1.4542, "step": 1013500 }, { "epoch": 10.7, "learning_rate": 4.4649471806832215e-05, "loss": 1.4814, "step": 1014000 }, { "epoch": 10.71, "learning_rate": 4.464683347932079e-05, "loss": 1.471, "step": 1014500 }, { "epoch": 10.71, "learning_rate": 4.4644195151809366e-05, "loss": 1.4532, "step": 1015000 }, { "epoch": 10.72, "learning_rate": 4.464155682429794e-05, "loss": 1.4373, "step": 1015500 }, { "epoch": 10.72, "learning_rate": 4.463891849678652e-05, "loss": 1.4787, "step": 1016000 }, { "epoch": 10.73, "learning_rate": 4.46362801692751e-05, "loss": 1.4601, "step": 1016500 }, { "epoch": 10.73, "learning_rate": 4.4633641841763675e-05, "loss": 1.4594, "step": 1017000 }, { "epoch": 10.74, "learning_rate": 4.4631003514252244e-05, "loss": 1.5043, "step": 1017500 }, { "epoch": 10.74, "learning_rate": 4.462836518674082e-05, "loss": 1.4808, "step": 1018000 }, { "epoch": 10.75, "learning_rate": 4.46257268592294e-05, "loss": 1.477, "step": 1018500 }, { "epoch": 10.75, "learning_rate": 4.462308853171798e-05, "loss": 1.5432, "step": 1019000 }, { "epoch": 10.76, "learning_rate": 4.462045020420655e-05, "loss": 1.4557, "step": 1019500 }, { "epoch": 10.76, "learning_rate": 4.461781187669513e-05, "loss": 1.4537, "step": 1020000 }, { "epoch": 10.77, "learning_rate": 4.4615173549183703e-05, "loss": 1.4945, "step": 1020500 }, { "epoch": 10.77, "learning_rate": 4.461253522167228e-05, "loss": 1.471, "step": 1021000 }, { "epoch": 10.78, "learning_rate": 4.4609896894160854e-05, "loss": 1.4408, "step": 1021500 }, { "epoch": 10.79, "learning_rate": 4.460725856664943e-05, "loss": 1.4655, "step": 1022000 }, { "epoch": 10.79, "learning_rate": 4.4604620239138005e-05, "loss": 1.4721, "step": 1022500 }, { "epoch": 10.8, "learning_rate": 4.460198191162658e-05, "loss": 1.4475, "step": 1023000 }, { "epoch": 10.8, "learning_rate": 4.459934358411516e-05, "loss": 1.4831, "step": 1023500 }, { "epoch": 10.81, "learning_rate": 4.459670525660374e-05, "loss": 1.4868, "step": 1024000 }, { "epoch": 10.81, "learning_rate": 4.459406692909231e-05, "loss": 1.492, "step": 1024500 }, { "epoch": 10.82, "learning_rate": 4.459142860158089e-05, "loss": 1.498, "step": 1025000 }, { "epoch": 10.82, "learning_rate": 4.4588790274069465e-05, "loss": 1.4162, "step": 1025500 }, { "epoch": 10.83, "learning_rate": 4.458615194655804e-05, "loss": 1.4832, "step": 1026000 }, { "epoch": 10.83, "learning_rate": 4.4583513619046616e-05, "loss": 1.5313, "step": 1026500 }, { "epoch": 10.84, "learning_rate": 4.458087529153519e-05, "loss": 1.4502, "step": 1027000 }, { "epoch": 10.84, "learning_rate": 4.457823696402377e-05, "loss": 1.47, "step": 1027500 }, { "epoch": 10.85, "learning_rate": 4.457559863651234e-05, "loss": 1.5197, "step": 1028000 }, { "epoch": 10.85, "learning_rate": 4.4572960309000925e-05, "loss": 1.4838, "step": 1028500 }, { "epoch": 10.86, "learning_rate": 4.45703219814895e-05, "loss": 1.4788, "step": 1029000 }, { "epoch": 10.86, "learning_rate": 4.456768365397807e-05, "loss": 1.4449, "step": 1029500 }, { "epoch": 10.87, "learning_rate": 4.4565045326466645e-05, "loss": 1.4606, "step": 1030000 }, { "epoch": 10.88, "learning_rate": 4.456240699895523e-05, "loss": 1.5465, "step": 1030500 }, { "epoch": 10.88, "learning_rate": 4.45597686714438e-05, "loss": 1.4758, "step": 1031000 }, { "epoch": 10.89, "learning_rate": 4.455713034393237e-05, "loss": 1.5108, "step": 1031500 }, { "epoch": 10.89, "learning_rate": 4.455449201642095e-05, "loss": 1.4862, "step": 1032000 }, { "epoch": 10.9, "learning_rate": 4.455185368890953e-05, "loss": 1.4871, "step": 1032500 }, { "epoch": 10.9, "learning_rate": 4.4549215361398104e-05, "loss": 1.4167, "step": 1033000 }, { "epoch": 10.91, "learning_rate": 4.454657703388668e-05, "loss": 1.4649, "step": 1033500 }, { "epoch": 10.91, "learning_rate": 4.4543938706375255e-05, "loss": 1.4926, "step": 1034000 }, { "epoch": 10.92, "learning_rate": 4.454130037886383e-05, "loss": 1.5074, "step": 1034500 }, { "epoch": 10.92, "learning_rate": 4.4538662051352406e-05, "loss": 1.5181, "step": 1035000 }, { "epoch": 10.93, "learning_rate": 4.453602372384099e-05, "loss": 1.4328, "step": 1035500 }, { "epoch": 10.93, "learning_rate": 4.4533385396329564e-05, "loss": 1.4495, "step": 1036000 }, { "epoch": 10.94, "learning_rate": 4.453074706881813e-05, "loss": 1.4775, "step": 1036500 }, { "epoch": 10.94, "learning_rate": 4.4528108741306715e-05, "loss": 1.4632, "step": 1037000 }, { "epoch": 10.95, "learning_rate": 4.452547041379529e-05, "loss": 1.4614, "step": 1037500 }, { "epoch": 10.95, "learning_rate": 4.4522832086283866e-05, "loss": 1.4344, "step": 1038000 }, { "epoch": 10.96, "learning_rate": 4.452019375877244e-05, "loss": 1.4329, "step": 1038500 }, { "epoch": 10.96, "learning_rate": 4.451755543126102e-05, "loss": 1.4505, "step": 1039000 }, { "epoch": 10.97, "learning_rate": 4.451491710374959e-05, "loss": 1.4591, "step": 1039500 }, { "epoch": 10.98, "learning_rate": 4.451227877623817e-05, "loss": 1.4828, "step": 1040000 }, { "epoch": 10.98, "learning_rate": 4.450964044872675e-05, "loss": 1.4477, "step": 1040500 }, { "epoch": 10.99, "learning_rate": 4.450700212121532e-05, "loss": 1.5263, "step": 1041000 }, { "epoch": 10.99, "learning_rate": 4.4504363793703894e-05, "loss": 1.4474, "step": 1041500 }, { "epoch": 11.0, "learning_rate": 4.4501725466192477e-05, "loss": 1.4488, "step": 1042000 }, { "epoch": 11.0, "learning_rate": 4.449908713868105e-05, "loss": 1.5118, "step": 1042500 }, { "epoch": 11.01, "learning_rate": 4.449644881116963e-05, "loss": 1.4807, "step": 1043000 }, { "epoch": 11.01, "learning_rate": 4.4493810483658196e-05, "loss": 1.4302, "step": 1043500 }, { "epoch": 11.02, "learning_rate": 4.449117215614678e-05, "loss": 1.4364, "step": 1044000 }, { "epoch": 11.02, "learning_rate": 4.4488533828635354e-05, "loss": 1.4693, "step": 1044500 }, { "epoch": 11.03, "learning_rate": 4.448589550112393e-05, "loss": 1.4561, "step": 1045000 }, { "epoch": 11.03, "learning_rate": 4.4483257173612505e-05, "loss": 1.458, "step": 1045500 }, { "epoch": 11.04, "learning_rate": 4.448061884610108e-05, "loss": 1.4763, "step": 1046000 }, { "epoch": 11.04, "learning_rate": 4.4477980518589656e-05, "loss": 1.443, "step": 1046500 }, { "epoch": 11.05, "learning_rate": 4.447534219107823e-05, "loss": 1.4537, "step": 1047000 }, { "epoch": 11.05, "learning_rate": 4.4472703863566814e-05, "loss": 1.4717, "step": 1047500 }, { "epoch": 11.06, "learning_rate": 4.447006553605539e-05, "loss": 1.3994, "step": 1048000 }, { "epoch": 11.07, "learning_rate": 4.446742720854396e-05, "loss": 1.4524, "step": 1048500 }, { "epoch": 11.07, "learning_rate": 4.446478888103254e-05, "loss": 1.4808, "step": 1049000 }, { "epoch": 11.08, "learning_rate": 4.4462150553521116e-05, "loss": 1.4701, "step": 1049500 }, { "epoch": 11.08, "learning_rate": 4.445951222600969e-05, "loss": 1.414, "step": 1050000 }, { "epoch": 11.09, "learning_rate": 4.445687389849827e-05, "loss": 1.5012, "step": 1050500 }, { "epoch": 11.09, "learning_rate": 4.445423557098684e-05, "loss": 1.4941, "step": 1051000 }, { "epoch": 11.1, "learning_rate": 4.445159724347542e-05, "loss": 1.4335, "step": 1051500 }, { "epoch": 11.1, "learning_rate": 4.444895891596399e-05, "loss": 1.3966, "step": 1052000 }, { "epoch": 11.11, "learning_rate": 4.4446320588452575e-05, "loss": 1.4673, "step": 1052500 }, { "epoch": 11.11, "learning_rate": 4.4443682260941144e-05, "loss": 1.4664, "step": 1053000 }, { "epoch": 11.12, "learning_rate": 4.444104393342972e-05, "loss": 1.4682, "step": 1053500 }, { "epoch": 11.12, "learning_rate": 4.44384056059183e-05, "loss": 1.4812, "step": 1054000 }, { "epoch": 11.13, "learning_rate": 4.443576727840688e-05, "loss": 1.457, "step": 1054500 }, { "epoch": 11.13, "learning_rate": 4.443312895089545e-05, "loss": 1.4898, "step": 1055000 }, { "epoch": 11.14, "learning_rate": 4.443049062338402e-05, "loss": 1.4538, "step": 1055500 }, { "epoch": 11.14, "learning_rate": 4.4427852295872604e-05, "loss": 1.4962, "step": 1056000 }, { "epoch": 11.15, "learning_rate": 4.442521396836118e-05, "loss": 1.5021, "step": 1056500 }, { "epoch": 11.15, "learning_rate": 4.4422575640849755e-05, "loss": 1.4588, "step": 1057000 }, { "epoch": 11.16, "learning_rate": 4.441993731333833e-05, "loss": 1.5175, "step": 1057500 }, { "epoch": 11.17, "learning_rate": 4.4417298985826906e-05, "loss": 1.4994, "step": 1058000 }, { "epoch": 11.17, "learning_rate": 4.441466065831548e-05, "loss": 1.4983, "step": 1058500 }, { "epoch": 11.18, "learning_rate": 4.441202233080406e-05, "loss": 1.4514, "step": 1059000 }, { "epoch": 11.18, "learning_rate": 4.440938400329264e-05, "loss": 1.4945, "step": 1059500 }, { "epoch": 11.19, "learning_rate": 4.440674567578121e-05, "loss": 1.4531, "step": 1060000 }, { "epoch": 11.19, "learning_rate": 4.440410734826978e-05, "loss": 1.4371, "step": 1060500 }, { "epoch": 11.2, "learning_rate": 4.4401469020758365e-05, "loss": 1.51, "step": 1061000 }, { "epoch": 11.2, "learning_rate": 4.439883069324694e-05, "loss": 1.4528, "step": 1061500 }, { "epoch": 11.21, "learning_rate": 4.4396192365735516e-05, "loss": 1.4786, "step": 1062000 }, { "epoch": 11.21, "learning_rate": 4.439355403822409e-05, "loss": 1.5288, "step": 1062500 }, { "epoch": 11.22, "learning_rate": 4.439091571071267e-05, "loss": 1.4431, "step": 1063000 }, { "epoch": 11.22, "learning_rate": 4.438827738320124e-05, "loss": 1.5064, "step": 1063500 }, { "epoch": 11.23, "learning_rate": 4.438563905568982e-05, "loss": 1.4263, "step": 1064000 }, { "epoch": 11.23, "learning_rate": 4.43830007281784e-05, "loss": 1.4611, "step": 1064500 }, { "epoch": 11.24, "learning_rate": 4.438036240066697e-05, "loss": 1.4955, "step": 1065000 }, { "epoch": 11.24, "learning_rate": 4.4377724073155545e-05, "loss": 1.4874, "step": 1065500 }, { "epoch": 11.25, "learning_rate": 4.437508574564413e-05, "loss": 1.498, "step": 1066000 }, { "epoch": 11.26, "learning_rate": 4.43724474181327e-05, "loss": 1.4159, "step": 1066500 }, { "epoch": 11.26, "learning_rate": 4.436980909062128e-05, "loss": 1.5195, "step": 1067000 }, { "epoch": 11.27, "learning_rate": 4.436717076310985e-05, "loss": 1.5183, "step": 1067500 }, { "epoch": 11.27, "learning_rate": 4.436453243559843e-05, "loss": 1.44, "step": 1068000 }, { "epoch": 11.28, "learning_rate": 4.4361894108087004e-05, "loss": 1.4171, "step": 1068500 }, { "epoch": 11.28, "learning_rate": 4.435925578057558e-05, "loss": 1.4739, "step": 1069000 }, { "epoch": 11.29, "learning_rate": 4.4356617453064155e-05, "loss": 1.4537, "step": 1069500 }, { "epoch": 11.29, "learning_rate": 4.435397912555273e-05, "loss": 1.4611, "step": 1070000 }, { "epoch": 11.3, "learning_rate": 4.4351340798041306e-05, "loss": 1.4715, "step": 1070500 }, { "epoch": 11.3, "learning_rate": 4.434870247052988e-05, "loss": 1.468, "step": 1071000 }, { "epoch": 11.31, "learning_rate": 4.4346064143018464e-05, "loss": 1.5055, "step": 1071500 }, { "epoch": 11.31, "learning_rate": 4.434342581550703e-05, "loss": 1.4133, "step": 1072000 }, { "epoch": 11.32, "learning_rate": 4.434078748799561e-05, "loss": 1.4769, "step": 1072500 }, { "epoch": 11.32, "learning_rate": 4.433814916048419e-05, "loss": 1.4183, "step": 1073000 }, { "epoch": 11.33, "learning_rate": 4.4335510832972766e-05, "loss": 1.4266, "step": 1073500 }, { "epoch": 11.33, "learning_rate": 4.433287250546134e-05, "loss": 1.4487, "step": 1074000 }, { "epoch": 11.34, "learning_rate": 4.433023417794992e-05, "loss": 1.4605, "step": 1074500 }, { "epoch": 11.34, "learning_rate": 4.432759585043849e-05, "loss": 1.4443, "step": 1075000 }, { "epoch": 11.35, "learning_rate": 4.432495752292707e-05, "loss": 1.4501, "step": 1075500 }, { "epoch": 11.36, "learning_rate": 4.4322319195415644e-05, "loss": 1.4871, "step": 1076000 }, { "epoch": 11.36, "learning_rate": 4.4319680867904226e-05, "loss": 1.4934, "step": 1076500 }, { "epoch": 11.37, "learning_rate": 4.4317042540392795e-05, "loss": 1.4151, "step": 1077000 }, { "epoch": 11.37, "learning_rate": 4.431440421288137e-05, "loss": 1.4155, "step": 1077500 }, { "epoch": 11.38, "learning_rate": 4.431176588536995e-05, "loss": 1.4655, "step": 1078000 }, { "epoch": 11.38, "learning_rate": 4.430912755785853e-05, "loss": 1.4146, "step": 1078500 }, { "epoch": 11.39, "learning_rate": 4.4306489230347096e-05, "loss": 1.499, "step": 1079000 }, { "epoch": 11.39, "learning_rate": 4.430385090283567e-05, "loss": 1.4797, "step": 1079500 }, { "epoch": 11.4, "learning_rate": 4.4301212575324254e-05, "loss": 1.457, "step": 1080000 }, { "epoch": 11.4, "learning_rate": 4.429857424781283e-05, "loss": 1.4104, "step": 1080500 }, { "epoch": 11.41, "learning_rate": 4.4295935920301405e-05, "loss": 1.479, "step": 1081000 }, { "epoch": 11.41, "learning_rate": 4.429329759278998e-05, "loss": 1.5106, "step": 1081500 }, { "epoch": 11.42, "learning_rate": 4.4290659265278556e-05, "loss": 1.4806, "step": 1082000 }, { "epoch": 11.42, "learning_rate": 4.428802093776713e-05, "loss": 1.5093, "step": 1082500 }, { "epoch": 11.43, "learning_rate": 4.428538261025571e-05, "loss": 1.4441, "step": 1083000 }, { "epoch": 11.43, "learning_rate": 4.428274428274429e-05, "loss": 1.4705, "step": 1083500 }, { "epoch": 11.44, "learning_rate": 4.428010595523286e-05, "loss": 1.4475, "step": 1084000 }, { "epoch": 11.45, "learning_rate": 4.4277467627721434e-05, "loss": 1.4609, "step": 1084500 }, { "epoch": 11.45, "learning_rate": 4.4274829300210016e-05, "loss": 1.4598, "step": 1085000 }, { "epoch": 11.46, "learning_rate": 4.427219097269859e-05, "loss": 1.4494, "step": 1085500 }, { "epoch": 11.46, "learning_rate": 4.426955264518717e-05, "loss": 1.4967, "step": 1086000 }, { "epoch": 11.47, "learning_rate": 4.426691431767574e-05, "loss": 1.4804, "step": 1086500 }, { "epoch": 11.47, "learning_rate": 4.426427599016432e-05, "loss": 1.4461, "step": 1087000 }, { "epoch": 11.48, "learning_rate": 4.426163766265289e-05, "loss": 1.4279, "step": 1087500 }, { "epoch": 11.48, "learning_rate": 4.425899933514147e-05, "loss": 1.4353, "step": 1088000 }, { "epoch": 11.49, "learning_rate": 4.4256361007630044e-05, "loss": 1.4698, "step": 1088500 }, { "epoch": 11.49, "learning_rate": 4.425372268011862e-05, "loss": 1.5078, "step": 1089000 }, { "epoch": 11.5, "learning_rate": 4.4251084352607195e-05, "loss": 1.459, "step": 1089500 }, { "epoch": 11.5, "learning_rate": 4.424844602509578e-05, "loss": 1.4874, "step": 1090000 }, { "epoch": 11.51, "learning_rate": 4.424580769758435e-05, "loss": 1.4802, "step": 1090500 }, { "epoch": 11.51, "learning_rate": 4.424316937007292e-05, "loss": 1.5399, "step": 1091000 }, { "epoch": 11.52, "learning_rate": 4.42405310425615e-05, "loss": 1.526, "step": 1091500 }, { "epoch": 11.52, "learning_rate": 4.423789271505008e-05, "loss": 1.4809, "step": 1092000 }, { "epoch": 11.53, "learning_rate": 4.4235254387538655e-05, "loss": 1.433, "step": 1092500 }, { "epoch": 11.53, "learning_rate": 4.423261606002723e-05, "loss": 1.498, "step": 1093000 }, { "epoch": 11.54, "learning_rate": 4.4229977732515806e-05, "loss": 1.5211, "step": 1093500 }, { "epoch": 11.55, "learning_rate": 4.422733940500438e-05, "loss": 1.4117, "step": 1094000 }, { "epoch": 11.55, "learning_rate": 4.422470107749296e-05, "loss": 1.4596, "step": 1094500 }, { "epoch": 11.56, "learning_rate": 4.422206274998153e-05, "loss": 1.464, "step": 1095000 }, { "epoch": 11.56, "learning_rate": 4.4219424422470115e-05, "loss": 1.5075, "step": 1095500 }, { "epoch": 11.57, "learning_rate": 4.4216786094958683e-05, "loss": 1.4727, "step": 1096000 }, { "epoch": 11.57, "learning_rate": 4.421414776744726e-05, "loss": 1.5109, "step": 1096500 }, { "epoch": 11.58, "learning_rate": 4.421150943993584e-05, "loss": 1.4561, "step": 1097000 }, { "epoch": 11.58, "learning_rate": 4.420887111242442e-05, "loss": 1.5011, "step": 1097500 }, { "epoch": 11.59, "learning_rate": 4.4206232784912985e-05, "loss": 1.4741, "step": 1098000 }, { "epoch": 11.59, "learning_rate": 4.420359445740157e-05, "loss": 1.4817, "step": 1098500 }, { "epoch": 11.6, "learning_rate": 4.420095612989014e-05, "loss": 1.4541, "step": 1099000 }, { "epoch": 11.6, "learning_rate": 4.419831780237872e-05, "loss": 1.4958, "step": 1099500 }, { "epoch": 11.61, "learning_rate": 4.4195679474867294e-05, "loss": 1.4595, "step": 1100000 }, { "epoch": 11.61, "learning_rate": 4.419304114735587e-05, "loss": 1.4543, "step": 1100500 }, { "epoch": 11.62, "learning_rate": 4.4190402819844445e-05, "loss": 1.4858, "step": 1101000 }, { "epoch": 11.62, "learning_rate": 4.418776449233302e-05, "loss": 1.4688, "step": 1101500 }, { "epoch": 11.63, "learning_rate": 4.41851261648216e-05, "loss": 1.4833, "step": 1102000 }, { "epoch": 11.64, "learning_rate": 4.418248783731018e-05, "loss": 1.4594, "step": 1102500 }, { "epoch": 11.64, "learning_rate": 4.417984950979875e-05, "loss": 1.4759, "step": 1103000 }, { "epoch": 11.65, "learning_rate": 4.417721118228732e-05, "loss": 1.4695, "step": 1103500 }, { "epoch": 11.65, "learning_rate": 4.4174572854775905e-05, "loss": 1.4908, "step": 1104000 }, { "epoch": 11.66, "learning_rate": 4.417193452726448e-05, "loss": 1.4731, "step": 1104500 }, { "epoch": 11.66, "learning_rate": 4.416929619975305e-05, "loss": 1.4622, "step": 1105000 }, { "epoch": 11.67, "learning_rate": 4.416665787224163e-05, "loss": 1.4355, "step": 1105500 }, { "epoch": 11.67, "learning_rate": 4.416401954473021e-05, "loss": 1.4175, "step": 1106000 }, { "epoch": 11.68, "learning_rate": 4.416138121721878e-05, "loss": 1.4538, "step": 1106500 }, { "epoch": 11.68, "learning_rate": 4.415874288970736e-05, "loss": 1.495, "step": 1107000 }, { "epoch": 11.69, "learning_rate": 4.415610456219593e-05, "loss": 1.4078, "step": 1107500 }, { "epoch": 11.69, "learning_rate": 4.415346623468451e-05, "loss": 1.4585, "step": 1108000 }, { "epoch": 11.7, "learning_rate": 4.4150827907173084e-05, "loss": 1.4537, "step": 1108500 }, { "epoch": 11.7, "learning_rate": 4.4148189579661666e-05, "loss": 1.4607, "step": 1109000 }, { "epoch": 11.71, "learning_rate": 4.414555125215024e-05, "loss": 1.4567, "step": 1109500 }, { "epoch": 11.71, "learning_rate": 4.414291292463881e-05, "loss": 1.4165, "step": 1110000 }, { "epoch": 11.72, "learning_rate": 4.414027459712739e-05, "loss": 1.5011, "step": 1110500 }, { "epoch": 11.72, "learning_rate": 4.413763626961597e-05, "loss": 1.4602, "step": 1111000 }, { "epoch": 11.73, "learning_rate": 4.4134997942104544e-05, "loss": 1.5124, "step": 1111500 }, { "epoch": 11.74, "learning_rate": 4.413235961459312e-05, "loss": 1.4919, "step": 1112000 }, { "epoch": 11.74, "learning_rate": 4.4129721287081695e-05, "loss": 1.4609, "step": 1112500 }, { "epoch": 11.75, "learning_rate": 4.412708295957027e-05, "loss": 1.4683, "step": 1113000 }, { "epoch": 11.75, "learning_rate": 4.4124444632058846e-05, "loss": 1.4995, "step": 1113500 }, { "epoch": 11.76, "learning_rate": 4.412180630454743e-05, "loss": 1.4677, "step": 1114000 }, { "epoch": 11.76, "learning_rate": 4.4119167977036e-05, "loss": 1.4662, "step": 1114500 }, { "epoch": 11.77, "learning_rate": 4.411652964952457e-05, "loss": 1.4104, "step": 1115000 }, { "epoch": 11.77, "learning_rate": 4.4113891322013154e-05, "loss": 1.4018, "step": 1115500 }, { "epoch": 11.78, "learning_rate": 4.411125299450173e-05, "loss": 1.4204, "step": 1116000 }, { "epoch": 11.78, "learning_rate": 4.4108614666990305e-05, "loss": 1.5528, "step": 1116500 }, { "epoch": 11.79, "learning_rate": 4.4105976339478874e-05, "loss": 1.4344, "step": 1117000 }, { "epoch": 11.79, "learning_rate": 4.4103338011967456e-05, "loss": 1.4773, "step": 1117500 }, { "epoch": 11.8, "learning_rate": 4.410069968445603e-05, "loss": 1.4582, "step": 1118000 }, { "epoch": 11.8, "learning_rate": 4.409806135694461e-05, "loss": 1.4615, "step": 1118500 }, { "epoch": 11.81, "learning_rate": 4.409542302943318e-05, "loss": 1.4384, "step": 1119000 }, { "epoch": 11.81, "learning_rate": 4.409278470192176e-05, "loss": 1.4455, "step": 1119500 }, { "epoch": 11.82, "learning_rate": 4.4090146374410334e-05, "loss": 1.4302, "step": 1120000 }, { "epoch": 11.82, "learning_rate": 4.408750804689891e-05, "loss": 1.5017, "step": 1120500 }, { "epoch": 11.83, "learning_rate": 4.408486971938749e-05, "loss": 1.5205, "step": 1121000 }, { "epoch": 11.84, "learning_rate": 4.408223139187607e-05, "loss": 1.4898, "step": 1121500 }, { "epoch": 11.84, "learning_rate": 4.4079593064364636e-05, "loss": 1.484, "step": 1122000 }, { "epoch": 11.85, "learning_rate": 4.407695473685322e-05, "loss": 1.4346, "step": 1122500 }, { "epoch": 11.85, "learning_rate": 4.4074316409341794e-05, "loss": 1.5027, "step": 1123000 }, { "epoch": 11.86, "learning_rate": 4.407167808183037e-05, "loss": 1.4217, "step": 1123500 }, { "epoch": 11.86, "learning_rate": 4.4069039754318945e-05, "loss": 1.416, "step": 1124000 }, { "epoch": 11.87, "learning_rate": 4.406640142680752e-05, "loss": 1.5081, "step": 1124500 }, { "epoch": 11.87, "learning_rate": 4.4063763099296096e-05, "loss": 1.482, "step": 1125000 }, { "epoch": 11.88, "learning_rate": 4.406112477178467e-05, "loss": 1.4946, "step": 1125500 }, { "epoch": 11.88, "learning_rate": 4.405848644427325e-05, "loss": 1.5036, "step": 1126000 }, { "epoch": 11.89, "learning_rate": 4.405584811676182e-05, "loss": 1.4919, "step": 1126500 }, { "epoch": 11.89, "learning_rate": 4.40532097892504e-05, "loss": 1.5205, "step": 1127000 }, { "epoch": 11.9, "learning_rate": 4.405057146173898e-05, "loss": 1.4783, "step": 1127500 }, { "epoch": 11.9, "learning_rate": 4.4047933134227555e-05, "loss": 1.4587, "step": 1128000 }, { "epoch": 11.91, "learning_rate": 4.404529480671613e-05, "loss": 1.4918, "step": 1128500 }, { "epoch": 11.91, "learning_rate": 4.40426564792047e-05, "loss": 1.4808, "step": 1129000 }, { "epoch": 11.92, "learning_rate": 4.404001815169328e-05, "loss": 1.4798, "step": 1129500 }, { "epoch": 11.93, "learning_rate": 4.403737982418186e-05, "loss": 1.4413, "step": 1130000 }, { "epoch": 11.93, "learning_rate": 4.403474149667043e-05, "loss": 1.4266, "step": 1130500 }, { "epoch": 11.94, "learning_rate": 4.403210316915901e-05, "loss": 1.4803, "step": 1131000 }, { "epoch": 11.94, "learning_rate": 4.4029464841647584e-05, "loss": 1.4789, "step": 1131500 }, { "epoch": 11.95, "learning_rate": 4.402682651413616e-05, "loss": 1.4825, "step": 1132000 }, { "epoch": 11.95, "learning_rate": 4.4024188186624735e-05, "loss": 1.4837, "step": 1132500 }, { "epoch": 11.96, "learning_rate": 4.402154985911332e-05, "loss": 1.4127, "step": 1133000 }, { "epoch": 11.96, "learning_rate": 4.4018911531601886e-05, "loss": 1.4806, "step": 1133500 }, { "epoch": 11.97, "learning_rate": 4.401627320409046e-05, "loss": 1.4134, "step": 1134000 }, { "epoch": 11.97, "learning_rate": 4.401363487657904e-05, "loss": 1.4374, "step": 1134500 }, { "epoch": 11.98, "learning_rate": 4.401099654906762e-05, "loss": 1.5121, "step": 1135000 }, { "epoch": 11.98, "learning_rate": 4.4008358221556194e-05, "loss": 1.4637, "step": 1135500 }, { "epoch": 11.99, "learning_rate": 4.400571989404477e-05, "loss": 1.4861, "step": 1136000 }, { "epoch": 11.99, "learning_rate": 4.4003081566533345e-05, "loss": 1.4625, "step": 1136500 }, { "epoch": 12.0, "learning_rate": 4.400044323902192e-05, "loss": 1.4223, "step": 1137000 }, { "epoch": 12.0, "learning_rate": 4.3997804911510496e-05, "loss": 1.4483, "step": 1137500 }, { "epoch": 12.01, "learning_rate": 4.399516658399908e-05, "loss": 1.4713, "step": 1138000 }, { "epoch": 12.01, "learning_rate": 4.399252825648765e-05, "loss": 1.3901, "step": 1138500 }, { "epoch": 12.02, "learning_rate": 4.398988992897622e-05, "loss": 1.4504, "step": 1139000 }, { "epoch": 12.03, "learning_rate": 4.3987251601464805e-05, "loss": 1.4236, "step": 1139500 }, { "epoch": 12.03, "learning_rate": 4.398461327395338e-05, "loss": 1.509, "step": 1140000 }, { "epoch": 12.04, "learning_rate": 4.3981974946441956e-05, "loss": 1.436, "step": 1140500 }, { "epoch": 12.04, "learning_rate": 4.3979336618930525e-05, "loss": 1.3985, "step": 1141000 }, { "epoch": 12.05, "learning_rate": 4.397669829141911e-05, "loss": 1.4373, "step": 1141500 }, { "epoch": 12.05, "learning_rate": 4.397405996390768e-05, "loss": 1.4668, "step": 1142000 }, { "epoch": 12.06, "learning_rate": 4.397142163639626e-05, "loss": 1.4313, "step": 1142500 }, { "epoch": 12.06, "learning_rate": 4.3968783308884833e-05, "loss": 1.4952, "step": 1143000 }, { "epoch": 12.07, "learning_rate": 4.396614498137341e-05, "loss": 1.4409, "step": 1143500 }, { "epoch": 12.07, "learning_rate": 4.3963506653861984e-05, "loss": 1.4608, "step": 1144000 }, { "epoch": 12.08, "learning_rate": 4.396086832635056e-05, "loss": 1.4305, "step": 1144500 }, { "epoch": 12.08, "learning_rate": 4.395822999883914e-05, "loss": 1.4621, "step": 1145000 }, { "epoch": 12.09, "learning_rate": 4.395559167132771e-05, "loss": 1.429, "step": 1145500 }, { "epoch": 12.09, "learning_rate": 4.3952953343816286e-05, "loss": 1.4399, "step": 1146000 }, { "epoch": 12.1, "learning_rate": 4.395031501630487e-05, "loss": 1.5109, "step": 1146500 }, { "epoch": 12.1, "learning_rate": 4.3947676688793444e-05, "loss": 1.4768, "step": 1147000 }, { "epoch": 12.11, "learning_rate": 4.394503836128202e-05, "loss": 1.4552, "step": 1147500 }, { "epoch": 12.12, "learning_rate": 4.3942400033770595e-05, "loss": 1.413, "step": 1148000 }, { "epoch": 12.12, "learning_rate": 4.393976170625917e-05, "loss": 1.413, "step": 1148500 }, { "epoch": 12.13, "learning_rate": 4.3937123378747746e-05, "loss": 1.4494, "step": 1149000 }, { "epoch": 12.13, "learning_rate": 4.393448505123632e-05, "loss": 1.4454, "step": 1149500 }, { "epoch": 12.14, "learning_rate": 4.3931846723724904e-05, "loss": 1.4753, "step": 1150000 }, { "epoch": 12.14, "learning_rate": 4.392920839621347e-05, "loss": 1.4595, "step": 1150500 }, { "epoch": 12.15, "learning_rate": 4.392657006870205e-05, "loss": 1.4292, "step": 1151000 }, { "epoch": 12.15, "learning_rate": 4.392393174119063e-05, "loss": 1.4753, "step": 1151500 }, { "epoch": 12.16, "learning_rate": 4.3921293413679206e-05, "loss": 1.427, "step": 1152000 }, { "epoch": 12.16, "learning_rate": 4.3918655086167774e-05, "loss": 1.4129, "step": 1152500 }, { "epoch": 12.17, "learning_rate": 4.391601675865635e-05, "loss": 1.4367, "step": 1153000 }, { "epoch": 12.17, "learning_rate": 4.391337843114493e-05, "loss": 1.4695, "step": 1153500 }, { "epoch": 12.18, "learning_rate": 4.391074010363351e-05, "loss": 1.455, "step": 1154000 }, { "epoch": 12.18, "learning_rate": 4.390810177612208e-05, "loss": 1.4823, "step": 1154500 }, { "epoch": 12.19, "learning_rate": 4.390546344861066e-05, "loss": 1.5212, "step": 1155000 }, { "epoch": 12.19, "learning_rate": 4.3902825121099234e-05, "loss": 1.4614, "step": 1155500 }, { "epoch": 12.2, "learning_rate": 4.390018679358781e-05, "loss": 1.4647, "step": 1156000 }, { "epoch": 12.2, "learning_rate": 4.3897548466076385e-05, "loss": 1.4679, "step": 1156500 }, { "epoch": 12.21, "learning_rate": 4.389491013856497e-05, "loss": 1.4406, "step": 1157000 }, { "epoch": 12.22, "learning_rate": 4.3892271811053536e-05, "loss": 1.478, "step": 1157500 }, { "epoch": 12.22, "learning_rate": 4.388963348354211e-05, "loss": 1.4471, "step": 1158000 }, { "epoch": 12.23, "learning_rate": 4.3886995156030694e-05, "loss": 1.4604, "step": 1158500 }, { "epoch": 12.23, "learning_rate": 4.388435682851927e-05, "loss": 1.4586, "step": 1159000 }, { "epoch": 12.24, "learning_rate": 4.3881718501007845e-05, "loss": 1.4572, "step": 1159500 }, { "epoch": 12.24, "learning_rate": 4.387908017349642e-05, "loss": 1.4652, "step": 1160000 }, { "epoch": 12.25, "learning_rate": 4.3876441845984996e-05, "loss": 1.4128, "step": 1160500 }, { "epoch": 12.25, "learning_rate": 4.387380351847357e-05, "loss": 1.4277, "step": 1161000 }, { "epoch": 12.26, "learning_rate": 4.387116519096215e-05, "loss": 1.4966, "step": 1161500 }, { "epoch": 12.26, "learning_rate": 4.386852686345072e-05, "loss": 1.4655, "step": 1162000 }, { "epoch": 12.27, "learning_rate": 4.38658885359393e-05, "loss": 1.4591, "step": 1162500 }, { "epoch": 12.27, "learning_rate": 4.386325020842787e-05, "loss": 1.4534, "step": 1163000 }, { "epoch": 12.28, "learning_rate": 4.3860611880916455e-05, "loss": 1.4678, "step": 1163500 }, { "epoch": 12.28, "learning_rate": 4.385797355340503e-05, "loss": 1.4427, "step": 1164000 }, { "epoch": 12.29, "learning_rate": 4.38553352258936e-05, "loss": 1.4431, "step": 1164500 }, { "epoch": 12.29, "learning_rate": 4.3852696898382175e-05, "loss": 1.3975, "step": 1165000 }, { "epoch": 12.3, "learning_rate": 4.385005857087076e-05, "loss": 1.396, "step": 1165500 }, { "epoch": 12.31, "learning_rate": 4.384742024335933e-05, "loss": 1.4766, "step": 1166000 }, { "epoch": 12.31, "learning_rate": 4.384478191584791e-05, "loss": 1.4554, "step": 1166500 }, { "epoch": 12.32, "learning_rate": 4.3842143588336484e-05, "loss": 1.4847, "step": 1167000 }, { "epoch": 12.32, "learning_rate": 4.383950526082506e-05, "loss": 1.4327, "step": 1167500 }, { "epoch": 12.33, "learning_rate": 4.3836866933313635e-05, "loss": 1.477, "step": 1168000 }, { "epoch": 12.33, "learning_rate": 4.383422860580221e-05, "loss": 1.4528, "step": 1168500 }, { "epoch": 12.34, "learning_rate": 4.383159027829079e-05, "loss": 1.4576, "step": 1169000 }, { "epoch": 12.34, "learning_rate": 4.382895195077936e-05, "loss": 1.4339, "step": 1169500 }, { "epoch": 12.35, "learning_rate": 4.382631362326794e-05, "loss": 1.4667, "step": 1170000 }, { "epoch": 12.35, "learning_rate": 4.382367529575652e-05, "loss": 1.4742, "step": 1170500 }, { "epoch": 12.36, "learning_rate": 4.3821036968245095e-05, "loss": 1.4785, "step": 1171000 }, { "epoch": 12.36, "learning_rate": 4.381839864073366e-05, "loss": 1.444, "step": 1171500 }, { "epoch": 12.37, "learning_rate": 4.3815760313222246e-05, "loss": 1.485, "step": 1172000 }, { "epoch": 12.37, "learning_rate": 4.381312198571082e-05, "loss": 1.4572, "step": 1172500 }, { "epoch": 12.38, "learning_rate": 4.3810483658199397e-05, "loss": 1.3661, "step": 1173000 }, { "epoch": 12.38, "learning_rate": 4.380784533068797e-05, "loss": 1.4189, "step": 1173500 }, { "epoch": 12.39, "learning_rate": 4.380520700317655e-05, "loss": 1.4728, "step": 1174000 }, { "epoch": 12.39, "learning_rate": 4.380256867566512e-05, "loss": 1.5145, "step": 1174500 }, { "epoch": 12.4, "learning_rate": 4.37999303481537e-05, "loss": 1.4957, "step": 1175000 }, { "epoch": 12.41, "learning_rate": 4.379729202064228e-05, "loss": 1.408, "step": 1175500 }, { "epoch": 12.41, "learning_rate": 4.3794653693130856e-05, "loss": 1.3978, "step": 1176000 }, { "epoch": 12.42, "learning_rate": 4.3792015365619425e-05, "loss": 1.4065, "step": 1176500 }, { "epoch": 12.42, "learning_rate": 4.3789377038108e-05, "loss": 1.4917, "step": 1177000 }, { "epoch": 12.43, "learning_rate": 4.378673871059658e-05, "loss": 1.4383, "step": 1177500 }, { "epoch": 12.43, "learning_rate": 4.378410038308516e-05, "loss": 1.4241, "step": 1178000 }, { "epoch": 12.44, "learning_rate": 4.3781462055573734e-05, "loss": 1.4593, "step": 1178500 }, { "epoch": 12.44, "learning_rate": 4.377882372806231e-05, "loss": 1.4271, "step": 1179000 }, { "epoch": 12.45, "learning_rate": 4.3776185400550885e-05, "loss": 1.4439, "step": 1179500 }, { "epoch": 12.45, "learning_rate": 4.377354707303946e-05, "loss": 1.474, "step": 1180000 }, { "epoch": 12.46, "learning_rate": 4.3770908745528036e-05, "loss": 1.5043, "step": 1180500 }, { "epoch": 12.46, "learning_rate": 4.376827041801661e-05, "loss": 1.4561, "step": 1181000 }, { "epoch": 12.47, "learning_rate": 4.3765632090505187e-05, "loss": 1.4849, "step": 1181500 }, { "epoch": 12.47, "learning_rate": 4.376299376299376e-05, "loss": 1.4686, "step": 1182000 }, { "epoch": 12.48, "learning_rate": 4.3760355435482344e-05, "loss": 1.455, "step": 1182500 }, { "epoch": 12.48, "learning_rate": 4.375771710797092e-05, "loss": 1.4487, "step": 1183000 }, { "epoch": 12.49, "learning_rate": 4.375507878045949e-05, "loss": 1.4219, "step": 1183500 }, { "epoch": 12.5, "learning_rate": 4.375244045294807e-05, "loss": 1.4689, "step": 1184000 }, { "epoch": 12.5, "learning_rate": 4.3749802125436646e-05, "loss": 1.467, "step": 1184500 }, { "epoch": 12.51, "learning_rate": 4.374716379792522e-05, "loss": 1.431, "step": 1185000 }, { "epoch": 12.51, "learning_rate": 4.37445254704138e-05, "loss": 1.4939, "step": 1185500 }, { "epoch": 12.52, "learning_rate": 4.374188714290237e-05, "loss": 1.4387, "step": 1186000 }, { "epoch": 12.52, "learning_rate": 4.373924881539095e-05, "loss": 1.4953, "step": 1186500 }, { "epoch": 12.53, "learning_rate": 4.3736610487879524e-05, "loss": 1.4444, "step": 1187000 }, { "epoch": 12.53, "learning_rate": 4.3733972160368106e-05, "loss": 1.4969, "step": 1187500 }, { "epoch": 12.54, "learning_rate": 4.373133383285668e-05, "loss": 1.3882, "step": 1188000 }, { "epoch": 12.54, "learning_rate": 4.372869550534525e-05, "loss": 1.4494, "step": 1188500 }, { "epoch": 12.55, "learning_rate": 4.3726057177833826e-05, "loss": 1.428, "step": 1189000 }, { "epoch": 12.55, "learning_rate": 4.372341885032241e-05, "loss": 1.4502, "step": 1189500 }, { "epoch": 12.56, "learning_rate": 4.3720780522810983e-05, "loss": 1.4374, "step": 1190000 }, { "epoch": 12.56, "learning_rate": 4.371814219529955e-05, "loss": 1.4464, "step": 1190500 }, { "epoch": 12.57, "learning_rate": 4.3715503867788134e-05, "loss": 1.5181, "step": 1191000 }, { "epoch": 12.57, "learning_rate": 4.371286554027671e-05, "loss": 1.4237, "step": 1191500 }, { "epoch": 12.58, "learning_rate": 4.3710227212765285e-05, "loss": 1.4622, "step": 1192000 }, { "epoch": 12.58, "learning_rate": 4.370758888525386e-05, "loss": 1.4572, "step": 1192500 }, { "epoch": 12.59, "learning_rate": 4.3704950557742436e-05, "loss": 1.4835, "step": 1193000 }, { "epoch": 12.6, "learning_rate": 4.370231223023101e-05, "loss": 1.4487, "step": 1193500 }, { "epoch": 12.6, "learning_rate": 4.369967390271959e-05, "loss": 1.4445, "step": 1194000 }, { "epoch": 12.61, "learning_rate": 4.369703557520817e-05, "loss": 1.4279, "step": 1194500 }, { "epoch": 12.61, "learning_rate": 4.3694397247696745e-05, "loss": 1.4103, "step": 1195000 }, { "epoch": 12.62, "learning_rate": 4.3691758920185314e-05, "loss": 1.4733, "step": 1195500 }, { "epoch": 12.62, "learning_rate": 4.3689120592673896e-05, "loss": 1.4847, "step": 1196000 }, { "epoch": 12.63, "learning_rate": 4.368648226516247e-05, "loss": 1.4658, "step": 1196500 }, { "epoch": 12.63, "learning_rate": 4.368384393765105e-05, "loss": 1.4595, "step": 1197000 }, { "epoch": 12.64, "learning_rate": 4.368120561013962e-05, "loss": 1.4648, "step": 1197500 }, { "epoch": 12.64, "learning_rate": 4.36785672826282e-05, "loss": 1.4779, "step": 1198000 }, { "epoch": 12.65, "learning_rate": 4.3675928955116773e-05, "loss": 1.4552, "step": 1198500 }, { "epoch": 12.65, "learning_rate": 4.367329062760535e-05, "loss": 1.4943, "step": 1199000 }, { "epoch": 12.66, "learning_rate": 4.367065230009393e-05, "loss": 1.4869, "step": 1199500 }, { "epoch": 12.66, "learning_rate": 4.36680139725825e-05, "loss": 1.5111, "step": 1200000 }, { "epoch": 12.67, "learning_rate": 4.3665375645071075e-05, "loss": 1.4279, "step": 1200500 }, { "epoch": 12.67, "learning_rate": 4.366273731755966e-05, "loss": 1.463, "step": 1201000 }, { "epoch": 12.68, "learning_rate": 4.366009899004823e-05, "loss": 1.4492, "step": 1201500 }, { "epoch": 12.69, "learning_rate": 4.365746066253681e-05, "loss": 1.4818, "step": 1202000 }, { "epoch": 12.69, "learning_rate": 4.365482233502538e-05, "loss": 1.4574, "step": 1202500 }, { "epoch": 12.7, "learning_rate": 4.365218400751396e-05, "loss": 1.4155, "step": 1203000 }, { "epoch": 12.7, "learning_rate": 4.3649545680002535e-05, "loss": 1.4823, "step": 1203500 }, { "epoch": 12.71, "learning_rate": 4.364690735249111e-05, "loss": 1.4731, "step": 1204000 }, { "epoch": 12.71, "learning_rate": 4.3644269024979686e-05, "loss": 1.4405, "step": 1204500 }, { "epoch": 12.72, "learning_rate": 4.364163069746826e-05, "loss": 1.4777, "step": 1205000 }, { "epoch": 12.72, "learning_rate": 4.363899236995684e-05, "loss": 1.49, "step": 1205500 }, { "epoch": 12.73, "learning_rate": 4.363635404244541e-05, "loss": 1.4945, "step": 1206000 }, { "epoch": 12.73, "learning_rate": 4.3633715714933995e-05, "loss": 1.4833, "step": 1206500 }, { "epoch": 12.74, "learning_rate": 4.363107738742257e-05, "loss": 1.42, "step": 1207000 }, { "epoch": 12.74, "learning_rate": 4.362843905991114e-05, "loss": 1.4476, "step": 1207500 }, { "epoch": 12.75, "learning_rate": 4.362580073239972e-05, "loss": 1.4953, "step": 1208000 }, { "epoch": 12.75, "learning_rate": 4.36231624048883e-05, "loss": 1.5059, "step": 1208500 }, { "epoch": 12.76, "learning_rate": 4.362052407737687e-05, "loss": 1.4179, "step": 1209000 }, { "epoch": 12.76, "learning_rate": 4.361788574986545e-05, "loss": 1.4303, "step": 1209500 }, { "epoch": 12.77, "learning_rate": 4.361524742235402e-05, "loss": 1.4041, "step": 1210000 }, { "epoch": 12.77, "learning_rate": 4.36126090948426e-05, "loss": 1.459, "step": 1210500 }, { "epoch": 12.78, "learning_rate": 4.3609970767331174e-05, "loss": 1.4744, "step": 1211000 }, { "epoch": 12.79, "learning_rate": 4.3607332439819756e-05, "loss": 1.4469, "step": 1211500 }, { "epoch": 12.79, "learning_rate": 4.3604694112308325e-05, "loss": 1.4127, "step": 1212000 }, { "epoch": 12.8, "learning_rate": 4.36020557847969e-05, "loss": 1.4363, "step": 1212500 }, { "epoch": 12.8, "learning_rate": 4.359941745728548e-05, "loss": 1.4585, "step": 1213000 }, { "epoch": 12.81, "learning_rate": 4.359677912977406e-05, "loss": 1.4184, "step": 1213500 }, { "epoch": 12.81, "learning_rate": 4.3594140802262634e-05, "loss": 1.4055, "step": 1214000 }, { "epoch": 12.82, "learning_rate": 4.35915024747512e-05, "loss": 1.4556, "step": 1214500 }, { "epoch": 12.82, "learning_rate": 4.3588864147239785e-05, "loss": 1.4648, "step": 1215000 }, { "epoch": 12.83, "learning_rate": 4.358622581972836e-05, "loss": 1.4629, "step": 1215500 }, { "epoch": 12.83, "learning_rate": 4.3583587492216936e-05, "loss": 1.4426, "step": 1216000 }, { "epoch": 12.84, "learning_rate": 4.358094916470551e-05, "loss": 1.4957, "step": 1216500 }, { "epoch": 12.84, "learning_rate": 4.357831083719409e-05, "loss": 1.433, "step": 1217000 }, { "epoch": 12.85, "learning_rate": 4.357567250968266e-05, "loss": 1.4437, "step": 1217500 }, { "epoch": 12.85, "learning_rate": 4.357303418217124e-05, "loss": 1.4626, "step": 1218000 }, { "epoch": 12.86, "learning_rate": 4.357039585465982e-05, "loss": 1.4756, "step": 1218500 }, { "epoch": 12.86, "learning_rate": 4.356775752714839e-05, "loss": 1.4044, "step": 1219000 }, { "epoch": 12.87, "learning_rate": 4.3565119199636964e-05, "loss": 1.3893, "step": 1219500 }, { "epoch": 12.88, "learning_rate": 4.3562480872125547e-05, "loss": 1.4272, "step": 1220000 }, { "epoch": 12.88, "learning_rate": 4.355984254461412e-05, "loss": 1.4232, "step": 1220500 }, { "epoch": 12.89, "learning_rate": 4.35572042171027e-05, "loss": 1.4445, "step": 1221000 }, { "epoch": 12.89, "learning_rate": 4.355456588959127e-05, "loss": 1.4568, "step": 1221500 }, { "epoch": 12.9, "learning_rate": 4.355192756207985e-05, "loss": 1.4431, "step": 1222000 }, { "epoch": 12.9, "learning_rate": 4.3549289234568424e-05, "loss": 1.4647, "step": 1222500 }, { "epoch": 12.91, "learning_rate": 4.3546650907057e-05, "loss": 1.4608, "step": 1223000 }, { "epoch": 12.91, "learning_rate": 4.354401257954558e-05, "loss": 1.4092, "step": 1223500 }, { "epoch": 12.92, "learning_rate": 4.354137425203415e-05, "loss": 1.4511, "step": 1224000 }, { "epoch": 12.92, "learning_rate": 4.3538735924522726e-05, "loss": 1.4707, "step": 1224500 }, { "epoch": 12.93, "learning_rate": 4.353609759701131e-05, "loss": 1.4521, "step": 1225000 }, { "epoch": 12.93, "learning_rate": 4.3533459269499884e-05, "loss": 1.5028, "step": 1225500 }, { "epoch": 12.94, "learning_rate": 4.353082094198846e-05, "loss": 1.477, "step": 1226000 }, { "epoch": 12.94, "learning_rate": 4.352818261447703e-05, "loss": 1.4471, "step": 1226500 }, { "epoch": 12.95, "learning_rate": 4.352554428696561e-05, "loss": 1.4502, "step": 1227000 }, { "epoch": 12.95, "learning_rate": 4.3522905959454186e-05, "loss": 1.4445, "step": 1227500 }, { "epoch": 12.96, "learning_rate": 4.352026763194276e-05, "loss": 1.404, "step": 1228000 }, { "epoch": 12.96, "learning_rate": 4.3517629304431337e-05, "loss": 1.479, "step": 1228500 }, { "epoch": 12.97, "learning_rate": 4.351499097691991e-05, "loss": 1.4346, "step": 1229000 }, { "epoch": 12.98, "learning_rate": 4.351235264940849e-05, "loss": 1.4542, "step": 1229500 }, { "epoch": 12.98, "learning_rate": 4.350971432189706e-05, "loss": 1.4787, "step": 1230000 }, { "epoch": 12.99, "learning_rate": 4.3507075994385645e-05, "loss": 1.4431, "step": 1230500 }, { "epoch": 12.99, "learning_rate": 4.3504437666874214e-05, "loss": 1.4413, "step": 1231000 }, { "epoch": 13.0, "learning_rate": 4.350179933936279e-05, "loss": 1.4239, "step": 1231500 }, { "epoch": 13.0, "learning_rate": 4.349916101185137e-05, "loss": 1.4167, "step": 1232000 }, { "epoch": 13.01, "learning_rate": 4.349652268433995e-05, "loss": 1.4747, "step": 1232500 }, { "epoch": 13.01, "learning_rate": 4.349388435682852e-05, "loss": 1.4099, "step": 1233000 }, { "epoch": 13.02, "learning_rate": 4.34912460293171e-05, "loss": 1.4398, "step": 1233500 }, { "epoch": 13.02, "learning_rate": 4.3488607701805674e-05, "loss": 1.4277, "step": 1234000 }, { "epoch": 13.03, "learning_rate": 4.348596937429425e-05, "loss": 1.4655, "step": 1234500 }, { "epoch": 13.03, "learning_rate": 4.3483331046782825e-05, "loss": 1.4957, "step": 1235000 }, { "epoch": 13.04, "learning_rate": 4.348069271927141e-05, "loss": 1.4432, "step": 1235500 }, { "epoch": 13.04, "learning_rate": 4.3478054391759976e-05, "loss": 1.4102, "step": 1236000 }, { "epoch": 13.05, "learning_rate": 4.347541606424855e-05, "loss": 1.4367, "step": 1236500 }, { "epoch": 13.05, "learning_rate": 4.3472777736737133e-05, "loss": 1.4089, "step": 1237000 }, { "epoch": 13.06, "learning_rate": 4.347013940922571e-05, "loss": 1.4659, "step": 1237500 }, { "epoch": 13.06, "learning_rate": 4.346750108171428e-05, "loss": 1.4502, "step": 1238000 }, { "epoch": 13.07, "learning_rate": 4.346486275420285e-05, "loss": 1.3999, "step": 1238500 }, { "epoch": 13.08, "learning_rate": 4.3462224426691435e-05, "loss": 1.4476, "step": 1239000 }, { "epoch": 13.08, "learning_rate": 4.345958609918001e-05, "loss": 1.3991, "step": 1239500 }, { "epoch": 13.09, "learning_rate": 4.3456947771668586e-05, "loss": 1.4685, "step": 1240000 }, { "epoch": 13.09, "learning_rate": 4.345430944415716e-05, "loss": 1.4471, "step": 1240500 }, { "epoch": 13.1, "learning_rate": 4.345167111664574e-05, "loss": 1.4198, "step": 1241000 }, { "epoch": 13.1, "learning_rate": 4.344903278913431e-05, "loss": 1.4547, "step": 1241500 }, { "epoch": 13.11, "learning_rate": 4.344639446162289e-05, "loss": 1.4569, "step": 1242000 }, { "epoch": 13.11, "learning_rate": 4.344375613411147e-05, "loss": 1.4712, "step": 1242500 }, { "epoch": 13.12, "learning_rate": 4.344111780660004e-05, "loss": 1.4383, "step": 1243000 }, { "epoch": 13.12, "learning_rate": 4.3438479479088615e-05, "loss": 1.4513, "step": 1243500 }, { "epoch": 13.13, "learning_rate": 4.34358411515772e-05, "loss": 1.4515, "step": 1244000 }, { "epoch": 13.13, "learning_rate": 4.343320282406577e-05, "loss": 1.4303, "step": 1244500 }, { "epoch": 13.14, "learning_rate": 4.343056449655435e-05, "loss": 1.4874, "step": 1245000 }, { "epoch": 13.14, "learning_rate": 4.3427926169042923e-05, "loss": 1.449, "step": 1245500 }, { "epoch": 13.15, "learning_rate": 4.34252878415315e-05, "loss": 1.4557, "step": 1246000 }, { "epoch": 13.15, "learning_rate": 4.3422649514020074e-05, "loss": 1.4296, "step": 1246500 }, { "epoch": 13.16, "learning_rate": 4.342001118650865e-05, "loss": 1.4159, "step": 1247000 }, { "epoch": 13.17, "learning_rate": 4.3417372858997225e-05, "loss": 1.4998, "step": 1247500 }, { "epoch": 13.17, "learning_rate": 4.34147345314858e-05, "loss": 1.4832, "step": 1248000 }, { "epoch": 13.18, "learning_rate": 4.3412096203974376e-05, "loss": 1.4831, "step": 1248500 }, { "epoch": 13.18, "learning_rate": 4.340945787646296e-05, "loss": 1.4493, "step": 1249000 }, { "epoch": 13.19, "learning_rate": 4.3406819548951534e-05, "loss": 1.4164, "step": 1249500 }, { "epoch": 13.19, "learning_rate": 4.34041812214401e-05, "loss": 1.4018, "step": 1250000 }, { "epoch": 13.2, "learning_rate": 4.340154289392868e-05, "loss": 1.3922, "step": 1250500 }, { "epoch": 13.2, "learning_rate": 4.339890456641726e-05, "loss": 1.4464, "step": 1251000 }, { "epoch": 13.21, "learning_rate": 4.3396266238905836e-05, "loss": 1.4674, "step": 1251500 }, { "epoch": 13.21, "learning_rate": 4.339362791139441e-05, "loss": 1.4167, "step": 1252000 }, { "epoch": 13.22, "learning_rate": 4.339098958388299e-05, "loss": 1.4554, "step": 1252500 }, { "epoch": 13.22, "learning_rate": 4.338835125637156e-05, "loss": 1.4652, "step": 1253000 }, { "epoch": 13.23, "learning_rate": 4.338571292886014e-05, "loss": 1.471, "step": 1253500 }, { "epoch": 13.23, "learning_rate": 4.3383074601348714e-05, "loss": 1.4448, "step": 1254000 }, { "epoch": 13.24, "learning_rate": 4.3380436273837296e-05, "loss": 1.4048, "step": 1254500 }, { "epoch": 13.24, "learning_rate": 4.3377797946325865e-05, "loss": 1.4865, "step": 1255000 }, { "epoch": 13.25, "learning_rate": 4.337515961881444e-05, "loss": 1.4579, "step": 1255500 }, { "epoch": 13.25, "learning_rate": 4.337252129130302e-05, "loss": 1.4155, "step": 1256000 }, { "epoch": 13.26, "learning_rate": 4.33698829637916e-05, "loss": 1.4135, "step": 1256500 }, { "epoch": 13.27, "learning_rate": 4.3367244636280166e-05, "loss": 1.4184, "step": 1257000 }, { "epoch": 13.27, "learning_rate": 4.336460630876875e-05, "loss": 1.4557, "step": 1257500 }, { "epoch": 13.28, "learning_rate": 4.3361967981257324e-05, "loss": 1.4267, "step": 1258000 }, { "epoch": 13.28, "learning_rate": 4.33593296537459e-05, "loss": 1.4621, "step": 1258500 }, { "epoch": 13.29, "learning_rate": 4.3356691326234475e-05, "loss": 1.4572, "step": 1259000 }, { "epoch": 13.29, "learning_rate": 4.335405299872305e-05, "loss": 1.4824, "step": 1259500 }, { "epoch": 13.3, "learning_rate": 4.3351414671211626e-05, "loss": 1.4424, "step": 1260000 }, { "epoch": 13.3, "learning_rate": 4.33487763437002e-05, "loss": 1.3926, "step": 1260500 }, { "epoch": 13.31, "learning_rate": 4.3346138016188784e-05, "loss": 1.4463, "step": 1261000 }, { "epoch": 13.31, "learning_rate": 4.334349968867736e-05, "loss": 1.4644, "step": 1261500 }, { "epoch": 13.32, "learning_rate": 4.334086136116593e-05, "loss": 1.3755, "step": 1262000 }, { "epoch": 13.32, "learning_rate": 4.3338223033654504e-05, "loss": 1.4395, "step": 1262500 }, { "epoch": 13.33, "learning_rate": 4.3335584706143086e-05, "loss": 1.4614, "step": 1263000 }, { "epoch": 13.33, "learning_rate": 4.333294637863166e-05, "loss": 1.482, "step": 1263500 }, { "epoch": 13.34, "learning_rate": 4.333030805112024e-05, "loss": 1.4242, "step": 1264000 }, { "epoch": 13.34, "learning_rate": 4.332766972360881e-05, "loss": 1.4563, "step": 1264500 }, { "epoch": 13.35, "learning_rate": 4.332503139609739e-05, "loss": 1.4653, "step": 1265000 }, { "epoch": 13.36, "learning_rate": 4.332239306858596e-05, "loss": 1.4526, "step": 1265500 }, { "epoch": 13.36, "learning_rate": 4.331975474107454e-05, "loss": 1.3878, "step": 1266000 }, { "epoch": 13.37, "learning_rate": 4.3317116413563114e-05, "loss": 1.4203, "step": 1266500 }, { "epoch": 13.37, "learning_rate": 4.331447808605169e-05, "loss": 1.4502, "step": 1267000 }, { "epoch": 13.38, "learning_rate": 4.3311839758540265e-05, "loss": 1.4742, "step": 1267500 }, { "epoch": 13.38, "learning_rate": 4.330920143102885e-05, "loss": 1.4462, "step": 1268000 }, { "epoch": 13.39, "learning_rate": 4.330656310351742e-05, "loss": 1.4673, "step": 1268500 }, { "epoch": 13.39, "learning_rate": 4.330392477600599e-05, "loss": 1.4778, "step": 1269000 }, { "epoch": 13.4, "learning_rate": 4.3301286448494574e-05, "loss": 1.4308, "step": 1269500 }, { "epoch": 13.4, "learning_rate": 4.329864812098315e-05, "loss": 1.4408, "step": 1270000 }, { "epoch": 13.41, "learning_rate": 4.3296009793471725e-05, "loss": 1.4381, "step": 1270500 }, { "epoch": 13.41, "learning_rate": 4.32933714659603e-05, "loss": 1.4269, "step": 1271000 }, { "epoch": 13.42, "learning_rate": 4.3290733138448876e-05, "loss": 1.4325, "step": 1271500 }, { "epoch": 13.42, "learning_rate": 4.328809481093745e-05, "loss": 1.4718, "step": 1272000 }, { "epoch": 13.43, "learning_rate": 4.328545648342603e-05, "loss": 1.4378, "step": 1272500 }, { "epoch": 13.43, "learning_rate": 4.328281815591461e-05, "loss": 1.4205, "step": 1273000 }, { "epoch": 13.44, "learning_rate": 4.3280179828403185e-05, "loss": 1.4716, "step": 1273500 }, { "epoch": 13.44, "learning_rate": 4.327754150089175e-05, "loss": 1.4862, "step": 1274000 }, { "epoch": 13.45, "learning_rate": 4.3274903173380336e-05, "loss": 1.4433, "step": 1274500 }, { "epoch": 13.46, "learning_rate": 4.327226484586891e-05, "loss": 1.4696, "step": 1275000 }, { "epoch": 13.46, "learning_rate": 4.3269626518357487e-05, "loss": 1.4201, "step": 1275500 }, { "epoch": 13.47, "learning_rate": 4.3266988190846055e-05, "loss": 1.4277, "step": 1276000 }, { "epoch": 13.47, "learning_rate": 4.326434986333464e-05, "loss": 1.4317, "step": 1276500 }, { "epoch": 13.48, "learning_rate": 4.326171153582321e-05, "loss": 1.4573, "step": 1277000 }, { "epoch": 13.48, "learning_rate": 4.325907320831179e-05, "loss": 1.4342, "step": 1277500 }, { "epoch": 13.49, "learning_rate": 4.3256434880800364e-05, "loss": 1.4632, "step": 1278000 }, { "epoch": 13.49, "learning_rate": 4.325379655328894e-05, "loss": 1.3882, "step": 1278500 }, { "epoch": 13.5, "learning_rate": 4.3251158225777515e-05, "loss": 1.5049, "step": 1279000 }, { "epoch": 13.5, "learning_rate": 4.324851989826609e-05, "loss": 1.4372, "step": 1279500 }, { "epoch": 13.51, "learning_rate": 4.324588157075467e-05, "loss": 1.4366, "step": 1280000 }, { "epoch": 13.51, "learning_rate": 4.324324324324325e-05, "loss": 1.4606, "step": 1280500 }, { "epoch": 13.52, "learning_rate": 4.324060491573182e-05, "loss": 1.4417, "step": 1281000 }, { "epoch": 13.52, "learning_rate": 4.32379665882204e-05, "loss": 1.4111, "step": 1281500 }, { "epoch": 13.53, "learning_rate": 4.3235328260708975e-05, "loss": 1.4108, "step": 1282000 }, { "epoch": 13.53, "learning_rate": 4.323268993319755e-05, "loss": 1.5074, "step": 1282500 }, { "epoch": 13.54, "learning_rate": 4.3230051605686126e-05, "loss": 1.438, "step": 1283000 }, { "epoch": 13.55, "learning_rate": 4.32274132781747e-05, "loss": 1.4717, "step": 1283500 }, { "epoch": 13.55, "learning_rate": 4.322477495066328e-05, "loss": 1.4335, "step": 1284000 }, { "epoch": 13.56, "learning_rate": 4.322213662315185e-05, "loss": 1.453, "step": 1284500 }, { "epoch": 13.56, "learning_rate": 4.3219498295640434e-05, "loss": 1.442, "step": 1285000 }, { "epoch": 13.57, "learning_rate": 4.3216859968129e-05, "loss": 1.4309, "step": 1285500 }, { "epoch": 13.57, "learning_rate": 4.321422164061758e-05, "loss": 1.494, "step": 1286000 }, { "epoch": 13.58, "learning_rate": 4.321158331310616e-05, "loss": 1.4151, "step": 1286500 }, { "epoch": 13.58, "learning_rate": 4.3208944985594736e-05, "loss": 1.4589, "step": 1287000 }, { "epoch": 13.59, "learning_rate": 4.320630665808331e-05, "loss": 1.4737, "step": 1287500 }, { "epoch": 13.59, "learning_rate": 4.320366833057188e-05, "loss": 1.4062, "step": 1288000 }, { "epoch": 13.6, "learning_rate": 4.320103000306046e-05, "loss": 1.3784, "step": 1288500 }, { "epoch": 13.6, "learning_rate": 4.319839167554904e-05, "loss": 1.3678, "step": 1289000 }, { "epoch": 13.61, "learning_rate": 4.3195753348037614e-05, "loss": 1.4653, "step": 1289500 }, { "epoch": 13.61, "learning_rate": 4.319311502052619e-05, "loss": 1.4923, "step": 1290000 }, { "epoch": 13.62, "learning_rate": 4.3190476693014765e-05, "loss": 1.4175, "step": 1290500 }, { "epoch": 13.62, "learning_rate": 4.318783836550334e-05, "loss": 1.4265, "step": 1291000 }, { "epoch": 13.63, "learning_rate": 4.3185200037991916e-05, "loss": 1.4612, "step": 1291500 }, { "epoch": 13.63, "learning_rate": 4.31825617104805e-05, "loss": 1.4854, "step": 1292000 }, { "epoch": 13.64, "learning_rate": 4.3179923382969073e-05, "loss": 1.4248, "step": 1292500 }, { "epoch": 13.65, "learning_rate": 4.317728505545764e-05, "loss": 1.4424, "step": 1293000 }, { "epoch": 13.65, "learning_rate": 4.3174646727946224e-05, "loss": 1.4548, "step": 1293500 }, { "epoch": 13.66, "learning_rate": 4.31720084004348e-05, "loss": 1.3944, "step": 1294000 }, { "epoch": 13.66, "learning_rate": 4.3169370072923375e-05, "loss": 1.4502, "step": 1294500 }, { "epoch": 13.67, "learning_rate": 4.316673174541195e-05, "loss": 1.4486, "step": 1295000 }, { "epoch": 13.67, "learning_rate": 4.3164093417900526e-05, "loss": 1.4467, "step": 1295500 }, { "epoch": 13.68, "learning_rate": 4.31614550903891e-05, "loss": 1.3831, "step": 1296000 }, { "epoch": 13.68, "learning_rate": 4.315881676287768e-05, "loss": 1.4345, "step": 1296500 }, { "epoch": 13.69, "learning_rate": 4.315617843536626e-05, "loss": 1.4966, "step": 1297000 }, { "epoch": 13.69, "learning_rate": 4.315354010785483e-05, "loss": 1.4467, "step": 1297500 }, { "epoch": 13.7, "learning_rate": 4.3150901780343404e-05, "loss": 1.4282, "step": 1298000 }, { "epoch": 13.7, "learning_rate": 4.3148263452831986e-05, "loss": 1.4309, "step": 1298500 }, { "epoch": 13.71, "learning_rate": 4.314562512532056e-05, "loss": 1.4253, "step": 1299000 }, { "epoch": 13.71, "learning_rate": 4.314298679780914e-05, "loss": 1.467, "step": 1299500 }, { "epoch": 13.72, "learning_rate": 4.3140348470297706e-05, "loss": 1.4791, "step": 1300000 }, { "epoch": 13.72, "learning_rate": 4.313771014278629e-05, "loss": 1.418, "step": 1300500 }, { "epoch": 13.73, "learning_rate": 4.3135071815274864e-05, "loss": 1.4752, "step": 1301000 }, { "epoch": 13.74, "learning_rate": 4.313243348776344e-05, "loss": 1.4631, "step": 1301500 }, { "epoch": 13.74, "learning_rate": 4.3129795160252015e-05, "loss": 1.4475, "step": 1302000 }, { "epoch": 13.75, "learning_rate": 4.312715683274059e-05, "loss": 1.4576, "step": 1302500 }, { "epoch": 13.75, "learning_rate": 4.3124518505229166e-05, "loss": 1.4885, "step": 1303000 }, { "epoch": 13.76, "learning_rate": 4.312188017771774e-05, "loss": 1.4153, "step": 1303500 }, { "epoch": 13.76, "learning_rate": 4.311924185020632e-05, "loss": 1.4504, "step": 1304000 }, { "epoch": 13.77, "learning_rate": 4.311660352269489e-05, "loss": 1.4381, "step": 1304500 }, { "epoch": 13.77, "learning_rate": 4.311396519518347e-05, "loss": 1.4731, "step": 1305000 }, { "epoch": 13.78, "learning_rate": 4.311132686767205e-05, "loss": 1.4424, "step": 1305500 }, { "epoch": 13.78, "learning_rate": 4.3108688540160625e-05, "loss": 1.4923, "step": 1306000 }, { "epoch": 13.79, "learning_rate": 4.31060502126492e-05, "loss": 1.4151, "step": 1306500 }, { "epoch": 13.79, "learning_rate": 4.3103411885137776e-05, "loss": 1.4222, "step": 1307000 }, { "epoch": 13.8, "learning_rate": 4.310077355762635e-05, "loss": 1.453, "step": 1307500 }, { "epoch": 13.8, "learning_rate": 4.309813523011493e-05, "loss": 1.4496, "step": 1308000 }, { "epoch": 13.81, "learning_rate": 4.30954969026035e-05, "loss": 1.4788, "step": 1308500 }, { "epoch": 13.81, "learning_rate": 4.3092858575092085e-05, "loss": 1.4912, "step": 1309000 }, { "epoch": 13.82, "learning_rate": 4.3090220247580654e-05, "loss": 1.4788, "step": 1309500 }, { "epoch": 13.82, "learning_rate": 4.308758192006923e-05, "loss": 1.435, "step": 1310000 }, { "epoch": 13.83, "learning_rate": 4.308494359255781e-05, "loss": 1.4491, "step": 1310500 }, { "epoch": 13.84, "learning_rate": 4.308230526504639e-05, "loss": 1.4107, "step": 1311000 }, { "epoch": 13.84, "learning_rate": 4.307966693753496e-05, "loss": 1.454, "step": 1311500 }, { "epoch": 13.85, "learning_rate": 4.307702861002353e-05, "loss": 1.4671, "step": 1312000 }, { "epoch": 13.85, "learning_rate": 4.307439028251211e-05, "loss": 1.4923, "step": 1312500 }, { "epoch": 13.86, "learning_rate": 4.307175195500069e-05, "loss": 1.4468, "step": 1313000 }, { "epoch": 13.86, "learning_rate": 4.3069113627489264e-05, "loss": 1.4277, "step": 1313500 }, { "epoch": 13.87, "learning_rate": 4.306647529997784e-05, "loss": 1.4094, "step": 1314000 }, { "epoch": 13.87, "learning_rate": 4.3063836972466415e-05, "loss": 1.4575, "step": 1314500 }, { "epoch": 13.88, "learning_rate": 4.306119864495499e-05, "loss": 1.4778, "step": 1315000 }, { "epoch": 13.88, "learning_rate": 4.3058560317443566e-05, "loss": 1.4425, "step": 1315500 }, { "epoch": 13.89, "learning_rate": 4.305592198993215e-05, "loss": 1.392, "step": 1316000 }, { "epoch": 13.89, "learning_rate": 4.305328366242072e-05, "loss": 1.4104, "step": 1316500 }, { "epoch": 13.9, "learning_rate": 4.305064533490929e-05, "loss": 1.4181, "step": 1317000 }, { "epoch": 13.9, "learning_rate": 4.3048007007397875e-05, "loss": 1.4412, "step": 1317500 }, { "epoch": 13.91, "learning_rate": 4.304536867988645e-05, "loss": 1.4555, "step": 1318000 }, { "epoch": 13.91, "learning_rate": 4.3042730352375026e-05, "loss": 1.4466, "step": 1318500 }, { "epoch": 13.92, "learning_rate": 4.30400920248636e-05, "loss": 1.4876, "step": 1319000 }, { "epoch": 13.93, "learning_rate": 4.303745369735218e-05, "loss": 1.4294, "step": 1319500 }, { "epoch": 13.93, "learning_rate": 4.303481536984075e-05, "loss": 1.3798, "step": 1320000 }, { "epoch": 13.94, "learning_rate": 4.303217704232933e-05, "loss": 1.5197, "step": 1320500 }, { "epoch": 13.94, "learning_rate": 4.302953871481791e-05, "loss": 1.3965, "step": 1321000 }, { "epoch": 13.95, "learning_rate": 4.302690038730648e-05, "loss": 1.4528, "step": 1321500 }, { "epoch": 13.95, "learning_rate": 4.3024262059795054e-05, "loss": 1.4704, "step": 1322000 }, { "epoch": 13.96, "learning_rate": 4.3021623732283637e-05, "loss": 1.4118, "step": 1322500 }, { "epoch": 13.96, "learning_rate": 4.301898540477221e-05, "loss": 1.4696, "step": 1323000 }, { "epoch": 13.97, "learning_rate": 4.301634707726078e-05, "loss": 1.4651, "step": 1323500 }, { "epoch": 13.97, "learning_rate": 4.3013708749749356e-05, "loss": 1.4267, "step": 1324000 }, { "epoch": 13.98, "learning_rate": 4.301107042223794e-05, "loss": 1.4364, "step": 1324500 }, { "epoch": 13.98, "learning_rate": 4.3008432094726514e-05, "loss": 1.5419, "step": 1325000 }, { "epoch": 13.99, "learning_rate": 4.300579376721509e-05, "loss": 1.4504, "step": 1325500 }, { "epoch": 13.99, "learning_rate": 4.3003155439703665e-05, "loss": 1.436, "step": 1326000 }, { "epoch": 14.0, "learning_rate": 4.300051711219224e-05, "loss": 1.462, "step": 1326500 }, { "epoch": 14.0, "learning_rate": 4.2997878784680816e-05, "loss": 1.4669, "step": 1327000 }, { "epoch": 14.01, "learning_rate": 4.299524045716939e-05, "loss": 1.534, "step": 1327500 }, { "epoch": 14.01, "learning_rate": 4.2992602129657974e-05, "loss": 1.4019, "step": 1328000 }, { "epoch": 14.02, "learning_rate": 4.298996380214654e-05, "loss": 1.4406, "step": 1328500 }, { "epoch": 14.03, "learning_rate": 4.298732547463512e-05, "loss": 1.4044, "step": 1329000 }, { "epoch": 14.03, "learning_rate": 4.29846871471237e-05, "loss": 1.463, "step": 1329500 }, { "epoch": 14.04, "learning_rate": 4.2982048819612276e-05, "loss": 1.4588, "step": 1330000 }, { "epoch": 14.04, "learning_rate": 4.297941049210085e-05, "loss": 1.3962, "step": 1330500 }, { "epoch": 14.05, "learning_rate": 4.297677216458943e-05, "loss": 1.4217, "step": 1331000 }, { "epoch": 14.05, "learning_rate": 4.2974133837078e-05, "loss": 1.4174, "step": 1331500 }, { "epoch": 14.06, "learning_rate": 4.297149550956658e-05, "loss": 1.4547, "step": 1332000 }, { "epoch": 14.06, "learning_rate": 4.296885718205515e-05, "loss": 1.4235, "step": 1332500 }, { "epoch": 14.07, "learning_rate": 4.296621885454373e-05, "loss": 1.4557, "step": 1333000 }, { "epoch": 14.07, "learning_rate": 4.2963580527032304e-05, "loss": 1.4267, "step": 1333500 }, { "epoch": 14.08, "learning_rate": 4.296094219952088e-05, "loss": 1.4141, "step": 1334000 }, { "epoch": 14.08, "learning_rate": 4.295830387200946e-05, "loss": 1.4294, "step": 1334500 }, { "epoch": 14.09, "learning_rate": 4.295566554449804e-05, "loss": 1.4576, "step": 1335000 }, { "epoch": 14.09, "learning_rate": 4.2953027216986606e-05, "loss": 1.4481, "step": 1335500 }, { "epoch": 14.1, "learning_rate": 4.295038888947518e-05, "loss": 1.3587, "step": 1336000 }, { "epoch": 14.1, "learning_rate": 4.2947750561963764e-05, "loss": 1.4188, "step": 1336500 }, { "epoch": 14.11, "learning_rate": 4.294511223445234e-05, "loss": 1.4232, "step": 1337000 }, { "epoch": 14.12, "learning_rate": 4.2942473906940915e-05, "loss": 1.4798, "step": 1337500 }, { "epoch": 14.12, "learning_rate": 4.293983557942949e-05, "loss": 1.4275, "step": 1338000 }, { "epoch": 14.13, "learning_rate": 4.2937197251918066e-05, "loss": 1.4715, "step": 1338500 }, { "epoch": 14.13, "learning_rate": 4.293455892440664e-05, "loss": 1.4626, "step": 1339000 }, { "epoch": 14.14, "learning_rate": 4.293192059689522e-05, "loss": 1.4713, "step": 1339500 }, { "epoch": 14.14, "learning_rate": 4.29292822693838e-05, "loss": 1.3867, "step": 1340000 }, { "epoch": 14.15, "learning_rate": 4.292664394187237e-05, "loss": 1.463, "step": 1340500 }, { "epoch": 14.15, "learning_rate": 4.292400561436094e-05, "loss": 1.4555, "step": 1341000 }, { "epoch": 14.16, "learning_rate": 4.2921367286849525e-05, "loss": 1.4194, "step": 1341500 }, { "epoch": 14.16, "learning_rate": 4.29187289593381e-05, "loss": 1.4565, "step": 1342000 }, { "epoch": 14.17, "learning_rate": 4.291609063182667e-05, "loss": 1.418, "step": 1342500 }, { "epoch": 14.17, "learning_rate": 4.291345230431525e-05, "loss": 1.4428, "step": 1343000 }, { "epoch": 14.18, "learning_rate": 4.291081397680383e-05, "loss": 1.4662, "step": 1343500 }, { "epoch": 14.18, "learning_rate": 4.29081756492924e-05, "loss": 1.4827, "step": 1344000 }, { "epoch": 14.19, "learning_rate": 4.290553732178098e-05, "loss": 1.4102, "step": 1344500 }, { "epoch": 14.19, "learning_rate": 4.2902898994269554e-05, "loss": 1.5091, "step": 1345000 }, { "epoch": 14.2, "learning_rate": 4.290026066675813e-05, "loss": 1.445, "step": 1345500 }, { "epoch": 14.2, "learning_rate": 4.2897622339246705e-05, "loss": 1.4345, "step": 1346000 }, { "epoch": 14.21, "learning_rate": 4.289498401173529e-05, "loss": 1.5064, "step": 1346500 }, { "epoch": 14.22, "learning_rate": 4.289234568422386e-05, "loss": 1.4208, "step": 1347000 }, { "epoch": 14.22, "learning_rate": 4.288970735671243e-05, "loss": 1.4495, "step": 1347500 }, { "epoch": 14.23, "learning_rate": 4.2887069029201014e-05, "loss": 1.4242, "step": 1348000 }, { "epoch": 14.23, "learning_rate": 4.288443070168959e-05, "loss": 1.4316, "step": 1348500 }, { "epoch": 14.24, "learning_rate": 4.2881792374178165e-05, "loss": 1.5038, "step": 1349000 }, { "epoch": 14.24, "learning_rate": 4.287915404666674e-05, "loss": 1.4302, "step": 1349500 }, { "epoch": 14.25, "learning_rate": 4.2876515719155316e-05, "loss": 1.4754, "step": 1350000 }, { "epoch": 14.25, "learning_rate": 4.287387739164389e-05, "loss": 1.4778, "step": 1350500 }, { "epoch": 14.26, "learning_rate": 4.2871239064132466e-05, "loss": 1.4108, "step": 1351000 }, { "epoch": 14.26, "learning_rate": 4.286860073662104e-05, "loss": 1.4257, "step": 1351500 }, { "epoch": 14.27, "learning_rate": 4.286596240910962e-05, "loss": 1.4347, "step": 1352000 }, { "epoch": 14.27, "learning_rate": 4.286332408159819e-05, "loss": 1.3727, "step": 1352500 }, { "epoch": 14.28, "learning_rate": 4.286068575408677e-05, "loss": 1.4622, "step": 1353000 }, { "epoch": 14.28, "learning_rate": 4.285804742657535e-05, "loss": 1.354, "step": 1353500 }, { "epoch": 14.29, "learning_rate": 4.2855409099063926e-05, "loss": 1.4071, "step": 1354000 }, { "epoch": 14.29, "learning_rate": 4.2852770771552495e-05, "loss": 1.4403, "step": 1354500 }, { "epoch": 14.3, "learning_rate": 4.285013244404108e-05, "loss": 1.4118, "step": 1355000 }, { "epoch": 14.31, "learning_rate": 4.284749411652965e-05, "loss": 1.4641, "step": 1355500 }, { "epoch": 14.31, "learning_rate": 4.284485578901823e-05, "loss": 1.3872, "step": 1356000 }, { "epoch": 14.32, "learning_rate": 4.2842217461506804e-05, "loss": 1.4622, "step": 1356500 }, { "epoch": 14.32, "learning_rate": 4.283957913399538e-05, "loss": 1.4932, "step": 1357000 }, { "epoch": 14.33, "learning_rate": 4.2836940806483955e-05, "loss": 1.4223, "step": 1357500 }, { "epoch": 14.33, "learning_rate": 4.283430247897253e-05, "loss": 1.4587, "step": 1358000 }, { "epoch": 14.34, "learning_rate": 4.283166415146111e-05, "loss": 1.4294, "step": 1358500 }, { "epoch": 14.34, "learning_rate": 4.282902582394968e-05, "loss": 1.3946, "step": 1359000 }, { "epoch": 14.35, "learning_rate": 4.2826387496438257e-05, "loss": 1.3846, "step": 1359500 }, { "epoch": 14.35, "learning_rate": 4.282374916892684e-05, "loss": 1.4249, "step": 1360000 }, { "epoch": 14.36, "learning_rate": 4.2821110841415414e-05, "loss": 1.47, "step": 1360500 }, { "epoch": 14.36, "learning_rate": 4.281847251390399e-05, "loss": 1.4172, "step": 1361000 }, { "epoch": 14.37, "learning_rate": 4.281583418639256e-05, "loss": 1.4302, "step": 1361500 }, { "epoch": 14.37, "learning_rate": 4.281319585888114e-05, "loss": 1.481, "step": 1362000 }, { "epoch": 14.38, "learning_rate": 4.2810557531369716e-05, "loss": 1.488, "step": 1362500 }, { "epoch": 14.38, "learning_rate": 4.280791920385829e-05, "loss": 1.4709, "step": 1363000 }, { "epoch": 14.39, "learning_rate": 4.280528087634687e-05, "loss": 1.4077, "step": 1363500 }, { "epoch": 14.39, "learning_rate": 4.280264254883544e-05, "loss": 1.3819, "step": 1364000 }, { "epoch": 14.4, "learning_rate": 4.280000422132402e-05, "loss": 1.4584, "step": 1364500 }, { "epoch": 14.41, "learning_rate": 4.2797365893812594e-05, "loss": 1.385, "step": 1365000 }, { "epoch": 14.41, "learning_rate": 4.2794727566301176e-05, "loss": 1.4458, "step": 1365500 }, { "epoch": 14.42, "learning_rate": 4.279208923878975e-05, "loss": 1.4542, "step": 1366000 }, { "epoch": 14.42, "learning_rate": 4.278945091127832e-05, "loss": 1.442, "step": 1366500 }, { "epoch": 14.43, "learning_rate": 4.27868125837669e-05, "loss": 1.4127, "step": 1367000 }, { "epoch": 14.43, "learning_rate": 4.278417425625548e-05, "loss": 1.4622, "step": 1367500 }, { "epoch": 14.44, "learning_rate": 4.278153592874405e-05, "loss": 1.4792, "step": 1368000 }, { "epoch": 14.44, "learning_rate": 4.277889760123263e-05, "loss": 1.4533, "step": 1368500 }, { "epoch": 14.45, "learning_rate": 4.2776259273721204e-05, "loss": 1.4645, "step": 1369000 }, { "epoch": 14.45, "learning_rate": 4.277362094620978e-05, "loss": 1.509, "step": 1369500 }, { "epoch": 14.46, "learning_rate": 4.2770982618698355e-05, "loss": 1.4695, "step": 1370000 }, { "epoch": 14.46, "learning_rate": 4.276834429118694e-05, "loss": 1.4049, "step": 1370500 }, { "epoch": 14.47, "learning_rate": 4.2765705963675506e-05, "loss": 1.4159, "step": 1371000 }, { "epoch": 14.47, "learning_rate": 4.276306763616408e-05, "loss": 1.4437, "step": 1371500 }, { "epoch": 14.48, "learning_rate": 4.2760429308652664e-05, "loss": 1.4437, "step": 1372000 }, { "epoch": 14.48, "learning_rate": 4.275779098114124e-05, "loss": 1.4463, "step": 1372500 }, { "epoch": 14.49, "learning_rate": 4.2755152653629815e-05, "loss": 1.3961, "step": 1373000 }, { "epoch": 14.49, "learning_rate": 4.2752514326118384e-05, "loss": 1.4133, "step": 1373500 }, { "epoch": 14.5, "learning_rate": 4.2749875998606966e-05, "loss": 1.432, "step": 1374000 }, { "epoch": 14.51, "learning_rate": 4.274723767109554e-05, "loss": 1.4483, "step": 1374500 }, { "epoch": 14.51, "learning_rate": 4.274459934358412e-05, "loss": 1.4226, "step": 1375000 }, { "epoch": 14.52, "learning_rate": 4.274196101607269e-05, "loss": 1.3943, "step": 1375500 }, { "epoch": 14.52, "learning_rate": 4.273932268856127e-05, "loss": 1.3989, "step": 1376000 }, { "epoch": 14.53, "learning_rate": 4.2736684361049843e-05, "loss": 1.4344, "step": 1376500 }, { "epoch": 14.53, "learning_rate": 4.273404603353842e-05, "loss": 1.3787, "step": 1377000 }, { "epoch": 14.54, "learning_rate": 4.2731407706027e-05, "loss": 1.4472, "step": 1377500 }, { "epoch": 14.54, "learning_rate": 4.272876937851557e-05, "loss": 1.4903, "step": 1378000 }, { "epoch": 14.55, "learning_rate": 4.2726131051004145e-05, "loss": 1.4585, "step": 1378500 }, { "epoch": 14.55, "learning_rate": 4.272349272349273e-05, "loss": 1.4224, "step": 1379000 }, { "epoch": 14.56, "learning_rate": 4.27208543959813e-05, "loss": 1.4495, "step": 1379500 }, { "epoch": 14.56, "learning_rate": 4.271821606846988e-05, "loss": 1.4428, "step": 1380000 }, { "epoch": 14.57, "learning_rate": 4.2715577740958454e-05, "loss": 1.4681, "step": 1380500 }, { "epoch": 14.57, "learning_rate": 4.271293941344703e-05, "loss": 1.4424, "step": 1381000 }, { "epoch": 14.58, "learning_rate": 4.2710301085935605e-05, "loss": 1.4595, "step": 1381500 }, { "epoch": 14.58, "learning_rate": 4.270766275842418e-05, "loss": 1.482, "step": 1382000 }, { "epoch": 14.59, "learning_rate": 4.270502443091276e-05, "loss": 1.4221, "step": 1382500 }, { "epoch": 14.6, "learning_rate": 4.270238610340133e-05, "loss": 1.4475, "step": 1383000 }, { "epoch": 14.6, "learning_rate": 4.269974777588991e-05, "loss": 1.4658, "step": 1383500 }, { "epoch": 14.61, "learning_rate": 4.269710944837849e-05, "loss": 1.4348, "step": 1384000 }, { "epoch": 14.61, "learning_rate": 4.2694471120867065e-05, "loss": 1.458, "step": 1384500 }, { "epoch": 14.62, "learning_rate": 4.269183279335564e-05, "loss": 1.483, "step": 1385000 }, { "epoch": 14.62, "learning_rate": 4.268919446584421e-05, "loss": 1.4675, "step": 1385500 }, { "epoch": 14.63, "learning_rate": 4.268655613833279e-05, "loss": 1.4428, "step": 1386000 }, { "epoch": 14.63, "learning_rate": 4.268391781082137e-05, "loss": 1.3951, "step": 1386500 }, { "epoch": 14.64, "learning_rate": 4.268127948330994e-05, "loss": 1.4153, "step": 1387000 }, { "epoch": 14.64, "learning_rate": 4.267864115579852e-05, "loss": 1.4052, "step": 1387500 }, { "epoch": 14.65, "learning_rate": 4.267600282828709e-05, "loss": 1.3805, "step": 1388000 }, { "epoch": 14.65, "learning_rate": 4.267336450077567e-05, "loss": 1.4292, "step": 1388500 }, { "epoch": 14.66, "learning_rate": 4.2670726173264244e-05, "loss": 1.4566, "step": 1389000 }, { "epoch": 14.66, "learning_rate": 4.2668087845752826e-05, "loss": 1.4348, "step": 1389500 }, { "epoch": 14.67, "learning_rate": 4.2665449518241395e-05, "loss": 1.435, "step": 1390000 }, { "epoch": 14.67, "learning_rate": 4.266281119072997e-05, "loss": 1.4065, "step": 1390500 }, { "epoch": 14.68, "learning_rate": 4.266017286321855e-05, "loss": 1.4269, "step": 1391000 }, { "epoch": 14.68, "learning_rate": 4.265753453570713e-05, "loss": 1.4534, "step": 1391500 }, { "epoch": 14.69, "learning_rate": 4.2654896208195704e-05, "loss": 1.4202, "step": 1392000 }, { "epoch": 14.7, "learning_rate": 4.265225788068428e-05, "loss": 1.4484, "step": 1392500 }, { "epoch": 14.7, "learning_rate": 4.2649619553172855e-05, "loss": 1.4681, "step": 1393000 }, { "epoch": 14.71, "learning_rate": 4.264698122566143e-05, "loss": 1.4505, "step": 1393500 }, { "epoch": 14.71, "learning_rate": 4.2644342898150006e-05, "loss": 1.4066, "step": 1394000 }, { "epoch": 14.72, "learning_rate": 4.264170457063859e-05, "loss": 1.4475, "step": 1394500 }, { "epoch": 14.72, "learning_rate": 4.263906624312716e-05, "loss": 1.4901, "step": 1395000 }, { "epoch": 14.73, "learning_rate": 4.263642791561573e-05, "loss": 1.4063, "step": 1395500 }, { "epoch": 14.73, "learning_rate": 4.2633789588104315e-05, "loss": 1.4272, "step": 1396000 }, { "epoch": 14.74, "learning_rate": 4.263115126059289e-05, "loss": 1.4076, "step": 1396500 }, { "epoch": 14.74, "learning_rate": 4.262851293308146e-05, "loss": 1.4344, "step": 1397000 }, { "epoch": 14.75, "learning_rate": 4.2625874605570034e-05, "loss": 1.4635, "step": 1397500 }, { "epoch": 14.75, "learning_rate": 4.2623236278058616e-05, "loss": 1.4065, "step": 1398000 }, { "epoch": 14.76, "learning_rate": 4.262059795054719e-05, "loss": 1.3997, "step": 1398500 }, { "epoch": 14.76, "learning_rate": 4.261795962303577e-05, "loss": 1.4185, "step": 1399000 }, { "epoch": 14.77, "learning_rate": 4.261532129552434e-05, "loss": 1.4426, "step": 1399500 }, { "epoch": 14.77, "learning_rate": 4.261268296801292e-05, "loss": 1.4253, "step": 1400000 }, { "epoch": 14.78, "learning_rate": 4.2610044640501494e-05, "loss": 1.4392, "step": 1400500 }, { "epoch": 14.79, "learning_rate": 4.260740631299007e-05, "loss": 1.4496, "step": 1401000 }, { "epoch": 14.79, "learning_rate": 4.260476798547865e-05, "loss": 1.4177, "step": 1401500 }, { "epoch": 14.8, "learning_rate": 4.260212965796722e-05, "loss": 1.4398, "step": 1402000 }, { "epoch": 14.8, "learning_rate": 4.2599491330455796e-05, "loss": 1.4479, "step": 1402500 }, { "epoch": 14.81, "learning_rate": 4.259685300294438e-05, "loss": 1.4581, "step": 1403000 }, { "epoch": 14.81, "learning_rate": 4.2594214675432954e-05, "loss": 1.445, "step": 1403500 }, { "epoch": 14.82, "learning_rate": 4.259157634792153e-05, "loss": 1.4518, "step": 1404000 }, { "epoch": 14.82, "learning_rate": 4.2588938020410105e-05, "loss": 1.4496, "step": 1404500 }, { "epoch": 14.83, "learning_rate": 4.258629969289868e-05, "loss": 1.4541, "step": 1405000 }, { "epoch": 14.83, "learning_rate": 4.2583661365387256e-05, "loss": 1.4105, "step": 1405500 }, { "epoch": 14.84, "learning_rate": 4.258102303787583e-05, "loss": 1.4352, "step": 1406000 }, { "epoch": 14.84, "learning_rate": 4.2578384710364407e-05, "loss": 1.4378, "step": 1406500 }, { "epoch": 14.85, "learning_rate": 4.257574638285298e-05, "loss": 1.4579, "step": 1407000 }, { "epoch": 14.85, "learning_rate": 4.257310805534156e-05, "loss": 1.4461, "step": 1407500 }, { "epoch": 14.86, "learning_rate": 4.257046972783014e-05, "loss": 1.4388, "step": 1408000 }, { "epoch": 14.86, "learning_rate": 4.2567831400318715e-05, "loss": 1.4515, "step": 1408500 }, { "epoch": 14.87, "learning_rate": 4.2565193072807284e-05, "loss": 1.3891, "step": 1409000 }, { "epoch": 14.87, "learning_rate": 4.256255474529586e-05, "loss": 1.4344, "step": 1409500 }, { "epoch": 14.88, "learning_rate": 4.255991641778444e-05, "loss": 1.4216, "step": 1410000 }, { "epoch": 14.89, "learning_rate": 4.255727809027302e-05, "loss": 1.4002, "step": 1410500 }, { "epoch": 14.89, "learning_rate": 4.255463976276159e-05, "loss": 1.4354, "step": 1411000 }, { "epoch": 14.9, "learning_rate": 4.255200143525017e-05, "loss": 1.3862, "step": 1411500 }, { "epoch": 14.9, "learning_rate": 4.2549363107738744e-05, "loss": 1.4464, "step": 1412000 }, { "epoch": 14.91, "learning_rate": 4.254672478022732e-05, "loss": 1.4417, "step": 1412500 }, { "epoch": 14.91, "learning_rate": 4.2544086452715895e-05, "loss": 1.4285, "step": 1413000 }, { "epoch": 14.92, "learning_rate": 4.254144812520448e-05, "loss": 1.3974, "step": 1413500 }, { "epoch": 14.92, "learning_rate": 4.2538809797693046e-05, "loss": 1.3871, "step": 1414000 }, { "epoch": 14.93, "learning_rate": 4.253617147018162e-05, "loss": 1.3666, "step": 1414500 }, { "epoch": 14.93, "learning_rate": 4.2533533142670203e-05, "loss": 1.432, "step": 1415000 }, { "epoch": 14.94, "learning_rate": 4.253089481515878e-05, "loss": 1.4763, "step": 1415500 }, { "epoch": 14.94, "learning_rate": 4.252825648764735e-05, "loss": 1.4317, "step": 1416000 }, { "epoch": 14.95, "learning_rate": 4.252561816013593e-05, "loss": 1.4525, "step": 1416500 }, { "epoch": 14.95, "learning_rate": 4.2522979832624505e-05, "loss": 1.3896, "step": 1417000 }, { "epoch": 14.96, "learning_rate": 4.252034150511308e-05, "loss": 1.4424, "step": 1417500 }, { "epoch": 14.96, "learning_rate": 4.2517703177601656e-05, "loss": 1.449, "step": 1418000 }, { "epoch": 14.97, "learning_rate": 4.251506485009023e-05, "loss": 1.4416, "step": 1418500 }, { "epoch": 14.98, "learning_rate": 4.251242652257881e-05, "loss": 1.4354, "step": 1419000 }, { "epoch": 14.98, "learning_rate": 4.250978819506738e-05, "loss": 1.4101, "step": 1419500 }, { "epoch": 14.99, "learning_rate": 4.2507149867555965e-05, "loss": 1.4349, "step": 1420000 }, { "epoch": 14.99, "learning_rate": 4.250451154004454e-05, "loss": 1.386, "step": 1420500 }, { "epoch": 15.0, "learning_rate": 4.250187321253311e-05, "loss": 1.4987, "step": 1421000 }, { "epoch": 15.0, "learning_rate": 4.249923488502169e-05, "loss": 1.4536, "step": 1421500 }, { "epoch": 15.01, "learning_rate": 4.249659655751027e-05, "loss": 1.4279, "step": 1422000 }, { "epoch": 15.01, "learning_rate": 4.249395822999884e-05, "loss": 1.4002, "step": 1422500 }, { "epoch": 15.02, "learning_rate": 4.249131990248742e-05, "loss": 1.4103, "step": 1423000 }, { "epoch": 15.02, "learning_rate": 4.2488681574975993e-05, "loss": 1.4357, "step": 1423500 }, { "epoch": 15.03, "learning_rate": 4.248604324746457e-05, "loss": 1.3906, "step": 1424000 }, { "epoch": 15.03, "learning_rate": 4.2483404919953144e-05, "loss": 1.3773, "step": 1424500 }, { "epoch": 15.04, "learning_rate": 4.248076659244172e-05, "loss": 1.3828, "step": 1425000 }, { "epoch": 15.04, "learning_rate": 4.2478128264930295e-05, "loss": 1.4608, "step": 1425500 }, { "epoch": 15.05, "learning_rate": 4.247548993741887e-05, "loss": 1.4179, "step": 1426000 }, { "epoch": 15.05, "learning_rate": 4.2472851609907446e-05, "loss": 1.3724, "step": 1426500 }, { "epoch": 15.06, "learning_rate": 4.247021328239603e-05, "loss": 1.4252, "step": 1427000 }, { "epoch": 15.06, "learning_rate": 4.2467574954884604e-05, "loss": 1.4156, "step": 1427500 }, { "epoch": 15.07, "learning_rate": 4.246493662737317e-05, "loss": 1.4056, "step": 1428000 }, { "epoch": 15.08, "learning_rate": 4.2462298299861755e-05, "loss": 1.4576, "step": 1428500 }, { "epoch": 15.08, "learning_rate": 4.245965997235033e-05, "loss": 1.4172, "step": 1429000 }, { "epoch": 15.09, "learning_rate": 4.2457021644838906e-05, "loss": 1.4121, "step": 1429500 }, { "epoch": 15.09, "learning_rate": 4.245438331732748e-05, "loss": 1.4266, "step": 1430000 }, { "epoch": 15.1, "learning_rate": 4.245174498981606e-05, "loss": 1.4192, "step": 1430500 }, { "epoch": 15.1, "learning_rate": 4.244910666230463e-05, "loss": 1.4386, "step": 1431000 }, { "epoch": 15.11, "learning_rate": 4.244646833479321e-05, "loss": 1.3915, "step": 1431500 }, { "epoch": 15.11, "learning_rate": 4.244383000728179e-05, "loss": 1.4181, "step": 1432000 }, { "epoch": 15.12, "learning_rate": 4.2441191679770366e-05, "loss": 1.4722, "step": 1432500 }, { "epoch": 15.12, "learning_rate": 4.2438553352258934e-05, "loss": 1.4196, "step": 1433000 }, { "epoch": 15.13, "learning_rate": 4.243591502474752e-05, "loss": 1.4048, "step": 1433500 }, { "epoch": 15.13, "learning_rate": 4.243327669723609e-05, "loss": 1.4722, "step": 1434000 }, { "epoch": 15.14, "learning_rate": 4.243063836972467e-05, "loss": 1.4158, "step": 1434500 }, { "epoch": 15.14, "learning_rate": 4.2428000042213236e-05, "loss": 1.4238, "step": 1435000 }, { "epoch": 15.15, "learning_rate": 4.242536171470182e-05, "loss": 1.3842, "step": 1435500 }, { "epoch": 15.15, "learning_rate": 4.2422723387190394e-05, "loss": 1.4521, "step": 1436000 }, { "epoch": 15.16, "learning_rate": 4.242008505967897e-05, "loss": 1.4185, "step": 1436500 }, { "epoch": 15.17, "learning_rate": 4.2417446732167545e-05, "loss": 1.3849, "step": 1437000 }, { "epoch": 15.17, "learning_rate": 4.241480840465612e-05, "loss": 1.4584, "step": 1437500 }, { "epoch": 15.18, "learning_rate": 4.2412170077144696e-05, "loss": 1.3937, "step": 1438000 }, { "epoch": 15.18, "learning_rate": 4.240953174963327e-05, "loss": 1.4838, "step": 1438500 }, { "epoch": 15.19, "learning_rate": 4.2406893422121854e-05, "loss": 1.4699, "step": 1439000 }, { "epoch": 15.19, "learning_rate": 4.240425509461043e-05, "loss": 1.4091, "step": 1439500 }, { "epoch": 15.2, "learning_rate": 4.2401616767099e-05, "loss": 1.4249, "step": 1440000 }, { "epoch": 15.2, "learning_rate": 4.239897843958758e-05, "loss": 1.4158, "step": 1440500 }, { "epoch": 15.21, "learning_rate": 4.2396340112076156e-05, "loss": 1.4346, "step": 1441000 }, { "epoch": 15.21, "learning_rate": 4.239370178456473e-05, "loss": 1.4227, "step": 1441500 }, { "epoch": 15.22, "learning_rate": 4.239106345705331e-05, "loss": 1.464, "step": 1442000 }, { "epoch": 15.22, "learning_rate": 4.238842512954188e-05, "loss": 1.4949, "step": 1442500 }, { "epoch": 15.23, "learning_rate": 4.238578680203046e-05, "loss": 1.4235, "step": 1443000 }, { "epoch": 15.23, "learning_rate": 4.238314847451903e-05, "loss": 1.4504, "step": 1443500 }, { "epoch": 15.24, "learning_rate": 4.2380510147007616e-05, "loss": 1.4018, "step": 1444000 }, { "epoch": 15.24, "learning_rate": 4.2377871819496184e-05, "loss": 1.4772, "step": 1444500 }, { "epoch": 15.25, "learning_rate": 4.237523349198476e-05, "loss": 1.4501, "step": 1445000 }, { "epoch": 15.25, "learning_rate": 4.237259516447334e-05, "loss": 1.4477, "step": 1445500 }, { "epoch": 15.26, "learning_rate": 4.236995683696192e-05, "loss": 1.4051, "step": 1446000 }, { "epoch": 15.27, "learning_rate": 4.236731850945049e-05, "loss": 1.4647, "step": 1446500 }, { "epoch": 15.27, "learning_rate": 4.236468018193906e-05, "loss": 1.4474, "step": 1447000 }, { "epoch": 15.28, "learning_rate": 4.2362041854427644e-05, "loss": 1.4842, "step": 1447500 }, { "epoch": 15.28, "learning_rate": 4.235940352691622e-05, "loss": 1.3837, "step": 1448000 }, { "epoch": 15.29, "learning_rate": 4.2356765199404795e-05, "loss": 1.3915, "step": 1448500 }, { "epoch": 15.29, "learning_rate": 4.235412687189337e-05, "loss": 1.4406, "step": 1449000 }, { "epoch": 15.3, "learning_rate": 4.2351488544381946e-05, "loss": 1.4197, "step": 1449500 }, { "epoch": 15.3, "learning_rate": 4.234885021687052e-05, "loss": 1.4037, "step": 1450000 }, { "epoch": 15.31, "learning_rate": 4.23462118893591e-05, "loss": 1.3542, "step": 1450500 }, { "epoch": 15.31, "learning_rate": 4.234357356184768e-05, "loss": 1.4102, "step": 1451000 }, { "epoch": 15.32, "learning_rate": 4.2340935234336255e-05, "loss": 1.491, "step": 1451500 }, { "epoch": 15.32, "learning_rate": 4.233829690682482e-05, "loss": 1.426, "step": 1452000 }, { "epoch": 15.33, "learning_rate": 4.2335658579313406e-05, "loss": 1.4666, "step": 1452500 }, { "epoch": 15.33, "learning_rate": 4.233302025180198e-05, "loss": 1.4345, "step": 1453000 }, { "epoch": 15.34, "learning_rate": 4.2330381924290557e-05, "loss": 1.4434, "step": 1453500 }, { "epoch": 15.34, "learning_rate": 4.232774359677913e-05, "loss": 1.4456, "step": 1454000 }, { "epoch": 15.35, "learning_rate": 4.232510526926771e-05, "loss": 1.436, "step": 1454500 }, { "epoch": 15.36, "learning_rate": 4.232246694175628e-05, "loss": 1.4379, "step": 1455000 }, { "epoch": 15.36, "learning_rate": 4.231982861424486e-05, "loss": 1.3711, "step": 1455500 }, { "epoch": 15.37, "learning_rate": 4.231719028673344e-05, "loss": 1.441, "step": 1456000 }, { "epoch": 15.37, "learning_rate": 4.231455195922201e-05, "loss": 1.4355, "step": 1456500 }, { "epoch": 15.38, "learning_rate": 4.2311913631710585e-05, "loss": 1.457, "step": 1457000 }, { "epoch": 15.38, "learning_rate": 4.230927530419917e-05, "loss": 1.4272, "step": 1457500 }, { "epoch": 15.39, "learning_rate": 4.230663697668774e-05, "loss": 1.426, "step": 1458000 }, { "epoch": 15.39, "learning_rate": 4.230399864917632e-05, "loss": 1.3966, "step": 1458500 }, { "epoch": 15.4, "learning_rate": 4.230136032166489e-05, "loss": 1.3711, "step": 1459000 }, { "epoch": 15.4, "learning_rate": 4.229872199415347e-05, "loss": 1.4381, "step": 1459500 }, { "epoch": 15.41, "learning_rate": 4.2296083666642045e-05, "loss": 1.4773, "step": 1460000 }, { "epoch": 15.41, "learning_rate": 4.229344533913062e-05, "loss": 1.426, "step": 1460500 }, { "epoch": 15.42, "learning_rate": 4.2290807011619196e-05, "loss": 1.3853, "step": 1461000 }, { "epoch": 15.42, "learning_rate": 4.228816868410777e-05, "loss": 1.378, "step": 1461500 }, { "epoch": 15.43, "learning_rate": 4.228553035659635e-05, "loss": 1.4238, "step": 1462000 }, { "epoch": 15.43, "learning_rate": 4.228289202908492e-05, "loss": 1.4442, "step": 1462500 }, { "epoch": 15.44, "learning_rate": 4.2280253701573504e-05, "loss": 1.4786, "step": 1463000 }, { "epoch": 15.44, "learning_rate": 4.227761537406207e-05, "loss": 1.4063, "step": 1463500 }, { "epoch": 15.45, "learning_rate": 4.227497704655065e-05, "loss": 1.4739, "step": 1464000 }, { "epoch": 15.46, "learning_rate": 4.227233871903923e-05, "loss": 1.4429, "step": 1464500 }, { "epoch": 15.46, "learning_rate": 4.2269700391527806e-05, "loss": 1.4361, "step": 1465000 }, { "epoch": 15.47, "learning_rate": 4.226706206401638e-05, "loss": 1.4094, "step": 1465500 }, { "epoch": 15.47, "learning_rate": 4.226442373650496e-05, "loss": 1.386, "step": 1466000 }, { "epoch": 15.48, "learning_rate": 4.226178540899353e-05, "loss": 1.4265, "step": 1466500 }, { "epoch": 15.48, "learning_rate": 4.225914708148211e-05, "loss": 1.4235, "step": 1467000 }, { "epoch": 15.49, "learning_rate": 4.2256508753970684e-05, "loss": 1.4498, "step": 1467500 }, { "epoch": 15.49, "learning_rate": 4.2253870426459266e-05, "loss": 1.4086, "step": 1468000 }, { "epoch": 15.5, "learning_rate": 4.2251232098947835e-05, "loss": 1.4511, "step": 1468500 }, { "epoch": 15.5, "learning_rate": 4.224859377143641e-05, "loss": 1.4505, "step": 1469000 }, { "epoch": 15.51, "learning_rate": 4.224595544392499e-05, "loss": 1.3893, "step": 1469500 }, { "epoch": 15.51, "learning_rate": 4.224331711641357e-05, "loss": 1.3945, "step": 1470000 }, { "epoch": 15.52, "learning_rate": 4.2240678788902143e-05, "loss": 1.473, "step": 1470500 }, { "epoch": 15.52, "learning_rate": 4.223804046139071e-05, "loss": 1.4292, "step": 1471000 }, { "epoch": 15.53, "learning_rate": 4.2235402133879294e-05, "loss": 1.3867, "step": 1471500 }, { "epoch": 15.53, "learning_rate": 4.223276380636787e-05, "loss": 1.4397, "step": 1472000 }, { "epoch": 15.54, "learning_rate": 4.2230125478856445e-05, "loss": 1.42, "step": 1472500 }, { "epoch": 15.55, "learning_rate": 4.222748715134502e-05, "loss": 1.4611, "step": 1473000 }, { "epoch": 15.55, "learning_rate": 4.2224848823833596e-05, "loss": 1.4016, "step": 1473500 }, { "epoch": 15.56, "learning_rate": 4.222221049632217e-05, "loss": 1.4021, "step": 1474000 }, { "epoch": 15.56, "learning_rate": 4.221957216881075e-05, "loss": 1.4527, "step": 1474500 }, { "epoch": 15.57, "learning_rate": 4.221693384129933e-05, "loss": 1.4611, "step": 1475000 }, { "epoch": 15.57, "learning_rate": 4.22142955137879e-05, "loss": 1.4115, "step": 1475500 }, { "epoch": 15.58, "learning_rate": 4.2211657186276474e-05, "loss": 1.3728, "step": 1476000 }, { "epoch": 15.58, "learning_rate": 4.2209018858765056e-05, "loss": 1.4799, "step": 1476500 }, { "epoch": 15.59, "learning_rate": 4.220638053125363e-05, "loss": 1.4472, "step": 1477000 }, { "epoch": 15.59, "learning_rate": 4.220374220374221e-05, "loss": 1.4324, "step": 1477500 }, { "epoch": 15.6, "learning_rate": 4.220110387623078e-05, "loss": 1.4254, "step": 1478000 }, { "epoch": 15.6, "learning_rate": 4.219846554871936e-05, "loss": 1.4333, "step": 1478500 }, { "epoch": 15.61, "learning_rate": 4.2195827221207934e-05, "loss": 1.4117, "step": 1479000 }, { "epoch": 15.61, "learning_rate": 4.219318889369651e-05, "loss": 1.4862, "step": 1479500 }, { "epoch": 15.62, "learning_rate": 4.219055056618509e-05, "loss": 1.3971, "step": 1480000 }, { "epoch": 15.62, "learning_rate": 4.218791223867366e-05, "loss": 1.4141, "step": 1480500 }, { "epoch": 15.63, "learning_rate": 4.2185273911162235e-05, "loss": 1.3968, "step": 1481000 }, { "epoch": 15.63, "learning_rate": 4.218263558365082e-05, "loss": 1.4484, "step": 1481500 }, { "epoch": 15.64, "learning_rate": 4.217999725613939e-05, "loss": 1.4697, "step": 1482000 }, { "epoch": 15.65, "learning_rate": 4.217735892862796e-05, "loss": 1.4498, "step": 1482500 }, { "epoch": 15.65, "learning_rate": 4.217472060111654e-05, "loss": 1.4286, "step": 1483000 }, { "epoch": 15.66, "learning_rate": 4.217208227360512e-05, "loss": 1.4066, "step": 1483500 }, { "epoch": 15.66, "learning_rate": 4.2169443946093695e-05, "loss": 1.4835, "step": 1484000 }, { "epoch": 15.67, "learning_rate": 4.216680561858227e-05, "loss": 1.4679, "step": 1484500 }, { "epoch": 15.67, "learning_rate": 4.2164167291070846e-05, "loss": 1.3883, "step": 1485000 }, { "epoch": 15.68, "learning_rate": 4.216152896355942e-05, "loss": 1.3922, "step": 1485500 }, { "epoch": 15.68, "learning_rate": 4.2158890636048e-05, "loss": 1.4424, "step": 1486000 }, { "epoch": 15.69, "learning_rate": 4.215625230853657e-05, "loss": 1.4408, "step": 1486500 }, { "epoch": 15.69, "learning_rate": 4.2153613981025155e-05, "loss": 1.3841, "step": 1487000 }, { "epoch": 15.7, "learning_rate": 4.2150975653513724e-05, "loss": 1.4434, "step": 1487500 }, { "epoch": 15.7, "learning_rate": 4.21483373260023e-05, "loss": 1.3981, "step": 1488000 }, { "epoch": 15.71, "learning_rate": 4.214569899849088e-05, "loss": 1.446, "step": 1488500 }, { "epoch": 15.71, "learning_rate": 4.214306067097946e-05, "loss": 1.4333, "step": 1489000 }, { "epoch": 15.72, "learning_rate": 4.214042234346803e-05, "loss": 1.4608, "step": 1489500 }, { "epoch": 15.72, "learning_rate": 4.213778401595661e-05, "loss": 1.4301, "step": 1490000 }, { "epoch": 15.73, "learning_rate": 4.213514568844518e-05, "loss": 1.409, "step": 1490500 }, { "epoch": 15.73, "learning_rate": 4.213250736093376e-05, "loss": 1.4312, "step": 1491000 }, { "epoch": 15.74, "learning_rate": 4.2129869033422334e-05, "loss": 1.4541, "step": 1491500 }, { "epoch": 15.75, "learning_rate": 4.212723070591091e-05, "loss": 1.4487, "step": 1492000 }, { "epoch": 15.75, "learning_rate": 4.2124592378399485e-05, "loss": 1.4082, "step": 1492500 }, { "epoch": 15.76, "learning_rate": 4.212195405088806e-05, "loss": 1.4783, "step": 1493000 }, { "epoch": 15.76, "learning_rate": 4.211931572337664e-05, "loss": 1.3769, "step": 1493500 }, { "epoch": 15.77, "learning_rate": 4.211667739586522e-05, "loss": 1.3937, "step": 1494000 }, { "epoch": 15.77, "learning_rate": 4.211403906835379e-05, "loss": 1.4023, "step": 1494500 }, { "epoch": 15.78, "learning_rate": 4.211140074084237e-05, "loss": 1.4389, "step": 1495000 }, { "epoch": 15.78, "learning_rate": 4.2108762413330945e-05, "loss": 1.4852, "step": 1495500 }, { "epoch": 15.79, "learning_rate": 4.210612408581952e-05, "loss": 1.3463, "step": 1496000 }, { "epoch": 15.79, "learning_rate": 4.2103485758308096e-05, "loss": 1.4446, "step": 1496500 }, { "epoch": 15.8, "learning_rate": 4.210084743079667e-05, "loss": 1.4165, "step": 1497000 }, { "epoch": 15.8, "learning_rate": 4.209820910328525e-05, "loss": 1.4259, "step": 1497500 }, { "epoch": 15.81, "learning_rate": 4.209557077577382e-05, "loss": 1.413, "step": 1498000 }, { "epoch": 15.81, "learning_rate": 4.20929324482624e-05, "loss": 1.4454, "step": 1498500 }, { "epoch": 15.82, "learning_rate": 4.209029412075098e-05, "loss": 1.4524, "step": 1499000 }, { "epoch": 15.82, "learning_rate": 4.208765579323955e-05, "loss": 1.4086, "step": 1499500 }, { "epoch": 15.83, "learning_rate": 4.2085017465728124e-05, "loss": 1.4335, "step": 1500000 }, { "epoch": 15.84, "learning_rate": 4.2082379138216707e-05, "loss": 1.4369, "step": 1500500 }, { "epoch": 15.84, "learning_rate": 4.207974081070528e-05, "loss": 1.4312, "step": 1501000 }, { "epoch": 15.85, "learning_rate": 4.207710248319385e-05, "loss": 1.4136, "step": 1501500 }, { "epoch": 15.85, "learning_rate": 4.207446415568243e-05, "loss": 1.4283, "step": 1502000 }, { "epoch": 15.86, "learning_rate": 4.207182582817101e-05, "loss": 1.4503, "step": 1502500 }, { "epoch": 15.86, "learning_rate": 4.2069187500659584e-05, "loss": 1.4378, "step": 1503000 }, { "epoch": 15.87, "learning_rate": 4.206654917314816e-05, "loss": 1.4772, "step": 1503500 }, { "epoch": 15.87, "learning_rate": 4.2063910845636735e-05, "loss": 1.4235, "step": 1504000 }, { "epoch": 15.88, "learning_rate": 4.206127251812531e-05, "loss": 1.4441, "step": 1504500 }, { "epoch": 15.88, "learning_rate": 4.2058634190613886e-05, "loss": 1.4727, "step": 1505000 }, { "epoch": 15.89, "learning_rate": 4.205599586310247e-05, "loss": 1.4559, "step": 1505500 }, { "epoch": 15.89, "learning_rate": 4.2053357535591044e-05, "loss": 1.4154, "step": 1506000 }, { "epoch": 15.9, "learning_rate": 4.205071920807961e-05, "loss": 1.4288, "step": 1506500 }, { "epoch": 15.9, "learning_rate": 4.2048080880568195e-05, "loss": 1.3847, "step": 1507000 }, { "epoch": 15.91, "learning_rate": 4.204544255305677e-05, "loss": 1.5109, "step": 1507500 }, { "epoch": 15.91, "learning_rate": 4.2042804225545346e-05, "loss": 1.4625, "step": 1508000 }, { "epoch": 15.92, "learning_rate": 4.204016589803392e-05, "loss": 1.4261, "step": 1508500 }, { "epoch": 15.92, "learning_rate": 4.20375275705225e-05, "loss": 1.4479, "step": 1509000 }, { "epoch": 15.93, "learning_rate": 4.203488924301107e-05, "loss": 1.4176, "step": 1509500 }, { "epoch": 15.94, "learning_rate": 4.203225091549965e-05, "loss": 1.3624, "step": 1510000 }, { "epoch": 15.94, "learning_rate": 4.202961258798822e-05, "loss": 1.414, "step": 1510500 }, { "epoch": 15.95, "learning_rate": 4.20269742604768e-05, "loss": 1.447, "step": 1511000 }, { "epoch": 15.95, "learning_rate": 4.2024335932965374e-05, "loss": 1.4516, "step": 1511500 }, { "epoch": 15.96, "learning_rate": 4.202169760545395e-05, "loss": 1.3872, "step": 1512000 }, { "epoch": 15.96, "learning_rate": 4.201905927794253e-05, "loss": 1.4211, "step": 1512500 }, { "epoch": 15.97, "learning_rate": 4.201642095043111e-05, "loss": 1.4552, "step": 1513000 }, { "epoch": 15.97, "learning_rate": 4.2013782622919676e-05, "loss": 1.3732, "step": 1513500 }, { "epoch": 15.98, "learning_rate": 4.201114429540826e-05, "loss": 1.4778, "step": 1514000 }, { "epoch": 15.98, "learning_rate": 4.2008505967896834e-05, "loss": 1.3997, "step": 1514500 }, { "epoch": 15.99, "learning_rate": 4.200586764038541e-05, "loss": 1.4493, "step": 1515000 }, { "epoch": 15.99, "learning_rate": 4.2003229312873985e-05, "loss": 1.4036, "step": 1515500 }, { "epoch": 16.0, "learning_rate": 4.200059098536256e-05, "loss": 1.406, "step": 1516000 }, { "epoch": 16.0, "learning_rate": 4.1997952657851136e-05, "loss": 1.4399, "step": 1516500 }, { "epoch": 16.01, "learning_rate": 4.199531433033971e-05, "loss": 1.4463, "step": 1517000 }, { "epoch": 16.01, "learning_rate": 4.1992676002828293e-05, "loss": 1.4318, "step": 1517500 }, { "epoch": 16.02, "learning_rate": 4.199003767531687e-05, "loss": 1.4135, "step": 1518000 }, { "epoch": 16.03, "learning_rate": 4.198739934780544e-05, "loss": 1.4512, "step": 1518500 }, { "epoch": 16.03, "learning_rate": 4.198476102029402e-05, "loss": 1.3983, "step": 1519000 }, { "epoch": 16.04, "learning_rate": 4.1982122692782595e-05, "loss": 1.4161, "step": 1519500 }, { "epoch": 16.04, "learning_rate": 4.197948436527117e-05, "loss": 1.423, "step": 1520000 }, { "epoch": 16.05, "learning_rate": 4.197684603775974e-05, "loss": 1.4534, "step": 1520500 }, { "epoch": 16.05, "learning_rate": 4.197420771024832e-05, "loss": 1.471, "step": 1521000 }, { "epoch": 16.06, "learning_rate": 4.19715693827369e-05, "loss": 1.4596, "step": 1521500 }, { "epoch": 16.06, "learning_rate": 4.196893105522547e-05, "loss": 1.4405, "step": 1522000 }, { "epoch": 16.07, "learning_rate": 4.196629272771405e-05, "loss": 1.4176, "step": 1522500 }, { "epoch": 16.07, "learning_rate": 4.1963654400202624e-05, "loss": 1.4532, "step": 1523000 }, { "epoch": 16.08, "learning_rate": 4.19610160726912e-05, "loss": 1.3659, "step": 1523500 }, { "epoch": 16.08, "learning_rate": 4.1958377745179775e-05, "loss": 1.4079, "step": 1524000 }, { "epoch": 16.09, "learning_rate": 4.195573941766836e-05, "loss": 1.3446, "step": 1524500 }, { "epoch": 16.09, "learning_rate": 4.195310109015693e-05, "loss": 1.4858, "step": 1525000 }, { "epoch": 16.1, "learning_rate": 4.19504627626455e-05, "loss": 1.4956, "step": 1525500 }, { "epoch": 16.1, "learning_rate": 4.1947824435134084e-05, "loss": 1.4188, "step": 1526000 }, { "epoch": 16.11, "learning_rate": 4.194518610762266e-05, "loss": 1.415, "step": 1526500 }, { "epoch": 16.11, "learning_rate": 4.1942547780111235e-05, "loss": 1.4134, "step": 1527000 }, { "epoch": 16.12, "learning_rate": 4.193990945259981e-05, "loss": 1.414, "step": 1527500 }, { "epoch": 16.13, "learning_rate": 4.1937271125088385e-05, "loss": 1.3932, "step": 1528000 }, { "epoch": 16.13, "learning_rate": 4.193463279757696e-05, "loss": 1.3962, "step": 1528500 }, { "epoch": 16.14, "learning_rate": 4.1931994470065536e-05, "loss": 1.4497, "step": 1529000 }, { "epoch": 16.14, "learning_rate": 4.192935614255412e-05, "loss": 1.4519, "step": 1529500 }, { "epoch": 16.15, "learning_rate": 4.192671781504269e-05, "loss": 1.3972, "step": 1530000 }, { "epoch": 16.15, "learning_rate": 4.192407948753126e-05, "loss": 1.4172, "step": 1530500 }, { "epoch": 16.16, "learning_rate": 4.1921441160019845e-05, "loss": 1.4292, "step": 1531000 }, { "epoch": 16.16, "learning_rate": 4.191880283250842e-05, "loss": 1.4052, "step": 1531500 }, { "epoch": 16.17, "learning_rate": 4.1916164504996996e-05, "loss": 1.4259, "step": 1532000 }, { "epoch": 16.17, "learning_rate": 4.1913526177485565e-05, "loss": 1.3816, "step": 1532500 }, { "epoch": 16.18, "learning_rate": 4.191088784997415e-05, "loss": 1.3892, "step": 1533000 }, { "epoch": 16.18, "learning_rate": 4.190824952246272e-05, "loss": 1.4227, "step": 1533500 }, { "epoch": 16.19, "learning_rate": 4.19056111949513e-05, "loss": 1.4109, "step": 1534000 }, { "epoch": 16.19, "learning_rate": 4.1902972867439874e-05, "loss": 1.4403, "step": 1534500 }, { "epoch": 16.2, "learning_rate": 4.190033453992845e-05, "loss": 1.4506, "step": 1535000 }, { "epoch": 16.2, "learning_rate": 4.1897696212417025e-05, "loss": 1.4061, "step": 1535500 }, { "epoch": 16.21, "learning_rate": 4.18950578849056e-05, "loss": 1.4415, "step": 1536000 }, { "epoch": 16.22, "learning_rate": 4.189241955739418e-05, "loss": 1.4242, "step": 1536500 }, { "epoch": 16.22, "learning_rate": 4.188978122988276e-05, "loss": 1.401, "step": 1537000 }, { "epoch": 16.23, "learning_rate": 4.1887142902371327e-05, "loss": 1.4078, "step": 1537500 }, { "epoch": 16.23, "learning_rate": 4.188450457485991e-05, "loss": 1.4105, "step": 1538000 }, { "epoch": 16.24, "learning_rate": 4.1881866247348484e-05, "loss": 1.3957, "step": 1538500 }, { "epoch": 16.24, "learning_rate": 4.187922791983706e-05, "loss": 1.3706, "step": 1539000 }, { "epoch": 16.25, "learning_rate": 4.1876589592325635e-05, "loss": 1.4192, "step": 1539500 }, { "epoch": 16.25, "learning_rate": 4.187395126481421e-05, "loss": 1.403, "step": 1540000 }, { "epoch": 16.26, "learning_rate": 4.1871312937302786e-05, "loss": 1.4355, "step": 1540500 }, { "epoch": 16.26, "learning_rate": 4.186867460979136e-05, "loss": 1.3653, "step": 1541000 }, { "epoch": 16.27, "learning_rate": 4.1866036282279944e-05, "loss": 1.4254, "step": 1541500 }, { "epoch": 16.27, "learning_rate": 4.186339795476851e-05, "loss": 1.3987, "step": 1542000 }, { "epoch": 16.28, "learning_rate": 4.186075962725709e-05, "loss": 1.3716, "step": 1542500 }, { "epoch": 16.28, "learning_rate": 4.185812129974567e-05, "loss": 1.3712, "step": 1543000 }, { "epoch": 16.29, "learning_rate": 4.1855482972234246e-05, "loss": 1.4048, "step": 1543500 }, { "epoch": 16.29, "learning_rate": 4.185284464472282e-05, "loss": 1.3838, "step": 1544000 }, { "epoch": 16.3, "learning_rate": 4.185020631721139e-05, "loss": 1.4156, "step": 1544500 }, { "epoch": 16.3, "learning_rate": 4.184756798969997e-05, "loss": 1.4102, "step": 1545000 }, { "epoch": 16.31, "learning_rate": 4.184492966218855e-05, "loss": 1.4394, "step": 1545500 }, { "epoch": 16.32, "learning_rate": 4.184229133467712e-05, "loss": 1.4413, "step": 1546000 }, { "epoch": 16.32, "learning_rate": 4.1839653007165706e-05, "loss": 1.4344, "step": 1546500 }, { "epoch": 16.33, "learning_rate": 4.1837014679654274e-05, "loss": 1.3868, "step": 1547000 }, { "epoch": 16.33, "learning_rate": 4.183437635214285e-05, "loss": 1.4639, "step": 1547500 }, { "epoch": 16.34, "learning_rate": 4.1831738024631425e-05, "loss": 1.3568, "step": 1548000 }, { "epoch": 16.34, "learning_rate": 4.182909969712001e-05, "loss": 1.4366, "step": 1548500 }, { "epoch": 16.35, "learning_rate": 4.1826461369608576e-05, "loss": 1.4548, "step": 1549000 }, { "epoch": 16.35, "learning_rate": 4.182382304209715e-05, "loss": 1.4414, "step": 1549500 }, { "epoch": 16.36, "learning_rate": 4.1821184714585734e-05, "loss": 1.4497, "step": 1550000 }, { "epoch": 16.36, "learning_rate": 4.181854638707431e-05, "loss": 1.4126, "step": 1550500 }, { "epoch": 16.37, "learning_rate": 4.1815908059562885e-05, "loss": 1.4577, "step": 1551000 }, { "epoch": 16.37, "learning_rate": 4.181326973205146e-05, "loss": 1.3501, "step": 1551500 }, { "epoch": 16.38, "learning_rate": 4.1810631404540036e-05, "loss": 1.4447, "step": 1552000 }, { "epoch": 16.38, "learning_rate": 4.180799307702861e-05, "loss": 1.4166, "step": 1552500 }, { "epoch": 16.39, "learning_rate": 4.180535474951719e-05, "loss": 1.4606, "step": 1553000 }, { "epoch": 16.39, "learning_rate": 4.180271642200577e-05, "loss": 1.4318, "step": 1553500 }, { "epoch": 16.4, "learning_rate": 4.180007809449434e-05, "loss": 1.4348, "step": 1554000 }, { "epoch": 16.41, "learning_rate": 4.1797439766982913e-05, "loss": 1.4005, "step": 1554500 }, { "epoch": 16.41, "learning_rate": 4.1794801439471496e-05, "loss": 1.412, "step": 1555000 }, { "epoch": 16.42, "learning_rate": 4.179216311196007e-05, "loss": 1.4202, "step": 1555500 }, { "epoch": 16.42, "learning_rate": 4.178952478444865e-05, "loss": 1.4169, "step": 1556000 }, { "epoch": 16.43, "learning_rate": 4.1786886456937215e-05, "loss": 1.4006, "step": 1556500 }, { "epoch": 16.43, "learning_rate": 4.17842481294258e-05, "loss": 1.3605, "step": 1557000 }, { "epoch": 16.44, "learning_rate": 4.178160980191437e-05, "loss": 1.3829, "step": 1557500 }, { "epoch": 16.44, "learning_rate": 4.177897147440295e-05, "loss": 1.4698, "step": 1558000 }, { "epoch": 16.45, "learning_rate": 4.1776333146891524e-05, "loss": 1.4752, "step": 1558500 }, { "epoch": 16.45, "learning_rate": 4.17736948193801e-05, "loss": 1.4138, "step": 1559000 }, { "epoch": 16.46, "learning_rate": 4.1771056491868675e-05, "loss": 1.4337, "step": 1559500 }, { "epoch": 16.46, "learning_rate": 4.176841816435725e-05, "loss": 1.416, "step": 1560000 }, { "epoch": 16.47, "learning_rate": 4.176577983684583e-05, "loss": 1.4478, "step": 1560500 }, { "epoch": 16.47, "learning_rate": 4.17631415093344e-05, "loss": 1.382, "step": 1561000 }, { "epoch": 16.48, "learning_rate": 4.176050318182298e-05, "loss": 1.4176, "step": 1561500 }, { "epoch": 16.48, "learning_rate": 4.175786485431156e-05, "loss": 1.3608, "step": 1562000 }, { "epoch": 16.49, "learning_rate": 4.1755226526800135e-05, "loss": 1.4934, "step": 1562500 }, { "epoch": 16.49, "learning_rate": 4.175258819928871e-05, "loss": 1.4622, "step": 1563000 }, { "epoch": 16.5, "learning_rate": 4.1749949871777286e-05, "loss": 1.4154, "step": 1563500 }, { "epoch": 16.51, "learning_rate": 4.174731154426586e-05, "loss": 1.4503, "step": 1564000 }, { "epoch": 16.51, "learning_rate": 4.174467321675444e-05, "loss": 1.3952, "step": 1564500 }, { "epoch": 16.52, "learning_rate": 4.174203488924301e-05, "loss": 1.4024, "step": 1565000 }, { "epoch": 16.52, "learning_rate": 4.1739396561731594e-05, "loss": 1.4291, "step": 1565500 }, { "epoch": 16.53, "learning_rate": 4.173675823422016e-05, "loss": 1.5112, "step": 1566000 }, { "epoch": 16.53, "learning_rate": 4.173411990670874e-05, "loss": 1.4522, "step": 1566500 }, { "epoch": 16.54, "learning_rate": 4.173148157919732e-05, "loss": 1.4065, "step": 1567000 }, { "epoch": 16.54, "learning_rate": 4.1728843251685896e-05, "loss": 1.4686, "step": 1567500 }, { "epoch": 16.55, "learning_rate": 4.1726204924174465e-05, "loss": 1.4315, "step": 1568000 }, { "epoch": 16.55, "learning_rate": 4.172356659666305e-05, "loss": 1.4794, "step": 1568500 }, { "epoch": 16.56, "learning_rate": 4.172092826915162e-05, "loss": 1.4398, "step": 1569000 }, { "epoch": 16.56, "learning_rate": 4.17182899416402e-05, "loss": 1.4415, "step": 1569500 }, { "epoch": 16.57, "learning_rate": 4.1715651614128774e-05, "loss": 1.4995, "step": 1570000 }, { "epoch": 16.57, "learning_rate": 4.171301328661735e-05, "loss": 1.4024, "step": 1570500 }, { "epoch": 16.58, "learning_rate": 4.1710374959105925e-05, "loss": 1.3879, "step": 1571000 }, { "epoch": 16.58, "learning_rate": 4.17077366315945e-05, "loss": 1.4432, "step": 1571500 }, { "epoch": 16.59, "learning_rate": 4.1705098304083076e-05, "loss": 1.4236, "step": 1572000 }, { "epoch": 16.6, "learning_rate": 4.170245997657166e-05, "loss": 1.4061, "step": 1572500 }, { "epoch": 16.6, "learning_rate": 4.169982164906023e-05, "loss": 1.4371, "step": 1573000 }, { "epoch": 16.61, "learning_rate": 4.16971833215488e-05, "loss": 1.4286, "step": 1573500 }, { "epoch": 16.61, "learning_rate": 4.1694544994037385e-05, "loss": 1.4368, "step": 1574000 }, { "epoch": 16.62, "learning_rate": 4.169190666652596e-05, "loss": 1.3684, "step": 1574500 }, { "epoch": 16.62, "learning_rate": 4.1689268339014535e-05, "loss": 1.4477, "step": 1575000 }, { "epoch": 16.63, "learning_rate": 4.168663001150311e-05, "loss": 1.3976, "step": 1575500 }, { "epoch": 16.63, "learning_rate": 4.1683991683991686e-05, "loss": 1.3676, "step": 1576000 }, { "epoch": 16.64, "learning_rate": 4.168135335648026e-05, "loss": 1.3727, "step": 1576500 }, { "epoch": 16.64, "learning_rate": 4.167871502896884e-05, "loss": 1.4657, "step": 1577000 }, { "epoch": 16.65, "learning_rate": 4.167607670145741e-05, "loss": 1.4331, "step": 1577500 }, { "epoch": 16.65, "learning_rate": 4.167343837394599e-05, "loss": 1.4182, "step": 1578000 }, { "epoch": 16.66, "learning_rate": 4.1670800046434564e-05, "loss": 1.4205, "step": 1578500 }, { "epoch": 16.66, "learning_rate": 4.1668161718923146e-05, "loss": 1.3848, "step": 1579000 }, { "epoch": 16.67, "learning_rate": 4.166552339141172e-05, "loss": 1.4162, "step": 1579500 }, { "epoch": 16.67, "learning_rate": 4.166288506390029e-05, "loss": 1.4357, "step": 1580000 }, { "epoch": 16.68, "learning_rate": 4.166024673638887e-05, "loss": 1.4136, "step": 1580500 }, { "epoch": 16.68, "learning_rate": 4.165760840887745e-05, "loss": 1.4672, "step": 1581000 }, { "epoch": 16.69, "learning_rate": 4.1654970081366024e-05, "loss": 1.4228, "step": 1581500 }, { "epoch": 16.7, "learning_rate": 4.16523317538546e-05, "loss": 1.3872, "step": 1582000 }, { "epoch": 16.7, "learning_rate": 4.1649693426343175e-05, "loss": 1.4336, "step": 1582500 }, { "epoch": 16.71, "learning_rate": 4.164705509883175e-05, "loss": 1.4257, "step": 1583000 }, { "epoch": 16.71, "learning_rate": 4.1644416771320326e-05, "loss": 1.4025, "step": 1583500 }, { "epoch": 16.72, "learning_rate": 4.16417784438089e-05, "loss": 1.388, "step": 1584000 }, { "epoch": 16.72, "learning_rate": 4.163914011629748e-05, "loss": 1.4136, "step": 1584500 }, { "epoch": 16.73, "learning_rate": 4.163650178878605e-05, "loss": 1.4691, "step": 1585000 }, { "epoch": 16.73, "learning_rate": 4.163386346127463e-05, "loss": 1.405, "step": 1585500 }, { "epoch": 16.74, "learning_rate": 4.163122513376321e-05, "loss": 1.4426, "step": 1586000 }, { "epoch": 16.74, "learning_rate": 4.1628586806251785e-05, "loss": 1.4439, "step": 1586500 }, { "epoch": 16.75, "learning_rate": 4.1625948478740354e-05, "loss": 1.4723, "step": 1587000 }, { "epoch": 16.75, "learning_rate": 4.1623310151228936e-05, "loss": 1.3888, "step": 1587500 }, { "epoch": 16.76, "learning_rate": 4.162067182371751e-05, "loss": 1.3892, "step": 1588000 }, { "epoch": 16.76, "learning_rate": 4.161803349620609e-05, "loss": 1.4069, "step": 1588500 }, { "epoch": 16.77, "learning_rate": 4.161539516869466e-05, "loss": 1.3856, "step": 1589000 }, { "epoch": 16.77, "learning_rate": 4.161275684118324e-05, "loss": 1.3715, "step": 1589500 }, { "epoch": 16.78, "learning_rate": 4.1610118513671814e-05, "loss": 1.416, "step": 1590000 }, { "epoch": 16.79, "learning_rate": 4.160748018616039e-05, "loss": 1.4378, "step": 1590500 }, { "epoch": 16.79, "learning_rate": 4.160484185864897e-05, "loss": 1.4008, "step": 1591000 }, { "epoch": 16.8, "learning_rate": 4.160220353113755e-05, "loss": 1.4369, "step": 1591500 }, { "epoch": 16.8, "learning_rate": 4.1599565203626116e-05, "loss": 1.4007, "step": 1592000 }, { "epoch": 16.81, "learning_rate": 4.15969268761147e-05, "loss": 1.4348, "step": 1592500 }, { "epoch": 16.81, "learning_rate": 4.159428854860327e-05, "loss": 1.3795, "step": 1593000 }, { "epoch": 16.82, "learning_rate": 4.159165022109185e-05, "loss": 1.4096, "step": 1593500 }, { "epoch": 16.82, "learning_rate": 4.1589011893580424e-05, "loss": 1.438, "step": 1594000 }, { "epoch": 16.83, "learning_rate": 4.1586373566069e-05, "loss": 1.4782, "step": 1594500 }, { "epoch": 16.83, "learning_rate": 4.1583735238557575e-05, "loss": 1.4174, "step": 1595000 }, { "epoch": 16.84, "learning_rate": 4.158109691104615e-05, "loss": 1.4149, "step": 1595500 }, { "epoch": 16.84, "learning_rate": 4.1578458583534726e-05, "loss": 1.4173, "step": 1596000 }, { "epoch": 16.85, "learning_rate": 4.15758202560233e-05, "loss": 1.473, "step": 1596500 }, { "epoch": 16.85, "learning_rate": 4.157318192851188e-05, "loss": 1.4163, "step": 1597000 }, { "epoch": 16.86, "learning_rate": 4.157054360100045e-05, "loss": 1.4344, "step": 1597500 }, { "epoch": 16.86, "learning_rate": 4.1567905273489035e-05, "loss": 1.4571, "step": 1598000 }, { "epoch": 16.87, "learning_rate": 4.156526694597761e-05, "loss": 1.3993, "step": 1598500 }, { "epoch": 16.87, "learning_rate": 4.156262861846618e-05, "loss": 1.4791, "step": 1599000 }, { "epoch": 16.88, "learning_rate": 4.155999029095476e-05, "loss": 1.4622, "step": 1599500 }, { "epoch": 16.89, "learning_rate": 4.155735196344334e-05, "loss": 1.4753, "step": 1600000 }, { "epoch": 16.89, "learning_rate": 4.155471363593191e-05, "loss": 1.4155, "step": 1600500 }, { "epoch": 16.9, "learning_rate": 4.155207530842049e-05, "loss": 1.4762, "step": 1601000 }, { "epoch": 16.9, "learning_rate": 4.1549436980909063e-05, "loss": 1.4654, "step": 1601500 }, { "epoch": 16.91, "learning_rate": 4.154679865339764e-05, "loss": 1.4434, "step": 1602000 }, { "epoch": 16.91, "learning_rate": 4.1544160325886214e-05, "loss": 1.4032, "step": 1602500 }, { "epoch": 16.92, "learning_rate": 4.15415219983748e-05, "loss": 1.39, "step": 1603000 }, { "epoch": 16.92, "learning_rate": 4.1538883670863365e-05, "loss": 1.3726, "step": 1603500 }, { "epoch": 16.93, "learning_rate": 4.153624534335194e-05, "loss": 1.4242, "step": 1604000 }, { "epoch": 16.93, "learning_rate": 4.153360701584052e-05, "loss": 1.4696, "step": 1604500 }, { "epoch": 16.94, "learning_rate": 4.15309686883291e-05, "loss": 1.4291, "step": 1605000 }, { "epoch": 16.94, "learning_rate": 4.1528330360817674e-05, "loss": 1.4749, "step": 1605500 }, { "epoch": 16.95, "learning_rate": 4.152569203330624e-05, "loss": 1.4216, "step": 1606000 }, { "epoch": 16.95, "learning_rate": 4.1523053705794825e-05, "loss": 1.4855, "step": 1606500 }, { "epoch": 16.96, "learning_rate": 4.15204153782834e-05, "loss": 1.4221, "step": 1607000 }, { "epoch": 16.96, "learning_rate": 4.1517777050771976e-05, "loss": 1.4627, "step": 1607500 }, { "epoch": 16.97, "learning_rate": 4.151513872326055e-05, "loss": 1.4187, "step": 1608000 }, { "epoch": 16.97, "learning_rate": 4.151250039574913e-05, "loss": 1.4203, "step": 1608500 }, { "epoch": 16.98, "learning_rate": 4.15098620682377e-05, "loss": 1.3503, "step": 1609000 }, { "epoch": 16.99, "learning_rate": 4.150722374072628e-05, "loss": 1.4109, "step": 1609500 }, { "epoch": 16.99, "learning_rate": 4.150458541321486e-05, "loss": 1.4218, "step": 1610000 }, { "epoch": 17.0, "learning_rate": 4.1501947085703436e-05, "loss": 1.4512, "step": 1610500 }, { "epoch": 17.0, "learning_rate": 4.1499308758192004e-05, "loss": 1.3891, "step": 1611000 }, { "epoch": 17.01, "learning_rate": 4.149667043068059e-05, "loss": 1.3712, "step": 1611500 }, { "epoch": 17.01, "learning_rate": 4.149403210316916e-05, "loss": 1.4174, "step": 1612000 }, { "epoch": 17.02, "learning_rate": 4.149139377565774e-05, "loss": 1.3895, "step": 1612500 }, { "epoch": 17.02, "learning_rate": 4.148875544814631e-05, "loss": 1.4064, "step": 1613000 }, { "epoch": 17.03, "learning_rate": 4.148611712063489e-05, "loss": 1.3801, "step": 1613500 }, { "epoch": 17.03, "learning_rate": 4.1483478793123464e-05, "loss": 1.4441, "step": 1614000 }, { "epoch": 17.04, "learning_rate": 4.148084046561204e-05, "loss": 1.3741, "step": 1614500 }, { "epoch": 17.04, "learning_rate": 4.147820213810062e-05, "loss": 1.3954, "step": 1615000 }, { "epoch": 17.05, "learning_rate": 4.147556381058919e-05, "loss": 1.3902, "step": 1615500 }, { "epoch": 17.05, "learning_rate": 4.1472925483077766e-05, "loss": 1.3911, "step": 1616000 }, { "epoch": 17.06, "learning_rate": 4.147028715556635e-05, "loss": 1.4318, "step": 1616500 }, { "epoch": 17.06, "learning_rate": 4.1467648828054924e-05, "loss": 1.4343, "step": 1617000 }, { "epoch": 17.07, "learning_rate": 4.14650105005435e-05, "loss": 1.4023, "step": 1617500 }, { "epoch": 17.08, "learning_rate": 4.146237217303207e-05, "loss": 1.4193, "step": 1618000 }, { "epoch": 17.08, "learning_rate": 4.145973384552065e-05, "loss": 1.4062, "step": 1618500 }, { "epoch": 17.09, "learning_rate": 4.1457095518009226e-05, "loss": 1.4051, "step": 1619000 }, { "epoch": 17.09, "learning_rate": 4.14544571904978e-05, "loss": 1.3939, "step": 1619500 }, { "epoch": 17.1, "learning_rate": 4.1451818862986384e-05, "loss": 1.4426, "step": 1620000 }, { "epoch": 17.1, "learning_rate": 4.144918053547495e-05, "loss": 1.4619, "step": 1620500 }, { "epoch": 17.11, "learning_rate": 4.144654220796353e-05, "loss": 1.3402, "step": 1621000 }, { "epoch": 17.11, "learning_rate": 4.14439038804521e-05, "loss": 1.4035, "step": 1621500 }, { "epoch": 17.12, "learning_rate": 4.1441265552940686e-05, "loss": 1.4382, "step": 1622000 }, { "epoch": 17.12, "learning_rate": 4.1438627225429254e-05, "loss": 1.3843, "step": 1622500 }, { "epoch": 17.13, "learning_rate": 4.143598889791783e-05, "loss": 1.3832, "step": 1623000 }, { "epoch": 17.13, "learning_rate": 4.143335057040641e-05, "loss": 1.358, "step": 1623500 }, { "epoch": 17.14, "learning_rate": 4.143071224289499e-05, "loss": 1.4259, "step": 1624000 }, { "epoch": 17.14, "learning_rate": 4.142807391538356e-05, "loss": 1.3565, "step": 1624500 }, { "epoch": 17.15, "learning_rate": 4.142543558787214e-05, "loss": 1.4238, "step": 1625000 }, { "epoch": 17.15, "learning_rate": 4.1422797260360714e-05, "loss": 1.4076, "step": 1625500 }, { "epoch": 17.16, "learning_rate": 4.142015893284929e-05, "loss": 1.3893, "step": 1626000 }, { "epoch": 17.16, "learning_rate": 4.1417520605337865e-05, "loss": 1.4245, "step": 1626500 }, { "epoch": 17.17, "learning_rate": 4.141488227782645e-05, "loss": 1.4009, "step": 1627000 }, { "epoch": 17.18, "learning_rate": 4.1412243950315016e-05, "loss": 1.4113, "step": 1627500 }, { "epoch": 17.18, "learning_rate": 4.140960562280359e-05, "loss": 1.3989, "step": 1628000 }, { "epoch": 17.19, "learning_rate": 4.1406967295292174e-05, "loss": 1.389, "step": 1628500 }, { "epoch": 17.19, "learning_rate": 4.140432896778075e-05, "loss": 1.43, "step": 1629000 }, { "epoch": 17.2, "learning_rate": 4.1401690640269325e-05, "loss": 1.4441, "step": 1629500 }, { "epoch": 17.2, "learning_rate": 4.139905231275789e-05, "loss": 1.3752, "step": 1630000 }, { "epoch": 17.21, "learning_rate": 4.1396413985246476e-05, "loss": 1.3485, "step": 1630500 }, { "epoch": 17.21, "learning_rate": 4.139377565773505e-05, "loss": 1.4128, "step": 1631000 }, { "epoch": 17.22, "learning_rate": 4.1391137330223627e-05, "loss": 1.4076, "step": 1631500 }, { "epoch": 17.22, "learning_rate": 4.13884990027122e-05, "loss": 1.3962, "step": 1632000 }, { "epoch": 17.23, "learning_rate": 4.138586067520078e-05, "loss": 1.3894, "step": 1632500 }, { "epoch": 17.23, "learning_rate": 4.138322234768935e-05, "loss": 1.3827, "step": 1633000 }, { "epoch": 17.24, "learning_rate": 4.138058402017793e-05, "loss": 1.4004, "step": 1633500 }, { "epoch": 17.24, "learning_rate": 4.137794569266651e-05, "loss": 1.4115, "step": 1634000 }, { "epoch": 17.25, "learning_rate": 4.137530736515508e-05, "loss": 1.4184, "step": 1634500 }, { "epoch": 17.25, "learning_rate": 4.1372669037643655e-05, "loss": 1.3857, "step": 1635000 }, { "epoch": 17.26, "learning_rate": 4.137003071013224e-05, "loss": 1.4266, "step": 1635500 }, { "epoch": 17.27, "learning_rate": 4.136739238262081e-05, "loss": 1.4686, "step": 1636000 }, { "epoch": 17.27, "learning_rate": 4.136475405510939e-05, "loss": 1.4251, "step": 1636500 }, { "epoch": 17.28, "learning_rate": 4.1362115727597964e-05, "loss": 1.3737, "step": 1637000 }, { "epoch": 17.28, "learning_rate": 4.135947740008654e-05, "loss": 1.434, "step": 1637500 }, { "epoch": 17.29, "learning_rate": 4.1356839072575115e-05, "loss": 1.3759, "step": 1638000 }, { "epoch": 17.29, "learning_rate": 4.135420074506369e-05, "loss": 1.4274, "step": 1638500 }, { "epoch": 17.3, "learning_rate": 4.135156241755227e-05, "loss": 1.4217, "step": 1639000 }, { "epoch": 17.3, "learning_rate": 4.134892409004084e-05, "loss": 1.5034, "step": 1639500 }, { "epoch": 17.31, "learning_rate": 4.1346285762529417e-05, "loss": 1.427, "step": 1640000 }, { "epoch": 17.31, "learning_rate": 4.1343647435018e-05, "loss": 1.4253, "step": 1640500 }, { "epoch": 17.32, "learning_rate": 4.1341009107506574e-05, "loss": 1.3879, "step": 1641000 }, { "epoch": 17.32, "learning_rate": 4.133837077999514e-05, "loss": 1.4221, "step": 1641500 }, { "epoch": 17.33, "learning_rate": 4.1335732452483725e-05, "loss": 1.4657, "step": 1642000 }, { "epoch": 17.33, "learning_rate": 4.13330941249723e-05, "loss": 1.3487, "step": 1642500 }, { "epoch": 17.34, "learning_rate": 4.1330455797460876e-05, "loss": 1.4318, "step": 1643000 }, { "epoch": 17.34, "learning_rate": 4.132781746994945e-05, "loss": 1.4211, "step": 1643500 }, { "epoch": 17.35, "learning_rate": 4.132517914243803e-05, "loss": 1.4071, "step": 1644000 }, { "epoch": 17.35, "learning_rate": 4.13225408149266e-05, "loss": 1.3708, "step": 1644500 }, { "epoch": 17.36, "learning_rate": 4.131990248741518e-05, "loss": 1.4038, "step": 1645000 }, { "epoch": 17.37, "learning_rate": 4.1317264159903754e-05, "loss": 1.4369, "step": 1645500 }, { "epoch": 17.37, "learning_rate": 4.1314625832392336e-05, "loss": 1.4657, "step": 1646000 }, { "epoch": 17.38, "learning_rate": 4.1311987504880905e-05, "loss": 1.4115, "step": 1646500 }, { "epoch": 17.38, "learning_rate": 4.130934917736948e-05, "loss": 1.4072, "step": 1647000 }, { "epoch": 17.39, "learning_rate": 4.130671084985806e-05, "loss": 1.4551, "step": 1647500 }, { "epoch": 17.39, "learning_rate": 4.130407252234664e-05, "loss": 1.4117, "step": 1648000 }, { "epoch": 17.4, "learning_rate": 4.1301434194835213e-05, "loss": 1.4944, "step": 1648500 }, { "epoch": 17.4, "learning_rate": 4.129879586732379e-05, "loss": 1.3961, "step": 1649000 }, { "epoch": 17.41, "learning_rate": 4.1296157539812364e-05, "loss": 1.4155, "step": 1649500 }, { "epoch": 17.41, "learning_rate": 4.129351921230094e-05, "loss": 1.4082, "step": 1650000 }, { "epoch": 17.42, "learning_rate": 4.1290880884789515e-05, "loss": 1.3698, "step": 1650500 }, { "epoch": 17.42, "learning_rate": 4.128824255727809e-05, "loss": 1.3938, "step": 1651000 }, { "epoch": 17.43, "learning_rate": 4.1285604229766666e-05, "loss": 1.4001, "step": 1651500 }, { "epoch": 17.43, "learning_rate": 4.128296590225524e-05, "loss": 1.3852, "step": 1652000 }, { "epoch": 17.44, "learning_rate": 4.1280327574743824e-05, "loss": 1.4102, "step": 1652500 }, { "epoch": 17.44, "learning_rate": 4.12776892472324e-05, "loss": 1.3813, "step": 1653000 }, { "epoch": 17.45, "learning_rate": 4.127505091972097e-05, "loss": 1.4692, "step": 1653500 }, { "epoch": 17.46, "learning_rate": 4.127241259220955e-05, "loss": 1.3874, "step": 1654000 }, { "epoch": 17.46, "learning_rate": 4.1269774264698126e-05, "loss": 1.375, "step": 1654500 }, { "epoch": 17.47, "learning_rate": 4.12671359371867e-05, "loss": 1.4102, "step": 1655000 }, { "epoch": 17.47, "learning_rate": 4.126449760967528e-05, "loss": 1.4387, "step": 1655500 }, { "epoch": 17.48, "learning_rate": 4.126185928216385e-05, "loss": 1.3957, "step": 1656000 }, { "epoch": 17.48, "learning_rate": 4.125922095465243e-05, "loss": 1.3434, "step": 1656500 }, { "epoch": 17.49, "learning_rate": 4.1256582627141003e-05, "loss": 1.4455, "step": 1657000 }, { "epoch": 17.49, "learning_rate": 4.125394429962958e-05, "loss": 1.4251, "step": 1657500 }, { "epoch": 17.5, "learning_rate": 4.125130597211816e-05, "loss": 1.3911, "step": 1658000 }, { "epoch": 17.5, "learning_rate": 4.124866764460673e-05, "loss": 1.4198, "step": 1658500 }, { "epoch": 17.51, "learning_rate": 4.1246029317095305e-05, "loss": 1.3944, "step": 1659000 }, { "epoch": 17.51, "learning_rate": 4.124339098958389e-05, "loss": 1.409, "step": 1659500 }, { "epoch": 17.52, "learning_rate": 4.124075266207246e-05, "loss": 1.3751, "step": 1660000 }, { "epoch": 17.52, "learning_rate": 4.123811433456103e-05, "loss": 1.3848, "step": 1660500 }, { "epoch": 17.53, "learning_rate": 4.1235476007049614e-05, "loss": 1.4015, "step": 1661000 }, { "epoch": 17.53, "learning_rate": 4.123283767953819e-05, "loss": 1.4784, "step": 1661500 }, { "epoch": 17.54, "learning_rate": 4.1230199352026765e-05, "loss": 1.4108, "step": 1662000 }, { "epoch": 17.54, "learning_rate": 4.122756102451534e-05, "loss": 1.4127, "step": 1662500 }, { "epoch": 17.55, "learning_rate": 4.1224922697003916e-05, "loss": 1.4451, "step": 1663000 }, { "epoch": 17.56, "learning_rate": 4.122228436949249e-05, "loss": 1.3879, "step": 1663500 }, { "epoch": 17.56, "learning_rate": 4.121964604198107e-05, "loss": 1.4318, "step": 1664000 }, { "epoch": 17.57, "learning_rate": 4.121700771446965e-05, "loss": 1.437, "step": 1664500 }, { "epoch": 17.57, "learning_rate": 4.1214369386958225e-05, "loss": 1.4077, "step": 1665000 }, { "epoch": 17.58, "learning_rate": 4.1211731059446794e-05, "loss": 1.3917, "step": 1665500 }, { "epoch": 17.58, "learning_rate": 4.1209092731935376e-05, "loss": 1.4233, "step": 1666000 }, { "epoch": 17.59, "learning_rate": 4.120645440442395e-05, "loss": 1.392, "step": 1666500 }, { "epoch": 17.59, "learning_rate": 4.120381607691253e-05, "loss": 1.3881, "step": 1667000 }, { "epoch": 17.6, "learning_rate": 4.12011777494011e-05, "loss": 1.3747, "step": 1667500 }, { "epoch": 17.6, "learning_rate": 4.119853942188968e-05, "loss": 1.376, "step": 1668000 }, { "epoch": 17.61, "learning_rate": 4.119590109437825e-05, "loss": 1.4042, "step": 1668500 }, { "epoch": 17.61, "learning_rate": 4.119326276686683e-05, "loss": 1.4273, "step": 1669000 }, { "epoch": 17.62, "learning_rate": 4.1190624439355404e-05, "loss": 1.3918, "step": 1669500 }, { "epoch": 17.62, "learning_rate": 4.118798611184398e-05, "loss": 1.4206, "step": 1670000 }, { "epoch": 17.63, "learning_rate": 4.1185347784332555e-05, "loss": 1.4635, "step": 1670500 }, { "epoch": 17.63, "learning_rate": 4.118270945682113e-05, "loss": 1.4158, "step": 1671000 }, { "epoch": 17.64, "learning_rate": 4.118007112930971e-05, "loss": 1.3758, "step": 1671500 }, { "epoch": 17.65, "learning_rate": 4.117743280179829e-05, "loss": 1.4426, "step": 1672000 }, { "epoch": 17.65, "learning_rate": 4.117479447428686e-05, "loss": 1.3776, "step": 1672500 }, { "epoch": 17.66, "learning_rate": 4.117215614677544e-05, "loss": 1.4144, "step": 1673000 }, { "epoch": 17.66, "learning_rate": 4.1169517819264015e-05, "loss": 1.4319, "step": 1673500 }, { "epoch": 17.67, "learning_rate": 4.116687949175259e-05, "loss": 1.3946, "step": 1674000 }, { "epoch": 17.67, "learning_rate": 4.1164241164241166e-05, "loss": 1.4193, "step": 1674500 }, { "epoch": 17.68, "learning_rate": 4.116160283672974e-05, "loss": 1.4442, "step": 1675000 }, { "epoch": 17.68, "learning_rate": 4.115896450921832e-05, "loss": 1.4038, "step": 1675500 }, { "epoch": 17.69, "learning_rate": 4.115632618170689e-05, "loss": 1.3859, "step": 1676000 }, { "epoch": 17.69, "learning_rate": 4.1153687854195475e-05, "loss": 1.4027, "step": 1676500 }, { "epoch": 17.7, "learning_rate": 4.115104952668405e-05, "loss": 1.4258, "step": 1677000 }, { "epoch": 17.7, "learning_rate": 4.114841119917262e-05, "loss": 1.4403, "step": 1677500 }, { "epoch": 17.71, "learning_rate": 4.11457728716612e-05, "loss": 1.4458, "step": 1678000 }, { "epoch": 17.71, "learning_rate": 4.1143134544149777e-05, "loss": 1.372, "step": 1678500 }, { "epoch": 17.72, "learning_rate": 4.114049621663835e-05, "loss": 1.4563, "step": 1679000 }, { "epoch": 17.72, "learning_rate": 4.113785788912692e-05, "loss": 1.3458, "step": 1679500 }, { "epoch": 17.73, "learning_rate": 4.11352195616155e-05, "loss": 1.4158, "step": 1680000 }, { "epoch": 17.73, "learning_rate": 4.113258123410408e-05, "loss": 1.4461, "step": 1680500 }, { "epoch": 17.74, "learning_rate": 4.1129942906592654e-05, "loss": 1.4106, "step": 1681000 }, { "epoch": 17.75, "learning_rate": 4.112730457908123e-05, "loss": 1.3313, "step": 1681500 }, { "epoch": 17.75, "learning_rate": 4.1124666251569805e-05, "loss": 1.4166, "step": 1682000 }, { "epoch": 17.76, "learning_rate": 4.112202792405838e-05, "loss": 1.4184, "step": 1682500 }, { "epoch": 17.76, "learning_rate": 4.1119389596546956e-05, "loss": 1.4705, "step": 1683000 }, { "epoch": 17.77, "learning_rate": 4.111675126903554e-05, "loss": 1.4387, "step": 1683500 }, { "epoch": 17.77, "learning_rate": 4.1114112941524114e-05, "loss": 1.4419, "step": 1684000 }, { "epoch": 17.78, "learning_rate": 4.111147461401268e-05, "loss": 1.3749, "step": 1684500 }, { "epoch": 17.78, "learning_rate": 4.1108836286501265e-05, "loss": 1.4237, "step": 1685000 }, { "epoch": 17.79, "learning_rate": 4.110619795898984e-05, "loss": 1.408, "step": 1685500 }, { "epoch": 17.79, "learning_rate": 4.1103559631478416e-05, "loss": 1.4028, "step": 1686000 }, { "epoch": 17.8, "learning_rate": 4.110092130396699e-05, "loss": 1.3431, "step": 1686500 }, { "epoch": 17.8, "learning_rate": 4.1098282976455567e-05, "loss": 1.4033, "step": 1687000 }, { "epoch": 17.81, "learning_rate": 4.109564464894414e-05, "loss": 1.3624, "step": 1687500 }, { "epoch": 17.81, "learning_rate": 4.109300632143272e-05, "loss": 1.3997, "step": 1688000 }, { "epoch": 17.82, "learning_rate": 4.10903679939213e-05, "loss": 1.4175, "step": 1688500 }, { "epoch": 17.82, "learning_rate": 4.108772966640987e-05, "loss": 1.4081, "step": 1689000 }, { "epoch": 17.83, "learning_rate": 4.1085091338898444e-05, "loss": 1.4452, "step": 1689500 }, { "epoch": 17.84, "learning_rate": 4.1082453011387026e-05, "loss": 1.4137, "step": 1690000 }, { "epoch": 17.84, "learning_rate": 4.10798146838756e-05, "loss": 1.4287, "step": 1690500 }, { "epoch": 17.85, "learning_rate": 4.107717635636418e-05, "loss": 1.4189, "step": 1691000 }, { "epoch": 17.85, "learning_rate": 4.1074538028852746e-05, "loss": 1.4131, "step": 1691500 }, { "epoch": 17.86, "learning_rate": 4.107189970134133e-05, "loss": 1.4264, "step": 1692000 }, { "epoch": 17.86, "learning_rate": 4.1069261373829904e-05, "loss": 1.4024, "step": 1692500 }, { "epoch": 17.87, "learning_rate": 4.106662304631848e-05, "loss": 1.436, "step": 1693000 }, { "epoch": 17.87, "learning_rate": 4.106398471880706e-05, "loss": 1.3802, "step": 1693500 }, { "epoch": 17.88, "learning_rate": 4.106134639129563e-05, "loss": 1.3979, "step": 1694000 }, { "epoch": 17.88, "learning_rate": 4.1058708063784206e-05, "loss": 1.4186, "step": 1694500 }, { "epoch": 17.89, "learning_rate": 4.105606973627278e-05, "loss": 1.4537, "step": 1695000 }, { "epoch": 17.89, "learning_rate": 4.1053431408761363e-05, "loss": 1.3824, "step": 1695500 }, { "epoch": 17.9, "learning_rate": 4.105079308124994e-05, "loss": 1.4055, "step": 1696000 }, { "epoch": 17.9, "learning_rate": 4.104815475373851e-05, "loss": 1.4082, "step": 1696500 }, { "epoch": 17.91, "learning_rate": 4.104551642622709e-05, "loss": 1.3764, "step": 1697000 }, { "epoch": 17.91, "learning_rate": 4.1042878098715665e-05, "loss": 1.3854, "step": 1697500 }, { "epoch": 17.92, "learning_rate": 4.104023977120424e-05, "loss": 1.3957, "step": 1698000 }, { "epoch": 17.92, "learning_rate": 4.1037601443692816e-05, "loss": 1.4096, "step": 1698500 }, { "epoch": 17.93, "learning_rate": 4.103496311618139e-05, "loss": 1.3663, "step": 1699000 }, { "epoch": 17.94, "learning_rate": 4.103232478866997e-05, "loss": 1.4467, "step": 1699500 }, { "epoch": 17.94, "learning_rate": 4.102968646115854e-05, "loss": 1.3877, "step": 1700000 }, { "epoch": 17.95, "learning_rate": 4.1027048133647125e-05, "loss": 1.3571, "step": 1700500 }, { "epoch": 17.95, "learning_rate": 4.1024409806135694e-05, "loss": 1.4084, "step": 1701000 }, { "epoch": 17.96, "learning_rate": 4.102177147862427e-05, "loss": 1.367, "step": 1701500 }, { "epoch": 17.96, "learning_rate": 4.101913315111285e-05, "loss": 1.4394, "step": 1702000 }, { "epoch": 17.97, "learning_rate": 4.101649482360143e-05, "loss": 1.3515, "step": 1702500 }, { "epoch": 17.97, "learning_rate": 4.101385649609e-05, "loss": 1.3886, "step": 1703000 }, { "epoch": 17.98, "learning_rate": 4.101121816857857e-05, "loss": 1.4163, "step": 1703500 }, { "epoch": 17.98, "learning_rate": 4.1008579841067154e-05, "loss": 1.4178, "step": 1704000 }, { "epoch": 17.99, "learning_rate": 4.100594151355573e-05, "loss": 1.4481, "step": 1704500 }, { "epoch": 17.99, "learning_rate": 4.1003303186044304e-05, "loss": 1.395, "step": 1705000 }, { "epoch": 18.0, "learning_rate": 4.100066485853289e-05, "loss": 1.3864, "step": 1705500 }, { "epoch": 18.0, "learning_rate": 4.0998026531021455e-05, "loss": 1.4637, "step": 1706000 }, { "epoch": 18.01, "learning_rate": 4.099538820351003e-05, "loss": 1.3979, "step": 1706500 }, { "epoch": 18.01, "learning_rate": 4.0992749875998606e-05, "loss": 1.4055, "step": 1707000 }, { "epoch": 18.02, "learning_rate": 4.099011154848719e-05, "loss": 1.3972, "step": 1707500 }, { "epoch": 18.03, "learning_rate": 4.098747322097576e-05, "loss": 1.4098, "step": 1708000 }, { "epoch": 18.03, "learning_rate": 4.098483489346433e-05, "loss": 1.414, "step": 1708500 }, { "epoch": 18.04, "learning_rate": 4.0982196565952915e-05, "loss": 1.3378, "step": 1709000 }, { "epoch": 18.04, "learning_rate": 4.097955823844149e-05, "loss": 1.4574, "step": 1709500 }, { "epoch": 18.05, "learning_rate": 4.0976919910930066e-05, "loss": 1.4028, "step": 1710000 }, { "epoch": 18.05, "learning_rate": 4.097428158341864e-05, "loss": 1.4227, "step": 1710500 }, { "epoch": 18.06, "learning_rate": 4.097164325590722e-05, "loss": 1.4289, "step": 1711000 }, { "epoch": 18.06, "learning_rate": 4.096900492839579e-05, "loss": 1.4783, "step": 1711500 }, { "epoch": 18.07, "learning_rate": 4.096636660088437e-05, "loss": 1.4166, "step": 1712000 }, { "epoch": 18.07, "learning_rate": 4.096372827337295e-05, "loss": 1.3661, "step": 1712500 }, { "epoch": 18.08, "learning_rate": 4.096108994586152e-05, "loss": 1.3777, "step": 1713000 }, { "epoch": 18.08, "learning_rate": 4.0958451618350095e-05, "loss": 1.4439, "step": 1713500 }, { "epoch": 18.09, "learning_rate": 4.095581329083868e-05, "loss": 1.406, "step": 1714000 }, { "epoch": 18.09, "learning_rate": 4.095317496332725e-05, "loss": 1.3514, "step": 1714500 }, { "epoch": 18.1, "learning_rate": 4.095053663581583e-05, "loss": 1.4041, "step": 1715000 }, { "epoch": 18.1, "learning_rate": 4.09478983083044e-05, "loss": 1.4694, "step": 1715500 }, { "epoch": 18.11, "learning_rate": 4.094525998079298e-05, "loss": 1.3818, "step": 1716000 }, { "epoch": 18.11, "learning_rate": 4.0942621653281554e-05, "loss": 1.4226, "step": 1716500 }, { "epoch": 18.12, "learning_rate": 4.093998332577013e-05, "loss": 1.3911, "step": 1717000 }, { "epoch": 18.13, "learning_rate": 4.0937344998258705e-05, "loss": 1.3881, "step": 1717500 }, { "epoch": 18.13, "learning_rate": 4.093470667074728e-05, "loss": 1.3749, "step": 1718000 }, { "epoch": 18.14, "learning_rate": 4.0932068343235856e-05, "loss": 1.4006, "step": 1718500 }, { "epoch": 18.14, "learning_rate": 4.092943001572443e-05, "loss": 1.3855, "step": 1719000 }, { "epoch": 18.15, "learning_rate": 4.0926791688213014e-05, "loss": 1.3801, "step": 1719500 }, { "epoch": 18.15, "learning_rate": 4.092415336070158e-05, "loss": 1.415, "step": 1720000 }, { "epoch": 18.16, "learning_rate": 4.092151503319016e-05, "loss": 1.4261, "step": 1720500 }, { "epoch": 18.16, "learning_rate": 4.091887670567874e-05, "loss": 1.3356, "step": 1721000 }, { "epoch": 18.17, "learning_rate": 4.0916238378167316e-05, "loss": 1.3731, "step": 1721500 }, { "epoch": 18.17, "learning_rate": 4.091360005065589e-05, "loss": 1.3935, "step": 1722000 }, { "epoch": 18.18, "learning_rate": 4.091096172314447e-05, "loss": 1.4272, "step": 1722500 }, { "epoch": 18.18, "learning_rate": 4.090832339563304e-05, "loss": 1.4481, "step": 1723000 }, { "epoch": 18.19, "learning_rate": 4.090568506812162e-05, "loss": 1.3989, "step": 1723500 }, { "epoch": 18.19, "learning_rate": 4.090304674061019e-05, "loss": 1.4065, "step": 1724000 }, { "epoch": 18.2, "learning_rate": 4.0900408413098776e-05, "loss": 1.3788, "step": 1724500 }, { "epoch": 18.2, "learning_rate": 4.0897770085587344e-05, "loss": 1.4143, "step": 1725000 }, { "epoch": 18.21, "learning_rate": 4.089513175807592e-05, "loss": 1.4343, "step": 1725500 }, { "epoch": 18.22, "learning_rate": 4.08924934305645e-05, "loss": 1.4069, "step": 1726000 }, { "epoch": 18.22, "learning_rate": 4.088985510305308e-05, "loss": 1.3818, "step": 1726500 }, { "epoch": 18.23, "learning_rate": 4.0887216775541646e-05, "loss": 1.3914, "step": 1727000 }, { "epoch": 18.23, "learning_rate": 4.088457844803023e-05, "loss": 1.3951, "step": 1727500 }, { "epoch": 18.24, "learning_rate": 4.0881940120518804e-05, "loss": 1.436, "step": 1728000 }, { "epoch": 18.24, "learning_rate": 4.087930179300738e-05, "loss": 1.3744, "step": 1728500 }, { "epoch": 18.25, "learning_rate": 4.0876663465495955e-05, "loss": 1.4525, "step": 1729000 }, { "epoch": 18.25, "learning_rate": 4.087402513798453e-05, "loss": 1.4713, "step": 1729500 }, { "epoch": 18.26, "learning_rate": 4.0871386810473106e-05, "loss": 1.4495, "step": 1730000 }, { "epoch": 18.26, "learning_rate": 4.086874848296168e-05, "loss": 1.396, "step": 1730500 }, { "epoch": 18.27, "learning_rate": 4.086611015545026e-05, "loss": 1.374, "step": 1731000 }, { "epoch": 18.27, "learning_rate": 4.086347182793884e-05, "loss": 1.3902, "step": 1731500 }, { "epoch": 18.28, "learning_rate": 4.086083350042741e-05, "loss": 1.3647, "step": 1732000 }, { "epoch": 18.28, "learning_rate": 4.085819517291598e-05, "loss": 1.3832, "step": 1732500 }, { "epoch": 18.29, "learning_rate": 4.0855556845404566e-05, "loss": 1.4342, "step": 1733000 }, { "epoch": 18.29, "learning_rate": 4.085291851789314e-05, "loss": 1.4427, "step": 1733500 }, { "epoch": 18.3, "learning_rate": 4.085028019038172e-05, "loss": 1.341, "step": 1734000 }, { "epoch": 18.3, "learning_rate": 4.084764186287029e-05, "loss": 1.4681, "step": 1734500 }, { "epoch": 18.31, "learning_rate": 4.084500353535887e-05, "loss": 1.394, "step": 1735000 }, { "epoch": 18.32, "learning_rate": 4.084236520784744e-05, "loss": 1.4121, "step": 1735500 }, { "epoch": 18.32, "learning_rate": 4.083972688033602e-05, "loss": 1.4604, "step": 1736000 }, { "epoch": 18.33, "learning_rate": 4.0837088552824594e-05, "loss": 1.3862, "step": 1736500 }, { "epoch": 18.33, "learning_rate": 4.083445022531317e-05, "loss": 1.3952, "step": 1737000 }, { "epoch": 18.34, "learning_rate": 4.0831811897801745e-05, "loss": 1.4741, "step": 1737500 }, { "epoch": 18.34, "learning_rate": 4.082917357029033e-05, "loss": 1.3878, "step": 1738000 }, { "epoch": 18.35, "learning_rate": 4.08265352427789e-05, "loss": 1.4202, "step": 1738500 }, { "epoch": 18.35, "learning_rate": 4.082389691526747e-05, "loss": 1.4692, "step": 1739000 }, { "epoch": 18.36, "learning_rate": 4.0821258587756054e-05, "loss": 1.423, "step": 1739500 }, { "epoch": 18.36, "learning_rate": 4.081862026024463e-05, "loss": 1.3663, "step": 1740000 }, { "epoch": 18.37, "learning_rate": 4.0815981932733205e-05, "loss": 1.4436, "step": 1740500 }, { "epoch": 18.37, "learning_rate": 4.081334360522178e-05, "loss": 1.4039, "step": 1741000 }, { "epoch": 18.38, "learning_rate": 4.0810705277710356e-05, "loss": 1.3768, "step": 1741500 }, { "epoch": 18.38, "learning_rate": 4.080806695019893e-05, "loss": 1.3571, "step": 1742000 }, { "epoch": 18.39, "learning_rate": 4.080542862268751e-05, "loss": 1.4203, "step": 1742500 }, { "epoch": 18.39, "learning_rate": 4.080279029517608e-05, "loss": 1.3783, "step": 1743000 }, { "epoch": 18.4, "learning_rate": 4.0800151967664664e-05, "loss": 1.3761, "step": 1743500 }, { "epoch": 18.4, "learning_rate": 4.079751364015323e-05, "loss": 1.445, "step": 1744000 }, { "epoch": 18.41, "learning_rate": 4.079487531264181e-05, "loss": 1.3615, "step": 1744500 }, { "epoch": 18.42, "learning_rate": 4.079223698513039e-05, "loss": 1.4041, "step": 1745000 }, { "epoch": 18.42, "learning_rate": 4.0789598657618966e-05, "loss": 1.4385, "step": 1745500 }, { "epoch": 18.43, "learning_rate": 4.0786960330107535e-05, "loss": 1.4075, "step": 1746000 }, { "epoch": 18.43, "learning_rate": 4.078432200259612e-05, "loss": 1.4118, "step": 1746500 }, { "epoch": 18.44, "learning_rate": 4.078168367508469e-05, "loss": 1.3896, "step": 1747000 }, { "epoch": 18.44, "learning_rate": 4.077904534757327e-05, "loss": 1.3702, "step": 1747500 }, { "epoch": 18.45, "learning_rate": 4.0776407020061844e-05, "loss": 1.3957, "step": 1748000 }, { "epoch": 18.45, "learning_rate": 4.077376869255042e-05, "loss": 1.3821, "step": 1748500 }, { "epoch": 18.46, "learning_rate": 4.0771130365038995e-05, "loss": 1.4022, "step": 1749000 }, { "epoch": 18.46, "learning_rate": 4.076849203752757e-05, "loss": 1.4358, "step": 1749500 }, { "epoch": 18.47, "learning_rate": 4.076585371001615e-05, "loss": 1.3689, "step": 1750000 }, { "epoch": 18.47, "learning_rate": 4.076321538250473e-05, "loss": 1.3837, "step": 1750500 }, { "epoch": 18.48, "learning_rate": 4.07605770549933e-05, "loss": 1.431, "step": 1751000 }, { "epoch": 18.48, "learning_rate": 4.075793872748188e-05, "loss": 1.4075, "step": 1751500 }, { "epoch": 18.49, "learning_rate": 4.0755300399970454e-05, "loss": 1.3728, "step": 1752000 }, { "epoch": 18.49, "learning_rate": 4.075266207245903e-05, "loss": 1.3977, "step": 1752500 }, { "epoch": 18.5, "learning_rate": 4.0750023744947605e-05, "loss": 1.3597, "step": 1753000 }, { "epoch": 18.51, "learning_rate": 4.074738541743618e-05, "loss": 1.4623, "step": 1753500 }, { "epoch": 18.51, "learning_rate": 4.0744747089924756e-05, "loss": 1.4116, "step": 1754000 }, { "epoch": 18.52, "learning_rate": 4.074210876241333e-05, "loss": 1.3637, "step": 1754500 }, { "epoch": 18.52, "learning_rate": 4.073947043490191e-05, "loss": 1.3885, "step": 1755000 }, { "epoch": 18.53, "learning_rate": 4.073683210739048e-05, "loss": 1.4205, "step": 1755500 }, { "epoch": 18.53, "learning_rate": 4.073419377987906e-05, "loss": 1.4105, "step": 1756000 }, { "epoch": 18.54, "learning_rate": 4.0731555452367634e-05, "loss": 1.4063, "step": 1756500 }, { "epoch": 18.54, "learning_rate": 4.0728917124856216e-05, "loss": 1.3974, "step": 1757000 }, { "epoch": 18.55, "learning_rate": 4.072627879734479e-05, "loss": 1.4187, "step": 1757500 }, { "epoch": 18.55, "learning_rate": 4.072364046983336e-05, "loss": 1.4469, "step": 1758000 }, { "epoch": 18.56, "learning_rate": 4.072100214232194e-05, "loss": 1.3838, "step": 1758500 }, { "epoch": 18.56, "learning_rate": 4.071836381481052e-05, "loss": 1.4305, "step": 1759000 }, { "epoch": 18.57, "learning_rate": 4.0715725487299094e-05, "loss": 1.3648, "step": 1759500 }, { "epoch": 18.57, "learning_rate": 4.071308715978767e-05, "loss": 1.4272, "step": 1760000 }, { "epoch": 18.58, "learning_rate": 4.0710448832276245e-05, "loss": 1.4026, "step": 1760500 }, { "epoch": 18.58, "learning_rate": 4.070781050476482e-05, "loss": 1.4187, "step": 1761000 }, { "epoch": 18.59, "learning_rate": 4.0705172177253396e-05, "loss": 1.4385, "step": 1761500 }, { "epoch": 18.59, "learning_rate": 4.070253384974198e-05, "loss": 1.425, "step": 1762000 }, { "epoch": 18.6, "learning_rate": 4.069989552223055e-05, "loss": 1.3813, "step": 1762500 }, { "epoch": 18.61, "learning_rate": 4.069725719471912e-05, "loss": 1.4353, "step": 1763000 }, { "epoch": 18.61, "learning_rate": 4.0694618867207704e-05, "loss": 1.4273, "step": 1763500 }, { "epoch": 18.62, "learning_rate": 4.069198053969628e-05, "loss": 1.3862, "step": 1764000 }, { "epoch": 18.62, "learning_rate": 4.0689342212184855e-05, "loss": 1.3725, "step": 1764500 }, { "epoch": 18.63, "learning_rate": 4.0686703884673424e-05, "loss": 1.4, "step": 1765000 }, { "epoch": 18.63, "learning_rate": 4.0684065557162006e-05, "loss": 1.4487, "step": 1765500 }, { "epoch": 18.64, "learning_rate": 4.068142722965058e-05, "loss": 1.3975, "step": 1766000 }, { "epoch": 18.64, "learning_rate": 4.067878890213916e-05, "loss": 1.4325, "step": 1766500 }, { "epoch": 18.65, "learning_rate": 4.067615057462773e-05, "loss": 1.4048, "step": 1767000 }, { "epoch": 18.65, "learning_rate": 4.067351224711631e-05, "loss": 1.3742, "step": 1767500 }, { "epoch": 18.66, "learning_rate": 4.0670873919604884e-05, "loss": 1.4373, "step": 1768000 }, { "epoch": 18.66, "learning_rate": 4.066823559209346e-05, "loss": 1.3741, "step": 1768500 }, { "epoch": 18.67, "learning_rate": 4.066559726458204e-05, "loss": 1.3871, "step": 1769000 }, { "epoch": 18.67, "learning_rate": 4.066295893707062e-05, "loss": 1.415, "step": 1769500 }, { "epoch": 18.68, "learning_rate": 4.0660320609559186e-05, "loss": 1.4055, "step": 1770000 }, { "epoch": 18.68, "learning_rate": 4.065768228204777e-05, "loss": 1.3852, "step": 1770500 }, { "epoch": 18.69, "learning_rate": 4.065504395453634e-05, "loss": 1.4023, "step": 1771000 }, { "epoch": 18.7, "learning_rate": 4.065240562702492e-05, "loss": 1.3843, "step": 1771500 }, { "epoch": 18.7, "learning_rate": 4.0649767299513494e-05, "loss": 1.4387, "step": 1772000 }, { "epoch": 18.71, "learning_rate": 4.064712897200207e-05, "loss": 1.3947, "step": 1772500 }, { "epoch": 18.71, "learning_rate": 4.0644490644490645e-05, "loss": 1.4227, "step": 1773000 }, { "epoch": 18.72, "learning_rate": 4.064185231697922e-05, "loss": 1.4439, "step": 1773500 }, { "epoch": 18.72, "learning_rate": 4.06392139894678e-05, "loss": 1.3833, "step": 1774000 }, { "epoch": 18.73, "learning_rate": 4.063657566195637e-05, "loss": 1.386, "step": 1774500 }, { "epoch": 18.73, "learning_rate": 4.063393733444495e-05, "loss": 1.4227, "step": 1775000 }, { "epoch": 18.74, "learning_rate": 4.063129900693353e-05, "loss": 1.4729, "step": 1775500 }, { "epoch": 18.74, "learning_rate": 4.0628660679422105e-05, "loss": 1.4374, "step": 1776000 }, { "epoch": 18.75, "learning_rate": 4.062602235191068e-05, "loss": 1.4193, "step": 1776500 }, { "epoch": 18.75, "learning_rate": 4.062338402439925e-05, "loss": 1.3928, "step": 1777000 }, { "epoch": 18.76, "learning_rate": 4.062074569688783e-05, "loss": 1.3704, "step": 1777500 }, { "epoch": 18.76, "learning_rate": 4.061810736937641e-05, "loss": 1.4303, "step": 1778000 }, { "epoch": 18.77, "learning_rate": 4.061546904186498e-05, "loss": 1.3718, "step": 1778500 }, { "epoch": 18.77, "learning_rate": 4.0612830714353565e-05, "loss": 1.3847, "step": 1779000 }, { "epoch": 18.78, "learning_rate": 4.0610192386842133e-05, "loss": 1.3831, "step": 1779500 }, { "epoch": 18.78, "learning_rate": 4.060755405933071e-05, "loss": 1.4217, "step": 1780000 }, { "epoch": 18.79, "learning_rate": 4.0604915731819284e-05, "loss": 1.4196, "step": 1780500 }, { "epoch": 18.8, "learning_rate": 4.060227740430787e-05, "loss": 1.3967, "step": 1781000 }, { "epoch": 18.8, "learning_rate": 4.059963907679644e-05, "loss": 1.4133, "step": 1781500 }, { "epoch": 18.81, "learning_rate": 4.059700074928501e-05, "loss": 1.4086, "step": 1782000 }, { "epoch": 18.81, "learning_rate": 4.059436242177359e-05, "loss": 1.4077, "step": 1782500 }, { "epoch": 18.82, "learning_rate": 4.059172409426217e-05, "loss": 1.42, "step": 1783000 }, { "epoch": 18.82, "learning_rate": 4.0589085766750744e-05, "loss": 1.3795, "step": 1783500 }, { "epoch": 18.83, "learning_rate": 4.058644743923932e-05, "loss": 1.3886, "step": 1784000 }, { "epoch": 18.83, "learning_rate": 4.0583809111727895e-05, "loss": 1.3601, "step": 1784500 }, { "epoch": 18.84, "learning_rate": 4.058117078421647e-05, "loss": 1.4413, "step": 1785000 }, { "epoch": 18.84, "learning_rate": 4.0578532456705046e-05, "loss": 1.4182, "step": 1785500 }, { "epoch": 18.85, "learning_rate": 4.057589412919363e-05, "loss": 1.4528, "step": 1786000 }, { "epoch": 18.85, "learning_rate": 4.05732558016822e-05, "loss": 1.4288, "step": 1786500 }, { "epoch": 18.86, "learning_rate": 4.057061747417077e-05, "loss": 1.3878, "step": 1787000 }, { "epoch": 18.86, "learning_rate": 4.0567979146659355e-05, "loss": 1.4401, "step": 1787500 }, { "epoch": 18.87, "learning_rate": 4.056534081914793e-05, "loss": 1.3424, "step": 1788000 }, { "epoch": 18.87, "learning_rate": 4.0562702491636506e-05, "loss": 1.4719, "step": 1788500 }, { "epoch": 18.88, "learning_rate": 4.0560064164125074e-05, "loss": 1.3932, "step": 1789000 }, { "epoch": 18.89, "learning_rate": 4.055742583661366e-05, "loss": 1.4019, "step": 1789500 }, { "epoch": 18.89, "learning_rate": 4.055478750910223e-05, "loss": 1.4661, "step": 1790000 }, { "epoch": 18.9, "learning_rate": 4.055214918159081e-05, "loss": 1.441, "step": 1790500 }, { "epoch": 18.9, "learning_rate": 4.054951085407939e-05, "loss": 1.3353, "step": 1791000 }, { "epoch": 18.91, "learning_rate": 4.054687252656796e-05, "loss": 1.3993, "step": 1791500 }, { "epoch": 18.91, "learning_rate": 4.0544234199056534e-05, "loss": 1.495, "step": 1792000 }, { "epoch": 18.92, "learning_rate": 4.054159587154511e-05, "loss": 1.4303, "step": 1792500 }, { "epoch": 18.92, "learning_rate": 4.053895754403369e-05, "loss": 1.4076, "step": 1793000 }, { "epoch": 18.93, "learning_rate": 4.053631921652226e-05, "loss": 1.3858, "step": 1793500 }, { "epoch": 18.93, "learning_rate": 4.0533680889010836e-05, "loss": 1.4697, "step": 1794000 }, { "epoch": 18.94, "learning_rate": 4.053104256149942e-05, "loss": 1.4348, "step": 1794500 }, { "epoch": 18.94, "learning_rate": 4.0528404233987994e-05, "loss": 1.378, "step": 1795000 }, { "epoch": 18.95, "learning_rate": 4.052576590647657e-05, "loss": 1.3566, "step": 1795500 }, { "epoch": 18.95, "learning_rate": 4.0523127578965145e-05, "loss": 1.3959, "step": 1796000 }, { "epoch": 18.96, "learning_rate": 4.052048925145372e-05, "loss": 1.3865, "step": 1796500 }, { "epoch": 18.96, "learning_rate": 4.0517850923942296e-05, "loss": 1.4257, "step": 1797000 }, { "epoch": 18.97, "learning_rate": 4.051521259643087e-05, "loss": 1.3747, "step": 1797500 }, { "epoch": 18.97, "learning_rate": 4.0512574268919454e-05, "loss": 1.367, "step": 1798000 }, { "epoch": 18.98, "learning_rate": 4.050993594140802e-05, "loss": 1.4048, "step": 1798500 }, { "epoch": 18.99, "learning_rate": 4.05072976138966e-05, "loss": 1.4598, "step": 1799000 }, { "epoch": 18.99, "learning_rate": 4.050465928638518e-05, "loss": 1.4001, "step": 1799500 }, { "epoch": 19.0, "learning_rate": 4.0502020958873755e-05, "loss": 1.3849, "step": 1800000 }, { "epoch": 19.0, "learning_rate": 4.049938263136233e-05, "loss": 1.3229, "step": 1800500 }, { "epoch": 19.01, "learning_rate": 4.0496744303850906e-05, "loss": 1.4023, "step": 1801000 }, { "epoch": 19.01, "learning_rate": 4.049410597633948e-05, "loss": 1.4199, "step": 1801500 }, { "epoch": 19.02, "learning_rate": 4.049146764882806e-05, "loss": 1.3348, "step": 1802000 }, { "epoch": 19.02, "learning_rate": 4.048882932131663e-05, "loss": 1.423, "step": 1802500 }, { "epoch": 19.03, "learning_rate": 4.048619099380521e-05, "loss": 1.4232, "step": 1803000 }, { "epoch": 19.03, "learning_rate": 4.0483552666293784e-05, "loss": 1.4228, "step": 1803500 }, { "epoch": 19.04, "learning_rate": 4.048091433878236e-05, "loss": 1.4016, "step": 1804000 }, { "epoch": 19.04, "learning_rate": 4.0478276011270935e-05, "loss": 1.4225, "step": 1804500 }, { "epoch": 19.05, "learning_rate": 4.047563768375952e-05, "loss": 1.3869, "step": 1805000 }, { "epoch": 19.05, "learning_rate": 4.0472999356248086e-05, "loss": 1.3759, "step": 1805500 }, { "epoch": 19.06, "learning_rate": 4.047036102873666e-05, "loss": 1.3904, "step": 1806000 }, { "epoch": 19.06, "learning_rate": 4.0467722701225244e-05, "loss": 1.435, "step": 1806500 }, { "epoch": 19.07, "learning_rate": 4.046508437371382e-05, "loss": 1.3683, "step": 1807000 }, { "epoch": 19.08, "learning_rate": 4.0462446046202395e-05, "loss": 1.4382, "step": 1807500 }, { "epoch": 19.08, "learning_rate": 4.045980771869097e-05, "loss": 1.4524, "step": 1808000 }, { "epoch": 19.09, "learning_rate": 4.0457169391179546e-05, "loss": 1.416, "step": 1808500 }, { "epoch": 19.09, "learning_rate": 4.045453106366812e-05, "loss": 1.413, "step": 1809000 }, { "epoch": 19.1, "learning_rate": 4.0451892736156697e-05, "loss": 1.3971, "step": 1809500 }, { "epoch": 19.1, "learning_rate": 4.044925440864528e-05, "loss": 1.4418, "step": 1810000 }, { "epoch": 19.11, "learning_rate": 4.044661608113385e-05, "loss": 1.3296, "step": 1810500 }, { "epoch": 19.11, "learning_rate": 4.044397775362242e-05, "loss": 1.4372, "step": 1811000 }, { "epoch": 19.12, "learning_rate": 4.0441339426111005e-05, "loss": 1.4447, "step": 1811500 }, { "epoch": 19.12, "learning_rate": 4.043870109859958e-05, "loss": 1.368, "step": 1812000 }, { "epoch": 19.13, "learning_rate": 4.043606277108815e-05, "loss": 1.4045, "step": 1812500 }, { "epoch": 19.13, "learning_rate": 4.043342444357673e-05, "loss": 1.375, "step": 1813000 }, { "epoch": 19.14, "learning_rate": 4.043078611606531e-05, "loss": 1.3882, "step": 1813500 }, { "epoch": 19.14, "learning_rate": 4.042814778855388e-05, "loss": 1.4298, "step": 1814000 }, { "epoch": 19.15, "learning_rate": 4.042550946104246e-05, "loss": 1.4198, "step": 1814500 }, { "epoch": 19.15, "learning_rate": 4.0422871133531034e-05, "loss": 1.3889, "step": 1815000 }, { "epoch": 19.16, "learning_rate": 4.042023280601961e-05, "loss": 1.4015, "step": 1815500 }, { "epoch": 19.16, "learning_rate": 4.0417594478508185e-05, "loss": 1.3762, "step": 1816000 }, { "epoch": 19.17, "learning_rate": 4.041495615099676e-05, "loss": 1.4887, "step": 1816500 }, { "epoch": 19.18, "learning_rate": 4.041231782348534e-05, "loss": 1.3586, "step": 1817000 }, { "epoch": 19.18, "learning_rate": 4.040967949597391e-05, "loss": 1.3634, "step": 1817500 }, { "epoch": 19.19, "learning_rate": 4.0407041168462487e-05, "loss": 1.3879, "step": 1818000 }, { "epoch": 19.19, "learning_rate": 4.040440284095107e-05, "loss": 1.4514, "step": 1818500 }, { "epoch": 19.2, "learning_rate": 4.0401764513439644e-05, "loss": 1.4079, "step": 1819000 }, { "epoch": 19.2, "learning_rate": 4.039912618592822e-05, "loss": 1.4294, "step": 1819500 }, { "epoch": 19.21, "learning_rate": 4.0396487858416795e-05, "loss": 1.3646, "step": 1820000 }, { "epoch": 19.21, "learning_rate": 4.039384953090537e-05, "loss": 1.3985, "step": 1820500 }, { "epoch": 19.22, "learning_rate": 4.0391211203393946e-05, "loss": 1.3822, "step": 1821000 }, { "epoch": 19.22, "learning_rate": 4.038857287588252e-05, "loss": 1.4498, "step": 1821500 }, { "epoch": 19.23, "learning_rate": 4.03859345483711e-05, "loss": 1.4101, "step": 1822000 }, { "epoch": 19.23, "learning_rate": 4.038329622085967e-05, "loss": 1.4184, "step": 1822500 }, { "epoch": 19.24, "learning_rate": 4.038065789334825e-05, "loss": 1.3924, "step": 1823000 }, { "epoch": 19.24, "learning_rate": 4.037801956583683e-05, "loss": 1.4215, "step": 1823500 }, { "epoch": 19.25, "learning_rate": 4.0375381238325406e-05, "loss": 1.3923, "step": 1824000 }, { "epoch": 19.25, "learning_rate": 4.0372742910813975e-05, "loss": 1.3601, "step": 1824500 }, { "epoch": 19.26, "learning_rate": 4.037010458330256e-05, "loss": 1.3725, "step": 1825000 }, { "epoch": 19.27, "learning_rate": 4.036746625579113e-05, "loss": 1.427, "step": 1825500 }, { "epoch": 19.27, "learning_rate": 4.036482792827971e-05, "loss": 1.454, "step": 1826000 }, { "epoch": 19.28, "learning_rate": 4.0362189600768283e-05, "loss": 1.4075, "step": 1826500 }, { "epoch": 19.28, "learning_rate": 4.035955127325686e-05, "loss": 1.4031, "step": 1827000 }, { "epoch": 19.29, "learning_rate": 4.0356912945745434e-05, "loss": 1.4247, "step": 1827500 }, { "epoch": 19.29, "learning_rate": 4.035427461823401e-05, "loss": 1.4519, "step": 1828000 }, { "epoch": 19.3, "learning_rate": 4.0351636290722585e-05, "loss": 1.4123, "step": 1828500 }, { "epoch": 19.3, "learning_rate": 4.034899796321117e-05, "loss": 1.4143, "step": 1829000 }, { "epoch": 19.31, "learning_rate": 4.0346359635699736e-05, "loss": 1.3302, "step": 1829500 }, { "epoch": 19.31, "learning_rate": 4.034372130818831e-05, "loss": 1.3971, "step": 1830000 }, { "epoch": 19.32, "learning_rate": 4.0341082980676894e-05, "loss": 1.4, "step": 1830500 }, { "epoch": 19.32, "learning_rate": 4.033844465316547e-05, "loss": 1.4003, "step": 1831000 }, { "epoch": 19.33, "learning_rate": 4.033580632565404e-05, "loss": 1.3988, "step": 1831500 }, { "epoch": 19.33, "learning_rate": 4.033316799814262e-05, "loss": 1.4139, "step": 1832000 }, { "epoch": 19.34, "learning_rate": 4.0330529670631196e-05, "loss": 1.3967, "step": 1832500 }, { "epoch": 19.34, "learning_rate": 4.032789134311977e-05, "loss": 1.4056, "step": 1833000 }, { "epoch": 19.35, "learning_rate": 4.032525301560835e-05, "loss": 1.3942, "step": 1833500 }, { "epoch": 19.35, "learning_rate": 4.032261468809692e-05, "loss": 1.4106, "step": 1834000 }, { "epoch": 19.36, "learning_rate": 4.03199763605855e-05, "loss": 1.3924, "step": 1834500 }, { "epoch": 19.37, "learning_rate": 4.0317338033074073e-05, "loss": 1.3908, "step": 1835000 }, { "epoch": 19.37, "learning_rate": 4.0314699705562656e-05, "loss": 1.3892, "step": 1835500 }, { "epoch": 19.38, "learning_rate": 4.031206137805123e-05, "loss": 1.4137, "step": 1836000 }, { "epoch": 19.38, "learning_rate": 4.03094230505398e-05, "loss": 1.3963, "step": 1836500 }, { "epoch": 19.39, "learning_rate": 4.030678472302838e-05, "loss": 1.3684, "step": 1837000 }, { "epoch": 19.39, "learning_rate": 4.030414639551696e-05, "loss": 1.393, "step": 1837500 }, { "epoch": 19.4, "learning_rate": 4.030150806800553e-05, "loss": 1.4189, "step": 1838000 }, { "epoch": 19.4, "learning_rate": 4.029886974049411e-05, "loss": 1.3537, "step": 1838500 }, { "epoch": 19.41, "learning_rate": 4.0296231412982684e-05, "loss": 1.4103, "step": 1839000 }, { "epoch": 19.41, "learning_rate": 4.029359308547126e-05, "loss": 1.3865, "step": 1839500 }, { "epoch": 19.42, "learning_rate": 4.0290954757959835e-05, "loss": 1.3325, "step": 1840000 }, { "epoch": 19.42, "learning_rate": 4.028831643044841e-05, "loss": 1.4076, "step": 1840500 }, { "epoch": 19.43, "learning_rate": 4.0285678102936986e-05, "loss": 1.3867, "step": 1841000 }, { "epoch": 19.43, "learning_rate": 4.028303977542556e-05, "loss": 1.3491, "step": 1841500 }, { "epoch": 19.44, "learning_rate": 4.028040144791414e-05, "loss": 1.4049, "step": 1842000 }, { "epoch": 19.44, "learning_rate": 4.027776312040272e-05, "loss": 1.424, "step": 1842500 }, { "epoch": 19.45, "learning_rate": 4.0275124792891295e-05, "loss": 1.411, "step": 1843000 }, { "epoch": 19.46, "learning_rate": 4.0272486465379864e-05, "loss": 1.3588, "step": 1843500 }, { "epoch": 19.46, "learning_rate": 4.0269848137868446e-05, "loss": 1.3839, "step": 1844000 }, { "epoch": 19.47, "learning_rate": 4.026720981035702e-05, "loss": 1.4166, "step": 1844500 }, { "epoch": 19.47, "learning_rate": 4.02645714828456e-05, "loss": 1.3761, "step": 1845000 }, { "epoch": 19.48, "learning_rate": 4.026193315533417e-05, "loss": 1.3472, "step": 1845500 }, { "epoch": 19.48, "learning_rate": 4.025929482782275e-05, "loss": 1.4332, "step": 1846000 }, { "epoch": 19.49, "learning_rate": 4.025665650031132e-05, "loss": 1.4386, "step": 1846500 }, { "epoch": 19.49, "learning_rate": 4.02540181727999e-05, "loss": 1.3839, "step": 1847000 }, { "epoch": 19.5, "learning_rate": 4.025137984528848e-05, "loss": 1.4294, "step": 1847500 }, { "epoch": 19.5, "learning_rate": 4.024874151777705e-05, "loss": 1.4233, "step": 1848000 }, { "epoch": 19.51, "learning_rate": 4.0246103190265625e-05, "loss": 1.3349, "step": 1848500 }, { "epoch": 19.51, "learning_rate": 4.024346486275421e-05, "loss": 1.5044, "step": 1849000 }, { "epoch": 19.52, "learning_rate": 4.024082653524278e-05, "loss": 1.5333, "step": 1849500 }, { "epoch": 19.52, "learning_rate": 4.023818820773136e-05, "loss": 1.3632, "step": 1850000 }, { "epoch": 19.53, "learning_rate": 4.023554988021993e-05, "loss": 1.4335, "step": 1850500 }, { "epoch": 19.53, "learning_rate": 4.023291155270851e-05, "loss": 1.3795, "step": 1851000 }, { "epoch": 19.54, "learning_rate": 4.0230273225197085e-05, "loss": 1.3861, "step": 1851500 }, { "epoch": 19.54, "learning_rate": 4.022763489768566e-05, "loss": 1.414, "step": 1852000 }, { "epoch": 19.55, "learning_rate": 4.022499657017424e-05, "loss": 1.3528, "step": 1852500 }, { "epoch": 19.56, "learning_rate": 4.022235824266281e-05, "loss": 1.4388, "step": 1853000 }, { "epoch": 19.56, "learning_rate": 4.021971991515139e-05, "loss": 1.3605, "step": 1853500 }, { "epoch": 19.57, "learning_rate": 4.021708158763996e-05, "loss": 1.3941, "step": 1854000 }, { "epoch": 19.57, "learning_rate": 4.0214443260128545e-05, "loss": 1.3824, "step": 1854500 }, { "epoch": 19.58, "learning_rate": 4.021180493261712e-05, "loss": 1.398, "step": 1855000 }, { "epoch": 19.58, "learning_rate": 4.020916660510569e-05, "loss": 1.3893, "step": 1855500 }, { "epoch": 19.59, "learning_rate": 4.020652827759427e-05, "loss": 1.4426, "step": 1856000 }, { "epoch": 19.59, "learning_rate": 4.0203889950082847e-05, "loss": 1.3779, "step": 1856500 }, { "epoch": 19.6, "learning_rate": 4.020125162257142e-05, "loss": 1.4221, "step": 1857000 }, { "epoch": 19.6, "learning_rate": 4.019861329506e-05, "loss": 1.3946, "step": 1857500 }, { "epoch": 19.61, "learning_rate": 4.019597496754857e-05, "loss": 1.393, "step": 1858000 }, { "epoch": 19.61, "learning_rate": 4.019333664003715e-05, "loss": 1.4055, "step": 1858500 }, { "epoch": 19.62, "learning_rate": 4.0190698312525724e-05, "loss": 1.4147, "step": 1859000 }, { "epoch": 19.62, "learning_rate": 4.0188059985014306e-05, "loss": 1.4385, "step": 1859500 }, { "epoch": 19.63, "learning_rate": 4.0185421657502875e-05, "loss": 1.3874, "step": 1860000 }, { "epoch": 19.63, "learning_rate": 4.018278332999145e-05, "loss": 1.417, "step": 1860500 }, { "epoch": 19.64, "learning_rate": 4.018014500248003e-05, "loss": 1.4307, "step": 1861000 }, { "epoch": 19.64, "learning_rate": 4.017750667496861e-05, "loss": 1.3595, "step": 1861500 }, { "epoch": 19.65, "learning_rate": 4.0174868347457184e-05, "loss": 1.4402, "step": 1862000 }, { "epoch": 19.66, "learning_rate": 4.017223001994575e-05, "loss": 1.432, "step": 1862500 }, { "epoch": 19.66, "learning_rate": 4.0169591692434335e-05, "loss": 1.373, "step": 1863000 }, { "epoch": 19.67, "learning_rate": 4.016695336492291e-05, "loss": 1.4094, "step": 1863500 }, { "epoch": 19.67, "learning_rate": 4.0164315037411486e-05, "loss": 1.3814, "step": 1864000 }, { "epoch": 19.68, "learning_rate": 4.016167670990007e-05, "loss": 1.4477, "step": 1864500 }, { "epoch": 19.68, "learning_rate": 4.0159038382388637e-05, "loss": 1.398, "step": 1865000 }, { "epoch": 19.69, "learning_rate": 4.015640005487721e-05, "loss": 1.4282, "step": 1865500 }, { "epoch": 19.69, "learning_rate": 4.015376172736579e-05, "loss": 1.3982, "step": 1866000 }, { "epoch": 19.7, "learning_rate": 4.015112339985437e-05, "loss": 1.4066, "step": 1866500 }, { "epoch": 19.7, "learning_rate": 4.014848507234294e-05, "loss": 1.3423, "step": 1867000 }, { "epoch": 19.71, "learning_rate": 4.0145846744831514e-05, "loss": 1.3794, "step": 1867500 }, { "epoch": 19.71, "learning_rate": 4.0143208417320096e-05, "loss": 1.4382, "step": 1868000 }, { "epoch": 19.72, "learning_rate": 4.014057008980867e-05, "loss": 1.4164, "step": 1868500 }, { "epoch": 19.72, "learning_rate": 4.013793176229725e-05, "loss": 1.4094, "step": 1869000 }, { "epoch": 19.73, "learning_rate": 4.013529343478582e-05, "loss": 1.4461, "step": 1869500 }, { "epoch": 19.73, "learning_rate": 4.01326551072744e-05, "loss": 1.364, "step": 1870000 }, { "epoch": 19.74, "learning_rate": 4.0130016779762974e-05, "loss": 1.4177, "step": 1870500 }, { "epoch": 19.75, "learning_rate": 4.012737845225155e-05, "loss": 1.3998, "step": 1871000 }, { "epoch": 19.75, "learning_rate": 4.012474012474013e-05, "loss": 1.3849, "step": 1871500 }, { "epoch": 19.76, "learning_rate": 4.01221017972287e-05, "loss": 1.3265, "step": 1872000 }, { "epoch": 19.76, "learning_rate": 4.0119463469717276e-05, "loss": 1.3537, "step": 1872500 }, { "epoch": 19.77, "learning_rate": 4.011682514220586e-05, "loss": 1.4226, "step": 1873000 }, { "epoch": 19.77, "learning_rate": 4.0114186814694433e-05, "loss": 1.3843, "step": 1873500 }, { "epoch": 19.78, "learning_rate": 4.011154848718301e-05, "loss": 1.4235, "step": 1874000 }, { "epoch": 19.78, "learning_rate": 4.0108910159671584e-05, "loss": 1.388, "step": 1874500 }, { "epoch": 19.79, "learning_rate": 4.010627183216016e-05, "loss": 1.4619, "step": 1875000 }, { "epoch": 19.79, "learning_rate": 4.0103633504648735e-05, "loss": 1.3546, "step": 1875500 }, { "epoch": 19.8, "learning_rate": 4.010099517713731e-05, "loss": 1.39, "step": 1876000 }, { "epoch": 19.8, "learning_rate": 4.0098356849625886e-05, "loss": 1.3761, "step": 1876500 }, { "epoch": 19.81, "learning_rate": 4.009571852211446e-05, "loss": 1.4279, "step": 1877000 }, { "epoch": 19.81, "learning_rate": 4.009308019460304e-05, "loss": 1.3943, "step": 1877500 }, { "epoch": 19.82, "learning_rate": 4.009044186709161e-05, "loss": 1.3773, "step": 1878000 }, { "epoch": 19.82, "learning_rate": 4.0087803539580195e-05, "loss": 1.4063, "step": 1878500 }, { "epoch": 19.83, "learning_rate": 4.0085165212068764e-05, "loss": 1.4517, "step": 1879000 }, { "epoch": 19.83, "learning_rate": 4.008252688455734e-05, "loss": 1.3969, "step": 1879500 }, { "epoch": 19.84, "learning_rate": 4.007988855704592e-05, "loss": 1.4296, "step": 1880000 }, { "epoch": 19.85, "learning_rate": 4.00772502295345e-05, "loss": 1.4058, "step": 1880500 }, { "epoch": 19.85, "learning_rate": 4.007461190202307e-05, "loss": 1.409, "step": 1881000 }, { "epoch": 19.86, "learning_rate": 4.007197357451165e-05, "loss": 1.4029, "step": 1881500 }, { "epoch": 19.86, "learning_rate": 4.0069335247000223e-05, "loss": 1.3428, "step": 1882000 }, { "epoch": 19.87, "learning_rate": 4.00666969194888e-05, "loss": 1.3929, "step": 1882500 }, { "epoch": 19.87, "learning_rate": 4.0064058591977374e-05, "loss": 1.3785, "step": 1883000 }, { "epoch": 19.88, "learning_rate": 4.006142026446596e-05, "loss": 1.4103, "step": 1883500 }, { "epoch": 19.88, "learning_rate": 4.0058781936954525e-05, "loss": 1.3804, "step": 1884000 }, { "epoch": 19.89, "learning_rate": 4.00561436094431e-05, "loss": 1.4134, "step": 1884500 }, { "epoch": 19.89, "learning_rate": 4.005350528193168e-05, "loss": 1.3931, "step": 1885000 }, { "epoch": 19.9, "learning_rate": 4.005086695442026e-05, "loss": 1.3474, "step": 1885500 }, { "epoch": 19.9, "learning_rate": 4.004822862690883e-05, "loss": 1.4283, "step": 1886000 }, { "epoch": 19.91, "learning_rate": 4.004559029939741e-05, "loss": 1.4236, "step": 1886500 }, { "epoch": 19.91, "learning_rate": 4.0042951971885985e-05, "loss": 1.438, "step": 1887000 }, { "epoch": 19.92, "learning_rate": 4.004031364437456e-05, "loss": 1.3472, "step": 1887500 }, { "epoch": 19.92, "learning_rate": 4.0037675316863136e-05, "loss": 1.4101, "step": 1888000 }, { "epoch": 19.93, "learning_rate": 4.003503698935171e-05, "loss": 1.4116, "step": 1888500 }, { "epoch": 19.94, "learning_rate": 4.003239866184029e-05, "loss": 1.4134, "step": 1889000 }, { "epoch": 19.94, "learning_rate": 4.002976033432886e-05, "loss": 1.3698, "step": 1889500 }, { "epoch": 19.95, "learning_rate": 4.002712200681744e-05, "loss": 1.4134, "step": 1890000 }, { "epoch": 19.95, "learning_rate": 4.002448367930602e-05, "loss": 1.3835, "step": 1890500 }, { "epoch": 19.96, "learning_rate": 4.002184535179459e-05, "loss": 1.4263, "step": 1891000 }, { "epoch": 19.96, "learning_rate": 4.0019207024283165e-05, "loss": 1.403, "step": 1891500 }, { "epoch": 19.97, "learning_rate": 4.001656869677175e-05, "loss": 1.4256, "step": 1892000 }, { "epoch": 19.97, "learning_rate": 4.001393036926032e-05, "loss": 1.3907, "step": 1892500 }, { "epoch": 19.98, "learning_rate": 4.00112920417489e-05, "loss": 1.3813, "step": 1893000 }, { "epoch": 19.98, "learning_rate": 4.000865371423747e-05, "loss": 1.4141, "step": 1893500 }, { "epoch": 19.99, "learning_rate": 4.000601538672605e-05, "loss": 1.3624, "step": 1894000 }, { "epoch": 19.99, "learning_rate": 4.0003377059214624e-05, "loss": 1.4064, "step": 1894500 }, { "epoch": 20.0, "learning_rate": 4.00007387317032e-05, "loss": 1.4128, "step": 1895000 }, { "epoch": 20.0, "learning_rate": 3.9998100404191775e-05, "loss": 1.3767, "step": 1895500 }, { "epoch": 20.01, "learning_rate": 3.999546207668035e-05, "loss": 1.4081, "step": 1896000 }, { "epoch": 20.01, "learning_rate": 3.9992823749168926e-05, "loss": 1.4168, "step": 1896500 }, { "epoch": 20.02, "learning_rate": 3.999018542165751e-05, "loss": 1.3853, "step": 1897000 }, { "epoch": 20.02, "learning_rate": 3.9987547094146084e-05, "loss": 1.5025, "step": 1897500 }, { "epoch": 20.03, "learning_rate": 3.998490876663465e-05, "loss": 1.4281, "step": 1898000 }, { "epoch": 20.04, "learning_rate": 3.9982270439123235e-05, "loss": 1.3968, "step": 1898500 }, { "epoch": 20.04, "learning_rate": 3.997963211161181e-05, "loss": 1.3779, "step": 1899000 }, { "epoch": 20.05, "learning_rate": 3.9976993784100386e-05, "loss": 1.3928, "step": 1899500 }, { "epoch": 20.05, "learning_rate": 3.997435545658896e-05, "loss": 1.3691, "step": 1900000 }, { "epoch": 20.06, "learning_rate": 3.997171712907754e-05, "loss": 1.3404, "step": 1900500 }, { "epoch": 20.06, "learning_rate": 3.996907880156611e-05, "loss": 1.4241, "step": 1901000 }, { "epoch": 20.07, "learning_rate": 3.996644047405469e-05, "loss": 1.3821, "step": 1901500 }, { "epoch": 20.07, "learning_rate": 3.996380214654326e-05, "loss": 1.422, "step": 1902000 }, { "epoch": 20.08, "learning_rate": 3.9961163819031846e-05, "loss": 1.4222, "step": 1902500 }, { "epoch": 20.08, "learning_rate": 3.9958525491520414e-05, "loss": 1.3918, "step": 1903000 }, { "epoch": 20.09, "learning_rate": 3.995588716400899e-05, "loss": 1.3881, "step": 1903500 }, { "epoch": 20.09, "learning_rate": 3.995324883649757e-05, "loss": 1.3407, "step": 1904000 }, { "epoch": 20.1, "learning_rate": 3.995061050898615e-05, "loss": 1.386, "step": 1904500 }, { "epoch": 20.1, "learning_rate": 3.9947972181474716e-05, "loss": 1.4213, "step": 1905000 }, { "epoch": 20.11, "learning_rate": 3.99453338539633e-05, "loss": 1.3336, "step": 1905500 }, { "epoch": 20.11, "learning_rate": 3.9942695526451874e-05, "loss": 1.4267, "step": 1906000 }, { "epoch": 20.12, "learning_rate": 3.994005719894045e-05, "loss": 1.3593, "step": 1906500 }, { "epoch": 20.13, "learning_rate": 3.9937418871429025e-05, "loss": 1.4413, "step": 1907000 }, { "epoch": 20.13, "learning_rate": 3.99347805439176e-05, "loss": 1.4042, "step": 1907500 }, { "epoch": 20.14, "learning_rate": 3.9932142216406176e-05, "loss": 1.4396, "step": 1908000 }, { "epoch": 20.14, "learning_rate": 3.992950388889475e-05, "loss": 1.3634, "step": 1908500 }, { "epoch": 20.15, "learning_rate": 3.9926865561383334e-05, "loss": 1.3908, "step": 1909000 }, { "epoch": 20.15, "learning_rate": 3.992422723387191e-05, "loss": 1.371, "step": 1909500 }, { "epoch": 20.16, "learning_rate": 3.992158890636048e-05, "loss": 1.4064, "step": 1910000 }, { "epoch": 20.16, "learning_rate": 3.991895057884906e-05, "loss": 1.3936, "step": 1910500 }, { "epoch": 20.17, "learning_rate": 3.9916312251337636e-05, "loss": 1.3462, "step": 1911000 }, { "epoch": 20.17, "learning_rate": 3.991367392382621e-05, "loss": 1.3574, "step": 1911500 }, { "epoch": 20.18, "learning_rate": 3.9911035596314787e-05, "loss": 1.3773, "step": 1912000 }, { "epoch": 20.18, "learning_rate": 3.990839726880336e-05, "loss": 1.4133, "step": 1912500 }, { "epoch": 20.19, "learning_rate": 3.990575894129194e-05, "loss": 1.3501, "step": 1913000 }, { "epoch": 20.19, "learning_rate": 3.990312061378051e-05, "loss": 1.4369, "step": 1913500 }, { "epoch": 20.2, "learning_rate": 3.990048228626909e-05, "loss": 1.3918, "step": 1914000 }, { "epoch": 20.2, "learning_rate": 3.9897843958757664e-05, "loss": 1.4301, "step": 1914500 }, { "epoch": 20.21, "learning_rate": 3.989520563124624e-05, "loss": 1.3879, "step": 1915000 }, { "epoch": 20.21, "learning_rate": 3.9892567303734815e-05, "loss": 1.3421, "step": 1915500 }, { "epoch": 20.22, "learning_rate": 3.98899289762234e-05, "loss": 1.404, "step": 1916000 }, { "epoch": 20.23, "learning_rate": 3.988729064871197e-05, "loss": 1.4111, "step": 1916500 }, { "epoch": 20.23, "learning_rate": 3.988465232120054e-05, "loss": 1.3988, "step": 1917000 }, { "epoch": 20.24, "learning_rate": 3.9882013993689124e-05, "loss": 1.4278, "step": 1917500 }, { "epoch": 20.24, "learning_rate": 3.98793756661777e-05, "loss": 1.3789, "step": 1918000 }, { "epoch": 20.25, "learning_rate": 3.9876737338666275e-05, "loss": 1.4065, "step": 1918500 }, { "epoch": 20.25, "learning_rate": 3.987409901115485e-05, "loss": 1.3129, "step": 1919000 }, { "epoch": 20.26, "learning_rate": 3.9871460683643426e-05, "loss": 1.3828, "step": 1919500 }, { "epoch": 20.26, "learning_rate": 3.9868822356132e-05, "loss": 1.4146, "step": 1920000 }, { "epoch": 20.27, "learning_rate": 3.986618402862058e-05, "loss": 1.401, "step": 1920500 }, { "epoch": 20.27, "learning_rate": 3.986354570110916e-05, "loss": 1.3948, "step": 1921000 }, { "epoch": 20.28, "learning_rate": 3.9860907373597734e-05, "loss": 1.364, "step": 1921500 }, { "epoch": 20.28, "learning_rate": 3.98582690460863e-05, "loss": 1.4297, "step": 1922000 }, { "epoch": 20.29, "learning_rate": 3.9855630718574885e-05, "loss": 1.4479, "step": 1922500 }, { "epoch": 20.29, "learning_rate": 3.985299239106346e-05, "loss": 1.4008, "step": 1923000 }, { "epoch": 20.3, "learning_rate": 3.9850354063552036e-05, "loss": 1.3443, "step": 1923500 }, { "epoch": 20.3, "learning_rate": 3.9847715736040605e-05, "loss": 1.441, "step": 1924000 }, { "epoch": 20.31, "learning_rate": 3.984507740852919e-05, "loss": 1.3606, "step": 1924500 }, { "epoch": 20.32, "learning_rate": 3.984243908101776e-05, "loss": 1.3847, "step": 1925000 }, { "epoch": 20.32, "learning_rate": 3.983980075350634e-05, "loss": 1.4029, "step": 1925500 }, { "epoch": 20.33, "learning_rate": 3.983716242599492e-05, "loss": 1.3621, "step": 1926000 }, { "epoch": 20.33, "learning_rate": 3.983452409848349e-05, "loss": 1.4123, "step": 1926500 }, { "epoch": 20.34, "learning_rate": 3.9831885770972065e-05, "loss": 1.3956, "step": 1927000 }, { "epoch": 20.34, "learning_rate": 3.982924744346064e-05, "loss": 1.3966, "step": 1927500 }, { "epoch": 20.35, "learning_rate": 3.982660911594922e-05, "loss": 1.3739, "step": 1928000 }, { "epoch": 20.35, "learning_rate": 3.98239707884378e-05, "loss": 1.4048, "step": 1928500 }, { "epoch": 20.36, "learning_rate": 3.982133246092637e-05, "loss": 1.3981, "step": 1929000 }, { "epoch": 20.36, "learning_rate": 3.981869413341495e-05, "loss": 1.3952, "step": 1929500 }, { "epoch": 20.37, "learning_rate": 3.9816055805903524e-05, "loss": 1.4018, "step": 1930000 }, { "epoch": 20.37, "learning_rate": 3.98134174783921e-05, "loss": 1.3862, "step": 1930500 }, { "epoch": 20.38, "learning_rate": 3.9810779150880675e-05, "loss": 1.361, "step": 1931000 }, { "epoch": 20.38, "learning_rate": 3.980814082336925e-05, "loss": 1.3787, "step": 1931500 }, { "epoch": 20.39, "learning_rate": 3.9805502495857826e-05, "loss": 1.4228, "step": 1932000 }, { "epoch": 20.39, "learning_rate": 3.98028641683464e-05, "loss": 1.4023, "step": 1932500 }, { "epoch": 20.4, "learning_rate": 3.9800225840834984e-05, "loss": 1.3997, "step": 1933000 }, { "epoch": 20.4, "learning_rate": 3.979758751332355e-05, "loss": 1.3649, "step": 1933500 }, { "epoch": 20.41, "learning_rate": 3.979494918581213e-05, "loss": 1.3518, "step": 1934000 }, { "epoch": 20.42, "learning_rate": 3.979231085830071e-05, "loss": 1.4469, "step": 1934500 }, { "epoch": 20.42, "learning_rate": 3.9789672530789286e-05, "loss": 1.4352, "step": 1935000 }, { "epoch": 20.43, "learning_rate": 3.978703420327786e-05, "loss": 1.4124, "step": 1935500 }, { "epoch": 20.43, "learning_rate": 3.978439587576643e-05, "loss": 1.3817, "step": 1936000 }, { "epoch": 20.44, "learning_rate": 3.978175754825501e-05, "loss": 1.3931, "step": 1936500 }, { "epoch": 20.44, "learning_rate": 3.977911922074359e-05, "loss": 1.3906, "step": 1937000 }, { "epoch": 20.45, "learning_rate": 3.9776480893232164e-05, "loss": 1.349, "step": 1937500 }, { "epoch": 20.45, "learning_rate": 3.9773842565720746e-05, "loss": 1.3696, "step": 1938000 }, { "epoch": 20.46, "learning_rate": 3.9771204238209315e-05, "loss": 1.4525, "step": 1938500 }, { "epoch": 20.46, "learning_rate": 3.976856591069789e-05, "loss": 1.3983, "step": 1939000 }, { "epoch": 20.47, "learning_rate": 3.9765927583186465e-05, "loss": 1.4368, "step": 1939500 }, { "epoch": 20.47, "learning_rate": 3.976328925567505e-05, "loss": 1.3546, "step": 1940000 }, { "epoch": 20.48, "learning_rate": 3.976065092816362e-05, "loss": 1.3862, "step": 1940500 }, { "epoch": 20.48, "learning_rate": 3.975801260065219e-05, "loss": 1.3247, "step": 1941000 }, { "epoch": 20.49, "learning_rate": 3.9755374273140774e-05, "loss": 1.3959, "step": 1941500 }, { "epoch": 20.49, "learning_rate": 3.975273594562935e-05, "loss": 1.3936, "step": 1942000 }, { "epoch": 20.5, "learning_rate": 3.9750097618117925e-05, "loss": 1.3887, "step": 1942500 }, { "epoch": 20.51, "learning_rate": 3.97474592906065e-05, "loss": 1.423, "step": 1943000 }, { "epoch": 20.51, "learning_rate": 3.9744820963095076e-05, "loss": 1.4125, "step": 1943500 }, { "epoch": 20.52, "learning_rate": 3.974218263558365e-05, "loss": 1.4138, "step": 1944000 }, { "epoch": 20.52, "learning_rate": 3.973954430807223e-05, "loss": 1.3938, "step": 1944500 }, { "epoch": 20.53, "learning_rate": 3.973690598056081e-05, "loss": 1.3831, "step": 1945000 }, { "epoch": 20.53, "learning_rate": 3.973426765304938e-05, "loss": 1.4009, "step": 1945500 }, { "epoch": 20.54, "learning_rate": 3.9731629325537954e-05, "loss": 1.3552, "step": 1946000 }, { "epoch": 20.54, "learning_rate": 3.9728990998026536e-05, "loss": 1.3687, "step": 1946500 }, { "epoch": 20.55, "learning_rate": 3.972635267051511e-05, "loss": 1.3648, "step": 1947000 }, { "epoch": 20.55, "learning_rate": 3.972371434300369e-05, "loss": 1.332, "step": 1947500 }, { "epoch": 20.56, "learning_rate": 3.972107601549226e-05, "loss": 1.3732, "step": 1948000 }, { "epoch": 20.56, "learning_rate": 3.971843768798084e-05, "loss": 1.4307, "step": 1948500 }, { "epoch": 20.57, "learning_rate": 3.971579936046941e-05, "loss": 1.3886, "step": 1949000 }, { "epoch": 20.57, "learning_rate": 3.971316103295799e-05, "loss": 1.4076, "step": 1949500 }, { "epoch": 20.58, "learning_rate": 3.971052270544657e-05, "loss": 1.4009, "step": 1950000 }, { "epoch": 20.58, "learning_rate": 3.970788437793514e-05, "loss": 1.3967, "step": 1950500 }, { "epoch": 20.59, "learning_rate": 3.9705246050423715e-05, "loss": 1.3545, "step": 1951000 }, { "epoch": 20.59, "learning_rate": 3.970260772291229e-05, "loss": 1.4205, "step": 1951500 }, { "epoch": 20.6, "learning_rate": 3.969996939540087e-05, "loss": 1.3663, "step": 1952000 }, { "epoch": 20.61, "learning_rate": 3.969733106788944e-05, "loss": 1.3826, "step": 1952500 }, { "epoch": 20.61, "learning_rate": 3.969469274037802e-05, "loss": 1.4412, "step": 1953000 }, { "epoch": 20.62, "learning_rate": 3.96920544128666e-05, "loss": 1.4127, "step": 1953500 }, { "epoch": 20.62, "learning_rate": 3.9689416085355175e-05, "loss": 1.3765, "step": 1954000 }, { "epoch": 20.63, "learning_rate": 3.968677775784375e-05, "loss": 1.4246, "step": 1954500 }, { "epoch": 20.63, "learning_rate": 3.9684139430332326e-05, "loss": 1.3691, "step": 1955000 }, { "epoch": 20.64, "learning_rate": 3.96815011028209e-05, "loss": 1.4067, "step": 1955500 }, { "epoch": 20.64, "learning_rate": 3.967886277530948e-05, "loss": 1.3581, "step": 1956000 }, { "epoch": 20.65, "learning_rate": 3.967622444779805e-05, "loss": 1.3505, "step": 1956500 }, { "epoch": 20.65, "learning_rate": 3.9673586120286635e-05, "loss": 1.3466, "step": 1957000 }, { "epoch": 20.66, "learning_rate": 3.96709477927752e-05, "loss": 1.3826, "step": 1957500 }, { "epoch": 20.66, "learning_rate": 3.966830946526378e-05, "loss": 1.409, "step": 1958000 }, { "epoch": 20.67, "learning_rate": 3.966567113775236e-05, "loss": 1.3859, "step": 1958500 }, { "epoch": 20.67, "learning_rate": 3.9663032810240937e-05, "loss": 1.3333, "step": 1959000 }, { "epoch": 20.68, "learning_rate": 3.966039448272951e-05, "loss": 1.4234, "step": 1959500 }, { "epoch": 20.68, "learning_rate": 3.965775615521809e-05, "loss": 1.3993, "step": 1960000 }, { "epoch": 20.69, "learning_rate": 3.965511782770666e-05, "loss": 1.3606, "step": 1960500 }, { "epoch": 20.7, "learning_rate": 3.965247950019524e-05, "loss": 1.3678, "step": 1961000 }, { "epoch": 20.7, "learning_rate": 3.9649841172683814e-05, "loss": 1.3818, "step": 1961500 }, { "epoch": 20.71, "learning_rate": 3.964720284517239e-05, "loss": 1.3728, "step": 1962000 }, { "epoch": 20.71, "learning_rate": 3.9644564517660965e-05, "loss": 1.3786, "step": 1962500 }, { "epoch": 20.72, "learning_rate": 3.964192619014954e-05, "loss": 1.366, "step": 1963000 }, { "epoch": 20.72, "learning_rate": 3.9639287862638116e-05, "loss": 1.3997, "step": 1963500 }, { "epoch": 20.73, "learning_rate": 3.96366495351267e-05, "loss": 1.4217, "step": 1964000 }, { "epoch": 20.73, "learning_rate": 3.963401120761527e-05, "loss": 1.3929, "step": 1964500 }, { "epoch": 20.74, "learning_rate": 3.963137288010384e-05, "loss": 1.3958, "step": 1965000 }, { "epoch": 20.74, "learning_rate": 3.9628734552592425e-05, "loss": 1.4074, "step": 1965500 }, { "epoch": 20.75, "learning_rate": 3.9626096225081e-05, "loss": 1.459, "step": 1966000 }, { "epoch": 20.75, "learning_rate": 3.9623457897569576e-05, "loss": 1.4208, "step": 1966500 }, { "epoch": 20.76, "learning_rate": 3.962081957005815e-05, "loss": 1.4192, "step": 1967000 }, { "epoch": 20.76, "learning_rate": 3.961818124254673e-05, "loss": 1.3874, "step": 1967500 }, { "epoch": 20.77, "learning_rate": 3.96155429150353e-05, "loss": 1.4058, "step": 1968000 }, { "epoch": 20.77, "learning_rate": 3.961290458752388e-05, "loss": 1.3758, "step": 1968500 }, { "epoch": 20.78, "learning_rate": 3.961026626001246e-05, "loss": 1.3668, "step": 1969000 }, { "epoch": 20.78, "learning_rate": 3.960762793250103e-05, "loss": 1.3741, "step": 1969500 }, { "epoch": 20.79, "learning_rate": 3.9604989604989604e-05, "loss": 1.3386, "step": 1970000 }, { "epoch": 20.8, "learning_rate": 3.9602351277478186e-05, "loss": 1.3763, "step": 1970500 }, { "epoch": 20.8, "learning_rate": 3.959971294996676e-05, "loss": 1.3775, "step": 1971000 }, { "epoch": 20.81, "learning_rate": 3.959707462245533e-05, "loss": 1.4096, "step": 1971500 }, { "epoch": 20.81, "learning_rate": 3.959443629494391e-05, "loss": 1.4028, "step": 1972000 }, { "epoch": 20.82, "learning_rate": 3.959179796743249e-05, "loss": 1.4264, "step": 1972500 }, { "epoch": 20.82, "learning_rate": 3.9589159639921064e-05, "loss": 1.4029, "step": 1973000 }, { "epoch": 20.83, "learning_rate": 3.958652131240964e-05, "loss": 1.4513, "step": 1973500 }, { "epoch": 20.83, "learning_rate": 3.9583882984898215e-05, "loss": 1.3796, "step": 1974000 }, { "epoch": 20.84, "learning_rate": 3.958124465738679e-05, "loss": 1.3782, "step": 1974500 }, { "epoch": 20.84, "learning_rate": 3.9578606329875366e-05, "loss": 1.3684, "step": 1975000 }, { "epoch": 20.85, "learning_rate": 3.957596800236394e-05, "loss": 1.344, "step": 1975500 }, { "epoch": 20.85, "learning_rate": 3.9573329674852523e-05, "loss": 1.43, "step": 1976000 }, { "epoch": 20.86, "learning_rate": 3.957069134734109e-05, "loss": 1.4129, "step": 1976500 }, { "epoch": 20.86, "learning_rate": 3.956805301982967e-05, "loss": 1.4021, "step": 1977000 }, { "epoch": 20.87, "learning_rate": 3.956541469231825e-05, "loss": 1.3958, "step": 1977500 }, { "epoch": 20.87, "learning_rate": 3.9562776364806825e-05, "loss": 1.4524, "step": 1978000 }, { "epoch": 20.88, "learning_rate": 3.95601380372954e-05, "loss": 1.3598, "step": 1978500 }, { "epoch": 20.89, "learning_rate": 3.9557499709783976e-05, "loss": 1.3873, "step": 1979000 }, { "epoch": 20.89, "learning_rate": 3.955486138227255e-05, "loss": 1.4303, "step": 1979500 }, { "epoch": 20.9, "learning_rate": 3.955222305476113e-05, "loss": 1.4005, "step": 1980000 }, { "epoch": 20.9, "learning_rate": 3.95495847272497e-05, "loss": 1.3917, "step": 1980500 }, { "epoch": 20.91, "learning_rate": 3.954694639973828e-05, "loss": 1.347, "step": 1981000 }, { "epoch": 20.91, "learning_rate": 3.9544308072226854e-05, "loss": 1.4134, "step": 1981500 }, { "epoch": 20.92, "learning_rate": 3.954166974471543e-05, "loss": 1.4305, "step": 1982000 }, { "epoch": 20.92, "learning_rate": 3.953903141720401e-05, "loss": 1.336, "step": 1982500 }, { "epoch": 20.93, "learning_rate": 3.953639308969259e-05, "loss": 1.3903, "step": 1983000 }, { "epoch": 20.93, "learning_rate": 3.9533754762181156e-05, "loss": 1.4109, "step": 1983500 }, { "epoch": 20.94, "learning_rate": 3.953111643466974e-05, "loss": 1.4346, "step": 1984000 }, { "epoch": 20.94, "learning_rate": 3.9528478107158314e-05, "loss": 1.3927, "step": 1984500 }, { "epoch": 20.95, "learning_rate": 3.952583977964689e-05, "loss": 1.432, "step": 1985000 }, { "epoch": 20.95, "learning_rate": 3.9523201452135465e-05, "loss": 1.3841, "step": 1985500 }, { "epoch": 20.96, "learning_rate": 3.952056312462404e-05, "loss": 1.3985, "step": 1986000 }, { "epoch": 20.96, "learning_rate": 3.9517924797112616e-05, "loss": 1.3909, "step": 1986500 }, { "epoch": 20.97, "learning_rate": 3.951528646960119e-05, "loss": 1.3998, "step": 1987000 }, { "epoch": 20.97, "learning_rate": 3.9512648142089766e-05, "loss": 1.4033, "step": 1987500 }, { "epoch": 20.98, "learning_rate": 3.951000981457835e-05, "loss": 1.404, "step": 1988000 }, { "epoch": 20.99, "learning_rate": 3.950737148706692e-05, "loss": 1.3968, "step": 1988500 }, { "epoch": 20.99, "learning_rate": 3.950473315955549e-05, "loss": 1.3982, "step": 1989000 }, { "epoch": 21.0, "learning_rate": 3.9502094832044075e-05, "loss": 1.3755, "step": 1989500 }, { "epoch": 21.0, "learning_rate": 3.949945650453265e-05, "loss": 1.3694, "step": 1990000 }, { "epoch": 21.01, "learning_rate": 3.949681817702122e-05, "loss": 1.4208, "step": 1990500 }, { "epoch": 21.01, "learning_rate": 3.94941798495098e-05, "loss": 1.4059, "step": 1991000 }, { "epoch": 21.02, "learning_rate": 3.949154152199838e-05, "loss": 1.3682, "step": 1991500 }, { "epoch": 21.02, "learning_rate": 3.948890319448695e-05, "loss": 1.3812, "step": 1992000 }, { "epoch": 21.03, "learning_rate": 3.948626486697553e-05, "loss": 1.4008, "step": 1992500 }, { "epoch": 21.03, "learning_rate": 3.9483626539464104e-05, "loss": 1.36, "step": 1993000 }, { "epoch": 21.04, "learning_rate": 3.948098821195268e-05, "loss": 1.3967, "step": 1993500 }, { "epoch": 21.04, "learning_rate": 3.9478349884441255e-05, "loss": 1.3952, "step": 1994000 }, { "epoch": 21.05, "learning_rate": 3.947571155692984e-05, "loss": 1.3631, "step": 1994500 }, { "epoch": 21.05, "learning_rate": 3.947307322941841e-05, "loss": 1.3882, "step": 1995000 }, { "epoch": 21.06, "learning_rate": 3.947043490190698e-05, "loss": 1.4387, "step": 1995500 }, { "epoch": 21.06, "learning_rate": 3.946779657439556e-05, "loss": 1.3544, "step": 1996000 }, { "epoch": 21.07, "learning_rate": 3.946515824688414e-05, "loss": 1.3837, "step": 1996500 }, { "epoch": 21.07, "learning_rate": 3.9462519919372714e-05, "loss": 1.3868, "step": 1997000 }, { "epoch": 21.08, "learning_rate": 3.945988159186129e-05, "loss": 1.3782, "step": 1997500 }, { "epoch": 21.09, "learning_rate": 3.9457243264349865e-05, "loss": 1.387, "step": 1998000 }, { "epoch": 21.09, "learning_rate": 3.945460493683844e-05, "loss": 1.3356, "step": 1998500 }, { "epoch": 21.1, "learning_rate": 3.9451966609327016e-05, "loss": 1.4335, "step": 1999000 }, { "epoch": 21.1, "learning_rate": 3.94493282818156e-05, "loss": 1.392, "step": 1999500 }, { "epoch": 21.11, "learning_rate": 3.944668995430417e-05, "loss": 1.4194, "step": 2000000 }, { "epoch": 21.11, "learning_rate": 3.944405162679274e-05, "loss": 1.3712, "step": 2000500 }, { "epoch": 21.12, "learning_rate": 3.944141329928132e-05, "loss": 1.2999, "step": 2001000 }, { "epoch": 21.12, "learning_rate": 3.94387749717699e-05, "loss": 1.3456, "step": 2001500 }, { "epoch": 21.13, "learning_rate": 3.9436136644258476e-05, "loss": 1.4002, "step": 2002000 }, { "epoch": 21.13, "learning_rate": 3.9433498316747045e-05, "loss": 1.411, "step": 2002500 }, { "epoch": 21.14, "learning_rate": 3.943085998923563e-05, "loss": 1.346, "step": 2003000 }, { "epoch": 21.14, "learning_rate": 3.94282216617242e-05, "loss": 1.4249, "step": 2003500 }, { "epoch": 21.15, "learning_rate": 3.942558333421278e-05, "loss": 1.3578, "step": 2004000 }, { "epoch": 21.15, "learning_rate": 3.942294500670135e-05, "loss": 1.4234, "step": 2004500 }, { "epoch": 21.16, "learning_rate": 3.942030667918993e-05, "loss": 1.4418, "step": 2005000 }, { "epoch": 21.16, "learning_rate": 3.9417668351678504e-05, "loss": 1.4219, "step": 2005500 }, { "epoch": 21.17, "learning_rate": 3.941503002416708e-05, "loss": 1.4311, "step": 2006000 }, { "epoch": 21.18, "learning_rate": 3.941239169665566e-05, "loss": 1.3649, "step": 2006500 }, { "epoch": 21.18, "learning_rate": 3.940975336914424e-05, "loss": 1.4193, "step": 2007000 }, { "epoch": 21.19, "learning_rate": 3.9407115041632806e-05, "loss": 1.3597, "step": 2007500 }, { "epoch": 21.19, "learning_rate": 3.940447671412139e-05, "loss": 1.3891, "step": 2008000 }, { "epoch": 21.2, "learning_rate": 3.9401838386609964e-05, "loss": 1.4104, "step": 2008500 }, { "epoch": 21.2, "learning_rate": 3.939920005909854e-05, "loss": 1.3909, "step": 2009000 }, { "epoch": 21.21, "learning_rate": 3.939656173158711e-05, "loss": 1.411, "step": 2009500 }, { "epoch": 21.21, "learning_rate": 3.939392340407569e-05, "loss": 1.3715, "step": 2010000 }, { "epoch": 21.22, "learning_rate": 3.9391285076564266e-05, "loss": 1.3664, "step": 2010500 }, { "epoch": 21.22, "learning_rate": 3.938864674905284e-05, "loss": 1.3745, "step": 2011000 }, { "epoch": 21.23, "learning_rate": 3.9386008421541424e-05, "loss": 1.4044, "step": 2011500 }, { "epoch": 21.23, "learning_rate": 3.938337009402999e-05, "loss": 1.3718, "step": 2012000 }, { "epoch": 21.24, "learning_rate": 3.938073176651857e-05, "loss": 1.3933, "step": 2012500 }, { "epoch": 21.24, "learning_rate": 3.9378093439007143e-05, "loss": 1.3883, "step": 2013000 }, { "epoch": 21.25, "learning_rate": 3.9375455111495726e-05, "loss": 1.327, "step": 2013500 }, { "epoch": 21.25, "learning_rate": 3.93728167839843e-05, "loss": 1.3579, "step": 2014000 }, { "epoch": 21.26, "learning_rate": 3.937017845647287e-05, "loss": 1.3574, "step": 2014500 }, { "epoch": 21.26, "learning_rate": 3.936754012896145e-05, "loss": 1.3915, "step": 2015000 }, { "epoch": 21.27, "learning_rate": 3.936490180145003e-05, "loss": 1.3836, "step": 2015500 }, { "epoch": 21.28, "learning_rate": 3.93622634739386e-05, "loss": 1.3582, "step": 2016000 }, { "epoch": 21.28, "learning_rate": 3.935962514642718e-05, "loss": 1.4099, "step": 2016500 }, { "epoch": 21.29, "learning_rate": 3.9356986818915754e-05, "loss": 1.3748, "step": 2017000 }, { "epoch": 21.29, "learning_rate": 3.935434849140433e-05, "loss": 1.3847, "step": 2017500 }, { "epoch": 21.3, "learning_rate": 3.9351710163892905e-05, "loss": 1.3439, "step": 2018000 }, { "epoch": 21.3, "learning_rate": 3.934907183638149e-05, "loss": 1.3927, "step": 2018500 }, { "epoch": 21.31, "learning_rate": 3.9346433508870056e-05, "loss": 1.3641, "step": 2019000 }, { "epoch": 21.31, "learning_rate": 3.934379518135863e-05, "loss": 1.4157, "step": 2019500 }, { "epoch": 21.32, "learning_rate": 3.9341156853847214e-05, "loss": 1.4578, "step": 2020000 }, { "epoch": 21.32, "learning_rate": 3.933851852633579e-05, "loss": 1.3924, "step": 2020500 }, { "epoch": 21.33, "learning_rate": 3.9335880198824365e-05, "loss": 1.3831, "step": 2021000 }, { "epoch": 21.33, "learning_rate": 3.933324187131294e-05, "loss": 1.3496, "step": 2021500 }, { "epoch": 21.34, "learning_rate": 3.9330603543801516e-05, "loss": 1.3849, "step": 2022000 }, { "epoch": 21.34, "learning_rate": 3.932796521629009e-05, "loss": 1.3781, "step": 2022500 }, { "epoch": 21.35, "learning_rate": 3.932532688877867e-05, "loss": 1.3825, "step": 2023000 }, { "epoch": 21.35, "learning_rate": 3.932268856126725e-05, "loss": 1.3575, "step": 2023500 }, { "epoch": 21.36, "learning_rate": 3.932005023375582e-05, "loss": 1.3808, "step": 2024000 }, { "epoch": 21.37, "learning_rate": 3.931741190624439e-05, "loss": 1.3736, "step": 2024500 }, { "epoch": 21.37, "learning_rate": 3.931477357873297e-05, "loss": 1.3814, "step": 2025000 }, { "epoch": 21.38, "learning_rate": 3.931213525122155e-05, "loss": 1.3912, "step": 2025500 }, { "epoch": 21.38, "learning_rate": 3.9309496923710126e-05, "loss": 1.4359, "step": 2026000 }, { "epoch": 21.39, "learning_rate": 3.9306858596198695e-05, "loss": 1.4003, "step": 2026500 }, { "epoch": 21.39, "learning_rate": 3.930422026868728e-05, "loss": 1.3788, "step": 2027000 }, { "epoch": 21.4, "learning_rate": 3.930158194117585e-05, "loss": 1.3274, "step": 2027500 }, { "epoch": 21.4, "learning_rate": 3.929894361366443e-05, "loss": 1.3303, "step": 2028000 }, { "epoch": 21.41, "learning_rate": 3.9296305286153004e-05, "loss": 1.4014, "step": 2028500 }, { "epoch": 21.41, "learning_rate": 3.929366695864158e-05, "loss": 1.4113, "step": 2029000 }, { "epoch": 21.42, "learning_rate": 3.9291028631130155e-05, "loss": 1.3823, "step": 2029500 }, { "epoch": 21.42, "learning_rate": 3.928839030361873e-05, "loss": 1.357, "step": 2030000 }, { "epoch": 21.43, "learning_rate": 3.928575197610731e-05, "loss": 1.3855, "step": 2030500 }, { "epoch": 21.43, "learning_rate": 3.928311364859588e-05, "loss": 1.4027, "step": 2031000 }, { "epoch": 21.44, "learning_rate": 3.928047532108446e-05, "loss": 1.4262, "step": 2031500 }, { "epoch": 21.44, "learning_rate": 3.927783699357304e-05, "loss": 1.3829, "step": 2032000 }, { "epoch": 21.45, "learning_rate": 3.9275198666061615e-05, "loss": 1.3352, "step": 2032500 }, { "epoch": 21.45, "learning_rate": 3.927256033855019e-05, "loss": 1.3796, "step": 2033000 }, { "epoch": 21.46, "learning_rate": 3.9269922011038766e-05, "loss": 1.3966, "step": 2033500 }, { "epoch": 21.47, "learning_rate": 3.926728368352734e-05, "loss": 1.3562, "step": 2034000 }, { "epoch": 21.47, "learning_rate": 3.9264645356015916e-05, "loss": 1.4295, "step": 2034500 }, { "epoch": 21.48, "learning_rate": 3.926200702850449e-05, "loss": 1.3522, "step": 2035000 }, { "epoch": 21.48, "learning_rate": 3.9259368700993074e-05, "loss": 1.4211, "step": 2035500 }, { "epoch": 21.49, "learning_rate": 3.925673037348164e-05, "loss": 1.3348, "step": 2036000 }, { "epoch": 21.49, "learning_rate": 3.925409204597022e-05, "loss": 1.3964, "step": 2036500 }, { "epoch": 21.5, "learning_rate": 3.9251453718458794e-05, "loss": 1.4079, "step": 2037000 }, { "epoch": 21.5, "learning_rate": 3.9248815390947376e-05, "loss": 1.4036, "step": 2037500 }, { "epoch": 21.51, "learning_rate": 3.9246177063435945e-05, "loss": 1.3511, "step": 2038000 }, { "epoch": 21.51, "learning_rate": 3.924353873592452e-05, "loss": 1.43, "step": 2038500 }, { "epoch": 21.52, "learning_rate": 3.92409004084131e-05, "loss": 1.4274, "step": 2039000 }, { "epoch": 21.52, "learning_rate": 3.923826208090168e-05, "loss": 1.3768, "step": 2039500 }, { "epoch": 21.53, "learning_rate": 3.9235623753390254e-05, "loss": 1.3771, "step": 2040000 }, { "epoch": 21.53, "learning_rate": 3.923298542587883e-05, "loss": 1.3668, "step": 2040500 }, { "epoch": 21.54, "learning_rate": 3.9230347098367405e-05, "loss": 1.4027, "step": 2041000 }, { "epoch": 21.54, "learning_rate": 3.922770877085598e-05, "loss": 1.3921, "step": 2041500 }, { "epoch": 21.55, "learning_rate": 3.9225070443344556e-05, "loss": 1.372, "step": 2042000 }, { "epoch": 21.56, "learning_rate": 3.922243211583314e-05, "loss": 1.3869, "step": 2042500 }, { "epoch": 21.56, "learning_rate": 3.9219793788321707e-05, "loss": 1.4188, "step": 2043000 }, { "epoch": 21.57, "learning_rate": 3.921715546081028e-05, "loss": 1.3903, "step": 2043500 }, { "epoch": 21.57, "learning_rate": 3.9214517133298864e-05, "loss": 1.3879, "step": 2044000 }, { "epoch": 21.58, "learning_rate": 3.921187880578744e-05, "loss": 1.333, "step": 2044500 }, { "epoch": 21.58, "learning_rate": 3.9209240478276015e-05, "loss": 1.4115, "step": 2045000 }, { "epoch": 21.59, "learning_rate": 3.920660215076459e-05, "loss": 1.3816, "step": 2045500 }, { "epoch": 21.59, "learning_rate": 3.9203963823253166e-05, "loss": 1.3807, "step": 2046000 }, { "epoch": 21.6, "learning_rate": 3.920132549574174e-05, "loss": 1.4282, "step": 2046500 }, { "epoch": 21.6, "learning_rate": 3.919868716823032e-05, "loss": 1.4287, "step": 2047000 }, { "epoch": 21.61, "learning_rate": 3.919604884071889e-05, "loss": 1.3562, "step": 2047500 }, { "epoch": 21.61, "learning_rate": 3.919341051320747e-05, "loss": 1.4201, "step": 2048000 }, { "epoch": 21.62, "learning_rate": 3.9190772185696044e-05, "loss": 1.3506, "step": 2048500 }, { "epoch": 21.62, "learning_rate": 3.918813385818462e-05, "loss": 1.3862, "step": 2049000 }, { "epoch": 21.63, "learning_rate": 3.91854955306732e-05, "loss": 1.3956, "step": 2049500 }, { "epoch": 21.63, "learning_rate": 3.918285720316177e-05, "loss": 1.366, "step": 2050000 }, { "epoch": 21.64, "learning_rate": 3.9180218875650346e-05, "loss": 1.4455, "step": 2050500 }, { "epoch": 21.64, "learning_rate": 3.917758054813893e-05, "loss": 1.3854, "step": 2051000 }, { "epoch": 21.65, "learning_rate": 3.91749422206275e-05, "loss": 1.388, "step": 2051500 }, { "epoch": 21.66, "learning_rate": 3.917230389311608e-05, "loss": 1.351, "step": 2052000 }, { "epoch": 21.66, "learning_rate": 3.9169665565604654e-05, "loss": 1.3894, "step": 2052500 }, { "epoch": 21.67, "learning_rate": 3.916702723809323e-05, "loss": 1.3669, "step": 2053000 }, { "epoch": 21.67, "learning_rate": 3.9164388910581805e-05, "loss": 1.4303, "step": 2053500 }, { "epoch": 21.68, "learning_rate": 3.916175058307038e-05, "loss": 1.4227, "step": 2054000 }, { "epoch": 21.68, "learning_rate": 3.915911225555896e-05, "loss": 1.406, "step": 2054500 }, { "epoch": 21.69, "learning_rate": 3.915647392804753e-05, "loss": 1.4154, "step": 2055000 }, { "epoch": 21.69, "learning_rate": 3.915383560053611e-05, "loss": 1.4222, "step": 2055500 }, { "epoch": 21.7, "learning_rate": 3.915119727302469e-05, "loss": 1.3749, "step": 2056000 }, { "epoch": 21.7, "learning_rate": 3.9148558945513265e-05, "loss": 1.3447, "step": 2056500 }, { "epoch": 21.71, "learning_rate": 3.9145920618001834e-05, "loss": 1.3684, "step": 2057000 }, { "epoch": 21.71, "learning_rate": 3.9143282290490416e-05, "loss": 1.3941, "step": 2057500 }, { "epoch": 21.72, "learning_rate": 3.914064396297899e-05, "loss": 1.3746, "step": 2058000 }, { "epoch": 21.72, "learning_rate": 3.913800563546757e-05, "loss": 1.3937, "step": 2058500 }, { "epoch": 21.73, "learning_rate": 3.913536730795614e-05, "loss": 1.3809, "step": 2059000 }, { "epoch": 21.73, "learning_rate": 3.913272898044472e-05, "loss": 1.3824, "step": 2059500 }, { "epoch": 21.74, "learning_rate": 3.9130090652933293e-05, "loss": 1.4225, "step": 2060000 }, { "epoch": 21.75, "learning_rate": 3.912745232542187e-05, "loss": 1.3488, "step": 2060500 }, { "epoch": 21.75, "learning_rate": 3.9124813997910444e-05, "loss": 1.3575, "step": 2061000 }, { "epoch": 21.76, "learning_rate": 3.912217567039903e-05, "loss": 1.3827, "step": 2061500 }, { "epoch": 21.76, "learning_rate": 3.9119537342887595e-05, "loss": 1.3716, "step": 2062000 }, { "epoch": 21.77, "learning_rate": 3.911689901537617e-05, "loss": 1.4321, "step": 2062500 }, { "epoch": 21.77, "learning_rate": 3.911426068786475e-05, "loss": 1.4089, "step": 2063000 }, { "epoch": 21.78, "learning_rate": 3.911162236035333e-05, "loss": 1.4191, "step": 2063500 }, { "epoch": 21.78, "learning_rate": 3.9108984032841904e-05, "loss": 1.3894, "step": 2064000 }, { "epoch": 21.79, "learning_rate": 3.910634570533048e-05, "loss": 1.3959, "step": 2064500 }, { "epoch": 21.79, "learning_rate": 3.9103707377819055e-05, "loss": 1.3587, "step": 2065000 }, { "epoch": 21.8, "learning_rate": 3.910106905030763e-05, "loss": 1.3471, "step": 2065500 }, { "epoch": 21.8, "learning_rate": 3.9098430722796206e-05, "loss": 1.3197, "step": 2066000 }, { "epoch": 21.81, "learning_rate": 3.909579239528478e-05, "loss": 1.399, "step": 2066500 }, { "epoch": 21.81, "learning_rate": 3.909315406777336e-05, "loss": 1.4248, "step": 2067000 }, { "epoch": 21.82, "learning_rate": 3.909051574026193e-05, "loss": 1.4075, "step": 2067500 }, { "epoch": 21.82, "learning_rate": 3.9087877412750515e-05, "loss": 1.3428, "step": 2068000 }, { "epoch": 21.83, "learning_rate": 3.908523908523909e-05, "loss": 1.3479, "step": 2068500 }, { "epoch": 21.83, "learning_rate": 3.908260075772766e-05, "loss": 1.4012, "step": 2069000 }, { "epoch": 21.84, "learning_rate": 3.907996243021624e-05, "loss": 1.4043, "step": 2069500 }, { "epoch": 21.85, "learning_rate": 3.907732410270482e-05, "loss": 1.3624, "step": 2070000 }, { "epoch": 21.85, "learning_rate": 3.907468577519339e-05, "loss": 1.3837, "step": 2070500 }, { "epoch": 21.86, "learning_rate": 3.907204744768197e-05, "loss": 1.3994, "step": 2071000 }, { "epoch": 21.86, "learning_rate": 3.906940912017054e-05, "loss": 1.4307, "step": 2071500 }, { "epoch": 21.87, "learning_rate": 3.906677079265912e-05, "loss": 1.3997, "step": 2072000 }, { "epoch": 21.87, "learning_rate": 3.9064132465147694e-05, "loss": 1.397, "step": 2072500 }, { "epoch": 21.88, "learning_rate": 3.9061494137636276e-05, "loss": 1.3745, "step": 2073000 }, { "epoch": 21.88, "learning_rate": 3.905885581012485e-05, "loss": 1.397, "step": 2073500 }, { "epoch": 21.89, "learning_rate": 3.905621748261342e-05, "loss": 1.3941, "step": 2074000 }, { "epoch": 21.89, "learning_rate": 3.9053579155101996e-05, "loss": 1.425, "step": 2074500 }, { "epoch": 21.9, "learning_rate": 3.905094082759058e-05, "loss": 1.3514, "step": 2075000 }, { "epoch": 21.9, "learning_rate": 3.9048302500079154e-05, "loss": 1.4036, "step": 2075500 }, { "epoch": 21.91, "learning_rate": 3.904566417256772e-05, "loss": 1.3406, "step": 2076000 }, { "epoch": 21.91, "learning_rate": 3.9043025845056305e-05, "loss": 1.4217, "step": 2076500 }, { "epoch": 21.92, "learning_rate": 3.904038751754488e-05, "loss": 1.4323, "step": 2077000 }, { "epoch": 21.92, "learning_rate": 3.9037749190033456e-05, "loss": 1.4246, "step": 2077500 }, { "epoch": 21.93, "learning_rate": 3.903511086252203e-05, "loss": 1.4214, "step": 2078000 }, { "epoch": 21.94, "learning_rate": 3.903247253501061e-05, "loss": 1.3256, "step": 2078500 }, { "epoch": 21.94, "learning_rate": 3.902983420749918e-05, "loss": 1.4434, "step": 2079000 }, { "epoch": 21.95, "learning_rate": 3.902719587998776e-05, "loss": 1.4262, "step": 2079500 }, { "epoch": 21.95, "learning_rate": 3.902455755247634e-05, "loss": 1.395, "step": 2080000 }, { "epoch": 21.96, "learning_rate": 3.9021919224964916e-05, "loss": 1.4113, "step": 2080500 }, { "epoch": 21.96, "learning_rate": 3.9019280897453484e-05, "loss": 1.3949, "step": 2081000 }, { "epoch": 21.97, "learning_rate": 3.9016642569942067e-05, "loss": 1.369, "step": 2081500 }, { "epoch": 21.97, "learning_rate": 3.901400424243064e-05, "loss": 1.3758, "step": 2082000 }, { "epoch": 21.98, "learning_rate": 3.901136591491922e-05, "loss": 1.391, "step": 2082500 }, { "epoch": 21.98, "learning_rate": 3.900872758740779e-05, "loss": 1.3923, "step": 2083000 }, { "epoch": 21.99, "learning_rate": 3.900608925989637e-05, "loss": 1.4035, "step": 2083500 }, { "epoch": 21.99, "learning_rate": 3.9003450932384944e-05, "loss": 1.385, "step": 2084000 }, { "epoch": 22.0, "learning_rate": 3.900081260487352e-05, "loss": 1.396, "step": 2084500 }, { "epoch": 22.0, "learning_rate": 3.89981742773621e-05, "loss": 1.3781, "step": 2085000 }, { "epoch": 22.01, "learning_rate": 3.899553594985067e-05, "loss": 1.3616, "step": 2085500 }, { "epoch": 22.01, "learning_rate": 3.8992897622339246e-05, "loss": 1.359, "step": 2086000 }, { "epoch": 22.02, "learning_rate": 3.899025929482782e-05, "loss": 1.3537, "step": 2086500 }, { "epoch": 22.02, "learning_rate": 3.8987620967316404e-05, "loss": 1.365, "step": 2087000 }, { "epoch": 22.03, "learning_rate": 3.898498263980498e-05, "loss": 1.3927, "step": 2087500 }, { "epoch": 22.04, "learning_rate": 3.898234431229355e-05, "loss": 1.3831, "step": 2088000 }, { "epoch": 22.04, "learning_rate": 3.897970598478213e-05, "loss": 1.3649, "step": 2088500 }, { "epoch": 22.05, "learning_rate": 3.8977067657270706e-05, "loss": 1.38, "step": 2089000 }, { "epoch": 22.05, "learning_rate": 3.897442932975928e-05, "loss": 1.3948, "step": 2089500 }, { "epoch": 22.06, "learning_rate": 3.8971791002247857e-05, "loss": 1.3284, "step": 2090000 }, { "epoch": 22.06, "learning_rate": 3.896915267473643e-05, "loss": 1.3197, "step": 2090500 }, { "epoch": 22.07, "learning_rate": 3.896651434722501e-05, "loss": 1.3589, "step": 2091000 }, { "epoch": 22.07, "learning_rate": 3.896387601971358e-05, "loss": 1.3749, "step": 2091500 }, { "epoch": 22.08, "learning_rate": 3.8961237692202165e-05, "loss": 1.4381, "step": 2092000 }, { "epoch": 22.08, "learning_rate": 3.8958599364690734e-05, "loss": 1.4096, "step": 2092500 }, { "epoch": 22.09, "learning_rate": 3.895596103717931e-05, "loss": 1.3671, "step": 2093000 }, { "epoch": 22.09, "learning_rate": 3.895332270966789e-05, "loss": 1.3691, "step": 2093500 }, { "epoch": 22.1, "learning_rate": 3.895068438215647e-05, "loss": 1.3214, "step": 2094000 }, { "epoch": 22.1, "learning_rate": 3.894804605464504e-05, "loss": 1.3683, "step": 2094500 }, { "epoch": 22.11, "learning_rate": 3.894540772713362e-05, "loss": 1.3764, "step": 2095000 }, { "epoch": 22.11, "learning_rate": 3.8942769399622194e-05, "loss": 1.3364, "step": 2095500 }, { "epoch": 22.12, "learning_rate": 3.894013107211077e-05, "loss": 1.4164, "step": 2096000 }, { "epoch": 22.13, "learning_rate": 3.8937492744599345e-05, "loss": 1.3631, "step": 2096500 }, { "epoch": 22.13, "learning_rate": 3.893485441708793e-05, "loss": 1.3855, "step": 2097000 }, { "epoch": 22.14, "learning_rate": 3.8932216089576496e-05, "loss": 1.3635, "step": 2097500 }, { "epoch": 22.14, "learning_rate": 3.892957776206507e-05, "loss": 1.4046, "step": 2098000 }, { "epoch": 22.15, "learning_rate": 3.892693943455365e-05, "loss": 1.4206, "step": 2098500 }, { "epoch": 22.15, "learning_rate": 3.892430110704223e-05, "loss": 1.3927, "step": 2099000 }, { "epoch": 22.16, "learning_rate": 3.8921662779530804e-05, "loss": 1.4013, "step": 2099500 }, { "epoch": 22.16, "learning_rate": 3.891902445201937e-05, "loss": 1.3648, "step": 2100000 }, { "epoch": 22.17, "learning_rate": 3.8916386124507955e-05, "loss": 1.3769, "step": 2100500 }, { "epoch": 22.17, "learning_rate": 3.891374779699653e-05, "loss": 1.4131, "step": 2101000 }, { "epoch": 22.18, "learning_rate": 3.8911109469485106e-05, "loss": 1.3333, "step": 2101500 }, { "epoch": 22.18, "learning_rate": 3.890847114197368e-05, "loss": 1.3513, "step": 2102000 }, { "epoch": 22.19, "learning_rate": 3.890583281446226e-05, "loss": 1.3955, "step": 2102500 }, { "epoch": 22.19, "learning_rate": 3.890319448695083e-05, "loss": 1.4137, "step": 2103000 }, { "epoch": 22.2, "learning_rate": 3.890055615943941e-05, "loss": 1.3644, "step": 2103500 }, { "epoch": 22.2, "learning_rate": 3.889791783192799e-05, "loss": 1.3727, "step": 2104000 }, { "epoch": 22.21, "learning_rate": 3.889527950441656e-05, "loss": 1.3657, "step": 2104500 }, { "epoch": 22.21, "learning_rate": 3.8892641176905135e-05, "loss": 1.3942, "step": 2105000 }, { "epoch": 22.22, "learning_rate": 3.889000284939372e-05, "loss": 1.3257, "step": 2105500 }, { "epoch": 22.23, "learning_rate": 3.888736452188229e-05, "loss": 1.3718, "step": 2106000 }, { "epoch": 22.23, "learning_rate": 3.888472619437087e-05, "loss": 1.3781, "step": 2106500 }, { "epoch": 22.24, "learning_rate": 3.8882087866859443e-05, "loss": 1.4102, "step": 2107000 }, { "epoch": 22.24, "learning_rate": 3.887944953934802e-05, "loss": 1.3396, "step": 2107500 }, { "epoch": 22.25, "learning_rate": 3.8876811211836594e-05, "loss": 1.4218, "step": 2108000 }, { "epoch": 22.25, "learning_rate": 3.887417288432517e-05, "loss": 1.4143, "step": 2108500 }, { "epoch": 22.26, "learning_rate": 3.887153455681375e-05, "loss": 1.4089, "step": 2109000 }, { "epoch": 22.26, "learning_rate": 3.886889622930232e-05, "loss": 1.345, "step": 2109500 }, { "epoch": 22.27, "learning_rate": 3.8866257901790896e-05, "loss": 1.363, "step": 2110000 }, { "epoch": 22.27, "learning_rate": 3.886361957427947e-05, "loss": 1.4317, "step": 2110500 }, { "epoch": 22.28, "learning_rate": 3.8860981246768054e-05, "loss": 1.4463, "step": 2111000 }, { "epoch": 22.28, "learning_rate": 3.885834291925662e-05, "loss": 1.3745, "step": 2111500 }, { "epoch": 22.29, "learning_rate": 3.88557045917452e-05, "loss": 1.4107, "step": 2112000 }, { "epoch": 22.29, "learning_rate": 3.885306626423378e-05, "loss": 1.4021, "step": 2112500 }, { "epoch": 22.3, "learning_rate": 3.8850427936722356e-05, "loss": 1.4359, "step": 2113000 }, { "epoch": 22.3, "learning_rate": 3.884778960921093e-05, "loss": 1.4052, "step": 2113500 }, { "epoch": 22.31, "learning_rate": 3.884515128169951e-05, "loss": 1.3745, "step": 2114000 }, { "epoch": 22.31, "learning_rate": 3.884251295418808e-05, "loss": 1.4098, "step": 2114500 }, { "epoch": 22.32, "learning_rate": 3.883987462667666e-05, "loss": 1.346, "step": 2115000 }, { "epoch": 22.33, "learning_rate": 3.8837236299165234e-05, "loss": 1.3932, "step": 2115500 }, { "epoch": 22.33, "learning_rate": 3.8834597971653816e-05, "loss": 1.422, "step": 2116000 }, { "epoch": 22.34, "learning_rate": 3.8831959644142384e-05, "loss": 1.4215, "step": 2116500 }, { "epoch": 22.34, "learning_rate": 3.882932131663096e-05, "loss": 1.4134, "step": 2117000 }, { "epoch": 22.35, "learning_rate": 3.882668298911954e-05, "loss": 1.4019, "step": 2117500 }, { "epoch": 22.35, "learning_rate": 3.882404466160812e-05, "loss": 1.3887, "step": 2118000 }, { "epoch": 22.36, "learning_rate": 3.882140633409669e-05, "loss": 1.3917, "step": 2118500 }, { "epoch": 22.36, "learning_rate": 3.881876800658527e-05, "loss": 1.3572, "step": 2119000 }, { "epoch": 22.37, "learning_rate": 3.8816129679073844e-05, "loss": 1.337, "step": 2119500 }, { "epoch": 22.37, "learning_rate": 3.881349135156242e-05, "loss": 1.3807, "step": 2120000 }, { "epoch": 22.38, "learning_rate": 3.8810853024050995e-05, "loss": 1.4321, "step": 2120500 }, { "epoch": 22.38, "learning_rate": 3.880821469653957e-05, "loss": 1.3691, "step": 2121000 }, { "epoch": 22.39, "learning_rate": 3.8805576369028146e-05, "loss": 1.3481, "step": 2121500 }, { "epoch": 22.39, "learning_rate": 3.880293804151672e-05, "loss": 1.3918, "step": 2122000 }, { "epoch": 22.4, "learning_rate": 3.88002997140053e-05, "loss": 1.4121, "step": 2122500 }, { "epoch": 22.4, "learning_rate": 3.879766138649388e-05, "loss": 1.4158, "step": 2123000 }, { "epoch": 22.41, "learning_rate": 3.879502305898245e-05, "loss": 1.3719, "step": 2123500 }, { "epoch": 22.42, "learning_rate": 3.8792384731471024e-05, "loss": 1.4054, "step": 2124000 }, { "epoch": 22.42, "learning_rate": 3.8789746403959606e-05, "loss": 1.3895, "step": 2124500 }, { "epoch": 22.43, "learning_rate": 3.878710807644818e-05, "loss": 1.3731, "step": 2125000 }, { "epoch": 22.43, "learning_rate": 3.878446974893676e-05, "loss": 1.4031, "step": 2125500 }, { "epoch": 22.44, "learning_rate": 3.878183142142533e-05, "loss": 1.3789, "step": 2126000 }, { "epoch": 22.44, "learning_rate": 3.877919309391391e-05, "loss": 1.3357, "step": 2126500 }, { "epoch": 22.45, "learning_rate": 3.877655476640248e-05, "loss": 1.3258, "step": 2127000 }, { "epoch": 22.45, "learning_rate": 3.877391643889106e-05, "loss": 1.4094, "step": 2127500 }, { "epoch": 22.46, "learning_rate": 3.877127811137964e-05, "loss": 1.3487, "step": 2128000 }, { "epoch": 22.46, "learning_rate": 3.876863978386821e-05, "loss": 1.4007, "step": 2128500 }, { "epoch": 22.47, "learning_rate": 3.8766001456356785e-05, "loss": 1.4097, "step": 2129000 }, { "epoch": 22.47, "learning_rate": 3.876336312884537e-05, "loss": 1.3327, "step": 2129500 }, { "epoch": 22.48, "learning_rate": 3.876072480133394e-05, "loss": 1.3239, "step": 2130000 }, { "epoch": 22.48, "learning_rate": 3.875808647382251e-05, "loss": 1.3627, "step": 2130500 }, { "epoch": 22.49, "learning_rate": 3.8755448146311094e-05, "loss": 1.3628, "step": 2131000 }, { "epoch": 22.49, "learning_rate": 3.875280981879967e-05, "loss": 1.3855, "step": 2131500 }, { "epoch": 22.5, "learning_rate": 3.8750171491288245e-05, "loss": 1.4129, "step": 2132000 }, { "epoch": 22.5, "learning_rate": 3.874753316377682e-05, "loss": 1.4109, "step": 2132500 }, { "epoch": 22.51, "learning_rate": 3.8744894836265396e-05, "loss": 1.3777, "step": 2133000 }, { "epoch": 22.52, "learning_rate": 3.874225650875397e-05, "loss": 1.3298, "step": 2133500 }, { "epoch": 22.52, "learning_rate": 3.873961818124255e-05, "loss": 1.4152, "step": 2134000 }, { "epoch": 22.53, "learning_rate": 3.873697985373112e-05, "loss": 1.4411, "step": 2134500 }, { "epoch": 22.53, "learning_rate": 3.8734341526219705e-05, "loss": 1.3461, "step": 2135000 }, { "epoch": 22.54, "learning_rate": 3.873170319870827e-05, "loss": 1.39, "step": 2135500 }, { "epoch": 22.54, "learning_rate": 3.872906487119685e-05, "loss": 1.392, "step": 2136000 }, { "epoch": 22.55, "learning_rate": 3.872642654368543e-05, "loss": 1.3663, "step": 2136500 }, { "epoch": 22.55, "learning_rate": 3.8723788216174007e-05, "loss": 1.3575, "step": 2137000 }, { "epoch": 22.56, "learning_rate": 3.872114988866258e-05, "loss": 1.3611, "step": 2137500 }, { "epoch": 22.56, "learning_rate": 3.871851156115116e-05, "loss": 1.4542, "step": 2138000 }, { "epoch": 22.57, "learning_rate": 3.871587323363973e-05, "loss": 1.4129, "step": 2138500 }, { "epoch": 22.57, "learning_rate": 3.871323490612831e-05, "loss": 1.3629, "step": 2139000 }, { "epoch": 22.58, "learning_rate": 3.8710596578616884e-05, "loss": 1.4065, "step": 2139500 }, { "epoch": 22.58, "learning_rate": 3.870795825110546e-05, "loss": 1.4103, "step": 2140000 }, { "epoch": 22.59, "learning_rate": 3.8705319923594035e-05, "loss": 1.386, "step": 2140500 }, { "epoch": 22.59, "learning_rate": 3.870268159608261e-05, "loss": 1.34, "step": 2141000 }, { "epoch": 22.6, "learning_rate": 3.870004326857119e-05, "loss": 1.4004, "step": 2141500 }, { "epoch": 22.61, "learning_rate": 3.869740494105977e-05, "loss": 1.3937, "step": 2142000 }, { "epoch": 22.61, "learning_rate": 3.869476661354834e-05, "loss": 1.3376, "step": 2142500 }, { "epoch": 22.62, "learning_rate": 3.869212828603692e-05, "loss": 1.3208, "step": 2143000 }, { "epoch": 22.62, "learning_rate": 3.8689489958525495e-05, "loss": 1.3979, "step": 2143500 }, { "epoch": 22.63, "learning_rate": 3.868685163101407e-05, "loss": 1.4188, "step": 2144000 }, { "epoch": 22.63, "learning_rate": 3.8684213303502646e-05, "loss": 1.3614, "step": 2144500 }, { "epoch": 22.64, "learning_rate": 3.868157497599122e-05, "loss": 1.4315, "step": 2145000 }, { "epoch": 22.64, "learning_rate": 3.86789366484798e-05, "loss": 1.3498, "step": 2145500 }, { "epoch": 22.65, "learning_rate": 3.867629832096837e-05, "loss": 1.3904, "step": 2146000 }, { "epoch": 22.65, "learning_rate": 3.8673659993456954e-05, "loss": 1.389, "step": 2146500 }, { "epoch": 22.66, "learning_rate": 3.867102166594553e-05, "loss": 1.3774, "step": 2147000 }, { "epoch": 22.66, "learning_rate": 3.86683833384341e-05, "loss": 1.3809, "step": 2147500 }, { "epoch": 22.67, "learning_rate": 3.8665745010922674e-05, "loss": 1.3985, "step": 2148000 }, { "epoch": 22.67, "learning_rate": 3.8663106683411256e-05, "loss": 1.4532, "step": 2148500 }, { "epoch": 22.68, "learning_rate": 3.866046835589983e-05, "loss": 1.4191, "step": 2149000 }, { "epoch": 22.68, "learning_rate": 3.86578300283884e-05, "loss": 1.3776, "step": 2149500 }, { "epoch": 22.69, "learning_rate": 3.865519170087698e-05, "loss": 1.3557, "step": 2150000 }, { "epoch": 22.69, "learning_rate": 3.865255337336556e-05, "loss": 1.4316, "step": 2150500 }, { "epoch": 22.7, "learning_rate": 3.8649915045854134e-05, "loss": 1.3722, "step": 2151000 }, { "epoch": 22.71, "learning_rate": 3.864727671834271e-05, "loss": 1.3578, "step": 2151500 }, { "epoch": 22.71, "learning_rate": 3.8644638390831285e-05, "loss": 1.3957, "step": 2152000 }, { "epoch": 22.72, "learning_rate": 3.864200006331986e-05, "loss": 1.3812, "step": 2152500 }, { "epoch": 22.72, "learning_rate": 3.8639361735808436e-05, "loss": 1.3718, "step": 2153000 }, { "epoch": 22.73, "learning_rate": 3.863672340829702e-05, "loss": 1.378, "step": 2153500 }, { "epoch": 22.73, "learning_rate": 3.8634085080785593e-05, "loss": 1.4019, "step": 2154000 }, { "epoch": 22.74, "learning_rate": 3.863144675327416e-05, "loss": 1.4038, "step": 2154500 }, { "epoch": 22.74, "learning_rate": 3.8628808425762744e-05, "loss": 1.3565, "step": 2155000 }, { "epoch": 22.75, "learning_rate": 3.862617009825132e-05, "loss": 1.4153, "step": 2155500 }, { "epoch": 22.75, "learning_rate": 3.8623531770739895e-05, "loss": 1.3661, "step": 2156000 }, { "epoch": 22.76, "learning_rate": 3.862089344322847e-05, "loss": 1.376, "step": 2156500 }, { "epoch": 22.76, "learning_rate": 3.8618255115717046e-05, "loss": 1.3722, "step": 2157000 }, { "epoch": 22.77, "learning_rate": 3.861561678820562e-05, "loss": 1.3942, "step": 2157500 }, { "epoch": 22.77, "learning_rate": 3.86129784606942e-05, "loss": 1.4198, "step": 2158000 }, { "epoch": 22.78, "learning_rate": 3.861034013318278e-05, "loss": 1.3766, "step": 2158500 }, { "epoch": 22.78, "learning_rate": 3.860770180567135e-05, "loss": 1.399, "step": 2159000 }, { "epoch": 22.79, "learning_rate": 3.8605063478159924e-05, "loss": 1.4037, "step": 2159500 }, { "epoch": 22.8, "learning_rate": 3.86024251506485e-05, "loss": 1.4365, "step": 2160000 }, { "epoch": 22.8, "learning_rate": 3.859978682313708e-05, "loss": 1.4006, "step": 2160500 }, { "epoch": 22.81, "learning_rate": 3.859714849562566e-05, "loss": 1.3187, "step": 2161000 }, { "epoch": 22.81, "learning_rate": 3.8594510168114226e-05, "loss": 1.3614, "step": 2161500 }, { "epoch": 22.82, "learning_rate": 3.859187184060281e-05, "loss": 1.3766, "step": 2162000 }, { "epoch": 22.82, "learning_rate": 3.8589233513091384e-05, "loss": 1.3964, "step": 2162500 }, { "epoch": 22.83, "learning_rate": 3.858659518557996e-05, "loss": 1.3745, "step": 2163000 }, { "epoch": 22.83, "learning_rate": 3.8583956858068535e-05, "loss": 1.3584, "step": 2163500 }, { "epoch": 22.84, "learning_rate": 3.858131853055711e-05, "loss": 1.392, "step": 2164000 }, { "epoch": 22.84, "learning_rate": 3.8578680203045685e-05, "loss": 1.3489, "step": 2164500 }, { "epoch": 22.85, "learning_rate": 3.857604187553426e-05, "loss": 1.4005, "step": 2165000 }, { "epoch": 22.85, "learning_rate": 3.857340354802284e-05, "loss": 1.3644, "step": 2165500 }, { "epoch": 22.86, "learning_rate": 3.857076522051142e-05, "loss": 1.4116, "step": 2166000 }, { "epoch": 22.86, "learning_rate": 3.856812689299999e-05, "loss": 1.3582, "step": 2166500 }, { "epoch": 22.87, "learning_rate": 3.856548856548857e-05, "loss": 1.3906, "step": 2167000 }, { "epoch": 22.87, "learning_rate": 3.8562850237977145e-05, "loss": 1.3645, "step": 2167500 }, { "epoch": 22.88, "learning_rate": 3.856021191046572e-05, "loss": 1.3256, "step": 2168000 }, { "epoch": 22.88, "learning_rate": 3.8557573582954296e-05, "loss": 1.4385, "step": 2168500 }, { "epoch": 22.89, "learning_rate": 3.855493525544287e-05, "loss": 1.3783, "step": 2169000 }, { "epoch": 22.9, "learning_rate": 3.855229692793145e-05, "loss": 1.3454, "step": 2169500 }, { "epoch": 22.9, "learning_rate": 3.854965860042002e-05, "loss": 1.4553, "step": 2170000 }, { "epoch": 22.91, "learning_rate": 3.8547020272908605e-05, "loss": 1.3766, "step": 2170500 }, { "epoch": 22.91, "learning_rate": 3.8544381945397174e-05, "loss": 1.3774, "step": 2171000 }, { "epoch": 22.92, "learning_rate": 3.854174361788575e-05, "loss": 1.3652, "step": 2171500 }, { "epoch": 22.92, "learning_rate": 3.8539105290374325e-05, "loss": 1.4615, "step": 2172000 }, { "epoch": 22.93, "learning_rate": 3.853646696286291e-05, "loss": 1.3481, "step": 2172500 }, { "epoch": 22.93, "learning_rate": 3.853382863535148e-05, "loss": 1.3796, "step": 2173000 }, { "epoch": 22.94, "learning_rate": 3.853119030784005e-05, "loss": 1.4418, "step": 2173500 }, { "epoch": 22.94, "learning_rate": 3.852855198032863e-05, "loss": 1.3512, "step": 2174000 }, { "epoch": 22.95, "learning_rate": 3.852591365281721e-05, "loss": 1.3985, "step": 2174500 }, { "epoch": 22.95, "learning_rate": 3.8523275325305784e-05, "loss": 1.3921, "step": 2175000 }, { "epoch": 22.96, "learning_rate": 3.852063699779436e-05, "loss": 1.405, "step": 2175500 }, { "epoch": 22.96, "learning_rate": 3.8517998670282935e-05, "loss": 1.4015, "step": 2176000 }, { "epoch": 22.97, "learning_rate": 3.851536034277151e-05, "loss": 1.3738, "step": 2176500 }, { "epoch": 22.97, "learning_rate": 3.8512722015260086e-05, "loss": 1.3145, "step": 2177000 }, { "epoch": 22.98, "learning_rate": 3.851008368774867e-05, "loss": 1.3293, "step": 2177500 }, { "epoch": 22.99, "learning_rate": 3.850744536023724e-05, "loss": 1.4203, "step": 2178000 }, { "epoch": 22.99, "learning_rate": 3.850480703272581e-05, "loss": 1.3633, "step": 2178500 }, { "epoch": 23.0, "learning_rate": 3.8502168705214395e-05, "loss": 1.4084, "step": 2179000 }, { "epoch": 23.0, "learning_rate": 3.849953037770297e-05, "loss": 1.3879, "step": 2179500 }, { "epoch": 23.01, "learning_rate": 3.8496892050191546e-05, "loss": 1.3911, "step": 2180000 }, { "epoch": 23.01, "learning_rate": 3.849425372268012e-05, "loss": 1.3861, "step": 2180500 }, { "epoch": 23.02, "learning_rate": 3.84916153951687e-05, "loss": 1.3439, "step": 2181000 }, { "epoch": 23.02, "learning_rate": 3.848897706765727e-05, "loss": 1.3995, "step": 2181500 }, { "epoch": 23.03, "learning_rate": 3.848633874014585e-05, "loss": 1.3432, "step": 2182000 }, { "epoch": 23.03, "learning_rate": 3.848370041263443e-05, "loss": 1.3438, "step": 2182500 }, { "epoch": 23.04, "learning_rate": 3.8481062085123e-05, "loss": 1.3171, "step": 2183000 }, { "epoch": 23.04, "learning_rate": 3.8478423757611574e-05, "loss": 1.3895, "step": 2183500 }, { "epoch": 23.05, "learning_rate": 3.847578543010015e-05, "loss": 1.3732, "step": 2184000 }, { "epoch": 23.05, "learning_rate": 3.847314710258873e-05, "loss": 1.3664, "step": 2184500 }, { "epoch": 23.06, "learning_rate": 3.847050877507731e-05, "loss": 1.4002, "step": 2185000 }, { "epoch": 23.06, "learning_rate": 3.8467870447565876e-05, "loss": 1.3586, "step": 2185500 }, { "epoch": 23.07, "learning_rate": 3.846523212005446e-05, "loss": 1.3735, "step": 2186000 }, { "epoch": 23.07, "learning_rate": 3.8462593792543034e-05, "loss": 1.3873, "step": 2186500 }, { "epoch": 23.08, "learning_rate": 3.845995546503161e-05, "loss": 1.3196, "step": 2187000 }, { "epoch": 23.09, "learning_rate": 3.8457317137520185e-05, "loss": 1.3597, "step": 2187500 }, { "epoch": 23.09, "learning_rate": 3.845467881000876e-05, "loss": 1.4094, "step": 2188000 }, { "epoch": 23.1, "learning_rate": 3.8452040482497336e-05, "loss": 1.4206, "step": 2188500 }, { "epoch": 23.1, "learning_rate": 3.844940215498591e-05, "loss": 1.3574, "step": 2189000 }, { "epoch": 23.11, "learning_rate": 3.8446763827474494e-05, "loss": 1.3849, "step": 2189500 }, { "epoch": 23.11, "learning_rate": 3.844412549996306e-05, "loss": 1.3598, "step": 2190000 }, { "epoch": 23.12, "learning_rate": 3.844148717245164e-05, "loss": 1.3666, "step": 2190500 }, { "epoch": 23.12, "learning_rate": 3.843884884494022e-05, "loss": 1.4157, "step": 2191000 }, { "epoch": 23.13, "learning_rate": 3.8436210517428796e-05, "loss": 1.3489, "step": 2191500 }, { "epoch": 23.13, "learning_rate": 3.843357218991737e-05, "loss": 1.3402, "step": 2192000 }, { "epoch": 23.14, "learning_rate": 3.843093386240595e-05, "loss": 1.3346, "step": 2192500 }, { "epoch": 23.14, "learning_rate": 3.842829553489452e-05, "loss": 1.3385, "step": 2193000 }, { "epoch": 23.15, "learning_rate": 3.84256572073831e-05, "loss": 1.3511, "step": 2193500 }, { "epoch": 23.15, "learning_rate": 3.842301887987167e-05, "loss": 1.3287, "step": 2194000 }, { "epoch": 23.16, "learning_rate": 3.8420380552360255e-05, "loss": 1.3241, "step": 2194500 }, { "epoch": 23.16, "learning_rate": 3.8417742224848824e-05, "loss": 1.4058, "step": 2195000 }, { "epoch": 23.17, "learning_rate": 3.84151038973374e-05, "loss": 1.4133, "step": 2195500 }, { "epoch": 23.18, "learning_rate": 3.8412465569825975e-05, "loss": 1.3571, "step": 2196000 }, { "epoch": 23.18, "learning_rate": 3.840982724231456e-05, "loss": 1.3674, "step": 2196500 }, { "epoch": 23.19, "learning_rate": 3.8407188914803126e-05, "loss": 1.3575, "step": 2197000 }, { "epoch": 23.19, "learning_rate": 3.84045505872917e-05, "loss": 1.4051, "step": 2197500 }, { "epoch": 23.2, "learning_rate": 3.8401912259780284e-05, "loss": 1.3374, "step": 2198000 }, { "epoch": 23.2, "learning_rate": 3.839927393226886e-05, "loss": 1.3463, "step": 2198500 }, { "epoch": 23.21, "learning_rate": 3.8396635604757435e-05, "loss": 1.3822, "step": 2199000 }, { "epoch": 23.21, "learning_rate": 3.839399727724601e-05, "loss": 1.3736, "step": 2199500 }, { "epoch": 23.22, "learning_rate": 3.8391358949734586e-05, "loss": 1.4278, "step": 2200000 }, { "epoch": 23.22, "learning_rate": 3.838872062222316e-05, "loss": 1.3306, "step": 2200500 }, { "epoch": 23.23, "learning_rate": 3.838608229471174e-05, "loss": 1.3677, "step": 2201000 }, { "epoch": 23.23, "learning_rate": 3.838344396720032e-05, "loss": 1.3634, "step": 2201500 }, { "epoch": 23.24, "learning_rate": 3.838080563968889e-05, "loss": 1.3823, "step": 2202000 }, { "epoch": 23.24, "learning_rate": 3.837816731217746e-05, "loss": 1.3411, "step": 2202500 }, { "epoch": 23.25, "learning_rate": 3.8375528984666045e-05, "loss": 1.4209, "step": 2203000 }, { "epoch": 23.25, "learning_rate": 3.837289065715462e-05, "loss": 1.3392, "step": 2203500 }, { "epoch": 23.26, "learning_rate": 3.8370252329643196e-05, "loss": 1.4029, "step": 2204000 }, { "epoch": 23.26, "learning_rate": 3.836761400213177e-05, "loss": 1.3981, "step": 2204500 }, { "epoch": 23.27, "learning_rate": 3.836497567462035e-05, "loss": 1.4187, "step": 2205000 }, { "epoch": 23.28, "learning_rate": 3.836233734710892e-05, "loss": 1.3895, "step": 2205500 }, { "epoch": 23.28, "learning_rate": 3.83596990195975e-05, "loss": 1.3708, "step": 2206000 }, { "epoch": 23.29, "learning_rate": 3.8357060692086074e-05, "loss": 1.3944, "step": 2206500 }, { "epoch": 23.29, "learning_rate": 3.835442236457465e-05, "loss": 1.409, "step": 2207000 }, { "epoch": 23.3, "learning_rate": 3.8351784037063225e-05, "loss": 1.3874, "step": 2207500 }, { "epoch": 23.3, "learning_rate": 3.83491457095518e-05, "loss": 1.4019, "step": 2208000 }, { "epoch": 23.31, "learning_rate": 3.834650738204038e-05, "loss": 1.3874, "step": 2208500 }, { "epoch": 23.31, "learning_rate": 3.834386905452895e-05, "loss": 1.4264, "step": 2209000 }, { "epoch": 23.32, "learning_rate": 3.834123072701753e-05, "loss": 1.4103, "step": 2209500 }, { "epoch": 23.32, "learning_rate": 3.833859239950611e-05, "loss": 1.3853, "step": 2210000 }, { "epoch": 23.33, "learning_rate": 3.8335954071994685e-05, "loss": 1.3222, "step": 2210500 }, { "epoch": 23.33, "learning_rate": 3.833331574448326e-05, "loss": 1.3446, "step": 2211000 }, { "epoch": 23.34, "learning_rate": 3.8330677416971835e-05, "loss": 1.3975, "step": 2211500 }, { "epoch": 23.34, "learning_rate": 3.832803908946041e-05, "loss": 1.3844, "step": 2212000 }, { "epoch": 23.35, "learning_rate": 3.8325400761948986e-05, "loss": 1.3861, "step": 2212500 }, { "epoch": 23.35, "learning_rate": 3.832276243443756e-05, "loss": 1.3848, "step": 2213000 }, { "epoch": 23.36, "learning_rate": 3.8320124106926144e-05, "loss": 1.3496, "step": 2213500 }, { "epoch": 23.37, "learning_rate": 3.831748577941471e-05, "loss": 1.4074, "step": 2214000 }, { "epoch": 23.37, "learning_rate": 3.831484745190329e-05, "loss": 1.407, "step": 2214500 }, { "epoch": 23.38, "learning_rate": 3.831220912439187e-05, "loss": 1.3482, "step": 2215000 }, { "epoch": 23.38, "learning_rate": 3.8309570796880446e-05, "loss": 1.3845, "step": 2215500 }, { "epoch": 23.39, "learning_rate": 3.8306932469369015e-05, "loss": 1.385, "step": 2216000 }, { "epoch": 23.39, "learning_rate": 3.83042941418576e-05, "loss": 1.3484, "step": 2216500 }, { "epoch": 23.4, "learning_rate": 3.830165581434617e-05, "loss": 1.3774, "step": 2217000 }, { "epoch": 23.4, "learning_rate": 3.829901748683475e-05, "loss": 1.3618, "step": 2217500 }, { "epoch": 23.41, "learning_rate": 3.8296379159323324e-05, "loss": 1.348, "step": 2218000 }, { "epoch": 23.41, "learning_rate": 3.82937408318119e-05, "loss": 1.336, "step": 2218500 }, { "epoch": 23.42, "learning_rate": 3.8291102504300475e-05, "loss": 1.3742, "step": 2219000 }, { "epoch": 23.42, "learning_rate": 3.828846417678905e-05, "loss": 1.3032, "step": 2219500 }, { "epoch": 23.43, "learning_rate": 3.828582584927763e-05, "loss": 1.3695, "step": 2220000 }, { "epoch": 23.43, "learning_rate": 3.828318752176621e-05, "loss": 1.3447, "step": 2220500 }, { "epoch": 23.44, "learning_rate": 3.8280549194254777e-05, "loss": 1.3956, "step": 2221000 }, { "epoch": 23.44, "learning_rate": 3.827791086674335e-05, "loss": 1.3412, "step": 2221500 }, { "epoch": 23.45, "learning_rate": 3.8275272539231934e-05, "loss": 1.3499, "step": 2222000 }, { "epoch": 23.45, "learning_rate": 3.827263421172051e-05, "loss": 1.3614, "step": 2222500 }, { "epoch": 23.46, "learning_rate": 3.8269995884209085e-05, "loss": 1.3847, "step": 2223000 }, { "epoch": 23.47, "learning_rate": 3.826735755669766e-05, "loss": 1.3615, "step": 2223500 }, { "epoch": 23.47, "learning_rate": 3.8264719229186236e-05, "loss": 1.3482, "step": 2224000 }, { "epoch": 23.48, "learning_rate": 3.826208090167481e-05, "loss": 1.3258, "step": 2224500 }, { "epoch": 23.48, "learning_rate": 3.825944257416339e-05, "loss": 1.3799, "step": 2225000 }, { "epoch": 23.49, "learning_rate": 3.825680424665196e-05, "loss": 1.376, "step": 2225500 }, { "epoch": 23.49, "learning_rate": 3.825416591914054e-05, "loss": 1.3863, "step": 2226000 }, { "epoch": 23.5, "learning_rate": 3.8251527591629114e-05, "loss": 1.4004, "step": 2226500 }, { "epoch": 23.5, "learning_rate": 3.8248889264117696e-05, "loss": 1.387, "step": 2227000 }, { "epoch": 23.51, "learning_rate": 3.824625093660627e-05, "loss": 1.3325, "step": 2227500 }, { "epoch": 23.51, "learning_rate": 3.824361260909484e-05, "loss": 1.4159, "step": 2228000 }, { "epoch": 23.52, "learning_rate": 3.824097428158342e-05, "loss": 1.3794, "step": 2228500 }, { "epoch": 23.52, "learning_rate": 3.8238335954072e-05, "loss": 1.3505, "step": 2229000 }, { "epoch": 23.53, "learning_rate": 3.823569762656057e-05, "loss": 1.4141, "step": 2229500 }, { "epoch": 23.53, "learning_rate": 3.823305929904915e-05, "loss": 1.3689, "step": 2230000 }, { "epoch": 23.54, "learning_rate": 3.8230420971537724e-05, "loss": 1.4094, "step": 2230500 }, { "epoch": 23.54, "learning_rate": 3.82277826440263e-05, "loss": 1.3689, "step": 2231000 }, { "epoch": 23.55, "learning_rate": 3.8225144316514875e-05, "loss": 1.3375, "step": 2231500 }, { "epoch": 23.55, "learning_rate": 3.822250598900346e-05, "loss": 1.3547, "step": 2232000 }, { "epoch": 23.56, "learning_rate": 3.821986766149203e-05, "loss": 1.3996, "step": 2232500 }, { "epoch": 23.57, "learning_rate": 3.82172293339806e-05, "loss": 1.4071, "step": 2233000 }, { "epoch": 23.57, "learning_rate": 3.821459100646918e-05, "loss": 1.3272, "step": 2233500 }, { "epoch": 23.58, "learning_rate": 3.821195267895776e-05, "loss": 1.3136, "step": 2234000 }, { "epoch": 23.58, "learning_rate": 3.8209314351446335e-05, "loss": 1.3888, "step": 2234500 }, { "epoch": 23.59, "learning_rate": 3.8206676023934904e-05, "loss": 1.365, "step": 2235000 }, { "epoch": 23.59, "learning_rate": 3.8204037696423486e-05, "loss": 1.396, "step": 2235500 }, { "epoch": 23.6, "learning_rate": 3.820139936891206e-05, "loss": 1.32, "step": 2236000 }, { "epoch": 23.6, "learning_rate": 3.819876104140064e-05, "loss": 1.3607, "step": 2236500 }, { "epoch": 23.61, "learning_rate": 3.819612271388921e-05, "loss": 1.3591, "step": 2237000 }, { "epoch": 23.61, "learning_rate": 3.819348438637779e-05, "loss": 1.3687, "step": 2237500 }, { "epoch": 23.62, "learning_rate": 3.8190846058866363e-05, "loss": 1.3437, "step": 2238000 }, { "epoch": 23.62, "learning_rate": 3.818820773135494e-05, "loss": 1.3831, "step": 2238500 }, { "epoch": 23.63, "learning_rate": 3.818556940384352e-05, "loss": 1.3939, "step": 2239000 }, { "epoch": 23.63, "learning_rate": 3.81829310763321e-05, "loss": 1.3556, "step": 2239500 }, { "epoch": 23.64, "learning_rate": 3.8180292748820665e-05, "loss": 1.3852, "step": 2240000 }, { "epoch": 23.64, "learning_rate": 3.817765442130925e-05, "loss": 1.3836, "step": 2240500 }, { "epoch": 23.65, "learning_rate": 3.817501609379782e-05, "loss": 1.3796, "step": 2241000 }, { "epoch": 23.66, "learning_rate": 3.81723777662864e-05, "loss": 1.4062, "step": 2241500 }, { "epoch": 23.66, "learning_rate": 3.8169739438774974e-05, "loss": 1.3856, "step": 2242000 }, { "epoch": 23.67, "learning_rate": 3.816710111126355e-05, "loss": 1.3635, "step": 2242500 }, { "epoch": 23.67, "learning_rate": 3.8164462783752125e-05, "loss": 1.4058, "step": 2243000 }, { "epoch": 23.68, "learning_rate": 3.81618244562407e-05, "loss": 1.371, "step": 2243500 }, { "epoch": 23.68, "learning_rate": 3.815918612872928e-05, "loss": 1.3858, "step": 2244000 }, { "epoch": 23.69, "learning_rate": 3.815654780121785e-05, "loss": 1.386, "step": 2244500 }, { "epoch": 23.69, "learning_rate": 3.815390947370643e-05, "loss": 1.3541, "step": 2245000 }, { "epoch": 23.7, "learning_rate": 3.8151271146195e-05, "loss": 1.3421, "step": 2245500 }, { "epoch": 23.7, "learning_rate": 3.8148632818683585e-05, "loss": 1.3792, "step": 2246000 }, { "epoch": 23.71, "learning_rate": 3.814599449117216e-05, "loss": 1.3385, "step": 2246500 }, { "epoch": 23.71, "learning_rate": 3.814335616366073e-05, "loss": 1.386, "step": 2247000 }, { "epoch": 23.72, "learning_rate": 3.814071783614931e-05, "loss": 1.4082, "step": 2247500 }, { "epoch": 23.72, "learning_rate": 3.813807950863789e-05, "loss": 1.3834, "step": 2248000 }, { "epoch": 23.73, "learning_rate": 3.813544118112646e-05, "loss": 1.3929, "step": 2248500 }, { "epoch": 23.73, "learning_rate": 3.813280285361504e-05, "loss": 1.3467, "step": 2249000 }, { "epoch": 23.74, "learning_rate": 3.813016452610361e-05, "loss": 1.3891, "step": 2249500 }, { "epoch": 23.74, "learning_rate": 3.812752619859219e-05, "loss": 1.3199, "step": 2250000 }, { "epoch": 23.75, "learning_rate": 3.8124887871080764e-05, "loss": 1.3333, "step": 2250500 }, { "epoch": 23.76, "learning_rate": 3.8122249543569346e-05, "loss": 1.3757, "step": 2251000 }, { "epoch": 23.76, "learning_rate": 3.811961121605792e-05, "loss": 1.3723, "step": 2251500 }, { "epoch": 23.77, "learning_rate": 3.811697288854649e-05, "loss": 1.3288, "step": 2252000 }, { "epoch": 23.77, "learning_rate": 3.811433456103507e-05, "loss": 1.3807, "step": 2252500 }, { "epoch": 23.78, "learning_rate": 3.811169623352365e-05, "loss": 1.4166, "step": 2253000 }, { "epoch": 23.78, "learning_rate": 3.8109057906012224e-05, "loss": 1.3975, "step": 2253500 }, { "epoch": 23.79, "learning_rate": 3.81064195785008e-05, "loss": 1.3406, "step": 2254000 }, { "epoch": 23.79, "learning_rate": 3.8103781250989375e-05, "loss": 1.3712, "step": 2254500 }, { "epoch": 23.8, "learning_rate": 3.810114292347795e-05, "loss": 1.3369, "step": 2255000 }, { "epoch": 23.8, "learning_rate": 3.8098504595966526e-05, "loss": 1.3574, "step": 2255500 }, { "epoch": 23.81, "learning_rate": 3.809586626845511e-05, "loss": 1.3552, "step": 2256000 }, { "epoch": 23.81, "learning_rate": 3.809322794094368e-05, "loss": 1.3622, "step": 2256500 }, { "epoch": 23.82, "learning_rate": 3.809058961343225e-05, "loss": 1.3961, "step": 2257000 }, { "epoch": 23.82, "learning_rate": 3.808795128592083e-05, "loss": 1.4173, "step": 2257500 }, { "epoch": 23.83, "learning_rate": 3.808531295840941e-05, "loss": 1.3407, "step": 2258000 }, { "epoch": 23.83, "learning_rate": 3.8082674630897985e-05, "loss": 1.3753, "step": 2258500 }, { "epoch": 23.84, "learning_rate": 3.8080036303386554e-05, "loss": 1.3137, "step": 2259000 }, { "epoch": 23.85, "learning_rate": 3.8077397975875136e-05, "loss": 1.4283, "step": 2259500 }, { "epoch": 23.85, "learning_rate": 3.807475964836371e-05, "loss": 1.3533, "step": 2260000 }, { "epoch": 23.86, "learning_rate": 3.807212132085229e-05, "loss": 1.3884, "step": 2260500 }, { "epoch": 23.86, "learning_rate": 3.806948299334086e-05, "loss": 1.401, "step": 2261000 }, { "epoch": 23.87, "learning_rate": 3.806684466582944e-05, "loss": 1.4219, "step": 2261500 }, { "epoch": 23.87, "learning_rate": 3.8064206338318014e-05, "loss": 1.377, "step": 2262000 }, { "epoch": 23.88, "learning_rate": 3.806156801080659e-05, "loss": 1.4328, "step": 2262500 }, { "epoch": 23.88, "learning_rate": 3.805892968329517e-05, "loss": 1.3641, "step": 2263000 }, { "epoch": 23.89, "learning_rate": 3.805629135578374e-05, "loss": 1.4016, "step": 2263500 }, { "epoch": 23.89, "learning_rate": 3.8053653028272316e-05, "loss": 1.3978, "step": 2264000 }, { "epoch": 23.9, "learning_rate": 3.80510147007609e-05, "loss": 1.3267, "step": 2264500 }, { "epoch": 23.9, "learning_rate": 3.8048376373249474e-05, "loss": 1.3655, "step": 2265000 }, { "epoch": 23.91, "learning_rate": 3.804573804573805e-05, "loss": 1.35, "step": 2265500 }, { "epoch": 23.91, "learning_rate": 3.8043099718226625e-05, "loss": 1.3881, "step": 2266000 }, { "epoch": 23.92, "learning_rate": 3.80404613907152e-05, "loss": 1.343, "step": 2266500 }, { "epoch": 23.92, "learning_rate": 3.8037823063203776e-05, "loss": 1.3613, "step": 2267000 }, { "epoch": 23.93, "learning_rate": 3.803518473569235e-05, "loss": 1.338, "step": 2267500 }, { "epoch": 23.93, "learning_rate": 3.803254640818093e-05, "loss": 1.3503, "step": 2268000 }, { "epoch": 23.94, "learning_rate": 3.80299080806695e-05, "loss": 1.3712, "step": 2268500 }, { "epoch": 23.95, "learning_rate": 3.802726975315808e-05, "loss": 1.3547, "step": 2269000 }, { "epoch": 23.95, "learning_rate": 3.802463142564665e-05, "loss": 1.3667, "step": 2269500 }, { "epoch": 23.96, "learning_rate": 3.8021993098135235e-05, "loss": 1.345, "step": 2270000 }, { "epoch": 23.96, "learning_rate": 3.801935477062381e-05, "loss": 1.3761, "step": 2270500 }, { "epoch": 23.97, "learning_rate": 3.801671644311238e-05, "loss": 1.3354, "step": 2271000 }, { "epoch": 23.97, "learning_rate": 3.801407811560096e-05, "loss": 1.3733, "step": 2271500 }, { "epoch": 23.98, "learning_rate": 3.801143978808954e-05, "loss": 1.3774, "step": 2272000 }, { "epoch": 23.98, "learning_rate": 3.800880146057811e-05, "loss": 1.3339, "step": 2272500 }, { "epoch": 23.99, "learning_rate": 3.800616313306669e-05, "loss": 1.4039, "step": 2273000 }, { "epoch": 23.99, "learning_rate": 3.8003524805555264e-05, "loss": 1.3974, "step": 2273500 }, { "epoch": 24.0, "learning_rate": 3.800088647804384e-05, "loss": 1.3454, "step": 2274000 }, { "epoch": 24.0, "learning_rate": 3.7998248150532415e-05, "loss": 1.3458, "step": 2274500 }, { "epoch": 24.01, "learning_rate": 3.7995609823021e-05, "loss": 1.4209, "step": 2275000 }, { "epoch": 24.01, "learning_rate": 3.7992971495509566e-05, "loss": 1.3783, "step": 2275500 }, { "epoch": 24.02, "learning_rate": 3.799033316799814e-05, "loss": 1.3879, "step": 2276000 }, { "epoch": 24.02, "learning_rate": 3.798769484048672e-05, "loss": 1.3593, "step": 2276500 }, { "epoch": 24.03, "learning_rate": 3.79850565129753e-05, "loss": 1.4137, "step": 2277000 }, { "epoch": 24.04, "learning_rate": 3.7982418185463874e-05, "loss": 1.4114, "step": 2277500 }, { "epoch": 24.04, "learning_rate": 3.797977985795245e-05, "loss": 1.3551, "step": 2278000 }, { "epoch": 24.05, "learning_rate": 3.7977141530441025e-05, "loss": 1.3305, "step": 2278500 }, { "epoch": 24.05, "learning_rate": 3.79745032029296e-05, "loss": 1.3346, "step": 2279000 }, { "epoch": 24.06, "learning_rate": 3.7971864875418176e-05, "loss": 1.4007, "step": 2279500 }, { "epoch": 24.06, "learning_rate": 3.796922654790676e-05, "loss": 1.297, "step": 2280000 }, { "epoch": 24.07, "learning_rate": 3.796658822039533e-05, "loss": 1.3829, "step": 2280500 }, { "epoch": 24.07, "learning_rate": 3.79639498928839e-05, "loss": 1.3887, "step": 2281000 }, { "epoch": 24.08, "learning_rate": 3.796131156537248e-05, "loss": 1.3508, "step": 2281500 }, { "epoch": 24.08, "learning_rate": 3.795867323786106e-05, "loss": 1.3669, "step": 2282000 }, { "epoch": 24.09, "learning_rate": 3.795603491034963e-05, "loss": 1.3171, "step": 2282500 }, { "epoch": 24.09, "learning_rate": 3.7953396582838205e-05, "loss": 1.3719, "step": 2283000 }, { "epoch": 24.1, "learning_rate": 3.795075825532679e-05, "loss": 1.4377, "step": 2283500 }, { "epoch": 24.1, "learning_rate": 3.794811992781536e-05, "loss": 1.4052, "step": 2284000 }, { "epoch": 24.11, "learning_rate": 3.794548160030394e-05, "loss": 1.335, "step": 2284500 }, { "epoch": 24.11, "learning_rate": 3.7942843272792513e-05, "loss": 1.3502, "step": 2285000 }, { "epoch": 24.12, "learning_rate": 3.794020494528109e-05, "loss": 1.3991, "step": 2285500 }, { "epoch": 24.12, "learning_rate": 3.7937566617769664e-05, "loss": 1.3343, "step": 2286000 }, { "epoch": 24.13, "learning_rate": 3.793492829025824e-05, "loss": 1.3683, "step": 2286500 }, { "epoch": 24.14, "learning_rate": 3.793228996274682e-05, "loss": 1.3602, "step": 2287000 }, { "epoch": 24.14, "learning_rate": 3.792965163523539e-05, "loss": 1.4024, "step": 2287500 }, { "epoch": 24.15, "learning_rate": 3.7927013307723966e-05, "loss": 1.3511, "step": 2288000 }, { "epoch": 24.15, "learning_rate": 3.792437498021255e-05, "loss": 1.3648, "step": 2288500 }, { "epoch": 24.16, "learning_rate": 3.7921736652701124e-05, "loss": 1.3665, "step": 2289000 }, { "epoch": 24.16, "learning_rate": 3.79190983251897e-05, "loss": 1.3529, "step": 2289500 }, { "epoch": 24.17, "learning_rate": 3.7916459997678275e-05, "loss": 1.4358, "step": 2290000 }, { "epoch": 24.17, "learning_rate": 3.791382167016685e-05, "loss": 1.3208, "step": 2290500 }, { "epoch": 24.18, "learning_rate": 3.7911183342655426e-05, "loss": 1.365, "step": 2291000 }, { "epoch": 24.18, "learning_rate": 3.7908545015144e-05, "loss": 1.4259, "step": 2291500 }, { "epoch": 24.19, "learning_rate": 3.790590668763258e-05, "loss": 1.3433, "step": 2292000 }, { "epoch": 24.19, "learning_rate": 3.790326836012115e-05, "loss": 1.3757, "step": 2292500 }, { "epoch": 24.2, "learning_rate": 3.790063003260973e-05, "loss": 1.4053, "step": 2293000 }, { "epoch": 24.2, "learning_rate": 3.789799170509831e-05, "loss": 1.3882, "step": 2293500 }, { "epoch": 24.21, "learning_rate": 3.7895353377586886e-05, "loss": 1.3505, "step": 2294000 }, { "epoch": 24.21, "learning_rate": 3.7892715050075454e-05, "loss": 1.3992, "step": 2294500 }, { "epoch": 24.22, "learning_rate": 3.789007672256403e-05, "loss": 1.3571, "step": 2295000 }, { "epoch": 24.23, "learning_rate": 3.788743839505261e-05, "loss": 1.4264, "step": 2295500 }, { "epoch": 24.23, "learning_rate": 3.788480006754119e-05, "loss": 1.3442, "step": 2296000 }, { "epoch": 24.24, "learning_rate": 3.788216174002976e-05, "loss": 1.3119, "step": 2296500 }, { "epoch": 24.24, "learning_rate": 3.787952341251834e-05, "loss": 1.3099, "step": 2297000 }, { "epoch": 24.25, "learning_rate": 3.7876885085006914e-05, "loss": 1.3482, "step": 2297500 }, { "epoch": 24.25, "learning_rate": 3.787424675749549e-05, "loss": 1.3571, "step": 2298000 }, { "epoch": 24.26, "learning_rate": 3.7871608429984065e-05, "loss": 1.3931, "step": 2298500 }, { "epoch": 24.26, "learning_rate": 3.786897010247265e-05, "loss": 1.3716, "step": 2299000 }, { "epoch": 24.27, "learning_rate": 3.7866331774961216e-05, "loss": 1.336, "step": 2299500 }, { "epoch": 24.27, "learning_rate": 3.786369344744979e-05, "loss": 1.3514, "step": 2300000 }, { "epoch": 24.28, "learning_rate": 3.7861055119938374e-05, "loss": 1.3719, "step": 2300500 }, { "epoch": 24.28, "learning_rate": 3.785841679242695e-05, "loss": 1.4373, "step": 2301000 }, { "epoch": 24.29, "learning_rate": 3.785577846491552e-05, "loss": 1.3997, "step": 2301500 }, { "epoch": 24.29, "learning_rate": 3.78531401374041e-05, "loss": 1.3859, "step": 2302000 }, { "epoch": 24.3, "learning_rate": 3.7850501809892676e-05, "loss": 1.3771, "step": 2302500 }, { "epoch": 24.3, "learning_rate": 3.784786348238125e-05, "loss": 1.4, "step": 2303000 }, { "epoch": 24.31, "learning_rate": 3.784522515486983e-05, "loss": 1.3964, "step": 2303500 }, { "epoch": 24.31, "learning_rate": 3.78425868273584e-05, "loss": 1.3204, "step": 2304000 }, { "epoch": 24.32, "learning_rate": 3.783994849984698e-05, "loss": 1.3601, "step": 2304500 }, { "epoch": 24.33, "learning_rate": 3.783731017233555e-05, "loss": 1.3959, "step": 2305000 }, { "epoch": 24.33, "learning_rate": 3.7834671844824136e-05, "loss": 1.3386, "step": 2305500 }, { "epoch": 24.34, "learning_rate": 3.783203351731271e-05, "loss": 1.3164, "step": 2306000 }, { "epoch": 24.34, "learning_rate": 3.782939518980128e-05, "loss": 1.3321, "step": 2306500 }, { "epoch": 24.35, "learning_rate": 3.7826756862289855e-05, "loss": 1.3189, "step": 2307000 }, { "epoch": 24.35, "learning_rate": 3.782411853477844e-05, "loss": 1.3292, "step": 2307500 }, { "epoch": 24.36, "learning_rate": 3.782148020726701e-05, "loss": 1.3606, "step": 2308000 }, { "epoch": 24.36, "learning_rate": 3.781884187975559e-05, "loss": 1.4157, "step": 2308500 }, { "epoch": 24.37, "learning_rate": 3.7816203552244164e-05, "loss": 1.371, "step": 2309000 }, { "epoch": 24.37, "learning_rate": 3.781356522473274e-05, "loss": 1.3421, "step": 2309500 }, { "epoch": 24.38, "learning_rate": 3.7810926897221315e-05, "loss": 1.345, "step": 2310000 }, { "epoch": 24.38, "learning_rate": 3.780828856970989e-05, "loss": 1.3504, "step": 2310500 }, { "epoch": 24.39, "learning_rate": 3.7805650242198466e-05, "loss": 1.4174, "step": 2311000 }, { "epoch": 24.39, "learning_rate": 3.780301191468704e-05, "loss": 1.3275, "step": 2311500 }, { "epoch": 24.4, "learning_rate": 3.780037358717562e-05, "loss": 1.3835, "step": 2312000 }, { "epoch": 24.4, "learning_rate": 3.77977352596642e-05, "loss": 1.3414, "step": 2312500 }, { "epoch": 24.41, "learning_rate": 3.7795096932152775e-05, "loss": 1.3488, "step": 2313000 }, { "epoch": 24.42, "learning_rate": 3.779245860464134e-05, "loss": 1.299, "step": 2313500 }, { "epoch": 24.42, "learning_rate": 3.7789820277129926e-05, "loss": 1.3496, "step": 2314000 }, { "epoch": 24.43, "learning_rate": 3.77871819496185e-05, "loss": 1.3858, "step": 2314500 }, { "epoch": 24.43, "learning_rate": 3.7784543622107077e-05, "loss": 1.436, "step": 2315000 }, { "epoch": 24.44, "learning_rate": 3.778190529459565e-05, "loss": 1.3539, "step": 2315500 }, { "epoch": 24.44, "learning_rate": 3.777926696708423e-05, "loss": 1.29, "step": 2316000 }, { "epoch": 24.45, "learning_rate": 3.77766286395728e-05, "loss": 1.3948, "step": 2316500 }, { "epoch": 24.45, "learning_rate": 3.777399031206138e-05, "loss": 1.3732, "step": 2317000 }, { "epoch": 24.46, "learning_rate": 3.777135198454996e-05, "loss": 1.3652, "step": 2317500 }, { "epoch": 24.46, "learning_rate": 3.7768713657038536e-05, "loss": 1.3683, "step": 2318000 }, { "epoch": 24.47, "learning_rate": 3.7766075329527105e-05, "loss": 1.3405, "step": 2318500 }, { "epoch": 24.47, "learning_rate": 3.776343700201568e-05, "loss": 1.366, "step": 2319000 }, { "epoch": 24.48, "learning_rate": 3.776079867450426e-05, "loss": 1.374, "step": 2319500 }, { "epoch": 24.48, "learning_rate": 3.775816034699284e-05, "loss": 1.392, "step": 2320000 }, { "epoch": 24.49, "learning_rate": 3.775552201948141e-05, "loss": 1.3753, "step": 2320500 }, { "epoch": 24.49, "learning_rate": 3.775288369196999e-05, "loss": 1.3557, "step": 2321000 }, { "epoch": 24.5, "learning_rate": 3.7750245364458565e-05, "loss": 1.4086, "step": 2321500 }, { "epoch": 24.5, "learning_rate": 3.774760703694714e-05, "loss": 1.3613, "step": 2322000 }, { "epoch": 24.51, "learning_rate": 3.7744968709435716e-05, "loss": 1.3841, "step": 2322500 }, { "epoch": 24.52, "learning_rate": 3.774233038192429e-05, "loss": 1.3519, "step": 2323000 }, { "epoch": 24.52, "learning_rate": 3.7739692054412867e-05, "loss": 1.3858, "step": 2323500 }, { "epoch": 24.53, "learning_rate": 3.773705372690144e-05, "loss": 1.3292, "step": 2324000 }, { "epoch": 24.53, "learning_rate": 3.7734415399390024e-05, "loss": 1.377, "step": 2324500 }, { "epoch": 24.54, "learning_rate": 3.77317770718786e-05, "loss": 1.3622, "step": 2325000 }, { "epoch": 24.54, "learning_rate": 3.772913874436717e-05, "loss": 1.3346, "step": 2325500 }, { "epoch": 24.55, "learning_rate": 3.772650041685575e-05, "loss": 1.3462, "step": 2326000 }, { "epoch": 24.55, "learning_rate": 3.7723862089344326e-05, "loss": 1.3469, "step": 2326500 }, { "epoch": 24.56, "learning_rate": 3.77212237618329e-05, "loss": 1.3469, "step": 2327000 }, { "epoch": 24.56, "learning_rate": 3.771858543432148e-05, "loss": 1.4152, "step": 2327500 }, { "epoch": 24.57, "learning_rate": 3.771594710681005e-05, "loss": 1.3669, "step": 2328000 }, { "epoch": 24.57, "learning_rate": 3.771330877929863e-05, "loss": 1.3266, "step": 2328500 }, { "epoch": 24.58, "learning_rate": 3.7710670451787204e-05, "loss": 1.3668, "step": 2329000 }, { "epoch": 24.58, "learning_rate": 3.7708032124275786e-05, "loss": 1.3871, "step": 2329500 }, { "epoch": 24.59, "learning_rate": 3.7705393796764355e-05, "loss": 1.3464, "step": 2330000 }, { "epoch": 24.59, "learning_rate": 3.770275546925293e-05, "loss": 1.3571, "step": 2330500 }, { "epoch": 24.6, "learning_rate": 3.7700117141741506e-05, "loss": 1.3817, "step": 2331000 }, { "epoch": 24.61, "learning_rate": 3.769747881423009e-05, "loss": 1.3565, "step": 2331500 }, { "epoch": 24.61, "learning_rate": 3.7694840486718663e-05, "loss": 1.3573, "step": 2332000 }, { "epoch": 24.62, "learning_rate": 3.769220215920723e-05, "loss": 1.3939, "step": 2332500 }, { "epoch": 24.62, "learning_rate": 3.7689563831695814e-05, "loss": 1.4119, "step": 2333000 }, { "epoch": 24.63, "learning_rate": 3.768692550418439e-05, "loss": 1.3524, "step": 2333500 }, { "epoch": 24.63, "learning_rate": 3.7684287176672965e-05, "loss": 1.3539, "step": 2334000 }, { "epoch": 24.64, "learning_rate": 3.768164884916154e-05, "loss": 1.3614, "step": 2334500 }, { "epoch": 24.64, "learning_rate": 3.7679010521650116e-05, "loss": 1.3179, "step": 2335000 }, { "epoch": 24.65, "learning_rate": 3.767637219413869e-05, "loss": 1.4013, "step": 2335500 }, { "epoch": 24.65, "learning_rate": 3.767373386662727e-05, "loss": 1.3649, "step": 2336000 }, { "epoch": 24.66, "learning_rate": 3.767109553911585e-05, "loss": 1.3323, "step": 2336500 }, { "epoch": 24.66, "learning_rate": 3.766845721160442e-05, "loss": 1.3686, "step": 2337000 }, { "epoch": 24.67, "learning_rate": 3.7665818884092994e-05, "loss": 1.3854, "step": 2337500 }, { "epoch": 24.67, "learning_rate": 3.7663180556581576e-05, "loss": 1.3663, "step": 2338000 }, { "epoch": 24.68, "learning_rate": 3.766054222907015e-05, "loss": 1.3773, "step": 2338500 }, { "epoch": 24.68, "learning_rate": 3.765790390155873e-05, "loss": 1.3352, "step": 2339000 }, { "epoch": 24.69, "learning_rate": 3.76552655740473e-05, "loss": 1.3573, "step": 2339500 }, { "epoch": 24.69, "learning_rate": 3.765262724653588e-05, "loss": 1.3849, "step": 2340000 }, { "epoch": 24.7, "learning_rate": 3.7649988919024453e-05, "loss": 1.3423, "step": 2340500 }, { "epoch": 24.71, "learning_rate": 3.764735059151303e-05, "loss": 1.3783, "step": 2341000 }, { "epoch": 24.71, "learning_rate": 3.764471226400161e-05, "loss": 1.3755, "step": 2341500 }, { "epoch": 24.72, "learning_rate": 3.764207393649018e-05, "loss": 1.3464, "step": 2342000 }, { "epoch": 24.72, "learning_rate": 3.7639435608978755e-05, "loss": 1.3971, "step": 2342500 }, { "epoch": 24.73, "learning_rate": 3.763679728146733e-05, "loss": 1.3545, "step": 2343000 }, { "epoch": 24.73, "learning_rate": 3.763415895395591e-05, "loss": 1.3789, "step": 2343500 }, { "epoch": 24.74, "learning_rate": 3.763152062644449e-05, "loss": 1.3714, "step": 2344000 }, { "epoch": 24.74, "learning_rate": 3.762888229893306e-05, "loss": 1.3865, "step": 2344500 }, { "epoch": 24.75, "learning_rate": 3.762624397142164e-05, "loss": 1.332, "step": 2345000 }, { "epoch": 24.75, "learning_rate": 3.7623605643910215e-05, "loss": 1.3781, "step": 2345500 }, { "epoch": 24.76, "learning_rate": 3.762096731639879e-05, "loss": 1.3107, "step": 2346000 }, { "epoch": 24.76, "learning_rate": 3.7618328988887366e-05, "loss": 1.3902, "step": 2346500 }, { "epoch": 24.77, "learning_rate": 3.761569066137594e-05, "loss": 1.3229, "step": 2347000 }, { "epoch": 24.77, "learning_rate": 3.761305233386452e-05, "loss": 1.409, "step": 2347500 }, { "epoch": 24.78, "learning_rate": 3.761041400635309e-05, "loss": 1.397, "step": 2348000 }, { "epoch": 24.78, "learning_rate": 3.7607775678841675e-05, "loss": 1.3586, "step": 2348500 }, { "epoch": 24.79, "learning_rate": 3.7605137351330244e-05, "loss": 1.3384, "step": 2349000 }, { "epoch": 24.8, "learning_rate": 3.760249902381882e-05, "loss": 1.3453, "step": 2349500 }, { "epoch": 24.8, "learning_rate": 3.75998606963074e-05, "loss": 1.3458, "step": 2350000 }, { "epoch": 24.81, "learning_rate": 3.759722236879598e-05, "loss": 1.4208, "step": 2350500 }, { "epoch": 24.81, "learning_rate": 3.759458404128455e-05, "loss": 1.3412, "step": 2351000 }, { "epoch": 24.82, "learning_rate": 3.759194571377313e-05, "loss": 1.3787, "step": 2351500 }, { "epoch": 24.82, "learning_rate": 3.75893073862617e-05, "loss": 1.3814, "step": 2352000 }, { "epoch": 24.83, "learning_rate": 3.758666905875028e-05, "loss": 1.3901, "step": 2352500 }, { "epoch": 24.83, "learning_rate": 3.7584030731238854e-05, "loss": 1.3826, "step": 2353000 }, { "epoch": 24.84, "learning_rate": 3.7581392403727436e-05, "loss": 1.3431, "step": 2353500 }, { "epoch": 24.84, "learning_rate": 3.7578754076216005e-05, "loss": 1.344, "step": 2354000 }, { "epoch": 24.85, "learning_rate": 3.757611574870458e-05, "loss": 1.3376, "step": 2354500 }, { "epoch": 24.85, "learning_rate": 3.7573477421193156e-05, "loss": 1.3964, "step": 2355000 }, { "epoch": 24.86, "learning_rate": 3.757083909368174e-05, "loss": 1.4085, "step": 2355500 }, { "epoch": 24.86, "learning_rate": 3.756820076617031e-05, "loss": 1.3868, "step": 2356000 }, { "epoch": 24.87, "learning_rate": 3.756556243865888e-05, "loss": 1.3089, "step": 2356500 }, { "epoch": 24.87, "learning_rate": 3.7562924111147465e-05, "loss": 1.3324, "step": 2357000 }, { "epoch": 24.88, "learning_rate": 3.756028578363604e-05, "loss": 1.4052, "step": 2357500 }, { "epoch": 24.88, "learning_rate": 3.7557647456124616e-05, "loss": 1.4208, "step": 2358000 }, { "epoch": 24.89, "learning_rate": 3.755500912861319e-05, "loss": 1.4041, "step": 2358500 }, { "epoch": 24.9, "learning_rate": 3.755237080110177e-05, "loss": 1.3417, "step": 2359000 }, { "epoch": 24.9, "learning_rate": 3.754973247359034e-05, "loss": 1.3781, "step": 2359500 }, { "epoch": 24.91, "learning_rate": 3.754709414607892e-05, "loss": 1.3519, "step": 2360000 }, { "epoch": 24.91, "learning_rate": 3.75444558185675e-05, "loss": 1.3492, "step": 2360500 }, { "epoch": 24.92, "learning_rate": 3.754181749105607e-05, "loss": 1.3572, "step": 2361000 }, { "epoch": 24.92, "learning_rate": 3.7539179163544644e-05, "loss": 1.3764, "step": 2361500 }, { "epoch": 24.93, "learning_rate": 3.7536540836033227e-05, "loss": 1.3535, "step": 2362000 }, { "epoch": 24.93, "learning_rate": 3.75339025085218e-05, "loss": 1.3433, "step": 2362500 }, { "epoch": 24.94, "learning_rate": 3.753126418101038e-05, "loss": 1.3574, "step": 2363000 }, { "epoch": 24.94, "learning_rate": 3.752862585349895e-05, "loss": 1.3752, "step": 2363500 }, { "epoch": 24.95, "learning_rate": 3.752598752598753e-05, "loss": 1.3062, "step": 2364000 }, { "epoch": 24.95, "learning_rate": 3.7523349198476104e-05, "loss": 1.3521, "step": 2364500 }, { "epoch": 24.96, "learning_rate": 3.752071087096468e-05, "loss": 1.3714, "step": 2365000 }, { "epoch": 24.96, "learning_rate": 3.7518072543453255e-05, "loss": 1.3816, "step": 2365500 }, { "epoch": 24.97, "learning_rate": 3.751543421594183e-05, "loss": 1.3608, "step": 2366000 }, { "epoch": 24.97, "learning_rate": 3.7512795888430406e-05, "loss": 1.4104, "step": 2366500 }, { "epoch": 24.98, "learning_rate": 3.751015756091898e-05, "loss": 1.3264, "step": 2367000 }, { "epoch": 24.98, "learning_rate": 3.7507519233407564e-05, "loss": 1.4156, "step": 2367500 }, { "epoch": 24.99, "learning_rate": 3.750488090589613e-05, "loss": 1.3883, "step": 2368000 }, { "epoch": 25.0, "learning_rate": 3.750224257838471e-05, "loss": 1.3332, "step": 2368500 }, { "epoch": 25.0, "learning_rate": 3.749960425087329e-05, "loss": 1.2987, "step": 2369000 }, { "epoch": 25.01, "learning_rate": 3.7496965923361866e-05, "loss": 1.3936, "step": 2369500 }, { "epoch": 25.01, "learning_rate": 3.749432759585044e-05, "loss": 1.3503, "step": 2370000 }, { "epoch": 25.02, "learning_rate": 3.749168926833902e-05, "loss": 1.3202, "step": 2370500 }, { "epoch": 25.02, "learning_rate": 3.748905094082759e-05, "loss": 1.3449, "step": 2371000 }, { "epoch": 25.03, "learning_rate": 3.748641261331617e-05, "loss": 1.4357, "step": 2371500 }, { "epoch": 25.03, "learning_rate": 3.748377428580474e-05, "loss": 1.3287, "step": 2372000 }, { "epoch": 25.04, "learning_rate": 3.7481135958293325e-05, "loss": 1.3781, "step": 2372500 }, { "epoch": 25.04, "learning_rate": 3.7478497630781894e-05, "loss": 1.3817, "step": 2373000 }, { "epoch": 25.05, "learning_rate": 3.747585930327047e-05, "loss": 1.3709, "step": 2373500 }, { "epoch": 25.05, "learning_rate": 3.747322097575905e-05, "loss": 1.3895, "step": 2374000 }, { "epoch": 25.06, "learning_rate": 3.747058264824763e-05, "loss": 1.2933, "step": 2374500 }, { "epoch": 25.06, "learning_rate": 3.7467944320736196e-05, "loss": 1.3555, "step": 2375000 }, { "epoch": 25.07, "learning_rate": 3.746530599322478e-05, "loss": 1.373, "step": 2375500 }, { "epoch": 25.07, "learning_rate": 3.7462667665713354e-05, "loss": 1.3368, "step": 2376000 }, { "epoch": 25.08, "learning_rate": 3.746002933820193e-05, "loss": 1.3493, "step": 2376500 }, { "epoch": 25.09, "learning_rate": 3.7457391010690505e-05, "loss": 1.3549, "step": 2377000 }, { "epoch": 25.09, "learning_rate": 3.745475268317908e-05, "loss": 1.4076, "step": 2377500 }, { "epoch": 25.1, "learning_rate": 3.7452114355667656e-05, "loss": 1.4236, "step": 2378000 }, { "epoch": 25.1, "learning_rate": 3.744947602815623e-05, "loss": 1.3479, "step": 2378500 }, { "epoch": 25.11, "learning_rate": 3.7446837700644813e-05, "loss": 1.3641, "step": 2379000 }, { "epoch": 25.11, "learning_rate": 3.744419937313339e-05, "loss": 1.3709, "step": 2379500 }, { "epoch": 25.12, "learning_rate": 3.744156104562196e-05, "loss": 1.3516, "step": 2380000 }, { "epoch": 25.12, "learning_rate": 3.743892271811053e-05, "loss": 1.365, "step": 2380500 }, { "epoch": 25.13, "learning_rate": 3.7436284390599115e-05, "loss": 1.3616, "step": 2381000 }, { "epoch": 25.13, "learning_rate": 3.743364606308769e-05, "loss": 1.363, "step": 2381500 }, { "epoch": 25.14, "learning_rate": 3.7431007735576266e-05, "loss": 1.3488, "step": 2382000 }, { "epoch": 25.14, "learning_rate": 3.742836940806484e-05, "loss": 1.3455, "step": 2382500 }, { "epoch": 25.15, "learning_rate": 3.742573108055342e-05, "loss": 1.3313, "step": 2383000 }, { "epoch": 25.15, "learning_rate": 3.742309275304199e-05, "loss": 1.3178, "step": 2383500 }, { "epoch": 25.16, "learning_rate": 3.742045442553057e-05, "loss": 1.3769, "step": 2384000 }, { "epoch": 25.16, "learning_rate": 3.7417816098019144e-05, "loss": 1.3725, "step": 2384500 }, { "epoch": 25.17, "learning_rate": 3.741517777050772e-05, "loss": 1.3334, "step": 2385000 }, { "epoch": 25.17, "learning_rate": 3.7412539442996295e-05, "loss": 1.3056, "step": 2385500 }, { "epoch": 25.18, "learning_rate": 3.740990111548488e-05, "loss": 1.3694, "step": 2386000 }, { "epoch": 25.19, "learning_rate": 3.740726278797345e-05, "loss": 1.3757, "step": 2386500 }, { "epoch": 25.19, "learning_rate": 3.740462446046202e-05, "loss": 1.3019, "step": 2387000 }, { "epoch": 25.2, "learning_rate": 3.7401986132950604e-05, "loss": 1.3407, "step": 2387500 }, { "epoch": 25.2, "learning_rate": 3.739934780543918e-05, "loss": 1.4105, "step": 2388000 }, { "epoch": 25.21, "learning_rate": 3.7396709477927754e-05, "loss": 1.3754, "step": 2388500 }, { "epoch": 25.21, "learning_rate": 3.739407115041633e-05, "loss": 1.4002, "step": 2389000 }, { "epoch": 25.22, "learning_rate": 3.7391432822904905e-05, "loss": 1.3491, "step": 2389500 }, { "epoch": 25.22, "learning_rate": 3.738879449539348e-05, "loss": 1.3888, "step": 2390000 }, { "epoch": 25.23, "learning_rate": 3.7386156167882056e-05, "loss": 1.3757, "step": 2390500 }, { "epoch": 25.23, "learning_rate": 3.738351784037064e-05, "loss": 1.3612, "step": 2391000 }, { "epoch": 25.24, "learning_rate": 3.7380879512859214e-05, "loss": 1.34, "step": 2391500 }, { "epoch": 25.24, "learning_rate": 3.737824118534778e-05, "loss": 1.3928, "step": 2392000 }, { "epoch": 25.25, "learning_rate": 3.737560285783636e-05, "loss": 1.3215, "step": 2392500 }, { "epoch": 25.25, "learning_rate": 3.737296453032494e-05, "loss": 1.391, "step": 2393000 }, { "epoch": 25.26, "learning_rate": 3.7370326202813516e-05, "loss": 1.3688, "step": 2393500 }, { "epoch": 25.26, "learning_rate": 3.7367687875302085e-05, "loss": 1.3635, "step": 2394000 }, { "epoch": 25.27, "learning_rate": 3.736504954779067e-05, "loss": 1.355, "step": 2394500 }, { "epoch": 25.28, "learning_rate": 3.736241122027924e-05, "loss": 1.3568, "step": 2395000 }, { "epoch": 25.28, "learning_rate": 3.735977289276782e-05, "loss": 1.3318, "step": 2395500 }, { "epoch": 25.29, "learning_rate": 3.7357134565256394e-05, "loss": 1.3466, "step": 2396000 }, { "epoch": 25.29, "learning_rate": 3.735449623774497e-05, "loss": 1.3885, "step": 2396500 }, { "epoch": 25.3, "learning_rate": 3.7351857910233545e-05, "loss": 1.3515, "step": 2397000 }, { "epoch": 25.3, "learning_rate": 3.734921958272212e-05, "loss": 1.3416, "step": 2397500 }, { "epoch": 25.31, "learning_rate": 3.73465812552107e-05, "loss": 1.3666, "step": 2398000 }, { "epoch": 25.31, "learning_rate": 3.734394292769928e-05, "loss": 1.3246, "step": 2398500 }, { "epoch": 25.32, "learning_rate": 3.7341304600187846e-05, "loss": 1.3543, "step": 2399000 }, { "epoch": 25.32, "learning_rate": 3.733866627267643e-05, "loss": 1.3916, "step": 2399500 }, { "epoch": 25.33, "learning_rate": 3.7336027945165004e-05, "loss": 1.395, "step": 2400000 }, { "epoch": 25.33, "learning_rate": 3.733338961765358e-05, "loss": 1.3966, "step": 2400500 }, { "epoch": 25.34, "learning_rate": 3.7330751290142155e-05, "loss": 1.3525, "step": 2401000 }, { "epoch": 25.34, "learning_rate": 3.732811296263073e-05, "loss": 1.3279, "step": 2401500 }, { "epoch": 25.35, "learning_rate": 3.7325474635119306e-05, "loss": 1.4072, "step": 2402000 }, { "epoch": 25.35, "learning_rate": 3.732283630760788e-05, "loss": 1.3323, "step": 2402500 }, { "epoch": 25.36, "learning_rate": 3.7320197980096464e-05, "loss": 1.3832, "step": 2403000 }, { "epoch": 25.36, "learning_rate": 3.731755965258503e-05, "loss": 1.3747, "step": 2403500 }, { "epoch": 25.37, "learning_rate": 3.731492132507361e-05, "loss": 1.3198, "step": 2404000 }, { "epoch": 25.38, "learning_rate": 3.7312282997562184e-05, "loss": 1.3994, "step": 2404500 }, { "epoch": 25.38, "learning_rate": 3.7309644670050766e-05, "loss": 1.4013, "step": 2405000 }, { "epoch": 25.39, "learning_rate": 3.730700634253934e-05, "loss": 1.3923, "step": 2405500 }, { "epoch": 25.39, "learning_rate": 3.730436801502791e-05, "loss": 1.3406, "step": 2406000 }, { "epoch": 25.4, "learning_rate": 3.730172968751649e-05, "loss": 1.3066, "step": 2406500 }, { "epoch": 25.4, "learning_rate": 3.729909136000507e-05, "loss": 1.397, "step": 2407000 }, { "epoch": 25.41, "learning_rate": 3.729645303249364e-05, "loss": 1.3581, "step": 2407500 }, { "epoch": 25.41, "learning_rate": 3.729381470498222e-05, "loss": 1.3965, "step": 2408000 }, { "epoch": 25.42, "learning_rate": 3.7291176377470794e-05, "loss": 1.3734, "step": 2408500 }, { "epoch": 25.42, "learning_rate": 3.728853804995937e-05, "loss": 1.348, "step": 2409000 }, { "epoch": 25.43, "learning_rate": 3.7285899722447945e-05, "loss": 1.339, "step": 2409500 }, { "epoch": 25.43, "learning_rate": 3.728326139493653e-05, "loss": 1.3839, "step": 2410000 }, { "epoch": 25.44, "learning_rate": 3.72806230674251e-05, "loss": 1.3744, "step": 2410500 }, { "epoch": 25.44, "learning_rate": 3.727798473991367e-05, "loss": 1.4064, "step": 2411000 }, { "epoch": 25.45, "learning_rate": 3.7275346412402254e-05, "loss": 1.2999, "step": 2411500 }, { "epoch": 25.45, "learning_rate": 3.727270808489083e-05, "loss": 1.3756, "step": 2412000 }, { "epoch": 25.46, "learning_rate": 3.7270069757379405e-05, "loss": 1.3702, "step": 2412500 }, { "epoch": 25.47, "learning_rate": 3.726743142986798e-05, "loss": 1.3935, "step": 2413000 }, { "epoch": 25.47, "learning_rate": 3.7264793102356556e-05, "loss": 1.3507, "step": 2413500 }, { "epoch": 25.48, "learning_rate": 3.726215477484513e-05, "loss": 1.3585, "step": 2414000 }, { "epoch": 25.48, "learning_rate": 3.725951644733371e-05, "loss": 1.3164, "step": 2414500 }, { "epoch": 25.49, "learning_rate": 3.725687811982229e-05, "loss": 1.344, "step": 2415000 }, { "epoch": 25.49, "learning_rate": 3.725423979231086e-05, "loss": 1.3383, "step": 2415500 }, { "epoch": 25.5, "learning_rate": 3.725160146479943e-05, "loss": 1.3613, "step": 2416000 }, { "epoch": 25.5, "learning_rate": 3.724896313728801e-05, "loss": 1.4194, "step": 2416500 }, { "epoch": 25.51, "learning_rate": 3.724632480977659e-05, "loss": 1.3886, "step": 2417000 }, { "epoch": 25.51, "learning_rate": 3.724368648226517e-05, "loss": 1.3192, "step": 2417500 }, { "epoch": 25.52, "learning_rate": 3.7241048154753735e-05, "loss": 1.3899, "step": 2418000 }, { "epoch": 25.52, "learning_rate": 3.723840982724232e-05, "loss": 1.3755, "step": 2418500 }, { "epoch": 25.53, "learning_rate": 3.723577149973089e-05, "loss": 1.3464, "step": 2419000 }, { "epoch": 25.53, "learning_rate": 3.723313317221947e-05, "loss": 1.341, "step": 2419500 }, { "epoch": 25.54, "learning_rate": 3.7230494844708044e-05, "loss": 1.375, "step": 2420000 }, { "epoch": 25.54, "learning_rate": 3.722785651719662e-05, "loss": 1.3891, "step": 2420500 }, { "epoch": 25.55, "learning_rate": 3.7225218189685195e-05, "loss": 1.3722, "step": 2421000 }, { "epoch": 25.55, "learning_rate": 3.722257986217377e-05, "loss": 1.3554, "step": 2421500 }, { "epoch": 25.56, "learning_rate": 3.721994153466235e-05, "loss": 1.3343, "step": 2422000 }, { "epoch": 25.57, "learning_rate": 3.721730320715092e-05, "loss": 1.3779, "step": 2422500 }, { "epoch": 25.57, "learning_rate": 3.72146648796395e-05, "loss": 1.3528, "step": 2423000 }, { "epoch": 25.58, "learning_rate": 3.721202655212808e-05, "loss": 1.348, "step": 2423500 }, { "epoch": 25.58, "learning_rate": 3.7209388224616655e-05, "loss": 1.3881, "step": 2424000 }, { "epoch": 25.59, "learning_rate": 3.720674989710523e-05, "loss": 1.3869, "step": 2424500 }, { "epoch": 25.59, "learning_rate": 3.7204111569593806e-05, "loss": 1.3665, "step": 2425000 }, { "epoch": 25.6, "learning_rate": 3.720147324208238e-05, "loss": 1.3509, "step": 2425500 }, { "epoch": 25.6, "learning_rate": 3.719883491457096e-05, "loss": 1.3711, "step": 2426000 }, { "epoch": 25.61, "learning_rate": 3.719619658705953e-05, "loss": 1.3374, "step": 2426500 }, { "epoch": 25.61, "learning_rate": 3.7193558259548114e-05, "loss": 1.3733, "step": 2427000 }, { "epoch": 25.62, "learning_rate": 3.719091993203668e-05, "loss": 1.3388, "step": 2427500 }, { "epoch": 25.62, "learning_rate": 3.718828160452526e-05, "loss": 1.4118, "step": 2428000 }, { "epoch": 25.63, "learning_rate": 3.7185643277013834e-05, "loss": 1.3866, "step": 2428500 }, { "epoch": 25.63, "learning_rate": 3.7183004949502416e-05, "loss": 1.3436, "step": 2429000 }, { "epoch": 25.64, "learning_rate": 3.718036662199099e-05, "loss": 1.3448, "step": 2429500 }, { "epoch": 25.64, "learning_rate": 3.717772829447956e-05, "loss": 1.3238, "step": 2430000 }, { "epoch": 25.65, "learning_rate": 3.717508996696814e-05, "loss": 1.3846, "step": 2430500 }, { "epoch": 25.66, "learning_rate": 3.717245163945672e-05, "loss": 1.4025, "step": 2431000 }, { "epoch": 25.66, "learning_rate": 3.7169813311945294e-05, "loss": 1.3441, "step": 2431500 }, { "epoch": 25.67, "learning_rate": 3.716717498443387e-05, "loss": 1.465, "step": 2432000 }, { "epoch": 25.67, "learning_rate": 3.7164536656922445e-05, "loss": 1.3238, "step": 2432500 }, { "epoch": 25.68, "learning_rate": 3.716189832941102e-05, "loss": 1.3157, "step": 2433000 }, { "epoch": 25.68, "learning_rate": 3.7159260001899596e-05, "loss": 1.3808, "step": 2433500 }, { "epoch": 25.69, "learning_rate": 3.715662167438818e-05, "loss": 1.3515, "step": 2434000 }, { "epoch": 25.69, "learning_rate": 3.715398334687675e-05, "loss": 1.3438, "step": 2434500 }, { "epoch": 25.7, "learning_rate": 3.715134501936532e-05, "loss": 1.3936, "step": 2435000 }, { "epoch": 25.7, "learning_rate": 3.7148706691853904e-05, "loss": 1.3786, "step": 2435500 }, { "epoch": 25.71, "learning_rate": 3.714606836434248e-05, "loss": 1.301, "step": 2436000 }, { "epoch": 25.71, "learning_rate": 3.7143430036831055e-05, "loss": 1.3434, "step": 2436500 }, { "epoch": 25.72, "learning_rate": 3.714079170931963e-05, "loss": 1.3468, "step": 2437000 }, { "epoch": 25.72, "learning_rate": 3.7138153381808206e-05, "loss": 1.355, "step": 2437500 }, { "epoch": 25.73, "learning_rate": 3.713551505429678e-05, "loss": 1.3532, "step": 2438000 }, { "epoch": 25.73, "learning_rate": 3.713287672678536e-05, "loss": 1.3144, "step": 2438500 }, { "epoch": 25.74, "learning_rate": 3.713023839927394e-05, "loss": 1.4006, "step": 2439000 }, { "epoch": 25.74, "learning_rate": 3.712760007176251e-05, "loss": 1.3611, "step": 2439500 }, { "epoch": 25.75, "learning_rate": 3.7124961744251084e-05, "loss": 1.3102, "step": 2440000 }, { "epoch": 25.76, "learning_rate": 3.712232341673966e-05, "loss": 1.3429, "step": 2440500 }, { "epoch": 25.76, "learning_rate": 3.711968508922824e-05, "loss": 1.3898, "step": 2441000 }, { "epoch": 25.77, "learning_rate": 3.711704676171681e-05, "loss": 1.3453, "step": 2441500 }, { "epoch": 25.77, "learning_rate": 3.7114408434205386e-05, "loss": 1.3482, "step": 2442000 }, { "epoch": 25.78, "learning_rate": 3.711177010669397e-05, "loss": 1.3192, "step": 2442500 }, { "epoch": 25.78, "learning_rate": 3.7109131779182544e-05, "loss": 1.334, "step": 2443000 }, { "epoch": 25.79, "learning_rate": 3.710649345167112e-05, "loss": 1.4056, "step": 2443500 }, { "epoch": 25.79, "learning_rate": 3.7103855124159695e-05, "loss": 1.3276, "step": 2444000 }, { "epoch": 25.8, "learning_rate": 3.710121679664827e-05, "loss": 1.3188, "step": 2444500 }, { "epoch": 25.8, "learning_rate": 3.7098578469136846e-05, "loss": 1.3418, "step": 2445000 }, { "epoch": 25.81, "learning_rate": 3.709594014162542e-05, "loss": 1.2901, "step": 2445500 }, { "epoch": 25.81, "learning_rate": 3.7093301814114e-05, "loss": 1.3399, "step": 2446000 }, { "epoch": 25.82, "learning_rate": 3.709066348660257e-05, "loss": 1.3667, "step": 2446500 }, { "epoch": 25.82, "learning_rate": 3.708802515909115e-05, "loss": 1.3769, "step": 2447000 }, { "epoch": 25.83, "learning_rate": 3.708538683157973e-05, "loss": 1.3241, "step": 2447500 }, { "epoch": 25.83, "learning_rate": 3.7082748504068305e-05, "loss": 1.3578, "step": 2448000 }, { "epoch": 25.84, "learning_rate": 3.708011017655688e-05, "loss": 1.4072, "step": 2448500 }, { "epoch": 25.85, "learning_rate": 3.7077471849045456e-05, "loss": 1.3517, "step": 2449000 }, { "epoch": 25.85, "learning_rate": 3.707483352153403e-05, "loss": 1.3934, "step": 2449500 }, { "epoch": 25.86, "learning_rate": 3.707219519402261e-05, "loss": 1.3699, "step": 2450000 }, { "epoch": 25.86, "learning_rate": 3.706955686651118e-05, "loss": 1.3378, "step": 2450500 }, { "epoch": 25.87, "learning_rate": 3.706691853899976e-05, "loss": 1.3342, "step": 2451000 }, { "epoch": 25.87, "learning_rate": 3.7064280211488334e-05, "loss": 1.3481, "step": 2451500 }, { "epoch": 25.88, "learning_rate": 3.706164188397691e-05, "loss": 1.3829, "step": 2452000 }, { "epoch": 25.88, "learning_rate": 3.705900355646549e-05, "loss": 1.413, "step": 2452500 }, { "epoch": 25.89, "learning_rate": 3.705636522895407e-05, "loss": 1.3596, "step": 2453000 }, { "epoch": 25.89, "learning_rate": 3.7053726901442636e-05, "loss": 1.3313, "step": 2453500 }, { "epoch": 25.9, "learning_rate": 3.705108857393121e-05, "loss": 1.3197, "step": 2454000 }, { "epoch": 25.9, "learning_rate": 3.704845024641979e-05, "loss": 1.4209, "step": 2454500 }, { "epoch": 25.91, "learning_rate": 3.704581191890837e-05, "loss": 1.3367, "step": 2455000 }, { "epoch": 25.91, "learning_rate": 3.7043173591396944e-05, "loss": 1.3579, "step": 2455500 }, { "epoch": 25.92, "learning_rate": 3.704053526388552e-05, "loss": 1.2887, "step": 2456000 }, { "epoch": 25.92, "learning_rate": 3.7037896936374095e-05, "loss": 1.3794, "step": 2456500 }, { "epoch": 25.93, "learning_rate": 3.703525860886267e-05, "loss": 1.3943, "step": 2457000 }, { "epoch": 25.93, "learning_rate": 3.7032620281351246e-05, "loss": 1.402, "step": 2457500 }, { "epoch": 25.94, "learning_rate": 3.702998195383983e-05, "loss": 1.3507, "step": 2458000 }, { "epoch": 25.95, "learning_rate": 3.70273436263284e-05, "loss": 1.3358, "step": 2458500 }, { "epoch": 25.95, "learning_rate": 3.702470529881697e-05, "loss": 1.3637, "step": 2459000 }, { "epoch": 25.96, "learning_rate": 3.7022066971305555e-05, "loss": 1.3708, "step": 2459500 }, { "epoch": 25.96, "learning_rate": 3.701942864379413e-05, "loss": 1.3969, "step": 2460000 }, { "epoch": 25.97, "learning_rate": 3.70167903162827e-05, "loss": 1.3513, "step": 2460500 }, { "epoch": 25.97, "learning_rate": 3.701415198877128e-05, "loss": 1.3996, "step": 2461000 }, { "epoch": 25.98, "learning_rate": 3.701151366125986e-05, "loss": 1.345, "step": 2461500 }, { "epoch": 25.98, "learning_rate": 3.700887533374843e-05, "loss": 1.3434, "step": 2462000 }, { "epoch": 25.99, "learning_rate": 3.700623700623701e-05, "loss": 1.3277, "step": 2462500 }, { "epoch": 25.99, "learning_rate": 3.7003598678725583e-05, "loss": 1.4212, "step": 2463000 }, { "epoch": 26.0, "learning_rate": 3.700096035121416e-05, "loss": 1.3699, "step": 2463500 }, { "epoch": 26.0, "learning_rate": 3.6998322023702734e-05, "loss": 1.3013, "step": 2464000 }, { "epoch": 26.01, "learning_rate": 3.699568369619132e-05, "loss": 1.3428, "step": 2464500 }, { "epoch": 26.01, "learning_rate": 3.699304536867989e-05, "loss": 1.2574, "step": 2465000 }, { "epoch": 26.02, "learning_rate": 3.699040704116846e-05, "loss": 1.3854, "step": 2465500 }, { "epoch": 26.02, "learning_rate": 3.6987768713657036e-05, "loss": 1.3696, "step": 2466000 }, { "epoch": 26.03, "learning_rate": 3.698513038614562e-05, "loss": 1.3759, "step": 2466500 }, { "epoch": 26.04, "learning_rate": 3.6982492058634194e-05, "loss": 1.3783, "step": 2467000 }, { "epoch": 26.04, "learning_rate": 3.697985373112277e-05, "loss": 1.3063, "step": 2467500 }, { "epoch": 26.05, "learning_rate": 3.6977215403611345e-05, "loss": 1.3347, "step": 2468000 }, { "epoch": 26.05, "learning_rate": 3.697457707609992e-05, "loss": 1.3041, "step": 2468500 }, { "epoch": 26.06, "learning_rate": 3.6971938748588496e-05, "loss": 1.3494, "step": 2469000 }, { "epoch": 26.06, "learning_rate": 3.696930042107707e-05, "loss": 1.3127, "step": 2469500 }, { "epoch": 26.07, "learning_rate": 3.696666209356565e-05, "loss": 1.3913, "step": 2470000 }, { "epoch": 26.07, "learning_rate": 3.696402376605422e-05, "loss": 1.3811, "step": 2470500 }, { "epoch": 26.08, "learning_rate": 3.69613854385428e-05, "loss": 1.343, "step": 2471000 }, { "epoch": 26.08, "learning_rate": 3.695874711103138e-05, "loss": 1.3715, "step": 2471500 }, { "epoch": 26.09, "learning_rate": 3.6956108783519956e-05, "loss": 1.3414, "step": 2472000 }, { "epoch": 26.09, "learning_rate": 3.6953470456008524e-05, "loss": 1.4038, "step": 2472500 }, { "epoch": 26.1, "learning_rate": 3.695083212849711e-05, "loss": 1.4104, "step": 2473000 }, { "epoch": 26.1, "learning_rate": 3.694819380098568e-05, "loss": 1.3027, "step": 2473500 }, { "epoch": 26.11, "learning_rate": 3.694555547347426e-05, "loss": 1.372, "step": 2474000 }, { "epoch": 26.11, "learning_rate": 3.694291714596283e-05, "loss": 1.363, "step": 2474500 }, { "epoch": 26.12, "learning_rate": 3.694027881845141e-05, "loss": 1.3896, "step": 2475000 }, { "epoch": 26.12, "learning_rate": 3.6937640490939984e-05, "loss": 1.3174, "step": 2475500 }, { "epoch": 26.13, "learning_rate": 3.693500216342856e-05, "loss": 1.352, "step": 2476000 }, { "epoch": 26.14, "learning_rate": 3.693236383591714e-05, "loss": 1.3138, "step": 2476500 }, { "epoch": 26.14, "learning_rate": 3.692972550840572e-05, "loss": 1.3225, "step": 2477000 }, { "epoch": 26.15, "learning_rate": 3.6927087180894286e-05, "loss": 1.3482, "step": 2477500 }, { "epoch": 26.15, "learning_rate": 3.692444885338286e-05, "loss": 1.3135, "step": 2478000 }, { "epoch": 26.16, "learning_rate": 3.6921810525871444e-05, "loss": 1.3681, "step": 2478500 }, { "epoch": 26.16, "learning_rate": 3.691917219836002e-05, "loss": 1.321, "step": 2479000 }, { "epoch": 26.17, "learning_rate": 3.691653387084859e-05, "loss": 1.2964, "step": 2479500 }, { "epoch": 26.17, "learning_rate": 3.691389554333717e-05, "loss": 1.3518, "step": 2480000 }, { "epoch": 26.18, "learning_rate": 3.6911257215825746e-05, "loss": 1.3324, "step": 2480500 }, { "epoch": 26.18, "learning_rate": 3.690861888831432e-05, "loss": 1.3562, "step": 2481000 }, { "epoch": 26.19, "learning_rate": 3.69059805608029e-05, "loss": 1.3932, "step": 2481500 }, { "epoch": 26.19, "learning_rate": 3.690334223329147e-05, "loss": 1.3351, "step": 2482000 }, { "epoch": 26.2, "learning_rate": 3.690070390578005e-05, "loss": 1.3524, "step": 2482500 }, { "epoch": 26.2, "learning_rate": 3.689806557826862e-05, "loss": 1.3658, "step": 2483000 }, { "epoch": 26.21, "learning_rate": 3.6895427250757205e-05, "loss": 1.3733, "step": 2483500 }, { "epoch": 26.21, "learning_rate": 3.689278892324578e-05, "loss": 1.3367, "step": 2484000 }, { "epoch": 26.22, "learning_rate": 3.689015059573435e-05, "loss": 1.3325, "step": 2484500 }, { "epoch": 26.22, "learning_rate": 3.688751226822293e-05, "loss": 1.4001, "step": 2485000 }, { "epoch": 26.23, "learning_rate": 3.688487394071151e-05, "loss": 1.3741, "step": 2485500 }, { "epoch": 26.24, "learning_rate": 3.688223561320008e-05, "loss": 1.3429, "step": 2486000 }, { "epoch": 26.24, "learning_rate": 3.687959728568866e-05, "loss": 1.3491, "step": 2486500 }, { "epoch": 26.25, "learning_rate": 3.6876958958177234e-05, "loss": 1.3686, "step": 2487000 }, { "epoch": 26.25, "learning_rate": 3.687432063066581e-05, "loss": 1.3468, "step": 2487500 }, { "epoch": 26.26, "learning_rate": 3.6871682303154385e-05, "loss": 1.3865, "step": 2488000 }, { "epoch": 26.26, "learning_rate": 3.686904397564297e-05, "loss": 1.3137, "step": 2488500 }, { "epoch": 26.27, "learning_rate": 3.6866405648131536e-05, "loss": 1.3697, "step": 2489000 }, { "epoch": 26.27, "learning_rate": 3.686376732062011e-05, "loss": 1.3388, "step": 2489500 }, { "epoch": 26.28, "learning_rate": 3.686112899310869e-05, "loss": 1.446, "step": 2490000 }, { "epoch": 26.28, "learning_rate": 3.685849066559727e-05, "loss": 1.3146, "step": 2490500 }, { "epoch": 26.29, "learning_rate": 3.6855852338085845e-05, "loss": 1.372, "step": 2491000 }, { "epoch": 26.29, "learning_rate": 3.685321401057441e-05, "loss": 1.3481, "step": 2491500 }, { "epoch": 26.3, "learning_rate": 3.6850575683062996e-05, "loss": 1.337, "step": 2492000 }, { "epoch": 26.3, "learning_rate": 3.684793735555157e-05, "loss": 1.4111, "step": 2492500 }, { "epoch": 26.31, "learning_rate": 3.6845299028040147e-05, "loss": 1.376, "step": 2493000 }, { "epoch": 26.31, "learning_rate": 3.684266070052872e-05, "loss": 1.3541, "step": 2493500 }, { "epoch": 26.32, "learning_rate": 3.68400223730173e-05, "loss": 1.3416, "step": 2494000 }, { "epoch": 26.33, "learning_rate": 3.683738404550587e-05, "loss": 1.3632, "step": 2494500 }, { "epoch": 26.33, "learning_rate": 3.683474571799445e-05, "loss": 1.4298, "step": 2495000 }, { "epoch": 26.34, "learning_rate": 3.683210739048303e-05, "loss": 1.3269, "step": 2495500 }, { "epoch": 26.34, "learning_rate": 3.6829469062971606e-05, "loss": 1.3046, "step": 2496000 }, { "epoch": 26.35, "learning_rate": 3.6826830735460175e-05, "loss": 1.3712, "step": 2496500 }, { "epoch": 26.35, "learning_rate": 3.682419240794876e-05, "loss": 1.3653, "step": 2497000 }, { "epoch": 26.36, "learning_rate": 3.682155408043733e-05, "loss": 1.3432, "step": 2497500 }, { "epoch": 26.36, "learning_rate": 3.681891575292591e-05, "loss": 1.3516, "step": 2498000 }, { "epoch": 26.37, "learning_rate": 3.6816277425414484e-05, "loss": 1.3801, "step": 2498500 }, { "epoch": 26.37, "learning_rate": 3.681363909790306e-05, "loss": 1.3609, "step": 2499000 }, { "epoch": 26.38, "learning_rate": 3.6811000770391635e-05, "loss": 1.4133, "step": 2499500 }, { "epoch": 26.38, "learning_rate": 3.680836244288021e-05, "loss": 1.3771, "step": 2500000 }, { "epoch": 26.39, "learning_rate": 3.680572411536879e-05, "loss": 1.3614, "step": 2500500 }, { "epoch": 26.39, "learning_rate": 3.680308578785736e-05, "loss": 1.3743, "step": 2501000 }, { "epoch": 26.4, "learning_rate": 3.6800447460345937e-05, "loss": 1.3287, "step": 2501500 }, { "epoch": 26.4, "learning_rate": 3.679780913283451e-05, "loss": 1.3114, "step": 2502000 }, { "epoch": 26.41, "learning_rate": 3.6795170805323094e-05, "loss": 1.3986, "step": 2502500 }, { "epoch": 26.41, "learning_rate": 3.679253247781167e-05, "loss": 1.3365, "step": 2503000 }, { "epoch": 26.42, "learning_rate": 3.678989415030024e-05, "loss": 1.3756, "step": 2503500 }, { "epoch": 26.43, "learning_rate": 3.678725582278882e-05, "loss": 1.3526, "step": 2504000 }, { "epoch": 26.43, "learning_rate": 3.6784617495277396e-05, "loss": 1.3316, "step": 2504500 }, { "epoch": 26.44, "learning_rate": 3.678197916776597e-05, "loss": 1.3574, "step": 2505000 }, { "epoch": 26.44, "learning_rate": 3.677934084025455e-05, "loss": 1.3347, "step": 2505500 }, { "epoch": 26.45, "learning_rate": 3.677670251274312e-05, "loss": 1.3611, "step": 2506000 }, { "epoch": 26.45, "learning_rate": 3.67740641852317e-05, "loss": 1.3449, "step": 2506500 }, { "epoch": 26.46, "learning_rate": 3.6771425857720274e-05, "loss": 1.336, "step": 2507000 }, { "epoch": 26.46, "learning_rate": 3.6768787530208856e-05, "loss": 1.3649, "step": 2507500 }, { "epoch": 26.47, "learning_rate": 3.6766149202697425e-05, "loss": 1.325, "step": 2508000 }, { "epoch": 26.47, "learning_rate": 3.6763510875186e-05, "loss": 1.3658, "step": 2508500 }, { "epoch": 26.48, "learning_rate": 3.676087254767458e-05, "loss": 1.3281, "step": 2509000 }, { "epoch": 26.48, "learning_rate": 3.675823422016316e-05, "loss": 1.3505, "step": 2509500 }, { "epoch": 26.49, "learning_rate": 3.6755595892651733e-05, "loss": 1.3867, "step": 2510000 }, { "epoch": 26.49, "learning_rate": 3.675295756514031e-05, "loss": 1.3758, "step": 2510500 }, { "epoch": 26.5, "learning_rate": 3.6750319237628884e-05, "loss": 1.4166, "step": 2511000 }, { "epoch": 26.5, "learning_rate": 3.674768091011746e-05, "loss": 1.3169, "step": 2511500 }, { "epoch": 26.51, "learning_rate": 3.6745042582606035e-05, "loss": 1.3472, "step": 2512000 }, { "epoch": 26.52, "learning_rate": 3.674240425509462e-05, "loss": 1.3334, "step": 2512500 }, { "epoch": 26.52, "learning_rate": 3.6739765927583186e-05, "loss": 1.337, "step": 2513000 }, { "epoch": 26.53, "learning_rate": 3.673712760007176e-05, "loss": 1.3874, "step": 2513500 }, { "epoch": 26.53, "learning_rate": 3.673448927256034e-05, "loss": 1.3847, "step": 2514000 }, { "epoch": 26.54, "learning_rate": 3.673185094504892e-05, "loss": 1.3103, "step": 2514500 }, { "epoch": 26.54, "learning_rate": 3.6729212617537495e-05, "loss": 1.3603, "step": 2515000 }, { "epoch": 26.55, "learning_rate": 3.6726574290026064e-05, "loss": 1.3266, "step": 2515500 }, { "epoch": 26.55, "learning_rate": 3.6723935962514646e-05, "loss": 1.3259, "step": 2516000 }, { "epoch": 26.56, "learning_rate": 3.672129763500322e-05, "loss": 1.3814, "step": 2516500 }, { "epoch": 26.56, "learning_rate": 3.67186593074918e-05, "loss": 1.3537, "step": 2517000 }, { "epoch": 26.57, "learning_rate": 3.671602097998037e-05, "loss": 1.319, "step": 2517500 }, { "epoch": 26.57, "learning_rate": 3.671338265246895e-05, "loss": 1.352, "step": 2518000 }, { "epoch": 26.58, "learning_rate": 3.6710744324957523e-05, "loss": 1.3588, "step": 2518500 }, { "epoch": 26.58, "learning_rate": 3.67081059974461e-05, "loss": 1.3351, "step": 2519000 }, { "epoch": 26.59, "learning_rate": 3.670546766993468e-05, "loss": 1.3651, "step": 2519500 }, { "epoch": 26.59, "learning_rate": 3.670282934242325e-05, "loss": 1.3589, "step": 2520000 }, { "epoch": 26.6, "learning_rate": 3.6700191014911825e-05, "loss": 1.3364, "step": 2520500 }, { "epoch": 26.6, "learning_rate": 3.669755268740041e-05, "loss": 1.3392, "step": 2521000 }, { "epoch": 26.61, "learning_rate": 3.669491435988898e-05, "loss": 1.3422, "step": 2521500 }, { "epoch": 26.62, "learning_rate": 3.669227603237756e-05, "loss": 1.3534, "step": 2522000 }, { "epoch": 26.62, "learning_rate": 3.6689637704866134e-05, "loss": 1.337, "step": 2522500 }, { "epoch": 26.63, "learning_rate": 3.668699937735471e-05, "loss": 1.3222, "step": 2523000 }, { "epoch": 26.63, "learning_rate": 3.6684361049843285e-05, "loss": 1.3742, "step": 2523500 }, { "epoch": 26.64, "learning_rate": 3.668172272233186e-05, "loss": 1.3587, "step": 2524000 }, { "epoch": 26.64, "learning_rate": 3.667908439482044e-05, "loss": 1.3457, "step": 2524500 }, { "epoch": 26.65, "learning_rate": 3.667644606730901e-05, "loss": 1.3334, "step": 2525000 }, { "epoch": 26.65, "learning_rate": 3.667380773979759e-05, "loss": 1.3614, "step": 2525500 }, { "epoch": 26.66, "learning_rate": 3.667116941228617e-05, "loss": 1.3573, "step": 2526000 }, { "epoch": 26.66, "learning_rate": 3.6668531084774745e-05, "loss": 1.3574, "step": 2526500 }, { "epoch": 26.67, "learning_rate": 3.6665892757263314e-05, "loss": 1.3789, "step": 2527000 }, { "epoch": 26.67, "learning_rate": 3.666325442975189e-05, "loss": 1.3829, "step": 2527500 }, { "epoch": 26.68, "learning_rate": 3.666061610224047e-05, "loss": 1.3696, "step": 2528000 }, { "epoch": 26.68, "learning_rate": 3.665797777472905e-05, "loss": 1.3563, "step": 2528500 }, { "epoch": 26.69, "learning_rate": 3.665533944721762e-05, "loss": 1.4009, "step": 2529000 }, { "epoch": 26.69, "learning_rate": 3.66527011197062e-05, "loss": 1.368, "step": 2529500 }, { "epoch": 26.7, "learning_rate": 3.665006279219477e-05, "loss": 1.3582, "step": 2530000 }, { "epoch": 26.71, "learning_rate": 3.664742446468335e-05, "loss": 1.3769, "step": 2530500 }, { "epoch": 26.71, "learning_rate": 3.6644786137171924e-05, "loss": 1.3822, "step": 2531000 }, { "epoch": 26.72, "learning_rate": 3.6642147809660506e-05, "loss": 1.3309, "step": 2531500 }, { "epoch": 26.72, "learning_rate": 3.6639509482149075e-05, "loss": 1.347, "step": 2532000 }, { "epoch": 26.73, "learning_rate": 3.663687115463765e-05, "loss": 1.3956, "step": 2532500 }, { "epoch": 26.73, "learning_rate": 3.663423282712623e-05, "loss": 1.3641, "step": 2533000 }, { "epoch": 26.74, "learning_rate": 3.663159449961481e-05, "loss": 1.3754, "step": 2533500 }, { "epoch": 26.74, "learning_rate": 3.6628956172103384e-05, "loss": 1.3452, "step": 2534000 }, { "epoch": 26.75, "learning_rate": 3.662631784459196e-05, "loss": 1.3523, "step": 2534500 }, { "epoch": 26.75, "learning_rate": 3.6623679517080535e-05, "loss": 1.3408, "step": 2535000 }, { "epoch": 26.76, "learning_rate": 3.662104118956911e-05, "loss": 1.3726, "step": 2535500 }, { "epoch": 26.76, "learning_rate": 3.6618402862057686e-05, "loss": 1.3318, "step": 2536000 }, { "epoch": 26.77, "learning_rate": 3.661576453454626e-05, "loss": 1.3601, "step": 2536500 }, { "epoch": 26.77, "learning_rate": 3.661312620703484e-05, "loss": 1.3809, "step": 2537000 }, { "epoch": 26.78, "learning_rate": 3.661048787952341e-05, "loss": 1.4179, "step": 2537500 }, { "epoch": 26.78, "learning_rate": 3.6607849552011995e-05, "loss": 1.3557, "step": 2538000 }, { "epoch": 26.79, "learning_rate": 3.660521122450057e-05, "loss": 1.299, "step": 2538500 }, { "epoch": 26.79, "learning_rate": 3.660257289698914e-05, "loss": 1.3212, "step": 2539000 }, { "epoch": 26.8, "learning_rate": 3.6599934569477714e-05, "loss": 1.3937, "step": 2539500 }, { "epoch": 26.81, "learning_rate": 3.6597296241966297e-05, "loss": 1.3356, "step": 2540000 }, { "epoch": 26.81, "learning_rate": 3.659465791445487e-05, "loss": 1.3562, "step": 2540500 }, { "epoch": 26.82, "learning_rate": 3.659201958694345e-05, "loss": 1.3936, "step": 2541000 }, { "epoch": 26.82, "learning_rate": 3.658938125943202e-05, "loss": 1.3158, "step": 2541500 }, { "epoch": 26.83, "learning_rate": 3.65867429319206e-05, "loss": 1.3589, "step": 2542000 }, { "epoch": 26.83, "learning_rate": 3.6584104604409174e-05, "loss": 1.3272, "step": 2542500 }, { "epoch": 26.84, "learning_rate": 3.658146627689775e-05, "loss": 1.3258, "step": 2543000 }, { "epoch": 26.84, "learning_rate": 3.657882794938633e-05, "loss": 1.4001, "step": 2543500 }, { "epoch": 26.85, "learning_rate": 3.65761896218749e-05, "loss": 1.3125, "step": 2544000 }, { "epoch": 26.85, "learning_rate": 3.6573551294363476e-05, "loss": 1.3205, "step": 2544500 }, { "epoch": 26.86, "learning_rate": 3.657091296685206e-05, "loss": 1.3596, "step": 2545000 }, { "epoch": 26.86, "learning_rate": 3.6568274639340634e-05, "loss": 1.2935, "step": 2545500 }, { "epoch": 26.87, "learning_rate": 3.65656363118292e-05, "loss": 1.358, "step": 2546000 }, { "epoch": 26.87, "learning_rate": 3.6562997984317785e-05, "loss": 1.3402, "step": 2546500 }, { "epoch": 26.88, "learning_rate": 3.656035965680636e-05, "loss": 1.3539, "step": 2547000 }, { "epoch": 26.88, "learning_rate": 3.6557721329294936e-05, "loss": 1.401, "step": 2547500 }, { "epoch": 26.89, "learning_rate": 3.655508300178351e-05, "loss": 1.3469, "step": 2548000 }, { "epoch": 26.9, "learning_rate": 3.6552444674272087e-05, "loss": 1.3675, "step": 2548500 }, { "epoch": 26.9, "learning_rate": 3.654980634676066e-05, "loss": 1.3576, "step": 2549000 }, { "epoch": 26.91, "learning_rate": 3.654716801924924e-05, "loss": 1.3927, "step": 2549500 }, { "epoch": 26.91, "learning_rate": 3.654452969173782e-05, "loss": 1.3256, "step": 2550000 }, { "epoch": 26.92, "learning_rate": 3.6541891364226395e-05, "loss": 1.3496, "step": 2550500 }, { "epoch": 26.92, "learning_rate": 3.6539253036714964e-05, "loss": 1.3766, "step": 2551000 }, { "epoch": 26.93, "learning_rate": 3.653661470920354e-05, "loss": 1.3899, "step": 2551500 }, { "epoch": 26.93, "learning_rate": 3.653397638169212e-05, "loss": 1.3737, "step": 2552000 }, { "epoch": 26.94, "learning_rate": 3.65313380541807e-05, "loss": 1.3328, "step": 2552500 }, { "epoch": 26.94, "learning_rate": 3.652869972666927e-05, "loss": 1.3138, "step": 2553000 }, { "epoch": 26.95, "learning_rate": 3.652606139915785e-05, "loss": 1.3515, "step": 2553500 }, { "epoch": 26.95, "learning_rate": 3.6523423071646424e-05, "loss": 1.3265, "step": 2554000 }, { "epoch": 26.96, "learning_rate": 3.6520784744135e-05, "loss": 1.333, "step": 2554500 }, { "epoch": 26.96, "learning_rate": 3.6518146416623575e-05, "loss": 1.3473, "step": 2555000 }, { "epoch": 26.97, "learning_rate": 3.651550808911215e-05, "loss": 1.3466, "step": 2555500 }, { "epoch": 26.97, "learning_rate": 3.6512869761600726e-05, "loss": 1.3503, "step": 2556000 }, { "epoch": 26.98, "learning_rate": 3.65102314340893e-05, "loss": 1.3572, "step": 2556500 }, { "epoch": 26.98, "learning_rate": 3.6507593106577883e-05, "loss": 1.4002, "step": 2557000 }, { "epoch": 26.99, "learning_rate": 3.650495477906646e-05, "loss": 1.3546, "step": 2557500 }, { "epoch": 27.0, "learning_rate": 3.650231645155503e-05, "loss": 1.3695, "step": 2558000 }, { "epoch": 27.0, "learning_rate": 3.649967812404361e-05, "loss": 1.3149, "step": 2558500 }, { "epoch": 27.01, "learning_rate": 3.6497039796532185e-05, "loss": 1.352, "step": 2559000 }, { "epoch": 27.01, "learning_rate": 3.649440146902076e-05, "loss": 1.363, "step": 2559500 }, { "epoch": 27.02, "learning_rate": 3.6491763141509336e-05, "loss": 1.318, "step": 2560000 }, { "epoch": 27.02, "learning_rate": 3.648912481399791e-05, "loss": 1.3496, "step": 2560500 }, { "epoch": 27.03, "learning_rate": 3.648648648648649e-05, "loss": 1.3475, "step": 2561000 }, { "epoch": 27.03, "learning_rate": 3.648384815897506e-05, "loss": 1.3555, "step": 2561500 }, { "epoch": 27.04, "learning_rate": 3.6481209831463645e-05, "loss": 1.3269, "step": 2562000 }, { "epoch": 27.04, "learning_rate": 3.647857150395222e-05, "loss": 1.2945, "step": 2562500 }, { "epoch": 27.05, "learning_rate": 3.647593317644079e-05, "loss": 1.3292, "step": 2563000 }, { "epoch": 27.05, "learning_rate": 3.6473294848929365e-05, "loss": 1.3606, "step": 2563500 }, { "epoch": 27.06, "learning_rate": 3.647065652141795e-05, "loss": 1.3701, "step": 2564000 }, { "epoch": 27.06, "learning_rate": 3.646801819390652e-05, "loss": 1.3597, "step": 2564500 }, { "epoch": 27.07, "learning_rate": 3.646537986639509e-05, "loss": 1.3778, "step": 2565000 }, { "epoch": 27.07, "learning_rate": 3.6462741538883673e-05, "loss": 1.3416, "step": 2565500 }, { "epoch": 27.08, "learning_rate": 3.646010321137225e-05, "loss": 1.3355, "step": 2566000 }, { "epoch": 27.09, "learning_rate": 3.6457464883860824e-05, "loss": 1.3466, "step": 2566500 }, { "epoch": 27.09, "learning_rate": 3.64548265563494e-05, "loss": 1.3485, "step": 2567000 }, { "epoch": 27.1, "learning_rate": 3.6452188228837975e-05, "loss": 1.3271, "step": 2567500 }, { "epoch": 27.1, "learning_rate": 3.644954990132655e-05, "loss": 1.3198, "step": 2568000 }, { "epoch": 27.11, "learning_rate": 3.6446911573815126e-05, "loss": 1.3552, "step": 2568500 }, { "epoch": 27.11, "learning_rate": 3.644427324630371e-05, "loss": 1.3554, "step": 2569000 }, { "epoch": 27.12, "learning_rate": 3.6441634918792284e-05, "loss": 1.3759, "step": 2569500 }, { "epoch": 27.12, "learning_rate": 3.643899659128085e-05, "loss": 1.3373, "step": 2570000 }, { "epoch": 27.13, "learning_rate": 3.6436358263769435e-05, "loss": 1.3461, "step": 2570500 }, { "epoch": 27.13, "learning_rate": 3.643371993625801e-05, "loss": 1.3432, "step": 2571000 }, { "epoch": 27.14, "learning_rate": 3.6431081608746586e-05, "loss": 1.3212, "step": 2571500 }, { "epoch": 27.14, "learning_rate": 3.642844328123516e-05, "loss": 1.3775, "step": 2572000 }, { "epoch": 27.15, "learning_rate": 3.642580495372374e-05, "loss": 1.3859, "step": 2572500 }, { "epoch": 27.15, "learning_rate": 3.642316662621231e-05, "loss": 1.3441, "step": 2573000 }, { "epoch": 27.16, "learning_rate": 3.642052829870089e-05, "loss": 1.3635, "step": 2573500 }, { "epoch": 27.16, "learning_rate": 3.641788997118947e-05, "loss": 1.3044, "step": 2574000 }, { "epoch": 27.17, "learning_rate": 3.641525164367804e-05, "loss": 1.3501, "step": 2574500 }, { "epoch": 27.17, "learning_rate": 3.6412613316166615e-05, "loss": 1.2977, "step": 2575000 }, { "epoch": 27.18, "learning_rate": 3.640997498865519e-05, "loss": 1.3371, "step": 2575500 }, { "epoch": 27.19, "learning_rate": 3.640733666114377e-05, "loss": 1.3444, "step": 2576000 }, { "epoch": 27.19, "learning_rate": 3.640469833363235e-05, "loss": 1.2901, "step": 2576500 }, { "epoch": 27.2, "learning_rate": 3.6402060006120916e-05, "loss": 1.3477, "step": 2577000 }, { "epoch": 27.2, "learning_rate": 3.63994216786095e-05, "loss": 1.3969, "step": 2577500 }, { "epoch": 27.21, "learning_rate": 3.6396783351098074e-05, "loss": 1.3177, "step": 2578000 }, { "epoch": 27.21, "learning_rate": 3.639414502358665e-05, "loss": 1.3732, "step": 2578500 }, { "epoch": 27.22, "learning_rate": 3.6391506696075225e-05, "loss": 1.3269, "step": 2579000 }, { "epoch": 27.22, "learning_rate": 3.63888683685638e-05, "loss": 1.3163, "step": 2579500 }, { "epoch": 27.23, "learning_rate": 3.6386230041052376e-05, "loss": 1.3143, "step": 2580000 }, { "epoch": 27.23, "learning_rate": 3.638359171354095e-05, "loss": 1.3698, "step": 2580500 }, { "epoch": 27.24, "learning_rate": 3.6380953386029534e-05, "loss": 1.3559, "step": 2581000 }, { "epoch": 27.24, "learning_rate": 3.63783150585181e-05, "loss": 1.3922, "step": 2581500 }, { "epoch": 27.25, "learning_rate": 3.637567673100668e-05, "loss": 1.3195, "step": 2582000 }, { "epoch": 27.25, "learning_rate": 3.637303840349526e-05, "loss": 1.3318, "step": 2582500 }, { "epoch": 27.26, "learning_rate": 3.6370400075983836e-05, "loss": 1.3864, "step": 2583000 }, { "epoch": 27.26, "learning_rate": 3.636776174847241e-05, "loss": 1.3151, "step": 2583500 }, { "epoch": 27.27, "learning_rate": 3.636512342096099e-05, "loss": 1.3536, "step": 2584000 }, { "epoch": 27.28, "learning_rate": 3.636248509344956e-05, "loss": 1.3935, "step": 2584500 }, { "epoch": 27.28, "learning_rate": 3.635984676593814e-05, "loss": 1.3547, "step": 2585000 }, { "epoch": 27.29, "learning_rate": 3.635720843842671e-05, "loss": 1.3806, "step": 2585500 }, { "epoch": 27.29, "learning_rate": 3.6354570110915296e-05, "loss": 1.3637, "step": 2586000 }, { "epoch": 27.3, "learning_rate": 3.6351931783403864e-05, "loss": 1.3019, "step": 2586500 }, { "epoch": 27.3, "learning_rate": 3.634929345589244e-05, "loss": 1.301, "step": 2587000 }, { "epoch": 27.31, "learning_rate": 3.6346655128381015e-05, "loss": 1.3738, "step": 2587500 }, { "epoch": 27.31, "learning_rate": 3.63440168008696e-05, "loss": 1.4064, "step": 2588000 }, { "epoch": 27.32, "learning_rate": 3.634137847335817e-05, "loss": 1.355, "step": 2588500 }, { "epoch": 27.32, "learning_rate": 3.633874014584674e-05, "loss": 1.3365, "step": 2589000 }, { "epoch": 27.33, "learning_rate": 3.6336101818335324e-05, "loss": 1.3379, "step": 2589500 }, { "epoch": 27.33, "learning_rate": 3.63334634908239e-05, "loss": 1.3369, "step": 2590000 }, { "epoch": 27.34, "learning_rate": 3.6330825163312475e-05, "loss": 1.3956, "step": 2590500 }, { "epoch": 27.34, "learning_rate": 3.632818683580105e-05, "loss": 1.3774, "step": 2591000 }, { "epoch": 27.35, "learning_rate": 3.6325548508289626e-05, "loss": 1.3899, "step": 2591500 }, { "epoch": 27.35, "learning_rate": 3.63229101807782e-05, "loss": 1.3468, "step": 2592000 }, { "epoch": 27.36, "learning_rate": 3.632027185326678e-05, "loss": 1.3451, "step": 2592500 }, { "epoch": 27.36, "learning_rate": 3.631763352575536e-05, "loss": 1.3487, "step": 2593000 }, { "epoch": 27.37, "learning_rate": 3.631499519824393e-05, "loss": 1.3564, "step": 2593500 }, { "epoch": 27.38, "learning_rate": 3.63123568707325e-05, "loss": 1.3334, "step": 2594000 }, { "epoch": 27.38, "learning_rate": 3.6309718543221086e-05, "loss": 1.3306, "step": 2594500 }, { "epoch": 27.39, "learning_rate": 3.630708021570966e-05, "loss": 1.3452, "step": 2595000 }, { "epoch": 27.39, "learning_rate": 3.6304441888198237e-05, "loss": 1.3637, "step": 2595500 }, { "epoch": 27.4, "learning_rate": 3.630180356068681e-05, "loss": 1.3055, "step": 2596000 }, { "epoch": 27.4, "learning_rate": 3.629916523317539e-05, "loss": 1.3603, "step": 2596500 }, { "epoch": 27.41, "learning_rate": 3.629652690566396e-05, "loss": 1.3266, "step": 2597000 }, { "epoch": 27.41, "learning_rate": 3.629388857815254e-05, "loss": 1.4133, "step": 2597500 }, { "epoch": 27.42, "learning_rate": 3.629125025064112e-05, "loss": 1.3899, "step": 2598000 }, { "epoch": 27.42, "learning_rate": 3.628861192312969e-05, "loss": 1.3664, "step": 2598500 }, { "epoch": 27.43, "learning_rate": 3.6285973595618265e-05, "loss": 1.3513, "step": 2599000 }, { "epoch": 27.43, "learning_rate": 3.628333526810685e-05, "loss": 1.3438, "step": 2599500 }, { "epoch": 27.44, "learning_rate": 3.628069694059542e-05, "loss": 1.332, "step": 2600000 }, { "epoch": 27.44, "learning_rate": 3.627805861308399e-05, "loss": 1.3389, "step": 2600500 }, { "epoch": 27.45, "learning_rate": 3.627542028557257e-05, "loss": 1.3386, "step": 2601000 }, { "epoch": 27.45, "learning_rate": 3.627278195806115e-05, "loss": 1.3711, "step": 2601500 }, { "epoch": 27.46, "learning_rate": 3.6270143630549725e-05, "loss": 1.3548, "step": 2602000 }, { "epoch": 27.46, "learning_rate": 3.62675053030383e-05, "loss": 1.2865, "step": 2602500 }, { "epoch": 27.47, "learning_rate": 3.6264866975526876e-05, "loss": 1.3838, "step": 2603000 }, { "epoch": 27.48, "learning_rate": 3.626222864801545e-05, "loss": 1.2911, "step": 2603500 }, { "epoch": 27.48, "learning_rate": 3.625959032050403e-05, "loss": 1.3341, "step": 2604000 }, { "epoch": 27.49, "learning_rate": 3.62569519929926e-05, "loss": 1.3807, "step": 2604500 }, { "epoch": 27.49, "learning_rate": 3.6254313665481184e-05, "loss": 1.342, "step": 2605000 }, { "epoch": 27.5, "learning_rate": 3.625167533796975e-05, "loss": 1.315, "step": 2605500 }, { "epoch": 27.5, "learning_rate": 3.624903701045833e-05, "loss": 1.3575, "step": 2606000 }, { "epoch": 27.51, "learning_rate": 3.624639868294691e-05, "loss": 1.3541, "step": 2606500 }, { "epoch": 27.51, "learning_rate": 3.6243760355435486e-05, "loss": 1.3309, "step": 2607000 }, { "epoch": 27.52, "learning_rate": 3.624112202792406e-05, "loss": 1.3619, "step": 2607500 }, { "epoch": 27.52, "learning_rate": 3.623848370041264e-05, "loss": 1.3566, "step": 2608000 }, { "epoch": 27.53, "learning_rate": 3.623584537290121e-05, "loss": 1.3679, "step": 2608500 }, { "epoch": 27.53, "learning_rate": 3.623320704538979e-05, "loss": 1.3365, "step": 2609000 }, { "epoch": 27.54, "learning_rate": 3.6230568717878364e-05, "loss": 1.2894, "step": 2609500 }, { "epoch": 27.54, "learning_rate": 3.622793039036694e-05, "loss": 1.3389, "step": 2610000 }, { "epoch": 27.55, "learning_rate": 3.6225292062855515e-05, "loss": 1.3351, "step": 2610500 }, { "epoch": 27.55, "learning_rate": 3.622265373534409e-05, "loss": 1.3958, "step": 2611000 }, { "epoch": 27.56, "learning_rate": 3.622001540783267e-05, "loss": 1.3391, "step": 2611500 }, { "epoch": 27.57, "learning_rate": 3.621737708032125e-05, "loss": 1.4, "step": 2612000 }, { "epoch": 27.57, "learning_rate": 3.621473875280982e-05, "loss": 1.3319, "step": 2612500 }, { "epoch": 27.58, "learning_rate": 3.621210042529839e-05, "loss": 1.3533, "step": 2613000 }, { "epoch": 27.58, "learning_rate": 3.6209462097786974e-05, "loss": 1.325, "step": 2613500 }, { "epoch": 27.59, "learning_rate": 3.620682377027555e-05, "loss": 1.4136, "step": 2614000 }, { "epoch": 27.59, "learning_rate": 3.6204185442764125e-05, "loss": 1.3561, "step": 2614500 }, { "epoch": 27.6, "learning_rate": 3.62015471152527e-05, "loss": 1.3691, "step": 2615000 }, { "epoch": 27.6, "learning_rate": 3.6198908787741276e-05, "loss": 1.3245, "step": 2615500 }, { "epoch": 27.61, "learning_rate": 3.619627046022985e-05, "loss": 1.3534, "step": 2616000 }, { "epoch": 27.61, "learning_rate": 3.619363213271843e-05, "loss": 1.3239, "step": 2616500 }, { "epoch": 27.62, "learning_rate": 3.619099380520701e-05, "loss": 1.363, "step": 2617000 }, { "epoch": 27.62, "learning_rate": 3.618835547769558e-05, "loss": 1.37, "step": 2617500 }, { "epoch": 27.63, "learning_rate": 3.6185717150184154e-05, "loss": 1.3355, "step": 2618000 }, { "epoch": 27.63, "learning_rate": 3.6183078822672736e-05, "loss": 1.3149, "step": 2618500 }, { "epoch": 27.64, "learning_rate": 3.618044049516131e-05, "loss": 1.3026, "step": 2619000 }, { "epoch": 27.64, "learning_rate": 3.617780216764988e-05, "loss": 1.3858, "step": 2619500 }, { "epoch": 27.65, "learning_rate": 3.617516384013846e-05, "loss": 1.3863, "step": 2620000 }, { "epoch": 27.65, "learning_rate": 3.617252551262704e-05, "loss": 1.3501, "step": 2620500 }, { "epoch": 27.66, "learning_rate": 3.6169887185115614e-05, "loss": 1.352, "step": 2621000 }, { "epoch": 27.67, "learning_rate": 3.616724885760419e-05, "loss": 1.3196, "step": 2621500 }, { "epoch": 27.67, "learning_rate": 3.6164610530092765e-05, "loss": 1.3435, "step": 2622000 }, { "epoch": 27.68, "learning_rate": 3.616197220258134e-05, "loss": 1.3861, "step": 2622500 }, { "epoch": 27.68, "learning_rate": 3.6159333875069916e-05, "loss": 1.3938, "step": 2623000 }, { "epoch": 27.69, "learning_rate": 3.61566955475585e-05, "loss": 1.3559, "step": 2623500 }, { "epoch": 27.69, "learning_rate": 3.615405722004707e-05, "loss": 1.328, "step": 2624000 }, { "epoch": 27.7, "learning_rate": 3.615141889253564e-05, "loss": 1.3376, "step": 2624500 }, { "epoch": 27.7, "learning_rate": 3.614878056502422e-05, "loss": 1.3192, "step": 2625000 }, { "epoch": 27.71, "learning_rate": 3.61461422375128e-05, "loss": 1.3558, "step": 2625500 }, { "epoch": 27.71, "learning_rate": 3.6143503910001375e-05, "loss": 1.3712, "step": 2626000 }, { "epoch": 27.72, "learning_rate": 3.614086558248995e-05, "loss": 1.343, "step": 2626500 }, { "epoch": 27.72, "learning_rate": 3.6138227254978526e-05, "loss": 1.3183, "step": 2627000 }, { "epoch": 27.73, "learning_rate": 3.61355889274671e-05, "loss": 1.3757, "step": 2627500 }, { "epoch": 27.73, "learning_rate": 3.613295059995568e-05, "loss": 1.3842, "step": 2628000 }, { "epoch": 27.74, "learning_rate": 3.613031227244425e-05, "loss": 1.3165, "step": 2628500 }, { "epoch": 27.74, "learning_rate": 3.612767394493283e-05, "loss": 1.3706, "step": 2629000 }, { "epoch": 27.75, "learning_rate": 3.6125035617421404e-05, "loss": 1.3591, "step": 2629500 }, { "epoch": 27.76, "learning_rate": 3.612239728990998e-05, "loss": 1.3385, "step": 2630000 }, { "epoch": 27.76, "learning_rate": 3.611975896239856e-05, "loss": 1.3138, "step": 2630500 }, { "epoch": 27.77, "learning_rate": 3.611712063488714e-05, "loss": 1.3832, "step": 2631000 }, { "epoch": 27.77, "learning_rate": 3.6114482307375706e-05, "loss": 1.3342, "step": 2631500 }, { "epoch": 27.78, "learning_rate": 3.611184397986429e-05, "loss": 1.3566, "step": 2632000 }, { "epoch": 27.78, "learning_rate": 3.610920565235286e-05, "loss": 1.353, "step": 2632500 }, { "epoch": 27.79, "learning_rate": 3.610656732484144e-05, "loss": 1.3718, "step": 2633000 }, { "epoch": 27.79, "learning_rate": 3.6103928997330014e-05, "loss": 1.3672, "step": 2633500 }, { "epoch": 27.8, "learning_rate": 3.610129066981859e-05, "loss": 1.3361, "step": 2634000 }, { "epoch": 27.8, "learning_rate": 3.6098652342307165e-05, "loss": 1.3431, "step": 2634500 }, { "epoch": 27.81, "learning_rate": 3.609601401479574e-05, "loss": 1.3133, "step": 2635000 }, { "epoch": 27.81, "learning_rate": 3.609337568728432e-05, "loss": 1.3534, "step": 2635500 }, { "epoch": 27.82, "learning_rate": 3.60907373597729e-05, "loss": 1.3316, "step": 2636000 }, { "epoch": 27.82, "learning_rate": 3.608809903226147e-05, "loss": 1.3491, "step": 2636500 }, { "epoch": 27.83, "learning_rate": 3.608546070475004e-05, "loss": 1.384, "step": 2637000 }, { "epoch": 27.83, "learning_rate": 3.6082822377238625e-05, "loss": 1.3609, "step": 2637500 }, { "epoch": 27.84, "learning_rate": 3.60801840497272e-05, "loss": 1.3686, "step": 2638000 }, { "epoch": 27.84, "learning_rate": 3.607754572221577e-05, "loss": 1.2952, "step": 2638500 }, { "epoch": 27.85, "learning_rate": 3.607490739470435e-05, "loss": 1.3604, "step": 2639000 }, { "epoch": 27.86, "learning_rate": 3.607226906719293e-05, "loss": 1.3819, "step": 2639500 }, { "epoch": 27.86, "learning_rate": 3.60696307396815e-05, "loss": 1.3664, "step": 2640000 }, { "epoch": 27.87, "learning_rate": 3.606699241217008e-05, "loss": 1.4005, "step": 2640500 }, { "epoch": 27.87, "learning_rate": 3.606435408465865e-05, "loss": 1.368, "step": 2641000 }, { "epoch": 27.88, "learning_rate": 3.606171575714723e-05, "loss": 1.3532, "step": 2641500 }, { "epoch": 27.88, "learning_rate": 3.6059077429635804e-05, "loss": 1.3709, "step": 2642000 }, { "epoch": 27.89, "learning_rate": 3.6056439102124387e-05, "loss": 1.3469, "step": 2642500 }, { "epoch": 27.89, "learning_rate": 3.605380077461296e-05, "loss": 1.3464, "step": 2643000 }, { "epoch": 27.9, "learning_rate": 3.605116244710153e-05, "loss": 1.4002, "step": 2643500 }, { "epoch": 27.9, "learning_rate": 3.604852411959011e-05, "loss": 1.3268, "step": 2644000 }, { "epoch": 27.91, "learning_rate": 3.604588579207869e-05, "loss": 1.3216, "step": 2644500 }, { "epoch": 27.91, "learning_rate": 3.6043247464567264e-05, "loss": 1.3628, "step": 2645000 }, { "epoch": 27.92, "learning_rate": 3.604060913705584e-05, "loss": 1.3726, "step": 2645500 }, { "epoch": 27.92, "learning_rate": 3.6037970809544415e-05, "loss": 1.3041, "step": 2646000 }, { "epoch": 27.93, "learning_rate": 3.603533248203299e-05, "loss": 1.37, "step": 2646500 }, { "epoch": 27.93, "learning_rate": 3.6032694154521566e-05, "loss": 1.3515, "step": 2647000 }, { "epoch": 27.94, "learning_rate": 3.603005582701015e-05, "loss": 1.3384, "step": 2647500 }, { "epoch": 27.95, "learning_rate": 3.602741749949872e-05, "loss": 1.2953, "step": 2648000 }, { "epoch": 27.95, "learning_rate": 3.602477917198729e-05, "loss": 1.3559, "step": 2648500 }, { "epoch": 27.96, "learning_rate": 3.602214084447587e-05, "loss": 1.3149, "step": 2649000 }, { "epoch": 27.96, "learning_rate": 3.601950251696445e-05, "loss": 1.3437, "step": 2649500 }, { "epoch": 27.97, "learning_rate": 3.6016864189453026e-05, "loss": 1.3566, "step": 2650000 }, { "epoch": 27.97, "learning_rate": 3.6014225861941594e-05, "loss": 1.3226, "step": 2650500 }, { "epoch": 27.98, "learning_rate": 3.601158753443018e-05, "loss": 1.3344, "step": 2651000 }, { "epoch": 27.98, "learning_rate": 3.600894920691875e-05, "loss": 1.3171, "step": 2651500 }, { "epoch": 27.99, "learning_rate": 3.600631087940733e-05, "loss": 1.3198, "step": 2652000 }, { "epoch": 27.99, "learning_rate": 3.60036725518959e-05, "loss": 1.3223, "step": 2652500 }, { "epoch": 28.0, "learning_rate": 3.600103422438448e-05, "loss": 1.3553, "step": 2653000 }, { "epoch": 28.0, "learning_rate": 3.5998395896873054e-05, "loss": 1.3302, "step": 2653500 }, { "epoch": 28.01, "learning_rate": 3.599575756936163e-05, "loss": 1.2676, "step": 2654000 }, { "epoch": 28.01, "learning_rate": 3.599311924185021e-05, "loss": 1.3442, "step": 2654500 }, { "epoch": 28.02, "learning_rate": 3.599048091433879e-05, "loss": 1.3381, "step": 2655000 }, { "epoch": 28.02, "learning_rate": 3.5987842586827356e-05, "loss": 1.3376, "step": 2655500 }, { "epoch": 28.03, "learning_rate": 3.598520425931594e-05, "loss": 1.2661, "step": 2656000 }, { "epoch": 28.03, "learning_rate": 3.5982565931804514e-05, "loss": 1.361, "step": 2656500 }, { "epoch": 28.04, "learning_rate": 3.597992760429309e-05, "loss": 1.315, "step": 2657000 }, { "epoch": 28.05, "learning_rate": 3.5977289276781665e-05, "loss": 1.3698, "step": 2657500 }, { "epoch": 28.05, "learning_rate": 3.597465094927024e-05, "loss": 1.3425, "step": 2658000 }, { "epoch": 28.06, "learning_rate": 3.5972012621758816e-05, "loss": 1.3727, "step": 2658500 }, { "epoch": 28.06, "learning_rate": 3.596937429424739e-05, "loss": 1.3278, "step": 2659000 }, { "epoch": 28.07, "learning_rate": 3.5966735966735974e-05, "loss": 1.3052, "step": 2659500 }, { "epoch": 28.07, "learning_rate": 3.596409763922454e-05, "loss": 1.3393, "step": 2660000 }, { "epoch": 28.08, "learning_rate": 3.596145931171312e-05, "loss": 1.3754, "step": 2660500 }, { "epoch": 28.08, "learning_rate": 3.595882098420169e-05, "loss": 1.3593, "step": 2661000 }, { "epoch": 28.09, "learning_rate": 3.5956182656690275e-05, "loss": 1.3709, "step": 2661500 }, { "epoch": 28.09, "learning_rate": 3.595354432917885e-05, "loss": 1.2909, "step": 2662000 }, { "epoch": 28.1, "learning_rate": 3.595090600166742e-05, "loss": 1.3407, "step": 2662500 }, { "epoch": 28.1, "learning_rate": 3.5948267674156e-05, "loss": 1.3932, "step": 2663000 }, { "epoch": 28.11, "learning_rate": 3.594562934664458e-05, "loss": 1.3449, "step": 2663500 }, { "epoch": 28.11, "learning_rate": 3.594299101913315e-05, "loss": 1.3392, "step": 2664000 }, { "epoch": 28.12, "learning_rate": 3.594035269162173e-05, "loss": 1.3602, "step": 2664500 }, { "epoch": 28.12, "learning_rate": 3.5937714364110304e-05, "loss": 1.3492, "step": 2665000 }, { "epoch": 28.13, "learning_rate": 3.593507603659888e-05, "loss": 1.3613, "step": 2665500 }, { "epoch": 28.14, "learning_rate": 3.5932437709087455e-05, "loss": 1.3144, "step": 2666000 }, { "epoch": 28.14, "learning_rate": 3.592979938157604e-05, "loss": 1.3137, "step": 2666500 }, { "epoch": 28.15, "learning_rate": 3.5927161054064606e-05, "loss": 1.2899, "step": 2667000 }, { "epoch": 28.15, "learning_rate": 3.592452272655318e-05, "loss": 1.3154, "step": 2667500 }, { "epoch": 28.16, "learning_rate": 3.5921884399041764e-05, "loss": 1.3147, "step": 2668000 }, { "epoch": 28.16, "learning_rate": 3.591924607153034e-05, "loss": 1.3832, "step": 2668500 }, { "epoch": 28.17, "learning_rate": 3.5916607744018915e-05, "loss": 1.3442, "step": 2669000 }, { "epoch": 28.17, "learning_rate": 3.591396941650749e-05, "loss": 1.3318, "step": 2669500 }, { "epoch": 28.18, "learning_rate": 3.5911331088996066e-05, "loss": 1.3788, "step": 2670000 }, { "epoch": 28.18, "learning_rate": 3.590869276148464e-05, "loss": 1.337, "step": 2670500 }, { "epoch": 28.19, "learning_rate": 3.5906054433973216e-05, "loss": 1.3619, "step": 2671000 }, { "epoch": 28.19, "learning_rate": 3.59034161064618e-05, "loss": 1.4116, "step": 2671500 }, { "epoch": 28.2, "learning_rate": 3.590077777895037e-05, "loss": 1.2991, "step": 2672000 }, { "epoch": 28.2, "learning_rate": 3.589813945143894e-05, "loss": 1.3112, "step": 2672500 }, { "epoch": 28.21, "learning_rate": 3.5895501123927525e-05, "loss": 1.3639, "step": 2673000 }, { "epoch": 28.21, "learning_rate": 3.58928627964161e-05, "loss": 1.3342, "step": 2673500 }, { "epoch": 28.22, "learning_rate": 3.5890224468904676e-05, "loss": 1.3647, "step": 2674000 }, { "epoch": 28.22, "learning_rate": 3.5887586141393245e-05, "loss": 1.3079, "step": 2674500 }, { "epoch": 28.23, "learning_rate": 3.588494781388183e-05, "loss": 1.3159, "step": 2675000 }, { "epoch": 28.24, "learning_rate": 3.58823094863704e-05, "loss": 1.3146, "step": 2675500 }, { "epoch": 28.24, "learning_rate": 3.587967115885898e-05, "loss": 1.3716, "step": 2676000 }, { "epoch": 28.25, "learning_rate": 3.5877032831347554e-05, "loss": 1.3581, "step": 2676500 }, { "epoch": 28.25, "learning_rate": 3.587439450383613e-05, "loss": 1.3256, "step": 2677000 }, { "epoch": 28.26, "learning_rate": 3.5871756176324705e-05, "loss": 1.3201, "step": 2677500 }, { "epoch": 28.26, "learning_rate": 3.586911784881328e-05, "loss": 1.3098, "step": 2678000 }, { "epoch": 28.27, "learning_rate": 3.586647952130186e-05, "loss": 1.3053, "step": 2678500 }, { "epoch": 28.27, "learning_rate": 3.586384119379043e-05, "loss": 1.3243, "step": 2679000 }, { "epoch": 28.28, "learning_rate": 3.5861202866279007e-05, "loss": 1.3197, "step": 2679500 }, { "epoch": 28.28, "learning_rate": 3.585856453876759e-05, "loss": 1.3234, "step": 2680000 }, { "epoch": 28.29, "learning_rate": 3.5855926211256164e-05, "loss": 1.3381, "step": 2680500 }, { "epoch": 28.29, "learning_rate": 3.585328788374474e-05, "loss": 1.3076, "step": 2681000 }, { "epoch": 28.3, "learning_rate": 3.5850649556233315e-05, "loss": 1.372, "step": 2681500 }, { "epoch": 28.3, "learning_rate": 3.584801122872189e-05, "loss": 1.2978, "step": 2682000 }, { "epoch": 28.31, "learning_rate": 3.5845372901210466e-05, "loss": 1.3247, "step": 2682500 }, { "epoch": 28.31, "learning_rate": 3.584273457369904e-05, "loss": 1.3751, "step": 2683000 }, { "epoch": 28.32, "learning_rate": 3.5840096246187624e-05, "loss": 1.3606, "step": 2683500 }, { "epoch": 28.33, "learning_rate": 3.583745791867619e-05, "loss": 1.3779, "step": 2684000 }, { "epoch": 28.33, "learning_rate": 3.583481959116477e-05, "loss": 1.3546, "step": 2684500 }, { "epoch": 28.34, "learning_rate": 3.583218126365335e-05, "loss": 1.2702, "step": 2685000 }, { "epoch": 28.34, "learning_rate": 3.5829542936141926e-05, "loss": 1.3305, "step": 2685500 }, { "epoch": 28.35, "learning_rate": 3.5826904608630495e-05, "loss": 1.3116, "step": 2686000 }, { "epoch": 28.35, "learning_rate": 3.582426628111907e-05, "loss": 1.3731, "step": 2686500 }, { "epoch": 28.36, "learning_rate": 3.582162795360765e-05, "loss": 1.3206, "step": 2687000 }, { "epoch": 28.36, "learning_rate": 3.581898962609623e-05, "loss": 1.3341, "step": 2687500 }, { "epoch": 28.37, "learning_rate": 3.58163512985848e-05, "loss": 1.3735, "step": 2688000 }, { "epoch": 28.37, "learning_rate": 3.581371297107338e-05, "loss": 1.313, "step": 2688500 }, { "epoch": 28.38, "learning_rate": 3.5811074643561954e-05, "loss": 1.3257, "step": 2689000 }, { "epoch": 28.38, "learning_rate": 3.580843631605053e-05, "loss": 1.3504, "step": 2689500 }, { "epoch": 28.39, "learning_rate": 3.5805797988539105e-05, "loss": 1.3201, "step": 2690000 }, { "epoch": 28.39, "learning_rate": 3.580315966102769e-05, "loss": 1.2742, "step": 2690500 }, { "epoch": 28.4, "learning_rate": 3.5800521333516256e-05, "loss": 1.3368, "step": 2691000 }, { "epoch": 28.4, "learning_rate": 3.579788300600483e-05, "loss": 1.3695, "step": 2691500 }, { "epoch": 28.41, "learning_rate": 3.5795244678493414e-05, "loss": 1.3401, "step": 2692000 }, { "epoch": 28.41, "learning_rate": 3.579260635098199e-05, "loss": 1.3148, "step": 2692500 }, { "epoch": 28.42, "learning_rate": 3.5789968023470565e-05, "loss": 1.3454, "step": 2693000 }, { "epoch": 28.43, "learning_rate": 3.578732969595914e-05, "loss": 1.3057, "step": 2693500 }, { "epoch": 28.43, "learning_rate": 3.5784691368447716e-05, "loss": 1.401, "step": 2694000 }, { "epoch": 28.44, "learning_rate": 3.578205304093629e-05, "loss": 1.3594, "step": 2694500 }, { "epoch": 28.44, "learning_rate": 3.577941471342487e-05, "loss": 1.3576, "step": 2695000 }, { "epoch": 28.45, "learning_rate": 3.577677638591344e-05, "loss": 1.359, "step": 2695500 }, { "epoch": 28.45, "learning_rate": 3.577413805840202e-05, "loss": 1.329, "step": 2696000 }, { "epoch": 28.46, "learning_rate": 3.5771499730890593e-05, "loss": 1.3598, "step": 2696500 }, { "epoch": 28.46, "learning_rate": 3.5768861403379176e-05, "loss": 1.3475, "step": 2697000 }, { "epoch": 28.47, "learning_rate": 3.576622307586775e-05, "loss": 1.407, "step": 2697500 }, { "epoch": 28.47, "learning_rate": 3.576358474835632e-05, "loss": 1.3719, "step": 2698000 }, { "epoch": 28.48, "learning_rate": 3.5760946420844895e-05, "loss": 1.3621, "step": 2698500 }, { "epoch": 28.48, "learning_rate": 3.575830809333348e-05, "loss": 1.3697, "step": 2699000 }, { "epoch": 28.49, "learning_rate": 3.575566976582205e-05, "loss": 1.3566, "step": 2699500 }, { "epoch": 28.49, "learning_rate": 3.575303143831063e-05, "loss": 1.3573, "step": 2700000 }, { "epoch": 28.5, "learning_rate": 3.5750393110799204e-05, "loss": 1.3753, "step": 2700500 }, { "epoch": 28.5, "learning_rate": 3.574775478328778e-05, "loss": 1.3246, "step": 2701000 }, { "epoch": 28.51, "learning_rate": 3.5745116455776355e-05, "loss": 1.3223, "step": 2701500 }, { "epoch": 28.52, "learning_rate": 3.574247812826493e-05, "loss": 1.3202, "step": 2702000 }, { "epoch": 28.52, "learning_rate": 3.573983980075351e-05, "loss": 1.3018, "step": 2702500 }, { "epoch": 28.53, "learning_rate": 3.573720147324208e-05, "loss": 1.3952, "step": 2703000 }, { "epoch": 28.53, "learning_rate": 3.573456314573066e-05, "loss": 1.3347, "step": 2703500 }, { "epoch": 28.54, "learning_rate": 3.573192481821924e-05, "loss": 1.2978, "step": 2704000 }, { "epoch": 28.54, "learning_rate": 3.5729286490707815e-05, "loss": 1.3396, "step": 2704500 }, { "epoch": 28.55, "learning_rate": 3.5726648163196383e-05, "loss": 1.2953, "step": 2705000 }, { "epoch": 28.55, "learning_rate": 3.5724009835684966e-05, "loss": 1.3264, "step": 2705500 }, { "epoch": 28.56, "learning_rate": 3.572137150817354e-05, "loss": 1.3315, "step": 2706000 }, { "epoch": 28.56, "learning_rate": 3.571873318066212e-05, "loss": 1.3353, "step": 2706500 }, { "epoch": 28.57, "learning_rate": 3.571609485315069e-05, "loss": 1.3104, "step": 2707000 }, { "epoch": 28.57, "learning_rate": 3.571345652563927e-05, "loss": 1.3164, "step": 2707500 }, { "epoch": 28.58, "learning_rate": 3.571081819812784e-05, "loss": 1.3342, "step": 2708000 }, { "epoch": 28.58, "learning_rate": 3.570817987061642e-05, "loss": 1.3265, "step": 2708500 }, { "epoch": 28.59, "learning_rate": 3.5705541543105e-05, "loss": 1.343, "step": 2709000 }, { "epoch": 28.59, "learning_rate": 3.5702903215593576e-05, "loss": 1.3957, "step": 2709500 }, { "epoch": 28.6, "learning_rate": 3.5700264888082145e-05, "loss": 1.3324, "step": 2710000 }, { "epoch": 28.6, "learning_rate": 3.569762656057072e-05, "loss": 1.3386, "step": 2710500 }, { "epoch": 28.61, "learning_rate": 3.56949882330593e-05, "loss": 1.3338, "step": 2711000 }, { "epoch": 28.62, "learning_rate": 3.569234990554788e-05, "loss": 1.3699, "step": 2711500 }, { "epoch": 28.62, "learning_rate": 3.5689711578036454e-05, "loss": 1.352, "step": 2712000 }, { "epoch": 28.63, "learning_rate": 3.568707325052503e-05, "loss": 1.2898, "step": 2712500 }, { "epoch": 28.63, "learning_rate": 3.5684434923013605e-05, "loss": 1.3697, "step": 2713000 }, { "epoch": 28.64, "learning_rate": 3.568179659550218e-05, "loss": 1.4058, "step": 2713500 }, { "epoch": 28.64, "learning_rate": 3.5679158267990756e-05, "loss": 1.3282, "step": 2714000 }, { "epoch": 28.65, "learning_rate": 3.567651994047933e-05, "loss": 1.3221, "step": 2714500 }, { "epoch": 28.65, "learning_rate": 3.567388161296791e-05, "loss": 1.334, "step": 2715000 }, { "epoch": 28.66, "learning_rate": 3.567124328545648e-05, "loss": 1.3021, "step": 2715500 }, { "epoch": 28.66, "learning_rate": 3.5668604957945065e-05, "loss": 1.3092, "step": 2716000 }, { "epoch": 28.67, "learning_rate": 3.566596663043364e-05, "loss": 1.3462, "step": 2716500 }, { "epoch": 28.67, "learning_rate": 3.566332830292221e-05, "loss": 1.3385, "step": 2717000 }, { "epoch": 28.68, "learning_rate": 3.566068997541079e-05, "loss": 1.3136, "step": 2717500 }, { "epoch": 28.68, "learning_rate": 3.5658051647899366e-05, "loss": 1.3897, "step": 2718000 }, { "epoch": 28.69, "learning_rate": 3.565541332038794e-05, "loss": 1.3515, "step": 2718500 }, { "epoch": 28.69, "learning_rate": 3.565277499287652e-05, "loss": 1.3181, "step": 2719000 }, { "epoch": 28.7, "learning_rate": 3.565013666536509e-05, "loss": 1.3, "step": 2719500 }, { "epoch": 28.71, "learning_rate": 3.564749833785367e-05, "loss": 1.3468, "step": 2720000 }, { "epoch": 28.71, "learning_rate": 3.5644860010342244e-05, "loss": 1.2738, "step": 2720500 }, { "epoch": 28.72, "learning_rate": 3.5642221682830826e-05, "loss": 1.3192, "step": 2721000 }, { "epoch": 28.72, "learning_rate": 3.56395833553194e-05, "loss": 1.3466, "step": 2721500 }, { "epoch": 28.73, "learning_rate": 3.563694502780797e-05, "loss": 1.3038, "step": 2722000 }, { "epoch": 28.73, "learning_rate": 3.5634306700296546e-05, "loss": 1.3299, "step": 2722500 }, { "epoch": 28.74, "learning_rate": 3.563166837278513e-05, "loss": 1.3771, "step": 2723000 }, { "epoch": 28.74, "learning_rate": 3.5629030045273704e-05, "loss": 1.2743, "step": 2723500 }, { "epoch": 28.75, "learning_rate": 3.562639171776227e-05, "loss": 1.3768, "step": 2724000 }, { "epoch": 28.75, "learning_rate": 3.5623753390250855e-05, "loss": 1.3493, "step": 2724500 }, { "epoch": 28.76, "learning_rate": 3.562111506273943e-05, "loss": 1.3595, "step": 2725000 }, { "epoch": 28.76, "learning_rate": 3.5618476735228006e-05, "loss": 1.3928, "step": 2725500 }, { "epoch": 28.77, "learning_rate": 3.561583840771658e-05, "loss": 1.3329, "step": 2726000 }, { "epoch": 28.77, "learning_rate": 3.5613200080205157e-05, "loss": 1.3028, "step": 2726500 }, { "epoch": 28.78, "learning_rate": 3.561056175269373e-05, "loss": 1.3594, "step": 2727000 }, { "epoch": 28.78, "learning_rate": 3.560792342518231e-05, "loss": 1.3072, "step": 2727500 }, { "epoch": 28.79, "learning_rate": 3.560528509767089e-05, "loss": 1.3367, "step": 2728000 }, { "epoch": 28.79, "learning_rate": 3.5602646770159465e-05, "loss": 1.3618, "step": 2728500 }, { "epoch": 28.8, "learning_rate": 3.5600008442648034e-05, "loss": 1.3428, "step": 2729000 }, { "epoch": 28.81, "learning_rate": 3.5597370115136616e-05, "loss": 1.3258, "step": 2729500 }, { "epoch": 28.81, "learning_rate": 3.559473178762519e-05, "loss": 1.3549, "step": 2730000 }, { "epoch": 28.82, "learning_rate": 3.559209346011377e-05, "loss": 1.3431, "step": 2730500 }, { "epoch": 28.82, "learning_rate": 3.558945513260234e-05, "loss": 1.332, "step": 2731000 }, { "epoch": 28.83, "learning_rate": 3.558681680509092e-05, "loss": 1.3724, "step": 2731500 }, { "epoch": 28.83, "learning_rate": 3.5584178477579494e-05, "loss": 1.3247, "step": 2732000 }, { "epoch": 28.84, "learning_rate": 3.558154015006807e-05, "loss": 1.3405, "step": 2732500 }, { "epoch": 28.84, "learning_rate": 3.557890182255665e-05, "loss": 1.3612, "step": 2733000 }, { "epoch": 28.85, "learning_rate": 3.557626349504522e-05, "loss": 1.3591, "step": 2733500 }, { "epoch": 28.85, "learning_rate": 3.5573625167533796e-05, "loss": 1.3294, "step": 2734000 }, { "epoch": 28.86, "learning_rate": 3.557098684002237e-05, "loss": 1.3484, "step": 2734500 }, { "epoch": 28.86, "learning_rate": 3.5568348512510953e-05, "loss": 1.3367, "step": 2735000 }, { "epoch": 28.87, "learning_rate": 3.556571018499953e-05, "loss": 1.3464, "step": 2735500 }, { "epoch": 28.87, "learning_rate": 3.55630718574881e-05, "loss": 1.3512, "step": 2736000 }, { "epoch": 28.88, "learning_rate": 3.556043352997668e-05, "loss": 1.3032, "step": 2736500 }, { "epoch": 28.88, "learning_rate": 3.5557795202465255e-05, "loss": 1.3406, "step": 2737000 }, { "epoch": 28.89, "learning_rate": 3.555515687495383e-05, "loss": 1.3589, "step": 2737500 }, { "epoch": 28.89, "learning_rate": 3.5552518547442406e-05, "loss": 1.3567, "step": 2738000 }, { "epoch": 28.9, "learning_rate": 3.554988021993098e-05, "loss": 1.4049, "step": 2738500 }, { "epoch": 28.91, "learning_rate": 3.554724189241956e-05, "loss": 1.3549, "step": 2739000 }, { "epoch": 28.91, "learning_rate": 3.554460356490813e-05, "loss": 1.3172, "step": 2739500 }, { "epoch": 28.92, "learning_rate": 3.5541965237396715e-05, "loss": 1.3427, "step": 2740000 }, { "epoch": 28.92, "learning_rate": 3.553932690988529e-05, "loss": 1.3326, "step": 2740500 }, { "epoch": 28.93, "learning_rate": 3.553668858237386e-05, "loss": 1.3507, "step": 2741000 }, { "epoch": 28.93, "learning_rate": 3.553405025486244e-05, "loss": 1.3804, "step": 2741500 }, { "epoch": 28.94, "learning_rate": 3.553141192735102e-05, "loss": 1.3095, "step": 2742000 }, { "epoch": 28.94, "learning_rate": 3.552877359983959e-05, "loss": 1.3898, "step": 2742500 }, { "epoch": 28.95, "learning_rate": 3.552613527232817e-05, "loss": 1.3305, "step": 2743000 }, { "epoch": 28.95, "learning_rate": 3.5523496944816743e-05, "loss": 1.3358, "step": 2743500 }, { "epoch": 28.96, "learning_rate": 3.552085861730532e-05, "loss": 1.36, "step": 2744000 }, { "epoch": 28.96, "learning_rate": 3.5518220289793894e-05, "loss": 1.3765, "step": 2744500 }, { "epoch": 28.97, "learning_rate": 3.551558196228248e-05, "loss": 1.3722, "step": 2745000 }, { "epoch": 28.97, "learning_rate": 3.5512943634771045e-05, "loss": 1.3139, "step": 2745500 }, { "epoch": 28.98, "learning_rate": 3.551030530725962e-05, "loss": 1.3428, "step": 2746000 }, { "epoch": 28.98, "learning_rate": 3.55076669797482e-05, "loss": 1.2769, "step": 2746500 }, { "epoch": 28.99, "learning_rate": 3.550502865223678e-05, "loss": 1.3042, "step": 2747000 }, { "epoch": 29.0, "learning_rate": 3.5502390324725354e-05, "loss": 1.318, "step": 2747500 }, { "epoch": 29.0, "learning_rate": 3.549975199721392e-05, "loss": 1.3484, "step": 2748000 }, { "epoch": 29.01, "learning_rate": 3.5497113669702505e-05, "loss": 1.3653, "step": 2748500 }, { "epoch": 29.01, "learning_rate": 3.549447534219108e-05, "loss": 1.3341, "step": 2749000 }, { "epoch": 29.02, "learning_rate": 3.5491837014679656e-05, "loss": 1.3617, "step": 2749500 }, { "epoch": 29.02, "learning_rate": 3.548919868716823e-05, "loss": 1.3244, "step": 2750000 }, { "epoch": 29.03, "learning_rate": 3.548656035965681e-05, "loss": 1.3499, "step": 2750500 }, { "epoch": 29.03, "learning_rate": 3.548392203214538e-05, "loss": 1.3284, "step": 2751000 }, { "epoch": 29.04, "learning_rate": 3.548128370463396e-05, "loss": 1.2954, "step": 2751500 }, { "epoch": 29.04, "learning_rate": 3.547864537712254e-05, "loss": 1.2955, "step": 2752000 }, { "epoch": 29.05, "learning_rate": 3.547600704961111e-05, "loss": 1.3836, "step": 2752500 }, { "epoch": 29.05, "learning_rate": 3.5473368722099684e-05, "loss": 1.3106, "step": 2753000 }, { "epoch": 29.06, "learning_rate": 3.547073039458827e-05, "loss": 1.3257, "step": 2753500 }, { "epoch": 29.06, "learning_rate": 3.546809206707684e-05, "loss": 1.3602, "step": 2754000 }, { "epoch": 29.07, "learning_rate": 3.546545373956542e-05, "loss": 1.3566, "step": 2754500 }, { "epoch": 29.07, "learning_rate": 3.546281541205399e-05, "loss": 1.3158, "step": 2755000 }, { "epoch": 29.08, "learning_rate": 3.546017708454257e-05, "loss": 1.3008, "step": 2755500 }, { "epoch": 29.08, "learning_rate": 3.5457538757031144e-05, "loss": 1.3793, "step": 2756000 }, { "epoch": 29.09, "learning_rate": 3.545490042951972e-05, "loss": 1.2961, "step": 2756500 }, { "epoch": 29.1, "learning_rate": 3.54522621020083e-05, "loss": 1.2915, "step": 2757000 }, { "epoch": 29.1, "learning_rate": 3.544962377449687e-05, "loss": 1.3538, "step": 2757500 }, { "epoch": 29.11, "learning_rate": 3.5446985446985446e-05, "loss": 1.3322, "step": 2758000 }, { "epoch": 29.11, "learning_rate": 3.544434711947403e-05, "loss": 1.3335, "step": 2758500 }, { "epoch": 29.12, "learning_rate": 3.5441708791962604e-05, "loss": 1.2885, "step": 2759000 }, { "epoch": 29.12, "learning_rate": 3.543907046445118e-05, "loss": 1.3741, "step": 2759500 }, { "epoch": 29.13, "learning_rate": 3.543643213693975e-05, "loss": 1.3415, "step": 2760000 }, { "epoch": 29.13, "learning_rate": 3.543379380942833e-05, "loss": 1.3394, "step": 2760500 }, { "epoch": 29.14, "learning_rate": 3.5431155481916906e-05, "loss": 1.3126, "step": 2761000 }, { "epoch": 29.14, "learning_rate": 3.542851715440548e-05, "loss": 1.3357, "step": 2761500 }, { "epoch": 29.15, "learning_rate": 3.542587882689406e-05, "loss": 1.3066, "step": 2762000 }, { "epoch": 29.15, "learning_rate": 3.542324049938263e-05, "loss": 1.3346, "step": 2762500 }, { "epoch": 29.16, "learning_rate": 3.542060217187121e-05, "loss": 1.3112, "step": 2763000 }, { "epoch": 29.16, "learning_rate": 3.541796384435978e-05, "loss": 1.3598, "step": 2763500 }, { "epoch": 29.17, "learning_rate": 3.5415325516848366e-05, "loss": 1.2821, "step": 2764000 }, { "epoch": 29.17, "learning_rate": 3.5412687189336934e-05, "loss": 1.3221, "step": 2764500 }, { "epoch": 29.18, "learning_rate": 3.541004886182551e-05, "loss": 1.3522, "step": 2765000 }, { "epoch": 29.19, "learning_rate": 3.540741053431409e-05, "loss": 1.31, "step": 2765500 }, { "epoch": 29.19, "learning_rate": 3.540477220680267e-05, "loss": 1.3418, "step": 2766000 }, { "epoch": 29.2, "learning_rate": 3.540213387929124e-05, "loss": 1.2949, "step": 2766500 }, { "epoch": 29.2, "learning_rate": 3.539949555177982e-05, "loss": 1.2607, "step": 2767000 }, { "epoch": 29.21, "learning_rate": 3.5396857224268394e-05, "loss": 1.3011, "step": 2767500 }, { "epoch": 29.21, "learning_rate": 3.539421889675697e-05, "loss": 1.3584, "step": 2768000 }, { "epoch": 29.22, "learning_rate": 3.5391580569245545e-05, "loss": 1.3382, "step": 2768500 }, { "epoch": 29.22, "learning_rate": 3.538894224173413e-05, "loss": 1.336, "step": 2769000 }, { "epoch": 29.23, "learning_rate": 3.5386303914222696e-05, "loss": 1.3159, "step": 2769500 }, { "epoch": 29.23, "learning_rate": 3.538366558671127e-05, "loss": 1.3248, "step": 2770000 }, { "epoch": 29.24, "learning_rate": 3.5381027259199854e-05, "loss": 1.3512, "step": 2770500 }, { "epoch": 29.24, "learning_rate": 3.537838893168843e-05, "loss": 1.3702, "step": 2771000 }, { "epoch": 29.25, "learning_rate": 3.5375750604177e-05, "loss": 1.3828, "step": 2771500 }, { "epoch": 29.25, "learning_rate": 3.537311227666557e-05, "loss": 1.3153, "step": 2772000 }, { "epoch": 29.26, "learning_rate": 3.5370473949154156e-05, "loss": 1.3333, "step": 2772500 }, { "epoch": 29.26, "learning_rate": 3.536783562164273e-05, "loss": 1.361, "step": 2773000 }, { "epoch": 29.27, "learning_rate": 3.5365197294131307e-05, "loss": 1.3787, "step": 2773500 }, { "epoch": 29.27, "learning_rate": 3.536255896661988e-05, "loss": 1.3367, "step": 2774000 }, { "epoch": 29.28, "learning_rate": 3.535992063910846e-05, "loss": 1.292, "step": 2774500 }, { "epoch": 29.29, "learning_rate": 3.535728231159703e-05, "loss": 1.3018, "step": 2775000 }, { "epoch": 29.29, "learning_rate": 3.535464398408561e-05, "loss": 1.3663, "step": 2775500 }, { "epoch": 29.3, "learning_rate": 3.535200565657419e-05, "loss": 1.3825, "step": 2776000 }, { "epoch": 29.3, "learning_rate": 3.534936732906276e-05, "loss": 1.3438, "step": 2776500 }, { "epoch": 29.31, "learning_rate": 3.5346729001551335e-05, "loss": 1.3556, "step": 2777000 }, { "epoch": 29.31, "learning_rate": 3.534409067403992e-05, "loss": 1.2937, "step": 2777500 }, { "epoch": 29.32, "learning_rate": 3.534145234652849e-05, "loss": 1.3201, "step": 2778000 }, { "epoch": 29.32, "learning_rate": 3.533881401901707e-05, "loss": 1.2989, "step": 2778500 }, { "epoch": 29.33, "learning_rate": 3.5336175691505644e-05, "loss": 1.3632, "step": 2779000 }, { "epoch": 29.33, "learning_rate": 3.533353736399422e-05, "loss": 1.3411, "step": 2779500 }, { "epoch": 29.34, "learning_rate": 3.5330899036482795e-05, "loss": 1.2687, "step": 2780000 }, { "epoch": 29.34, "learning_rate": 3.532826070897137e-05, "loss": 1.2844, "step": 2780500 }, { "epoch": 29.35, "learning_rate": 3.5325622381459946e-05, "loss": 1.3526, "step": 2781000 }, { "epoch": 29.35, "learning_rate": 3.532298405394852e-05, "loss": 1.3381, "step": 2781500 }, { "epoch": 29.36, "learning_rate": 3.53203457264371e-05, "loss": 1.325, "step": 2782000 }, { "epoch": 29.36, "learning_rate": 3.531770739892568e-05, "loss": 1.3048, "step": 2782500 }, { "epoch": 29.37, "learning_rate": 3.5315069071414254e-05, "loss": 1.3299, "step": 2783000 }, { "epoch": 29.38, "learning_rate": 3.531243074390282e-05, "loss": 1.35, "step": 2783500 }, { "epoch": 29.38, "learning_rate": 3.53097924163914e-05, "loss": 1.3268, "step": 2784000 }, { "epoch": 29.39, "learning_rate": 3.530715408887998e-05, "loss": 1.3719, "step": 2784500 }, { "epoch": 29.39, "learning_rate": 3.5304515761368556e-05, "loss": 1.4089, "step": 2785000 }, { "epoch": 29.4, "learning_rate": 3.530187743385713e-05, "loss": 1.3425, "step": 2785500 }, { "epoch": 29.4, "learning_rate": 3.529923910634571e-05, "loss": 1.31, "step": 2786000 }, { "epoch": 29.41, "learning_rate": 3.529660077883428e-05, "loss": 1.353, "step": 2786500 }, { "epoch": 29.41, "learning_rate": 3.529396245132286e-05, "loss": 1.3531, "step": 2787000 }, { "epoch": 29.42, "learning_rate": 3.5291324123811434e-05, "loss": 1.3165, "step": 2787500 }, { "epoch": 29.42, "learning_rate": 3.5288685796300016e-05, "loss": 1.346, "step": 2788000 }, { "epoch": 29.43, "learning_rate": 3.5286047468788585e-05, "loss": 1.3067, "step": 2788500 }, { "epoch": 29.43, "learning_rate": 3.528340914127716e-05, "loss": 1.3225, "step": 2789000 }, { "epoch": 29.44, "learning_rate": 3.528077081376574e-05, "loss": 1.289, "step": 2789500 }, { "epoch": 29.44, "learning_rate": 3.527813248625432e-05, "loss": 1.3482, "step": 2790000 }, { "epoch": 29.45, "learning_rate": 3.527549415874289e-05, "loss": 1.3327, "step": 2790500 }, { "epoch": 29.45, "learning_rate": 3.527285583123147e-05, "loss": 1.3255, "step": 2791000 }, { "epoch": 29.46, "learning_rate": 3.5270217503720044e-05, "loss": 1.3186, "step": 2791500 }, { "epoch": 29.46, "learning_rate": 3.526757917620862e-05, "loss": 1.3252, "step": 2792000 }, { "epoch": 29.47, "learning_rate": 3.5264940848697195e-05, "loss": 1.3553, "step": 2792500 }, { "epoch": 29.48, "learning_rate": 3.526230252118577e-05, "loss": 1.3318, "step": 2793000 }, { "epoch": 29.48, "learning_rate": 3.5259664193674346e-05, "loss": 1.3018, "step": 2793500 }, { "epoch": 29.49, "learning_rate": 3.525702586616292e-05, "loss": 1.3704, "step": 2794000 }, { "epoch": 29.49, "learning_rate": 3.5254387538651504e-05, "loss": 1.3132, "step": 2794500 }, { "epoch": 29.5, "learning_rate": 3.525174921114008e-05, "loss": 1.3249, "step": 2795000 }, { "epoch": 29.5, "learning_rate": 3.524911088362865e-05, "loss": 1.3382, "step": 2795500 }, { "epoch": 29.51, "learning_rate": 3.5246472556117224e-05, "loss": 1.3629, "step": 2796000 }, { "epoch": 29.51, "learning_rate": 3.5243834228605806e-05, "loss": 1.3628, "step": 2796500 }, { "epoch": 29.52, "learning_rate": 3.524119590109438e-05, "loss": 1.3559, "step": 2797000 }, { "epoch": 29.52, "learning_rate": 3.523855757358296e-05, "loss": 1.2758, "step": 2797500 }, { "epoch": 29.53, "learning_rate": 3.523591924607153e-05, "loss": 1.3567, "step": 2798000 }, { "epoch": 29.53, "learning_rate": 3.523328091856011e-05, "loss": 1.3172, "step": 2798500 }, { "epoch": 29.54, "learning_rate": 3.5230642591048684e-05, "loss": 1.3647, "step": 2799000 }, { "epoch": 29.54, "learning_rate": 3.522800426353726e-05, "loss": 1.3535, "step": 2799500 }, { "epoch": 29.55, "learning_rate": 3.5225365936025834e-05, "loss": 1.3995, "step": 2800000 }, { "epoch": 29.55, "learning_rate": 3.522272760851441e-05, "loss": 1.3477, "step": 2800500 }, { "epoch": 29.56, "learning_rate": 3.5220089281002985e-05, "loss": 1.3391, "step": 2801000 }, { "epoch": 29.57, "learning_rate": 3.521745095349157e-05, "loss": 1.329, "step": 2801500 }, { "epoch": 29.57, "learning_rate": 3.521481262598014e-05, "loss": 1.3838, "step": 2802000 }, { "epoch": 29.58, "learning_rate": 3.521217429846871e-05, "loss": 1.3195, "step": 2802500 }, { "epoch": 29.58, "learning_rate": 3.5209535970957294e-05, "loss": 1.3093, "step": 2803000 }, { "epoch": 29.59, "learning_rate": 3.520689764344587e-05, "loss": 1.35, "step": 2803500 }, { "epoch": 29.59, "learning_rate": 3.5204259315934445e-05, "loss": 1.3382, "step": 2804000 }, { "epoch": 29.6, "learning_rate": 3.520162098842302e-05, "loss": 1.362, "step": 2804500 }, { "epoch": 29.6, "learning_rate": 3.5198982660911596e-05, "loss": 1.3655, "step": 2805000 }, { "epoch": 29.61, "learning_rate": 3.519634433340017e-05, "loss": 1.3239, "step": 2805500 }, { "epoch": 29.61, "learning_rate": 3.519370600588875e-05, "loss": 1.4165, "step": 2806000 }, { "epoch": 29.62, "learning_rate": 3.519106767837733e-05, "loss": 1.2905, "step": 2806500 }, { "epoch": 29.62, "learning_rate": 3.5188429350865905e-05, "loss": 1.3606, "step": 2807000 }, { "epoch": 29.63, "learning_rate": 3.5185791023354474e-05, "loss": 1.3332, "step": 2807500 }, { "epoch": 29.63, "learning_rate": 3.518315269584305e-05, "loss": 1.3255, "step": 2808000 }, { "epoch": 29.64, "learning_rate": 3.518051436833163e-05, "loss": 1.3502, "step": 2808500 }, { "epoch": 29.64, "learning_rate": 3.517787604082021e-05, "loss": 1.3264, "step": 2809000 }, { "epoch": 29.65, "learning_rate": 3.5175237713308776e-05, "loss": 1.3353, "step": 2809500 }, { "epoch": 29.65, "learning_rate": 3.517259938579736e-05, "loss": 1.3422, "step": 2810000 }, { "epoch": 29.66, "learning_rate": 3.516996105828593e-05, "loss": 1.3315, "step": 2810500 }, { "epoch": 29.67, "learning_rate": 3.516732273077451e-05, "loss": 1.3925, "step": 2811000 }, { "epoch": 29.67, "learning_rate": 3.5164684403263084e-05, "loss": 1.3545, "step": 2811500 }, { "epoch": 29.68, "learning_rate": 3.516204607575166e-05, "loss": 1.3639, "step": 2812000 }, { "epoch": 29.68, "learning_rate": 3.5159407748240235e-05, "loss": 1.3077, "step": 2812500 }, { "epoch": 29.69, "learning_rate": 3.515676942072881e-05, "loss": 1.3173, "step": 2813000 }, { "epoch": 29.69, "learning_rate": 3.515413109321739e-05, "loss": 1.3352, "step": 2813500 }, { "epoch": 29.7, "learning_rate": 3.515149276570597e-05, "loss": 1.3024, "step": 2814000 }, { "epoch": 29.7, "learning_rate": 3.514885443819454e-05, "loss": 1.326, "step": 2814500 }, { "epoch": 29.71, "learning_rate": 3.514621611068312e-05, "loss": 1.3533, "step": 2815000 }, { "epoch": 29.71, "learning_rate": 3.5143577783171695e-05, "loss": 1.3348, "step": 2815500 }, { "epoch": 29.72, "learning_rate": 3.514093945566027e-05, "loss": 1.3541, "step": 2816000 }, { "epoch": 29.72, "learning_rate": 3.5138301128148846e-05, "loss": 1.4262, "step": 2816500 }, { "epoch": 29.73, "learning_rate": 3.513566280063742e-05, "loss": 1.3119, "step": 2817000 }, { "epoch": 29.73, "learning_rate": 3.5133024473126e-05, "loss": 1.326, "step": 2817500 }, { "epoch": 29.74, "learning_rate": 3.513038614561457e-05, "loss": 1.3345, "step": 2818000 }, { "epoch": 29.74, "learning_rate": 3.5127747818103155e-05, "loss": 1.2885, "step": 2818500 }, { "epoch": 29.75, "learning_rate": 3.512510949059172e-05, "loss": 1.4033, "step": 2819000 }, { "epoch": 29.76, "learning_rate": 3.51224711630803e-05, "loss": 1.3776, "step": 2819500 }, { "epoch": 29.76, "learning_rate": 3.511983283556888e-05, "loss": 1.3405, "step": 2820000 }, { "epoch": 29.77, "learning_rate": 3.5117194508057457e-05, "loss": 1.3978, "step": 2820500 }, { "epoch": 29.77, "learning_rate": 3.511455618054603e-05, "loss": 1.3328, "step": 2821000 }, { "epoch": 29.78, "learning_rate": 3.51119178530346e-05, "loss": 1.3489, "step": 2821500 }, { "epoch": 29.78, "learning_rate": 3.510927952552318e-05, "loss": 1.2912, "step": 2822000 }, { "epoch": 29.79, "learning_rate": 3.510664119801176e-05, "loss": 1.3637, "step": 2822500 }, { "epoch": 29.79, "learning_rate": 3.5104002870500334e-05, "loss": 1.3329, "step": 2823000 }, { "epoch": 29.8, "learning_rate": 3.510136454298891e-05, "loss": 1.3858, "step": 2823500 }, { "epoch": 29.8, "learning_rate": 3.5098726215477485e-05, "loss": 1.3196, "step": 2824000 }, { "epoch": 29.81, "learning_rate": 3.509608788796606e-05, "loss": 1.3037, "step": 2824500 }, { "epoch": 29.81, "learning_rate": 3.5093449560454636e-05, "loss": 1.3302, "step": 2825000 }, { "epoch": 29.82, "learning_rate": 3.509081123294322e-05, "loss": 1.3284, "step": 2825500 }, { "epoch": 29.82, "learning_rate": 3.508817290543179e-05, "loss": 1.3746, "step": 2826000 }, { "epoch": 29.83, "learning_rate": 3.508553457792036e-05, "loss": 1.2937, "step": 2826500 }, { "epoch": 29.83, "learning_rate": 3.5082896250408945e-05, "loss": 1.2837, "step": 2827000 }, { "epoch": 29.84, "learning_rate": 3.508025792289752e-05, "loss": 1.3365, "step": 2827500 }, { "epoch": 29.84, "learning_rate": 3.5077619595386096e-05, "loss": 1.291, "step": 2828000 }, { "epoch": 29.85, "learning_rate": 3.507498126787467e-05, "loss": 1.2045, "step": 2828500 }, { "epoch": 29.86, "learning_rate": 3.507234294036325e-05, "loss": 1.2879, "step": 2829000 }, { "epoch": 29.86, "learning_rate": 3.506970461285182e-05, "loss": 1.351, "step": 2829500 }, { "epoch": 29.87, "learning_rate": 3.50670662853404e-05, "loss": 1.3518, "step": 2830000 }, { "epoch": 29.87, "learning_rate": 3.506442795782898e-05, "loss": 1.4041, "step": 2830500 }, { "epoch": 29.88, "learning_rate": 3.506178963031755e-05, "loss": 1.3528, "step": 2831000 }, { "epoch": 29.88, "learning_rate": 3.5059151302806124e-05, "loss": 1.3347, "step": 2831500 }, { "epoch": 29.89, "learning_rate": 3.5056512975294706e-05, "loss": 1.3821, "step": 2832000 }, { "epoch": 29.89, "learning_rate": 3.505387464778328e-05, "loss": 1.3287, "step": 2832500 }, { "epoch": 29.9, "learning_rate": 3.505123632027186e-05, "loss": 1.3165, "step": 2833000 }, { "epoch": 29.9, "learning_rate": 3.5048597992760426e-05, "loss": 1.365, "step": 2833500 }, { "epoch": 29.91, "learning_rate": 3.504595966524901e-05, "loss": 1.3363, "step": 2834000 }, { "epoch": 29.91, "learning_rate": 3.5043321337737584e-05, "loss": 1.3637, "step": 2834500 }, { "epoch": 29.92, "learning_rate": 3.504068301022616e-05, "loss": 1.327, "step": 2835000 }, { "epoch": 29.92, "learning_rate": 3.5038044682714735e-05, "loss": 1.343, "step": 2835500 }, { "epoch": 29.93, "learning_rate": 3.503540635520331e-05, "loss": 1.307, "step": 2836000 }, { "epoch": 29.93, "learning_rate": 3.5032768027691886e-05, "loss": 1.2977, "step": 2836500 }, { "epoch": 29.94, "learning_rate": 3.503012970018046e-05, "loss": 1.3526, "step": 2837000 }, { "epoch": 29.95, "learning_rate": 3.5027491372669043e-05, "loss": 1.3344, "step": 2837500 }, { "epoch": 29.95, "learning_rate": 3.502485304515761e-05, "loss": 1.3302, "step": 2838000 }, { "epoch": 29.96, "learning_rate": 3.502221471764619e-05, "loss": 1.3847, "step": 2838500 }, { "epoch": 29.96, "learning_rate": 3.501957639013477e-05, "loss": 1.3983, "step": 2839000 }, { "epoch": 29.97, "learning_rate": 3.5016938062623345e-05, "loss": 1.4041, "step": 2839500 }, { "epoch": 29.97, "learning_rate": 3.501429973511192e-05, "loss": 1.3696, "step": 2840000 }, { "epoch": 29.98, "learning_rate": 3.5011661407600496e-05, "loss": 1.2761, "step": 2840500 }, { "epoch": 29.98, "learning_rate": 3.500902308008907e-05, "loss": 1.3378, "step": 2841000 }, { "epoch": 29.99, "learning_rate": 3.500638475257765e-05, "loss": 1.3479, "step": 2841500 }, { "epoch": 29.99, "learning_rate": 3.500374642506622e-05, "loss": 1.3985, "step": 2842000 }, { "epoch": 30.0, "learning_rate": 3.5001108097554805e-05, "loss": 1.3263, "step": 2842500 }, { "epoch": 30.0, "learning_rate": 3.4998469770043374e-05, "loss": 1.3354, "step": 2843000 }, { "epoch": 30.01, "learning_rate": 3.499583144253195e-05, "loss": 1.297, "step": 2843500 }, { "epoch": 30.01, "learning_rate": 3.499319311502053e-05, "loss": 1.3673, "step": 2844000 }, { "epoch": 30.02, "learning_rate": 3.499055478750911e-05, "loss": 1.2911, "step": 2844500 }, { "epoch": 30.02, "learning_rate": 3.4987916459997676e-05, "loss": 1.3313, "step": 2845000 }, { "epoch": 30.03, "learning_rate": 3.498527813248625e-05, "loss": 1.3618, "step": 2845500 }, { "epoch": 30.03, "learning_rate": 3.4982639804974834e-05, "loss": 1.2773, "step": 2846000 }, { "epoch": 30.04, "learning_rate": 3.498000147746341e-05, "loss": 1.3124, "step": 2846500 }, { "epoch": 30.05, "learning_rate": 3.4977363149951985e-05, "loss": 1.3246, "step": 2847000 }, { "epoch": 30.05, "learning_rate": 3.497472482244056e-05, "loss": 1.365, "step": 2847500 }, { "epoch": 30.06, "learning_rate": 3.4972086494929135e-05, "loss": 1.3538, "step": 2848000 }, { "epoch": 30.06, "learning_rate": 3.496944816741771e-05, "loss": 1.3252, "step": 2848500 }, { "epoch": 30.07, "learning_rate": 3.4966809839906286e-05, "loss": 1.3673, "step": 2849000 }, { "epoch": 30.07, "learning_rate": 3.496417151239487e-05, "loss": 1.323, "step": 2849500 }, { "epoch": 30.08, "learning_rate": 3.496153318488344e-05, "loss": 1.3204, "step": 2850000 }, { "epoch": 30.08, "learning_rate": 3.495889485737201e-05, "loss": 1.3345, "step": 2850500 }, { "epoch": 30.09, "learning_rate": 3.4956256529860595e-05, "loss": 1.3032, "step": 2851000 }, { "epoch": 30.09, "learning_rate": 3.495361820234917e-05, "loss": 1.3146, "step": 2851500 }, { "epoch": 30.1, "learning_rate": 3.4950979874837746e-05, "loss": 1.2691, "step": 2852000 }, { "epoch": 30.1, "learning_rate": 3.494834154732632e-05, "loss": 1.2565, "step": 2852500 }, { "epoch": 30.11, "learning_rate": 3.49457032198149e-05, "loss": 1.338, "step": 2853000 }, { "epoch": 30.11, "learning_rate": 3.494306489230347e-05, "loss": 1.3606, "step": 2853500 }, { "epoch": 30.12, "learning_rate": 3.494042656479205e-05, "loss": 1.3348, "step": 2854000 }, { "epoch": 30.12, "learning_rate": 3.4937788237280624e-05, "loss": 1.3549, "step": 2854500 }, { "epoch": 30.13, "learning_rate": 3.49351499097692e-05, "loss": 1.3724, "step": 2855000 }, { "epoch": 30.13, "learning_rate": 3.4932511582257775e-05, "loss": 1.3598, "step": 2855500 }, { "epoch": 30.14, "learning_rate": 3.492987325474636e-05, "loss": 1.3069, "step": 2856000 }, { "epoch": 30.15, "learning_rate": 3.492723492723493e-05, "loss": 1.2927, "step": 2856500 }, { "epoch": 30.15, "learning_rate": 3.49245965997235e-05, "loss": 1.3274, "step": 2857000 }, { "epoch": 30.16, "learning_rate": 3.4921958272212077e-05, "loss": 1.3407, "step": 2857500 }, { "epoch": 30.16, "learning_rate": 3.491931994470066e-05, "loss": 1.3177, "step": 2858000 }, { "epoch": 30.17, "learning_rate": 3.4916681617189234e-05, "loss": 1.296, "step": 2858500 }, { "epoch": 30.17, "learning_rate": 3.491404328967781e-05, "loss": 1.3405, "step": 2859000 }, { "epoch": 30.18, "learning_rate": 3.4911404962166385e-05, "loss": 1.3365, "step": 2859500 }, { "epoch": 30.18, "learning_rate": 3.490876663465496e-05, "loss": 1.3517, "step": 2860000 }, { "epoch": 30.19, "learning_rate": 3.4906128307143536e-05, "loss": 1.3104, "step": 2860500 }, { "epoch": 30.19, "learning_rate": 3.490348997963211e-05, "loss": 1.3474, "step": 2861000 }, { "epoch": 30.2, "learning_rate": 3.4900851652120694e-05, "loss": 1.3436, "step": 2861500 }, { "epoch": 30.2, "learning_rate": 3.489821332460926e-05, "loss": 1.3208, "step": 2862000 }, { "epoch": 30.21, "learning_rate": 3.489557499709784e-05, "loss": 1.2606, "step": 2862500 }, { "epoch": 30.21, "learning_rate": 3.489293666958642e-05, "loss": 1.3293, "step": 2863000 }, { "epoch": 30.22, "learning_rate": 3.4890298342074996e-05, "loss": 1.3745, "step": 2863500 }, { "epoch": 30.22, "learning_rate": 3.4887660014563565e-05, "loss": 1.3078, "step": 2864000 }, { "epoch": 30.23, "learning_rate": 3.488502168705215e-05, "loss": 1.3446, "step": 2864500 }, { "epoch": 30.24, "learning_rate": 3.488238335954072e-05, "loss": 1.3682, "step": 2865000 }, { "epoch": 30.24, "learning_rate": 3.48797450320293e-05, "loss": 1.3014, "step": 2865500 }, { "epoch": 30.25, "learning_rate": 3.487710670451787e-05, "loss": 1.3258, "step": 2866000 }, { "epoch": 30.25, "learning_rate": 3.487446837700645e-05, "loss": 1.309, "step": 2866500 }, { "epoch": 30.26, "learning_rate": 3.4871830049495024e-05, "loss": 1.271, "step": 2867000 }, { "epoch": 30.26, "learning_rate": 3.48691917219836e-05, "loss": 1.4101, "step": 2867500 }, { "epoch": 30.27, "learning_rate": 3.486655339447218e-05, "loss": 1.3216, "step": 2868000 }, { "epoch": 30.27, "learning_rate": 3.486391506696076e-05, "loss": 1.3255, "step": 2868500 }, { "epoch": 30.28, "learning_rate": 3.4861276739449326e-05, "loss": 1.3041, "step": 2869000 }, { "epoch": 30.28, "learning_rate": 3.48586384119379e-05, "loss": 1.3606, "step": 2869500 }, { "epoch": 30.29, "learning_rate": 3.4856000084426484e-05, "loss": 1.3926, "step": 2870000 }, { "epoch": 30.29, "learning_rate": 3.485336175691506e-05, "loss": 1.3274, "step": 2870500 }, { "epoch": 30.3, "learning_rate": 3.4850723429403635e-05, "loss": 1.3579, "step": 2871000 }, { "epoch": 30.3, "learning_rate": 3.484808510189221e-05, "loss": 1.3663, "step": 2871500 }, { "epoch": 30.31, "learning_rate": 3.4845446774380786e-05, "loss": 1.3301, "step": 2872000 }, { "epoch": 30.31, "learning_rate": 3.484280844686936e-05, "loss": 1.3285, "step": 2872500 }, { "epoch": 30.32, "learning_rate": 3.484017011935794e-05, "loss": 1.3695, "step": 2873000 }, { "epoch": 30.32, "learning_rate": 3.483753179184651e-05, "loss": 1.3494, "step": 2873500 }, { "epoch": 30.33, "learning_rate": 3.483489346433509e-05, "loss": 1.3069, "step": 2874000 }, { "epoch": 30.34, "learning_rate": 3.4832255136823663e-05, "loss": 1.3265, "step": 2874500 }, { "epoch": 30.34, "learning_rate": 3.4829616809312246e-05, "loss": 1.3155, "step": 2875000 }, { "epoch": 30.35, "learning_rate": 3.482697848180082e-05, "loss": 1.3108, "step": 2875500 }, { "epoch": 30.35, "learning_rate": 3.482434015428939e-05, "loss": 1.355, "step": 2876000 }, { "epoch": 30.36, "learning_rate": 3.482170182677797e-05, "loss": 1.3217, "step": 2876500 }, { "epoch": 30.36, "learning_rate": 3.481906349926655e-05, "loss": 1.3268, "step": 2877000 }, { "epoch": 30.37, "learning_rate": 3.481642517175512e-05, "loss": 1.3192, "step": 2877500 }, { "epoch": 30.37, "learning_rate": 3.48137868442437e-05, "loss": 1.3787, "step": 2878000 }, { "epoch": 30.38, "learning_rate": 3.4811148516732274e-05, "loss": 1.317, "step": 2878500 }, { "epoch": 30.38, "learning_rate": 3.480851018922085e-05, "loss": 1.3115, "step": 2879000 }, { "epoch": 30.39, "learning_rate": 3.4805871861709425e-05, "loss": 1.3282, "step": 2879500 }, { "epoch": 30.39, "learning_rate": 3.480323353419801e-05, "loss": 1.3542, "step": 2880000 }, { "epoch": 30.4, "learning_rate": 3.480059520668658e-05, "loss": 1.3346, "step": 2880500 }, { "epoch": 30.4, "learning_rate": 3.479795687917515e-05, "loss": 1.331, "step": 2881000 }, { "epoch": 30.41, "learning_rate": 3.479531855166373e-05, "loss": 1.3419, "step": 2881500 }, { "epoch": 30.41, "learning_rate": 3.479268022415231e-05, "loss": 1.3342, "step": 2882000 }, { "epoch": 30.42, "learning_rate": 3.4790041896640885e-05, "loss": 1.3467, "step": 2882500 }, { "epoch": 30.43, "learning_rate": 3.4787403569129453e-05, "loss": 1.3529, "step": 2883000 }, { "epoch": 30.43, "learning_rate": 3.4784765241618036e-05, "loss": 1.3166, "step": 2883500 }, { "epoch": 30.44, "learning_rate": 3.478212691410661e-05, "loss": 1.3354, "step": 2884000 }, { "epoch": 30.44, "learning_rate": 3.477948858659519e-05, "loss": 1.3521, "step": 2884500 }, { "epoch": 30.45, "learning_rate": 3.477685025908376e-05, "loss": 1.2843, "step": 2885000 }, { "epoch": 30.45, "learning_rate": 3.477421193157234e-05, "loss": 1.3195, "step": 2885500 }, { "epoch": 30.46, "learning_rate": 3.477157360406091e-05, "loss": 1.3785, "step": 2886000 }, { "epoch": 30.46, "learning_rate": 3.476893527654949e-05, "loss": 1.3603, "step": 2886500 }, { "epoch": 30.47, "learning_rate": 3.476629694903807e-05, "loss": 1.3453, "step": 2887000 }, { "epoch": 30.47, "learning_rate": 3.4763658621526646e-05, "loss": 1.3165, "step": 2887500 }, { "epoch": 30.48, "learning_rate": 3.4761020294015215e-05, "loss": 1.3622, "step": 2888000 }, { "epoch": 30.48, "learning_rate": 3.47583819665038e-05, "loss": 1.2965, "step": 2888500 }, { "epoch": 30.49, "learning_rate": 3.475574363899237e-05, "loss": 1.3279, "step": 2889000 }, { "epoch": 30.49, "learning_rate": 3.475310531148095e-05, "loss": 1.3035, "step": 2889500 }, { "epoch": 30.5, "learning_rate": 3.4750466983969524e-05, "loss": 1.3706, "step": 2890000 }, { "epoch": 30.5, "learning_rate": 3.47478286564581e-05, "loss": 1.3035, "step": 2890500 }, { "epoch": 30.51, "learning_rate": 3.4745190328946675e-05, "loss": 1.3108, "step": 2891000 }, { "epoch": 30.51, "learning_rate": 3.474255200143525e-05, "loss": 1.3542, "step": 2891500 }, { "epoch": 30.52, "learning_rate": 3.473991367392383e-05, "loss": 1.2968, "step": 2892000 }, { "epoch": 30.53, "learning_rate": 3.47372753464124e-05, "loss": 1.2883, "step": 2892500 }, { "epoch": 30.53, "learning_rate": 3.473463701890098e-05, "loss": 1.344, "step": 2893000 }, { "epoch": 30.54, "learning_rate": 3.473199869138956e-05, "loss": 1.3019, "step": 2893500 }, { "epoch": 30.54, "learning_rate": 3.4729360363878135e-05, "loss": 1.2628, "step": 2894000 }, { "epoch": 30.55, "learning_rate": 3.472672203636671e-05, "loss": 1.2929, "step": 2894500 }, { "epoch": 30.55, "learning_rate": 3.472408370885528e-05, "loss": 1.3142, "step": 2895000 }, { "epoch": 30.56, "learning_rate": 3.472144538134386e-05, "loss": 1.3495, "step": 2895500 }, { "epoch": 30.56, "learning_rate": 3.4718807053832436e-05, "loss": 1.3095, "step": 2896000 }, { "epoch": 30.57, "learning_rate": 3.471616872632101e-05, "loss": 1.3274, "step": 2896500 }, { "epoch": 30.57, "learning_rate": 3.471353039880959e-05, "loss": 1.3288, "step": 2897000 }, { "epoch": 30.58, "learning_rate": 3.471089207129816e-05, "loss": 1.2937, "step": 2897500 }, { "epoch": 30.58, "learning_rate": 3.470825374378674e-05, "loss": 1.2975, "step": 2898000 }, { "epoch": 30.59, "learning_rate": 3.4705615416275314e-05, "loss": 1.3696, "step": 2898500 }, { "epoch": 30.59, "learning_rate": 3.4702977088763896e-05, "loss": 1.3258, "step": 2899000 }, { "epoch": 30.6, "learning_rate": 3.470033876125247e-05, "loss": 1.2951, "step": 2899500 }, { "epoch": 30.6, "learning_rate": 3.469770043374104e-05, "loss": 1.3627, "step": 2900000 }, { "epoch": 30.61, "learning_rate": 3.469506210622962e-05, "loss": 1.338, "step": 2900500 }, { "epoch": 30.62, "learning_rate": 3.46924237787182e-05, "loss": 1.3357, "step": 2901000 }, { "epoch": 30.62, "learning_rate": 3.4689785451206774e-05, "loss": 1.3606, "step": 2901500 }, { "epoch": 30.63, "learning_rate": 3.468714712369535e-05, "loss": 1.2858, "step": 2902000 }, { "epoch": 30.63, "learning_rate": 3.4684508796183925e-05, "loss": 1.3089, "step": 2902500 }, { "epoch": 30.64, "learning_rate": 3.46818704686725e-05, "loss": 1.3152, "step": 2903000 }, { "epoch": 30.64, "learning_rate": 3.4679232141161076e-05, "loss": 1.3097, "step": 2903500 }, { "epoch": 30.65, "learning_rate": 3.467659381364966e-05, "loss": 1.3026, "step": 2904000 }, { "epoch": 30.65, "learning_rate": 3.4673955486138227e-05, "loss": 1.3191, "step": 2904500 }, { "epoch": 30.66, "learning_rate": 3.46713171586268e-05, "loss": 1.2856, "step": 2905000 }, { "epoch": 30.66, "learning_rate": 3.4668678831115384e-05, "loss": 1.3313, "step": 2905500 }, { "epoch": 30.67, "learning_rate": 3.466604050360396e-05, "loss": 1.3477, "step": 2906000 }, { "epoch": 30.67, "learning_rate": 3.4663402176092535e-05, "loss": 1.3585, "step": 2906500 }, { "epoch": 30.68, "learning_rate": 3.4660763848581104e-05, "loss": 1.2758, "step": 2907000 }, { "epoch": 30.68, "learning_rate": 3.4658125521069686e-05, "loss": 1.3542, "step": 2907500 }, { "epoch": 30.69, "learning_rate": 3.465548719355826e-05, "loss": 1.3456, "step": 2908000 }, { "epoch": 30.69, "learning_rate": 3.465284886604684e-05, "loss": 1.3215, "step": 2908500 }, { "epoch": 30.7, "learning_rate": 3.465021053853541e-05, "loss": 1.3375, "step": 2909000 }, { "epoch": 30.7, "learning_rate": 3.464757221102399e-05, "loss": 1.3852, "step": 2909500 }, { "epoch": 30.71, "learning_rate": 3.4644933883512564e-05, "loss": 1.3661, "step": 2910000 }, { "epoch": 30.72, "learning_rate": 3.464229555600114e-05, "loss": 1.2937, "step": 2910500 }, { "epoch": 30.72, "learning_rate": 3.463965722848972e-05, "loss": 1.3217, "step": 2911000 }, { "epoch": 30.73, "learning_rate": 3.463701890097829e-05, "loss": 1.3579, "step": 2911500 }, { "epoch": 30.73, "learning_rate": 3.4634380573466866e-05, "loss": 1.3153, "step": 2912000 }, { "epoch": 30.74, "learning_rate": 3.463174224595545e-05, "loss": 1.3653, "step": 2912500 }, { "epoch": 30.74, "learning_rate": 3.462910391844402e-05, "loss": 1.3174, "step": 2913000 }, { "epoch": 30.75, "learning_rate": 3.46264655909326e-05, "loss": 1.2952, "step": 2913500 }, { "epoch": 30.75, "learning_rate": 3.4623827263421174e-05, "loss": 1.2989, "step": 2914000 }, { "epoch": 30.76, "learning_rate": 3.462118893590975e-05, "loss": 1.2874, "step": 2914500 }, { "epoch": 30.76, "learning_rate": 3.4618550608398325e-05, "loss": 1.3298, "step": 2915000 }, { "epoch": 30.77, "learning_rate": 3.46159122808869e-05, "loss": 1.3423, "step": 2915500 }, { "epoch": 30.77, "learning_rate": 3.461327395337548e-05, "loss": 1.3617, "step": 2916000 }, { "epoch": 30.78, "learning_rate": 3.461063562586405e-05, "loss": 1.3311, "step": 2916500 }, { "epoch": 30.78, "learning_rate": 3.460799729835263e-05, "loss": 1.3634, "step": 2917000 }, { "epoch": 30.79, "learning_rate": 3.460535897084121e-05, "loss": 1.3637, "step": 2917500 }, { "epoch": 30.79, "learning_rate": 3.4602720643329785e-05, "loss": 1.3869, "step": 2918000 }, { "epoch": 30.8, "learning_rate": 3.460008231581836e-05, "loss": 1.2896, "step": 2918500 }, { "epoch": 30.81, "learning_rate": 3.459744398830693e-05, "loss": 1.2858, "step": 2919000 }, { "epoch": 30.81, "learning_rate": 3.459480566079551e-05, "loss": 1.2768, "step": 2919500 }, { "epoch": 30.82, "learning_rate": 3.459216733328409e-05, "loss": 1.3199, "step": 2920000 }, { "epoch": 30.82, "learning_rate": 3.458952900577266e-05, "loss": 1.3491, "step": 2920500 }, { "epoch": 30.83, "learning_rate": 3.458689067826124e-05, "loss": 1.333, "step": 2921000 }, { "epoch": 30.83, "learning_rate": 3.4584252350749813e-05, "loss": 1.3955, "step": 2921500 }, { "epoch": 30.84, "learning_rate": 3.458161402323839e-05, "loss": 1.3572, "step": 2922000 }, { "epoch": 30.84, "learning_rate": 3.4578975695726964e-05, "loss": 1.3583, "step": 2922500 }, { "epoch": 30.85, "learning_rate": 3.457633736821555e-05, "loss": 1.3276, "step": 2923000 }, { "epoch": 30.85, "learning_rate": 3.4573699040704115e-05, "loss": 1.3544, "step": 2923500 }, { "epoch": 30.86, "learning_rate": 3.457106071319269e-05, "loss": 1.3462, "step": 2924000 }, { "epoch": 30.86, "learning_rate": 3.456842238568127e-05, "loss": 1.3738, "step": 2924500 }, { "epoch": 30.87, "learning_rate": 3.456578405816985e-05, "loss": 1.2898, "step": 2925000 }, { "epoch": 30.87, "learning_rate": 3.4563145730658424e-05, "loss": 1.2812, "step": 2925500 }, { "epoch": 30.88, "learning_rate": 3.4560507403147e-05, "loss": 1.2996, "step": 2926000 }, { "epoch": 30.88, "learning_rate": 3.4557869075635575e-05, "loss": 1.3454, "step": 2926500 }, { "epoch": 30.89, "learning_rate": 3.455523074812415e-05, "loss": 1.3464, "step": 2927000 }, { "epoch": 30.89, "learning_rate": 3.4552592420612726e-05, "loss": 1.3269, "step": 2927500 }, { "epoch": 30.9, "learning_rate": 3.454995409310131e-05, "loss": 1.3106, "step": 2928000 }, { "epoch": 30.91, "learning_rate": 3.454731576558988e-05, "loss": 1.3222, "step": 2928500 }, { "epoch": 30.91, "learning_rate": 3.454467743807845e-05, "loss": 1.3829, "step": 2929000 }, { "epoch": 30.92, "learning_rate": 3.4542039110567035e-05, "loss": 1.2961, "step": 2929500 }, { "epoch": 30.92, "learning_rate": 3.453940078305561e-05, "loss": 1.2825, "step": 2930000 }, { "epoch": 30.93, "learning_rate": 3.453676245554418e-05, "loss": 1.3756, "step": 2930500 }, { "epoch": 30.93, "learning_rate": 3.4534124128032754e-05, "loss": 1.351, "step": 2931000 }, { "epoch": 30.94, "learning_rate": 3.453148580052134e-05, "loss": 1.3243, "step": 2931500 }, { "epoch": 30.94, "learning_rate": 3.452884747300991e-05, "loss": 1.3259, "step": 2932000 }, { "epoch": 30.95, "learning_rate": 3.452620914549849e-05, "loss": 1.279, "step": 2932500 }, { "epoch": 30.95, "learning_rate": 3.452357081798706e-05, "loss": 1.3576, "step": 2933000 }, { "epoch": 30.96, "learning_rate": 3.452093249047564e-05, "loss": 1.309, "step": 2933500 }, { "epoch": 30.96, "learning_rate": 3.4518294162964214e-05, "loss": 1.3545, "step": 2934000 }, { "epoch": 30.97, "learning_rate": 3.451565583545279e-05, "loss": 1.2933, "step": 2934500 }, { "epoch": 30.97, "learning_rate": 3.451301750794137e-05, "loss": 1.3522, "step": 2935000 }, { "epoch": 30.98, "learning_rate": 3.451037918042994e-05, "loss": 1.3658, "step": 2935500 }, { "epoch": 30.98, "learning_rate": 3.4507740852918516e-05, "loss": 1.3539, "step": 2936000 }, { "epoch": 30.99, "learning_rate": 3.45051025254071e-05, "loss": 1.3368, "step": 2936500 }, { "epoch": 31.0, "learning_rate": 3.4502464197895674e-05, "loss": 1.3163, "step": 2937000 }, { "epoch": 31.0, "learning_rate": 3.449982587038425e-05, "loss": 1.2974, "step": 2937500 }, { "epoch": 31.01, "learning_rate": 3.4497187542872825e-05, "loss": 1.308, "step": 2938000 }, { "epoch": 31.01, "learning_rate": 3.44945492153614e-05, "loss": 1.3159, "step": 2938500 }, { "epoch": 31.02, "learning_rate": 3.4491910887849976e-05, "loss": 1.339, "step": 2939000 }, { "epoch": 31.02, "learning_rate": 3.448927256033855e-05, "loss": 1.325, "step": 2939500 }, { "epoch": 31.03, "learning_rate": 3.448663423282713e-05, "loss": 1.2808, "step": 2940000 }, { "epoch": 31.03, "learning_rate": 3.44839959053157e-05, "loss": 1.3359, "step": 2940500 }, { "epoch": 31.04, "learning_rate": 3.448135757780428e-05, "loss": 1.3107, "step": 2941000 }, { "epoch": 31.04, "learning_rate": 3.447871925029286e-05, "loss": 1.297, "step": 2941500 }, { "epoch": 31.05, "learning_rate": 3.4476080922781436e-05, "loss": 1.2868, "step": 2942000 }, { "epoch": 31.05, "learning_rate": 3.4473442595270004e-05, "loss": 1.3611, "step": 2942500 }, { "epoch": 31.06, "learning_rate": 3.447080426775858e-05, "loss": 1.3521, "step": 2943000 }, { "epoch": 31.06, "learning_rate": 3.446816594024716e-05, "loss": 1.2947, "step": 2943500 }, { "epoch": 31.07, "learning_rate": 3.446552761273574e-05, "loss": 1.3434, "step": 2944000 }, { "epoch": 31.07, "learning_rate": 3.446288928522431e-05, "loss": 1.2927, "step": 2944500 }, { "epoch": 31.08, "learning_rate": 3.446025095771289e-05, "loss": 1.2569, "step": 2945000 }, { "epoch": 31.08, "learning_rate": 3.4457612630201464e-05, "loss": 1.3203, "step": 2945500 }, { "epoch": 31.09, "learning_rate": 3.445497430269004e-05, "loss": 1.3287, "step": 2946000 }, { "epoch": 31.1, "learning_rate": 3.4452335975178615e-05, "loss": 1.3026, "step": 2946500 }, { "epoch": 31.1, "learning_rate": 3.44496976476672e-05, "loss": 1.3099, "step": 2947000 }, { "epoch": 31.11, "learning_rate": 3.4447059320155766e-05, "loss": 1.3329, "step": 2947500 }, { "epoch": 31.11, "learning_rate": 3.444442099264434e-05, "loss": 1.3313, "step": 2948000 }, { "epoch": 31.12, "learning_rate": 3.4441782665132924e-05, "loss": 1.3335, "step": 2948500 }, { "epoch": 31.12, "learning_rate": 3.44391443376215e-05, "loss": 1.2974, "step": 2949000 }, { "epoch": 31.13, "learning_rate": 3.443650601011007e-05, "loss": 1.2974, "step": 2949500 }, { "epoch": 31.13, "learning_rate": 3.443386768259865e-05, "loss": 1.3266, "step": 2950000 }, { "epoch": 31.14, "learning_rate": 3.4431229355087226e-05, "loss": 1.3037, "step": 2950500 }, { "epoch": 31.14, "learning_rate": 3.44285910275758e-05, "loss": 1.3332, "step": 2951000 }, { "epoch": 31.15, "learning_rate": 3.4425952700064377e-05, "loss": 1.3212, "step": 2951500 }, { "epoch": 31.15, "learning_rate": 3.442331437255295e-05, "loss": 1.3523, "step": 2952000 }, { "epoch": 31.16, "learning_rate": 3.442067604504153e-05, "loss": 1.2899, "step": 2952500 }, { "epoch": 31.16, "learning_rate": 3.44180377175301e-05, "loss": 1.2977, "step": 2953000 }, { "epoch": 31.17, "learning_rate": 3.4415399390018685e-05, "loss": 1.3376, "step": 2953500 }, { "epoch": 31.17, "learning_rate": 3.441276106250726e-05, "loss": 1.3528, "step": 2954000 }, { "epoch": 31.18, "learning_rate": 3.441012273499583e-05, "loss": 1.2765, "step": 2954500 }, { "epoch": 31.19, "learning_rate": 3.4407484407484405e-05, "loss": 1.2993, "step": 2955000 }, { "epoch": 31.19, "learning_rate": 3.440484607997299e-05, "loss": 1.3049, "step": 2955500 }, { "epoch": 31.2, "learning_rate": 3.440220775246156e-05, "loss": 1.3373, "step": 2956000 }, { "epoch": 31.2, "learning_rate": 3.439956942495014e-05, "loss": 1.2966, "step": 2956500 }, { "epoch": 31.21, "learning_rate": 3.4396931097438714e-05, "loss": 1.3182, "step": 2957000 }, { "epoch": 31.21, "learning_rate": 3.439429276992729e-05, "loss": 1.3135, "step": 2957500 }, { "epoch": 31.22, "learning_rate": 3.4391654442415865e-05, "loss": 1.3739, "step": 2958000 }, { "epoch": 31.22, "learning_rate": 3.438901611490444e-05, "loss": 1.3245, "step": 2958500 }, { "epoch": 31.23, "learning_rate": 3.4386377787393016e-05, "loss": 1.3188, "step": 2959000 }, { "epoch": 31.23, "learning_rate": 3.438373945988159e-05, "loss": 1.3596, "step": 2959500 }, { "epoch": 31.24, "learning_rate": 3.4381101132370167e-05, "loss": 1.3569, "step": 2960000 }, { "epoch": 31.24, "learning_rate": 3.437846280485875e-05, "loss": 1.2994, "step": 2960500 }, { "epoch": 31.25, "learning_rate": 3.4375824477347324e-05, "loss": 1.3253, "step": 2961000 }, { "epoch": 31.25, "learning_rate": 3.437318614983589e-05, "loss": 1.2827, "step": 2961500 }, { "epoch": 31.26, "learning_rate": 3.4370547822324475e-05, "loss": 1.2817, "step": 2962000 }, { "epoch": 31.26, "learning_rate": 3.436790949481305e-05, "loss": 1.3932, "step": 2962500 }, { "epoch": 31.27, "learning_rate": 3.4365271167301626e-05, "loss": 1.2973, "step": 2963000 }, { "epoch": 31.27, "learning_rate": 3.43626328397902e-05, "loss": 1.3379, "step": 2963500 }, { "epoch": 31.28, "learning_rate": 3.435999451227878e-05, "loss": 1.3333, "step": 2964000 }, { "epoch": 31.29, "learning_rate": 3.435735618476735e-05, "loss": 1.3082, "step": 2964500 }, { "epoch": 31.29, "learning_rate": 3.435471785725593e-05, "loss": 1.334, "step": 2965000 }, { "epoch": 31.3, "learning_rate": 3.435207952974451e-05, "loss": 1.3176, "step": 2965500 }, { "epoch": 31.3, "learning_rate": 3.4349441202233086e-05, "loss": 1.3792, "step": 2966000 }, { "epoch": 31.31, "learning_rate": 3.4346802874721655e-05, "loss": 1.3113, "step": 2966500 }, { "epoch": 31.31, "learning_rate": 3.434416454721023e-05, "loss": 1.3345, "step": 2967000 }, { "epoch": 31.32, "learning_rate": 3.434152621969881e-05, "loss": 1.3111, "step": 2967500 }, { "epoch": 31.32, "learning_rate": 3.433888789218739e-05, "loss": 1.2871, "step": 2968000 }, { "epoch": 31.33, "learning_rate": 3.433624956467596e-05, "loss": 1.358, "step": 2968500 }, { "epoch": 31.33, "learning_rate": 3.433361123716454e-05, "loss": 1.3116, "step": 2969000 }, { "epoch": 31.34, "learning_rate": 3.4330972909653114e-05, "loss": 1.3022, "step": 2969500 }, { "epoch": 31.34, "learning_rate": 3.432833458214169e-05, "loss": 1.2764, "step": 2970000 }, { "epoch": 31.35, "learning_rate": 3.4325696254630265e-05, "loss": 1.3061, "step": 2970500 }, { "epoch": 31.35, "learning_rate": 3.432305792711884e-05, "loss": 1.3034, "step": 2971000 }, { "epoch": 31.36, "learning_rate": 3.4320419599607416e-05, "loss": 1.3481, "step": 2971500 }, { "epoch": 31.36, "learning_rate": 3.431778127209599e-05, "loss": 1.3153, "step": 2972000 }, { "epoch": 31.37, "learning_rate": 3.4315142944584574e-05, "loss": 1.3302, "step": 2972500 }, { "epoch": 31.37, "learning_rate": 3.431250461707315e-05, "loss": 1.3583, "step": 2973000 }, { "epoch": 31.38, "learning_rate": 3.430986628956172e-05, "loss": 1.3437, "step": 2973500 }, { "epoch": 31.39, "learning_rate": 3.43072279620503e-05, "loss": 1.3757, "step": 2974000 }, { "epoch": 31.39, "learning_rate": 3.4304589634538876e-05, "loss": 1.3672, "step": 2974500 }, { "epoch": 31.4, "learning_rate": 3.430195130702745e-05, "loss": 1.3432, "step": 2975000 }, { "epoch": 31.4, "learning_rate": 3.429931297951603e-05, "loss": 1.3246, "step": 2975500 }, { "epoch": 31.41, "learning_rate": 3.42966746520046e-05, "loss": 1.2494, "step": 2976000 }, { "epoch": 31.41, "learning_rate": 3.429403632449318e-05, "loss": 1.3153, "step": 2976500 }, { "epoch": 31.42, "learning_rate": 3.4291397996981753e-05, "loss": 1.3191, "step": 2977000 }, { "epoch": 31.42, "learning_rate": 3.4288759669470336e-05, "loss": 1.3546, "step": 2977500 }, { "epoch": 31.43, "learning_rate": 3.4286121341958904e-05, "loss": 1.2933, "step": 2978000 }, { "epoch": 31.43, "learning_rate": 3.428348301444748e-05, "loss": 1.3156, "step": 2978500 }, { "epoch": 31.44, "learning_rate": 3.428084468693606e-05, "loss": 1.3106, "step": 2979000 }, { "epoch": 31.44, "learning_rate": 3.427820635942464e-05, "loss": 1.3104, "step": 2979500 }, { "epoch": 31.45, "learning_rate": 3.427556803191321e-05, "loss": 1.4219, "step": 2980000 }, { "epoch": 31.45, "learning_rate": 3.427292970440178e-05, "loss": 1.4534, "step": 2980500 }, { "epoch": 31.46, "learning_rate": 3.4270291376890364e-05, "loss": 1.2592, "step": 2981000 }, { "epoch": 31.46, "learning_rate": 3.426765304937894e-05, "loss": 1.3483, "step": 2981500 }, { "epoch": 31.47, "learning_rate": 3.4265014721867515e-05, "loss": 1.3004, "step": 2982000 }, { "epoch": 31.48, "learning_rate": 3.426237639435609e-05, "loss": 1.3185, "step": 2982500 }, { "epoch": 31.48, "learning_rate": 3.4259738066844666e-05, "loss": 1.3617, "step": 2983000 }, { "epoch": 31.49, "learning_rate": 3.425709973933324e-05, "loss": 1.3282, "step": 2983500 }, { "epoch": 31.49, "learning_rate": 3.425446141182182e-05, "loss": 1.3373, "step": 2984000 }, { "epoch": 31.5, "learning_rate": 3.42518230843104e-05, "loss": 1.2853, "step": 2984500 }, { "epoch": 31.5, "learning_rate": 3.4249184756798975e-05, "loss": 1.3133, "step": 2985000 }, { "epoch": 31.51, "learning_rate": 3.4246546429287544e-05, "loss": 1.3601, "step": 2985500 }, { "epoch": 31.51, "learning_rate": 3.4243908101776126e-05, "loss": 1.3133, "step": 2986000 }, { "epoch": 31.52, "learning_rate": 3.42412697742647e-05, "loss": 1.3055, "step": 2986500 }, { "epoch": 31.52, "learning_rate": 3.423863144675328e-05, "loss": 1.2773, "step": 2987000 }, { "epoch": 31.53, "learning_rate": 3.423599311924185e-05, "loss": 1.3604, "step": 2987500 }, { "epoch": 31.53, "learning_rate": 3.423335479173043e-05, "loss": 1.3502, "step": 2988000 }, { "epoch": 31.54, "learning_rate": 3.4230716464219e-05, "loss": 1.3072, "step": 2988500 }, { "epoch": 31.54, "learning_rate": 3.422807813670758e-05, "loss": 1.3283, "step": 2989000 }, { "epoch": 31.55, "learning_rate": 3.422543980919616e-05, "loss": 1.356, "step": 2989500 }, { "epoch": 31.55, "learning_rate": 3.422280148168473e-05, "loss": 1.3593, "step": 2990000 }, { "epoch": 31.56, "learning_rate": 3.4220163154173305e-05, "loss": 1.2088, "step": 2990500 }, { "epoch": 31.56, "learning_rate": 3.421752482666189e-05, "loss": 1.3283, "step": 2991000 }, { "epoch": 31.57, "learning_rate": 3.421488649915046e-05, "loss": 1.3149, "step": 2991500 }, { "epoch": 31.58, "learning_rate": 3.421224817163904e-05, "loss": 1.3603, "step": 2992000 }, { "epoch": 31.58, "learning_rate": 3.420960984412761e-05, "loss": 1.3524, "step": 2992500 }, { "epoch": 31.59, "learning_rate": 3.420697151661619e-05, "loss": 1.2835, "step": 2993000 }, { "epoch": 31.59, "learning_rate": 3.4204333189104765e-05, "loss": 1.2831, "step": 2993500 }, { "epoch": 31.6, "learning_rate": 3.420169486159334e-05, "loss": 1.2941, "step": 2994000 }, { "epoch": 31.6, "learning_rate": 3.4199056534081916e-05, "loss": 1.3121, "step": 2994500 }, { "epoch": 31.61, "learning_rate": 3.419641820657049e-05, "loss": 1.3653, "step": 2995000 }, { "epoch": 31.61, "learning_rate": 3.419377987905907e-05, "loss": 1.329, "step": 2995500 }, { "epoch": 31.62, "learning_rate": 3.419114155154764e-05, "loss": 1.288, "step": 2996000 }, { "epoch": 31.62, "learning_rate": 3.4188503224036225e-05, "loss": 1.3552, "step": 2996500 }, { "epoch": 31.63, "learning_rate": 3.418586489652479e-05, "loss": 1.3678, "step": 2997000 }, { "epoch": 31.63, "learning_rate": 3.418322656901337e-05, "loss": 1.3085, "step": 2997500 }, { "epoch": 31.64, "learning_rate": 3.418058824150195e-05, "loss": 1.3188, "step": 2998000 }, { "epoch": 31.64, "learning_rate": 3.4177949913990527e-05, "loss": 1.3274, "step": 2998500 }, { "epoch": 31.65, "learning_rate": 3.41753115864791e-05, "loss": 1.3021, "step": 2999000 }, { "epoch": 31.65, "learning_rate": 3.417267325896768e-05, "loss": 1.3229, "step": 2999500 }, { "epoch": 31.66, "learning_rate": 3.417003493145625e-05, "loss": 1.3432, "step": 3000000 }, { "epoch": 31.67, "learning_rate": 3.416739660394483e-05, "loss": 1.3627, "step": 3000500 }, { "epoch": 31.67, "learning_rate": 3.4164758276433404e-05, "loss": 1.3333, "step": 3001000 }, { "epoch": 31.68, "learning_rate": 3.4162119948921986e-05, "loss": 1.3303, "step": 3001500 }, { "epoch": 31.68, "learning_rate": 3.4159481621410555e-05, "loss": 1.3361, "step": 3002000 }, { "epoch": 31.69, "learning_rate": 3.415684329389913e-05, "loss": 1.3061, "step": 3002500 }, { "epoch": 31.69, "learning_rate": 3.415420496638771e-05, "loss": 1.259, "step": 3003000 }, { "epoch": 31.7, "learning_rate": 3.415156663887629e-05, "loss": 1.3602, "step": 3003500 }, { "epoch": 31.7, "learning_rate": 3.4148928311364864e-05, "loss": 1.3303, "step": 3004000 }, { "epoch": 31.71, "learning_rate": 3.414628998385343e-05, "loss": 1.3541, "step": 3004500 }, { "epoch": 31.71, "learning_rate": 3.4143651656342015e-05, "loss": 1.34, "step": 3005000 }, { "epoch": 31.72, "learning_rate": 3.414101332883059e-05, "loss": 1.2543, "step": 3005500 }, { "epoch": 31.72, "learning_rate": 3.4138375001319166e-05, "loss": 1.3023, "step": 3006000 }, { "epoch": 31.73, "learning_rate": 3.413573667380774e-05, "loss": 1.3957, "step": 3006500 }, { "epoch": 31.73, "learning_rate": 3.4133098346296317e-05, "loss": 1.3615, "step": 3007000 }, { "epoch": 31.74, "learning_rate": 3.413046001878489e-05, "loss": 1.3752, "step": 3007500 }, { "epoch": 31.74, "learning_rate": 3.412782169127347e-05, "loss": 1.3343, "step": 3008000 }, { "epoch": 31.75, "learning_rate": 3.412518336376205e-05, "loss": 1.2833, "step": 3008500 }, { "epoch": 31.75, "learning_rate": 3.412254503625062e-05, "loss": 1.3056, "step": 3009000 }, { "epoch": 31.76, "learning_rate": 3.4119906708739194e-05, "loss": 1.2981, "step": 3009500 }, { "epoch": 31.77, "learning_rate": 3.4117268381227776e-05, "loss": 1.2663, "step": 3010000 }, { "epoch": 31.77, "learning_rate": 3.411463005371635e-05, "loss": 1.3244, "step": 3010500 }, { "epoch": 31.78, "learning_rate": 3.411199172620493e-05, "loss": 1.3485, "step": 3011000 }, { "epoch": 31.78, "learning_rate": 3.41093533986935e-05, "loss": 1.3011, "step": 3011500 }, { "epoch": 31.79, "learning_rate": 3.410671507118208e-05, "loss": 1.3208, "step": 3012000 }, { "epoch": 31.79, "learning_rate": 3.4104076743670654e-05, "loss": 1.3376, "step": 3012500 }, { "epoch": 31.8, "learning_rate": 3.410143841615923e-05, "loss": 1.3114, "step": 3013000 }, { "epoch": 31.8, "learning_rate": 3.409880008864781e-05, "loss": 1.2925, "step": 3013500 }, { "epoch": 31.81, "learning_rate": 3.409616176113638e-05, "loss": 1.3269, "step": 3014000 }, { "epoch": 31.81, "learning_rate": 3.4093523433624956e-05, "loss": 1.311, "step": 3014500 }, { "epoch": 31.82, "learning_rate": 3.409088510611354e-05, "loss": 1.378, "step": 3015000 }, { "epoch": 31.82, "learning_rate": 3.4088246778602113e-05, "loss": 1.3422, "step": 3015500 }, { "epoch": 31.83, "learning_rate": 3.408560845109068e-05, "loss": 1.383, "step": 3016000 }, { "epoch": 31.83, "learning_rate": 3.408297012357926e-05, "loss": 1.3558, "step": 3016500 }, { "epoch": 31.84, "learning_rate": 3.408033179606784e-05, "loss": 1.3211, "step": 3017000 }, { "epoch": 31.84, "learning_rate": 3.4077693468556415e-05, "loss": 1.3365, "step": 3017500 }, { "epoch": 31.85, "learning_rate": 3.407505514104499e-05, "loss": 1.344, "step": 3018000 }, { "epoch": 31.86, "learning_rate": 3.4072416813533566e-05, "loss": 1.3664, "step": 3018500 }, { "epoch": 31.86, "learning_rate": 3.406977848602214e-05, "loss": 1.3253, "step": 3019000 }, { "epoch": 31.87, "learning_rate": 3.406714015851072e-05, "loss": 1.3528, "step": 3019500 }, { "epoch": 31.87, "learning_rate": 3.406450183099929e-05, "loss": 1.3579, "step": 3020000 }, { "epoch": 31.88, "learning_rate": 3.4061863503487875e-05, "loss": 1.2794, "step": 3020500 }, { "epoch": 31.88, "learning_rate": 3.4059225175976444e-05, "loss": 1.3004, "step": 3021000 }, { "epoch": 31.89, "learning_rate": 3.405658684846502e-05, "loss": 1.3088, "step": 3021500 }, { "epoch": 31.89, "learning_rate": 3.40539485209536e-05, "loss": 1.3516, "step": 3022000 }, { "epoch": 31.9, "learning_rate": 3.405131019344218e-05, "loss": 1.3139, "step": 3022500 }, { "epoch": 31.9, "learning_rate": 3.404867186593075e-05, "loss": 1.2817, "step": 3023000 }, { "epoch": 31.91, "learning_rate": 3.404603353841933e-05, "loss": 1.262, "step": 3023500 }, { "epoch": 31.91, "learning_rate": 3.4043395210907904e-05, "loss": 1.3042, "step": 3024000 }, { "epoch": 31.92, "learning_rate": 3.404075688339648e-05, "loss": 1.3038, "step": 3024500 }, { "epoch": 31.92, "learning_rate": 3.4038118555885054e-05, "loss": 1.2466, "step": 3025000 }, { "epoch": 31.93, "learning_rate": 3.403548022837363e-05, "loss": 1.2899, "step": 3025500 }, { "epoch": 31.93, "learning_rate": 3.4032841900862205e-05, "loss": 1.3064, "step": 3026000 }, { "epoch": 31.94, "learning_rate": 3.403020357335078e-05, "loss": 1.3365, "step": 3026500 }, { "epoch": 31.94, "learning_rate": 3.402756524583936e-05, "loss": 1.3411, "step": 3027000 }, { "epoch": 31.95, "learning_rate": 3.402492691832794e-05, "loss": 1.3403, "step": 3027500 }, { "epoch": 31.96, "learning_rate": 3.402228859081651e-05, "loss": 1.3067, "step": 3028000 }, { "epoch": 31.96, "learning_rate": 3.401965026330508e-05, "loss": 1.3516, "step": 3028500 }, { "epoch": 31.97, "learning_rate": 3.4017011935793665e-05, "loss": 1.3256, "step": 3029000 }, { "epoch": 31.97, "learning_rate": 3.401437360828224e-05, "loss": 1.3752, "step": 3029500 }, { "epoch": 31.98, "learning_rate": 3.4011735280770816e-05, "loss": 1.3268, "step": 3030000 }, { "epoch": 31.98, "learning_rate": 3.400909695325939e-05, "loss": 1.3252, "step": 3030500 }, { "epoch": 31.99, "learning_rate": 3.400645862574797e-05, "loss": 1.3327, "step": 3031000 }, { "epoch": 31.99, "learning_rate": 3.400382029823654e-05, "loss": 1.3377, "step": 3031500 }, { "epoch": 32.0, "learning_rate": 3.400118197072512e-05, "loss": 1.274, "step": 3032000 }, { "epoch": 32.0, "learning_rate": 3.39985436432137e-05, "loss": 1.3466, "step": 3032500 }, { "epoch": 32.01, "learning_rate": 3.399590531570227e-05, "loss": 1.351, "step": 3033000 }, { "epoch": 32.01, "learning_rate": 3.3993266988190845e-05, "loss": 1.2676, "step": 3033500 }, { "epoch": 32.02, "learning_rate": 3.399062866067943e-05, "loss": 1.3439, "step": 3034000 }, { "epoch": 32.02, "learning_rate": 3.3987990333168e-05, "loss": 1.293, "step": 3034500 }, { "epoch": 32.03, "learning_rate": 3.398535200565657e-05, "loss": 1.3005, "step": 3035000 }, { "epoch": 32.03, "learning_rate": 3.398271367814515e-05, "loss": 1.2846, "step": 3035500 }, { "epoch": 32.04, "learning_rate": 3.398007535063373e-05, "loss": 1.3229, "step": 3036000 }, { "epoch": 32.05, "learning_rate": 3.3977437023122304e-05, "loss": 1.3119, "step": 3036500 }, { "epoch": 32.05, "learning_rate": 3.397479869561088e-05, "loss": 1.3376, "step": 3037000 }, { "epoch": 32.06, "learning_rate": 3.3972160368099455e-05, "loss": 1.2995, "step": 3037500 }, { "epoch": 32.06, "learning_rate": 3.396952204058803e-05, "loss": 1.308, "step": 3038000 }, { "epoch": 32.07, "learning_rate": 3.3966883713076606e-05, "loss": 1.3327, "step": 3038500 }, { "epoch": 32.07, "learning_rate": 3.396424538556519e-05, "loss": 1.2938, "step": 3039000 }, { "epoch": 32.08, "learning_rate": 3.3961607058053764e-05, "loss": 1.2791, "step": 3039500 }, { "epoch": 32.08, "learning_rate": 3.395896873054233e-05, "loss": 1.3358, "step": 3040000 }, { "epoch": 32.09, "learning_rate": 3.395633040303091e-05, "loss": 1.2996, "step": 3040500 }, { "epoch": 32.09, "learning_rate": 3.395369207551949e-05, "loss": 1.2983, "step": 3041000 }, { "epoch": 32.1, "learning_rate": 3.3951053748008066e-05, "loss": 1.2655, "step": 3041500 }, { "epoch": 32.1, "learning_rate": 3.394841542049664e-05, "loss": 1.3048, "step": 3042000 }, { "epoch": 32.11, "learning_rate": 3.394577709298522e-05, "loss": 1.3368, "step": 3042500 }, { "epoch": 32.11, "learning_rate": 3.394313876547379e-05, "loss": 1.3417, "step": 3043000 }, { "epoch": 32.12, "learning_rate": 3.394050043796237e-05, "loss": 1.3506, "step": 3043500 }, { "epoch": 32.12, "learning_rate": 3.393786211045094e-05, "loss": 1.3002, "step": 3044000 }, { "epoch": 32.13, "learning_rate": 3.393522378293952e-05, "loss": 1.3223, "step": 3044500 }, { "epoch": 32.13, "learning_rate": 3.3932585455428094e-05, "loss": 1.3562, "step": 3045000 }, { "epoch": 32.14, "learning_rate": 3.392994712791667e-05, "loss": 1.3079, "step": 3045500 }, { "epoch": 32.15, "learning_rate": 3.392730880040525e-05, "loss": 1.3599, "step": 3046000 }, { "epoch": 32.15, "learning_rate": 3.392467047289383e-05, "loss": 1.3633, "step": 3046500 }, { "epoch": 32.16, "learning_rate": 3.3922032145382396e-05, "loss": 1.3232, "step": 3047000 }, { "epoch": 32.16, "learning_rate": 3.391939381787098e-05, "loss": 1.2371, "step": 3047500 }, { "epoch": 32.17, "learning_rate": 3.3916755490359554e-05, "loss": 1.343, "step": 3048000 }, { "epoch": 32.17, "learning_rate": 3.391411716284813e-05, "loss": 1.3193, "step": 3048500 }, { "epoch": 32.18, "learning_rate": 3.3911478835336705e-05, "loss": 1.2731, "step": 3049000 }, { "epoch": 32.18, "learning_rate": 3.390884050782528e-05, "loss": 1.304, "step": 3049500 }, { "epoch": 32.19, "learning_rate": 3.3906202180313856e-05, "loss": 1.2939, "step": 3050000 }, { "epoch": 32.19, "learning_rate": 3.390356385280243e-05, "loss": 1.3219, "step": 3050500 }, { "epoch": 32.2, "learning_rate": 3.3900925525291014e-05, "loss": 1.3022, "step": 3051000 }, { "epoch": 32.2, "learning_rate": 3.389828719777959e-05, "loss": 1.3416, "step": 3051500 }, { "epoch": 32.21, "learning_rate": 3.389564887026816e-05, "loss": 1.2794, "step": 3052000 }, { "epoch": 32.21, "learning_rate": 3.389301054275674e-05, "loss": 1.2673, "step": 3052500 }, { "epoch": 32.22, "learning_rate": 3.3890372215245316e-05, "loss": 1.3195, "step": 3053000 }, { "epoch": 32.22, "learning_rate": 3.388773388773389e-05, "loss": 1.3055, "step": 3053500 }, { "epoch": 32.23, "learning_rate": 3.388509556022246e-05, "loss": 1.3559, "step": 3054000 }, { "epoch": 32.24, "learning_rate": 3.388245723271104e-05, "loss": 1.3352, "step": 3054500 }, { "epoch": 32.24, "learning_rate": 3.387981890519962e-05, "loss": 1.2975, "step": 3055000 }, { "epoch": 32.25, "learning_rate": 3.387718057768819e-05, "loss": 1.3181, "step": 3055500 }, { "epoch": 32.25, "learning_rate": 3.387454225017677e-05, "loss": 1.2834, "step": 3056000 }, { "epoch": 32.26, "learning_rate": 3.3871903922665344e-05, "loss": 1.382, "step": 3056500 }, { "epoch": 32.26, "learning_rate": 3.386926559515392e-05, "loss": 1.3915, "step": 3057000 }, { "epoch": 32.27, "learning_rate": 3.3866627267642495e-05, "loss": 1.329, "step": 3057500 }, { "epoch": 32.27, "learning_rate": 3.386398894013108e-05, "loss": 1.306, "step": 3058000 }, { "epoch": 32.28, "learning_rate": 3.386135061261965e-05, "loss": 1.3305, "step": 3058500 }, { "epoch": 32.28, "learning_rate": 3.385871228510822e-05, "loss": 1.3088, "step": 3059000 }, { "epoch": 32.29, "learning_rate": 3.3856073957596804e-05, "loss": 1.2924, "step": 3059500 }, { "epoch": 32.29, "learning_rate": 3.385343563008538e-05, "loss": 1.2951, "step": 3060000 }, { "epoch": 32.3, "learning_rate": 3.3850797302573955e-05, "loss": 1.3224, "step": 3060500 }, { "epoch": 32.3, "learning_rate": 3.384815897506253e-05, "loss": 1.3246, "step": 3061000 }, { "epoch": 32.31, "learning_rate": 3.3845520647551106e-05, "loss": 1.3105, "step": 3061500 }, { "epoch": 32.31, "learning_rate": 3.384288232003968e-05, "loss": 1.311, "step": 3062000 }, { "epoch": 32.32, "learning_rate": 3.384024399252826e-05, "loss": 1.3238, "step": 3062500 }, { "epoch": 32.32, "learning_rate": 3.383760566501684e-05, "loss": 1.3135, "step": 3063000 }, { "epoch": 32.33, "learning_rate": 3.383496733750541e-05, "loss": 1.325, "step": 3063500 }, { "epoch": 32.34, "learning_rate": 3.383232900999398e-05, "loss": 1.3277, "step": 3064000 }, { "epoch": 32.34, "learning_rate": 3.3829690682482565e-05, "loss": 1.3236, "step": 3064500 }, { "epoch": 32.35, "learning_rate": 3.382705235497114e-05, "loss": 1.3021, "step": 3065000 }, { "epoch": 32.35, "learning_rate": 3.3824414027459716e-05, "loss": 1.3202, "step": 3065500 }, { "epoch": 32.36, "learning_rate": 3.3821775699948285e-05, "loss": 1.2826, "step": 3066000 }, { "epoch": 32.36, "learning_rate": 3.381913737243687e-05, "loss": 1.341, "step": 3066500 }, { "epoch": 32.37, "learning_rate": 3.381649904492544e-05, "loss": 1.3404, "step": 3067000 }, { "epoch": 32.37, "learning_rate": 3.381386071741402e-05, "loss": 1.3299, "step": 3067500 }, { "epoch": 32.38, "learning_rate": 3.3811222389902594e-05, "loss": 1.3352, "step": 3068000 }, { "epoch": 32.38, "learning_rate": 3.380858406239117e-05, "loss": 1.3778, "step": 3068500 }, { "epoch": 32.39, "learning_rate": 3.3805945734879745e-05, "loss": 1.3598, "step": 3069000 }, { "epoch": 32.39, "learning_rate": 3.380330740736832e-05, "loss": 1.3169, "step": 3069500 }, { "epoch": 32.4, "learning_rate": 3.38006690798569e-05, "loss": 1.3017, "step": 3070000 }, { "epoch": 32.4, "learning_rate": 3.379803075234547e-05, "loss": 1.3544, "step": 3070500 }, { "epoch": 32.41, "learning_rate": 3.379539242483405e-05, "loss": 1.2965, "step": 3071000 }, { "epoch": 32.41, "learning_rate": 3.379275409732263e-05, "loss": 1.3169, "step": 3071500 }, { "epoch": 32.42, "learning_rate": 3.3790115769811204e-05, "loss": 1.3275, "step": 3072000 }, { "epoch": 32.43, "learning_rate": 3.378747744229978e-05, "loss": 1.2824, "step": 3072500 }, { "epoch": 32.43, "learning_rate": 3.3784839114788355e-05, "loss": 1.3781, "step": 3073000 }, { "epoch": 32.44, "learning_rate": 3.378220078727693e-05, "loss": 1.3157, "step": 3073500 }, { "epoch": 32.44, "learning_rate": 3.3779562459765506e-05, "loss": 1.3387, "step": 3074000 }, { "epoch": 32.45, "learning_rate": 3.377692413225408e-05, "loss": 1.3354, "step": 3074500 }, { "epoch": 32.45, "learning_rate": 3.3774285804742664e-05, "loss": 1.3077, "step": 3075000 }, { "epoch": 32.46, "learning_rate": 3.377164747723123e-05, "loss": 1.3129, "step": 3075500 }, { "epoch": 32.46, "learning_rate": 3.376900914971981e-05, "loss": 1.3193, "step": 3076000 }, { "epoch": 32.47, "learning_rate": 3.376637082220839e-05, "loss": 1.3091, "step": 3076500 }, { "epoch": 32.47, "learning_rate": 3.3763732494696966e-05, "loss": 1.3142, "step": 3077000 }, { "epoch": 32.48, "learning_rate": 3.376109416718554e-05, "loss": 1.2785, "step": 3077500 }, { "epoch": 32.48, "learning_rate": 3.375845583967411e-05, "loss": 1.3893, "step": 3078000 }, { "epoch": 32.49, "learning_rate": 3.375581751216269e-05, "loss": 1.3043, "step": 3078500 }, { "epoch": 32.49, "learning_rate": 3.375317918465127e-05, "loss": 1.2967, "step": 3079000 }, { "epoch": 32.5, "learning_rate": 3.3750540857139844e-05, "loss": 1.32, "step": 3079500 }, { "epoch": 32.5, "learning_rate": 3.374790252962842e-05, "loss": 1.2949, "step": 3080000 }, { "epoch": 32.51, "learning_rate": 3.3745264202116995e-05, "loss": 1.3191, "step": 3080500 }, { "epoch": 32.51, "learning_rate": 3.374262587460557e-05, "loss": 1.3227, "step": 3081000 }, { "epoch": 32.52, "learning_rate": 3.3739987547094146e-05, "loss": 1.3086, "step": 3081500 }, { "epoch": 32.53, "learning_rate": 3.373734921958273e-05, "loss": 1.2742, "step": 3082000 }, { "epoch": 32.53, "learning_rate": 3.3734710892071296e-05, "loss": 1.2619, "step": 3082500 }, { "epoch": 32.54, "learning_rate": 3.373207256455987e-05, "loss": 1.3296, "step": 3083000 }, { "epoch": 32.54, "learning_rate": 3.3729434237048454e-05, "loss": 1.3891, "step": 3083500 }, { "epoch": 32.55, "learning_rate": 3.372679590953703e-05, "loss": 1.3308, "step": 3084000 }, { "epoch": 32.55, "learning_rate": 3.3724157582025605e-05, "loss": 1.3052, "step": 3084500 }, { "epoch": 32.56, "learning_rate": 3.372151925451418e-05, "loss": 1.3007, "step": 3085000 }, { "epoch": 32.56, "learning_rate": 3.3718880927002756e-05, "loss": 1.3954, "step": 3085500 }, { "epoch": 32.57, "learning_rate": 3.371624259949133e-05, "loss": 1.3383, "step": 3086000 }, { "epoch": 32.57, "learning_rate": 3.371360427197991e-05, "loss": 1.3261, "step": 3086500 }, { "epoch": 32.58, "learning_rate": 3.371096594446849e-05, "loss": 1.302, "step": 3087000 }, { "epoch": 32.58, "learning_rate": 3.370832761695706e-05, "loss": 1.3609, "step": 3087500 }, { "epoch": 32.59, "learning_rate": 3.3705689289445634e-05, "loss": 1.3172, "step": 3088000 }, { "epoch": 32.59, "learning_rate": 3.3703050961934216e-05, "loss": 1.3594, "step": 3088500 }, { "epoch": 32.6, "learning_rate": 3.370041263442279e-05, "loss": 1.3406, "step": 3089000 }, { "epoch": 32.6, "learning_rate": 3.369777430691136e-05, "loss": 1.3072, "step": 3089500 }, { "epoch": 32.61, "learning_rate": 3.3695135979399936e-05, "loss": 1.3432, "step": 3090000 }, { "epoch": 32.62, "learning_rate": 3.369249765188852e-05, "loss": 1.337, "step": 3090500 }, { "epoch": 32.62, "learning_rate": 3.368985932437709e-05, "loss": 1.3223, "step": 3091000 }, { "epoch": 32.63, "learning_rate": 3.368722099686567e-05, "loss": 1.3863, "step": 3091500 }, { "epoch": 32.63, "learning_rate": 3.3684582669354244e-05, "loss": 1.2792, "step": 3092000 }, { "epoch": 32.64, "learning_rate": 3.368194434184282e-05, "loss": 1.3809, "step": 3092500 }, { "epoch": 32.64, "learning_rate": 3.3679306014331395e-05, "loss": 1.2583, "step": 3093000 }, { "epoch": 32.65, "learning_rate": 3.367666768681997e-05, "loss": 1.284, "step": 3093500 }, { "epoch": 32.65, "learning_rate": 3.367402935930855e-05, "loss": 1.3344, "step": 3094000 }, { "epoch": 32.66, "learning_rate": 3.367139103179712e-05, "loss": 1.2422, "step": 3094500 }, { "epoch": 32.66, "learning_rate": 3.36687527042857e-05, "loss": 1.2904, "step": 3095000 }, { "epoch": 32.67, "learning_rate": 3.366611437677428e-05, "loss": 1.2966, "step": 3095500 }, { "epoch": 32.67, "learning_rate": 3.3663476049262855e-05, "loss": 1.3461, "step": 3096000 }, { "epoch": 32.68, "learning_rate": 3.366083772175143e-05, "loss": 1.319, "step": 3096500 }, { "epoch": 32.68, "learning_rate": 3.3658199394240006e-05, "loss": 1.3128, "step": 3097000 }, { "epoch": 32.69, "learning_rate": 3.365556106672858e-05, "loss": 1.3462, "step": 3097500 }, { "epoch": 32.69, "learning_rate": 3.365292273921716e-05, "loss": 1.3279, "step": 3098000 }, { "epoch": 32.7, "learning_rate": 3.365028441170573e-05, "loss": 1.3106, "step": 3098500 }, { "epoch": 32.7, "learning_rate": 3.364764608419431e-05, "loss": 1.2906, "step": 3099000 }, { "epoch": 32.71, "learning_rate": 3.3645007756682883e-05, "loss": 1.297, "step": 3099500 }, { "epoch": 32.72, "learning_rate": 3.364236942917146e-05, "loss": 1.3876, "step": 3100000 }, { "epoch": 32.72, "learning_rate": 3.363973110166004e-05, "loss": 1.3538, "step": 3100500 }, { "epoch": 32.73, "learning_rate": 3.363709277414862e-05, "loss": 1.318, "step": 3101000 }, { "epoch": 32.73, "learning_rate": 3.3634454446637185e-05, "loss": 1.3282, "step": 3101500 }, { "epoch": 32.74, "learning_rate": 3.363181611912576e-05, "loss": 1.3573, "step": 3102000 }, { "epoch": 32.74, "learning_rate": 3.362917779161434e-05, "loss": 1.3514, "step": 3102500 }, { "epoch": 32.75, "learning_rate": 3.362653946410292e-05, "loss": 1.2824, "step": 3103000 }, { "epoch": 32.75, "learning_rate": 3.3623901136591494e-05, "loss": 1.2605, "step": 3103500 }, { "epoch": 32.76, "learning_rate": 3.362126280908007e-05, "loss": 1.279, "step": 3104000 }, { "epoch": 32.76, "learning_rate": 3.3618624481568645e-05, "loss": 1.3289, "step": 3104500 }, { "epoch": 32.77, "learning_rate": 3.361598615405722e-05, "loss": 1.3502, "step": 3105000 }, { "epoch": 32.77, "learning_rate": 3.3613347826545796e-05, "loss": 1.3815, "step": 3105500 }, { "epoch": 32.78, "learning_rate": 3.361070949903438e-05, "loss": 1.3624, "step": 3106000 }, { "epoch": 32.78, "learning_rate": 3.360807117152295e-05, "loss": 1.299, "step": 3106500 }, { "epoch": 32.79, "learning_rate": 3.360543284401152e-05, "loss": 1.3467, "step": 3107000 }, { "epoch": 32.79, "learning_rate": 3.3602794516500105e-05, "loss": 1.2956, "step": 3107500 }, { "epoch": 32.8, "learning_rate": 3.360015618898868e-05, "loss": 1.3669, "step": 3108000 }, { "epoch": 32.8, "learning_rate": 3.359751786147725e-05, "loss": 1.3356, "step": 3108500 }, { "epoch": 32.81, "learning_rate": 3.359487953396583e-05, "loss": 1.2488, "step": 3109000 }, { "epoch": 32.82, "learning_rate": 3.359224120645441e-05, "loss": 1.3108, "step": 3109500 }, { "epoch": 32.82, "learning_rate": 3.358960287894298e-05, "loss": 1.3443, "step": 3110000 }, { "epoch": 32.83, "learning_rate": 3.358696455143156e-05, "loss": 1.3172, "step": 3110500 }, { "epoch": 32.83, "learning_rate": 3.358432622392013e-05, "loss": 1.3387, "step": 3111000 }, { "epoch": 32.84, "learning_rate": 3.358168789640871e-05, "loss": 1.2983, "step": 3111500 }, { "epoch": 32.84, "learning_rate": 3.3579049568897284e-05, "loss": 1.34, "step": 3112000 }, { "epoch": 32.85, "learning_rate": 3.3576411241385866e-05, "loss": 1.324, "step": 3112500 }, { "epoch": 32.85, "learning_rate": 3.357377291387444e-05, "loss": 1.3058, "step": 3113000 }, { "epoch": 32.86, "learning_rate": 3.357113458636301e-05, "loss": 1.363, "step": 3113500 }, { "epoch": 32.86, "learning_rate": 3.3568496258851586e-05, "loss": 1.2958, "step": 3114000 }, { "epoch": 32.87, "learning_rate": 3.356585793134017e-05, "loss": 1.2676, "step": 3114500 }, { "epoch": 32.87, "learning_rate": 3.3563219603828744e-05, "loss": 1.2565, "step": 3115000 }, { "epoch": 32.88, "learning_rate": 3.356058127631732e-05, "loss": 1.297, "step": 3115500 }, { "epoch": 32.88, "learning_rate": 3.3557942948805895e-05, "loss": 1.3434, "step": 3116000 }, { "epoch": 32.89, "learning_rate": 3.355530462129447e-05, "loss": 1.3295, "step": 3116500 }, { "epoch": 32.89, "learning_rate": 3.3552666293783046e-05, "loss": 1.3651, "step": 3117000 }, { "epoch": 32.9, "learning_rate": 3.355002796627162e-05, "loss": 1.3809, "step": 3117500 }, { "epoch": 32.91, "learning_rate": 3.35473896387602e-05, "loss": 1.2903, "step": 3118000 }, { "epoch": 32.91, "learning_rate": 3.354475131124877e-05, "loss": 1.3358, "step": 3118500 }, { "epoch": 32.92, "learning_rate": 3.354211298373735e-05, "loss": 1.3558, "step": 3119000 }, { "epoch": 32.92, "learning_rate": 3.353947465622593e-05, "loss": 1.3355, "step": 3119500 }, { "epoch": 32.93, "learning_rate": 3.3536836328714505e-05, "loss": 1.2515, "step": 3120000 }, { "epoch": 32.93, "learning_rate": 3.3534198001203074e-05, "loss": 1.2607, "step": 3120500 }, { "epoch": 32.94, "learning_rate": 3.3531559673691656e-05, "loss": 1.3547, "step": 3121000 }, { "epoch": 32.94, "learning_rate": 3.352892134618023e-05, "loss": 1.323, "step": 3121500 }, { "epoch": 32.95, "learning_rate": 3.352628301866881e-05, "loss": 1.3893, "step": 3122000 }, { "epoch": 32.95, "learning_rate": 3.352364469115738e-05, "loss": 1.3341, "step": 3122500 }, { "epoch": 32.96, "learning_rate": 3.352100636364596e-05, "loss": 1.3107, "step": 3123000 }, { "epoch": 32.96, "learning_rate": 3.3518368036134534e-05, "loss": 1.3091, "step": 3123500 }, { "epoch": 32.97, "learning_rate": 3.351572970862311e-05, "loss": 1.2871, "step": 3124000 }, { "epoch": 32.97, "learning_rate": 3.351309138111169e-05, "loss": 1.3001, "step": 3124500 }, { "epoch": 32.98, "learning_rate": 3.351045305360027e-05, "loss": 1.2986, "step": 3125000 }, { "epoch": 32.98, "learning_rate": 3.3507814726088836e-05, "loss": 1.3866, "step": 3125500 }, { "epoch": 32.99, "learning_rate": 3.350517639857742e-05, "loss": 1.3474, "step": 3126000 }, { "epoch": 32.99, "learning_rate": 3.3502538071065994e-05, "loss": 1.3552, "step": 3126500 }, { "epoch": 33.0, "learning_rate": 3.349989974355457e-05, "loss": 1.3064, "step": 3127000 }, { "epoch": 33.01, "learning_rate": 3.349726141604314e-05, "loss": 1.2893, "step": 3127500 }, { "epoch": 33.01, "learning_rate": 3.349462308853172e-05, "loss": 1.3011, "step": 3128000 }, { "epoch": 33.02, "learning_rate": 3.3491984761020296e-05, "loss": 1.2912, "step": 3128500 }, { "epoch": 33.02, "learning_rate": 3.348934643350887e-05, "loss": 1.2716, "step": 3129000 }, { "epoch": 33.03, "learning_rate": 3.3486708105997447e-05, "loss": 1.2622, "step": 3129500 }, { "epoch": 33.03, "learning_rate": 3.348406977848602e-05, "loss": 1.352, "step": 3130000 }, { "epoch": 33.04, "learning_rate": 3.34814314509746e-05, "loss": 1.2974, "step": 3130500 }, { "epoch": 33.04, "learning_rate": 3.347879312346317e-05, "loss": 1.2722, "step": 3131000 }, { "epoch": 33.05, "learning_rate": 3.3476154795951755e-05, "loss": 1.33, "step": 3131500 }, { "epoch": 33.05, "learning_rate": 3.347351646844033e-05, "loss": 1.3298, "step": 3132000 }, { "epoch": 33.06, "learning_rate": 3.34708781409289e-05, "loss": 1.3995, "step": 3132500 }, { "epoch": 33.06, "learning_rate": 3.346823981341748e-05, "loss": 1.3107, "step": 3133000 }, { "epoch": 33.07, "learning_rate": 3.346560148590606e-05, "loss": 1.2894, "step": 3133500 }, { "epoch": 33.07, "learning_rate": 3.346296315839463e-05, "loss": 1.3711, "step": 3134000 }, { "epoch": 33.08, "learning_rate": 3.346032483088321e-05, "loss": 1.3176, "step": 3134500 }, { "epoch": 33.08, "learning_rate": 3.3457686503371784e-05, "loss": 1.3349, "step": 3135000 }, { "epoch": 33.09, "learning_rate": 3.345504817586036e-05, "loss": 1.2919, "step": 3135500 }, { "epoch": 33.1, "learning_rate": 3.3452409848348935e-05, "loss": 1.3352, "step": 3136000 }, { "epoch": 33.1, "learning_rate": 3.344977152083752e-05, "loss": 1.2971, "step": 3136500 }, { "epoch": 33.11, "learning_rate": 3.3447133193326086e-05, "loss": 1.2955, "step": 3137000 }, { "epoch": 33.11, "learning_rate": 3.344449486581466e-05, "loss": 1.2467, "step": 3137500 }, { "epoch": 33.12, "learning_rate": 3.344185653830324e-05, "loss": 1.2866, "step": 3138000 }, { "epoch": 33.12, "learning_rate": 3.343921821079182e-05, "loss": 1.327, "step": 3138500 }, { "epoch": 33.13, "learning_rate": 3.3436579883280394e-05, "loss": 1.2242, "step": 3139000 }, { "epoch": 33.13, "learning_rate": 3.343394155576896e-05, "loss": 1.3064, "step": 3139500 }, { "epoch": 33.14, "learning_rate": 3.3431303228257545e-05, "loss": 1.3097, "step": 3140000 }, { "epoch": 33.14, "learning_rate": 3.342866490074612e-05, "loss": 1.2946, "step": 3140500 }, { "epoch": 33.15, "learning_rate": 3.3426026573234696e-05, "loss": 1.2597, "step": 3141000 }, { "epoch": 33.15, "learning_rate": 3.342338824572327e-05, "loss": 1.3384, "step": 3141500 }, { "epoch": 33.16, "learning_rate": 3.342074991821185e-05, "loss": 1.3398, "step": 3142000 }, { "epoch": 33.16, "learning_rate": 3.341811159070042e-05, "loss": 1.2819, "step": 3142500 }, { "epoch": 33.17, "learning_rate": 3.3415473263189e-05, "loss": 1.2956, "step": 3143000 }, { "epoch": 33.17, "learning_rate": 3.341283493567758e-05, "loss": 1.3216, "step": 3143500 }, { "epoch": 33.18, "learning_rate": 3.3410196608166156e-05, "loss": 1.3346, "step": 3144000 }, { "epoch": 33.18, "learning_rate": 3.3407558280654725e-05, "loss": 1.3153, "step": 3144500 }, { "epoch": 33.19, "learning_rate": 3.340491995314331e-05, "loss": 1.338, "step": 3145000 }, { "epoch": 33.2, "learning_rate": 3.340228162563188e-05, "loss": 1.3357, "step": 3145500 }, { "epoch": 33.2, "learning_rate": 3.339964329812046e-05, "loss": 1.2876, "step": 3146000 }, { "epoch": 33.21, "learning_rate": 3.3397004970609033e-05, "loss": 1.2794, "step": 3146500 }, { "epoch": 33.21, "learning_rate": 3.339436664309761e-05, "loss": 1.3514, "step": 3147000 }, { "epoch": 33.22, "learning_rate": 3.3391728315586184e-05, "loss": 1.351, "step": 3147500 }, { "epoch": 33.22, "learning_rate": 3.338908998807476e-05, "loss": 1.2531, "step": 3148000 }, { "epoch": 33.23, "learning_rate": 3.338645166056334e-05, "loss": 1.3592, "step": 3148500 }, { "epoch": 33.23, "learning_rate": 3.338381333305191e-05, "loss": 1.3355, "step": 3149000 }, { "epoch": 33.24, "learning_rate": 3.3381175005540486e-05, "loss": 1.3561, "step": 3149500 }, { "epoch": 33.24, "learning_rate": 3.337853667802907e-05, "loss": 1.3022, "step": 3150000 }, { "epoch": 33.25, "learning_rate": 3.3375898350517644e-05, "loss": 1.287, "step": 3150500 }, { "epoch": 33.25, "learning_rate": 3.337326002300622e-05, "loss": 1.3155, "step": 3151000 }, { "epoch": 33.26, "learning_rate": 3.337062169549479e-05, "loss": 1.3188, "step": 3151500 }, { "epoch": 33.26, "learning_rate": 3.336798336798337e-05, "loss": 1.3827, "step": 3152000 }, { "epoch": 33.27, "learning_rate": 3.3365345040471946e-05, "loss": 1.2648, "step": 3152500 }, { "epoch": 33.27, "learning_rate": 3.336270671296052e-05, "loss": 1.3122, "step": 3153000 }, { "epoch": 33.28, "learning_rate": 3.33600683854491e-05, "loss": 1.3149, "step": 3153500 }, { "epoch": 33.29, "learning_rate": 3.335743005793767e-05, "loss": 1.3203, "step": 3154000 }, { "epoch": 33.29, "learning_rate": 3.335479173042625e-05, "loss": 1.3852, "step": 3154500 }, { "epoch": 33.3, "learning_rate": 3.3352153402914823e-05, "loss": 1.3158, "step": 3155000 }, { "epoch": 33.3, "learning_rate": 3.3349515075403406e-05, "loss": 1.3343, "step": 3155500 }, { "epoch": 33.31, "learning_rate": 3.3346876747891974e-05, "loss": 1.2996, "step": 3156000 }, { "epoch": 33.31, "learning_rate": 3.334423842038055e-05, "loss": 1.3486, "step": 3156500 }, { "epoch": 33.32, "learning_rate": 3.334160009286913e-05, "loss": 1.3468, "step": 3157000 }, { "epoch": 33.32, "learning_rate": 3.333896176535771e-05, "loss": 1.3262, "step": 3157500 }, { "epoch": 33.33, "learning_rate": 3.333632343784628e-05, "loss": 1.3404, "step": 3158000 }, { "epoch": 33.33, "learning_rate": 3.333368511033486e-05, "loss": 1.2765, "step": 3158500 }, { "epoch": 33.34, "learning_rate": 3.3331046782823434e-05, "loss": 1.3066, "step": 3159000 }, { "epoch": 33.34, "learning_rate": 3.332840845531201e-05, "loss": 1.3136, "step": 3159500 }, { "epoch": 33.35, "learning_rate": 3.3325770127800585e-05, "loss": 1.2696, "step": 3160000 }, { "epoch": 33.35, "learning_rate": 3.332313180028917e-05, "loss": 1.291, "step": 3160500 }, { "epoch": 33.36, "learning_rate": 3.3320493472777736e-05, "loss": 1.3075, "step": 3161000 }, { "epoch": 33.36, "learning_rate": 3.331785514526631e-05, "loss": 1.2606, "step": 3161500 }, { "epoch": 33.37, "learning_rate": 3.3315216817754894e-05, "loss": 1.3536, "step": 3162000 }, { "epoch": 33.37, "learning_rate": 3.331257849024347e-05, "loss": 1.3163, "step": 3162500 }, { "epoch": 33.38, "learning_rate": 3.3309940162732045e-05, "loss": 1.2909, "step": 3163000 }, { "epoch": 33.39, "learning_rate": 3.3307301835220614e-05, "loss": 1.3512, "step": 3163500 }, { "epoch": 33.39, "learning_rate": 3.3304663507709196e-05, "loss": 1.3493, "step": 3164000 }, { "epoch": 33.4, "learning_rate": 3.330202518019777e-05, "loss": 1.3511, "step": 3164500 }, { "epoch": 33.4, "learning_rate": 3.329938685268635e-05, "loss": 1.2561, "step": 3165000 }, { "epoch": 33.41, "learning_rate": 3.329674852517492e-05, "loss": 1.2963, "step": 3165500 }, { "epoch": 33.41, "learning_rate": 3.32941101976635e-05, "loss": 1.3398, "step": 3166000 }, { "epoch": 33.42, "learning_rate": 3.329147187015207e-05, "loss": 1.2586, "step": 3166500 }, { "epoch": 33.42, "learning_rate": 3.328883354264065e-05, "loss": 1.3096, "step": 3167000 }, { "epoch": 33.43, "learning_rate": 3.328619521512923e-05, "loss": 1.3269, "step": 3167500 }, { "epoch": 33.43, "learning_rate": 3.32835568876178e-05, "loss": 1.3565, "step": 3168000 }, { "epoch": 33.44, "learning_rate": 3.3280918560106375e-05, "loss": 1.3701, "step": 3168500 }, { "epoch": 33.44, "learning_rate": 3.327828023259496e-05, "loss": 1.3081, "step": 3169000 }, { "epoch": 33.45, "learning_rate": 3.327564190508353e-05, "loss": 1.3246, "step": 3169500 }, { "epoch": 33.45, "learning_rate": 3.327300357757211e-05, "loss": 1.3195, "step": 3170000 }, { "epoch": 33.46, "learning_rate": 3.3270365250060684e-05, "loss": 1.2866, "step": 3170500 }, { "epoch": 33.46, "learning_rate": 3.326772692254926e-05, "loss": 1.2834, "step": 3171000 }, { "epoch": 33.47, "learning_rate": 3.3265088595037835e-05, "loss": 1.2788, "step": 3171500 }, { "epoch": 33.48, "learning_rate": 3.326245026752641e-05, "loss": 1.3093, "step": 3172000 }, { "epoch": 33.48, "learning_rate": 3.325981194001499e-05, "loss": 1.3269, "step": 3172500 }, { "epoch": 33.49, "learning_rate": 3.325717361250356e-05, "loss": 1.2981, "step": 3173000 }, { "epoch": 33.49, "learning_rate": 3.325453528499214e-05, "loss": 1.3106, "step": 3173500 }, { "epoch": 33.5, "learning_rate": 3.325189695748072e-05, "loss": 1.3352, "step": 3174000 }, { "epoch": 33.5, "learning_rate": 3.3249258629969295e-05, "loss": 1.3502, "step": 3174500 }, { "epoch": 33.51, "learning_rate": 3.324662030245786e-05, "loss": 1.3368, "step": 3175000 }, { "epoch": 33.51, "learning_rate": 3.324398197494644e-05, "loss": 1.2912, "step": 3175500 }, { "epoch": 33.52, "learning_rate": 3.324134364743502e-05, "loss": 1.3728, "step": 3176000 }, { "epoch": 33.52, "learning_rate": 3.3238705319923597e-05, "loss": 1.3164, "step": 3176500 }, { "epoch": 33.53, "learning_rate": 3.323606699241217e-05, "loss": 1.3249, "step": 3177000 }, { "epoch": 33.53, "learning_rate": 3.323342866490075e-05, "loss": 1.3019, "step": 3177500 }, { "epoch": 33.54, "learning_rate": 3.323079033738932e-05, "loss": 1.3066, "step": 3178000 }, { "epoch": 33.54, "learning_rate": 3.32281520098779e-05, "loss": 1.3147, "step": 3178500 }, { "epoch": 33.55, "learning_rate": 3.3225513682366474e-05, "loss": 1.2926, "step": 3179000 }, { "epoch": 33.55, "learning_rate": 3.3222875354855056e-05, "loss": 1.307, "step": 3179500 }, { "epoch": 33.56, "learning_rate": 3.3220237027343625e-05, "loss": 1.3108, "step": 3180000 }, { "epoch": 33.56, "learning_rate": 3.32175986998322e-05, "loss": 1.3267, "step": 3180500 }, { "epoch": 33.57, "learning_rate": 3.321496037232078e-05, "loss": 1.3503, "step": 3181000 }, { "epoch": 33.58, "learning_rate": 3.321232204480936e-05, "loss": 1.2908, "step": 3181500 }, { "epoch": 33.58, "learning_rate": 3.3209683717297934e-05, "loss": 1.3671, "step": 3182000 }, { "epoch": 33.59, "learning_rate": 3.320704538978651e-05, "loss": 1.3109, "step": 3182500 }, { "epoch": 33.59, "learning_rate": 3.3204407062275085e-05, "loss": 1.3116, "step": 3183000 }, { "epoch": 33.6, "learning_rate": 3.320176873476366e-05, "loss": 1.316, "step": 3183500 }, { "epoch": 33.6, "learning_rate": 3.3199130407252236e-05, "loss": 1.3167, "step": 3184000 }, { "epoch": 33.61, "learning_rate": 3.319649207974081e-05, "loss": 1.301, "step": 3184500 }, { "epoch": 33.61, "learning_rate": 3.3193853752229387e-05, "loss": 1.3861, "step": 3185000 }, { "epoch": 33.62, "learning_rate": 3.319121542471796e-05, "loss": 1.3462, "step": 3185500 }, { "epoch": 33.62, "learning_rate": 3.3188577097206544e-05, "loss": 1.3412, "step": 3186000 }, { "epoch": 33.63, "learning_rate": 3.318593876969512e-05, "loss": 1.3537, "step": 3186500 }, { "epoch": 33.63, "learning_rate": 3.318330044218369e-05, "loss": 1.3113, "step": 3187000 }, { "epoch": 33.64, "learning_rate": 3.3180662114672264e-05, "loss": 1.291, "step": 3187500 }, { "epoch": 33.64, "learning_rate": 3.3178023787160846e-05, "loss": 1.3276, "step": 3188000 }, { "epoch": 33.65, "learning_rate": 3.317538545964942e-05, "loss": 1.3084, "step": 3188500 }, { "epoch": 33.65, "learning_rate": 3.3172747132138e-05, "loss": 1.3225, "step": 3189000 }, { "epoch": 33.66, "learning_rate": 3.317010880462657e-05, "loss": 1.3293, "step": 3189500 }, { "epoch": 33.67, "learning_rate": 3.316747047711515e-05, "loss": 1.3066, "step": 3190000 }, { "epoch": 33.67, "learning_rate": 3.3164832149603724e-05, "loss": 1.2862, "step": 3190500 }, { "epoch": 33.68, "learning_rate": 3.31621938220923e-05, "loss": 1.327, "step": 3191000 }, { "epoch": 33.68, "learning_rate": 3.315955549458088e-05, "loss": 1.3003, "step": 3191500 }, { "epoch": 33.69, "learning_rate": 3.315691716706945e-05, "loss": 1.2913, "step": 3192000 }, { "epoch": 33.69, "learning_rate": 3.3154278839558026e-05, "loss": 1.3118, "step": 3192500 }, { "epoch": 33.7, "learning_rate": 3.315164051204661e-05, "loss": 1.3427, "step": 3193000 }, { "epoch": 33.7, "learning_rate": 3.3149002184535183e-05, "loss": 1.3039, "step": 3193500 }, { "epoch": 33.71, "learning_rate": 3.314636385702375e-05, "loss": 1.3641, "step": 3194000 }, { "epoch": 33.71, "learning_rate": 3.3143725529512334e-05, "loss": 1.317, "step": 3194500 }, { "epoch": 33.72, "learning_rate": 3.314108720200091e-05, "loss": 1.3056, "step": 3195000 }, { "epoch": 33.72, "learning_rate": 3.3138448874489485e-05, "loss": 1.2794, "step": 3195500 }, { "epoch": 33.73, "learning_rate": 3.313581054697806e-05, "loss": 1.3115, "step": 3196000 }, { "epoch": 33.73, "learning_rate": 3.3133172219466636e-05, "loss": 1.3187, "step": 3196500 }, { "epoch": 33.74, "learning_rate": 3.313053389195521e-05, "loss": 1.307, "step": 3197000 }, { "epoch": 33.74, "learning_rate": 3.312789556444379e-05, "loss": 1.3294, "step": 3197500 }, { "epoch": 33.75, "learning_rate": 3.312525723693237e-05, "loss": 1.3329, "step": 3198000 }, { "epoch": 33.75, "learning_rate": 3.3122618909420945e-05, "loss": 1.2826, "step": 3198500 }, { "epoch": 33.76, "learning_rate": 3.3119980581909514e-05, "loss": 1.3323, "step": 3199000 }, { "epoch": 33.77, "learning_rate": 3.3117342254398096e-05, "loss": 1.3218, "step": 3199500 }, { "epoch": 33.77, "learning_rate": 3.311470392688667e-05, "loss": 1.3738, "step": 3200000 }, { "epoch": 33.78, "learning_rate": 3.311206559937525e-05, "loss": 1.2933, "step": 3200500 }, { "epoch": 33.78, "learning_rate": 3.310942727186382e-05, "loss": 1.2597, "step": 3201000 }, { "epoch": 33.79, "learning_rate": 3.31067889443524e-05, "loss": 1.3242, "step": 3201500 }, { "epoch": 33.79, "learning_rate": 3.3104150616840973e-05, "loss": 1.3009, "step": 3202000 }, { "epoch": 33.8, "learning_rate": 3.310151228932955e-05, "loss": 1.3434, "step": 3202500 }, { "epoch": 33.8, "learning_rate": 3.3098873961818124e-05, "loss": 1.3552, "step": 3203000 }, { "epoch": 33.81, "learning_rate": 3.30962356343067e-05, "loss": 1.3384, "step": 3203500 }, { "epoch": 33.81, "learning_rate": 3.3093597306795275e-05, "loss": 1.2885, "step": 3204000 }, { "epoch": 33.82, "learning_rate": 3.309095897928385e-05, "loss": 1.3533, "step": 3204500 }, { "epoch": 33.82, "learning_rate": 3.308832065177243e-05, "loss": 1.2907, "step": 3205000 }, { "epoch": 33.83, "learning_rate": 3.308568232426101e-05, "loss": 1.3217, "step": 3205500 }, { "epoch": 33.83, "learning_rate": 3.308304399674958e-05, "loss": 1.3183, "step": 3206000 }, { "epoch": 33.84, "learning_rate": 3.308040566923816e-05, "loss": 1.321, "step": 3206500 }, { "epoch": 33.84, "learning_rate": 3.3077767341726735e-05, "loss": 1.3482, "step": 3207000 }, { "epoch": 33.85, "learning_rate": 3.307512901421531e-05, "loss": 1.3263, "step": 3207500 }, { "epoch": 33.86, "learning_rate": 3.3072490686703886e-05, "loss": 1.3386, "step": 3208000 }, { "epoch": 33.86, "learning_rate": 3.306985235919246e-05, "loss": 1.2713, "step": 3208500 }, { "epoch": 33.87, "learning_rate": 3.306721403168104e-05, "loss": 1.278, "step": 3209000 }, { "epoch": 33.87, "learning_rate": 3.306457570416961e-05, "loss": 1.3538, "step": 3209500 }, { "epoch": 33.88, "learning_rate": 3.3061937376658195e-05, "loss": 1.2459, "step": 3210000 }, { "epoch": 33.88, "learning_rate": 3.305929904914677e-05, "loss": 1.3107, "step": 3210500 }, { "epoch": 33.89, "learning_rate": 3.305666072163534e-05, "loss": 1.2672, "step": 3211000 }, { "epoch": 33.89, "learning_rate": 3.305402239412392e-05, "loss": 1.3116, "step": 3211500 }, { "epoch": 33.9, "learning_rate": 3.30513840666125e-05, "loss": 1.3515, "step": 3212000 }, { "epoch": 33.9, "learning_rate": 3.304874573910107e-05, "loss": 1.3336, "step": 3212500 }, { "epoch": 33.91, "learning_rate": 3.304610741158964e-05, "loss": 1.367, "step": 3213000 }, { "epoch": 33.91, "learning_rate": 3.304346908407822e-05, "loss": 1.3202, "step": 3213500 }, { "epoch": 33.92, "learning_rate": 3.30408307565668e-05, "loss": 1.3372, "step": 3214000 }, { "epoch": 33.92, "learning_rate": 3.3038192429055374e-05, "loss": 1.2827, "step": 3214500 }, { "epoch": 33.93, "learning_rate": 3.303555410154395e-05, "loss": 1.3017, "step": 3215000 }, { "epoch": 33.93, "learning_rate": 3.3032915774032525e-05, "loss": 1.3545, "step": 3215500 }, { "epoch": 33.94, "learning_rate": 3.30302774465211e-05, "loss": 1.3382, "step": 3216000 }, { "epoch": 33.94, "learning_rate": 3.3027639119009676e-05, "loss": 1.324, "step": 3216500 }, { "epoch": 33.95, "learning_rate": 3.302500079149826e-05, "loss": 1.344, "step": 3217000 }, { "epoch": 33.96, "learning_rate": 3.3022362463986834e-05, "loss": 1.3175, "step": 3217500 }, { "epoch": 33.96, "learning_rate": 3.30197241364754e-05, "loss": 1.3327, "step": 3218000 }, { "epoch": 33.97, "learning_rate": 3.3017085808963985e-05, "loss": 1.3199, "step": 3218500 }, { "epoch": 33.97, "learning_rate": 3.301444748145256e-05, "loss": 1.3494, "step": 3219000 }, { "epoch": 33.98, "learning_rate": 3.3011809153941136e-05, "loss": 1.3002, "step": 3219500 }, { "epoch": 33.98, "learning_rate": 3.300917082642971e-05, "loss": 1.3497, "step": 3220000 }, { "epoch": 33.99, "learning_rate": 3.300653249891829e-05, "loss": 1.3129, "step": 3220500 }, { "epoch": 33.99, "learning_rate": 3.300389417140686e-05, "loss": 1.3057, "step": 3221000 }, { "epoch": 34.0, "learning_rate": 3.300125584389544e-05, "loss": 1.2889, "step": 3221500 }, { "epoch": 34.0, "learning_rate": 3.299861751638402e-05, "loss": 1.2558, "step": 3222000 }, { "epoch": 34.01, "learning_rate": 3.299597918887259e-05, "loss": 1.2933, "step": 3222500 }, { "epoch": 34.01, "learning_rate": 3.2993340861361164e-05, "loss": 1.3215, "step": 3223000 }, { "epoch": 34.02, "learning_rate": 3.2990702533849747e-05, "loss": 1.2686, "step": 3223500 }, { "epoch": 34.02, "learning_rate": 3.298806420633832e-05, "loss": 1.2178, "step": 3224000 }, { "epoch": 34.03, "learning_rate": 3.29854258788269e-05, "loss": 1.3814, "step": 3224500 }, { "epoch": 34.03, "learning_rate": 3.2982787551315466e-05, "loss": 1.2955, "step": 3225000 }, { "epoch": 34.04, "learning_rate": 3.298014922380405e-05, "loss": 1.2715, "step": 3225500 }, { "epoch": 34.04, "learning_rate": 3.2977510896292624e-05, "loss": 1.299, "step": 3226000 }, { "epoch": 34.05, "learning_rate": 3.29748725687812e-05, "loss": 1.3215, "step": 3226500 }, { "epoch": 34.06, "learning_rate": 3.2972234241269775e-05, "loss": 1.2777, "step": 3227000 }, { "epoch": 34.06, "learning_rate": 3.296959591375835e-05, "loss": 1.3414, "step": 3227500 }, { "epoch": 34.07, "learning_rate": 3.2966957586246926e-05, "loss": 1.3415, "step": 3228000 }, { "epoch": 34.07, "learning_rate": 3.29643192587355e-05, "loss": 1.3075, "step": 3228500 }, { "epoch": 34.08, "learning_rate": 3.2961680931224084e-05, "loss": 1.4117, "step": 3229000 }, { "epoch": 34.08, "learning_rate": 3.295904260371266e-05, "loss": 1.3101, "step": 3229500 }, { "epoch": 34.09, "learning_rate": 3.295640427620123e-05, "loss": 1.3818, "step": 3230000 }, { "epoch": 34.09, "learning_rate": 3.295376594868981e-05, "loss": 1.3704, "step": 3230500 }, { "epoch": 34.1, "learning_rate": 3.2951127621178386e-05, "loss": 1.2713, "step": 3231000 }, { "epoch": 34.1, "learning_rate": 3.294848929366696e-05, "loss": 1.2711, "step": 3231500 }, { "epoch": 34.11, "learning_rate": 3.2945850966155537e-05, "loss": 1.27, "step": 3232000 }, { "epoch": 34.11, "learning_rate": 3.294321263864411e-05, "loss": 1.3425, "step": 3232500 }, { "epoch": 34.12, "learning_rate": 3.294057431113269e-05, "loss": 1.3171, "step": 3233000 }, { "epoch": 34.12, "learning_rate": 3.293793598362126e-05, "loss": 1.3296, "step": 3233500 }, { "epoch": 34.13, "learning_rate": 3.2935297656109845e-05, "loss": 1.3085, "step": 3234000 }, { "epoch": 34.13, "learning_rate": 3.2932659328598414e-05, "loss": 1.295, "step": 3234500 }, { "epoch": 34.14, "learning_rate": 3.293002100108699e-05, "loss": 1.3112, "step": 3235000 }, { "epoch": 34.15, "learning_rate": 3.292738267357557e-05, "loss": 1.2964, "step": 3235500 }, { "epoch": 34.15, "learning_rate": 3.292474434606415e-05, "loss": 1.3135, "step": 3236000 }, { "epoch": 34.16, "learning_rate": 3.292210601855272e-05, "loss": 1.3481, "step": 3236500 }, { "epoch": 34.16, "learning_rate": 3.291946769104129e-05, "loss": 1.3046, "step": 3237000 }, { "epoch": 34.17, "learning_rate": 3.2916829363529874e-05, "loss": 1.2585, "step": 3237500 }, { "epoch": 34.17, "learning_rate": 3.291419103601845e-05, "loss": 1.2676, "step": 3238000 }, { "epoch": 34.18, "learning_rate": 3.2911552708507025e-05, "loss": 1.2602, "step": 3238500 }, { "epoch": 34.18, "learning_rate": 3.29089143809956e-05, "loss": 1.2973, "step": 3239000 }, { "epoch": 34.19, "learning_rate": 3.2906276053484176e-05, "loss": 1.2955, "step": 3239500 }, { "epoch": 34.19, "learning_rate": 3.290363772597275e-05, "loss": 1.2571, "step": 3240000 }, { "epoch": 34.2, "learning_rate": 3.290099939846133e-05, "loss": 1.3381, "step": 3240500 }, { "epoch": 34.2, "learning_rate": 3.289836107094991e-05, "loss": 1.2416, "step": 3241000 }, { "epoch": 34.21, "learning_rate": 3.289572274343848e-05, "loss": 1.3953, "step": 3241500 }, { "epoch": 34.21, "learning_rate": 3.289308441592705e-05, "loss": 1.3548, "step": 3242000 }, { "epoch": 34.22, "learning_rate": 3.2890446088415635e-05, "loss": 1.3128, "step": 3242500 }, { "epoch": 34.22, "learning_rate": 3.288780776090421e-05, "loss": 1.29, "step": 3243000 }, { "epoch": 34.23, "learning_rate": 3.2885169433392786e-05, "loss": 1.2992, "step": 3243500 }, { "epoch": 34.23, "learning_rate": 3.288253110588136e-05, "loss": 1.3104, "step": 3244000 }, { "epoch": 34.24, "learning_rate": 3.287989277836994e-05, "loss": 1.3415, "step": 3244500 }, { "epoch": 34.25, "learning_rate": 3.287725445085851e-05, "loss": 1.3043, "step": 3245000 }, { "epoch": 34.25, "learning_rate": 3.287461612334709e-05, "loss": 1.3252, "step": 3245500 }, { "epoch": 34.26, "learning_rate": 3.287197779583567e-05, "loss": 1.2893, "step": 3246000 }, { "epoch": 34.26, "learning_rate": 3.286933946832424e-05, "loss": 1.3089, "step": 3246500 }, { "epoch": 34.27, "learning_rate": 3.2866701140812815e-05, "loss": 1.3389, "step": 3247000 }, { "epoch": 34.27, "learning_rate": 3.28640628133014e-05, "loss": 1.3125, "step": 3247500 }, { "epoch": 34.28, "learning_rate": 3.286142448578997e-05, "loss": 1.336, "step": 3248000 }, { "epoch": 34.28, "learning_rate": 3.285878615827855e-05, "loss": 1.3375, "step": 3248500 }, { "epoch": 34.29, "learning_rate": 3.285614783076712e-05, "loss": 1.2896, "step": 3249000 }, { "epoch": 34.29, "learning_rate": 3.28535095032557e-05, "loss": 1.2884, "step": 3249500 }, { "epoch": 34.3, "learning_rate": 3.2850871175744274e-05, "loss": 1.3394, "step": 3250000 }, { "epoch": 34.3, "learning_rate": 3.284823284823285e-05, "loss": 1.2778, "step": 3250500 }, { "epoch": 34.31, "learning_rate": 3.2845594520721425e-05, "loss": 1.3489, "step": 3251000 }, { "epoch": 34.31, "learning_rate": 3.284295619321e-05, "loss": 1.2453, "step": 3251500 }, { "epoch": 34.32, "learning_rate": 3.2840317865698576e-05, "loss": 1.3505, "step": 3252000 }, { "epoch": 34.32, "learning_rate": 3.283767953818715e-05, "loss": 1.2954, "step": 3252500 }, { "epoch": 34.33, "learning_rate": 3.2835041210675734e-05, "loss": 1.3283, "step": 3253000 }, { "epoch": 34.34, "learning_rate": 3.28324028831643e-05, "loss": 1.3218, "step": 3253500 }, { "epoch": 34.34, "learning_rate": 3.282976455565288e-05, "loss": 1.2296, "step": 3254000 }, { "epoch": 34.35, "learning_rate": 3.282712622814146e-05, "loss": 1.3185, "step": 3254500 }, { "epoch": 34.35, "learning_rate": 3.2824487900630036e-05, "loss": 1.3322, "step": 3255000 }, { "epoch": 34.36, "learning_rate": 3.282184957311861e-05, "loss": 1.2891, "step": 3255500 }, { "epoch": 34.36, "learning_rate": 3.281921124560719e-05, "loss": 1.3007, "step": 3256000 }, { "epoch": 34.37, "learning_rate": 3.281657291809576e-05, "loss": 1.3372, "step": 3256500 }, { "epoch": 34.37, "learning_rate": 3.281393459058434e-05, "loss": 1.3206, "step": 3257000 }, { "epoch": 34.38, "learning_rate": 3.2811296263072914e-05, "loss": 1.3174, "step": 3257500 }, { "epoch": 34.38, "learning_rate": 3.2808657935561496e-05, "loss": 1.271, "step": 3258000 }, { "epoch": 34.39, "learning_rate": 3.2806019608050065e-05, "loss": 1.3047, "step": 3258500 }, { "epoch": 34.39, "learning_rate": 3.280338128053864e-05, "loss": 1.2941, "step": 3259000 }, { "epoch": 34.4, "learning_rate": 3.280074295302722e-05, "loss": 1.3239, "step": 3259500 }, { "epoch": 34.4, "learning_rate": 3.27981046255158e-05, "loss": 1.3285, "step": 3260000 }, { "epoch": 34.41, "learning_rate": 3.2795466298004366e-05, "loss": 1.3125, "step": 3260500 }, { "epoch": 34.41, "learning_rate": 3.279282797049294e-05, "loss": 1.312, "step": 3261000 }, { "epoch": 34.42, "learning_rate": 3.2790189642981524e-05, "loss": 1.2865, "step": 3261500 }, { "epoch": 34.42, "learning_rate": 3.27875513154701e-05, "loss": 1.2769, "step": 3262000 }, { "epoch": 34.43, "learning_rate": 3.2784912987958675e-05, "loss": 1.317, "step": 3262500 }, { "epoch": 34.44, "learning_rate": 3.278227466044725e-05, "loss": 1.3048, "step": 3263000 }, { "epoch": 34.44, "learning_rate": 3.2779636332935826e-05, "loss": 1.313, "step": 3263500 }, { "epoch": 34.45, "learning_rate": 3.27769980054244e-05, "loss": 1.3216, "step": 3264000 }, { "epoch": 34.45, "learning_rate": 3.277435967791298e-05, "loss": 1.3537, "step": 3264500 }, { "epoch": 34.46, "learning_rate": 3.277172135040156e-05, "loss": 1.288, "step": 3265000 }, { "epoch": 34.46, "learning_rate": 3.276908302289013e-05, "loss": 1.2625, "step": 3265500 }, { "epoch": 34.47, "learning_rate": 3.2766444695378704e-05, "loss": 1.3367, "step": 3266000 }, { "epoch": 34.47, "learning_rate": 3.2763806367867286e-05, "loss": 1.3401, "step": 3266500 }, { "epoch": 34.48, "learning_rate": 3.276116804035586e-05, "loss": 1.2935, "step": 3267000 }, { "epoch": 34.48, "learning_rate": 3.275852971284444e-05, "loss": 1.3239, "step": 3267500 }, { "epoch": 34.49, "learning_rate": 3.275589138533301e-05, "loss": 1.3044, "step": 3268000 }, { "epoch": 34.49, "learning_rate": 3.275325305782159e-05, "loss": 1.3248, "step": 3268500 }, { "epoch": 34.5, "learning_rate": 3.275061473031016e-05, "loss": 1.3818, "step": 3269000 }, { "epoch": 34.5, "learning_rate": 3.274797640279874e-05, "loss": 1.3073, "step": 3269500 }, { "epoch": 34.51, "learning_rate": 3.2745338075287314e-05, "loss": 1.3256, "step": 3270000 }, { "epoch": 34.51, "learning_rate": 3.274269974777589e-05, "loss": 1.3106, "step": 3270500 }, { "epoch": 34.52, "learning_rate": 3.2740061420264465e-05, "loss": 1.333, "step": 3271000 }, { "epoch": 34.53, "learning_rate": 3.273742309275305e-05, "loss": 1.3467, "step": 3271500 }, { "epoch": 34.53, "learning_rate": 3.273478476524162e-05, "loss": 1.3422, "step": 3272000 }, { "epoch": 34.54, "learning_rate": 3.273214643773019e-05, "loss": 1.3051, "step": 3272500 }, { "epoch": 34.54, "learning_rate": 3.2729508110218774e-05, "loss": 1.3029, "step": 3273000 }, { "epoch": 34.55, "learning_rate": 3.272686978270735e-05, "loss": 1.328, "step": 3273500 }, { "epoch": 34.55, "learning_rate": 3.2724231455195925e-05, "loss": 1.3118, "step": 3274000 }, { "epoch": 34.56, "learning_rate": 3.27215931276845e-05, "loss": 1.3052, "step": 3274500 }, { "epoch": 34.56, "learning_rate": 3.2718954800173076e-05, "loss": 1.2605, "step": 3275000 }, { "epoch": 34.57, "learning_rate": 3.271631647266165e-05, "loss": 1.3502, "step": 3275500 }, { "epoch": 34.57, "learning_rate": 3.271367814515023e-05, "loss": 1.3527, "step": 3276000 }, { "epoch": 34.58, "learning_rate": 3.27110398176388e-05, "loss": 1.2374, "step": 3276500 }, { "epoch": 34.58, "learning_rate": 3.2708401490127385e-05, "loss": 1.2863, "step": 3277000 }, { "epoch": 34.59, "learning_rate": 3.270576316261595e-05, "loss": 1.3758, "step": 3277500 }, { "epoch": 34.59, "learning_rate": 3.270312483510453e-05, "loss": 1.2928, "step": 3278000 }, { "epoch": 34.6, "learning_rate": 3.270048650759311e-05, "loss": 1.3418, "step": 3278500 }, { "epoch": 34.6, "learning_rate": 3.2697848180081687e-05, "loss": 1.3556, "step": 3279000 }, { "epoch": 34.61, "learning_rate": 3.2695209852570255e-05, "loss": 1.3507, "step": 3279500 }, { "epoch": 34.61, "learning_rate": 3.269257152505884e-05, "loss": 1.3493, "step": 3280000 }, { "epoch": 34.62, "learning_rate": 3.268993319754741e-05, "loss": 1.3556, "step": 3280500 }, { "epoch": 34.63, "learning_rate": 3.268729487003599e-05, "loss": 1.3153, "step": 3281000 }, { "epoch": 34.63, "learning_rate": 3.2684656542524564e-05, "loss": 1.2871, "step": 3281500 }, { "epoch": 34.64, "learning_rate": 3.268201821501314e-05, "loss": 1.2688, "step": 3282000 }, { "epoch": 34.64, "learning_rate": 3.2679379887501715e-05, "loss": 1.3194, "step": 3282500 }, { "epoch": 34.65, "learning_rate": 3.267674155999029e-05, "loss": 1.3175, "step": 3283000 }, { "epoch": 34.65, "learning_rate": 3.267410323247887e-05, "loss": 1.3148, "step": 3283500 }, { "epoch": 34.66, "learning_rate": 3.267146490496745e-05, "loss": 1.334, "step": 3284000 }, { "epoch": 34.66, "learning_rate": 3.266882657745602e-05, "loss": 1.3011, "step": 3284500 }, { "epoch": 34.67, "learning_rate": 3.26661882499446e-05, "loss": 1.3726, "step": 3285000 }, { "epoch": 34.67, "learning_rate": 3.2663549922433175e-05, "loss": 1.3204, "step": 3285500 }, { "epoch": 34.68, "learning_rate": 3.266091159492175e-05, "loss": 1.3395, "step": 3286000 }, { "epoch": 34.68, "learning_rate": 3.2658273267410326e-05, "loss": 1.3144, "step": 3286500 }, { "epoch": 34.69, "learning_rate": 3.26556349398989e-05, "loss": 1.3257, "step": 3287000 }, { "epoch": 34.69, "learning_rate": 3.265299661238748e-05, "loss": 1.2427, "step": 3287500 }, { "epoch": 34.7, "learning_rate": 3.265035828487605e-05, "loss": 1.2841, "step": 3288000 }, { "epoch": 34.7, "learning_rate": 3.264771995736463e-05, "loss": 1.3274, "step": 3288500 }, { "epoch": 34.71, "learning_rate": 3.26450816298532e-05, "loss": 1.3338, "step": 3289000 }, { "epoch": 34.72, "learning_rate": 3.264244330234178e-05, "loss": 1.3277, "step": 3289500 }, { "epoch": 34.72, "learning_rate": 3.2639804974830354e-05, "loss": 1.3306, "step": 3290000 }, { "epoch": 34.73, "learning_rate": 3.2637166647318936e-05, "loss": 1.341, "step": 3290500 }, { "epoch": 34.73, "learning_rate": 3.263452831980751e-05, "loss": 1.2837, "step": 3291000 }, { "epoch": 34.74, "learning_rate": 3.263188999229608e-05, "loss": 1.3344, "step": 3291500 }, { "epoch": 34.74, "learning_rate": 3.262925166478466e-05, "loss": 1.2827, "step": 3292000 }, { "epoch": 34.75, "learning_rate": 3.262661333727324e-05, "loss": 1.2952, "step": 3292500 }, { "epoch": 34.75, "learning_rate": 3.2623975009761814e-05, "loss": 1.278, "step": 3293000 }, { "epoch": 34.76, "learning_rate": 3.262133668225039e-05, "loss": 1.2901, "step": 3293500 }, { "epoch": 34.76, "learning_rate": 3.2618698354738965e-05, "loss": 1.313, "step": 3294000 }, { "epoch": 34.77, "learning_rate": 3.261606002722754e-05, "loss": 1.3016, "step": 3294500 }, { "epoch": 34.77, "learning_rate": 3.2613421699716116e-05, "loss": 1.302, "step": 3295000 }, { "epoch": 34.78, "learning_rate": 3.26107833722047e-05, "loss": 1.3227, "step": 3295500 }, { "epoch": 34.78, "learning_rate": 3.2608145044693273e-05, "loss": 1.2401, "step": 3296000 }, { "epoch": 34.79, "learning_rate": 3.260550671718184e-05, "loss": 1.339, "step": 3296500 }, { "epoch": 34.79, "learning_rate": 3.2602868389670424e-05, "loss": 1.3217, "step": 3297000 }, { "epoch": 34.8, "learning_rate": 3.2600230062159e-05, "loss": 1.3168, "step": 3297500 }, { "epoch": 34.8, "learning_rate": 3.2597591734647575e-05, "loss": 1.2718, "step": 3298000 }, { "epoch": 34.81, "learning_rate": 3.2594953407136144e-05, "loss": 1.2859, "step": 3298500 }, { "epoch": 34.82, "learning_rate": 3.2592315079624726e-05, "loss": 1.3084, "step": 3299000 }, { "epoch": 34.82, "learning_rate": 3.25896767521133e-05, "loss": 1.3176, "step": 3299500 }, { "epoch": 34.83, "learning_rate": 3.258703842460188e-05, "loss": 1.3069, "step": 3300000 }, { "epoch": 34.83, "learning_rate": 3.258440009709045e-05, "loss": 1.3093, "step": 3300500 }, { "epoch": 34.84, "learning_rate": 3.258176176957903e-05, "loss": 1.2626, "step": 3301000 }, { "epoch": 34.84, "learning_rate": 3.2579123442067604e-05, "loss": 1.3116, "step": 3301500 }, { "epoch": 34.85, "learning_rate": 3.257648511455618e-05, "loss": 1.3213, "step": 3302000 }, { "epoch": 34.85, "learning_rate": 3.257384678704476e-05, "loss": 1.3029, "step": 3302500 }, { "epoch": 34.86, "learning_rate": 3.257120845953334e-05, "loss": 1.3392, "step": 3303000 }, { "epoch": 34.86, "learning_rate": 3.2568570132021906e-05, "loss": 1.2915, "step": 3303500 }, { "epoch": 34.87, "learning_rate": 3.256593180451049e-05, "loss": 1.2715, "step": 3304000 }, { "epoch": 34.87, "learning_rate": 3.2563293476999064e-05, "loss": 1.3344, "step": 3304500 }, { "epoch": 34.88, "learning_rate": 3.256065514948764e-05, "loss": 1.3022, "step": 3305000 }, { "epoch": 34.88, "learning_rate": 3.2558016821976215e-05, "loss": 1.289, "step": 3305500 }, { "epoch": 34.89, "learning_rate": 3.255537849446479e-05, "loss": 1.3373, "step": 3306000 }, { "epoch": 34.89, "learning_rate": 3.2552740166953366e-05, "loss": 1.3209, "step": 3306500 }, { "epoch": 34.9, "learning_rate": 3.255010183944194e-05, "loss": 1.3425, "step": 3307000 }, { "epoch": 34.91, "learning_rate": 3.254746351193052e-05, "loss": 1.3134, "step": 3307500 }, { "epoch": 34.91, "learning_rate": 3.254482518441909e-05, "loss": 1.2812, "step": 3308000 }, { "epoch": 34.92, "learning_rate": 3.254218685690767e-05, "loss": 1.2592, "step": 3308500 }, { "epoch": 34.92, "learning_rate": 3.253954852939625e-05, "loss": 1.3604, "step": 3309000 }, { "epoch": 34.93, "learning_rate": 3.2536910201884825e-05, "loss": 1.3166, "step": 3309500 }, { "epoch": 34.93, "learning_rate": 3.25342718743734e-05, "loss": 1.3783, "step": 3310000 }, { "epoch": 34.94, "learning_rate": 3.253163354686197e-05, "loss": 1.3289, "step": 3310500 }, { "epoch": 34.94, "learning_rate": 3.252899521935055e-05, "loss": 1.263, "step": 3311000 }, { "epoch": 34.95, "learning_rate": 3.252635689183913e-05, "loss": 1.3541, "step": 3311500 }, { "epoch": 34.95, "learning_rate": 3.25237185643277e-05, "loss": 1.2955, "step": 3312000 }, { "epoch": 34.96, "learning_rate": 3.252108023681628e-05, "loss": 1.3328, "step": 3312500 }, { "epoch": 34.96, "learning_rate": 3.2518441909304854e-05, "loss": 1.3573, "step": 3313000 }, { "epoch": 34.97, "learning_rate": 3.251580358179343e-05, "loss": 1.2823, "step": 3313500 }, { "epoch": 34.97, "learning_rate": 3.2513165254282005e-05, "loss": 1.3211, "step": 3314000 }, { "epoch": 34.98, "learning_rate": 3.251052692677059e-05, "loss": 1.347, "step": 3314500 }, { "epoch": 34.98, "learning_rate": 3.2507888599259156e-05, "loss": 1.2679, "step": 3315000 }, { "epoch": 34.99, "learning_rate": 3.250525027174773e-05, "loss": 1.3187, "step": 3315500 }, { "epoch": 34.99, "learning_rate": 3.250261194423631e-05, "loss": 1.3376, "step": 3316000 }, { "epoch": 35.0, "learning_rate": 3.249997361672489e-05, "loss": 1.3143, "step": 3316500 }, { "epoch": 35.01, "learning_rate": 3.2497335289213464e-05, "loss": 1.3282, "step": 3317000 }, { "epoch": 35.01, "learning_rate": 3.249469696170204e-05, "loss": 1.267, "step": 3317500 }, { "epoch": 35.02, "learning_rate": 3.2492058634190615e-05, "loss": 1.2521, "step": 3318000 }, { "epoch": 35.02, "learning_rate": 3.248942030667919e-05, "loss": 1.3308, "step": 3318500 }, { "epoch": 35.03, "learning_rate": 3.2486781979167766e-05, "loss": 1.2637, "step": 3319000 }, { "epoch": 35.03, "learning_rate": 3.248414365165635e-05, "loss": 1.3144, "step": 3319500 }, { "epoch": 35.04, "learning_rate": 3.248150532414492e-05, "loss": 1.3129, "step": 3320000 }, { "epoch": 35.04, "learning_rate": 3.247886699663349e-05, "loss": 1.2922, "step": 3320500 }, { "epoch": 35.05, "learning_rate": 3.2476228669122075e-05, "loss": 1.3077, "step": 3321000 }, { "epoch": 35.05, "learning_rate": 3.247359034161065e-05, "loss": 1.2704, "step": 3321500 }, { "epoch": 35.06, "learning_rate": 3.2470952014099226e-05, "loss": 1.2828, "step": 3322000 }, { "epoch": 35.06, "learning_rate": 3.2468313686587795e-05, "loss": 1.3033, "step": 3322500 }, { "epoch": 35.07, "learning_rate": 3.246567535907638e-05, "loss": 1.2496, "step": 3323000 }, { "epoch": 35.07, "learning_rate": 3.246303703156495e-05, "loss": 1.3323, "step": 3323500 }, { "epoch": 35.08, "learning_rate": 3.246039870405353e-05, "loss": 1.3229, "step": 3324000 }, { "epoch": 35.08, "learning_rate": 3.24577603765421e-05, "loss": 1.3097, "step": 3324500 }, { "epoch": 35.09, "learning_rate": 3.245512204903068e-05, "loss": 1.3332, "step": 3325000 }, { "epoch": 35.1, "learning_rate": 3.2452483721519254e-05, "loss": 1.2869, "step": 3325500 }, { "epoch": 35.1, "learning_rate": 3.244984539400783e-05, "loss": 1.3401, "step": 3326000 }, { "epoch": 35.11, "learning_rate": 3.244720706649641e-05, "loss": 1.3031, "step": 3326500 }, { "epoch": 35.11, "learning_rate": 3.244456873898498e-05, "loss": 1.2857, "step": 3327000 }, { "epoch": 35.12, "learning_rate": 3.2441930411473556e-05, "loss": 1.3691, "step": 3327500 }, { "epoch": 35.12, "learning_rate": 3.243929208396214e-05, "loss": 1.3464, "step": 3328000 }, { "epoch": 35.13, "learning_rate": 3.2436653756450714e-05, "loss": 1.2873, "step": 3328500 }, { "epoch": 35.13, "learning_rate": 3.243401542893929e-05, "loss": 1.2936, "step": 3329000 }, { "epoch": 35.14, "learning_rate": 3.2431377101427865e-05, "loss": 1.283, "step": 3329500 }, { "epoch": 35.14, "learning_rate": 3.242873877391644e-05, "loss": 1.2765, "step": 3330000 }, { "epoch": 35.15, "learning_rate": 3.2426100446405016e-05, "loss": 1.2968, "step": 3330500 }, { "epoch": 35.15, "learning_rate": 3.242346211889359e-05, "loss": 1.2759, "step": 3331000 }, { "epoch": 35.16, "learning_rate": 3.2420823791382174e-05, "loss": 1.2986, "step": 3331500 }, { "epoch": 35.16, "learning_rate": 3.241818546387074e-05, "loss": 1.3082, "step": 3332000 }, { "epoch": 35.17, "learning_rate": 3.241554713635932e-05, "loss": 1.3032, "step": 3332500 }, { "epoch": 35.17, "learning_rate": 3.24129088088479e-05, "loss": 1.3105, "step": 3333000 }, { "epoch": 35.18, "learning_rate": 3.2410270481336476e-05, "loss": 1.3073, "step": 3333500 }, { "epoch": 35.18, "learning_rate": 3.2407632153825044e-05, "loss": 1.2848, "step": 3334000 }, { "epoch": 35.19, "learning_rate": 3.240499382631362e-05, "loss": 1.2727, "step": 3334500 }, { "epoch": 35.2, "learning_rate": 3.24023554988022e-05, "loss": 1.3173, "step": 3335000 }, { "epoch": 35.2, "learning_rate": 3.239971717129078e-05, "loss": 1.3117, "step": 3335500 }, { "epoch": 35.21, "learning_rate": 3.239707884377935e-05, "loss": 1.3018, "step": 3336000 }, { "epoch": 35.21, "learning_rate": 3.239444051626793e-05, "loss": 1.2742, "step": 3336500 }, { "epoch": 35.22, "learning_rate": 3.2391802188756504e-05, "loss": 1.3232, "step": 3337000 }, { "epoch": 35.22, "learning_rate": 3.238916386124508e-05, "loss": 1.3547, "step": 3337500 }, { "epoch": 35.23, "learning_rate": 3.2386525533733655e-05, "loss": 1.3141, "step": 3338000 }, { "epoch": 35.23, "learning_rate": 3.238388720622224e-05, "loss": 1.2623, "step": 3338500 }, { "epoch": 35.24, "learning_rate": 3.2381248878710806e-05, "loss": 1.3982, "step": 3339000 }, { "epoch": 35.24, "learning_rate": 3.237861055119938e-05, "loss": 1.3163, "step": 3339500 }, { "epoch": 35.25, "learning_rate": 3.2375972223687964e-05, "loss": 1.2853, "step": 3340000 }, { "epoch": 35.25, "learning_rate": 3.237333389617654e-05, "loss": 1.2563, "step": 3340500 }, { "epoch": 35.26, "learning_rate": 3.2370695568665115e-05, "loss": 1.3293, "step": 3341000 }, { "epoch": 35.26, "learning_rate": 3.236805724115369e-05, "loss": 1.2829, "step": 3341500 }, { "epoch": 35.27, "learning_rate": 3.2365418913642266e-05, "loss": 1.2506, "step": 3342000 }, { "epoch": 35.27, "learning_rate": 3.236278058613084e-05, "loss": 1.2898, "step": 3342500 }, { "epoch": 35.28, "learning_rate": 3.236014225861942e-05, "loss": 1.3228, "step": 3343000 }, { "epoch": 35.28, "learning_rate": 3.235750393110799e-05, "loss": 1.2863, "step": 3343500 }, { "epoch": 35.29, "learning_rate": 3.235486560359657e-05, "loss": 1.3688, "step": 3344000 }, { "epoch": 35.3, "learning_rate": 3.235222727608514e-05, "loss": 1.2793, "step": 3344500 }, { "epoch": 35.3, "learning_rate": 3.2349588948573725e-05, "loss": 1.2555, "step": 3345000 }, { "epoch": 35.31, "learning_rate": 3.23469506210623e-05, "loss": 1.2876, "step": 3345500 }, { "epoch": 35.31, "learning_rate": 3.234431229355087e-05, "loss": 1.254, "step": 3346000 }, { "epoch": 35.32, "learning_rate": 3.234167396603945e-05, "loss": 1.3048, "step": 3346500 }, { "epoch": 35.32, "learning_rate": 3.233903563852803e-05, "loss": 1.354, "step": 3347000 }, { "epoch": 35.33, "learning_rate": 3.23363973110166e-05, "loss": 1.3816, "step": 3347500 }, { "epoch": 35.33, "learning_rate": 3.233375898350518e-05, "loss": 1.3683, "step": 3348000 }, { "epoch": 35.34, "learning_rate": 3.2331120655993754e-05, "loss": 1.2878, "step": 3348500 }, { "epoch": 35.34, "learning_rate": 3.232848232848233e-05, "loss": 1.3261, "step": 3349000 }, { "epoch": 35.35, "learning_rate": 3.2325844000970905e-05, "loss": 1.3175, "step": 3349500 }, { "epoch": 35.35, "learning_rate": 3.232320567345948e-05, "loss": 1.2946, "step": 3350000 }, { "epoch": 35.36, "learning_rate": 3.232056734594806e-05, "loss": 1.3184, "step": 3350500 }, { "epoch": 35.36, "learning_rate": 3.231792901843663e-05, "loss": 1.3249, "step": 3351000 }, { "epoch": 35.37, "learning_rate": 3.231529069092521e-05, "loss": 1.285, "step": 3351500 }, { "epoch": 35.37, "learning_rate": 3.231265236341379e-05, "loss": 1.3183, "step": 3352000 }, { "epoch": 35.38, "learning_rate": 3.2310014035902365e-05, "loss": 1.3327, "step": 3352500 }, { "epoch": 35.39, "learning_rate": 3.230737570839093e-05, "loss": 1.3164, "step": 3353000 }, { "epoch": 35.39, "learning_rate": 3.2304737380879516e-05, "loss": 1.2909, "step": 3353500 }, { "epoch": 35.4, "learning_rate": 3.230209905336809e-05, "loss": 1.3256, "step": 3354000 }, { "epoch": 35.4, "learning_rate": 3.2299460725856666e-05, "loss": 1.3115, "step": 3354500 }, { "epoch": 35.41, "learning_rate": 3.229682239834524e-05, "loss": 1.2867, "step": 3355000 }, { "epoch": 35.41, "learning_rate": 3.229418407083382e-05, "loss": 1.2568, "step": 3355500 }, { "epoch": 35.42, "learning_rate": 3.229154574332239e-05, "loss": 1.2942, "step": 3356000 }, { "epoch": 35.42, "learning_rate": 3.228890741581097e-05, "loss": 1.3226, "step": 3356500 }, { "epoch": 35.43, "learning_rate": 3.228626908829955e-05, "loss": 1.2666, "step": 3357000 }, { "epoch": 35.43, "learning_rate": 3.2283630760788126e-05, "loss": 1.2832, "step": 3357500 }, { "epoch": 35.44, "learning_rate": 3.2280992433276695e-05, "loss": 1.3169, "step": 3358000 }, { "epoch": 35.44, "learning_rate": 3.227835410576528e-05, "loss": 1.2809, "step": 3358500 }, { "epoch": 35.45, "learning_rate": 3.227571577825385e-05, "loss": 1.2725, "step": 3359000 }, { "epoch": 35.45, "learning_rate": 3.227307745074243e-05, "loss": 1.3165, "step": 3359500 }, { "epoch": 35.46, "learning_rate": 3.2270439123231004e-05, "loss": 1.3392, "step": 3360000 }, { "epoch": 35.46, "learning_rate": 3.226780079571958e-05, "loss": 1.2745, "step": 3360500 }, { "epoch": 35.47, "learning_rate": 3.2265162468208155e-05, "loss": 1.3238, "step": 3361000 }, { "epoch": 35.47, "learning_rate": 3.226252414069673e-05, "loss": 1.2903, "step": 3361500 }, { "epoch": 35.48, "learning_rate": 3.2259885813185306e-05, "loss": 1.3401, "step": 3362000 }, { "epoch": 35.49, "learning_rate": 3.225724748567388e-05, "loss": 1.3136, "step": 3362500 }, { "epoch": 35.49, "learning_rate": 3.2254609158162457e-05, "loss": 1.2528, "step": 3363000 }, { "epoch": 35.5, "learning_rate": 3.225197083065103e-05, "loss": 1.3093, "step": 3363500 }, { "epoch": 35.5, "learning_rate": 3.2249332503139614e-05, "loss": 1.2914, "step": 3364000 }, { "epoch": 35.51, "learning_rate": 3.224669417562819e-05, "loss": 1.2961, "step": 3364500 }, { "epoch": 35.51, "learning_rate": 3.224405584811676e-05, "loss": 1.3304, "step": 3365000 }, { "epoch": 35.52, "learning_rate": 3.224141752060534e-05, "loss": 1.3033, "step": 3365500 }, { "epoch": 35.52, "learning_rate": 3.2238779193093916e-05, "loss": 1.2998, "step": 3366000 }, { "epoch": 35.53, "learning_rate": 3.223614086558249e-05, "loss": 1.3956, "step": 3366500 }, { "epoch": 35.53, "learning_rate": 3.223350253807107e-05, "loss": 1.2939, "step": 3367000 }, { "epoch": 35.54, "learning_rate": 3.223086421055964e-05, "loss": 1.3304, "step": 3367500 }, { "epoch": 35.54, "learning_rate": 3.222822588304822e-05, "loss": 1.3282, "step": 3368000 }, { "epoch": 35.55, "learning_rate": 3.2225587555536794e-05, "loss": 1.2735, "step": 3368500 }, { "epoch": 35.55, "learning_rate": 3.2222949228025376e-05, "loss": 1.2829, "step": 3369000 }, { "epoch": 35.56, "learning_rate": 3.222031090051395e-05, "loss": 1.3421, "step": 3369500 }, { "epoch": 35.56, "learning_rate": 3.221767257300252e-05, "loss": 1.3165, "step": 3370000 }, { "epoch": 35.57, "learning_rate": 3.22150342454911e-05, "loss": 1.3031, "step": 3370500 }, { "epoch": 35.58, "learning_rate": 3.221239591797968e-05, "loss": 1.3093, "step": 3371000 }, { "epoch": 35.58, "learning_rate": 3.220975759046825e-05, "loss": 1.324, "step": 3371500 }, { "epoch": 35.59, "learning_rate": 3.220711926295682e-05, "loss": 1.3185, "step": 3372000 }, { "epoch": 35.59, "learning_rate": 3.2204480935445404e-05, "loss": 1.365, "step": 3372500 }, { "epoch": 35.6, "learning_rate": 3.220184260793398e-05, "loss": 1.3447, "step": 3373000 }, { "epoch": 35.6, "learning_rate": 3.2199204280422555e-05, "loss": 1.2985, "step": 3373500 }, { "epoch": 35.61, "learning_rate": 3.219656595291113e-05, "loss": 1.3086, "step": 3374000 }, { "epoch": 35.61, "learning_rate": 3.2193927625399706e-05, "loss": 1.3139, "step": 3374500 }, { "epoch": 35.62, "learning_rate": 3.219128929788828e-05, "loss": 1.2734, "step": 3375000 }, { "epoch": 35.62, "learning_rate": 3.218865097037686e-05, "loss": 1.2954, "step": 3375500 }, { "epoch": 35.63, "learning_rate": 3.218601264286544e-05, "loss": 1.2664, "step": 3376000 }, { "epoch": 35.63, "learning_rate": 3.2183374315354015e-05, "loss": 1.3353, "step": 3376500 }, { "epoch": 35.64, "learning_rate": 3.2180735987842584e-05, "loss": 1.3147, "step": 3377000 }, { "epoch": 35.64, "learning_rate": 3.2178097660331166e-05, "loss": 1.3677, "step": 3377500 }, { "epoch": 35.65, "learning_rate": 3.217545933281974e-05, "loss": 1.3092, "step": 3378000 }, { "epoch": 35.65, "learning_rate": 3.217282100530832e-05, "loss": 1.2832, "step": 3378500 }, { "epoch": 35.66, "learning_rate": 3.217018267779689e-05, "loss": 1.3259, "step": 3379000 }, { "epoch": 35.66, "learning_rate": 3.216754435028547e-05, "loss": 1.2887, "step": 3379500 }, { "epoch": 35.67, "learning_rate": 3.2164906022774043e-05, "loss": 1.3324, "step": 3380000 }, { "epoch": 35.68, "learning_rate": 3.216226769526262e-05, "loss": 1.3417, "step": 3380500 }, { "epoch": 35.68, "learning_rate": 3.21596293677512e-05, "loss": 1.3076, "step": 3381000 }, { "epoch": 35.69, "learning_rate": 3.215699104023977e-05, "loss": 1.3274, "step": 3381500 }, { "epoch": 35.69, "learning_rate": 3.2154352712728345e-05, "loss": 1.313, "step": 3382000 }, { "epoch": 35.7, "learning_rate": 3.215171438521693e-05, "loss": 1.3182, "step": 3382500 }, { "epoch": 35.7, "learning_rate": 3.21490760577055e-05, "loss": 1.3163, "step": 3383000 }, { "epoch": 35.71, "learning_rate": 3.214643773019408e-05, "loss": 1.2353, "step": 3383500 }, { "epoch": 35.71, "learning_rate": 3.214379940268265e-05, "loss": 1.2792, "step": 3384000 }, { "epoch": 35.72, "learning_rate": 3.214116107517123e-05, "loss": 1.2776, "step": 3384500 }, { "epoch": 35.72, "learning_rate": 3.2138522747659805e-05, "loss": 1.3411, "step": 3385000 }, { "epoch": 35.73, "learning_rate": 3.213588442014838e-05, "loss": 1.2824, "step": 3385500 }, { "epoch": 35.73, "learning_rate": 3.2133246092636956e-05, "loss": 1.3001, "step": 3386000 }, { "epoch": 35.74, "learning_rate": 3.213060776512553e-05, "loss": 1.3303, "step": 3386500 }, { "epoch": 35.74, "learning_rate": 3.212796943761411e-05, "loss": 1.264, "step": 3387000 }, { "epoch": 35.75, "learning_rate": 3.212533111010268e-05, "loss": 1.239, "step": 3387500 }, { "epoch": 35.75, "learning_rate": 3.2122692782591265e-05, "loss": 1.2878, "step": 3388000 }, { "epoch": 35.76, "learning_rate": 3.212005445507984e-05, "loss": 1.3013, "step": 3388500 }, { "epoch": 35.77, "learning_rate": 3.211741612756841e-05, "loss": 1.2855, "step": 3389000 }, { "epoch": 35.77, "learning_rate": 3.211477780005699e-05, "loss": 1.2948, "step": 3389500 }, { "epoch": 35.78, "learning_rate": 3.211213947254557e-05, "loss": 1.3197, "step": 3390000 }, { "epoch": 35.78, "learning_rate": 3.210950114503414e-05, "loss": 1.306, "step": 3390500 }, { "epoch": 35.79, "learning_rate": 3.210686281752272e-05, "loss": 1.3721, "step": 3391000 }, { "epoch": 35.79, "learning_rate": 3.210422449001129e-05, "loss": 1.3112, "step": 3391500 }, { "epoch": 35.8, "learning_rate": 3.210158616249987e-05, "loss": 1.321, "step": 3392000 }, { "epoch": 35.8, "learning_rate": 3.2098947834988444e-05, "loss": 1.3018, "step": 3392500 }, { "epoch": 35.81, "learning_rate": 3.2096309507477026e-05, "loss": 1.3218, "step": 3393000 }, { "epoch": 35.81, "learning_rate": 3.2093671179965595e-05, "loss": 1.34, "step": 3393500 }, { "epoch": 35.82, "learning_rate": 3.209103285245417e-05, "loss": 1.3263, "step": 3394000 }, { "epoch": 35.82, "learning_rate": 3.208839452494275e-05, "loss": 1.3037, "step": 3394500 }, { "epoch": 35.83, "learning_rate": 3.208575619743133e-05, "loss": 1.2947, "step": 3395000 }, { "epoch": 35.83, "learning_rate": 3.2083117869919904e-05, "loss": 1.3031, "step": 3395500 }, { "epoch": 35.84, "learning_rate": 3.208047954240847e-05, "loss": 1.325, "step": 3396000 }, { "epoch": 35.84, "learning_rate": 3.2077841214897055e-05, "loss": 1.3524, "step": 3396500 }, { "epoch": 35.85, "learning_rate": 3.207520288738563e-05, "loss": 1.2493, "step": 3397000 }, { "epoch": 35.85, "learning_rate": 3.2072564559874206e-05, "loss": 1.3388, "step": 3397500 }, { "epoch": 35.86, "learning_rate": 3.206992623236279e-05, "loss": 1.2963, "step": 3398000 }, { "epoch": 35.87, "learning_rate": 3.206728790485136e-05, "loss": 1.3261, "step": 3398500 }, { "epoch": 35.87, "learning_rate": 3.206464957733993e-05, "loss": 1.2814, "step": 3399000 }, { "epoch": 35.88, "learning_rate": 3.206201124982851e-05, "loss": 1.2833, "step": 3399500 }, { "epoch": 35.88, "learning_rate": 3.205937292231709e-05, "loss": 1.264, "step": 3400000 }, { "epoch": 35.89, "learning_rate": 3.205673459480566e-05, "loss": 1.2544, "step": 3400500 }, { "epoch": 35.89, "learning_rate": 3.2054096267294234e-05, "loss": 1.3241, "step": 3401000 }, { "epoch": 35.9, "learning_rate": 3.2051457939782817e-05, "loss": 1.3062, "step": 3401500 }, { "epoch": 35.9, "learning_rate": 3.204881961227139e-05, "loss": 1.2721, "step": 3402000 }, { "epoch": 35.91, "learning_rate": 3.204618128475997e-05, "loss": 1.2978, "step": 3402500 }, { "epoch": 35.91, "learning_rate": 3.204354295724854e-05, "loss": 1.3007, "step": 3403000 }, { "epoch": 35.92, "learning_rate": 3.204090462973712e-05, "loss": 1.3314, "step": 3403500 }, { "epoch": 35.92, "learning_rate": 3.2038266302225694e-05, "loss": 1.321, "step": 3404000 }, { "epoch": 35.93, "learning_rate": 3.203562797471427e-05, "loss": 1.2821, "step": 3404500 }, { "epoch": 35.93, "learning_rate": 3.203298964720285e-05, "loss": 1.329, "step": 3405000 }, { "epoch": 35.94, "learning_rate": 3.203035131969142e-05, "loss": 1.2505, "step": 3405500 }, { "epoch": 35.94, "learning_rate": 3.2027712992179996e-05, "loss": 1.3061, "step": 3406000 }, { "epoch": 35.95, "learning_rate": 3.202507466466858e-05, "loss": 1.3506, "step": 3406500 }, { "epoch": 35.96, "learning_rate": 3.2022436337157154e-05, "loss": 1.2848, "step": 3407000 }, { "epoch": 35.96, "learning_rate": 3.201979800964573e-05, "loss": 1.2351, "step": 3407500 }, { "epoch": 35.97, "learning_rate": 3.20171596821343e-05, "loss": 1.3178, "step": 3408000 }, { "epoch": 35.97, "learning_rate": 3.201452135462288e-05, "loss": 1.2878, "step": 3408500 }, { "epoch": 35.98, "learning_rate": 3.2011883027111456e-05, "loss": 1.3587, "step": 3409000 }, { "epoch": 35.98, "learning_rate": 3.200924469960003e-05, "loss": 1.4041, "step": 3409500 }, { "epoch": 35.99, "learning_rate": 3.2006606372088607e-05, "loss": 1.3553, "step": 3410000 }, { "epoch": 35.99, "learning_rate": 3.200396804457718e-05, "loss": 1.3405, "step": 3410500 }, { "epoch": 36.0, "learning_rate": 3.200132971706576e-05, "loss": 1.3439, "step": 3411000 }, { "epoch": 36.0, "learning_rate": 3.199869138955433e-05, "loss": 1.2915, "step": 3411500 }, { "epoch": 36.01, "learning_rate": 3.1996053062042915e-05, "loss": 1.3599, "step": 3412000 }, { "epoch": 36.01, "learning_rate": 3.1993414734531484e-05, "loss": 1.3213, "step": 3412500 }, { "epoch": 36.02, "learning_rate": 3.199077640702006e-05, "loss": 1.2963, "step": 3413000 }, { "epoch": 36.02, "learning_rate": 3.198813807950864e-05, "loss": 1.2931, "step": 3413500 }, { "epoch": 36.03, "learning_rate": 3.198549975199722e-05, "loss": 1.2656, "step": 3414000 }, { "epoch": 36.03, "learning_rate": 3.198286142448579e-05, "loss": 1.3036, "step": 3414500 }, { "epoch": 36.04, "learning_rate": 3.198022309697437e-05, "loss": 1.3521, "step": 3415000 }, { "epoch": 36.04, "learning_rate": 3.1977584769462944e-05, "loss": 1.2994, "step": 3415500 }, { "epoch": 36.05, "learning_rate": 3.197494644195152e-05, "loss": 1.2671, "step": 3416000 }, { "epoch": 36.06, "learning_rate": 3.1972308114440095e-05, "loss": 1.2654, "step": 3416500 }, { "epoch": 36.06, "learning_rate": 3.196966978692868e-05, "loss": 1.303, "step": 3417000 }, { "epoch": 36.07, "learning_rate": 3.1967031459417246e-05, "loss": 1.3033, "step": 3417500 }, { "epoch": 36.07, "learning_rate": 3.196439313190582e-05, "loss": 1.3188, "step": 3418000 }, { "epoch": 36.08, "learning_rate": 3.1961754804394403e-05, "loss": 1.281, "step": 3418500 }, { "epoch": 36.08, "learning_rate": 3.195911647688298e-05, "loss": 1.3069, "step": 3419000 }, { "epoch": 36.09, "learning_rate": 3.195647814937155e-05, "loss": 1.2874, "step": 3419500 }, { "epoch": 36.09, "learning_rate": 3.195383982186013e-05, "loss": 1.2861, "step": 3420000 }, { "epoch": 36.1, "learning_rate": 3.1951201494348705e-05, "loss": 1.282, "step": 3420500 }, { "epoch": 36.1, "learning_rate": 3.194856316683728e-05, "loss": 1.3354, "step": 3421000 }, { "epoch": 36.11, "learning_rate": 3.1945924839325856e-05, "loss": 1.2998, "step": 3421500 }, { "epoch": 36.11, "learning_rate": 3.194328651181443e-05, "loss": 1.3079, "step": 3422000 }, { "epoch": 36.12, "learning_rate": 3.194064818430301e-05, "loss": 1.2694, "step": 3422500 }, { "epoch": 36.12, "learning_rate": 3.193800985679158e-05, "loss": 1.2697, "step": 3423000 }, { "epoch": 36.13, "learning_rate": 3.193537152928016e-05, "loss": 1.3719, "step": 3423500 }, { "epoch": 36.13, "learning_rate": 3.193273320176874e-05, "loss": 1.3045, "step": 3424000 }, { "epoch": 36.14, "learning_rate": 3.193009487425731e-05, "loss": 1.3182, "step": 3424500 }, { "epoch": 36.15, "learning_rate": 3.1927456546745885e-05, "loss": 1.3024, "step": 3425000 }, { "epoch": 36.15, "learning_rate": 3.192481821923447e-05, "loss": 1.2764, "step": 3425500 }, { "epoch": 36.16, "learning_rate": 3.192217989172304e-05, "loss": 1.3114, "step": 3426000 }, { "epoch": 36.16, "learning_rate": 3.191954156421162e-05, "loss": 1.2808, "step": 3426500 }, { "epoch": 36.17, "learning_rate": 3.1916903236700193e-05, "loss": 1.3368, "step": 3427000 }, { "epoch": 36.17, "learning_rate": 3.191426490918877e-05, "loss": 1.3199, "step": 3427500 }, { "epoch": 36.18, "learning_rate": 3.1911626581677344e-05, "loss": 1.3201, "step": 3428000 }, { "epoch": 36.18, "learning_rate": 3.190898825416592e-05, "loss": 1.273, "step": 3428500 }, { "epoch": 36.19, "learning_rate": 3.1906349926654495e-05, "loss": 1.3349, "step": 3429000 }, { "epoch": 36.19, "learning_rate": 3.190371159914307e-05, "loss": 1.3314, "step": 3429500 }, { "epoch": 36.2, "learning_rate": 3.1901073271631646e-05, "loss": 1.2621, "step": 3430000 }, { "epoch": 36.2, "learning_rate": 3.189843494412023e-05, "loss": 1.3375, "step": 3430500 }, { "epoch": 36.21, "learning_rate": 3.1895796616608804e-05, "loss": 1.2945, "step": 3431000 }, { "epoch": 36.21, "learning_rate": 3.189315828909737e-05, "loss": 1.2716, "step": 3431500 }, { "epoch": 36.22, "learning_rate": 3.1890519961585955e-05, "loss": 1.2819, "step": 3432000 }, { "epoch": 36.22, "learning_rate": 3.188788163407453e-05, "loss": 1.3364, "step": 3432500 }, { "epoch": 36.23, "learning_rate": 3.1885243306563106e-05, "loss": 1.3239, "step": 3433000 }, { "epoch": 36.23, "learning_rate": 3.188260497905168e-05, "loss": 1.2668, "step": 3433500 }, { "epoch": 36.24, "learning_rate": 3.187996665154026e-05, "loss": 1.3217, "step": 3434000 }, { "epoch": 36.25, "learning_rate": 3.187732832402883e-05, "loss": 1.3135, "step": 3434500 }, { "epoch": 36.25, "learning_rate": 3.187468999651741e-05, "loss": 1.3086, "step": 3435000 }, { "epoch": 36.26, "learning_rate": 3.1872051669005984e-05, "loss": 1.3335, "step": 3435500 }, { "epoch": 36.26, "learning_rate": 3.1869413341494566e-05, "loss": 1.2929, "step": 3436000 }, { "epoch": 36.27, "learning_rate": 3.1866775013983134e-05, "loss": 1.2855, "step": 3436500 }, { "epoch": 36.27, "learning_rate": 3.186413668647171e-05, "loss": 1.3044, "step": 3437000 }, { "epoch": 36.28, "learning_rate": 3.186149835896029e-05, "loss": 1.3197, "step": 3437500 }, { "epoch": 36.28, "learning_rate": 3.185886003144887e-05, "loss": 1.2898, "step": 3438000 }, { "epoch": 36.29, "learning_rate": 3.1856221703937436e-05, "loss": 1.3115, "step": 3438500 }, { "epoch": 36.29, "learning_rate": 3.185358337642602e-05, "loss": 1.3275, "step": 3439000 }, { "epoch": 36.3, "learning_rate": 3.1850945048914594e-05, "loss": 1.3462, "step": 3439500 }, { "epoch": 36.3, "learning_rate": 3.184830672140317e-05, "loss": 1.3389, "step": 3440000 }, { "epoch": 36.31, "learning_rate": 3.1845668393891745e-05, "loss": 1.3338, "step": 3440500 }, { "epoch": 36.31, "learning_rate": 3.184303006638032e-05, "loss": 1.3057, "step": 3441000 }, { "epoch": 36.32, "learning_rate": 3.1840391738868896e-05, "loss": 1.3027, "step": 3441500 }, { "epoch": 36.32, "learning_rate": 3.183775341135747e-05, "loss": 1.2566, "step": 3442000 }, { "epoch": 36.33, "learning_rate": 3.1835115083846054e-05, "loss": 1.3479, "step": 3442500 }, { "epoch": 36.34, "learning_rate": 3.183247675633463e-05, "loss": 1.2729, "step": 3443000 }, { "epoch": 36.34, "learning_rate": 3.18298384288232e-05, "loss": 1.3078, "step": 3443500 }, { "epoch": 36.35, "learning_rate": 3.182720010131178e-05, "loss": 1.2878, "step": 3444000 }, { "epoch": 36.35, "learning_rate": 3.1824561773800356e-05, "loss": 1.3511, "step": 3444500 }, { "epoch": 36.36, "learning_rate": 3.182192344628893e-05, "loss": 1.3416, "step": 3445000 }, { "epoch": 36.36, "learning_rate": 3.181928511877751e-05, "loss": 1.3127, "step": 3445500 }, { "epoch": 36.37, "learning_rate": 3.181664679126608e-05, "loss": 1.3096, "step": 3446000 }, { "epoch": 36.37, "learning_rate": 3.181400846375466e-05, "loss": 1.3012, "step": 3446500 }, { "epoch": 36.38, "learning_rate": 3.181137013624323e-05, "loss": 1.3508, "step": 3447000 }, { "epoch": 36.38, "learning_rate": 3.180873180873181e-05, "loss": 1.3412, "step": 3447500 }, { "epoch": 36.39, "learning_rate": 3.1806093481220384e-05, "loss": 1.3027, "step": 3448000 }, { "epoch": 36.39, "learning_rate": 3.180345515370896e-05, "loss": 1.2829, "step": 3448500 }, { "epoch": 36.4, "learning_rate": 3.1800816826197535e-05, "loss": 1.3455, "step": 3449000 }, { "epoch": 36.4, "learning_rate": 3.179817849868612e-05, "loss": 1.3141, "step": 3449500 }, { "epoch": 36.41, "learning_rate": 3.179554017117469e-05, "loss": 1.2862, "step": 3450000 }, { "epoch": 36.41, "learning_rate": 3.179290184366326e-05, "loss": 1.3245, "step": 3450500 }, { "epoch": 36.42, "learning_rate": 3.1790263516151844e-05, "loss": 1.2743, "step": 3451000 }, { "epoch": 36.42, "learning_rate": 3.178762518864042e-05, "loss": 1.3071, "step": 3451500 }, { "epoch": 36.43, "learning_rate": 3.1784986861128995e-05, "loss": 1.2824, "step": 3452000 }, { "epoch": 36.44, "learning_rate": 3.178234853361757e-05, "loss": 1.2841, "step": 3452500 }, { "epoch": 36.44, "learning_rate": 3.1779710206106146e-05, "loss": 1.2612, "step": 3453000 }, { "epoch": 36.45, "learning_rate": 3.177707187859472e-05, "loss": 1.2588, "step": 3453500 }, { "epoch": 36.45, "learning_rate": 3.17744335510833e-05, "loss": 1.2779, "step": 3454000 }, { "epoch": 36.46, "learning_rate": 3.177179522357188e-05, "loss": 1.2871, "step": 3454500 }, { "epoch": 36.46, "learning_rate": 3.1769156896060455e-05, "loss": 1.2744, "step": 3455000 }, { "epoch": 36.47, "learning_rate": 3.176651856854902e-05, "loss": 1.3723, "step": 3455500 }, { "epoch": 36.47, "learning_rate": 3.1763880241037606e-05, "loss": 1.3229, "step": 3456000 }, { "epoch": 36.48, "learning_rate": 3.176124191352618e-05, "loss": 1.292, "step": 3456500 }, { "epoch": 36.48, "learning_rate": 3.1758603586014757e-05, "loss": 1.3108, "step": 3457000 }, { "epoch": 36.49, "learning_rate": 3.1755965258503325e-05, "loss": 1.3146, "step": 3457500 }, { "epoch": 36.49, "learning_rate": 3.175332693099191e-05, "loss": 1.2721, "step": 3458000 }, { "epoch": 36.5, "learning_rate": 3.175068860348048e-05, "loss": 1.2681, "step": 3458500 }, { "epoch": 36.5, "learning_rate": 3.174805027596906e-05, "loss": 1.318, "step": 3459000 }, { "epoch": 36.51, "learning_rate": 3.1745411948457634e-05, "loss": 1.2839, "step": 3459500 }, { "epoch": 36.51, "learning_rate": 3.174277362094621e-05, "loss": 1.2645, "step": 3460000 }, { "epoch": 36.52, "learning_rate": 3.1740135293434785e-05, "loss": 1.3626, "step": 3460500 }, { "epoch": 36.53, "learning_rate": 3.173749696592336e-05, "loss": 1.2433, "step": 3461000 }, { "epoch": 36.53, "learning_rate": 3.173485863841194e-05, "loss": 1.3081, "step": 3461500 }, { "epoch": 36.54, "learning_rate": 3.173222031090052e-05, "loss": 1.3224, "step": 3462000 }, { "epoch": 36.54, "learning_rate": 3.172958198338909e-05, "loss": 1.295, "step": 3462500 }, { "epoch": 36.55, "learning_rate": 3.172694365587767e-05, "loss": 1.2893, "step": 3463000 }, { "epoch": 36.55, "learning_rate": 3.1724305328366245e-05, "loss": 1.3241, "step": 3463500 }, { "epoch": 36.56, "learning_rate": 3.172166700085482e-05, "loss": 1.3496, "step": 3464000 }, { "epoch": 36.56, "learning_rate": 3.1719028673343396e-05, "loss": 1.3775, "step": 3464500 }, { "epoch": 36.57, "learning_rate": 3.171639034583197e-05, "loss": 1.3175, "step": 3465000 }, { "epoch": 36.57, "learning_rate": 3.171375201832055e-05, "loss": 1.3133, "step": 3465500 }, { "epoch": 36.58, "learning_rate": 3.171111369080912e-05, "loss": 1.3219, "step": 3466000 }, { "epoch": 36.58, "learning_rate": 3.1708475363297704e-05, "loss": 1.3065, "step": 3466500 }, { "epoch": 36.59, "learning_rate": 3.170583703578627e-05, "loss": 1.3005, "step": 3467000 }, { "epoch": 36.59, "learning_rate": 3.170319870827485e-05, "loss": 1.2658, "step": 3467500 }, { "epoch": 36.6, "learning_rate": 3.170056038076343e-05, "loss": 1.2803, "step": 3468000 }, { "epoch": 36.6, "learning_rate": 3.1697922053252006e-05, "loss": 1.3296, "step": 3468500 }, { "epoch": 36.61, "learning_rate": 3.169528372574058e-05, "loss": 1.3021, "step": 3469000 }, { "epoch": 36.61, "learning_rate": 3.169264539822915e-05, "loss": 1.2946, "step": 3469500 }, { "epoch": 36.62, "learning_rate": 3.169000707071773e-05, "loss": 1.2531, "step": 3470000 }, { "epoch": 36.63, "learning_rate": 3.168736874320631e-05, "loss": 1.3658, "step": 3470500 }, { "epoch": 36.63, "learning_rate": 3.1684730415694884e-05, "loss": 1.2815, "step": 3471000 }, { "epoch": 36.64, "learning_rate": 3.168209208818346e-05, "loss": 1.2771, "step": 3471500 }, { "epoch": 36.64, "learning_rate": 3.1679453760672035e-05, "loss": 1.3529, "step": 3472000 }, { "epoch": 36.65, "learning_rate": 3.167681543316061e-05, "loss": 1.3334, "step": 3472500 }, { "epoch": 36.65, "learning_rate": 3.1674177105649186e-05, "loss": 1.283, "step": 3473000 }, { "epoch": 36.66, "learning_rate": 3.167153877813777e-05, "loss": 1.2908, "step": 3473500 }, { "epoch": 36.66, "learning_rate": 3.1668900450626343e-05, "loss": 1.277, "step": 3474000 }, { "epoch": 36.67, "learning_rate": 3.166626212311491e-05, "loss": 1.2739, "step": 3474500 }, { "epoch": 36.67, "learning_rate": 3.1663623795603494e-05, "loss": 1.2821, "step": 3475000 }, { "epoch": 36.68, "learning_rate": 3.166098546809207e-05, "loss": 1.272, "step": 3475500 }, { "epoch": 36.68, "learning_rate": 3.1658347140580645e-05, "loss": 1.275, "step": 3476000 }, { "epoch": 36.69, "learning_rate": 3.165570881306922e-05, "loss": 1.2274, "step": 3476500 }, { "epoch": 36.69, "learning_rate": 3.1653070485557796e-05, "loss": 1.3197, "step": 3477000 }, { "epoch": 36.7, "learning_rate": 3.165043215804637e-05, "loss": 1.196, "step": 3477500 }, { "epoch": 36.7, "learning_rate": 3.164779383053495e-05, "loss": 1.2854, "step": 3478000 }, { "epoch": 36.71, "learning_rate": 3.164515550302353e-05, "loss": 1.2703, "step": 3478500 }, { "epoch": 36.71, "learning_rate": 3.16425171755121e-05, "loss": 1.2607, "step": 3479000 }, { "epoch": 36.72, "learning_rate": 3.1639878848000674e-05, "loss": 1.3349, "step": 3479500 }, { "epoch": 36.73, "learning_rate": 3.1637240520489256e-05, "loss": 1.2538, "step": 3480000 }, { "epoch": 36.73, "learning_rate": 3.163460219297783e-05, "loss": 1.2865, "step": 3480500 }, { "epoch": 36.74, "learning_rate": 3.163196386546641e-05, "loss": 1.2793, "step": 3481000 }, { "epoch": 36.74, "learning_rate": 3.1629325537954976e-05, "loss": 1.2554, "step": 3481500 }, { "epoch": 36.75, "learning_rate": 3.162668721044356e-05, "loss": 1.2991, "step": 3482000 }, { "epoch": 36.75, "learning_rate": 3.1624048882932134e-05, "loss": 1.3173, "step": 3482500 }, { "epoch": 36.76, "learning_rate": 3.162141055542071e-05, "loss": 1.2879, "step": 3483000 }, { "epoch": 36.76, "learning_rate": 3.161877222790929e-05, "loss": 1.3041, "step": 3483500 }, { "epoch": 36.77, "learning_rate": 3.161613390039786e-05, "loss": 1.3767, "step": 3484000 }, { "epoch": 36.77, "learning_rate": 3.1613495572886435e-05, "loss": 1.2725, "step": 3484500 }, { "epoch": 36.78, "learning_rate": 3.161085724537501e-05, "loss": 1.3227, "step": 3485000 }, { "epoch": 36.78, "learning_rate": 3.160821891786359e-05, "loss": 1.2992, "step": 3485500 }, { "epoch": 36.79, "learning_rate": 3.160558059035216e-05, "loss": 1.2995, "step": 3486000 }, { "epoch": 36.79, "learning_rate": 3.160294226284074e-05, "loss": 1.281, "step": 3486500 }, { "epoch": 36.8, "learning_rate": 3.160030393532932e-05, "loss": 1.3515, "step": 3487000 }, { "epoch": 36.8, "learning_rate": 3.1597665607817895e-05, "loss": 1.2775, "step": 3487500 }, { "epoch": 36.81, "learning_rate": 3.159502728030647e-05, "loss": 1.2939, "step": 3488000 }, { "epoch": 36.82, "learning_rate": 3.1592388952795046e-05, "loss": 1.2884, "step": 3488500 }, { "epoch": 36.82, "learning_rate": 3.158975062528362e-05, "loss": 1.2933, "step": 3489000 }, { "epoch": 36.83, "learning_rate": 3.15871122977722e-05, "loss": 1.2886, "step": 3489500 }, { "epoch": 36.83, "learning_rate": 3.158447397026077e-05, "loss": 1.3439, "step": 3490000 }, { "epoch": 36.84, "learning_rate": 3.1581835642749355e-05, "loss": 1.2699, "step": 3490500 }, { "epoch": 36.84, "learning_rate": 3.1579197315237924e-05, "loss": 1.277, "step": 3491000 }, { "epoch": 36.85, "learning_rate": 3.15765589877265e-05, "loss": 1.4058, "step": 3491500 }, { "epoch": 36.85, "learning_rate": 3.157392066021508e-05, "loss": 1.3213, "step": 3492000 }, { "epoch": 36.86, "learning_rate": 3.157128233270366e-05, "loss": 1.314, "step": 3492500 }, { "epoch": 36.86, "learning_rate": 3.156864400519223e-05, "loss": 1.3191, "step": 3493000 }, { "epoch": 36.87, "learning_rate": 3.156600567768081e-05, "loss": 1.2955, "step": 3493500 }, { "epoch": 36.87, "learning_rate": 3.156336735016938e-05, "loss": 1.3016, "step": 3494000 }, { "epoch": 36.88, "learning_rate": 3.156072902265796e-05, "loss": 1.2911, "step": 3494500 }, { "epoch": 36.88, "learning_rate": 3.1558090695146534e-05, "loss": 1.287, "step": 3495000 }, { "epoch": 36.89, "learning_rate": 3.155545236763511e-05, "loss": 1.2406, "step": 3495500 }, { "epoch": 36.89, "learning_rate": 3.1552814040123685e-05, "loss": 1.3367, "step": 3496000 }, { "epoch": 36.9, "learning_rate": 3.155017571261226e-05, "loss": 1.3336, "step": 3496500 }, { "epoch": 36.9, "learning_rate": 3.1547537385100836e-05, "loss": 1.3336, "step": 3497000 }, { "epoch": 36.91, "learning_rate": 3.154489905758942e-05, "loss": 1.3178, "step": 3497500 }, { "epoch": 36.92, "learning_rate": 3.154226073007799e-05, "loss": 1.3729, "step": 3498000 }, { "epoch": 36.92, "learning_rate": 3.153962240256656e-05, "loss": 1.2757, "step": 3498500 }, { "epoch": 36.93, "learning_rate": 3.1536984075055145e-05, "loss": 1.3081, "step": 3499000 }, { "epoch": 36.93, "learning_rate": 3.153434574754372e-05, "loss": 1.3061, "step": 3499500 }, { "epoch": 36.94, "learning_rate": 3.1531707420032296e-05, "loss": 1.2998, "step": 3500000 }, { "epoch": 36.94, "learning_rate": 3.152906909252087e-05, "loss": 1.369, "step": 3500500 }, { "epoch": 36.95, "learning_rate": 3.152643076500945e-05, "loss": 1.3256, "step": 3501000 }, { "epoch": 36.95, "learning_rate": 3.152379243749802e-05, "loss": 1.3197, "step": 3501500 }, { "epoch": 36.96, "learning_rate": 3.15211541099866e-05, "loss": 1.2965, "step": 3502000 }, { "epoch": 36.96, "learning_rate": 3.151851578247518e-05, "loss": 1.2806, "step": 3502500 }, { "epoch": 36.97, "learning_rate": 3.151587745496375e-05, "loss": 1.3501, "step": 3503000 }, { "epoch": 36.97, "learning_rate": 3.1513239127452324e-05, "loss": 1.3758, "step": 3503500 }, { "epoch": 36.98, "learning_rate": 3.1510600799940907e-05, "loss": 1.2739, "step": 3504000 }, { "epoch": 36.98, "learning_rate": 3.150796247242948e-05, "loss": 1.3115, "step": 3504500 }, { "epoch": 36.99, "learning_rate": 3.150532414491805e-05, "loss": 1.2546, "step": 3505000 }, { "epoch": 36.99, "learning_rate": 3.150268581740663e-05, "loss": 1.2759, "step": 3505500 }, { "epoch": 37.0, "learning_rate": 3.150004748989521e-05, "loss": 1.3059, "step": 3506000 }, { "epoch": 37.01, "learning_rate": 3.1497409162383784e-05, "loss": 1.2743, "step": 3506500 }, { "epoch": 37.01, "learning_rate": 3.149477083487236e-05, "loss": 1.204, "step": 3507000 }, { "epoch": 37.02, "learning_rate": 3.1492132507360935e-05, "loss": 1.3306, "step": 3507500 }, { "epoch": 37.02, "learning_rate": 3.148949417984951e-05, "loss": 1.306, "step": 3508000 }, { "epoch": 37.03, "learning_rate": 3.1486855852338086e-05, "loss": 1.2886, "step": 3508500 }, { "epoch": 37.03, "learning_rate": 3.148421752482666e-05, "loss": 1.2411, "step": 3509000 }, { "epoch": 37.04, "learning_rate": 3.1481579197315244e-05, "loss": 1.2975, "step": 3509500 }, { "epoch": 37.04, "learning_rate": 3.147894086980381e-05, "loss": 1.2473, "step": 3510000 }, { "epoch": 37.05, "learning_rate": 3.147630254229239e-05, "loss": 1.3332, "step": 3510500 }, { "epoch": 37.05, "learning_rate": 3.147366421478097e-05, "loss": 1.3025, "step": 3511000 }, { "epoch": 37.06, "learning_rate": 3.1471025887269546e-05, "loss": 1.2582, "step": 3511500 }, { "epoch": 37.06, "learning_rate": 3.146838755975812e-05, "loss": 1.3181, "step": 3512000 }, { "epoch": 37.07, "learning_rate": 3.14657492322467e-05, "loss": 1.304, "step": 3512500 }, { "epoch": 37.07, "learning_rate": 3.146311090473527e-05, "loss": 1.2764, "step": 3513000 }, { "epoch": 37.08, "learning_rate": 3.146047257722385e-05, "loss": 1.2825, "step": 3513500 }, { "epoch": 37.08, "learning_rate": 3.145783424971242e-05, "loss": 1.2407, "step": 3514000 }, { "epoch": 37.09, "learning_rate": 3.1455195922201e-05, "loss": 1.2931, "step": 3514500 }, { "epoch": 37.09, "learning_rate": 3.1452557594689574e-05, "loss": 1.2562, "step": 3515000 }, { "epoch": 37.1, "learning_rate": 3.144991926717815e-05, "loss": 1.2767, "step": 3515500 }, { "epoch": 37.11, "learning_rate": 3.144728093966673e-05, "loss": 1.3452, "step": 3516000 }, { "epoch": 37.11, "learning_rate": 3.144464261215531e-05, "loss": 1.3305, "step": 3516500 }, { "epoch": 37.12, "learning_rate": 3.1442004284643876e-05, "loss": 1.2927, "step": 3517000 }, { "epoch": 37.12, "learning_rate": 3.143936595713246e-05, "loss": 1.2694, "step": 3517500 }, { "epoch": 37.13, "learning_rate": 3.1436727629621034e-05, "loss": 1.2725, "step": 3518000 }, { "epoch": 37.13, "learning_rate": 3.143408930210961e-05, "loss": 1.2999, "step": 3518500 }, { "epoch": 37.14, "learning_rate": 3.1431450974598185e-05, "loss": 1.3309, "step": 3519000 }, { "epoch": 37.14, "learning_rate": 3.142881264708676e-05, "loss": 1.3217, "step": 3519500 }, { "epoch": 37.15, "learning_rate": 3.1426174319575336e-05, "loss": 1.3062, "step": 3520000 }, { "epoch": 37.15, "learning_rate": 3.142353599206391e-05, "loss": 1.3038, "step": 3520500 }, { "epoch": 37.16, "learning_rate": 3.142089766455249e-05, "loss": 1.273, "step": 3521000 }, { "epoch": 37.16, "learning_rate": 3.141825933704107e-05, "loss": 1.3366, "step": 3521500 }, { "epoch": 37.17, "learning_rate": 3.141562100952964e-05, "loss": 1.3053, "step": 3522000 }, { "epoch": 37.17, "learning_rate": 3.141298268201821e-05, "loss": 1.3027, "step": 3522500 }, { "epoch": 37.18, "learning_rate": 3.1410344354506795e-05, "loss": 1.2441, "step": 3523000 }, { "epoch": 37.18, "learning_rate": 3.140770602699537e-05, "loss": 1.2713, "step": 3523500 }, { "epoch": 37.19, "learning_rate": 3.140506769948394e-05, "loss": 1.2683, "step": 3524000 }, { "epoch": 37.2, "learning_rate": 3.140242937197252e-05, "loss": 1.2926, "step": 3524500 }, { "epoch": 37.2, "learning_rate": 3.13997910444611e-05, "loss": 1.3188, "step": 3525000 }, { "epoch": 37.21, "learning_rate": 3.139715271694967e-05, "loss": 1.2798, "step": 3525500 }, { "epoch": 37.21, "learning_rate": 3.139451438943825e-05, "loss": 1.304, "step": 3526000 }, { "epoch": 37.22, "learning_rate": 3.1391876061926824e-05, "loss": 1.3754, "step": 3526500 }, { "epoch": 37.22, "learning_rate": 3.13892377344154e-05, "loss": 1.3202, "step": 3527000 }, { "epoch": 37.23, "learning_rate": 3.1386599406903975e-05, "loss": 1.2923, "step": 3527500 }, { "epoch": 37.23, "learning_rate": 3.138396107939256e-05, "loss": 1.2978, "step": 3528000 }, { "epoch": 37.24, "learning_rate": 3.138132275188113e-05, "loss": 1.3355, "step": 3528500 }, { "epoch": 37.24, "learning_rate": 3.13786844243697e-05, "loss": 1.2581, "step": 3529000 }, { "epoch": 37.25, "learning_rate": 3.1376046096858284e-05, "loss": 1.3587, "step": 3529500 }, { "epoch": 37.25, "learning_rate": 3.137340776934686e-05, "loss": 1.3081, "step": 3530000 }, { "epoch": 37.26, "learning_rate": 3.1370769441835435e-05, "loss": 1.3271, "step": 3530500 }, { "epoch": 37.26, "learning_rate": 3.136813111432401e-05, "loss": 1.2658, "step": 3531000 }, { "epoch": 37.27, "learning_rate": 3.1365492786812585e-05, "loss": 1.3463, "step": 3531500 }, { "epoch": 37.27, "learning_rate": 3.136285445930116e-05, "loss": 1.3232, "step": 3532000 }, { "epoch": 37.28, "learning_rate": 3.1360216131789736e-05, "loss": 1.2941, "step": 3532500 }, { "epoch": 37.28, "learning_rate": 3.135757780427831e-05, "loss": 1.2635, "step": 3533000 }, { "epoch": 37.29, "learning_rate": 3.135493947676689e-05, "loss": 1.3233, "step": 3533500 }, { "epoch": 37.3, "learning_rate": 3.135230114925546e-05, "loss": 1.3269, "step": 3534000 }, { "epoch": 37.3, "learning_rate": 3.134966282174404e-05, "loss": 1.2183, "step": 3534500 }, { "epoch": 37.31, "learning_rate": 3.134702449423262e-05, "loss": 1.3354, "step": 3535000 }, { "epoch": 37.31, "learning_rate": 3.1344386166721196e-05, "loss": 1.2628, "step": 3535500 }, { "epoch": 37.32, "learning_rate": 3.1341747839209765e-05, "loss": 1.2855, "step": 3536000 }, { "epoch": 37.32, "learning_rate": 3.133910951169835e-05, "loss": 1.2879, "step": 3536500 }, { "epoch": 37.33, "learning_rate": 3.133647118418692e-05, "loss": 1.2446, "step": 3537000 }, { "epoch": 37.33, "learning_rate": 3.13338328566755e-05, "loss": 1.2713, "step": 3537500 }, { "epoch": 37.34, "learning_rate": 3.1331194529164074e-05, "loss": 1.2536, "step": 3538000 }, { "epoch": 37.34, "learning_rate": 3.132855620165265e-05, "loss": 1.2811, "step": 3538500 }, { "epoch": 37.35, "learning_rate": 3.1325917874141225e-05, "loss": 1.2432, "step": 3539000 }, { "epoch": 37.35, "learning_rate": 3.13232795466298e-05, "loss": 1.2986, "step": 3539500 }, { "epoch": 37.36, "learning_rate": 3.132064121911838e-05, "loss": 1.2511, "step": 3540000 }, { "epoch": 37.36, "learning_rate": 3.131800289160696e-05, "loss": 1.3599, "step": 3540500 }, { "epoch": 37.37, "learning_rate": 3.1315364564095527e-05, "loss": 1.302, "step": 3541000 }, { "epoch": 37.37, "learning_rate": 3.131272623658411e-05, "loss": 1.2909, "step": 3541500 }, { "epoch": 37.38, "learning_rate": 3.1310087909072684e-05, "loss": 1.2782, "step": 3542000 }, { "epoch": 37.39, "learning_rate": 3.130744958156126e-05, "loss": 1.3049, "step": 3542500 }, { "epoch": 37.39, "learning_rate": 3.130481125404983e-05, "loss": 1.2527, "step": 3543000 }, { "epoch": 37.4, "learning_rate": 3.130217292653841e-05, "loss": 1.311, "step": 3543500 }, { "epoch": 37.4, "learning_rate": 3.1299534599026986e-05, "loss": 1.3096, "step": 3544000 }, { "epoch": 37.41, "learning_rate": 3.129689627151556e-05, "loss": 1.2773, "step": 3544500 }, { "epoch": 37.41, "learning_rate": 3.129425794400414e-05, "loss": 1.3178, "step": 3545000 }, { "epoch": 37.42, "learning_rate": 3.129161961649271e-05, "loss": 1.2972, "step": 3545500 }, { "epoch": 37.42, "learning_rate": 3.128898128898129e-05, "loss": 1.3083, "step": 3546000 }, { "epoch": 37.43, "learning_rate": 3.1286342961469864e-05, "loss": 1.2639, "step": 3546500 }, { "epoch": 37.43, "learning_rate": 3.1283704633958446e-05, "loss": 1.3255, "step": 3547000 }, { "epoch": 37.44, "learning_rate": 3.128106630644702e-05, "loss": 1.2943, "step": 3547500 }, { "epoch": 37.44, "learning_rate": 3.127842797893559e-05, "loss": 1.2451, "step": 3548000 }, { "epoch": 37.45, "learning_rate": 3.127578965142417e-05, "loss": 1.2843, "step": 3548500 }, { "epoch": 37.45, "learning_rate": 3.127315132391275e-05, "loss": 1.2924, "step": 3549000 }, { "epoch": 37.46, "learning_rate": 3.127051299640132e-05, "loss": 1.3469, "step": 3549500 }, { "epoch": 37.46, "learning_rate": 3.12678746688899e-05, "loss": 1.27, "step": 3550000 }, { "epoch": 37.47, "learning_rate": 3.1265236341378474e-05, "loss": 1.3546, "step": 3550500 }, { "epoch": 37.47, "learning_rate": 3.126259801386705e-05, "loss": 1.3035, "step": 3551000 }, { "epoch": 37.48, "learning_rate": 3.1259959686355625e-05, "loss": 1.2968, "step": 3551500 }, { "epoch": 37.49, "learning_rate": 3.125732135884421e-05, "loss": 1.2922, "step": 3552000 }, { "epoch": 37.49, "learning_rate": 3.1254683031332776e-05, "loss": 1.2902, "step": 3552500 }, { "epoch": 37.5, "learning_rate": 3.125204470382135e-05, "loss": 1.2772, "step": 3553000 }, { "epoch": 37.5, "learning_rate": 3.1249406376309934e-05, "loss": 1.2903, "step": 3553500 }, { "epoch": 37.51, "learning_rate": 3.124676804879851e-05, "loss": 1.3347, "step": 3554000 }, { "epoch": 37.51, "learning_rate": 3.1244129721287085e-05, "loss": 1.2396, "step": 3554500 }, { "epoch": 37.52, "learning_rate": 3.1241491393775654e-05, "loss": 1.2524, "step": 3555000 }, { "epoch": 37.52, "learning_rate": 3.1238853066264236e-05, "loss": 1.299, "step": 3555500 }, { "epoch": 37.53, "learning_rate": 3.123621473875281e-05, "loss": 1.2833, "step": 3556000 }, { "epoch": 37.53, "learning_rate": 3.123357641124139e-05, "loss": 1.2958, "step": 3556500 }, { "epoch": 37.54, "learning_rate": 3.123093808372997e-05, "loss": 1.3694, "step": 3557000 }, { "epoch": 37.54, "learning_rate": 3.122829975621854e-05, "loss": 1.3125, "step": 3557500 }, { "epoch": 37.55, "learning_rate": 3.1225661428707113e-05, "loss": 1.324, "step": 3558000 }, { "epoch": 37.55, "learning_rate": 3.122302310119569e-05, "loss": 1.2821, "step": 3558500 }, { "epoch": 37.56, "learning_rate": 3.122038477368427e-05, "loss": 1.2851, "step": 3559000 }, { "epoch": 37.56, "learning_rate": 3.121774644617285e-05, "loss": 1.2913, "step": 3559500 }, { "epoch": 37.57, "learning_rate": 3.1215108118661415e-05, "loss": 1.3314, "step": 3560000 }, { "epoch": 37.58, "learning_rate": 3.121246979115e-05, "loss": 1.2375, "step": 3560500 }, { "epoch": 37.58, "learning_rate": 3.120983146363857e-05, "loss": 1.3295, "step": 3561000 }, { "epoch": 37.59, "learning_rate": 3.120719313612715e-05, "loss": 1.2751, "step": 3561500 }, { "epoch": 37.59, "learning_rate": 3.1204554808615724e-05, "loss": 1.3341, "step": 3562000 }, { "epoch": 37.6, "learning_rate": 3.12019164811043e-05, "loss": 1.2574, "step": 3562500 }, { "epoch": 37.6, "learning_rate": 3.1199278153592875e-05, "loss": 1.2863, "step": 3563000 }, { "epoch": 37.61, "learning_rate": 3.119663982608145e-05, "loss": 1.3683, "step": 3563500 }, { "epoch": 37.61, "learning_rate": 3.119400149857003e-05, "loss": 1.2716, "step": 3564000 }, { "epoch": 37.62, "learning_rate": 3.11913631710586e-05, "loss": 1.2802, "step": 3564500 }, { "epoch": 37.62, "learning_rate": 3.118872484354718e-05, "loss": 1.2951, "step": 3565000 }, { "epoch": 37.63, "learning_rate": 3.118608651603576e-05, "loss": 1.276, "step": 3565500 }, { "epoch": 37.63, "learning_rate": 3.1183448188524335e-05, "loss": 1.296, "step": 3566000 }, { "epoch": 37.64, "learning_rate": 3.118080986101291e-05, "loss": 1.3109, "step": 3566500 }, { "epoch": 37.64, "learning_rate": 3.117817153350148e-05, "loss": 1.3153, "step": 3567000 }, { "epoch": 37.65, "learning_rate": 3.117553320599006e-05, "loss": 1.3087, "step": 3567500 }, { "epoch": 37.65, "learning_rate": 3.117289487847864e-05, "loss": 1.3113, "step": 3568000 }, { "epoch": 37.66, "learning_rate": 3.117025655096721e-05, "loss": 1.2798, "step": 3568500 }, { "epoch": 37.66, "learning_rate": 3.116761822345579e-05, "loss": 1.3298, "step": 3569000 }, { "epoch": 37.67, "learning_rate": 3.116497989594436e-05, "loss": 1.2994, "step": 3569500 }, { "epoch": 37.68, "learning_rate": 3.116234156843294e-05, "loss": 1.3055, "step": 3570000 }, { "epoch": 37.68, "learning_rate": 3.1159703240921514e-05, "loss": 1.345, "step": 3570500 }, { "epoch": 37.69, "learning_rate": 3.1157064913410096e-05, "loss": 1.2459, "step": 3571000 }, { "epoch": 37.69, "learning_rate": 3.1154426585898665e-05, "loss": 1.2933, "step": 3571500 }, { "epoch": 37.7, "learning_rate": 3.115178825838724e-05, "loss": 1.3094, "step": 3572000 }, { "epoch": 37.7, "learning_rate": 3.114914993087582e-05, "loss": 1.2663, "step": 3572500 }, { "epoch": 37.71, "learning_rate": 3.11465116033644e-05, "loss": 1.2994, "step": 3573000 }, { "epoch": 37.71, "learning_rate": 3.1143873275852974e-05, "loss": 1.2687, "step": 3573500 }, { "epoch": 37.72, "learning_rate": 3.114123494834155e-05, "loss": 1.3251, "step": 3574000 }, { "epoch": 37.72, "learning_rate": 3.1138596620830125e-05, "loss": 1.2766, "step": 3574500 }, { "epoch": 37.73, "learning_rate": 3.11359582933187e-05, "loss": 1.2729, "step": 3575000 }, { "epoch": 37.73, "learning_rate": 3.1133319965807276e-05, "loss": 1.3013, "step": 3575500 }, { "epoch": 37.74, "learning_rate": 3.113068163829586e-05, "loss": 1.3001, "step": 3576000 }, { "epoch": 37.74, "learning_rate": 3.112804331078443e-05, "loss": 1.3328, "step": 3576500 }, { "epoch": 37.75, "learning_rate": 3.1125404983273e-05, "loss": 1.2695, "step": 3577000 }, { "epoch": 37.75, "learning_rate": 3.1122766655761585e-05, "loss": 1.3324, "step": 3577500 }, { "epoch": 37.76, "learning_rate": 3.112012832825016e-05, "loss": 1.3068, "step": 3578000 }, { "epoch": 37.77, "learning_rate": 3.111749000073873e-05, "loss": 1.3331, "step": 3578500 }, { "epoch": 37.77, "learning_rate": 3.111485167322731e-05, "loss": 1.266, "step": 3579000 }, { "epoch": 37.78, "learning_rate": 3.1112213345715886e-05, "loss": 1.3525, "step": 3579500 }, { "epoch": 37.78, "learning_rate": 3.110957501820446e-05, "loss": 1.339, "step": 3580000 }, { "epoch": 37.79, "learning_rate": 3.110693669069304e-05, "loss": 1.2937, "step": 3580500 }, { "epoch": 37.79, "learning_rate": 3.110429836318161e-05, "loss": 1.3404, "step": 3581000 }, { "epoch": 37.8, "learning_rate": 3.110166003567019e-05, "loss": 1.3609, "step": 3581500 }, { "epoch": 37.8, "learning_rate": 3.1099021708158764e-05, "loss": 1.2835, "step": 3582000 }, { "epoch": 37.81, "learning_rate": 3.109638338064734e-05, "loss": 1.3117, "step": 3582500 }, { "epoch": 37.81, "learning_rate": 3.109374505313592e-05, "loss": 1.3059, "step": 3583000 }, { "epoch": 37.82, "learning_rate": 3.109110672562449e-05, "loss": 1.2819, "step": 3583500 }, { "epoch": 37.82, "learning_rate": 3.1088468398113066e-05, "loss": 1.3394, "step": 3584000 }, { "epoch": 37.83, "learning_rate": 3.108583007060165e-05, "loss": 1.3265, "step": 3584500 }, { "epoch": 37.83, "learning_rate": 3.1083191743090224e-05, "loss": 1.2447, "step": 3585000 }, { "epoch": 37.84, "learning_rate": 3.10805534155788e-05, "loss": 1.3353, "step": 3585500 }, { "epoch": 37.84, "learning_rate": 3.1077915088067375e-05, "loss": 1.3104, "step": 3586000 }, { "epoch": 37.85, "learning_rate": 3.107527676055595e-05, "loss": 1.3407, "step": 3586500 }, { "epoch": 37.85, "learning_rate": 3.1072638433044526e-05, "loss": 1.2549, "step": 3587000 }, { "epoch": 37.86, "learning_rate": 3.10700001055331e-05, "loss": 1.2954, "step": 3587500 }, { "epoch": 37.87, "learning_rate": 3.1067361778021677e-05, "loss": 1.3035, "step": 3588000 }, { "epoch": 37.87, "learning_rate": 3.106472345051025e-05, "loss": 1.2972, "step": 3588500 }, { "epoch": 37.88, "learning_rate": 3.106208512299883e-05, "loss": 1.3019, "step": 3589000 }, { "epoch": 37.88, "learning_rate": 3.105944679548741e-05, "loss": 1.3349, "step": 3589500 }, { "epoch": 37.89, "learning_rate": 3.1056808467975985e-05, "loss": 1.3267, "step": 3590000 }, { "epoch": 37.89, "learning_rate": 3.1054170140464554e-05, "loss": 1.3267, "step": 3590500 }, { "epoch": 37.9, "learning_rate": 3.1051531812953136e-05, "loss": 1.284, "step": 3591000 }, { "epoch": 37.9, "learning_rate": 3.104889348544171e-05, "loss": 1.2897, "step": 3591500 }, { "epoch": 37.91, "learning_rate": 3.104625515793029e-05, "loss": 1.306, "step": 3592000 }, { "epoch": 37.91, "learning_rate": 3.104361683041886e-05, "loss": 1.2621, "step": 3592500 }, { "epoch": 37.92, "learning_rate": 3.104097850290744e-05, "loss": 1.3002, "step": 3593000 }, { "epoch": 37.92, "learning_rate": 3.1038340175396014e-05, "loss": 1.3352, "step": 3593500 }, { "epoch": 37.93, "learning_rate": 3.103570184788459e-05, "loss": 1.3168, "step": 3594000 }, { "epoch": 37.93, "learning_rate": 3.1033063520373165e-05, "loss": 1.2641, "step": 3594500 }, { "epoch": 37.94, "learning_rate": 3.103042519286175e-05, "loss": 1.2941, "step": 3595000 }, { "epoch": 37.94, "learning_rate": 3.1027786865350316e-05, "loss": 1.3031, "step": 3595500 }, { "epoch": 37.95, "learning_rate": 3.102514853783889e-05, "loss": 1.2373, "step": 3596000 }, { "epoch": 37.95, "learning_rate": 3.102251021032747e-05, "loss": 1.324, "step": 3596500 }, { "epoch": 37.96, "learning_rate": 3.101987188281605e-05, "loss": 1.3248, "step": 3597000 }, { "epoch": 37.97, "learning_rate": 3.101723355530462e-05, "loss": 1.3311, "step": 3597500 }, { "epoch": 37.97, "learning_rate": 3.10145952277932e-05, "loss": 1.2579, "step": 3598000 }, { "epoch": 37.98, "learning_rate": 3.1011956900281775e-05, "loss": 1.2958, "step": 3598500 }, { "epoch": 37.98, "learning_rate": 3.100931857277035e-05, "loss": 1.2695, "step": 3599000 }, { "epoch": 37.99, "learning_rate": 3.1006680245258926e-05, "loss": 1.3572, "step": 3599500 }, { "epoch": 37.99, "learning_rate": 3.10040419177475e-05, "loss": 1.3317, "step": 3600000 }, { "epoch": 38.0, "learning_rate": 3.100140359023608e-05, "loss": 1.3077, "step": 3600500 }, { "epoch": 38.0, "learning_rate": 3.099876526272465e-05, "loss": 1.3127, "step": 3601000 }, { "epoch": 38.01, "learning_rate": 3.0996126935213235e-05, "loss": 1.2715, "step": 3601500 }, { "epoch": 38.01, "learning_rate": 3.099348860770181e-05, "loss": 1.2618, "step": 3602000 }, { "epoch": 38.02, "learning_rate": 3.099085028019038e-05, "loss": 1.261, "step": 3602500 }, { "epoch": 38.02, "learning_rate": 3.098821195267896e-05, "loss": 1.2368, "step": 3603000 }, { "epoch": 38.03, "learning_rate": 3.098557362516754e-05, "loss": 1.3434, "step": 3603500 }, { "epoch": 38.03, "learning_rate": 3.098293529765611e-05, "loss": 1.2844, "step": 3604000 }, { "epoch": 38.04, "learning_rate": 3.098029697014469e-05, "loss": 1.2871, "step": 3604500 }, { "epoch": 38.04, "learning_rate": 3.0977658642633263e-05, "loss": 1.285, "step": 3605000 }, { "epoch": 38.05, "learning_rate": 3.097502031512184e-05, "loss": 1.2593, "step": 3605500 }, { "epoch": 38.06, "learning_rate": 3.0972381987610414e-05, "loss": 1.2517, "step": 3606000 }, { "epoch": 38.06, "learning_rate": 3.096974366009899e-05, "loss": 1.3258, "step": 3606500 }, { "epoch": 38.07, "learning_rate": 3.0967105332587565e-05, "loss": 1.2934, "step": 3607000 }, { "epoch": 38.07, "learning_rate": 3.096446700507614e-05, "loss": 1.3306, "step": 3607500 }, { "epoch": 38.08, "learning_rate": 3.0961828677564716e-05, "loss": 1.2237, "step": 3608000 }, { "epoch": 38.08, "learning_rate": 3.09591903500533e-05, "loss": 1.2778, "step": 3608500 }, { "epoch": 38.09, "learning_rate": 3.0956552022541874e-05, "loss": 1.3481, "step": 3609000 }, { "epoch": 38.09, "learning_rate": 3.095391369503044e-05, "loss": 1.3004, "step": 3609500 }, { "epoch": 38.1, "learning_rate": 3.0951275367519025e-05, "loss": 1.2927, "step": 3610000 }, { "epoch": 38.1, "learning_rate": 3.09486370400076e-05, "loss": 1.314, "step": 3610500 }, { "epoch": 38.11, "learning_rate": 3.0945998712496176e-05, "loss": 1.2793, "step": 3611000 }, { "epoch": 38.11, "learning_rate": 3.094336038498475e-05, "loss": 1.2633, "step": 3611500 }, { "epoch": 38.12, "learning_rate": 3.094072205747333e-05, "loss": 1.273, "step": 3612000 }, { "epoch": 38.12, "learning_rate": 3.09380837299619e-05, "loss": 1.2879, "step": 3612500 }, { "epoch": 38.13, "learning_rate": 3.093544540245048e-05, "loss": 1.2624, "step": 3613000 }, { "epoch": 38.13, "learning_rate": 3.093280707493906e-05, "loss": 1.2828, "step": 3613500 }, { "epoch": 38.14, "learning_rate": 3.0930168747427636e-05, "loss": 1.3175, "step": 3614000 }, { "epoch": 38.14, "learning_rate": 3.0927530419916204e-05, "loss": 1.3011, "step": 3614500 }, { "epoch": 38.15, "learning_rate": 3.092489209240479e-05, "loss": 1.2962, "step": 3615000 }, { "epoch": 38.16, "learning_rate": 3.092225376489336e-05, "loss": 1.3362, "step": 3615500 }, { "epoch": 38.16, "learning_rate": 3.091961543738194e-05, "loss": 1.3329, "step": 3616000 }, { "epoch": 38.17, "learning_rate": 3.0916977109870506e-05, "loss": 1.2369, "step": 3616500 }, { "epoch": 38.17, "learning_rate": 3.091433878235909e-05, "loss": 1.3477, "step": 3617000 }, { "epoch": 38.18, "learning_rate": 3.0911700454847664e-05, "loss": 1.252, "step": 3617500 }, { "epoch": 38.18, "learning_rate": 3.090906212733624e-05, "loss": 1.2455, "step": 3618000 }, { "epoch": 38.19, "learning_rate": 3.0906423799824815e-05, "loss": 1.3031, "step": 3618500 }, { "epoch": 38.19, "learning_rate": 3.090378547231339e-05, "loss": 1.3106, "step": 3619000 }, { "epoch": 38.2, "learning_rate": 3.0901147144801966e-05, "loss": 1.3114, "step": 3619500 }, { "epoch": 38.2, "learning_rate": 3.089850881729054e-05, "loss": 1.3085, "step": 3620000 }, { "epoch": 38.21, "learning_rate": 3.0895870489779124e-05, "loss": 1.3261, "step": 3620500 }, { "epoch": 38.21, "learning_rate": 3.08932321622677e-05, "loss": 1.3116, "step": 3621000 }, { "epoch": 38.22, "learning_rate": 3.089059383475627e-05, "loss": 1.2837, "step": 3621500 }, { "epoch": 38.22, "learning_rate": 3.088795550724485e-05, "loss": 1.2882, "step": 3622000 }, { "epoch": 38.23, "learning_rate": 3.0885317179733426e-05, "loss": 1.326, "step": 3622500 }, { "epoch": 38.23, "learning_rate": 3.0882678852222e-05, "loss": 1.2903, "step": 3623000 }, { "epoch": 38.24, "learning_rate": 3.088004052471058e-05, "loss": 1.304, "step": 3623500 }, { "epoch": 38.25, "learning_rate": 3.087740219719915e-05, "loss": 1.2466, "step": 3624000 }, { "epoch": 38.25, "learning_rate": 3.087476386968773e-05, "loss": 1.2843, "step": 3624500 }, { "epoch": 38.26, "learning_rate": 3.08721255421763e-05, "loss": 1.2736, "step": 3625000 }, { "epoch": 38.26, "learning_rate": 3.0869487214664886e-05, "loss": 1.2839, "step": 3625500 }, { "epoch": 38.27, "learning_rate": 3.0866848887153454e-05, "loss": 1.2872, "step": 3626000 }, { "epoch": 38.27, "learning_rate": 3.086421055964203e-05, "loss": 1.2936, "step": 3626500 }, { "epoch": 38.28, "learning_rate": 3.086157223213061e-05, "loss": 1.3215, "step": 3627000 }, { "epoch": 38.28, "learning_rate": 3.085893390461919e-05, "loss": 1.295, "step": 3627500 }, { "epoch": 38.29, "learning_rate": 3.085629557710776e-05, "loss": 1.3557, "step": 3628000 }, { "epoch": 38.29, "learning_rate": 3.085365724959633e-05, "loss": 1.2848, "step": 3628500 }, { "epoch": 38.3, "learning_rate": 3.0851018922084914e-05, "loss": 1.3248, "step": 3629000 }, { "epoch": 38.3, "learning_rate": 3.084838059457349e-05, "loss": 1.2777, "step": 3629500 }, { "epoch": 38.31, "learning_rate": 3.0845742267062065e-05, "loss": 1.2879, "step": 3630000 }, { "epoch": 38.31, "learning_rate": 3.084310393955065e-05, "loss": 1.297, "step": 3630500 }, { "epoch": 38.32, "learning_rate": 3.0840465612039216e-05, "loss": 1.2889, "step": 3631000 }, { "epoch": 38.32, "learning_rate": 3.083782728452779e-05, "loss": 1.3347, "step": 3631500 }, { "epoch": 38.33, "learning_rate": 3.083518895701637e-05, "loss": 1.2896, "step": 3632000 }, { "epoch": 38.33, "learning_rate": 3.083255062950495e-05, "loss": 1.255, "step": 3632500 }, { "epoch": 38.34, "learning_rate": 3.0829912301993525e-05, "loss": 1.294, "step": 3633000 }, { "epoch": 38.35, "learning_rate": 3.082727397448209e-05, "loss": 1.4025, "step": 3633500 }, { "epoch": 38.35, "learning_rate": 3.0824635646970676e-05, "loss": 1.2801, "step": 3634000 }, { "epoch": 38.36, "learning_rate": 3.082199731945925e-05, "loss": 1.2967, "step": 3634500 }, { "epoch": 38.36, "learning_rate": 3.0819358991947827e-05, "loss": 1.2847, "step": 3635000 }, { "epoch": 38.37, "learning_rate": 3.08167206644364e-05, "loss": 1.2721, "step": 3635500 }, { "epoch": 38.37, "learning_rate": 3.081408233692498e-05, "loss": 1.3239, "step": 3636000 }, { "epoch": 38.38, "learning_rate": 3.081144400941355e-05, "loss": 1.2762, "step": 3636500 }, { "epoch": 38.38, "learning_rate": 3.080880568190213e-05, "loss": 1.2819, "step": 3637000 }, { "epoch": 38.39, "learning_rate": 3.080616735439071e-05, "loss": 1.272, "step": 3637500 }, { "epoch": 38.39, "learning_rate": 3.080352902687928e-05, "loss": 1.2374, "step": 3638000 }, { "epoch": 38.4, "learning_rate": 3.0800890699367855e-05, "loss": 1.3617, "step": 3638500 }, { "epoch": 38.4, "learning_rate": 3.079825237185644e-05, "loss": 1.2834, "step": 3639000 }, { "epoch": 38.41, "learning_rate": 3.079561404434501e-05, "loss": 1.2947, "step": 3639500 }, { "epoch": 38.41, "learning_rate": 3.079297571683359e-05, "loss": 1.3261, "step": 3640000 }, { "epoch": 38.42, "learning_rate": 3.079033738932216e-05, "loss": 1.2946, "step": 3640500 }, { "epoch": 38.42, "learning_rate": 3.078769906181074e-05, "loss": 1.2273, "step": 3641000 }, { "epoch": 38.43, "learning_rate": 3.0785060734299315e-05, "loss": 1.2319, "step": 3641500 }, { "epoch": 38.44, "learning_rate": 3.078242240678789e-05, "loss": 1.3579, "step": 3642000 }, { "epoch": 38.44, "learning_rate": 3.077978407927647e-05, "loss": 1.2767, "step": 3642500 }, { "epoch": 38.45, "learning_rate": 3.077714575176504e-05, "loss": 1.3563, "step": 3643000 }, { "epoch": 38.45, "learning_rate": 3.0774507424253617e-05, "loss": 1.248, "step": 3643500 }, { "epoch": 38.46, "learning_rate": 3.077186909674219e-05, "loss": 1.261, "step": 3644000 }, { "epoch": 38.46, "learning_rate": 3.0769230769230774e-05, "loss": 1.2807, "step": 3644500 }, { "epoch": 38.47, "learning_rate": 3.076659244171934e-05, "loss": 1.2621, "step": 3645000 }, { "epoch": 38.47, "learning_rate": 3.076395411420792e-05, "loss": 1.3265, "step": 3645500 }, { "epoch": 38.48, "learning_rate": 3.07613157866965e-05, "loss": 1.2932, "step": 3646000 }, { "epoch": 38.48, "learning_rate": 3.0758677459185076e-05, "loss": 1.2887, "step": 3646500 }, { "epoch": 38.49, "learning_rate": 3.075603913167365e-05, "loss": 1.2912, "step": 3647000 }, { "epoch": 38.49, "learning_rate": 3.075340080416223e-05, "loss": 1.3059, "step": 3647500 }, { "epoch": 38.5, "learning_rate": 3.07507624766508e-05, "loss": 1.3369, "step": 3648000 }, { "epoch": 38.5, "learning_rate": 3.074812414913938e-05, "loss": 1.2788, "step": 3648500 }, { "epoch": 38.51, "learning_rate": 3.0745485821627954e-05, "loss": 1.3355, "step": 3649000 }, { "epoch": 38.51, "learning_rate": 3.0742847494116536e-05, "loss": 1.3327, "step": 3649500 }, { "epoch": 38.52, "learning_rate": 3.0740209166605105e-05, "loss": 1.3675, "step": 3650000 }, { "epoch": 38.52, "learning_rate": 3.073757083909368e-05, "loss": 1.3394, "step": 3650500 }, { "epoch": 38.53, "learning_rate": 3.073493251158226e-05, "loss": 1.2871, "step": 3651000 }, { "epoch": 38.54, "learning_rate": 3.073229418407084e-05, "loss": 1.3155, "step": 3651500 }, { "epoch": 38.54, "learning_rate": 3.0729655856559413e-05, "loss": 1.3138, "step": 3652000 }, { "epoch": 38.55, "learning_rate": 3.072701752904799e-05, "loss": 1.2531, "step": 3652500 }, { "epoch": 38.55, "learning_rate": 3.0724379201536564e-05, "loss": 1.3086, "step": 3653000 }, { "epoch": 38.56, "learning_rate": 3.072174087402514e-05, "loss": 1.2629, "step": 3653500 }, { "epoch": 38.56, "learning_rate": 3.0719102546513715e-05, "loss": 1.2449, "step": 3654000 }, { "epoch": 38.57, "learning_rate": 3.071646421900229e-05, "loss": 1.2981, "step": 3654500 }, { "epoch": 38.57, "learning_rate": 3.0713825891490866e-05, "loss": 1.3214, "step": 3655000 }, { "epoch": 38.58, "learning_rate": 3.071118756397944e-05, "loss": 1.3296, "step": 3655500 }, { "epoch": 38.58, "learning_rate": 3.070854923646802e-05, "loss": 1.3121, "step": 3656000 }, { "epoch": 38.59, "learning_rate": 3.07059109089566e-05, "loss": 1.3163, "step": 3656500 }, { "epoch": 38.59, "learning_rate": 3.070327258144517e-05, "loss": 1.3148, "step": 3657000 }, { "epoch": 38.6, "learning_rate": 3.0700634253933744e-05, "loss": 1.3179, "step": 3657500 }, { "epoch": 38.6, "learning_rate": 3.0697995926422326e-05, "loss": 1.3077, "step": 3658000 }, { "epoch": 38.61, "learning_rate": 3.06953575989109e-05, "loss": 1.2817, "step": 3658500 }, { "epoch": 38.61, "learning_rate": 3.069271927139948e-05, "loss": 1.3094, "step": 3659000 }, { "epoch": 38.62, "learning_rate": 3.069008094388805e-05, "loss": 1.2963, "step": 3659500 }, { "epoch": 38.63, "learning_rate": 3.068744261637663e-05, "loss": 1.2877, "step": 3660000 }, { "epoch": 38.63, "learning_rate": 3.0684804288865203e-05, "loss": 1.3078, "step": 3660500 }, { "epoch": 38.64, "learning_rate": 3.068216596135378e-05, "loss": 1.2477, "step": 3661000 }, { "epoch": 38.64, "learning_rate": 3.067952763384236e-05, "loss": 1.3078, "step": 3661500 }, { "epoch": 38.65, "learning_rate": 3.067688930633093e-05, "loss": 1.3221, "step": 3662000 }, { "epoch": 38.65, "learning_rate": 3.0674250978819505e-05, "loss": 1.2753, "step": 3662500 }, { "epoch": 38.66, "learning_rate": 3.067161265130809e-05, "loss": 1.2861, "step": 3663000 }, { "epoch": 38.66, "learning_rate": 3.066897432379666e-05, "loss": 1.2798, "step": 3663500 }, { "epoch": 38.67, "learning_rate": 3.066633599628523e-05, "loss": 1.3205, "step": 3664000 }, { "epoch": 38.67, "learning_rate": 3.0663697668773814e-05, "loss": 1.2458, "step": 3664500 }, { "epoch": 38.68, "learning_rate": 3.066105934126239e-05, "loss": 1.2799, "step": 3665000 }, { "epoch": 38.68, "learning_rate": 3.0658421013750965e-05, "loss": 1.2375, "step": 3665500 }, { "epoch": 38.69, "learning_rate": 3.065578268623954e-05, "loss": 1.2995, "step": 3666000 }, { "epoch": 38.69, "learning_rate": 3.0653144358728116e-05, "loss": 1.3302, "step": 3666500 }, { "epoch": 38.7, "learning_rate": 3.065050603121669e-05, "loss": 1.2859, "step": 3667000 }, { "epoch": 38.7, "learning_rate": 3.064786770370527e-05, "loss": 1.3259, "step": 3667500 }, { "epoch": 38.71, "learning_rate": 3.064522937619384e-05, "loss": 1.2783, "step": 3668000 }, { "epoch": 38.71, "learning_rate": 3.0642591048682425e-05, "loss": 1.3033, "step": 3668500 }, { "epoch": 38.72, "learning_rate": 3.0639952721170994e-05, "loss": 1.2916, "step": 3669000 }, { "epoch": 38.73, "learning_rate": 3.063731439365957e-05, "loss": 1.3068, "step": 3669500 }, { "epoch": 38.73, "learning_rate": 3.063467606614815e-05, "loss": 1.303, "step": 3670000 }, { "epoch": 38.74, "learning_rate": 3.063203773863673e-05, "loss": 1.2467, "step": 3670500 }, { "epoch": 38.74, "learning_rate": 3.06293994111253e-05, "loss": 1.2778, "step": 3671000 }, { "epoch": 38.75, "learning_rate": 3.062676108361388e-05, "loss": 1.268, "step": 3671500 }, { "epoch": 38.75, "learning_rate": 3.062412275610245e-05, "loss": 1.2974, "step": 3672000 }, { "epoch": 38.76, "learning_rate": 3.062148442859103e-05, "loss": 1.3043, "step": 3672500 }, { "epoch": 38.76, "learning_rate": 3.0618846101079604e-05, "loss": 1.2938, "step": 3673000 }, { "epoch": 38.77, "learning_rate": 3.061620777356818e-05, "loss": 1.3003, "step": 3673500 }, { "epoch": 38.77, "learning_rate": 3.0613569446056755e-05, "loss": 1.3006, "step": 3674000 }, { "epoch": 38.78, "learning_rate": 3.061093111854533e-05, "loss": 1.2978, "step": 3674500 }, { "epoch": 38.78, "learning_rate": 3.060829279103391e-05, "loss": 1.3091, "step": 3675000 }, { "epoch": 38.79, "learning_rate": 3.060565446352249e-05, "loss": 1.2824, "step": 3675500 }, { "epoch": 38.79, "learning_rate": 3.060301613601106e-05, "loss": 1.3213, "step": 3676000 }, { "epoch": 38.8, "learning_rate": 3.060037780849964e-05, "loss": 1.3462, "step": 3676500 }, { "epoch": 38.8, "learning_rate": 3.0597739480988215e-05, "loss": 1.3336, "step": 3677000 }, { "epoch": 38.81, "learning_rate": 3.059510115347679e-05, "loss": 1.2941, "step": 3677500 }, { "epoch": 38.82, "learning_rate": 3.0592462825965366e-05, "loss": 1.3338, "step": 3678000 }, { "epoch": 38.82, "learning_rate": 3.058982449845394e-05, "loss": 1.2603, "step": 3678500 }, { "epoch": 38.83, "learning_rate": 3.058718617094252e-05, "loss": 1.2919, "step": 3679000 }, { "epoch": 38.83, "learning_rate": 3.058454784343109e-05, "loss": 1.3111, "step": 3679500 }, { "epoch": 38.84, "learning_rate": 3.058190951591967e-05, "loss": 1.2985, "step": 3680000 }, { "epoch": 38.84, "learning_rate": 3.057927118840825e-05, "loss": 1.2699, "step": 3680500 }, { "epoch": 38.85, "learning_rate": 3.057663286089682e-05, "loss": 1.305, "step": 3681000 }, { "epoch": 38.85, "learning_rate": 3.0573994533385394e-05, "loss": 1.3043, "step": 3681500 }, { "epoch": 38.86, "learning_rate": 3.0571356205873977e-05, "loss": 1.3073, "step": 3682000 }, { "epoch": 38.86, "learning_rate": 3.056871787836255e-05, "loss": 1.3479, "step": 3682500 }, { "epoch": 38.87, "learning_rate": 3.056607955085112e-05, "loss": 1.2775, "step": 3683000 }, { "epoch": 38.87, "learning_rate": 3.05634412233397e-05, "loss": 1.2491, "step": 3683500 }, { "epoch": 38.88, "learning_rate": 3.056080289582828e-05, "loss": 1.2949, "step": 3684000 }, { "epoch": 38.88, "learning_rate": 3.0558164568316854e-05, "loss": 1.306, "step": 3684500 }, { "epoch": 38.89, "learning_rate": 3.055552624080543e-05, "loss": 1.3837, "step": 3685000 }, { "epoch": 38.89, "learning_rate": 3.0552887913294005e-05, "loss": 1.2902, "step": 3685500 }, { "epoch": 38.9, "learning_rate": 3.055024958578258e-05, "loss": 1.2794, "step": 3686000 }, { "epoch": 38.9, "learning_rate": 3.0547611258271156e-05, "loss": 1.304, "step": 3686500 }, { "epoch": 38.91, "learning_rate": 3.054497293075974e-05, "loss": 1.3062, "step": 3687000 }, { "epoch": 38.92, "learning_rate": 3.0542334603248314e-05, "loss": 1.2228, "step": 3687500 }, { "epoch": 38.92, "learning_rate": 3.053969627573688e-05, "loss": 1.2226, "step": 3688000 }, { "epoch": 38.93, "learning_rate": 3.0537057948225465e-05, "loss": 1.3092, "step": 3688500 }, { "epoch": 38.93, "learning_rate": 3.053441962071404e-05, "loss": 1.2809, "step": 3689000 }, { "epoch": 38.94, "learning_rate": 3.0531781293202616e-05, "loss": 1.3087, "step": 3689500 }, { "epoch": 38.94, "learning_rate": 3.052914296569119e-05, "loss": 1.3397, "step": 3690000 }, { "epoch": 38.95, "learning_rate": 3.052650463817977e-05, "loss": 1.3004, "step": 3690500 }, { "epoch": 38.95, "learning_rate": 3.052386631066834e-05, "loss": 1.3025, "step": 3691000 }, { "epoch": 38.96, "learning_rate": 3.052122798315692e-05, "loss": 1.2884, "step": 3691500 }, { "epoch": 38.96, "learning_rate": 3.051858965564549e-05, "loss": 1.338, "step": 3692000 }, { "epoch": 38.97, "learning_rate": 3.051595132813407e-05, "loss": 1.2948, "step": 3692500 }, { "epoch": 38.97, "learning_rate": 3.0513313000622644e-05, "loss": 1.2783, "step": 3693000 }, { "epoch": 38.98, "learning_rate": 3.0510674673111223e-05, "loss": 1.302, "step": 3693500 }, { "epoch": 38.98, "learning_rate": 3.05080363455998e-05, "loss": 1.3005, "step": 3694000 }, { "epoch": 38.99, "learning_rate": 3.0505398018088377e-05, "loss": 1.2677, "step": 3694500 }, { "epoch": 38.99, "learning_rate": 3.050275969057695e-05, "loss": 1.3221, "step": 3695000 }, { "epoch": 39.0, "learning_rate": 3.0500121363065525e-05, "loss": 1.2966, "step": 3695500 }, { "epoch": 39.01, "learning_rate": 3.0497483035554104e-05, "loss": 1.2627, "step": 3696000 }, { "epoch": 39.01, "learning_rate": 3.049484470804268e-05, "loss": 1.3116, "step": 3696500 }, { "epoch": 39.02, "learning_rate": 3.0492206380531258e-05, "loss": 1.2912, "step": 3697000 }, { "epoch": 39.02, "learning_rate": 3.048956805301983e-05, "loss": 1.2074, "step": 3697500 }, { "epoch": 39.03, "learning_rate": 3.0486929725508406e-05, "loss": 1.2317, "step": 3698000 }, { "epoch": 39.03, "learning_rate": 3.0484291397996985e-05, "loss": 1.2856, "step": 3698500 }, { "epoch": 39.04, "learning_rate": 3.048165307048556e-05, "loss": 1.2923, "step": 3699000 }, { "epoch": 39.04, "learning_rate": 3.047901474297414e-05, "loss": 1.2612, "step": 3699500 }, { "epoch": 39.05, "learning_rate": 3.047637641546271e-05, "loss": 1.2874, "step": 3700000 }, { "epoch": 39.05, "learning_rate": 3.0473738087951287e-05, "loss": 1.2982, "step": 3700500 }, { "epoch": 39.06, "learning_rate": 3.0471099760439865e-05, "loss": 1.3113, "step": 3701000 }, { "epoch": 39.06, "learning_rate": 3.046846143292844e-05, "loss": 1.2585, "step": 3701500 }, { "epoch": 39.07, "learning_rate": 3.0465823105417013e-05, "loss": 1.3048, "step": 3702000 }, { "epoch": 39.07, "learning_rate": 3.046318477790559e-05, "loss": 1.3022, "step": 3702500 }, { "epoch": 39.08, "learning_rate": 3.0460546450394167e-05, "loss": 1.3191, "step": 3703000 }, { "epoch": 39.08, "learning_rate": 3.0457908122882746e-05, "loss": 1.256, "step": 3703500 }, { "epoch": 39.09, "learning_rate": 3.0455269795371322e-05, "loss": 1.3103, "step": 3704000 }, { "epoch": 39.09, "learning_rate": 3.0452631467859894e-05, "loss": 1.2801, "step": 3704500 }, { "epoch": 39.1, "learning_rate": 3.044999314034847e-05, "loss": 1.2666, "step": 3705000 }, { "epoch": 39.11, "learning_rate": 3.0447354812837048e-05, "loss": 1.3158, "step": 3705500 }, { "epoch": 39.11, "learning_rate": 3.0444716485325624e-05, "loss": 1.3281, "step": 3706000 }, { "epoch": 39.12, "learning_rate": 3.0442078157814203e-05, "loss": 1.2227, "step": 3706500 }, { "epoch": 39.12, "learning_rate": 3.0439439830302775e-05, "loss": 1.3007, "step": 3707000 }, { "epoch": 39.13, "learning_rate": 3.043680150279135e-05, "loss": 1.2608, "step": 3707500 }, { "epoch": 39.13, "learning_rate": 3.043416317527993e-05, "loss": 1.277, "step": 3708000 }, { "epoch": 39.14, "learning_rate": 3.0431524847768504e-05, "loss": 1.257, "step": 3708500 }, { "epoch": 39.14, "learning_rate": 3.0428886520257083e-05, "loss": 1.3345, "step": 3709000 }, { "epoch": 39.15, "learning_rate": 3.0426248192745655e-05, "loss": 1.3154, "step": 3709500 }, { "epoch": 39.15, "learning_rate": 3.042360986523423e-05, "loss": 1.285, "step": 3710000 }, { "epoch": 39.16, "learning_rate": 3.042097153772281e-05, "loss": 1.2749, "step": 3710500 }, { "epoch": 39.16, "learning_rate": 3.0418333210211385e-05, "loss": 1.3185, "step": 3711000 }, { "epoch": 39.17, "learning_rate": 3.0415694882699957e-05, "loss": 1.2652, "step": 3711500 }, { "epoch": 39.17, "learning_rate": 3.0413056555188536e-05, "loss": 1.2825, "step": 3712000 }, { "epoch": 39.18, "learning_rate": 3.0410418227677112e-05, "loss": 1.2574, "step": 3712500 }, { "epoch": 39.18, "learning_rate": 3.040777990016569e-05, "loss": 1.2609, "step": 3713000 }, { "epoch": 39.19, "learning_rate": 3.0405141572654266e-05, "loss": 1.2943, "step": 3713500 }, { "epoch": 39.19, "learning_rate": 3.0402503245142838e-05, "loss": 1.314, "step": 3714000 }, { "epoch": 39.2, "learning_rate": 3.0399864917631417e-05, "loss": 1.3016, "step": 3714500 }, { "epoch": 39.21, "learning_rate": 3.0397226590119993e-05, "loss": 1.3011, "step": 3715000 }, { "epoch": 39.21, "learning_rate": 3.039458826260857e-05, "loss": 1.2779, "step": 3715500 }, { "epoch": 39.22, "learning_rate": 3.0391949935097147e-05, "loss": 1.3138, "step": 3716000 }, { "epoch": 39.22, "learning_rate": 3.038931160758572e-05, "loss": 1.2833, "step": 3716500 }, { "epoch": 39.23, "learning_rate": 3.0386673280074295e-05, "loss": 1.2896, "step": 3717000 }, { "epoch": 39.23, "learning_rate": 3.0384034952562873e-05, "loss": 1.2702, "step": 3717500 }, { "epoch": 39.24, "learning_rate": 3.038139662505145e-05, "loss": 1.2876, "step": 3718000 }, { "epoch": 39.24, "learning_rate": 3.0378758297540028e-05, "loss": 1.3494, "step": 3718500 }, { "epoch": 39.25, "learning_rate": 3.03761199700286e-05, "loss": 1.2678, "step": 3719000 }, { "epoch": 39.25, "learning_rate": 3.0373481642517175e-05, "loss": 1.3026, "step": 3719500 }, { "epoch": 39.26, "learning_rate": 3.0370843315005754e-05, "loss": 1.3281, "step": 3720000 }, { "epoch": 39.26, "learning_rate": 3.036820498749433e-05, "loss": 1.2905, "step": 3720500 }, { "epoch": 39.27, "learning_rate": 3.0365566659982902e-05, "loss": 1.2438, "step": 3721000 }, { "epoch": 39.27, "learning_rate": 3.036292833247148e-05, "loss": 1.3483, "step": 3721500 }, { "epoch": 39.28, "learning_rate": 3.0360290004960056e-05, "loss": 1.3028, "step": 3722000 }, { "epoch": 39.28, "learning_rate": 3.0357651677448635e-05, "loss": 1.2744, "step": 3722500 }, { "epoch": 39.29, "learning_rate": 3.035501334993721e-05, "loss": 1.3814, "step": 3723000 }, { "epoch": 39.3, "learning_rate": 3.0352375022425783e-05, "loss": 1.3052, "step": 3723500 }, { "epoch": 39.3, "learning_rate": 3.034973669491436e-05, "loss": 1.2657, "step": 3724000 }, { "epoch": 39.31, "learning_rate": 3.0347098367402937e-05, "loss": 1.3492, "step": 3724500 }, { "epoch": 39.31, "learning_rate": 3.0344460039891516e-05, "loss": 1.285, "step": 3725000 }, { "epoch": 39.32, "learning_rate": 3.034182171238009e-05, "loss": 1.2748, "step": 3725500 }, { "epoch": 39.32, "learning_rate": 3.0339183384868663e-05, "loss": 1.29, "step": 3726000 }, { "epoch": 39.33, "learning_rate": 3.0336545057357242e-05, "loss": 1.3286, "step": 3726500 }, { "epoch": 39.33, "learning_rate": 3.0333906729845818e-05, "loss": 1.3099, "step": 3727000 }, { "epoch": 39.34, "learning_rate": 3.0331268402334397e-05, "loss": 1.3255, "step": 3727500 }, { "epoch": 39.34, "learning_rate": 3.0328630074822972e-05, "loss": 1.3504, "step": 3728000 }, { "epoch": 39.35, "learning_rate": 3.0325991747311544e-05, "loss": 1.2631, "step": 3728500 }, { "epoch": 39.35, "learning_rate": 3.032335341980012e-05, "loss": 1.282, "step": 3729000 }, { "epoch": 39.36, "learning_rate": 3.03207150922887e-05, "loss": 1.3016, "step": 3729500 }, { "epoch": 39.36, "learning_rate": 3.0318076764777274e-05, "loss": 1.2831, "step": 3730000 }, { "epoch": 39.37, "learning_rate": 3.0315438437265846e-05, "loss": 1.3083, "step": 3730500 }, { "epoch": 39.37, "learning_rate": 3.0312800109754425e-05, "loss": 1.2929, "step": 3731000 }, { "epoch": 39.38, "learning_rate": 3.0310161782243e-05, "loss": 1.274, "step": 3731500 }, { "epoch": 39.38, "learning_rate": 3.030752345473158e-05, "loss": 1.2958, "step": 3732000 }, { "epoch": 39.39, "learning_rate": 3.0304885127220155e-05, "loss": 1.2795, "step": 3732500 }, { "epoch": 39.4, "learning_rate": 3.0302246799708727e-05, "loss": 1.2969, "step": 3733000 }, { "epoch": 39.4, "learning_rate": 3.0299608472197306e-05, "loss": 1.2972, "step": 3733500 }, { "epoch": 39.41, "learning_rate": 3.029697014468588e-05, "loss": 1.244, "step": 3734000 }, { "epoch": 39.41, "learning_rate": 3.029433181717446e-05, "loss": 1.2301, "step": 3734500 }, { "epoch": 39.42, "learning_rate": 3.0291693489663036e-05, "loss": 1.2682, "step": 3735000 }, { "epoch": 39.42, "learning_rate": 3.0289055162151608e-05, "loss": 1.2676, "step": 3735500 }, { "epoch": 39.43, "learning_rate": 3.0286416834640187e-05, "loss": 1.2455, "step": 3736000 }, { "epoch": 39.43, "learning_rate": 3.0283778507128762e-05, "loss": 1.2341, "step": 3736500 }, { "epoch": 39.44, "learning_rate": 3.028114017961734e-05, "loss": 1.3537, "step": 3737000 }, { "epoch": 39.44, "learning_rate": 3.0278501852105917e-05, "loss": 1.2914, "step": 3737500 }, { "epoch": 39.45, "learning_rate": 3.027586352459449e-05, "loss": 1.2666, "step": 3738000 }, { "epoch": 39.45, "learning_rate": 3.0273225197083068e-05, "loss": 1.2779, "step": 3738500 }, { "epoch": 39.46, "learning_rate": 3.0270586869571643e-05, "loss": 1.2993, "step": 3739000 }, { "epoch": 39.46, "learning_rate": 3.0267948542060222e-05, "loss": 1.2787, "step": 3739500 }, { "epoch": 39.47, "learning_rate": 3.026531021454879e-05, "loss": 1.2817, "step": 3740000 }, { "epoch": 39.47, "learning_rate": 3.026267188703737e-05, "loss": 1.2879, "step": 3740500 }, { "epoch": 39.48, "learning_rate": 3.0260033559525945e-05, "loss": 1.2679, "step": 3741000 }, { "epoch": 39.49, "learning_rate": 3.0257395232014524e-05, "loss": 1.2716, "step": 3741500 }, { "epoch": 39.49, "learning_rate": 3.02547569045031e-05, "loss": 1.3122, "step": 3742000 }, { "epoch": 39.5, "learning_rate": 3.025211857699167e-05, "loss": 1.2329, "step": 3742500 }, { "epoch": 39.5, "learning_rate": 3.024948024948025e-05, "loss": 1.2737, "step": 3743000 }, { "epoch": 39.51, "learning_rate": 3.0246841921968826e-05, "loss": 1.3606, "step": 3743500 }, { "epoch": 39.51, "learning_rate": 3.0244203594457405e-05, "loss": 1.2852, "step": 3744000 }, { "epoch": 39.52, "learning_rate": 3.024156526694598e-05, "loss": 1.2874, "step": 3744500 }, { "epoch": 39.52, "learning_rate": 3.0238926939434552e-05, "loss": 1.2643, "step": 3745000 }, { "epoch": 39.53, "learning_rate": 3.023628861192313e-05, "loss": 1.2696, "step": 3745500 }, { "epoch": 39.53, "learning_rate": 3.0233650284411707e-05, "loss": 1.2566, "step": 3746000 }, { "epoch": 39.54, "learning_rate": 3.0231011956900286e-05, "loss": 1.3058, "step": 3746500 }, { "epoch": 39.54, "learning_rate": 3.022837362938886e-05, "loss": 1.3486, "step": 3747000 }, { "epoch": 39.55, "learning_rate": 3.0225735301877433e-05, "loss": 1.3049, "step": 3747500 }, { "epoch": 39.55, "learning_rate": 3.0223096974366012e-05, "loss": 1.3453, "step": 3748000 }, { "epoch": 39.56, "learning_rate": 3.0220458646854588e-05, "loss": 1.2766, "step": 3748500 }, { "epoch": 39.56, "learning_rate": 3.0217820319343166e-05, "loss": 1.2796, "step": 3749000 }, { "epoch": 39.57, "learning_rate": 3.021518199183174e-05, "loss": 1.276, "step": 3749500 }, { "epoch": 39.57, "learning_rate": 3.0212543664320314e-05, "loss": 1.2452, "step": 3750000 }, { "epoch": 39.58, "learning_rate": 3.0209905336808893e-05, "loss": 1.2875, "step": 3750500 }, { "epoch": 39.59, "learning_rate": 3.020726700929747e-05, "loss": 1.3366, "step": 3751000 }, { "epoch": 39.59, "learning_rate": 3.0204628681786047e-05, "loss": 1.2837, "step": 3751500 }, { "epoch": 39.6, "learning_rate": 3.0201990354274616e-05, "loss": 1.2585, "step": 3752000 }, { "epoch": 39.6, "learning_rate": 3.0199352026763195e-05, "loss": 1.3008, "step": 3752500 }, { "epoch": 39.61, "learning_rate": 3.019671369925177e-05, "loss": 1.3413, "step": 3753000 }, { "epoch": 39.61, "learning_rate": 3.019407537174035e-05, "loss": 1.2834, "step": 3753500 }, { "epoch": 39.62, "learning_rate": 3.0191437044228925e-05, "loss": 1.3547, "step": 3754000 }, { "epoch": 39.62, "learning_rate": 3.0188798716717497e-05, "loss": 1.283, "step": 3754500 }, { "epoch": 39.63, "learning_rate": 3.0186160389206076e-05, "loss": 1.2926, "step": 3755000 }, { "epoch": 39.63, "learning_rate": 3.018352206169465e-05, "loss": 1.2841, "step": 3755500 }, { "epoch": 39.64, "learning_rate": 3.018088373418323e-05, "loss": 1.2965, "step": 3756000 }, { "epoch": 39.64, "learning_rate": 3.0178245406671805e-05, "loss": 1.2879, "step": 3756500 }, { "epoch": 39.65, "learning_rate": 3.0175607079160378e-05, "loss": 1.314, "step": 3757000 }, { "epoch": 39.65, "learning_rate": 3.0172968751648956e-05, "loss": 1.2496, "step": 3757500 }, { "epoch": 39.66, "learning_rate": 3.0170330424137532e-05, "loss": 1.2746, "step": 3758000 }, { "epoch": 39.66, "learning_rate": 3.016769209662611e-05, "loss": 1.3129, "step": 3758500 }, { "epoch": 39.67, "learning_rate": 3.0165053769114683e-05, "loss": 1.2871, "step": 3759000 }, { "epoch": 39.68, "learning_rate": 3.016241544160326e-05, "loss": 1.2567, "step": 3759500 }, { "epoch": 39.68, "learning_rate": 3.0159777114091837e-05, "loss": 1.2215, "step": 3760000 }, { "epoch": 39.69, "learning_rate": 3.0157138786580413e-05, "loss": 1.3382, "step": 3760500 }, { "epoch": 39.69, "learning_rate": 3.015450045906899e-05, "loss": 1.2814, "step": 3761000 }, { "epoch": 39.7, "learning_rate": 3.0151862131557564e-05, "loss": 1.3407, "step": 3761500 }, { "epoch": 39.7, "learning_rate": 3.014922380404614e-05, "loss": 1.2551, "step": 3762000 }, { "epoch": 39.71, "learning_rate": 3.0146585476534718e-05, "loss": 1.2868, "step": 3762500 }, { "epoch": 39.71, "learning_rate": 3.0143947149023294e-05, "loss": 1.3158, "step": 3763000 }, { "epoch": 39.72, "learning_rate": 3.0141308821511872e-05, "loss": 1.3082, "step": 3763500 }, { "epoch": 39.72, "learning_rate": 3.013867049400044e-05, "loss": 1.2573, "step": 3764000 }, { "epoch": 39.73, "learning_rate": 3.013603216648902e-05, "loss": 1.3264, "step": 3764500 }, { "epoch": 39.73, "learning_rate": 3.0133393838977596e-05, "loss": 1.323, "step": 3765000 }, { "epoch": 39.74, "learning_rate": 3.0130755511466174e-05, "loss": 1.3086, "step": 3765500 }, { "epoch": 39.74, "learning_rate": 3.012811718395475e-05, "loss": 1.2547, "step": 3766000 }, { "epoch": 39.75, "learning_rate": 3.0125478856443322e-05, "loss": 1.2832, "step": 3766500 }, { "epoch": 39.75, "learning_rate": 3.01228405289319e-05, "loss": 1.247, "step": 3767000 }, { "epoch": 39.76, "learning_rate": 3.0120202201420476e-05, "loss": 1.271, "step": 3767500 }, { "epoch": 39.76, "learning_rate": 3.0117563873909055e-05, "loss": 1.287, "step": 3768000 }, { "epoch": 39.77, "learning_rate": 3.0114925546397627e-05, "loss": 1.2517, "step": 3768500 }, { "epoch": 39.78, "learning_rate": 3.0112287218886203e-05, "loss": 1.2938, "step": 3769000 }, { "epoch": 39.78, "learning_rate": 3.010964889137478e-05, "loss": 1.3196, "step": 3769500 }, { "epoch": 39.79, "learning_rate": 3.0107010563863357e-05, "loss": 1.2566, "step": 3770000 }, { "epoch": 39.79, "learning_rate": 3.0104372236351936e-05, "loss": 1.2695, "step": 3770500 }, { "epoch": 39.8, "learning_rate": 3.0101733908840508e-05, "loss": 1.3343, "step": 3771000 }, { "epoch": 39.8, "learning_rate": 3.0099095581329084e-05, "loss": 1.3017, "step": 3771500 }, { "epoch": 39.81, "learning_rate": 3.0096457253817663e-05, "loss": 1.2487, "step": 3772000 }, { "epoch": 39.81, "learning_rate": 3.0093818926306238e-05, "loss": 1.2956, "step": 3772500 }, { "epoch": 39.82, "learning_rate": 3.0091180598794817e-05, "loss": 1.3198, "step": 3773000 }, { "epoch": 39.82, "learning_rate": 3.008854227128339e-05, "loss": 1.2935, "step": 3773500 }, { "epoch": 39.83, "learning_rate": 3.0085903943771964e-05, "loss": 1.3179, "step": 3774000 }, { "epoch": 39.83, "learning_rate": 3.0083265616260543e-05, "loss": 1.3211, "step": 3774500 }, { "epoch": 39.84, "learning_rate": 3.008062728874912e-05, "loss": 1.287, "step": 3775000 }, { "epoch": 39.84, "learning_rate": 3.0077988961237698e-05, "loss": 1.3208, "step": 3775500 }, { "epoch": 39.85, "learning_rate": 3.0075350633726266e-05, "loss": 1.3272, "step": 3776000 }, { "epoch": 39.85, "learning_rate": 3.0072712306214845e-05, "loss": 1.2802, "step": 3776500 }, { "epoch": 39.86, "learning_rate": 3.007007397870342e-05, "loss": 1.2805, "step": 3777000 }, { "epoch": 39.87, "learning_rate": 3.0067435651192e-05, "loss": 1.2899, "step": 3777500 }, { "epoch": 39.87, "learning_rate": 3.0064797323680572e-05, "loss": 1.3023, "step": 3778000 }, { "epoch": 39.88, "learning_rate": 3.0062158996169147e-05, "loss": 1.2987, "step": 3778500 }, { "epoch": 39.88, "learning_rate": 3.0059520668657726e-05, "loss": 1.2757, "step": 3779000 }, { "epoch": 39.89, "learning_rate": 3.00568823411463e-05, "loss": 1.2729, "step": 3779500 }, { "epoch": 39.89, "learning_rate": 3.005424401363488e-05, "loss": 1.2831, "step": 3780000 }, { "epoch": 39.9, "learning_rate": 3.0051605686123453e-05, "loss": 1.2759, "step": 3780500 }, { "epoch": 39.9, "learning_rate": 3.0048967358612028e-05, "loss": 1.32, "step": 3781000 }, { "epoch": 39.91, "learning_rate": 3.0046329031100607e-05, "loss": 1.2889, "step": 3781500 }, { "epoch": 39.91, "learning_rate": 3.0043690703589182e-05, "loss": 1.3122, "step": 3782000 }, { "epoch": 39.92, "learning_rate": 3.004105237607776e-05, "loss": 1.3377, "step": 3782500 }, { "epoch": 39.92, "learning_rate": 3.0038414048566333e-05, "loss": 1.2814, "step": 3783000 }, { "epoch": 39.93, "learning_rate": 3.003577572105491e-05, "loss": 1.2565, "step": 3783500 }, { "epoch": 39.93, "learning_rate": 3.0033137393543488e-05, "loss": 1.1767, "step": 3784000 }, { "epoch": 39.94, "learning_rate": 3.0030499066032063e-05, "loss": 1.2786, "step": 3784500 }, { "epoch": 39.94, "learning_rate": 3.0027860738520642e-05, "loss": 1.2608, "step": 3785000 }, { "epoch": 39.95, "learning_rate": 3.0025222411009214e-05, "loss": 1.2622, "step": 3785500 }, { "epoch": 39.95, "learning_rate": 3.002258408349779e-05, "loss": 1.308, "step": 3786000 }, { "epoch": 39.96, "learning_rate": 3.001994575598637e-05, "loss": 1.2896, "step": 3786500 }, { "epoch": 39.97, "learning_rate": 3.0017307428474944e-05, "loss": 1.3324, "step": 3787000 }, { "epoch": 39.97, "learning_rate": 3.0014669100963516e-05, "loss": 1.2818, "step": 3787500 }, { "epoch": 39.98, "learning_rate": 3.001203077345209e-05, "loss": 1.3104, "step": 3788000 }, { "epoch": 39.98, "learning_rate": 3.000939244594067e-05, "loss": 1.3077, "step": 3788500 }, { "epoch": 39.99, "learning_rate": 3.000675411842925e-05, "loss": 1.2942, "step": 3789000 }, { "epoch": 39.99, "learning_rate": 3.0004115790917825e-05, "loss": 1.2591, "step": 3789500 }, { "epoch": 40.0, "learning_rate": 3.0001477463406397e-05, "loss": 1.2968, "step": 3790000 }, { "epoch": 40.0, "learning_rate": 2.9998839135894972e-05, "loss": 1.2073, "step": 3790500 }, { "epoch": 40.01, "learning_rate": 2.999620080838355e-05, "loss": 1.3145, "step": 3791000 }, { "epoch": 40.01, "learning_rate": 2.9993562480872127e-05, "loss": 1.3051, "step": 3791500 }, { "epoch": 40.02, "learning_rate": 2.9990924153360706e-05, "loss": 1.327, "step": 3792000 }, { "epoch": 40.02, "learning_rate": 2.9988285825849278e-05, "loss": 1.2778, "step": 3792500 }, { "epoch": 40.03, "learning_rate": 2.9985647498337853e-05, "loss": 1.2692, "step": 3793000 }, { "epoch": 40.03, "learning_rate": 2.9983009170826432e-05, "loss": 1.263, "step": 3793500 }, { "epoch": 40.04, "learning_rate": 2.9980370843315008e-05, "loss": 1.2983, "step": 3794000 }, { "epoch": 40.04, "learning_rate": 2.9977732515803587e-05, "loss": 1.2429, "step": 3794500 }, { "epoch": 40.05, "learning_rate": 2.997509418829216e-05, "loss": 1.2505, "step": 3795000 }, { "epoch": 40.06, "learning_rate": 2.9972455860780734e-05, "loss": 1.262, "step": 3795500 }, { "epoch": 40.06, "learning_rate": 2.9969817533269313e-05, "loss": 1.2848, "step": 3796000 }, { "epoch": 40.07, "learning_rate": 2.996717920575789e-05, "loss": 1.24, "step": 3796500 }, { "epoch": 40.07, "learning_rate": 2.996454087824646e-05, "loss": 1.2671, "step": 3797000 }, { "epoch": 40.08, "learning_rate": 2.996190255073504e-05, "loss": 1.2988, "step": 3797500 }, { "epoch": 40.08, "learning_rate": 2.9959264223223615e-05, "loss": 1.2656, "step": 3798000 }, { "epoch": 40.09, "learning_rate": 2.9956625895712194e-05, "loss": 1.2575, "step": 3798500 }, { "epoch": 40.09, "learning_rate": 2.995398756820077e-05, "loss": 1.2889, "step": 3799000 }, { "epoch": 40.1, "learning_rate": 2.995134924068934e-05, "loss": 1.2592, "step": 3799500 }, { "epoch": 40.1, "learning_rate": 2.994871091317792e-05, "loss": 1.2684, "step": 3800000 }, { "epoch": 40.11, "learning_rate": 2.9946072585666496e-05, "loss": 1.2743, "step": 3800500 }, { "epoch": 40.11, "learning_rate": 2.9943434258155075e-05, "loss": 1.2797, "step": 3801000 }, { "epoch": 40.12, "learning_rate": 2.994079593064365e-05, "loss": 1.2696, "step": 3801500 }, { "epoch": 40.12, "learning_rate": 2.9938157603132222e-05, "loss": 1.2236, "step": 3802000 }, { "epoch": 40.13, "learning_rate": 2.9935519275620798e-05, "loss": 1.2962, "step": 3802500 }, { "epoch": 40.13, "learning_rate": 2.9932880948109377e-05, "loss": 1.2417, "step": 3803000 }, { "epoch": 40.14, "learning_rate": 2.9930242620597952e-05, "loss": 1.2544, "step": 3803500 }, { "epoch": 40.14, "learning_rate": 2.992760429308653e-05, "loss": 1.3332, "step": 3804000 }, { "epoch": 40.15, "learning_rate": 2.9924965965575103e-05, "loss": 1.2941, "step": 3804500 }, { "epoch": 40.16, "learning_rate": 2.992232763806368e-05, "loss": 1.2687, "step": 3805000 }, { "epoch": 40.16, "learning_rate": 2.9919689310552257e-05, "loss": 1.2653, "step": 3805500 }, { "epoch": 40.17, "learning_rate": 2.9917050983040833e-05, "loss": 1.3053, "step": 3806000 }, { "epoch": 40.17, "learning_rate": 2.9914412655529405e-05, "loss": 1.3103, "step": 3806500 }, { "epoch": 40.18, "learning_rate": 2.9911774328017984e-05, "loss": 1.3036, "step": 3807000 }, { "epoch": 40.18, "learning_rate": 2.990913600050656e-05, "loss": 1.2935, "step": 3807500 }, { "epoch": 40.19, "learning_rate": 2.9906497672995138e-05, "loss": 1.253, "step": 3808000 }, { "epoch": 40.19, "learning_rate": 2.9903859345483714e-05, "loss": 1.3203, "step": 3808500 }, { "epoch": 40.2, "learning_rate": 2.9901221017972286e-05, "loss": 1.2584, "step": 3809000 }, { "epoch": 40.2, "learning_rate": 2.9898582690460865e-05, "loss": 1.2824, "step": 3809500 }, { "epoch": 40.21, "learning_rate": 2.989594436294944e-05, "loss": 1.2789, "step": 3810000 }, { "epoch": 40.21, "learning_rate": 2.989330603543802e-05, "loss": 1.3012, "step": 3810500 }, { "epoch": 40.22, "learning_rate": 2.9890667707926595e-05, "loss": 1.3143, "step": 3811000 }, { "epoch": 40.22, "learning_rate": 2.9888029380415167e-05, "loss": 1.2876, "step": 3811500 }, { "epoch": 40.23, "learning_rate": 2.9885391052903746e-05, "loss": 1.3121, "step": 3812000 }, { "epoch": 40.23, "learning_rate": 2.988275272539232e-05, "loss": 1.291, "step": 3812500 }, { "epoch": 40.24, "learning_rate": 2.98801143978809e-05, "loss": 1.3156, "step": 3813000 }, { "epoch": 40.25, "learning_rate": 2.987747607036947e-05, "loss": 1.2794, "step": 3813500 }, { "epoch": 40.25, "learning_rate": 2.9874837742858047e-05, "loss": 1.3067, "step": 3814000 }, { "epoch": 40.26, "learning_rate": 2.9872199415346623e-05, "loss": 1.2771, "step": 3814500 }, { "epoch": 40.26, "learning_rate": 2.9869561087835202e-05, "loss": 1.2814, "step": 3815000 }, { "epoch": 40.27, "learning_rate": 2.9866922760323777e-05, "loss": 1.3138, "step": 3815500 }, { "epoch": 40.27, "learning_rate": 2.986428443281235e-05, "loss": 1.2927, "step": 3816000 }, { "epoch": 40.28, "learning_rate": 2.986164610530093e-05, "loss": 1.2839, "step": 3816500 }, { "epoch": 40.28, "learning_rate": 2.9859007777789504e-05, "loss": 1.2602, "step": 3817000 }, { "epoch": 40.29, "learning_rate": 2.9856369450278083e-05, "loss": 1.2687, "step": 3817500 }, { "epoch": 40.29, "learning_rate": 2.9853731122766658e-05, "loss": 1.3279, "step": 3818000 }, { "epoch": 40.3, "learning_rate": 2.985109279525523e-05, "loss": 1.2802, "step": 3818500 }, { "epoch": 40.3, "learning_rate": 2.984845446774381e-05, "loss": 1.2766, "step": 3819000 }, { "epoch": 40.31, "learning_rate": 2.9845816140232385e-05, "loss": 1.2289, "step": 3819500 }, { "epoch": 40.31, "learning_rate": 2.9843177812720964e-05, "loss": 1.264, "step": 3820000 }, { "epoch": 40.32, "learning_rate": 2.984053948520954e-05, "loss": 1.2971, "step": 3820500 }, { "epoch": 40.32, "learning_rate": 2.983790115769811e-05, "loss": 1.2918, "step": 3821000 }, { "epoch": 40.33, "learning_rate": 2.983526283018669e-05, "loss": 1.2784, "step": 3821500 }, { "epoch": 40.33, "learning_rate": 2.9832624502675265e-05, "loss": 1.2708, "step": 3822000 }, { "epoch": 40.34, "learning_rate": 2.9829986175163844e-05, "loss": 1.2726, "step": 3822500 }, { "epoch": 40.35, "learning_rate": 2.9827347847652416e-05, "loss": 1.3161, "step": 3823000 }, { "epoch": 40.35, "learning_rate": 2.9824709520140992e-05, "loss": 1.2442, "step": 3823500 }, { "epoch": 40.36, "learning_rate": 2.982207119262957e-05, "loss": 1.2927, "step": 3824000 }, { "epoch": 40.36, "learning_rate": 2.9819432865118146e-05, "loss": 1.2855, "step": 3824500 }, { "epoch": 40.37, "learning_rate": 2.9816794537606725e-05, "loss": 1.233, "step": 3825000 }, { "epoch": 40.37, "learning_rate": 2.9814156210095294e-05, "loss": 1.2776, "step": 3825500 }, { "epoch": 40.38, "learning_rate": 2.9811517882583873e-05, "loss": 1.3113, "step": 3826000 }, { "epoch": 40.38, "learning_rate": 2.9808879555072448e-05, "loss": 1.2881, "step": 3826500 }, { "epoch": 40.39, "learning_rate": 2.9806241227561027e-05, "loss": 1.2635, "step": 3827000 }, { "epoch": 40.39, "learning_rate": 2.9803602900049603e-05, "loss": 1.2669, "step": 3827500 }, { "epoch": 40.4, "learning_rate": 2.9800964572538175e-05, "loss": 1.3229, "step": 3828000 }, { "epoch": 40.4, "learning_rate": 2.9798326245026754e-05, "loss": 1.2892, "step": 3828500 }, { "epoch": 40.41, "learning_rate": 2.979568791751533e-05, "loss": 1.287, "step": 3829000 }, { "epoch": 40.41, "learning_rate": 2.9793049590003908e-05, "loss": 1.3417, "step": 3829500 }, { "epoch": 40.42, "learning_rate": 2.9790411262492483e-05, "loss": 1.2762, "step": 3830000 }, { "epoch": 40.42, "learning_rate": 2.9787772934981056e-05, "loss": 1.2892, "step": 3830500 }, { "epoch": 40.43, "learning_rate": 2.9785134607469634e-05, "loss": 1.2975, "step": 3831000 }, { "epoch": 40.44, "learning_rate": 2.978249627995821e-05, "loss": 1.2667, "step": 3831500 }, { "epoch": 40.44, "learning_rate": 2.977985795244679e-05, "loss": 1.3025, "step": 3832000 }, { "epoch": 40.45, "learning_rate": 2.977721962493536e-05, "loss": 1.3342, "step": 3832500 }, { "epoch": 40.45, "learning_rate": 2.9774581297423936e-05, "loss": 1.344, "step": 3833000 }, { "epoch": 40.46, "learning_rate": 2.9771942969912515e-05, "loss": 1.2725, "step": 3833500 }, { "epoch": 40.46, "learning_rate": 2.976930464240109e-05, "loss": 1.297, "step": 3834000 }, { "epoch": 40.47, "learning_rate": 2.976666631488967e-05, "loss": 1.2629, "step": 3834500 }, { "epoch": 40.47, "learning_rate": 2.976402798737824e-05, "loss": 1.2456, "step": 3835000 }, { "epoch": 40.48, "learning_rate": 2.9761389659866817e-05, "loss": 1.2878, "step": 3835500 }, { "epoch": 40.48, "learning_rate": 2.9758751332355396e-05, "loss": 1.301, "step": 3836000 }, { "epoch": 40.49, "learning_rate": 2.975611300484397e-05, "loss": 1.3487, "step": 3836500 }, { "epoch": 40.49, "learning_rate": 2.975347467733255e-05, "loss": 1.3096, "step": 3837000 }, { "epoch": 40.5, "learning_rate": 2.975083634982112e-05, "loss": 1.2346, "step": 3837500 }, { "epoch": 40.5, "learning_rate": 2.9748198022309698e-05, "loss": 1.3194, "step": 3838000 }, { "epoch": 40.51, "learning_rate": 2.9745559694798273e-05, "loss": 1.2866, "step": 3838500 }, { "epoch": 40.51, "learning_rate": 2.9742921367286852e-05, "loss": 1.2559, "step": 3839000 }, { "epoch": 40.52, "learning_rate": 2.9740283039775428e-05, "loss": 1.2477, "step": 3839500 }, { "epoch": 40.52, "learning_rate": 2.9737644712264e-05, "loss": 1.3168, "step": 3840000 }, { "epoch": 40.53, "learning_rate": 2.973500638475258e-05, "loss": 1.2672, "step": 3840500 }, { "epoch": 40.54, "learning_rate": 2.9732368057241154e-05, "loss": 1.2665, "step": 3841000 }, { "epoch": 40.54, "learning_rate": 2.9729729729729733e-05, "loss": 1.275, "step": 3841500 }, { "epoch": 40.55, "learning_rate": 2.9727091402218305e-05, "loss": 1.2878, "step": 3842000 }, { "epoch": 40.55, "learning_rate": 2.972445307470688e-05, "loss": 1.3002, "step": 3842500 }, { "epoch": 40.56, "learning_rate": 2.972181474719546e-05, "loss": 1.3255, "step": 3843000 }, { "epoch": 40.56, "learning_rate": 2.9719176419684035e-05, "loss": 1.2739, "step": 3843500 }, { "epoch": 40.57, "learning_rate": 2.9716538092172614e-05, "loss": 1.2965, "step": 3844000 }, { "epoch": 40.57, "learning_rate": 2.9713899764661186e-05, "loss": 1.2723, "step": 3844500 }, { "epoch": 40.58, "learning_rate": 2.971126143714976e-05, "loss": 1.2924, "step": 3845000 }, { "epoch": 40.58, "learning_rate": 2.970862310963834e-05, "loss": 1.2846, "step": 3845500 }, { "epoch": 40.59, "learning_rate": 2.9705984782126916e-05, "loss": 1.2413, "step": 3846000 }, { "epoch": 40.59, "learning_rate": 2.9703346454615495e-05, "loss": 1.2336, "step": 3846500 }, { "epoch": 40.6, "learning_rate": 2.9700708127104067e-05, "loss": 1.296, "step": 3847000 }, { "epoch": 40.6, "learning_rate": 2.9698069799592642e-05, "loss": 1.3092, "step": 3847500 }, { "epoch": 40.61, "learning_rate": 2.969543147208122e-05, "loss": 1.2596, "step": 3848000 }, { "epoch": 40.61, "learning_rate": 2.9692793144569797e-05, "loss": 1.3005, "step": 3848500 }, { "epoch": 40.62, "learning_rate": 2.9690154817058376e-05, "loss": 1.2493, "step": 3849000 }, { "epoch": 40.62, "learning_rate": 2.9687516489546944e-05, "loss": 1.2205, "step": 3849500 }, { "epoch": 40.63, "learning_rate": 2.9684878162035523e-05, "loss": 1.2676, "step": 3850000 }, { "epoch": 40.64, "learning_rate": 2.96822398345241e-05, "loss": 1.2452, "step": 3850500 }, { "epoch": 40.64, "learning_rate": 2.9679601507012678e-05, "loss": 1.2786, "step": 3851000 }, { "epoch": 40.65, "learning_rate": 2.967696317950125e-05, "loss": 1.2345, "step": 3851500 }, { "epoch": 40.65, "learning_rate": 2.9674324851989825e-05, "loss": 1.2567, "step": 3852000 }, { "epoch": 40.66, "learning_rate": 2.9671686524478404e-05, "loss": 1.3184, "step": 3852500 }, { "epoch": 40.66, "learning_rate": 2.966904819696698e-05, "loss": 1.2621, "step": 3853000 }, { "epoch": 40.67, "learning_rate": 2.966640986945556e-05, "loss": 1.2586, "step": 3853500 }, { "epoch": 40.67, "learning_rate": 2.966377154194413e-05, "loss": 1.2999, "step": 3854000 }, { "epoch": 40.68, "learning_rate": 2.9661133214432706e-05, "loss": 1.3012, "step": 3854500 }, { "epoch": 40.68, "learning_rate": 2.9658494886921285e-05, "loss": 1.3094, "step": 3855000 }, { "epoch": 40.69, "learning_rate": 2.965585655940986e-05, "loss": 1.2551, "step": 3855500 }, { "epoch": 40.69, "learning_rate": 2.965321823189844e-05, "loss": 1.2835, "step": 3856000 }, { "epoch": 40.7, "learning_rate": 2.965057990438701e-05, "loss": 1.2885, "step": 3856500 }, { "epoch": 40.7, "learning_rate": 2.9647941576875587e-05, "loss": 1.2825, "step": 3857000 }, { "epoch": 40.71, "learning_rate": 2.9645303249364166e-05, "loss": 1.3136, "step": 3857500 }, { "epoch": 40.71, "learning_rate": 2.964266492185274e-05, "loss": 1.2414, "step": 3858000 }, { "epoch": 40.72, "learning_rate": 2.964002659434132e-05, "loss": 1.2583, "step": 3858500 }, { "epoch": 40.73, "learning_rate": 2.9637388266829892e-05, "loss": 1.2388, "step": 3859000 }, { "epoch": 40.73, "learning_rate": 2.9634749939318468e-05, "loss": 1.322, "step": 3859500 }, { "epoch": 40.74, "learning_rate": 2.9632111611807047e-05, "loss": 1.299, "step": 3860000 }, { "epoch": 40.74, "learning_rate": 2.9629473284295622e-05, "loss": 1.2891, "step": 3860500 }, { "epoch": 40.75, "learning_rate": 2.9626834956784194e-05, "loss": 1.3002, "step": 3861000 }, { "epoch": 40.75, "learning_rate": 2.962419662927277e-05, "loss": 1.2993, "step": 3861500 }, { "epoch": 40.76, "learning_rate": 2.962155830176135e-05, "loss": 1.3152, "step": 3862000 }, { "epoch": 40.76, "learning_rate": 2.9618919974249927e-05, "loss": 1.2641, "step": 3862500 }, { "epoch": 40.77, "learning_rate": 2.9616281646738503e-05, "loss": 1.2902, "step": 3863000 }, { "epoch": 40.77, "learning_rate": 2.9613643319227075e-05, "loss": 1.3493, "step": 3863500 }, { "epoch": 40.78, "learning_rate": 2.961100499171565e-05, "loss": 1.2589, "step": 3864000 }, { "epoch": 40.78, "learning_rate": 2.960836666420423e-05, "loss": 1.2822, "step": 3864500 }, { "epoch": 40.79, "learning_rate": 2.9605728336692805e-05, "loss": 1.3098, "step": 3865000 }, { "epoch": 40.79, "learning_rate": 2.9603090009181384e-05, "loss": 1.3106, "step": 3865500 }, { "epoch": 40.8, "learning_rate": 2.9600451681669956e-05, "loss": 1.2315, "step": 3866000 }, { "epoch": 40.8, "learning_rate": 2.959781335415853e-05, "loss": 1.2861, "step": 3866500 }, { "epoch": 40.81, "learning_rate": 2.959517502664711e-05, "loss": 1.2881, "step": 3867000 }, { "epoch": 40.81, "learning_rate": 2.9592536699135686e-05, "loss": 1.3084, "step": 3867500 }, { "epoch": 40.82, "learning_rate": 2.9589898371624264e-05, "loss": 1.2859, "step": 3868000 }, { "epoch": 40.83, "learning_rate": 2.9587260044112837e-05, "loss": 1.3075, "step": 3868500 }, { "epoch": 40.83, "learning_rate": 2.9584621716601412e-05, "loss": 1.3354, "step": 3869000 }, { "epoch": 40.84, "learning_rate": 2.958198338908999e-05, "loss": 1.307, "step": 3869500 }, { "epoch": 40.84, "learning_rate": 2.9579345061578566e-05, "loss": 1.3242, "step": 3870000 }, { "epoch": 40.85, "learning_rate": 2.957670673406714e-05, "loss": 1.2135, "step": 3870500 }, { "epoch": 40.85, "learning_rate": 2.9574068406555717e-05, "loss": 1.2997, "step": 3871000 }, { "epoch": 40.86, "learning_rate": 2.9571430079044293e-05, "loss": 1.2701, "step": 3871500 }, { "epoch": 40.86, "learning_rate": 2.9568791751532872e-05, "loss": 1.2237, "step": 3872000 }, { "epoch": 40.87, "learning_rate": 2.9566153424021447e-05, "loss": 1.2509, "step": 3872500 }, { "epoch": 40.87, "learning_rate": 2.956351509651002e-05, "loss": 1.3115, "step": 3873000 }, { "epoch": 40.88, "learning_rate": 2.9560876768998598e-05, "loss": 1.281, "step": 3873500 }, { "epoch": 40.88, "learning_rate": 2.9558238441487174e-05, "loss": 1.2742, "step": 3874000 }, { "epoch": 40.89, "learning_rate": 2.9555600113975753e-05, "loss": 1.216, "step": 3874500 }, { "epoch": 40.89, "learning_rate": 2.9552961786464328e-05, "loss": 1.2375, "step": 3875000 }, { "epoch": 40.9, "learning_rate": 2.95503234589529e-05, "loss": 1.3085, "step": 3875500 }, { "epoch": 40.9, "learning_rate": 2.9547685131441476e-05, "loss": 1.2771, "step": 3876000 }, { "epoch": 40.91, "learning_rate": 2.9545046803930055e-05, "loss": 1.3081, "step": 3876500 }, { "epoch": 40.92, "learning_rate": 2.954240847641863e-05, "loss": 1.2853, "step": 3877000 }, { "epoch": 40.92, "learning_rate": 2.953977014890721e-05, "loss": 1.318, "step": 3877500 }, { "epoch": 40.93, "learning_rate": 2.953713182139578e-05, "loss": 1.2957, "step": 3878000 }, { "epoch": 40.93, "learning_rate": 2.9534493493884356e-05, "loss": 1.2807, "step": 3878500 }, { "epoch": 40.94, "learning_rate": 2.9531855166372935e-05, "loss": 1.3099, "step": 3879000 }, { "epoch": 40.94, "learning_rate": 2.952921683886151e-05, "loss": 1.3132, "step": 3879500 }, { "epoch": 40.95, "learning_rate": 2.9526578511350083e-05, "loss": 1.2709, "step": 3880000 }, { "epoch": 40.95, "learning_rate": 2.9523940183838662e-05, "loss": 1.2494, "step": 3880500 }, { "epoch": 40.96, "learning_rate": 2.9521301856327237e-05, "loss": 1.222, "step": 3881000 }, { "epoch": 40.96, "learning_rate": 2.9518663528815816e-05, "loss": 1.2866, "step": 3881500 }, { "epoch": 40.97, "learning_rate": 2.951602520130439e-05, "loss": 1.2742, "step": 3882000 }, { "epoch": 40.97, "learning_rate": 2.9513386873792964e-05, "loss": 1.2825, "step": 3882500 }, { "epoch": 40.98, "learning_rate": 2.9510748546281543e-05, "loss": 1.2741, "step": 3883000 }, { "epoch": 40.98, "learning_rate": 2.9508110218770118e-05, "loss": 1.2877, "step": 3883500 }, { "epoch": 40.99, "learning_rate": 2.9505471891258697e-05, "loss": 1.3264, "step": 3884000 }, { "epoch": 40.99, "learning_rate": 2.9502833563747273e-05, "loss": 1.2687, "step": 3884500 }, { "epoch": 41.0, "learning_rate": 2.9500195236235845e-05, "loss": 1.2987, "step": 3885000 }, { "epoch": 41.0, "learning_rate": 2.9497556908724423e-05, "loss": 1.2782, "step": 3885500 }, { "epoch": 41.01, "learning_rate": 2.9494918581213e-05, "loss": 1.2783, "step": 3886000 }, { "epoch": 41.02, "learning_rate": 2.9492280253701578e-05, "loss": 1.2871, "step": 3886500 }, { "epoch": 41.02, "learning_rate": 2.9489641926190153e-05, "loss": 1.2709, "step": 3887000 }, { "epoch": 41.03, "learning_rate": 2.9487003598678725e-05, "loss": 1.2787, "step": 3887500 }, { "epoch": 41.03, "learning_rate": 2.94843652711673e-05, "loss": 1.2966, "step": 3888000 }, { "epoch": 41.04, "learning_rate": 2.948172694365588e-05, "loss": 1.2757, "step": 3888500 }, { "epoch": 41.04, "learning_rate": 2.9479088616144455e-05, "loss": 1.3147, "step": 3889000 }, { "epoch": 41.05, "learning_rate": 2.9476450288633027e-05, "loss": 1.2713, "step": 3889500 }, { "epoch": 41.05, "learning_rate": 2.9473811961121606e-05, "loss": 1.2595, "step": 3890000 }, { "epoch": 41.06, "learning_rate": 2.9471173633610182e-05, "loss": 1.2561, "step": 3890500 }, { "epoch": 41.06, "learning_rate": 2.946853530609876e-05, "loss": 1.22, "step": 3891000 }, { "epoch": 41.07, "learning_rate": 2.9465896978587336e-05, "loss": 1.2484, "step": 3891500 }, { "epoch": 41.07, "learning_rate": 2.9463258651075908e-05, "loss": 1.2289, "step": 3892000 }, { "epoch": 41.08, "learning_rate": 2.9460620323564487e-05, "loss": 1.2898, "step": 3892500 }, { "epoch": 41.08, "learning_rate": 2.9457981996053063e-05, "loss": 1.3129, "step": 3893000 }, { "epoch": 41.09, "learning_rate": 2.945534366854164e-05, "loss": 1.2559, "step": 3893500 }, { "epoch": 41.09, "learning_rate": 2.9452705341030217e-05, "loss": 1.2632, "step": 3894000 }, { "epoch": 41.1, "learning_rate": 2.945006701351879e-05, "loss": 1.2475, "step": 3894500 }, { "epoch": 41.11, "learning_rate": 2.9447428686007368e-05, "loss": 1.2604, "step": 3895000 }, { "epoch": 41.11, "learning_rate": 2.9444790358495943e-05, "loss": 1.3092, "step": 3895500 }, { "epoch": 41.12, "learning_rate": 2.9442152030984522e-05, "loss": 1.2973, "step": 3896000 }, { "epoch": 41.12, "learning_rate": 2.9439513703473098e-05, "loss": 1.2917, "step": 3896500 }, { "epoch": 41.13, "learning_rate": 2.943687537596167e-05, "loss": 1.3229, "step": 3897000 }, { "epoch": 41.13, "learning_rate": 2.943423704845025e-05, "loss": 1.3144, "step": 3897500 }, { "epoch": 41.14, "learning_rate": 2.9431598720938824e-05, "loss": 1.3009, "step": 3898000 }, { "epoch": 41.14, "learning_rate": 2.9428960393427403e-05, "loss": 1.2182, "step": 3898500 }, { "epoch": 41.15, "learning_rate": 2.9426322065915972e-05, "loss": 1.2465, "step": 3899000 }, { "epoch": 41.15, "learning_rate": 2.942368373840455e-05, "loss": 1.2324, "step": 3899500 }, { "epoch": 41.16, "learning_rate": 2.9421045410893126e-05, "loss": 1.2768, "step": 3900000 }, { "epoch": 41.16, "learning_rate": 2.9418407083381705e-05, "loss": 1.2958, "step": 3900500 }, { "epoch": 41.17, "learning_rate": 2.941576875587028e-05, "loss": 1.2084, "step": 3901000 }, { "epoch": 41.17, "learning_rate": 2.9413130428358853e-05, "loss": 1.2451, "step": 3901500 }, { "epoch": 41.18, "learning_rate": 2.941049210084743e-05, "loss": 1.2615, "step": 3902000 }, { "epoch": 41.18, "learning_rate": 2.9407853773336007e-05, "loss": 1.2457, "step": 3902500 }, { "epoch": 41.19, "learning_rate": 2.9405215445824586e-05, "loss": 1.2258, "step": 3903000 }, { "epoch": 41.19, "learning_rate": 2.940257711831316e-05, "loss": 1.3397, "step": 3903500 }, { "epoch": 41.2, "learning_rate": 2.9399938790801733e-05, "loss": 1.2863, "step": 3904000 }, { "epoch": 41.21, "learning_rate": 2.9397300463290312e-05, "loss": 1.2657, "step": 3904500 }, { "epoch": 41.21, "learning_rate": 2.9394662135778888e-05, "loss": 1.2744, "step": 3905000 }, { "epoch": 41.22, "learning_rate": 2.9392023808267467e-05, "loss": 1.2772, "step": 3905500 }, { "epoch": 41.22, "learning_rate": 2.9389385480756042e-05, "loss": 1.2587, "step": 3906000 }, { "epoch": 41.23, "learning_rate": 2.9386747153244614e-05, "loss": 1.2857, "step": 3906500 }, { "epoch": 41.23, "learning_rate": 2.9384108825733193e-05, "loss": 1.2379, "step": 3907000 }, { "epoch": 41.24, "learning_rate": 2.938147049822177e-05, "loss": 1.2925, "step": 3907500 }, { "epoch": 41.24, "learning_rate": 2.9378832170710348e-05, "loss": 1.3325, "step": 3908000 }, { "epoch": 41.25, "learning_rate": 2.937619384319892e-05, "loss": 1.3342, "step": 3908500 }, { "epoch": 41.25, "learning_rate": 2.9373555515687495e-05, "loss": 1.2762, "step": 3909000 }, { "epoch": 41.26, "learning_rate": 2.9370917188176074e-05, "loss": 1.2631, "step": 3909500 }, { "epoch": 41.26, "learning_rate": 2.936827886066465e-05, "loss": 1.3033, "step": 3910000 }, { "epoch": 41.27, "learning_rate": 2.936564053315323e-05, "loss": 1.2892, "step": 3910500 }, { "epoch": 41.27, "learning_rate": 2.9363002205641797e-05, "loss": 1.2456, "step": 3911000 }, { "epoch": 41.28, "learning_rate": 2.9360363878130376e-05, "loss": 1.1919, "step": 3911500 }, { "epoch": 41.28, "learning_rate": 2.935772555061895e-05, "loss": 1.2703, "step": 3912000 }, { "epoch": 41.29, "learning_rate": 2.935508722310753e-05, "loss": 1.3329, "step": 3912500 }, { "epoch": 41.3, "learning_rate": 2.9352448895596106e-05, "loss": 1.2833, "step": 3913000 }, { "epoch": 41.3, "learning_rate": 2.9349810568084678e-05, "loss": 1.2743, "step": 3913500 }, { "epoch": 41.31, "learning_rate": 2.9347172240573257e-05, "loss": 1.3183, "step": 3914000 }, { "epoch": 41.31, "learning_rate": 2.9344533913061832e-05, "loss": 1.2807, "step": 3914500 }, { "epoch": 41.32, "learning_rate": 2.934189558555041e-05, "loss": 1.2692, "step": 3915000 }, { "epoch": 41.32, "learning_rate": 2.9339257258038987e-05, "loss": 1.3309, "step": 3915500 }, { "epoch": 41.33, "learning_rate": 2.933661893052756e-05, "loss": 1.2836, "step": 3916000 }, { "epoch": 41.33, "learning_rate": 2.9333980603016138e-05, "loss": 1.3107, "step": 3916500 }, { "epoch": 41.34, "learning_rate": 2.9331342275504713e-05, "loss": 1.235, "step": 3917000 }, { "epoch": 41.34, "learning_rate": 2.9328703947993292e-05, "loss": 1.2778, "step": 3917500 }, { "epoch": 41.35, "learning_rate": 2.9326065620481864e-05, "loss": 1.2726, "step": 3918000 }, { "epoch": 41.35, "learning_rate": 2.932342729297044e-05, "loss": 1.304, "step": 3918500 }, { "epoch": 41.36, "learning_rate": 2.932078896545902e-05, "loss": 1.2657, "step": 3919000 }, { "epoch": 41.36, "learning_rate": 2.9318150637947594e-05, "loss": 1.2901, "step": 3919500 }, { "epoch": 41.37, "learning_rate": 2.9315512310436173e-05, "loss": 1.2499, "step": 3920000 }, { "epoch": 41.37, "learning_rate": 2.9312873982924745e-05, "loss": 1.2927, "step": 3920500 }, { "epoch": 41.38, "learning_rate": 2.931023565541332e-05, "loss": 1.311, "step": 3921000 }, { "epoch": 41.38, "learning_rate": 2.93075973279019e-05, "loss": 1.2014, "step": 3921500 }, { "epoch": 41.39, "learning_rate": 2.9304959000390475e-05, "loss": 1.2747, "step": 3922000 }, { "epoch": 41.4, "learning_rate": 2.9302320672879054e-05, "loss": 1.24, "step": 3922500 }, { "epoch": 41.4, "learning_rate": 2.9299682345367622e-05, "loss": 1.2629, "step": 3923000 }, { "epoch": 41.41, "learning_rate": 2.92970440178562e-05, "loss": 1.2601, "step": 3923500 }, { "epoch": 41.41, "learning_rate": 2.9294405690344777e-05, "loss": 1.3351, "step": 3924000 }, { "epoch": 41.42, "learning_rate": 2.9291767362833356e-05, "loss": 1.308, "step": 3924500 }, { "epoch": 41.42, "learning_rate": 2.9289129035321934e-05, "loss": 1.3393, "step": 3925000 }, { "epoch": 41.43, "learning_rate": 2.9286490707810503e-05, "loss": 1.3037, "step": 3925500 }, { "epoch": 41.43, "learning_rate": 2.9283852380299082e-05, "loss": 1.2267, "step": 3926000 }, { "epoch": 41.44, "learning_rate": 2.9281214052787657e-05, "loss": 1.2495, "step": 3926500 }, { "epoch": 41.44, "learning_rate": 2.9278575725276236e-05, "loss": 1.2476, "step": 3927000 }, { "epoch": 41.45, "learning_rate": 2.927593739776481e-05, "loss": 1.3104, "step": 3927500 }, { "epoch": 41.45, "learning_rate": 2.9273299070253384e-05, "loss": 1.2962, "step": 3928000 }, { "epoch": 41.46, "learning_rate": 2.9270660742741963e-05, "loss": 1.3519, "step": 3928500 }, { "epoch": 41.46, "learning_rate": 2.9268022415230538e-05, "loss": 1.2402, "step": 3929000 }, { "epoch": 41.47, "learning_rate": 2.9265384087719117e-05, "loss": 1.2622, "step": 3929500 }, { "epoch": 41.47, "learning_rate": 2.926274576020769e-05, "loss": 1.2771, "step": 3930000 }, { "epoch": 41.48, "learning_rate": 2.9260107432696265e-05, "loss": 1.2216, "step": 3930500 }, { "epoch": 41.49, "learning_rate": 2.9257469105184844e-05, "loss": 1.3431, "step": 3931000 }, { "epoch": 41.49, "learning_rate": 2.925483077767342e-05, "loss": 1.2765, "step": 3931500 }, { "epoch": 41.5, "learning_rate": 2.9252192450161998e-05, "loss": 1.314, "step": 3932000 }, { "epoch": 41.5, "learning_rate": 2.924955412265057e-05, "loss": 1.3019, "step": 3932500 }, { "epoch": 41.51, "learning_rate": 2.9246915795139146e-05, "loss": 1.2738, "step": 3933000 }, { "epoch": 41.51, "learning_rate": 2.9244277467627724e-05, "loss": 1.3521, "step": 3933500 }, { "epoch": 41.52, "learning_rate": 2.92416391401163e-05, "loss": 1.3333, "step": 3934000 }, { "epoch": 41.52, "learning_rate": 2.923900081260488e-05, "loss": 1.2845, "step": 3934500 }, { "epoch": 41.53, "learning_rate": 2.9236362485093448e-05, "loss": 1.2674, "step": 3935000 }, { "epoch": 41.53, "learning_rate": 2.9233724157582026e-05, "loss": 1.2631, "step": 3935500 }, { "epoch": 41.54, "learning_rate": 2.9231085830070605e-05, "loss": 1.2796, "step": 3936000 }, { "epoch": 41.54, "learning_rate": 2.922844750255918e-05, "loss": 1.3116, "step": 3936500 }, { "epoch": 41.55, "learning_rate": 2.9225809175047753e-05, "loss": 1.315, "step": 3937000 }, { "epoch": 41.55, "learning_rate": 2.922317084753633e-05, "loss": 1.2797, "step": 3937500 }, { "epoch": 41.56, "learning_rate": 2.9220532520024907e-05, "loss": 1.2644, "step": 3938000 }, { "epoch": 41.56, "learning_rate": 2.9217894192513483e-05, "loss": 1.3176, "step": 3938500 }, { "epoch": 41.57, "learning_rate": 2.921525586500206e-05, "loss": 1.2216, "step": 3939000 }, { "epoch": 41.57, "learning_rate": 2.9212617537490634e-05, "loss": 1.2379, "step": 3939500 }, { "epoch": 41.58, "learning_rate": 2.920997920997921e-05, "loss": 1.3044, "step": 3940000 }, { "epoch": 41.59, "learning_rate": 2.9207340882467788e-05, "loss": 1.2812, "step": 3940500 }, { "epoch": 41.59, "learning_rate": 2.9204702554956364e-05, "loss": 1.2792, "step": 3941000 }, { "epoch": 41.6, "learning_rate": 2.9202064227444942e-05, "loss": 1.2953, "step": 3941500 }, { "epoch": 41.6, "learning_rate": 2.9199425899933515e-05, "loss": 1.249, "step": 3942000 }, { "epoch": 41.61, "learning_rate": 2.919678757242209e-05, "loss": 1.3042, "step": 3942500 }, { "epoch": 41.61, "learning_rate": 2.919414924491067e-05, "loss": 1.2485, "step": 3943000 }, { "epoch": 41.62, "learning_rate": 2.9191510917399244e-05, "loss": 1.3151, "step": 3943500 }, { "epoch": 41.62, "learning_rate": 2.9188872589887823e-05, "loss": 1.2435, "step": 3944000 }, { "epoch": 41.63, "learning_rate": 2.9186234262376395e-05, "loss": 1.1918, "step": 3944500 }, { "epoch": 41.63, "learning_rate": 2.918359593486497e-05, "loss": 1.2874, "step": 3945000 }, { "epoch": 41.64, "learning_rate": 2.918095760735355e-05, "loss": 1.2233, "step": 3945500 }, { "epoch": 41.64, "learning_rate": 2.9178319279842125e-05, "loss": 1.2975, "step": 3946000 }, { "epoch": 41.65, "learning_rate": 2.9175680952330697e-05, "loss": 1.2814, "step": 3946500 }, { "epoch": 41.65, "learning_rate": 2.9173042624819276e-05, "loss": 1.2976, "step": 3947000 }, { "epoch": 41.66, "learning_rate": 2.917040429730785e-05, "loss": 1.263, "step": 3947500 }, { "epoch": 41.66, "learning_rate": 2.916776596979643e-05, "loss": 1.2902, "step": 3948000 }, { "epoch": 41.67, "learning_rate": 2.9165127642285006e-05, "loss": 1.2235, "step": 3948500 }, { "epoch": 41.68, "learning_rate": 2.9162489314773578e-05, "loss": 1.2764, "step": 3949000 }, { "epoch": 41.68, "learning_rate": 2.9159850987262154e-05, "loss": 1.305, "step": 3949500 }, { "epoch": 41.69, "learning_rate": 2.9157212659750732e-05, "loss": 1.3046, "step": 3950000 }, { "epoch": 41.69, "learning_rate": 2.9154574332239308e-05, "loss": 1.2743, "step": 3950500 }, { "epoch": 41.7, "learning_rate": 2.9151936004727887e-05, "loss": 1.3085, "step": 3951000 }, { "epoch": 41.7, "learning_rate": 2.914929767721646e-05, "loss": 1.3261, "step": 3951500 }, { "epoch": 41.71, "learning_rate": 2.9146659349705034e-05, "loss": 1.3159, "step": 3952000 }, { "epoch": 41.71, "learning_rate": 2.9144021022193613e-05, "loss": 1.2358, "step": 3952500 }, { "epoch": 41.72, "learning_rate": 2.914138269468219e-05, "loss": 1.2988, "step": 3953000 }, { "epoch": 41.72, "learning_rate": 2.9138744367170768e-05, "loss": 1.2806, "step": 3953500 }, { "epoch": 41.73, "learning_rate": 2.913610603965934e-05, "loss": 1.2541, "step": 3954000 }, { "epoch": 41.73, "learning_rate": 2.9133467712147915e-05, "loss": 1.267, "step": 3954500 }, { "epoch": 41.74, "learning_rate": 2.9130829384636494e-05, "loss": 1.2859, "step": 3955000 }, { "epoch": 41.74, "learning_rate": 2.912819105712507e-05, "loss": 1.2896, "step": 3955500 }, { "epoch": 41.75, "learning_rate": 2.9125552729613642e-05, "loss": 1.2515, "step": 3956000 }, { "epoch": 41.75, "learning_rate": 2.912291440210222e-05, "loss": 1.3311, "step": 3956500 }, { "epoch": 41.76, "learning_rate": 2.9120276074590796e-05, "loss": 1.2552, "step": 3957000 }, { "epoch": 41.76, "learning_rate": 2.9117637747079375e-05, "loss": 1.2881, "step": 3957500 }, { "epoch": 41.77, "learning_rate": 2.911499941956795e-05, "loss": 1.2998, "step": 3958000 }, { "epoch": 41.78, "learning_rate": 2.9112361092056523e-05, "loss": 1.2806, "step": 3958500 }, { "epoch": 41.78, "learning_rate": 2.91097227645451e-05, "loss": 1.3078, "step": 3959000 }, { "epoch": 41.79, "learning_rate": 2.9107084437033677e-05, "loss": 1.2879, "step": 3959500 }, { "epoch": 41.79, "learning_rate": 2.9104446109522256e-05, "loss": 1.3182, "step": 3960000 }, { "epoch": 41.8, "learning_rate": 2.910180778201083e-05, "loss": 1.2441, "step": 3960500 }, { "epoch": 41.8, "learning_rate": 2.9099169454499403e-05, "loss": 1.2734, "step": 3961000 }, { "epoch": 41.81, "learning_rate": 2.909653112698798e-05, "loss": 1.2831, "step": 3961500 }, { "epoch": 41.81, "learning_rate": 2.9093892799476558e-05, "loss": 1.2526, "step": 3962000 }, { "epoch": 41.82, "learning_rate": 2.9091254471965133e-05, "loss": 1.3214, "step": 3962500 }, { "epoch": 41.82, "learning_rate": 2.9088616144453712e-05, "loss": 1.3016, "step": 3963000 }, { "epoch": 41.83, "learning_rate": 2.9085977816942284e-05, "loss": 1.2588, "step": 3963500 }, { "epoch": 41.83, "learning_rate": 2.908333948943086e-05, "loss": 1.214, "step": 3964000 }, { "epoch": 41.84, "learning_rate": 2.908070116191944e-05, "loss": 1.3237, "step": 3964500 }, { "epoch": 41.84, "learning_rate": 2.9078062834408014e-05, "loss": 1.2564, "step": 3965000 }, { "epoch": 41.85, "learning_rate": 2.9075424506896586e-05, "loss": 1.2834, "step": 3965500 }, { "epoch": 41.85, "learning_rate": 2.9072786179385165e-05, "loss": 1.2958, "step": 3966000 }, { "epoch": 41.86, "learning_rate": 2.907014785187374e-05, "loss": 1.268, "step": 3966500 }, { "epoch": 41.86, "learning_rate": 2.906750952436232e-05, "loss": 1.2976, "step": 3967000 }, { "epoch": 41.87, "learning_rate": 2.9064871196850895e-05, "loss": 1.2896, "step": 3967500 }, { "epoch": 41.88, "learning_rate": 2.9062232869339467e-05, "loss": 1.2822, "step": 3968000 }, { "epoch": 41.88, "learning_rate": 2.9059594541828046e-05, "loss": 1.3549, "step": 3968500 }, { "epoch": 41.89, "learning_rate": 2.905695621431662e-05, "loss": 1.2775, "step": 3969000 }, { "epoch": 41.89, "learning_rate": 2.90543178868052e-05, "loss": 1.3109, "step": 3969500 }, { "epoch": 41.9, "learning_rate": 2.9051679559293776e-05, "loss": 1.2612, "step": 3970000 }, { "epoch": 41.9, "learning_rate": 2.9049041231782348e-05, "loss": 1.3096, "step": 3970500 }, { "epoch": 41.91, "learning_rate": 2.9046402904270927e-05, "loss": 1.2832, "step": 3971000 }, { "epoch": 41.91, "learning_rate": 2.9043764576759502e-05, "loss": 1.2875, "step": 3971500 }, { "epoch": 41.92, "learning_rate": 2.904112624924808e-05, "loss": 1.2729, "step": 3972000 }, { "epoch": 41.92, "learning_rate": 2.9038487921736657e-05, "loss": 1.2272, "step": 3972500 }, { "epoch": 41.93, "learning_rate": 2.903584959422523e-05, "loss": 1.3132, "step": 3973000 }, { "epoch": 41.93, "learning_rate": 2.9033211266713804e-05, "loss": 1.247, "step": 3973500 }, { "epoch": 41.94, "learning_rate": 2.9030572939202383e-05, "loss": 1.2938, "step": 3974000 }, { "epoch": 41.94, "learning_rate": 2.902793461169096e-05, "loss": 1.2442, "step": 3974500 }, { "epoch": 41.95, "learning_rate": 2.902529628417953e-05, "loss": 1.2343, "step": 3975000 }, { "epoch": 41.95, "learning_rate": 2.902265795666811e-05, "loss": 1.2513, "step": 3975500 }, { "epoch": 41.96, "learning_rate": 2.9020019629156685e-05, "loss": 1.3541, "step": 3976000 }, { "epoch": 41.97, "learning_rate": 2.9017381301645264e-05, "loss": 1.3127, "step": 3976500 }, { "epoch": 41.97, "learning_rate": 2.901474297413384e-05, "loss": 1.3185, "step": 3977000 }, { "epoch": 41.98, "learning_rate": 2.901210464662241e-05, "loss": 1.3199, "step": 3977500 }, { "epoch": 41.98, "learning_rate": 2.900946631911099e-05, "loss": 1.2896, "step": 3978000 }, { "epoch": 41.99, "learning_rate": 2.9006827991599566e-05, "loss": 1.2315, "step": 3978500 }, { "epoch": 41.99, "learning_rate": 2.9004189664088145e-05, "loss": 1.286, "step": 3979000 }, { "epoch": 42.0, "learning_rate": 2.900155133657672e-05, "loss": 1.2375, "step": 3979500 }, { "epoch": 42.0, "learning_rate": 2.8998913009065292e-05, "loss": 1.256, "step": 3980000 }, { "epoch": 42.01, "learning_rate": 2.899627468155387e-05, "loss": 1.2418, "step": 3980500 }, { "epoch": 42.01, "learning_rate": 2.8993636354042447e-05, "loss": 1.2534, "step": 3981000 }, { "epoch": 42.02, "learning_rate": 2.8990998026531025e-05, "loss": 1.2367, "step": 3981500 }, { "epoch": 42.02, "learning_rate": 2.89883596990196e-05, "loss": 1.2877, "step": 3982000 }, { "epoch": 42.03, "learning_rate": 2.8985721371508173e-05, "loss": 1.2967, "step": 3982500 }, { "epoch": 42.03, "learning_rate": 2.8983083043996752e-05, "loss": 1.2861, "step": 3983000 }, { "epoch": 42.04, "learning_rate": 2.8980444716485327e-05, "loss": 1.2334, "step": 3983500 }, { "epoch": 42.04, "learning_rate": 2.8977806388973906e-05, "loss": 1.2337, "step": 3984000 }, { "epoch": 42.05, "learning_rate": 2.8975168061462475e-05, "loss": 1.2845, "step": 3984500 }, { "epoch": 42.05, "learning_rate": 2.8972529733951054e-05, "loss": 1.239, "step": 3985000 }, { "epoch": 42.06, "learning_rate": 2.896989140643963e-05, "loss": 1.2662, "step": 3985500 }, { "epoch": 42.07, "learning_rate": 2.8967253078928208e-05, "loss": 1.2181, "step": 3986000 }, { "epoch": 42.07, "learning_rate": 2.8964614751416784e-05, "loss": 1.3112, "step": 3986500 }, { "epoch": 42.08, "learning_rate": 2.8961976423905356e-05, "loss": 1.2161, "step": 3987000 }, { "epoch": 42.08, "learning_rate": 2.8959338096393935e-05, "loss": 1.2596, "step": 3987500 }, { "epoch": 42.09, "learning_rate": 2.895669976888251e-05, "loss": 1.2455, "step": 3988000 }, { "epoch": 42.09, "learning_rate": 2.895406144137109e-05, "loss": 1.2925, "step": 3988500 }, { "epoch": 42.1, "learning_rate": 2.8951423113859665e-05, "loss": 1.3084, "step": 3989000 }, { "epoch": 42.1, "learning_rate": 2.8948784786348237e-05, "loss": 1.2711, "step": 3989500 }, { "epoch": 42.11, "learning_rate": 2.8946146458836816e-05, "loss": 1.2956, "step": 3990000 }, { "epoch": 42.11, "learning_rate": 2.894350813132539e-05, "loss": 1.2159, "step": 3990500 }, { "epoch": 42.12, "learning_rate": 2.894086980381397e-05, "loss": 1.3134, "step": 3991000 }, { "epoch": 42.12, "learning_rate": 2.8938231476302545e-05, "loss": 1.2774, "step": 3991500 }, { "epoch": 42.13, "learning_rate": 2.8935593148791117e-05, "loss": 1.2675, "step": 3992000 }, { "epoch": 42.13, "learning_rate": 2.8932954821279696e-05, "loss": 1.2479, "step": 3992500 }, { "epoch": 42.14, "learning_rate": 2.8930316493768272e-05, "loss": 1.2596, "step": 3993000 }, { "epoch": 42.14, "learning_rate": 2.892767816625685e-05, "loss": 1.3228, "step": 3993500 }, { "epoch": 42.15, "learning_rate": 2.8925039838745423e-05, "loss": 1.2573, "step": 3994000 }, { "epoch": 42.16, "learning_rate": 2.8922401511233998e-05, "loss": 1.2541, "step": 3994500 }, { "epoch": 42.16, "learning_rate": 2.8919763183722577e-05, "loss": 1.2875, "step": 3995000 }, { "epoch": 42.17, "learning_rate": 2.8917124856211153e-05, "loss": 1.264, "step": 3995500 }, { "epoch": 42.17, "learning_rate": 2.891448652869973e-05, "loss": 1.2671, "step": 3996000 }, { "epoch": 42.18, "learning_rate": 2.89118482011883e-05, "loss": 1.2826, "step": 3996500 }, { "epoch": 42.18, "learning_rate": 2.890920987367688e-05, "loss": 1.2997, "step": 3997000 }, { "epoch": 42.19, "learning_rate": 2.8906571546165455e-05, "loss": 1.2166, "step": 3997500 }, { "epoch": 42.19, "learning_rate": 2.8903933218654033e-05, "loss": 1.2844, "step": 3998000 }, { "epoch": 42.2, "learning_rate": 2.8901294891142612e-05, "loss": 1.2475, "step": 3998500 }, { "epoch": 42.2, "learning_rate": 2.889865656363118e-05, "loss": 1.2709, "step": 3999000 }, { "epoch": 42.21, "learning_rate": 2.889601823611976e-05, "loss": 1.2664, "step": 3999500 }, { "epoch": 42.21, "learning_rate": 2.8893379908608335e-05, "loss": 1.2986, "step": 4000000 }, { "epoch": 42.22, "learning_rate": 2.8890741581096914e-05, "loss": 1.2661, "step": 4000500 }, { "epoch": 42.22, "learning_rate": 2.888810325358549e-05, "loss": 1.2648, "step": 4001000 }, { "epoch": 42.23, "learning_rate": 2.8885464926074062e-05, "loss": 1.2735, "step": 4001500 }, { "epoch": 42.23, "learning_rate": 2.888282659856264e-05, "loss": 1.2427, "step": 4002000 }, { "epoch": 42.24, "learning_rate": 2.8880188271051216e-05, "loss": 1.2756, "step": 4002500 }, { "epoch": 42.24, "learning_rate": 2.8877549943539795e-05, "loss": 1.2559, "step": 4003000 }, { "epoch": 42.25, "learning_rate": 2.8874911616028367e-05, "loss": 1.313, "step": 4003500 }, { "epoch": 42.26, "learning_rate": 2.8872273288516943e-05, "loss": 1.2188, "step": 4004000 }, { "epoch": 42.26, "learning_rate": 2.886963496100552e-05, "loss": 1.2476, "step": 4004500 }, { "epoch": 42.27, "learning_rate": 2.8866996633494097e-05, "loss": 1.2987, "step": 4005000 }, { "epoch": 42.27, "learning_rate": 2.8864358305982676e-05, "loss": 1.3074, "step": 4005500 }, { "epoch": 42.28, "learning_rate": 2.8861719978471248e-05, "loss": 1.2805, "step": 4006000 }, { "epoch": 42.28, "learning_rate": 2.8859081650959824e-05, "loss": 1.2125, "step": 4006500 }, { "epoch": 42.29, "learning_rate": 2.8856443323448402e-05, "loss": 1.2467, "step": 4007000 }, { "epoch": 42.29, "learning_rate": 2.8853804995936978e-05, "loss": 1.2509, "step": 4007500 }, { "epoch": 42.3, "learning_rate": 2.8851166668425557e-05, "loss": 1.3075, "step": 4008000 }, { "epoch": 42.3, "learning_rate": 2.8848528340914125e-05, "loss": 1.2375, "step": 4008500 }, { "epoch": 42.31, "learning_rate": 2.8845890013402704e-05, "loss": 1.2313, "step": 4009000 }, { "epoch": 42.31, "learning_rate": 2.8843251685891283e-05, "loss": 1.2556, "step": 4009500 }, { "epoch": 42.32, "learning_rate": 2.884061335837986e-05, "loss": 1.2491, "step": 4010000 }, { "epoch": 42.32, "learning_rate": 2.8837975030868438e-05, "loss": 1.2501, "step": 4010500 }, { "epoch": 42.33, "learning_rate": 2.8835336703357006e-05, "loss": 1.2733, "step": 4011000 }, { "epoch": 42.33, "learning_rate": 2.8832698375845585e-05, "loss": 1.2476, "step": 4011500 }, { "epoch": 42.34, "learning_rate": 2.883006004833416e-05, "loss": 1.2293, "step": 4012000 }, { "epoch": 42.35, "learning_rate": 2.882742172082274e-05, "loss": 1.2775, "step": 4012500 }, { "epoch": 42.35, "learning_rate": 2.882478339331131e-05, "loss": 1.2917, "step": 4013000 }, { "epoch": 42.36, "learning_rate": 2.8822145065799887e-05, "loss": 1.2224, "step": 4013500 }, { "epoch": 42.36, "learning_rate": 2.8819506738288466e-05, "loss": 1.2441, "step": 4014000 }, { "epoch": 42.37, "learning_rate": 2.881686841077704e-05, "loss": 1.2804, "step": 4014500 }, { "epoch": 42.37, "learning_rate": 2.881423008326562e-05, "loss": 1.238, "step": 4015000 }, { "epoch": 42.38, "learning_rate": 2.8811591755754192e-05, "loss": 1.2379, "step": 4015500 }, { "epoch": 42.38, "learning_rate": 2.8808953428242768e-05, "loss": 1.2544, "step": 4016000 }, { "epoch": 42.39, "learning_rate": 2.8806315100731347e-05, "loss": 1.249, "step": 4016500 }, { "epoch": 42.39, "learning_rate": 2.8803676773219922e-05, "loss": 1.3206, "step": 4017000 }, { "epoch": 42.4, "learning_rate": 2.88010384457085e-05, "loss": 1.3087, "step": 4017500 }, { "epoch": 42.4, "learning_rate": 2.8798400118197073e-05, "loss": 1.2692, "step": 4018000 }, { "epoch": 42.41, "learning_rate": 2.879576179068565e-05, "loss": 1.2561, "step": 4018500 }, { "epoch": 42.41, "learning_rate": 2.8793123463174228e-05, "loss": 1.3202, "step": 4019000 }, { "epoch": 42.42, "learning_rate": 2.8790485135662803e-05, "loss": 1.2494, "step": 4019500 }, { "epoch": 42.42, "learning_rate": 2.8787846808151382e-05, "loss": 1.2625, "step": 4020000 }, { "epoch": 42.43, "learning_rate": 2.8785208480639954e-05, "loss": 1.2354, "step": 4020500 }, { "epoch": 42.43, "learning_rate": 2.878257015312853e-05, "loss": 1.2605, "step": 4021000 }, { "epoch": 42.44, "learning_rate": 2.877993182561711e-05, "loss": 1.3107, "step": 4021500 }, { "epoch": 42.45, "learning_rate": 2.8777293498105684e-05, "loss": 1.3275, "step": 4022000 }, { "epoch": 42.45, "learning_rate": 2.8774655170594256e-05, "loss": 1.247, "step": 4022500 }, { "epoch": 42.46, "learning_rate": 2.877201684308283e-05, "loss": 1.3305, "step": 4023000 }, { "epoch": 42.46, "learning_rate": 2.876937851557141e-05, "loss": 1.2991, "step": 4023500 }, { "epoch": 42.47, "learning_rate": 2.8766740188059986e-05, "loss": 1.2979, "step": 4024000 }, { "epoch": 42.47, "learning_rate": 2.8764101860548565e-05, "loss": 1.3003, "step": 4024500 }, { "epoch": 42.48, "learning_rate": 2.8761463533037137e-05, "loss": 1.2881, "step": 4025000 }, { "epoch": 42.48, "learning_rate": 2.8758825205525712e-05, "loss": 1.2938, "step": 4025500 }, { "epoch": 42.49, "learning_rate": 2.875618687801429e-05, "loss": 1.2961, "step": 4026000 }, { "epoch": 42.49, "learning_rate": 2.8753548550502867e-05, "loss": 1.247, "step": 4026500 }, { "epoch": 42.5, "learning_rate": 2.8750910222991446e-05, "loss": 1.2898, "step": 4027000 }, { "epoch": 42.5, "learning_rate": 2.8748271895480018e-05, "loss": 1.3503, "step": 4027500 }, { "epoch": 42.51, "learning_rate": 2.8745633567968593e-05, "loss": 1.3053, "step": 4028000 }, { "epoch": 42.51, "learning_rate": 2.8742995240457172e-05, "loss": 1.2472, "step": 4028500 }, { "epoch": 42.52, "learning_rate": 2.8740356912945748e-05, "loss": 1.2439, "step": 4029000 }, { "epoch": 42.52, "learning_rate": 2.8737718585434326e-05, "loss": 1.2707, "step": 4029500 }, { "epoch": 42.53, "learning_rate": 2.87350802579229e-05, "loss": 1.307, "step": 4030000 }, { "epoch": 42.54, "learning_rate": 2.8732441930411474e-05, "loss": 1.2667, "step": 4030500 }, { "epoch": 42.54, "learning_rate": 2.8729803602900053e-05, "loss": 1.3454, "step": 4031000 }, { "epoch": 42.55, "learning_rate": 2.872716527538863e-05, "loss": 1.2483, "step": 4031500 }, { "epoch": 42.55, "learning_rate": 2.87245269478772e-05, "loss": 1.2839, "step": 4032000 }, { "epoch": 42.56, "learning_rate": 2.872188862036578e-05, "loss": 1.3193, "step": 4032500 }, { "epoch": 42.56, "learning_rate": 2.8719250292854355e-05, "loss": 1.2832, "step": 4033000 }, { "epoch": 42.57, "learning_rate": 2.8716611965342934e-05, "loss": 1.3364, "step": 4033500 }, { "epoch": 42.57, "learning_rate": 2.871397363783151e-05, "loss": 1.3013, "step": 4034000 }, { "epoch": 42.58, "learning_rate": 2.871133531032008e-05, "loss": 1.3178, "step": 4034500 }, { "epoch": 42.58, "learning_rate": 2.8708696982808657e-05, "loss": 1.3141, "step": 4035000 }, { "epoch": 42.59, "learning_rate": 2.8706058655297236e-05, "loss": 1.24, "step": 4035500 }, { "epoch": 42.59, "learning_rate": 2.870342032778581e-05, "loss": 1.2989, "step": 4036000 }, { "epoch": 42.6, "learning_rate": 2.870078200027439e-05, "loss": 1.2601, "step": 4036500 }, { "epoch": 42.6, "learning_rate": 2.8698143672762962e-05, "loss": 1.2557, "step": 4037000 }, { "epoch": 42.61, "learning_rate": 2.8695505345251538e-05, "loss": 1.2139, "step": 4037500 }, { "epoch": 42.61, "learning_rate": 2.8692867017740116e-05, "loss": 1.2897, "step": 4038000 }, { "epoch": 42.62, "learning_rate": 2.8690228690228692e-05, "loss": 1.2548, "step": 4038500 }, { "epoch": 42.62, "learning_rate": 2.868759036271727e-05, "loss": 1.273, "step": 4039000 }, { "epoch": 42.63, "learning_rate": 2.8684952035205843e-05, "loss": 1.2842, "step": 4039500 }, { "epoch": 42.64, "learning_rate": 2.868231370769442e-05, "loss": 1.2641, "step": 4040000 }, { "epoch": 42.64, "learning_rate": 2.8679675380182997e-05, "loss": 1.2132, "step": 4040500 }, { "epoch": 42.65, "learning_rate": 2.8677037052671573e-05, "loss": 1.2665, "step": 4041000 }, { "epoch": 42.65, "learning_rate": 2.8674398725160145e-05, "loss": 1.2703, "step": 4041500 }, { "epoch": 42.66, "learning_rate": 2.8671760397648724e-05, "loss": 1.2896, "step": 4042000 }, { "epoch": 42.66, "learning_rate": 2.86691220701373e-05, "loss": 1.311, "step": 4042500 }, { "epoch": 42.67, "learning_rate": 2.8666483742625878e-05, "loss": 1.2905, "step": 4043000 }, { "epoch": 42.67, "learning_rate": 2.8663845415114454e-05, "loss": 1.3263, "step": 4043500 }, { "epoch": 42.68, "learning_rate": 2.8661207087603026e-05, "loss": 1.2728, "step": 4044000 }, { "epoch": 42.68, "learning_rate": 2.8658568760091605e-05, "loss": 1.2705, "step": 4044500 }, { "epoch": 42.69, "learning_rate": 2.865593043258018e-05, "loss": 1.3108, "step": 4045000 }, { "epoch": 42.69, "learning_rate": 2.865329210506876e-05, "loss": 1.1922, "step": 4045500 }, { "epoch": 42.7, "learning_rate": 2.8650653777557334e-05, "loss": 1.2619, "step": 4046000 }, { "epoch": 42.7, "learning_rate": 2.8648015450045907e-05, "loss": 1.2481, "step": 4046500 }, { "epoch": 42.71, "learning_rate": 2.8645377122534482e-05, "loss": 1.2659, "step": 4047000 }, { "epoch": 42.71, "learning_rate": 2.864273879502306e-05, "loss": 1.2755, "step": 4047500 }, { "epoch": 42.72, "learning_rate": 2.8640100467511636e-05, "loss": 1.2941, "step": 4048000 }, { "epoch": 42.73, "learning_rate": 2.8637462140000215e-05, "loss": 1.3295, "step": 4048500 }, { "epoch": 42.73, "learning_rate": 2.8634823812488787e-05, "loss": 1.2977, "step": 4049000 }, { "epoch": 42.74, "learning_rate": 2.8632185484977363e-05, "loss": 1.2877, "step": 4049500 }, { "epoch": 42.74, "learning_rate": 2.8629547157465942e-05, "loss": 1.2302, "step": 4050000 }, { "epoch": 42.75, "learning_rate": 2.8626908829954517e-05, "loss": 1.2874, "step": 4050500 }, { "epoch": 42.75, "learning_rate": 2.862427050244309e-05, "loss": 1.307, "step": 4051000 }, { "epoch": 42.76, "learning_rate": 2.8621632174931668e-05, "loss": 1.3237, "step": 4051500 }, { "epoch": 42.76, "learning_rate": 2.8618993847420244e-05, "loss": 1.2934, "step": 4052000 }, { "epoch": 42.77, "learning_rate": 2.8616355519908823e-05, "loss": 1.2573, "step": 4052500 }, { "epoch": 42.77, "learning_rate": 2.8613717192397398e-05, "loss": 1.295, "step": 4053000 }, { "epoch": 42.78, "learning_rate": 2.861107886488597e-05, "loss": 1.299, "step": 4053500 }, { "epoch": 42.78, "learning_rate": 2.860844053737455e-05, "loss": 1.3359, "step": 4054000 }, { "epoch": 42.79, "learning_rate": 2.8605802209863125e-05, "loss": 1.247, "step": 4054500 }, { "epoch": 42.79, "learning_rate": 2.8603163882351703e-05, "loss": 1.2991, "step": 4055000 }, { "epoch": 42.8, "learning_rate": 2.860052555484028e-05, "loss": 1.2891, "step": 4055500 }, { "epoch": 42.8, "learning_rate": 2.859788722732885e-05, "loss": 1.2613, "step": 4056000 }, { "epoch": 42.81, "learning_rate": 2.859524889981743e-05, "loss": 1.2838, "step": 4056500 }, { "epoch": 42.81, "learning_rate": 2.8592610572306005e-05, "loss": 1.2755, "step": 4057000 }, { "epoch": 42.82, "learning_rate": 2.8589972244794584e-05, "loss": 1.2663, "step": 4057500 }, { "epoch": 42.83, "learning_rate": 2.8587333917283153e-05, "loss": 1.3341, "step": 4058000 }, { "epoch": 42.83, "learning_rate": 2.8584695589771732e-05, "loss": 1.2752, "step": 4058500 }, { "epoch": 42.84, "learning_rate": 2.8582057262260307e-05, "loss": 1.2919, "step": 4059000 }, { "epoch": 42.84, "learning_rate": 2.8579418934748886e-05, "loss": 1.2442, "step": 4059500 }, { "epoch": 42.85, "learning_rate": 2.857678060723746e-05, "loss": 1.2864, "step": 4060000 }, { "epoch": 42.85, "learning_rate": 2.8574142279726034e-05, "loss": 1.1978, "step": 4060500 }, { "epoch": 42.86, "learning_rate": 2.8571503952214613e-05, "loss": 1.2368, "step": 4061000 }, { "epoch": 42.86, "learning_rate": 2.8568865624703188e-05, "loss": 1.2691, "step": 4061500 }, { "epoch": 42.87, "learning_rate": 2.8566227297191767e-05, "loss": 1.3094, "step": 4062000 }, { "epoch": 42.87, "learning_rate": 2.8563588969680342e-05, "loss": 1.2768, "step": 4062500 }, { "epoch": 42.88, "learning_rate": 2.8560950642168915e-05, "loss": 1.2823, "step": 4063000 }, { "epoch": 42.88, "learning_rate": 2.8558312314657493e-05, "loss": 1.2533, "step": 4063500 }, { "epoch": 42.89, "learning_rate": 2.855567398714607e-05, "loss": 1.2663, "step": 4064000 }, { "epoch": 42.89, "learning_rate": 2.8553035659634648e-05, "loss": 1.3035, "step": 4064500 }, { "epoch": 42.9, "learning_rate": 2.8550397332123223e-05, "loss": 1.3426, "step": 4065000 }, { "epoch": 42.9, "learning_rate": 2.8547759004611795e-05, "loss": 1.2816, "step": 4065500 }, { "epoch": 42.91, "learning_rate": 2.8545120677100374e-05, "loss": 1.256, "step": 4066000 }, { "epoch": 42.92, "learning_rate": 2.854248234958895e-05, "loss": 1.2517, "step": 4066500 }, { "epoch": 42.92, "learning_rate": 2.853984402207753e-05, "loss": 1.2477, "step": 4067000 }, { "epoch": 42.93, "learning_rate": 2.85372056945661e-05, "loss": 1.2788, "step": 4067500 }, { "epoch": 42.93, "learning_rate": 2.8534567367054676e-05, "loss": 1.3344, "step": 4068000 }, { "epoch": 42.94, "learning_rate": 2.8531929039543255e-05, "loss": 1.1978, "step": 4068500 }, { "epoch": 42.94, "learning_rate": 2.852929071203183e-05, "loss": 1.2451, "step": 4069000 }, { "epoch": 42.95, "learning_rate": 2.852665238452041e-05, "loss": 1.3336, "step": 4069500 }, { "epoch": 42.95, "learning_rate": 2.8524014057008978e-05, "loss": 1.2927, "step": 4070000 }, { "epoch": 42.96, "learning_rate": 2.8521375729497557e-05, "loss": 1.2424, "step": 4070500 }, { "epoch": 42.96, "learning_rate": 2.8518737401986133e-05, "loss": 1.2722, "step": 4071000 }, { "epoch": 42.97, "learning_rate": 2.851609907447471e-05, "loss": 1.3419, "step": 4071500 }, { "epoch": 42.97, "learning_rate": 2.851346074696329e-05, "loss": 1.2677, "step": 4072000 }, { "epoch": 42.98, "learning_rate": 2.851082241945186e-05, "loss": 1.2728, "step": 4072500 }, { "epoch": 42.98, "learning_rate": 2.8508184091940438e-05, "loss": 1.336, "step": 4073000 }, { "epoch": 42.99, "learning_rate": 2.8505545764429013e-05, "loss": 1.2793, "step": 4073500 }, { "epoch": 42.99, "learning_rate": 2.8502907436917592e-05, "loss": 1.2664, "step": 4074000 }, { "epoch": 43.0, "learning_rate": 2.8500269109406168e-05, "loss": 1.2903, "step": 4074500 }, { "epoch": 43.0, "learning_rate": 2.849763078189474e-05, "loss": 1.2872, "step": 4075000 }, { "epoch": 43.01, "learning_rate": 2.849499245438332e-05, "loss": 1.2584, "step": 4075500 }, { "epoch": 43.02, "learning_rate": 2.8492354126871894e-05, "loss": 1.2225, "step": 4076000 }, { "epoch": 43.02, "learning_rate": 2.8489715799360473e-05, "loss": 1.2586, "step": 4076500 }, { "epoch": 43.03, "learning_rate": 2.8487077471849045e-05, "loss": 1.2468, "step": 4077000 }, { "epoch": 43.03, "learning_rate": 2.848443914433762e-05, "loss": 1.3046, "step": 4077500 }, { "epoch": 43.04, "learning_rate": 2.84818008168262e-05, "loss": 1.3063, "step": 4078000 }, { "epoch": 43.04, "learning_rate": 2.8479162489314775e-05, "loss": 1.2688, "step": 4078500 }, { "epoch": 43.05, "learning_rate": 2.8476524161803354e-05, "loss": 1.2908, "step": 4079000 }, { "epoch": 43.05, "learning_rate": 2.8473885834291926e-05, "loss": 1.2418, "step": 4079500 }, { "epoch": 43.06, "learning_rate": 2.84712475067805e-05, "loss": 1.3131, "step": 4080000 }, { "epoch": 43.06, "learning_rate": 2.846860917926908e-05, "loss": 1.2918, "step": 4080500 }, { "epoch": 43.07, "learning_rate": 2.8465970851757656e-05, "loss": 1.2299, "step": 4081000 }, { "epoch": 43.07, "learning_rate": 2.8463332524246235e-05, "loss": 1.2403, "step": 4081500 }, { "epoch": 43.08, "learning_rate": 2.8460694196734803e-05, "loss": 1.2692, "step": 4082000 }, { "epoch": 43.08, "learning_rate": 2.8458055869223382e-05, "loss": 1.2775, "step": 4082500 }, { "epoch": 43.09, "learning_rate": 2.845541754171196e-05, "loss": 1.2699, "step": 4083000 }, { "epoch": 43.09, "learning_rate": 2.8452779214200537e-05, "loss": 1.2366, "step": 4083500 }, { "epoch": 43.1, "learning_rate": 2.8450140886689116e-05, "loss": 1.2786, "step": 4084000 }, { "epoch": 43.1, "learning_rate": 2.8447502559177684e-05, "loss": 1.2418, "step": 4084500 }, { "epoch": 43.11, "learning_rate": 2.8444864231666263e-05, "loss": 1.277, "step": 4085000 }, { "epoch": 43.12, "learning_rate": 2.844222590415484e-05, "loss": 1.2597, "step": 4085500 }, { "epoch": 43.12, "learning_rate": 2.8439587576643417e-05, "loss": 1.2567, "step": 4086000 }, { "epoch": 43.13, "learning_rate": 2.843694924913199e-05, "loss": 1.2621, "step": 4086500 }, { "epoch": 43.13, "learning_rate": 2.8434310921620565e-05, "loss": 1.2322, "step": 4087000 }, { "epoch": 43.14, "learning_rate": 2.8431672594109144e-05, "loss": 1.2903, "step": 4087500 }, { "epoch": 43.14, "learning_rate": 2.842903426659772e-05, "loss": 1.2317, "step": 4088000 }, { "epoch": 43.15, "learning_rate": 2.84263959390863e-05, "loss": 1.2454, "step": 4088500 }, { "epoch": 43.15, "learning_rate": 2.842375761157487e-05, "loss": 1.2996, "step": 4089000 }, { "epoch": 43.16, "learning_rate": 2.8421119284063446e-05, "loss": 1.2473, "step": 4089500 }, { "epoch": 43.16, "learning_rate": 2.8418480956552025e-05, "loss": 1.2198, "step": 4090000 }, { "epoch": 43.17, "learning_rate": 2.84158426290406e-05, "loss": 1.2687, "step": 4090500 }, { "epoch": 43.17, "learning_rate": 2.841320430152918e-05, "loss": 1.3179, "step": 4091000 }, { "epoch": 43.18, "learning_rate": 2.841056597401775e-05, "loss": 1.2674, "step": 4091500 }, { "epoch": 43.18, "learning_rate": 2.8407927646506327e-05, "loss": 1.2113, "step": 4092000 }, { "epoch": 43.19, "learning_rate": 2.8405289318994906e-05, "loss": 1.2466, "step": 4092500 }, { "epoch": 43.19, "learning_rate": 2.840265099148348e-05, "loss": 1.3149, "step": 4093000 }, { "epoch": 43.2, "learning_rate": 2.840001266397206e-05, "loss": 1.2403, "step": 4093500 }, { "epoch": 43.21, "learning_rate": 2.8397374336460632e-05, "loss": 1.2608, "step": 4094000 }, { "epoch": 43.21, "learning_rate": 2.8394736008949208e-05, "loss": 1.2035, "step": 4094500 }, { "epoch": 43.22, "learning_rate": 2.8392097681437786e-05, "loss": 1.2606, "step": 4095000 }, { "epoch": 43.22, "learning_rate": 2.8389459353926362e-05, "loss": 1.3013, "step": 4095500 }, { "epoch": 43.23, "learning_rate": 2.8386821026414934e-05, "loss": 1.2838, "step": 4096000 }, { "epoch": 43.23, "learning_rate": 2.838418269890351e-05, "loss": 1.2518, "step": 4096500 }, { "epoch": 43.24, "learning_rate": 2.838154437139209e-05, "loss": 1.3333, "step": 4097000 }, { "epoch": 43.24, "learning_rate": 2.8378906043880664e-05, "loss": 1.3063, "step": 4097500 }, { "epoch": 43.25, "learning_rate": 2.8376267716369243e-05, "loss": 1.3303, "step": 4098000 }, { "epoch": 43.25, "learning_rate": 2.8373629388857815e-05, "loss": 1.2816, "step": 4098500 }, { "epoch": 43.26, "learning_rate": 2.837099106134639e-05, "loss": 1.2343, "step": 4099000 }, { "epoch": 43.26, "learning_rate": 2.836835273383497e-05, "loss": 1.2343, "step": 4099500 }, { "epoch": 43.27, "learning_rate": 2.8365714406323545e-05, "loss": 1.2869, "step": 4100000 }, { "epoch": 43.27, "learning_rate": 2.8363076078812124e-05, "loss": 1.2625, "step": 4100500 }, { "epoch": 43.28, "learning_rate": 2.8360437751300696e-05, "loss": 1.2675, "step": 4101000 }, { "epoch": 43.28, "learning_rate": 2.835779942378927e-05, "loss": 1.2758, "step": 4101500 }, { "epoch": 43.29, "learning_rate": 2.835516109627785e-05, "loss": 1.2216, "step": 4102000 }, { "epoch": 43.29, "learning_rate": 2.8352522768766426e-05, "loss": 1.3036, "step": 4102500 }, { "epoch": 43.3, "learning_rate": 2.8349884441255004e-05, "loss": 1.2765, "step": 4103000 }, { "epoch": 43.31, "learning_rate": 2.8347246113743576e-05, "loss": 1.2661, "step": 4103500 }, { "epoch": 43.31, "learning_rate": 2.8344607786232152e-05, "loss": 1.237, "step": 4104000 }, { "epoch": 43.32, "learning_rate": 2.834196945872073e-05, "loss": 1.2546, "step": 4104500 }, { "epoch": 43.32, "learning_rate": 2.8339331131209306e-05, "loss": 1.2498, "step": 4105000 }, { "epoch": 43.33, "learning_rate": 2.833669280369788e-05, "loss": 1.2624, "step": 4105500 }, { "epoch": 43.33, "learning_rate": 2.8334054476186457e-05, "loss": 1.2765, "step": 4106000 }, { "epoch": 43.34, "learning_rate": 2.8331416148675033e-05, "loss": 1.2298, "step": 4106500 }, { "epoch": 43.34, "learning_rate": 2.832877782116361e-05, "loss": 1.2721, "step": 4107000 }, { "epoch": 43.35, "learning_rate": 2.8326139493652187e-05, "loss": 1.2732, "step": 4107500 }, { "epoch": 43.35, "learning_rate": 2.832350116614076e-05, "loss": 1.3115, "step": 4108000 }, { "epoch": 43.36, "learning_rate": 2.8320862838629335e-05, "loss": 1.255, "step": 4108500 }, { "epoch": 43.36, "learning_rate": 2.8318224511117914e-05, "loss": 1.2978, "step": 4109000 }, { "epoch": 43.37, "learning_rate": 2.831558618360649e-05, "loss": 1.2897, "step": 4109500 }, { "epoch": 43.37, "learning_rate": 2.8312947856095068e-05, "loss": 1.2279, "step": 4110000 }, { "epoch": 43.38, "learning_rate": 2.831030952858364e-05, "loss": 1.242, "step": 4110500 }, { "epoch": 43.38, "learning_rate": 2.8307671201072216e-05, "loss": 1.2971, "step": 4111000 }, { "epoch": 43.39, "learning_rate": 2.8305032873560794e-05, "loss": 1.329, "step": 4111500 }, { "epoch": 43.4, "learning_rate": 2.830239454604937e-05, "loss": 1.2786, "step": 4112000 }, { "epoch": 43.4, "learning_rate": 2.829975621853795e-05, "loss": 1.2268, "step": 4112500 }, { "epoch": 43.41, "learning_rate": 2.829711789102652e-05, "loss": 1.3142, "step": 4113000 }, { "epoch": 43.41, "learning_rate": 2.8294479563515096e-05, "loss": 1.2762, "step": 4113500 }, { "epoch": 43.42, "learning_rate": 2.8291841236003675e-05, "loss": 1.2728, "step": 4114000 }, { "epoch": 43.42, "learning_rate": 2.828920290849225e-05, "loss": 1.2839, "step": 4114500 }, { "epoch": 43.43, "learning_rate": 2.8286564580980823e-05, "loss": 1.329, "step": 4115000 }, { "epoch": 43.43, "learning_rate": 2.8283926253469402e-05, "loss": 1.2847, "step": 4115500 }, { "epoch": 43.44, "learning_rate": 2.8281287925957977e-05, "loss": 1.2584, "step": 4116000 }, { "epoch": 43.44, "learning_rate": 2.8278649598446556e-05, "loss": 1.2389, "step": 4116500 }, { "epoch": 43.45, "learning_rate": 2.827601127093513e-05, "loss": 1.2597, "step": 4117000 }, { "epoch": 43.45, "learning_rate": 2.8273372943423704e-05, "loss": 1.2592, "step": 4117500 }, { "epoch": 43.46, "learning_rate": 2.8270734615912283e-05, "loss": 1.2803, "step": 4118000 }, { "epoch": 43.46, "learning_rate": 2.8268096288400858e-05, "loss": 1.275, "step": 4118500 }, { "epoch": 43.47, "learning_rate": 2.8265457960889437e-05, "loss": 1.2472, "step": 4119000 }, { "epoch": 43.47, "learning_rate": 2.8262819633378012e-05, "loss": 1.2748, "step": 4119500 }, { "epoch": 43.48, "learning_rate": 2.8260181305866584e-05, "loss": 1.2377, "step": 4120000 }, { "epoch": 43.48, "learning_rate": 2.825754297835516e-05, "loss": 1.256, "step": 4120500 }, { "epoch": 43.49, "learning_rate": 2.825490465084374e-05, "loss": 1.2798, "step": 4121000 }, { "epoch": 43.5, "learning_rate": 2.8252266323332314e-05, "loss": 1.2864, "step": 4121500 }, { "epoch": 43.5, "learning_rate": 2.8249627995820893e-05, "loss": 1.2851, "step": 4122000 }, { "epoch": 43.51, "learning_rate": 2.8246989668309465e-05, "loss": 1.2802, "step": 4122500 }, { "epoch": 43.51, "learning_rate": 2.824435134079804e-05, "loss": 1.2707, "step": 4123000 }, { "epoch": 43.52, "learning_rate": 2.824171301328662e-05, "loss": 1.325, "step": 4123500 }, { "epoch": 43.52, "learning_rate": 2.8239074685775195e-05, "loss": 1.254, "step": 4124000 }, { "epoch": 43.53, "learning_rate": 2.8236436358263767e-05, "loss": 1.2688, "step": 4124500 }, { "epoch": 43.53, "learning_rate": 2.8233798030752346e-05, "loss": 1.2587, "step": 4125000 }, { "epoch": 43.54, "learning_rate": 2.823115970324092e-05, "loss": 1.2819, "step": 4125500 }, { "epoch": 43.54, "learning_rate": 2.82285213757295e-05, "loss": 1.2314, "step": 4126000 }, { "epoch": 43.55, "learning_rate": 2.8225883048218076e-05, "loss": 1.2654, "step": 4126500 }, { "epoch": 43.55, "learning_rate": 2.8223244720706648e-05, "loss": 1.2805, "step": 4127000 }, { "epoch": 43.56, "learning_rate": 2.8220606393195227e-05, "loss": 1.2532, "step": 4127500 }, { "epoch": 43.56, "learning_rate": 2.8217968065683802e-05, "loss": 1.2731, "step": 4128000 }, { "epoch": 43.57, "learning_rate": 2.821532973817238e-05, "loss": 1.2992, "step": 4128500 }, { "epoch": 43.57, "learning_rate": 2.8212691410660957e-05, "loss": 1.1971, "step": 4129000 }, { "epoch": 43.58, "learning_rate": 2.821005308314953e-05, "loss": 1.2881, "step": 4129500 }, { "epoch": 43.59, "learning_rate": 2.8207414755638108e-05, "loss": 1.3036, "step": 4130000 }, { "epoch": 43.59, "learning_rate": 2.8204776428126683e-05, "loss": 1.2519, "step": 4130500 }, { "epoch": 43.6, "learning_rate": 2.8202138100615262e-05, "loss": 1.2538, "step": 4131000 }, { "epoch": 43.6, "learning_rate": 2.8199499773103838e-05, "loss": 1.2736, "step": 4131500 }, { "epoch": 43.61, "learning_rate": 2.819686144559241e-05, "loss": 1.3308, "step": 4132000 }, { "epoch": 43.61, "learning_rate": 2.8194223118080985e-05, "loss": 1.2863, "step": 4132500 }, { "epoch": 43.62, "learning_rate": 2.8191584790569564e-05, "loss": 1.3037, "step": 4133000 }, { "epoch": 43.62, "learning_rate": 2.818894646305814e-05, "loss": 1.2843, "step": 4133500 }, { "epoch": 43.63, "learning_rate": 2.818630813554671e-05, "loss": 1.2685, "step": 4134000 }, { "epoch": 43.63, "learning_rate": 2.818366980803529e-05, "loss": 1.3115, "step": 4134500 }, { "epoch": 43.64, "learning_rate": 2.8181031480523866e-05, "loss": 1.2531, "step": 4135000 }, { "epoch": 43.64, "learning_rate": 2.8178393153012445e-05, "loss": 1.2945, "step": 4135500 }, { "epoch": 43.65, "learning_rate": 2.817575482550102e-05, "loss": 1.2239, "step": 4136000 }, { "epoch": 43.65, "learning_rate": 2.8173116497989593e-05, "loss": 1.2265, "step": 4136500 }, { "epoch": 43.66, "learning_rate": 2.817047817047817e-05, "loss": 1.3333, "step": 4137000 }, { "epoch": 43.66, "learning_rate": 2.8167839842966747e-05, "loss": 1.2899, "step": 4137500 }, { "epoch": 43.67, "learning_rate": 2.8165201515455326e-05, "loss": 1.25, "step": 4138000 }, { "epoch": 43.67, "learning_rate": 2.81625631879439e-05, "loss": 1.274, "step": 4138500 }, { "epoch": 43.68, "learning_rate": 2.8159924860432473e-05, "loss": 1.2634, "step": 4139000 }, { "epoch": 43.69, "learning_rate": 2.8157286532921052e-05, "loss": 1.3068, "step": 4139500 }, { "epoch": 43.69, "learning_rate": 2.8154648205409628e-05, "loss": 1.2671, "step": 4140000 }, { "epoch": 43.7, "learning_rate": 2.8152009877898207e-05, "loss": 1.216, "step": 4140500 }, { "epoch": 43.7, "learning_rate": 2.8149371550386782e-05, "loss": 1.2618, "step": 4141000 }, { "epoch": 43.71, "learning_rate": 2.8146733222875354e-05, "loss": 1.2986, "step": 4141500 }, { "epoch": 43.71, "learning_rate": 2.8144094895363933e-05, "loss": 1.2513, "step": 4142000 }, { "epoch": 43.72, "learning_rate": 2.814145656785251e-05, "loss": 1.2792, "step": 4142500 }, { "epoch": 43.72, "learning_rate": 2.8138818240341087e-05, "loss": 1.2605, "step": 4143000 }, { "epoch": 43.73, "learning_rate": 2.8136179912829656e-05, "loss": 1.2428, "step": 4143500 }, { "epoch": 43.73, "learning_rate": 2.8133541585318235e-05, "loss": 1.3134, "step": 4144000 }, { "epoch": 43.74, "learning_rate": 2.813090325780681e-05, "loss": 1.3273, "step": 4144500 }, { "epoch": 43.74, "learning_rate": 2.812826493029539e-05, "loss": 1.3172, "step": 4145000 }, { "epoch": 43.75, "learning_rate": 2.8125626602783968e-05, "loss": 1.2511, "step": 4145500 }, { "epoch": 43.75, "learning_rate": 2.8122988275272537e-05, "loss": 1.3296, "step": 4146000 }, { "epoch": 43.76, "learning_rate": 2.8120349947761116e-05, "loss": 1.2817, "step": 4146500 }, { "epoch": 43.76, "learning_rate": 2.811771162024969e-05, "loss": 1.2177, "step": 4147000 }, { "epoch": 43.77, "learning_rate": 2.811507329273827e-05, "loss": 1.2253, "step": 4147500 }, { "epoch": 43.78, "learning_rate": 2.8112434965226846e-05, "loss": 1.2508, "step": 4148000 }, { "epoch": 43.78, "learning_rate": 2.8109796637715418e-05, "loss": 1.2732, "step": 4148500 }, { "epoch": 43.79, "learning_rate": 2.8107158310203997e-05, "loss": 1.3124, "step": 4149000 }, { "epoch": 43.79, "learning_rate": 2.8104519982692572e-05, "loss": 1.2687, "step": 4149500 }, { "epoch": 43.8, "learning_rate": 2.810188165518115e-05, "loss": 1.2666, "step": 4150000 }, { "epoch": 43.8, "learning_rate": 2.8099243327669726e-05, "loss": 1.3096, "step": 4150500 }, { "epoch": 43.81, "learning_rate": 2.80966050001583e-05, "loss": 1.2895, "step": 4151000 }, { "epoch": 43.81, "learning_rate": 2.8093966672646877e-05, "loss": 1.262, "step": 4151500 }, { "epoch": 43.82, "learning_rate": 2.8091328345135453e-05, "loss": 1.3304, "step": 4152000 }, { "epoch": 43.82, "learning_rate": 2.8088690017624032e-05, "loss": 1.3112, "step": 4152500 }, { "epoch": 43.83, "learning_rate": 2.8086051690112604e-05, "loss": 1.2445, "step": 4153000 }, { "epoch": 43.83, "learning_rate": 2.808341336260118e-05, "loss": 1.2587, "step": 4153500 }, { "epoch": 43.84, "learning_rate": 2.8080775035089758e-05, "loss": 1.2631, "step": 4154000 }, { "epoch": 43.84, "learning_rate": 2.8078136707578334e-05, "loss": 1.2896, "step": 4154500 }, { "epoch": 43.85, "learning_rate": 2.8075498380066913e-05, "loss": 1.2033, "step": 4155000 }, { "epoch": 43.85, "learning_rate": 2.807286005255548e-05, "loss": 1.205, "step": 4155500 }, { "epoch": 43.86, "learning_rate": 2.807022172504406e-05, "loss": 1.2958, "step": 4156000 }, { "epoch": 43.86, "learning_rate": 2.806758339753264e-05, "loss": 1.3089, "step": 4156500 }, { "epoch": 43.87, "learning_rate": 2.8064945070021215e-05, "loss": 1.224, "step": 4157000 }, { "epoch": 43.88, "learning_rate": 2.8062306742509793e-05, "loss": 1.2814, "step": 4157500 }, { "epoch": 43.88, "learning_rate": 2.8059668414998362e-05, "loss": 1.2153, "step": 4158000 }, { "epoch": 43.89, "learning_rate": 2.805703008748694e-05, "loss": 1.2693, "step": 4158500 }, { "epoch": 43.89, "learning_rate": 2.8054391759975517e-05, "loss": 1.2444, "step": 4159000 }, { "epoch": 43.9, "learning_rate": 2.8051753432464095e-05, "loss": 1.2112, "step": 4159500 }, { "epoch": 43.9, "learning_rate": 2.804911510495267e-05, "loss": 1.2973, "step": 4160000 }, { "epoch": 43.91, "learning_rate": 2.8046476777441243e-05, "loss": 1.2887, "step": 4160500 }, { "epoch": 43.91, "learning_rate": 2.8043838449929822e-05, "loss": 1.2492, "step": 4161000 }, { "epoch": 43.92, "learning_rate": 2.8041200122418397e-05, "loss": 1.325, "step": 4161500 }, { "epoch": 43.92, "learning_rate": 2.8038561794906976e-05, "loss": 1.2583, "step": 4162000 }, { "epoch": 43.93, "learning_rate": 2.803592346739555e-05, "loss": 1.2671, "step": 4162500 }, { "epoch": 43.93, "learning_rate": 2.8033285139884124e-05, "loss": 1.2993, "step": 4163000 }, { "epoch": 43.94, "learning_rate": 2.8030646812372703e-05, "loss": 1.2827, "step": 4163500 }, { "epoch": 43.94, "learning_rate": 2.8028008484861278e-05, "loss": 1.2375, "step": 4164000 }, { "epoch": 43.95, "learning_rate": 2.8025370157349857e-05, "loss": 1.2518, "step": 4164500 }, { "epoch": 43.95, "learning_rate": 2.802273182983843e-05, "loss": 1.2935, "step": 4165000 }, { "epoch": 43.96, "learning_rate": 2.8020093502327005e-05, "loss": 1.1985, "step": 4165500 }, { "epoch": 43.97, "learning_rate": 2.8017455174815584e-05, "loss": 1.2837, "step": 4166000 }, { "epoch": 43.97, "learning_rate": 2.801481684730416e-05, "loss": 1.2756, "step": 4166500 }, { "epoch": 43.98, "learning_rate": 2.8012178519792738e-05, "loss": 1.2889, "step": 4167000 }, { "epoch": 43.98, "learning_rate": 2.800954019228131e-05, "loss": 1.2715, "step": 4167500 }, { "epoch": 43.99, "learning_rate": 2.8006901864769885e-05, "loss": 1.2758, "step": 4168000 }, { "epoch": 43.99, "learning_rate": 2.8004263537258464e-05, "loss": 1.2348, "step": 4168500 }, { "epoch": 44.0, "learning_rate": 2.800162520974704e-05, "loss": 1.2239, "step": 4169000 }, { "epoch": 44.0, "learning_rate": 2.799898688223562e-05, "loss": 1.2747, "step": 4169500 }, { "epoch": 44.01, "learning_rate": 2.7996348554724187e-05, "loss": 1.2632, "step": 4170000 }, { "epoch": 44.01, "learning_rate": 2.7993710227212766e-05, "loss": 1.2196, "step": 4170500 }, { "epoch": 44.02, "learning_rate": 2.7991071899701342e-05, "loss": 1.2756, "step": 4171000 }, { "epoch": 44.02, "learning_rate": 2.798843357218992e-05, "loss": 1.2115, "step": 4171500 }, { "epoch": 44.03, "learning_rate": 2.7985795244678493e-05, "loss": 1.2712, "step": 4172000 }, { "epoch": 44.03, "learning_rate": 2.7983156917167068e-05, "loss": 1.2618, "step": 4172500 }, { "epoch": 44.04, "learning_rate": 2.7980518589655647e-05, "loss": 1.2611, "step": 4173000 }, { "epoch": 44.04, "learning_rate": 2.7977880262144223e-05, "loss": 1.2974, "step": 4173500 }, { "epoch": 44.05, "learning_rate": 2.79752419346328e-05, "loss": 1.2199, "step": 4174000 }, { "epoch": 44.05, "learning_rate": 2.7972603607121374e-05, "loss": 1.2339, "step": 4174500 }, { "epoch": 44.06, "learning_rate": 2.796996527960995e-05, "loss": 1.2957, "step": 4175000 }, { "epoch": 44.07, "learning_rate": 2.7967326952098528e-05, "loss": 1.2807, "step": 4175500 }, { "epoch": 44.07, "learning_rate": 2.7964688624587103e-05, "loss": 1.221, "step": 4176000 }, { "epoch": 44.08, "learning_rate": 2.7962050297075682e-05, "loss": 1.2851, "step": 4176500 }, { "epoch": 44.08, "learning_rate": 2.7959411969564254e-05, "loss": 1.2824, "step": 4177000 }, { "epoch": 44.09, "learning_rate": 2.795677364205283e-05, "loss": 1.2459, "step": 4177500 }, { "epoch": 44.09, "learning_rate": 2.795413531454141e-05, "loss": 1.2534, "step": 4178000 }, { "epoch": 44.1, "learning_rate": 2.7951496987029984e-05, "loss": 1.3443, "step": 4178500 }, { "epoch": 44.1, "learning_rate": 2.7948858659518563e-05, "loss": 1.2736, "step": 4179000 }, { "epoch": 44.11, "learning_rate": 2.7946220332007135e-05, "loss": 1.3069, "step": 4179500 }, { "epoch": 44.11, "learning_rate": 2.794358200449571e-05, "loss": 1.2612, "step": 4180000 }, { "epoch": 44.12, "learning_rate": 2.794094367698429e-05, "loss": 1.2397, "step": 4180500 }, { "epoch": 44.12, "learning_rate": 2.7938305349472865e-05, "loss": 1.279, "step": 4181000 } ], "logging_steps": 500, "max_steps": 9475700, "num_train_epochs": 100, "save_steps": 1000, "total_flos": 7.09037848506394e+17, "trial_name": null, "trial_params": null }