{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 383745, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.993485257136901e-05, "loss": 1.5554, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.9869705142738016e-05, "loss": 1.5383, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.9804557714107027e-05, "loss": 1.5335, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.973941028547604e-05, "loss": 1.528, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.967426285684505e-05, "loss": 1.5146, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.960911542821405e-05, "loss": 1.5074, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.9543967999583054e-05, "loss": 1.5076, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9478820570952065e-05, "loss": 1.5025, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.9413673142321075e-05, "loss": 1.5122, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.934852571369008e-05, "loss": 1.5013, "step": 5000 }, { "epoch": 0.01, "learning_rate": 4.928337828505909e-05, "loss": 1.5068, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.92182308564281e-05, "loss": 1.4992, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.915308342779711e-05, "loss": 1.4902, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.908793599916611e-05, "loss": 1.4905, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.902278857053512e-05, "loss": 1.4883, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.8957641141904134e-05, "loss": 1.4811, "step": 8000 }, { "epoch": 0.02, "learning_rate": 4.8892493713273144e-05, "loss": 1.4823, "step": 8500 }, { "epoch": 0.02, "learning_rate": 4.882734628464215e-05, "loss": 1.4842, "step": 9000 }, { "epoch": 0.02, "learning_rate": 4.876219885601115e-05, "loss": 1.4744, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.869705142738016e-05, "loss": 1.471, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.863190399874917e-05, "loss": 1.4788, "step": 10500 }, { "epoch": 0.03, "learning_rate": 4.8566756570118175e-05, "loss": 1.4833, "step": 11000 }, { "epoch": 0.03, "learning_rate": 4.8501609141487185e-05, "loss": 1.4778, "step": 11500 }, { "epoch": 0.03, "learning_rate": 4.8436461712856196e-05, "loss": 1.4651, "step": 12000 }, { "epoch": 0.03, "learning_rate": 4.8371314284225206e-05, "loss": 1.4722, "step": 12500 }, { "epoch": 0.03, "learning_rate": 4.830616685559421e-05, "loss": 1.4741, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.824101942696322e-05, "loss": 1.4711, "step": 13500 }, { "epoch": 0.04, "learning_rate": 4.817587199833223e-05, "loss": 1.463, "step": 14000 }, { "epoch": 0.04, "learning_rate": 4.811072456970124e-05, "loss": 1.4641, "step": 14500 }, { "epoch": 0.04, "learning_rate": 4.8045577141070244e-05, "loss": 1.465, "step": 15000 }, { "epoch": 0.04, "learning_rate": 4.7980429712439254e-05, "loss": 1.46, "step": 15500 }, { "epoch": 0.04, "learning_rate": 4.791528228380826e-05, "loss": 1.4563, "step": 16000 }, { "epoch": 0.04, "learning_rate": 4.785013485517727e-05, "loss": 1.4545, "step": 16500 }, { "epoch": 0.04, "learning_rate": 4.778498742654627e-05, "loss": 1.4643, "step": 17000 }, { "epoch": 0.05, "learning_rate": 4.771983999791528e-05, "loss": 1.4612, "step": 17500 }, { "epoch": 0.05, "learning_rate": 4.765469256928429e-05, "loss": 1.4538, "step": 18000 }, { "epoch": 0.05, "learning_rate": 4.75895451406533e-05, "loss": 1.4508, "step": 18500 }, { "epoch": 0.05, "learning_rate": 4.7524397712022306e-05, "loss": 1.4565, "step": 19000 }, { "epoch": 0.05, "learning_rate": 4.7459250283391316e-05, "loss": 1.4562, "step": 19500 }, { "epoch": 0.05, "learning_rate": 4.739410285476033e-05, "loss": 1.4538, "step": 20000 }, { "epoch": 0.05, "learning_rate": 4.732895542612934e-05, "loss": 1.4476, "step": 20500 }, { "epoch": 0.05, "learning_rate": 4.726380799749834e-05, "loss": 1.4567, "step": 21000 }, { "epoch": 0.06, "learning_rate": 4.719866056886735e-05, "loss": 1.4464, "step": 21500 }, { "epoch": 0.06, "learning_rate": 4.7133513140236354e-05, "loss": 1.4514, "step": 22000 }, { "epoch": 0.06, "learning_rate": 4.7068365711605365e-05, "loss": 1.4504, "step": 22500 }, { "epoch": 0.06, "learning_rate": 4.700321828297437e-05, "loss": 1.4467, "step": 23000 }, { "epoch": 0.06, "learning_rate": 4.693807085434338e-05, "loss": 1.4449, "step": 23500 }, { "epoch": 0.06, "learning_rate": 4.687292342571239e-05, "loss": 1.439, "step": 24000 }, { "epoch": 0.06, "learning_rate": 4.68077759970814e-05, "loss": 1.4369, "step": 24500 }, { "epoch": 0.07, "learning_rate": 4.67426285684504e-05, "loss": 1.4486, "step": 25000 }, { "epoch": 0.07, "learning_rate": 4.667748113981941e-05, "loss": 1.4426, "step": 25500 }, { "epoch": 0.07, "learning_rate": 4.6612333711188423e-05, "loss": 1.4371, "step": 26000 }, { "epoch": 0.07, "learning_rate": 4.6547186282557434e-05, "loss": 1.4382, "step": 26500 }, { "epoch": 0.07, "learning_rate": 4.648203885392644e-05, "loss": 1.4306, "step": 27000 }, { "epoch": 0.07, "learning_rate": 4.641689142529545e-05, "loss": 1.4491, "step": 27500 }, { "epoch": 0.07, "learning_rate": 4.635174399666445e-05, "loss": 1.441, "step": 28000 }, { "epoch": 0.07, "learning_rate": 4.628659656803346e-05, "loss": 1.4499, "step": 28500 }, { "epoch": 0.08, "learning_rate": 4.6221449139402465e-05, "loss": 1.4347, "step": 29000 }, { "epoch": 0.08, "learning_rate": 4.6156301710771475e-05, "loss": 1.4458, "step": 29500 }, { "epoch": 0.08, "learning_rate": 4.6091154282140486e-05, "loss": 1.4394, "step": 30000 }, { "epoch": 0.08, "learning_rate": 4.6026006853509496e-05, "loss": 1.4264, "step": 30500 }, { "epoch": 0.08, "learning_rate": 4.59608594248785e-05, "loss": 1.422, "step": 31000 }, { "epoch": 0.08, "learning_rate": 4.589571199624751e-05, "loss": 1.4297, "step": 31500 }, { "epoch": 0.08, "learning_rate": 4.583056456761652e-05, "loss": 1.4204, "step": 32000 }, { "epoch": 0.08, "learning_rate": 4.576541713898553e-05, "loss": 1.4287, "step": 32500 }, { "epoch": 0.09, "learning_rate": 4.5700269710354534e-05, "loss": 1.4262, "step": 33000 }, { "epoch": 0.09, "learning_rate": 4.5635122281723544e-05, "loss": 1.4353, "step": 33500 }, { "epoch": 0.09, "learning_rate": 4.556997485309255e-05, "loss": 1.422, "step": 34000 }, { "epoch": 0.09, "learning_rate": 4.550482742446156e-05, "loss": 1.4264, "step": 34500 }, { "epoch": 0.09, "learning_rate": 4.543967999583056e-05, "loss": 1.4279, "step": 35000 }, { "epoch": 0.09, "learning_rate": 4.537453256719957e-05, "loss": 1.4255, "step": 35500 }, { "epoch": 0.09, "learning_rate": 4.530938513856858e-05, "loss": 1.4245, "step": 36000 }, { "epoch": 0.1, "learning_rate": 4.524423770993759e-05, "loss": 1.4112, "step": 36500 }, { "epoch": 0.1, "learning_rate": 4.5179090281306596e-05, "loss": 1.4267, "step": 37000 }, { "epoch": 0.1, "learning_rate": 4.5113942852675606e-05, "loss": 1.4233, "step": 37500 }, { "epoch": 0.1, "learning_rate": 4.504879542404462e-05, "loss": 1.4283, "step": 38000 }, { "epoch": 0.1, "learning_rate": 4.498364799541363e-05, "loss": 1.4263, "step": 38500 }, { "epoch": 0.1, "learning_rate": 4.491850056678263e-05, "loss": 1.4239, "step": 39000 }, { "epoch": 0.1, "learning_rate": 4.485335313815164e-05, "loss": 1.4243, "step": 39500 }, { "epoch": 0.1, "learning_rate": 4.4788205709520644e-05, "loss": 1.4223, "step": 40000 }, { "epoch": 0.11, "learning_rate": 4.4723058280889655e-05, "loss": 1.4162, "step": 40500 }, { "epoch": 0.11, "learning_rate": 4.465791085225866e-05, "loss": 1.4142, "step": 41000 }, { "epoch": 0.11, "learning_rate": 4.459276342362767e-05, "loss": 1.4186, "step": 41500 }, { "epoch": 0.11, "learning_rate": 4.452761599499668e-05, "loss": 1.4115, "step": 42000 }, { "epoch": 0.11, "learning_rate": 4.446246856636569e-05, "loss": 1.4171, "step": 42500 }, { "epoch": 0.11, "learning_rate": 4.439732113773469e-05, "loss": 1.4107, "step": 43000 }, { "epoch": 0.11, "learning_rate": 4.43321737091037e-05, "loss": 1.4115, "step": 43500 }, { "epoch": 0.11, "learning_rate": 4.426702628047271e-05, "loss": 1.4064, "step": 44000 }, { "epoch": 0.12, "learning_rate": 4.4201878851841724e-05, "loss": 1.4168, "step": 44500 }, { "epoch": 0.12, "learning_rate": 4.413673142321073e-05, "loss": 1.415, "step": 45000 }, { "epoch": 0.12, "learning_rate": 4.407158399457974e-05, "loss": 1.4082, "step": 45500 }, { "epoch": 0.12, "learning_rate": 4.400643656594874e-05, "loss": 1.4104, "step": 46000 }, { "epoch": 0.12, "learning_rate": 4.394128913731775e-05, "loss": 1.4077, "step": 46500 }, { "epoch": 0.12, "learning_rate": 4.3876141708686755e-05, "loss": 1.4152, "step": 47000 }, { "epoch": 0.12, "learning_rate": 4.3810994280055765e-05, "loss": 1.4087, "step": 47500 }, { "epoch": 0.13, "learning_rate": 4.3745846851424775e-05, "loss": 1.4101, "step": 48000 }, { "epoch": 0.13, "learning_rate": 4.3680699422793786e-05, "loss": 1.4064, "step": 48500 }, { "epoch": 0.13, "learning_rate": 4.361555199416279e-05, "loss": 1.4071, "step": 49000 }, { "epoch": 0.13, "learning_rate": 4.35504045655318e-05, "loss": 1.4124, "step": 49500 }, { "epoch": 0.13, "learning_rate": 4.348525713690081e-05, "loss": 1.4091, "step": 50000 }, { "epoch": 0.13, "learning_rate": 4.342010970826982e-05, "loss": 1.4081, "step": 50500 }, { "epoch": 0.13, "learning_rate": 4.3354962279638824e-05, "loss": 1.4099, "step": 51000 }, { "epoch": 0.13, "learning_rate": 4.3289814851007834e-05, "loss": 1.4087, "step": 51500 }, { "epoch": 0.14, "learning_rate": 4.3224667422376844e-05, "loss": 1.3954, "step": 52000 }, { "epoch": 0.14, "learning_rate": 4.315951999374585e-05, "loss": 1.3962, "step": 52500 }, { "epoch": 0.14, "learning_rate": 4.309437256511485e-05, "loss": 1.4091, "step": 53000 }, { "epoch": 0.14, "learning_rate": 4.302922513648386e-05, "loss": 1.403, "step": 53500 }, { "epoch": 0.14, "learning_rate": 4.296407770785287e-05, "loss": 1.4087, "step": 54000 }, { "epoch": 0.14, "learning_rate": 4.289893027922188e-05, "loss": 1.4044, "step": 54500 }, { "epoch": 0.14, "learning_rate": 4.2833782850590886e-05, "loss": 1.3922, "step": 55000 }, { "epoch": 0.14, "learning_rate": 4.2768635421959896e-05, "loss": 1.4006, "step": 55500 }, { "epoch": 0.15, "learning_rate": 4.2703487993328907e-05, "loss": 1.3969, "step": 56000 }, { "epoch": 0.15, "learning_rate": 4.263834056469792e-05, "loss": 1.3985, "step": 56500 }, { "epoch": 0.15, "learning_rate": 4.257319313606692e-05, "loss": 1.4059, "step": 57000 }, { "epoch": 0.15, "learning_rate": 4.250804570743593e-05, "loss": 1.3923, "step": 57500 }, { "epoch": 0.15, "learning_rate": 4.244289827880494e-05, "loss": 1.3966, "step": 58000 }, { "epoch": 0.15, "learning_rate": 4.2377750850173945e-05, "loss": 1.3921, "step": 58500 }, { "epoch": 0.15, "learning_rate": 4.231260342154295e-05, "loss": 1.3987, "step": 59000 }, { "epoch": 0.16, "learning_rate": 4.224745599291196e-05, "loss": 1.3984, "step": 59500 }, { "epoch": 0.16, "learning_rate": 4.218230856428097e-05, "loss": 1.3914, "step": 60000 }, { "epoch": 0.16, "learning_rate": 4.211716113564998e-05, "loss": 1.3976, "step": 60500 }, { "epoch": 0.16, "learning_rate": 4.205201370701898e-05, "loss": 1.3883, "step": 61000 }, { "epoch": 0.16, "learning_rate": 4.198686627838799e-05, "loss": 1.3898, "step": 61500 }, { "epoch": 0.16, "learning_rate": 4.1921718849757e-05, "loss": 1.3917, "step": 62000 }, { "epoch": 0.16, "learning_rate": 4.1856571421126014e-05, "loss": 1.3973, "step": 62500 }, { "epoch": 0.16, "learning_rate": 4.179142399249502e-05, "loss": 1.392, "step": 63000 }, { "epoch": 0.17, "learning_rate": 4.172627656386403e-05, "loss": 1.3966, "step": 63500 }, { "epoch": 0.17, "learning_rate": 4.166112913523304e-05, "loss": 1.3923, "step": 64000 }, { "epoch": 0.17, "learning_rate": 4.159598170660204e-05, "loss": 1.3838, "step": 64500 }, { "epoch": 0.17, "learning_rate": 4.1530834277971045e-05, "loss": 1.3881, "step": 65000 }, { "epoch": 0.17, "learning_rate": 4.1465686849340055e-05, "loss": 1.3865, "step": 65500 }, { "epoch": 0.17, "learning_rate": 4.1400539420709065e-05, "loss": 1.3855, "step": 66000 }, { "epoch": 0.17, "learning_rate": 4.1335391992078076e-05, "loss": 1.3888, "step": 66500 }, { "epoch": 0.17, "learning_rate": 4.127024456344708e-05, "loss": 1.3954, "step": 67000 }, { "epoch": 0.18, "learning_rate": 4.120509713481609e-05, "loss": 1.387, "step": 67500 }, { "epoch": 0.18, "learning_rate": 4.11399497061851e-05, "loss": 1.3765, "step": 68000 }, { "epoch": 0.18, "learning_rate": 4.107480227755411e-05, "loss": 1.387, "step": 68500 }, { "epoch": 0.18, "learning_rate": 4.1009654848923114e-05, "loss": 1.3865, "step": 69000 }, { "epoch": 0.18, "learning_rate": 4.0944507420292124e-05, "loss": 1.3913, "step": 69500 }, { "epoch": 0.18, "learning_rate": 4.0879359991661134e-05, "loss": 1.3781, "step": 70000 }, { "epoch": 0.18, "learning_rate": 4.081421256303014e-05, "loss": 1.3833, "step": 70500 }, { "epoch": 0.19, "learning_rate": 4.074906513439914e-05, "loss": 1.3776, "step": 71000 }, { "epoch": 0.19, "learning_rate": 4.068391770576815e-05, "loss": 1.3837, "step": 71500 }, { "epoch": 0.19, "learning_rate": 4.061877027713716e-05, "loss": 1.3884, "step": 72000 }, { "epoch": 0.19, "learning_rate": 4.055362284850617e-05, "loss": 1.3811, "step": 72500 }, { "epoch": 0.19, "learning_rate": 4.0488475419875176e-05, "loss": 1.3868, "step": 73000 }, { "epoch": 0.19, "learning_rate": 4.0423327991244186e-05, "loss": 1.384, "step": 73500 }, { "epoch": 0.19, "learning_rate": 4.0358180562613196e-05, "loss": 1.3832, "step": 74000 }, { "epoch": 0.19, "learning_rate": 4.029303313398221e-05, "loss": 1.3871, "step": 74500 }, { "epoch": 0.2, "learning_rate": 4.022788570535121e-05, "loss": 1.386, "step": 75000 }, { "epoch": 0.2, "learning_rate": 4.016273827672022e-05, "loss": 1.3815, "step": 75500 }, { "epoch": 0.2, "learning_rate": 4.009759084808923e-05, "loss": 1.3755, "step": 76000 }, { "epoch": 0.2, "learning_rate": 4.0032443419458234e-05, "loss": 1.3859, "step": 76500 }, { "epoch": 0.2, "learning_rate": 3.9967295990827245e-05, "loss": 1.38, "step": 77000 }, { "epoch": 0.2, "learning_rate": 3.990214856219625e-05, "loss": 1.3834, "step": 77500 }, { "epoch": 0.2, "learning_rate": 3.983700113356526e-05, "loss": 1.3793, "step": 78000 }, { "epoch": 0.2, "learning_rate": 3.977185370493427e-05, "loss": 1.3765, "step": 78500 }, { "epoch": 0.21, "learning_rate": 3.970670627630327e-05, "loss": 1.3787, "step": 79000 }, { "epoch": 0.21, "learning_rate": 3.964155884767228e-05, "loss": 1.3818, "step": 79500 }, { "epoch": 0.21, "learning_rate": 3.957641141904129e-05, "loss": 1.3736, "step": 80000 }, { "epoch": 0.21, "learning_rate": 3.9511263990410303e-05, "loss": 1.3854, "step": 80500 }, { "epoch": 0.21, "learning_rate": 3.944611656177931e-05, "loss": 1.3796, "step": 81000 }, { "epoch": 0.21, "learning_rate": 3.938096913314832e-05, "loss": 1.3775, "step": 81500 }, { "epoch": 0.21, "learning_rate": 3.931582170451733e-05, "loss": 1.3768, "step": 82000 }, { "epoch": 0.21, "learning_rate": 3.925067427588633e-05, "loss": 1.3691, "step": 82500 }, { "epoch": 0.22, "learning_rate": 3.918552684725534e-05, "loss": 1.3796, "step": 83000 }, { "epoch": 0.22, "learning_rate": 3.9120379418624345e-05, "loss": 1.3701, "step": 83500 }, { "epoch": 0.22, "learning_rate": 3.9055231989993355e-05, "loss": 1.3828, "step": 84000 }, { "epoch": 0.22, "learning_rate": 3.8990084561362366e-05, "loss": 1.3812, "step": 84500 }, { "epoch": 0.22, "learning_rate": 3.892493713273137e-05, "loss": 1.3767, "step": 85000 }, { "epoch": 0.22, "learning_rate": 3.885978970410038e-05, "loss": 1.3712, "step": 85500 }, { "epoch": 0.22, "learning_rate": 3.879464227546939e-05, "loss": 1.3715, "step": 86000 }, { "epoch": 0.23, "learning_rate": 3.87294948468384e-05, "loss": 1.3662, "step": 86500 }, { "epoch": 0.23, "learning_rate": 3.8664347418207404e-05, "loss": 1.3782, "step": 87000 }, { "epoch": 0.23, "learning_rate": 3.8599199989576414e-05, "loss": 1.3758, "step": 87500 }, { "epoch": 0.23, "learning_rate": 3.8534052560945424e-05, "loss": 1.3655, "step": 88000 }, { "epoch": 0.23, "learning_rate": 3.8468905132314435e-05, "loss": 1.3802, "step": 88500 }, { "epoch": 0.23, "learning_rate": 3.840375770368344e-05, "loss": 1.3766, "step": 89000 }, { "epoch": 0.23, "learning_rate": 3.833861027505244e-05, "loss": 1.3648, "step": 89500 }, { "epoch": 0.23, "learning_rate": 3.827346284642145e-05, "loss": 1.3703, "step": 90000 }, { "epoch": 0.24, "learning_rate": 3.820831541779046e-05, "loss": 1.3643, "step": 90500 }, { "epoch": 0.24, "learning_rate": 3.8143167989159466e-05, "loss": 1.368, "step": 91000 }, { "epoch": 0.24, "learning_rate": 3.8078020560528476e-05, "loss": 1.367, "step": 91500 }, { "epoch": 0.24, "learning_rate": 3.8012873131897486e-05, "loss": 1.3641, "step": 92000 }, { "epoch": 0.24, "learning_rate": 3.79477257032665e-05, "loss": 1.3743, "step": 92500 }, { "epoch": 0.24, "learning_rate": 3.78825782746355e-05, "loss": 1.3724, "step": 93000 }, { "epoch": 0.24, "learning_rate": 3.781743084600451e-05, "loss": 1.3729, "step": 93500 }, { "epoch": 0.24, "learning_rate": 3.775228341737352e-05, "loss": 1.3627, "step": 94000 }, { "epoch": 0.25, "learning_rate": 3.768713598874253e-05, "loss": 1.3649, "step": 94500 }, { "epoch": 0.25, "learning_rate": 3.7621988560111535e-05, "loss": 1.3617, "step": 95000 }, { "epoch": 0.25, "learning_rate": 3.755684113148054e-05, "loss": 1.3645, "step": 95500 }, { "epoch": 0.25, "learning_rate": 3.749169370284955e-05, "loss": 1.3537, "step": 96000 }, { "epoch": 0.25, "learning_rate": 3.742654627421856e-05, "loss": 1.3666, "step": 96500 }, { "epoch": 0.25, "learning_rate": 3.736139884558756e-05, "loss": 1.3629, "step": 97000 }, { "epoch": 0.25, "learning_rate": 3.729625141695657e-05, "loss": 1.367, "step": 97500 }, { "epoch": 0.26, "learning_rate": 3.723110398832558e-05, "loss": 1.3658, "step": 98000 }, { "epoch": 0.26, "learning_rate": 3.716595655969459e-05, "loss": 1.3599, "step": 98500 }, { "epoch": 0.26, "learning_rate": 3.71008091310636e-05, "loss": 1.3658, "step": 99000 }, { "epoch": 0.26, "learning_rate": 3.703566170243261e-05, "loss": 1.3595, "step": 99500 }, { "epoch": 0.26, "learning_rate": 3.697051427380162e-05, "loss": 1.3662, "step": 100000 }, { "epoch": 0.26, "learning_rate": 3.690536684517063e-05, "loss": 1.3613, "step": 100500 }, { "epoch": 0.26, "learning_rate": 3.684021941653963e-05, "loss": 1.3626, "step": 101000 }, { "epoch": 0.26, "learning_rate": 3.6775071987908635e-05, "loss": 1.3692, "step": 101500 }, { "epoch": 0.27, "learning_rate": 3.6709924559277645e-05, "loss": 1.3572, "step": 102000 }, { "epoch": 0.27, "learning_rate": 3.6644777130646656e-05, "loss": 1.3553, "step": 102500 }, { "epoch": 0.27, "learning_rate": 3.657962970201566e-05, "loss": 1.3561, "step": 103000 }, { "epoch": 0.27, "learning_rate": 3.651448227338467e-05, "loss": 1.3588, "step": 103500 }, { "epoch": 0.27, "learning_rate": 3.644933484475368e-05, "loss": 1.3554, "step": 104000 }, { "epoch": 0.27, "learning_rate": 3.638418741612269e-05, "loss": 1.3612, "step": 104500 }, { "epoch": 0.27, "learning_rate": 3.6319039987491694e-05, "loss": 1.3579, "step": 105000 }, { "epoch": 0.27, "learning_rate": 3.6253892558860704e-05, "loss": 1.3564, "step": 105500 }, { "epoch": 0.28, "learning_rate": 3.6188745130229714e-05, "loss": 1.3523, "step": 106000 }, { "epoch": 0.28, "learning_rate": 3.6123597701598724e-05, "loss": 1.353, "step": 106500 }, { "epoch": 0.28, "learning_rate": 3.605845027296773e-05, "loss": 1.3539, "step": 107000 }, { "epoch": 0.28, "learning_rate": 3.599330284433673e-05, "loss": 1.3577, "step": 107500 }, { "epoch": 0.28, "learning_rate": 3.592815541570574e-05, "loss": 1.353, "step": 108000 }, { "epoch": 0.28, "learning_rate": 3.586300798707475e-05, "loss": 1.3577, "step": 108500 }, { "epoch": 0.28, "learning_rate": 3.5797860558443756e-05, "loss": 1.3528, "step": 109000 }, { "epoch": 0.29, "learning_rate": 3.5732713129812766e-05, "loss": 1.364, "step": 109500 }, { "epoch": 0.29, "learning_rate": 3.5667565701181776e-05, "loss": 1.3585, "step": 110000 }, { "epoch": 0.29, "learning_rate": 3.560241827255079e-05, "loss": 1.3559, "step": 110500 }, { "epoch": 0.29, "learning_rate": 3.553727084391979e-05, "loss": 1.3518, "step": 111000 }, { "epoch": 0.29, "learning_rate": 3.54721234152888e-05, "loss": 1.3575, "step": 111500 }, { "epoch": 0.29, "learning_rate": 3.540697598665781e-05, "loss": 1.3527, "step": 112000 }, { "epoch": 0.29, "learning_rate": 3.534182855802682e-05, "loss": 1.3525, "step": 112500 }, { "epoch": 0.29, "learning_rate": 3.5276681129395825e-05, "loss": 1.353, "step": 113000 }, { "epoch": 0.3, "learning_rate": 3.5211533700764835e-05, "loss": 1.3471, "step": 113500 }, { "epoch": 0.3, "learning_rate": 3.514638627213384e-05, "loss": 1.3439, "step": 114000 }, { "epoch": 0.3, "learning_rate": 3.508123884350285e-05, "loss": 1.3478, "step": 114500 }, { "epoch": 0.3, "learning_rate": 3.501609141487185e-05, "loss": 1.3502, "step": 115000 }, { "epoch": 0.3, "learning_rate": 3.495094398624086e-05, "loss": 1.3482, "step": 115500 }, { "epoch": 0.3, "learning_rate": 3.488579655760987e-05, "loss": 1.3565, "step": 116000 }, { "epoch": 0.3, "learning_rate": 3.482064912897888e-05, "loss": 1.3556, "step": 116500 }, { "epoch": 0.3, "learning_rate": 3.475550170034789e-05, "loss": 1.3455, "step": 117000 }, { "epoch": 0.31, "learning_rate": 3.46903542717169e-05, "loss": 1.3487, "step": 117500 }, { "epoch": 0.31, "learning_rate": 3.462520684308591e-05, "loss": 1.3422, "step": 118000 }, { "epoch": 0.31, "learning_rate": 3.456005941445492e-05, "loss": 1.3493, "step": 118500 }, { "epoch": 0.31, "learning_rate": 3.449491198582392e-05, "loss": 1.3547, "step": 119000 }, { "epoch": 0.31, "learning_rate": 3.442976455719293e-05, "loss": 1.3355, "step": 119500 }, { "epoch": 0.31, "learning_rate": 3.4364617128561935e-05, "loss": 1.3569, "step": 120000 }, { "epoch": 0.31, "learning_rate": 3.4299469699930945e-05, "loss": 1.3542, "step": 120500 }, { "epoch": 0.32, "learning_rate": 3.423432227129995e-05, "loss": 1.3408, "step": 121000 }, { "epoch": 0.32, "learning_rate": 3.416917484266896e-05, "loss": 1.3452, "step": 121500 }, { "epoch": 0.32, "learning_rate": 3.410402741403797e-05, "loss": 1.3521, "step": 122000 }, { "epoch": 0.32, "learning_rate": 3.403887998540698e-05, "loss": 1.3483, "step": 122500 }, { "epoch": 0.32, "learning_rate": 3.3973732556775983e-05, "loss": 1.3409, "step": 123000 }, { "epoch": 0.32, "learning_rate": 3.3908585128144994e-05, "loss": 1.3493, "step": 123500 }, { "epoch": 0.32, "learning_rate": 3.3843437699514004e-05, "loss": 1.3459, "step": 124000 }, { "epoch": 0.32, "learning_rate": 3.3778290270883014e-05, "loss": 1.3358, "step": 124500 }, { "epoch": 0.33, "learning_rate": 3.371314284225202e-05, "loss": 1.347, "step": 125000 }, { "epoch": 0.33, "learning_rate": 3.364799541362103e-05, "loss": 1.3453, "step": 125500 }, { "epoch": 0.33, "learning_rate": 3.358284798499003e-05, "loss": 1.3358, "step": 126000 }, { "epoch": 0.33, "learning_rate": 3.351770055635904e-05, "loss": 1.3601, "step": 126500 }, { "epoch": 0.33, "learning_rate": 3.3452553127728046e-05, "loss": 1.3471, "step": 127000 }, { "epoch": 0.33, "learning_rate": 3.3387405699097056e-05, "loss": 1.3505, "step": 127500 }, { "epoch": 0.33, "learning_rate": 3.3322258270466066e-05, "loss": 1.3415, "step": 128000 }, { "epoch": 0.33, "learning_rate": 3.3257110841835077e-05, "loss": 1.3301, "step": 128500 }, { "epoch": 0.34, "learning_rate": 3.319196341320408e-05, "loss": 1.3444, "step": 129000 }, { "epoch": 0.34, "learning_rate": 3.312681598457309e-05, "loss": 1.34, "step": 129500 }, { "epoch": 0.34, "learning_rate": 3.30616685559421e-05, "loss": 1.3383, "step": 130000 }, { "epoch": 0.34, "learning_rate": 3.299652112731111e-05, "loss": 1.3386, "step": 130500 }, { "epoch": 0.34, "learning_rate": 3.2931373698680115e-05, "loss": 1.3397, "step": 131000 }, { "epoch": 0.34, "learning_rate": 3.2866226270049125e-05, "loss": 1.3377, "step": 131500 }, { "epoch": 0.34, "learning_rate": 3.280107884141813e-05, "loss": 1.3377, "step": 132000 }, { "epoch": 0.35, "learning_rate": 3.273593141278714e-05, "loss": 1.3327, "step": 132500 }, { "epoch": 0.35, "learning_rate": 3.267078398415614e-05, "loss": 1.3333, "step": 133000 }, { "epoch": 0.35, "learning_rate": 3.260563655552515e-05, "loss": 1.3369, "step": 133500 }, { "epoch": 0.35, "learning_rate": 3.254048912689416e-05, "loss": 1.3389, "step": 134000 }, { "epoch": 0.35, "learning_rate": 3.247534169826317e-05, "loss": 1.3419, "step": 134500 }, { "epoch": 0.35, "learning_rate": 3.241019426963218e-05, "loss": 1.3362, "step": 135000 }, { "epoch": 0.35, "learning_rate": 3.234504684100119e-05, "loss": 1.338, "step": 135500 }, { "epoch": 0.35, "learning_rate": 3.22798994123702e-05, "loss": 1.3349, "step": 136000 }, { "epoch": 0.36, "learning_rate": 3.221475198373921e-05, "loss": 1.3299, "step": 136500 }, { "epoch": 0.36, "learning_rate": 3.214960455510821e-05, "loss": 1.3305, "step": 137000 }, { "epoch": 0.36, "learning_rate": 3.208445712647722e-05, "loss": 1.3395, "step": 137500 }, { "epoch": 0.36, "learning_rate": 3.2019309697846225e-05, "loss": 1.3348, "step": 138000 }, { "epoch": 0.36, "learning_rate": 3.1954162269215235e-05, "loss": 1.3358, "step": 138500 }, { "epoch": 0.36, "learning_rate": 3.188901484058424e-05, "loss": 1.3295, "step": 139000 }, { "epoch": 0.36, "learning_rate": 3.182386741195325e-05, "loss": 1.335, "step": 139500 }, { "epoch": 0.36, "learning_rate": 3.175871998332226e-05, "loss": 1.3326, "step": 140000 }, { "epoch": 0.37, "learning_rate": 3.169357255469127e-05, "loss": 1.3367, "step": 140500 }, { "epoch": 0.37, "learning_rate": 3.162842512606027e-05, "loss": 1.3372, "step": 141000 }, { "epoch": 0.37, "learning_rate": 3.1563277697429284e-05, "loss": 1.3337, "step": 141500 }, { "epoch": 0.37, "learning_rate": 3.1498130268798294e-05, "loss": 1.3295, "step": 142000 }, { "epoch": 0.37, "learning_rate": 3.1432982840167304e-05, "loss": 1.3445, "step": 142500 }, { "epoch": 0.37, "learning_rate": 3.136783541153631e-05, "loss": 1.33, "step": 143000 }, { "epoch": 0.37, "learning_rate": 3.130268798290532e-05, "loss": 1.3319, "step": 143500 }, { "epoch": 0.38, "learning_rate": 3.123754055427432e-05, "loss": 1.3295, "step": 144000 }, { "epoch": 0.38, "learning_rate": 3.117239312564333e-05, "loss": 1.3294, "step": 144500 }, { "epoch": 0.38, "learning_rate": 3.1107245697012335e-05, "loss": 1.3217, "step": 145000 }, { "epoch": 0.38, "learning_rate": 3.1042098268381346e-05, "loss": 1.3308, "step": 145500 }, { "epoch": 0.38, "learning_rate": 3.0976950839750356e-05, "loss": 1.3278, "step": 146000 }, { "epoch": 0.38, "learning_rate": 3.0911803411119366e-05, "loss": 1.3286, "step": 146500 }, { "epoch": 0.38, "learning_rate": 3.084665598248837e-05, "loss": 1.3352, "step": 147000 }, { "epoch": 0.38, "learning_rate": 3.078150855385738e-05, "loss": 1.3125, "step": 147500 }, { "epoch": 0.39, "learning_rate": 3.071636112522639e-05, "loss": 1.3303, "step": 148000 }, { "epoch": 0.39, "learning_rate": 3.06512136965954e-05, "loss": 1.3185, "step": 148500 }, { "epoch": 0.39, "learning_rate": 3.0586066267964404e-05, "loss": 1.3295, "step": 149000 }, { "epoch": 0.39, "learning_rate": 3.0520918839333415e-05, "loss": 1.3218, "step": 149500 }, { "epoch": 0.39, "learning_rate": 3.045577141070242e-05, "loss": 1.3252, "step": 150000 }, { "epoch": 0.39, "learning_rate": 3.0390623982071432e-05, "loss": 1.3241, "step": 150500 }, { "epoch": 0.39, "learning_rate": 3.0325476553440436e-05, "loss": 1.3232, "step": 151000 }, { "epoch": 0.39, "learning_rate": 3.0260329124809446e-05, "loss": 1.329, "step": 151500 }, { "epoch": 0.4, "learning_rate": 3.0195181696178453e-05, "loss": 1.33, "step": 152000 }, { "epoch": 0.4, "learning_rate": 3.0130034267547463e-05, "loss": 1.3331, "step": 152500 }, { "epoch": 0.4, "learning_rate": 3.0064886838916467e-05, "loss": 1.32, "step": 153000 }, { "epoch": 0.4, "learning_rate": 2.9999739410285477e-05, "loss": 1.316, "step": 153500 }, { "epoch": 0.4, "learning_rate": 2.9934591981654487e-05, "loss": 1.3224, "step": 154000 }, { "epoch": 0.4, "learning_rate": 2.9869444553023494e-05, "loss": 1.3297, "step": 154500 }, { "epoch": 0.4, "learning_rate": 2.98042971243925e-05, "loss": 1.3222, "step": 155000 }, { "epoch": 0.41, "learning_rate": 2.9739149695761508e-05, "loss": 1.3346, "step": 155500 }, { "epoch": 0.41, "learning_rate": 2.9674002267130518e-05, "loss": 1.3314, "step": 156000 }, { "epoch": 0.41, "learning_rate": 2.960885483849953e-05, "loss": 1.3266, "step": 156500 }, { "epoch": 0.41, "learning_rate": 2.9543707409868532e-05, "loss": 1.3189, "step": 157000 }, { "epoch": 0.41, "learning_rate": 2.9478559981237542e-05, "loss": 1.3265, "step": 157500 }, { "epoch": 0.41, "learning_rate": 2.941341255260655e-05, "loss": 1.3136, "step": 158000 }, { "epoch": 0.41, "learning_rate": 2.934826512397556e-05, "loss": 1.3229, "step": 158500 }, { "epoch": 0.41, "learning_rate": 2.9283117695344563e-05, "loss": 1.3244, "step": 159000 }, { "epoch": 0.42, "learning_rate": 2.9217970266713574e-05, "loss": 1.3237, "step": 159500 }, { "epoch": 0.42, "learning_rate": 2.9152822838082584e-05, "loss": 1.3136, "step": 160000 }, { "epoch": 0.42, "learning_rate": 2.908767540945159e-05, "loss": 1.3158, "step": 160500 }, { "epoch": 0.42, "learning_rate": 2.9022527980820598e-05, "loss": 1.3164, "step": 161000 }, { "epoch": 0.42, "learning_rate": 2.8957380552189605e-05, "loss": 1.3238, "step": 161500 }, { "epoch": 0.42, "learning_rate": 2.8892233123558615e-05, "loss": 1.3206, "step": 162000 }, { "epoch": 0.42, "learning_rate": 2.8827085694927625e-05, "loss": 1.3232, "step": 162500 }, { "epoch": 0.42, "learning_rate": 2.876193826629663e-05, "loss": 1.316, "step": 163000 }, { "epoch": 0.43, "learning_rate": 2.869679083766564e-05, "loss": 1.3094, "step": 163500 }, { "epoch": 0.43, "learning_rate": 2.8631643409034646e-05, "loss": 1.313, "step": 164000 }, { "epoch": 0.43, "learning_rate": 2.8566495980403656e-05, "loss": 1.3158, "step": 164500 }, { "epoch": 0.43, "learning_rate": 2.850134855177266e-05, "loss": 1.3211, "step": 165000 }, { "epoch": 0.43, "learning_rate": 2.843620112314167e-05, "loss": 1.3155, "step": 165500 }, { "epoch": 0.43, "learning_rate": 2.837105369451068e-05, "loss": 1.314, "step": 166000 }, { "epoch": 0.43, "learning_rate": 2.830590626587969e-05, "loss": 1.3217, "step": 166500 }, { "epoch": 0.44, "learning_rate": 2.8240758837248694e-05, "loss": 1.3236, "step": 167000 }, { "epoch": 0.44, "learning_rate": 2.81756114086177e-05, "loss": 1.3118, "step": 167500 }, { "epoch": 0.44, "learning_rate": 2.811046397998671e-05, "loss": 1.3163, "step": 168000 }, { "epoch": 0.44, "learning_rate": 2.8045316551355722e-05, "loss": 1.3117, "step": 168500 }, { "epoch": 0.44, "learning_rate": 2.7980169122724725e-05, "loss": 1.3104, "step": 169000 }, { "epoch": 0.44, "learning_rate": 2.7915021694093736e-05, "loss": 1.3209, "step": 169500 }, { "epoch": 0.44, "learning_rate": 2.7849874265462743e-05, "loss": 1.3155, "step": 170000 }, { "epoch": 0.44, "learning_rate": 2.7784726836831753e-05, "loss": 1.3046, "step": 170500 }, { "epoch": 0.45, "learning_rate": 2.7719579408200757e-05, "loss": 1.3179, "step": 171000 }, { "epoch": 0.45, "learning_rate": 2.7654431979569767e-05, "loss": 1.3151, "step": 171500 }, { "epoch": 0.45, "learning_rate": 2.7589284550938777e-05, "loss": 1.3135, "step": 172000 }, { "epoch": 0.45, "learning_rate": 2.7524137122307787e-05, "loss": 1.3182, "step": 172500 }, { "epoch": 0.45, "learning_rate": 2.745898969367679e-05, "loss": 1.3114, "step": 173000 }, { "epoch": 0.45, "learning_rate": 2.7393842265045798e-05, "loss": 1.3103, "step": 173500 }, { "epoch": 0.45, "learning_rate": 2.7328694836414808e-05, "loss": 1.3097, "step": 174000 }, { "epoch": 0.45, "learning_rate": 2.726354740778382e-05, "loss": 1.3122, "step": 174500 }, { "epoch": 0.46, "learning_rate": 2.7198399979152822e-05, "loss": 1.3144, "step": 175000 }, { "epoch": 0.46, "learning_rate": 2.7133252550521832e-05, "loss": 1.3087, "step": 175500 }, { "epoch": 0.46, "learning_rate": 2.7068105121890843e-05, "loss": 1.3114, "step": 176000 }, { "epoch": 0.46, "learning_rate": 2.700295769325985e-05, "loss": 1.3098, "step": 176500 }, { "epoch": 0.46, "learning_rate": 2.6937810264628853e-05, "loss": 1.3131, "step": 177000 }, { "epoch": 0.46, "learning_rate": 2.6872662835997863e-05, "loss": 1.3064, "step": 177500 }, { "epoch": 0.46, "learning_rate": 2.6807515407366874e-05, "loss": 1.3102, "step": 178000 }, { "epoch": 0.47, "learning_rate": 2.6742367978735884e-05, "loss": 1.31, "step": 178500 }, { "epoch": 0.47, "learning_rate": 2.6677220550104888e-05, "loss": 1.3068, "step": 179000 }, { "epoch": 0.47, "learning_rate": 2.6612073121473895e-05, "loss": 1.3148, "step": 179500 }, { "epoch": 0.47, "learning_rate": 2.6546925692842905e-05, "loss": 1.317, "step": 180000 }, { "epoch": 0.47, "learning_rate": 2.6481778264211915e-05, "loss": 1.3105, "step": 180500 }, { "epoch": 0.47, "learning_rate": 2.641663083558092e-05, "loss": 1.3166, "step": 181000 }, { "epoch": 0.47, "learning_rate": 2.635148340694993e-05, "loss": 1.3029, "step": 181500 }, { "epoch": 0.47, "learning_rate": 2.628633597831894e-05, "loss": 1.3018, "step": 182000 }, { "epoch": 0.48, "learning_rate": 2.6221188549687946e-05, "loss": 1.311, "step": 182500 }, { "epoch": 0.48, "learning_rate": 2.615604112105695e-05, "loss": 1.3071, "step": 183000 }, { "epoch": 0.48, "learning_rate": 2.609089369242596e-05, "loss": 1.3201, "step": 183500 }, { "epoch": 0.48, "learning_rate": 2.602574626379497e-05, "loss": 1.2955, "step": 184000 }, { "epoch": 0.48, "learning_rate": 2.596059883516398e-05, "loss": 1.3063, "step": 184500 }, { "epoch": 0.48, "learning_rate": 2.5895451406532984e-05, "loss": 1.3042, "step": 185000 }, { "epoch": 0.48, "learning_rate": 2.583030397790199e-05, "loss": 1.3157, "step": 185500 }, { "epoch": 0.48, "learning_rate": 2.5765156549271e-05, "loss": 1.3007, "step": 186000 }, { "epoch": 0.49, "learning_rate": 2.5700009120640012e-05, "loss": 1.3051, "step": 186500 }, { "epoch": 0.49, "learning_rate": 2.5634861692009015e-05, "loss": 1.3082, "step": 187000 }, { "epoch": 0.49, "learning_rate": 2.5569714263378026e-05, "loss": 1.3, "step": 187500 }, { "epoch": 0.49, "learning_rate": 2.5504566834747036e-05, "loss": 1.3076, "step": 188000 }, { "epoch": 0.49, "learning_rate": 2.5439419406116043e-05, "loss": 1.3042, "step": 188500 }, { "epoch": 0.49, "learning_rate": 2.5374271977485046e-05, "loss": 1.3045, "step": 189000 }, { "epoch": 0.49, "learning_rate": 2.5309124548854057e-05, "loss": 1.306, "step": 189500 }, { "epoch": 0.5, "learning_rate": 2.5243977120223067e-05, "loss": 1.3041, "step": 190000 }, { "epoch": 0.5, "learning_rate": 2.5178829691592077e-05, "loss": 1.3057, "step": 190500 }, { "epoch": 0.5, "learning_rate": 2.511368226296108e-05, "loss": 1.3108, "step": 191000 }, { "epoch": 0.5, "learning_rate": 2.504853483433009e-05, "loss": 1.3084, "step": 191500 }, { "epoch": 0.5, "learning_rate": 2.4983387405699098e-05, "loss": 1.3049, "step": 192000 }, { "epoch": 0.5, "learning_rate": 2.4918239977068105e-05, "loss": 1.3015, "step": 192500 }, { "epoch": 0.5, "learning_rate": 2.4853092548437115e-05, "loss": 1.3037, "step": 193000 }, { "epoch": 0.5, "learning_rate": 2.4787945119806122e-05, "loss": 1.3066, "step": 193500 }, { "epoch": 0.51, "learning_rate": 2.4722797691175133e-05, "loss": 1.3041, "step": 194000 }, { "epoch": 0.51, "learning_rate": 2.465765026254414e-05, "loss": 1.3004, "step": 194500 }, { "epoch": 0.51, "learning_rate": 2.4592502833913146e-05, "loss": 1.3052, "step": 195000 }, { "epoch": 0.51, "learning_rate": 2.4527355405282153e-05, "loss": 1.3044, "step": 195500 }, { "epoch": 0.51, "learning_rate": 2.4462207976651164e-05, "loss": 1.2971, "step": 196000 }, { "epoch": 0.51, "learning_rate": 2.439706054802017e-05, "loss": 1.3006, "step": 196500 }, { "epoch": 0.51, "learning_rate": 2.433191311938918e-05, "loss": 1.3022, "step": 197000 }, { "epoch": 0.51, "learning_rate": 2.4266765690758188e-05, "loss": 1.3031, "step": 197500 }, { "epoch": 0.52, "learning_rate": 2.4201618262127195e-05, "loss": 1.3019, "step": 198000 }, { "epoch": 0.52, "learning_rate": 2.41364708334962e-05, "loss": 1.2947, "step": 198500 }, { "epoch": 0.52, "learning_rate": 2.4071323404865212e-05, "loss": 1.2943, "step": 199000 }, { "epoch": 0.52, "learning_rate": 2.400617597623422e-05, "loss": 1.3004, "step": 199500 }, { "epoch": 0.52, "learning_rate": 2.394102854760323e-05, "loss": 1.2991, "step": 200000 }, { "epoch": 0.52, "learning_rate": 2.3875881118972236e-05, "loss": 1.2941, "step": 200500 }, { "epoch": 0.52, "learning_rate": 2.3810733690341243e-05, "loss": 1.3016, "step": 201000 }, { "epoch": 0.53, "learning_rate": 2.374558626171025e-05, "loss": 1.3067, "step": 201500 }, { "epoch": 0.53, "learning_rate": 2.368043883307926e-05, "loss": 1.2968, "step": 202000 }, { "epoch": 0.53, "learning_rate": 2.3615291404448267e-05, "loss": 1.2987, "step": 202500 }, { "epoch": 0.53, "learning_rate": 2.3550143975817278e-05, "loss": 1.292, "step": 203000 }, { "epoch": 0.53, "learning_rate": 2.3484996547186284e-05, "loss": 1.3013, "step": 203500 }, { "epoch": 0.53, "learning_rate": 2.341984911855529e-05, "loss": 1.2993, "step": 204000 }, { "epoch": 0.53, "learning_rate": 2.33547016899243e-05, "loss": 1.289, "step": 204500 }, { "epoch": 0.53, "learning_rate": 2.328955426129331e-05, "loss": 1.2991, "step": 205000 }, { "epoch": 0.54, "learning_rate": 2.3224406832662316e-05, "loss": 1.3004, "step": 205500 }, { "epoch": 0.54, "learning_rate": 2.3159259404031326e-05, "loss": 1.3011, "step": 206000 }, { "epoch": 0.54, "learning_rate": 2.3094111975400333e-05, "loss": 1.3007, "step": 206500 }, { "epoch": 0.54, "learning_rate": 2.302896454676934e-05, "loss": 1.292, "step": 207000 }, { "epoch": 0.54, "learning_rate": 2.2963817118138347e-05, "loss": 1.2954, "step": 207500 }, { "epoch": 0.54, "learning_rate": 2.2898669689507357e-05, "loss": 1.2993, "step": 208000 }, { "epoch": 0.54, "learning_rate": 2.2833522260876364e-05, "loss": 1.2923, "step": 208500 }, { "epoch": 0.54, "learning_rate": 2.2768374832245374e-05, "loss": 1.2959, "step": 209000 }, { "epoch": 0.55, "learning_rate": 2.270322740361438e-05, "loss": 1.2818, "step": 209500 }, { "epoch": 0.55, "learning_rate": 2.2638079974983388e-05, "loss": 1.2905, "step": 210000 }, { "epoch": 0.55, "learning_rate": 2.2572932546352395e-05, "loss": 1.295, "step": 210500 }, { "epoch": 0.55, "learning_rate": 2.2507785117721405e-05, "loss": 1.2953, "step": 211000 }, { "epoch": 0.55, "learning_rate": 2.2442637689090412e-05, "loss": 1.2963, "step": 211500 }, { "epoch": 0.55, "learning_rate": 2.2377490260459422e-05, "loss": 1.2898, "step": 212000 }, { "epoch": 0.55, "learning_rate": 2.231234283182843e-05, "loss": 1.2868, "step": 212500 }, { "epoch": 0.56, "learning_rate": 2.2247195403197436e-05, "loss": 1.2888, "step": 213000 }, { "epoch": 0.56, "learning_rate": 2.2182047974566443e-05, "loss": 1.2844, "step": 213500 }, { "epoch": 0.56, "learning_rate": 2.2116900545935454e-05, "loss": 1.2876, "step": 214000 }, { "epoch": 0.56, "learning_rate": 2.205175311730446e-05, "loss": 1.2836, "step": 214500 }, { "epoch": 0.56, "learning_rate": 2.198660568867347e-05, "loss": 1.291, "step": 215000 }, { "epoch": 0.56, "learning_rate": 2.1921458260042478e-05, "loss": 1.2887, "step": 215500 }, { "epoch": 0.56, "learning_rate": 2.1856310831411485e-05, "loss": 1.2891, "step": 216000 }, { "epoch": 0.56, "learning_rate": 2.179116340278049e-05, "loss": 1.2859, "step": 216500 }, { "epoch": 0.57, "learning_rate": 2.1726015974149502e-05, "loss": 1.2833, "step": 217000 }, { "epoch": 0.57, "learning_rate": 2.166086854551851e-05, "loss": 1.2901, "step": 217500 }, { "epoch": 0.57, "learning_rate": 2.159572111688752e-05, "loss": 1.2928, "step": 218000 }, { "epoch": 0.57, "learning_rate": 2.1530573688256526e-05, "loss": 1.2991, "step": 218500 }, { "epoch": 0.57, "learning_rate": 2.1465426259625533e-05, "loss": 1.2895, "step": 219000 }, { "epoch": 0.57, "learning_rate": 2.140027883099454e-05, "loss": 1.2908, "step": 219500 }, { "epoch": 0.57, "learning_rate": 2.133513140236355e-05, "loss": 1.2973, "step": 220000 }, { "epoch": 0.57, "learning_rate": 2.1269983973732557e-05, "loss": 1.2887, "step": 220500 }, { "epoch": 0.58, "learning_rate": 2.1204836545101567e-05, "loss": 1.2807, "step": 221000 }, { "epoch": 0.58, "learning_rate": 2.1139689116470574e-05, "loss": 1.2805, "step": 221500 }, { "epoch": 0.58, "learning_rate": 2.107454168783958e-05, "loss": 1.2887, "step": 222000 }, { "epoch": 0.58, "learning_rate": 2.1009394259208588e-05, "loss": 1.2902, "step": 222500 }, { "epoch": 0.58, "learning_rate": 2.09442468305776e-05, "loss": 1.2915, "step": 223000 }, { "epoch": 0.58, "learning_rate": 2.0879099401946605e-05, "loss": 1.2829, "step": 223500 }, { "epoch": 0.58, "learning_rate": 2.0813951973315616e-05, "loss": 1.2915, "step": 224000 }, { "epoch": 0.59, "learning_rate": 2.0748804544684623e-05, "loss": 1.286, "step": 224500 }, { "epoch": 0.59, "learning_rate": 2.0683657116053633e-05, "loss": 1.2893, "step": 225000 }, { "epoch": 0.59, "learning_rate": 2.0618509687422637e-05, "loss": 1.2825, "step": 225500 }, { "epoch": 0.59, "learning_rate": 2.0553362258791647e-05, "loss": 1.2829, "step": 226000 }, { "epoch": 0.59, "learning_rate": 2.0488214830160654e-05, "loss": 1.2924, "step": 226500 }, { "epoch": 0.59, "learning_rate": 2.0423067401529664e-05, "loss": 1.2794, "step": 227000 }, { "epoch": 0.59, "learning_rate": 2.035791997289867e-05, "loss": 1.2912, "step": 227500 }, { "epoch": 0.59, "learning_rate": 2.029277254426768e-05, "loss": 1.2656, "step": 228000 }, { "epoch": 0.6, "learning_rate": 2.0227625115636685e-05, "loss": 1.2763, "step": 228500 }, { "epoch": 0.6, "learning_rate": 2.0162477687005695e-05, "loss": 1.2925, "step": 229000 }, { "epoch": 0.6, "learning_rate": 2.0097330258374702e-05, "loss": 1.28, "step": 229500 }, { "epoch": 0.6, "learning_rate": 2.0032182829743712e-05, "loss": 1.2827, "step": 230000 }, { "epoch": 0.6, "learning_rate": 1.996703540111272e-05, "loss": 1.2835, "step": 230500 }, { "epoch": 0.6, "learning_rate": 1.990188797248173e-05, "loss": 1.2814, "step": 231000 }, { "epoch": 0.6, "learning_rate": 1.9836740543850733e-05, "loss": 1.2853, "step": 231500 }, { "epoch": 0.6, "learning_rate": 1.9771593115219743e-05, "loss": 1.2748, "step": 232000 }, { "epoch": 0.61, "learning_rate": 1.970644568658875e-05, "loss": 1.2812, "step": 232500 }, { "epoch": 0.61, "learning_rate": 1.964129825795776e-05, "loss": 1.284, "step": 233000 }, { "epoch": 0.61, "learning_rate": 1.9576150829326768e-05, "loss": 1.2796, "step": 233500 }, { "epoch": 0.61, "learning_rate": 1.9511003400695778e-05, "loss": 1.2759, "step": 234000 }, { "epoch": 0.61, "learning_rate": 1.944585597206478e-05, "loss": 1.285, "step": 234500 }, { "epoch": 0.61, "learning_rate": 1.9380708543433792e-05, "loss": 1.2847, "step": 235000 }, { "epoch": 0.61, "learning_rate": 1.93155611148028e-05, "loss": 1.2795, "step": 235500 }, { "epoch": 0.61, "learning_rate": 1.925041368617181e-05, "loss": 1.2723, "step": 236000 }, { "epoch": 0.62, "learning_rate": 1.9185266257540816e-05, "loss": 1.2786, "step": 236500 }, { "epoch": 0.62, "learning_rate": 1.9120118828909826e-05, "loss": 1.272, "step": 237000 }, { "epoch": 0.62, "learning_rate": 1.9054971400278833e-05, "loss": 1.2739, "step": 237500 }, { "epoch": 0.62, "learning_rate": 1.898982397164784e-05, "loss": 1.2694, "step": 238000 }, { "epoch": 0.62, "learning_rate": 1.8924676543016847e-05, "loss": 1.2819, "step": 238500 }, { "epoch": 0.62, "learning_rate": 1.8859529114385857e-05, "loss": 1.2792, "step": 239000 }, { "epoch": 0.62, "learning_rate": 1.8794381685754864e-05, "loss": 1.2849, "step": 239500 }, { "epoch": 0.63, "learning_rate": 1.8729234257123875e-05, "loss": 1.2889, "step": 240000 }, { "epoch": 0.63, "learning_rate": 1.866408682849288e-05, "loss": 1.274, "step": 240500 }, { "epoch": 0.63, "learning_rate": 1.859893939986189e-05, "loss": 1.2778, "step": 241000 }, { "epoch": 0.63, "learning_rate": 1.8533791971230895e-05, "loss": 1.2751, "step": 241500 }, { "epoch": 0.63, "learning_rate": 1.8468644542599906e-05, "loss": 1.2791, "step": 242000 }, { "epoch": 0.63, "learning_rate": 1.8403497113968913e-05, "loss": 1.2803, "step": 242500 }, { "epoch": 0.63, "learning_rate": 1.8338349685337923e-05, "loss": 1.2785, "step": 243000 }, { "epoch": 0.63, "learning_rate": 1.827320225670693e-05, "loss": 1.272, "step": 243500 }, { "epoch": 0.64, "learning_rate": 1.8208054828075937e-05, "loss": 1.267, "step": 244000 }, { "epoch": 0.64, "learning_rate": 1.8142907399444944e-05, "loss": 1.2723, "step": 244500 }, { "epoch": 0.64, "learning_rate": 1.8077759970813954e-05, "loss": 1.2766, "step": 245000 }, { "epoch": 0.64, "learning_rate": 1.801261254218296e-05, "loss": 1.2756, "step": 245500 }, { "epoch": 0.64, "learning_rate": 1.794746511355197e-05, "loss": 1.275, "step": 246000 }, { "epoch": 0.64, "learning_rate": 1.7882317684920978e-05, "loss": 1.2775, "step": 246500 }, { "epoch": 0.64, "learning_rate": 1.7817170256289985e-05, "loss": 1.2707, "step": 247000 }, { "epoch": 0.64, "learning_rate": 1.7752022827658992e-05, "loss": 1.273, "step": 247500 }, { "epoch": 0.65, "learning_rate": 1.7686875399028002e-05, "loss": 1.2714, "step": 248000 }, { "epoch": 0.65, "learning_rate": 1.762172797039701e-05, "loss": 1.2758, "step": 248500 }, { "epoch": 0.65, "learning_rate": 1.755658054176602e-05, "loss": 1.2756, "step": 249000 }, { "epoch": 0.65, "learning_rate": 1.7491433113135026e-05, "loss": 1.2762, "step": 249500 }, { "epoch": 0.65, "learning_rate": 1.7426285684504033e-05, "loss": 1.2763, "step": 250000 }, { "epoch": 0.65, "learning_rate": 1.736113825587304e-05, "loss": 1.2766, "step": 250500 }, { "epoch": 0.65, "learning_rate": 1.729599082724205e-05, "loss": 1.2707, "step": 251000 }, { "epoch": 0.66, "learning_rate": 1.7230843398611058e-05, "loss": 1.2719, "step": 251500 }, { "epoch": 0.66, "learning_rate": 1.7165695969980068e-05, "loss": 1.2686, "step": 252000 }, { "epoch": 0.66, "learning_rate": 1.7100548541349075e-05, "loss": 1.2693, "step": 252500 }, { "epoch": 0.66, "learning_rate": 1.7035401112718082e-05, "loss": 1.2699, "step": 253000 }, { "epoch": 0.66, "learning_rate": 1.697025368408709e-05, "loss": 1.2696, "step": 253500 }, { "epoch": 0.66, "learning_rate": 1.69051062554561e-05, "loss": 1.2693, "step": 254000 }, { "epoch": 0.66, "learning_rate": 1.6839958826825106e-05, "loss": 1.2666, "step": 254500 }, { "epoch": 0.66, "learning_rate": 1.6774811398194116e-05, "loss": 1.2697, "step": 255000 }, { "epoch": 0.67, "learning_rate": 1.6709663969563123e-05, "loss": 1.2691, "step": 255500 }, { "epoch": 0.67, "learning_rate": 1.664451654093213e-05, "loss": 1.2669, "step": 256000 }, { "epoch": 0.67, "learning_rate": 1.6579369112301137e-05, "loss": 1.2663, "step": 256500 }, { "epoch": 0.67, "learning_rate": 1.6514221683670147e-05, "loss": 1.2694, "step": 257000 }, { "epoch": 0.67, "learning_rate": 1.6449074255039154e-05, "loss": 1.2706, "step": 257500 }, { "epoch": 0.67, "learning_rate": 1.6383926826408164e-05, "loss": 1.2704, "step": 258000 }, { "epoch": 0.67, "learning_rate": 1.631877939777717e-05, "loss": 1.258, "step": 258500 }, { "epoch": 0.67, "learning_rate": 1.625363196914618e-05, "loss": 1.2664, "step": 259000 }, { "epoch": 0.68, "learning_rate": 1.6188484540515185e-05, "loss": 1.2659, "step": 259500 }, { "epoch": 0.68, "learning_rate": 1.6123337111884196e-05, "loss": 1.2677, "step": 260000 }, { "epoch": 0.68, "learning_rate": 1.6058189683253202e-05, "loss": 1.2664, "step": 260500 }, { "epoch": 0.68, "learning_rate": 1.5993042254622213e-05, "loss": 1.2661, "step": 261000 }, { "epoch": 0.68, "learning_rate": 1.592789482599122e-05, "loss": 1.267, "step": 261500 }, { "epoch": 0.68, "learning_rate": 1.5862747397360227e-05, "loss": 1.2642, "step": 262000 }, { "epoch": 0.68, "learning_rate": 1.5797599968729234e-05, "loss": 1.2681, "step": 262500 }, { "epoch": 0.69, "learning_rate": 1.5732452540098244e-05, "loss": 1.2618, "step": 263000 }, { "epoch": 0.69, "learning_rate": 1.566730511146725e-05, "loss": 1.2658, "step": 263500 }, { "epoch": 0.69, "learning_rate": 1.560215768283626e-05, "loss": 1.264, "step": 264000 }, { "epoch": 0.69, "learning_rate": 1.5537010254205268e-05, "loss": 1.2676, "step": 264500 }, { "epoch": 0.69, "learning_rate": 1.5471862825574275e-05, "loss": 1.2665, "step": 265000 }, { "epoch": 0.69, "learning_rate": 1.5406715396943282e-05, "loss": 1.2559, "step": 265500 }, { "epoch": 0.69, "learning_rate": 1.5341567968312292e-05, "loss": 1.266, "step": 266000 }, { "epoch": 0.69, "learning_rate": 1.52764205396813e-05, "loss": 1.2616, "step": 266500 }, { "epoch": 0.7, "learning_rate": 1.521127311105031e-05, "loss": 1.2641, "step": 267000 }, { "epoch": 0.7, "learning_rate": 1.5146125682419315e-05, "loss": 1.2645, "step": 267500 }, { "epoch": 0.7, "learning_rate": 1.5080978253788325e-05, "loss": 1.2654, "step": 268000 }, { "epoch": 0.7, "learning_rate": 1.5015830825157332e-05, "loss": 1.2604, "step": 268500 }, { "epoch": 0.7, "learning_rate": 1.495068339652634e-05, "loss": 1.2601, "step": 269000 }, { "epoch": 0.7, "learning_rate": 1.4885535967895347e-05, "loss": 1.2642, "step": 269500 }, { "epoch": 0.7, "learning_rate": 1.4820388539264358e-05, "loss": 1.2579, "step": 270000 }, { "epoch": 0.7, "learning_rate": 1.4755241110633363e-05, "loss": 1.2642, "step": 270500 }, { "epoch": 0.71, "learning_rate": 1.4690093682002373e-05, "loss": 1.2728, "step": 271000 }, { "epoch": 0.71, "learning_rate": 1.462494625337138e-05, "loss": 1.265, "step": 271500 }, { "epoch": 0.71, "learning_rate": 1.4559798824740389e-05, "loss": 1.2657, "step": 272000 }, { "epoch": 0.71, "learning_rate": 1.4494651396109396e-05, "loss": 1.2748, "step": 272500 }, { "epoch": 0.71, "learning_rate": 1.4429503967478406e-05, "loss": 1.2682, "step": 273000 }, { "epoch": 0.71, "learning_rate": 1.4364356538847413e-05, "loss": 1.2628, "step": 273500 }, { "epoch": 0.71, "learning_rate": 1.4299209110216422e-05, "loss": 1.2729, "step": 274000 }, { "epoch": 0.72, "learning_rate": 1.4234061681585429e-05, "loss": 1.2698, "step": 274500 }, { "epoch": 0.72, "learning_rate": 1.4168914252954437e-05, "loss": 1.263, "step": 275000 }, { "epoch": 0.72, "learning_rate": 1.4103766824323444e-05, "loss": 1.2563, "step": 275500 }, { "epoch": 0.72, "learning_rate": 1.4038619395692454e-05, "loss": 1.2606, "step": 276000 }, { "epoch": 0.72, "learning_rate": 1.3973471967061461e-05, "loss": 1.2605, "step": 276500 }, { "epoch": 0.72, "learning_rate": 1.390832453843047e-05, "loss": 1.2604, "step": 277000 }, { "epoch": 0.72, "learning_rate": 1.3843177109799477e-05, "loss": 1.2627, "step": 277500 }, { "epoch": 0.72, "learning_rate": 1.3778029681168485e-05, "loss": 1.2586, "step": 278000 }, { "epoch": 0.73, "learning_rate": 1.3712882252537492e-05, "loss": 1.2617, "step": 278500 }, { "epoch": 0.73, "learning_rate": 1.3647734823906503e-05, "loss": 1.2648, "step": 279000 }, { "epoch": 0.73, "learning_rate": 1.358258739527551e-05, "loss": 1.2552, "step": 279500 }, { "epoch": 0.73, "learning_rate": 1.3517439966644518e-05, "loss": 1.2602, "step": 280000 }, { "epoch": 0.73, "learning_rate": 1.3452292538013525e-05, "loss": 1.2603, "step": 280500 }, { "epoch": 0.73, "learning_rate": 1.3387145109382534e-05, "loss": 1.2644, "step": 281000 }, { "epoch": 0.73, "learning_rate": 1.332199768075154e-05, "loss": 1.2553, "step": 281500 }, { "epoch": 0.73, "learning_rate": 1.3256850252120551e-05, "loss": 1.2513, "step": 282000 }, { "epoch": 0.74, "learning_rate": 1.3191702823489558e-05, "loss": 1.2441, "step": 282500 }, { "epoch": 0.74, "learning_rate": 1.3126555394858567e-05, "loss": 1.2602, "step": 283000 }, { "epoch": 0.74, "learning_rate": 1.3061407966227573e-05, "loss": 1.2647, "step": 283500 }, { "epoch": 0.74, "learning_rate": 1.2996260537596582e-05, "loss": 1.258, "step": 284000 }, { "epoch": 0.74, "learning_rate": 1.2931113108965589e-05, "loss": 1.2548, "step": 284500 }, { "epoch": 0.74, "learning_rate": 1.28659656803346e-05, "loss": 1.2598, "step": 285000 }, { "epoch": 0.74, "learning_rate": 1.2800818251703606e-05, "loss": 1.2573, "step": 285500 }, { "epoch": 0.75, "learning_rate": 1.2735670823072615e-05, "loss": 1.2622, "step": 286000 }, { "epoch": 0.75, "learning_rate": 1.2670523394441622e-05, "loss": 1.2579, "step": 286500 }, { "epoch": 0.75, "learning_rate": 1.2605375965810632e-05, "loss": 1.2516, "step": 287000 }, { "epoch": 0.75, "learning_rate": 1.2540228537179637e-05, "loss": 1.2547, "step": 287500 }, { "epoch": 0.75, "learning_rate": 1.2475081108548646e-05, "loss": 1.2576, "step": 288000 }, { "epoch": 0.75, "learning_rate": 1.2409933679917655e-05, "loss": 1.2495, "step": 288500 }, { "epoch": 0.75, "learning_rate": 1.2344786251286662e-05, "loss": 1.2578, "step": 289000 }, { "epoch": 0.75, "learning_rate": 1.227963882265567e-05, "loss": 1.2646, "step": 289500 }, { "epoch": 0.76, "learning_rate": 1.2214491394024679e-05, "loss": 1.2638, "step": 290000 }, { "epoch": 0.76, "learning_rate": 1.2149343965393686e-05, "loss": 1.2554, "step": 290500 }, { "epoch": 0.76, "learning_rate": 1.2084196536762694e-05, "loss": 1.2501, "step": 291000 }, { "epoch": 0.76, "learning_rate": 1.2019049108131703e-05, "loss": 1.2508, "step": 291500 }, { "epoch": 0.76, "learning_rate": 1.195390167950071e-05, "loss": 1.2557, "step": 292000 }, { "epoch": 0.76, "learning_rate": 1.1888754250869718e-05, "loss": 1.2434, "step": 292500 }, { "epoch": 0.76, "learning_rate": 1.1823606822238727e-05, "loss": 1.2519, "step": 293000 }, { "epoch": 0.76, "learning_rate": 1.1758459393607734e-05, "loss": 1.2475, "step": 293500 }, { "epoch": 0.77, "learning_rate": 1.1693311964976743e-05, "loss": 1.2479, "step": 294000 }, { "epoch": 0.77, "learning_rate": 1.1628164536345751e-05, "loss": 1.2572, "step": 294500 }, { "epoch": 0.77, "learning_rate": 1.1563017107714758e-05, "loss": 1.2527, "step": 295000 }, { "epoch": 0.77, "learning_rate": 1.1497869679083767e-05, "loss": 1.2505, "step": 295500 }, { "epoch": 0.77, "learning_rate": 1.1432722250452775e-05, "loss": 1.2472, "step": 296000 }, { "epoch": 0.77, "learning_rate": 1.1367574821821782e-05, "loss": 1.2524, "step": 296500 }, { "epoch": 0.77, "learning_rate": 1.1302427393190791e-05, "loss": 1.2548, "step": 297000 }, { "epoch": 0.78, "learning_rate": 1.12372799645598e-05, "loss": 1.2437, "step": 297500 }, { "epoch": 0.78, "learning_rate": 1.1172132535928808e-05, "loss": 1.2503, "step": 298000 }, { "epoch": 0.78, "learning_rate": 1.1106985107297815e-05, "loss": 1.259, "step": 298500 }, { "epoch": 0.78, "learning_rate": 1.1041837678666824e-05, "loss": 1.2518, "step": 299000 }, { "epoch": 0.78, "learning_rate": 1.0976690250035832e-05, "loss": 1.2511, "step": 299500 }, { "epoch": 0.78, "learning_rate": 1.091154282140484e-05, "loss": 1.2536, "step": 300000 }, { "epoch": 0.78, "learning_rate": 1.0846395392773848e-05, "loss": 1.2522, "step": 300500 }, { "epoch": 0.78, "learning_rate": 1.0781247964142856e-05, "loss": 1.2527, "step": 301000 }, { "epoch": 0.79, "learning_rate": 1.0716100535511863e-05, "loss": 1.2461, "step": 301500 }, { "epoch": 0.79, "learning_rate": 1.0650953106880872e-05, "loss": 1.2507, "step": 302000 }, { "epoch": 0.79, "learning_rate": 1.058580567824988e-05, "loss": 1.2536, "step": 302500 }, { "epoch": 0.79, "learning_rate": 1.0520658249618888e-05, "loss": 1.2427, "step": 303000 }, { "epoch": 0.79, "learning_rate": 1.0455510820987896e-05, "loss": 1.2488, "step": 303500 }, { "epoch": 0.79, "learning_rate": 1.0390363392356905e-05, "loss": 1.2536, "step": 304000 }, { "epoch": 0.79, "learning_rate": 1.0325215963725912e-05, "loss": 1.2464, "step": 304500 }, { "epoch": 0.79, "learning_rate": 1.026006853509492e-05, "loss": 1.2432, "step": 305000 }, { "epoch": 0.8, "learning_rate": 1.0194921106463929e-05, "loss": 1.2573, "step": 305500 }, { "epoch": 0.8, "learning_rate": 1.0129773677832936e-05, "loss": 1.2486, "step": 306000 }, { "epoch": 0.8, "learning_rate": 1.0064626249201944e-05, "loss": 1.2575, "step": 306500 }, { "epoch": 0.8, "learning_rate": 9.999478820570953e-06, "loss": 1.2395, "step": 307000 }, { "epoch": 0.8, "learning_rate": 9.93433139193996e-06, "loss": 1.2447, "step": 307500 }, { "epoch": 0.8, "learning_rate": 9.869183963308969e-06, "loss": 1.252, "step": 308000 }, { "epoch": 0.8, "learning_rate": 9.804036534677977e-06, "loss": 1.2417, "step": 308500 }, { "epoch": 0.81, "learning_rate": 9.738889106046984e-06, "loss": 1.2573, "step": 309000 }, { "epoch": 0.81, "learning_rate": 9.673741677415993e-06, "loss": 1.2414, "step": 309500 }, { "epoch": 0.81, "learning_rate": 9.608594248785001e-06, "loss": 1.248, "step": 310000 }, { "epoch": 0.81, "learning_rate": 9.543446820154008e-06, "loss": 1.2386, "step": 310500 }, { "epoch": 0.81, "learning_rate": 9.478299391523017e-06, "loss": 1.258, "step": 311000 }, { "epoch": 0.81, "learning_rate": 9.413151962892026e-06, "loss": 1.2442, "step": 311500 }, { "epoch": 0.81, "learning_rate": 9.348004534261033e-06, "loss": 1.2456, "step": 312000 }, { "epoch": 0.81, "learning_rate": 9.282857105630041e-06, "loss": 1.2505, "step": 312500 }, { "epoch": 0.82, "learning_rate": 9.21770967699905e-06, "loss": 1.2391, "step": 313000 }, { "epoch": 0.82, "learning_rate": 9.152562248368057e-06, "loss": 1.2483, "step": 313500 }, { "epoch": 0.82, "learning_rate": 9.087414819737065e-06, "loss": 1.2432, "step": 314000 }, { "epoch": 0.82, "learning_rate": 9.022267391106074e-06, "loss": 1.2428, "step": 314500 }, { "epoch": 0.82, "learning_rate": 8.95711996247508e-06, "loss": 1.2468, "step": 315000 }, { "epoch": 0.82, "learning_rate": 8.89197253384409e-06, "loss": 1.2469, "step": 315500 }, { "epoch": 0.82, "learning_rate": 8.826825105213098e-06, "loss": 1.2465, "step": 316000 }, { "epoch": 0.82, "learning_rate": 8.761677676582105e-06, "loss": 1.2313, "step": 316500 }, { "epoch": 0.83, "learning_rate": 8.696530247951114e-06, "loss": 1.2507, "step": 317000 }, { "epoch": 0.83, "learning_rate": 8.631382819320122e-06, "loss": 1.2452, "step": 317500 }, { "epoch": 0.83, "learning_rate": 8.566235390689129e-06, "loss": 1.2403, "step": 318000 }, { "epoch": 0.83, "learning_rate": 8.501087962058138e-06, "loss": 1.2561, "step": 318500 }, { "epoch": 0.83, "learning_rate": 8.435940533427146e-06, "loss": 1.2514, "step": 319000 }, { "epoch": 0.83, "learning_rate": 8.370793104796153e-06, "loss": 1.2375, "step": 319500 }, { "epoch": 0.83, "learning_rate": 8.305645676165162e-06, "loss": 1.245, "step": 320000 }, { "epoch": 0.84, "learning_rate": 8.24049824753417e-06, "loss": 1.2454, "step": 320500 }, { "epoch": 0.84, "learning_rate": 8.175350818903177e-06, "loss": 1.2389, "step": 321000 }, { "epoch": 0.84, "learning_rate": 8.110203390272186e-06, "loss": 1.241, "step": 321500 }, { "epoch": 0.84, "learning_rate": 8.045055961641195e-06, "loss": 1.2465, "step": 322000 }, { "epoch": 0.84, "learning_rate": 7.979908533010202e-06, "loss": 1.2451, "step": 322500 }, { "epoch": 0.84, "learning_rate": 7.91476110437921e-06, "loss": 1.2396, "step": 323000 }, { "epoch": 0.84, "learning_rate": 7.849613675748219e-06, "loss": 1.2392, "step": 323500 }, { "epoch": 0.84, "learning_rate": 7.784466247117227e-06, "loss": 1.2431, "step": 324000 }, { "epoch": 0.85, "learning_rate": 7.719318818486234e-06, "loss": 1.2502, "step": 324500 }, { "epoch": 0.85, "learning_rate": 7.654171389855243e-06, "loss": 1.2453, "step": 325000 }, { "epoch": 0.85, "learning_rate": 7.589023961224251e-06, "loss": 1.237, "step": 325500 }, { "epoch": 0.85, "learning_rate": 7.5238765325932586e-06, "loss": 1.2242, "step": 326000 }, { "epoch": 0.85, "learning_rate": 7.458729103962267e-06, "loss": 1.2353, "step": 326500 }, { "epoch": 0.85, "learning_rate": 7.393581675331275e-06, "loss": 1.239, "step": 327000 }, { "epoch": 0.85, "learning_rate": 7.328434246700283e-06, "loss": 1.2448, "step": 327500 }, { "epoch": 0.85, "learning_rate": 7.263286818069291e-06, "loss": 1.2389, "step": 328000 }, { "epoch": 0.86, "learning_rate": 7.198139389438299e-06, "loss": 1.2387, "step": 328500 }, { "epoch": 0.86, "learning_rate": 7.132991960807307e-06, "loss": 1.242, "step": 329000 }, { "epoch": 0.86, "learning_rate": 7.0678445321763155e-06, "loss": 1.2347, "step": 329500 }, { "epoch": 0.86, "learning_rate": 7.002697103545323e-06, "loss": 1.2357, "step": 330000 }, { "epoch": 0.86, "learning_rate": 6.937549674914332e-06, "loss": 1.2378, "step": 330500 }, { "epoch": 0.86, "learning_rate": 6.87240224628334e-06, "loss": 1.2436, "step": 331000 }, { "epoch": 0.86, "learning_rate": 6.8072548176523474e-06, "loss": 1.2335, "step": 331500 }, { "epoch": 0.87, "learning_rate": 6.742107389021356e-06, "loss": 1.2444, "step": 332000 }, { "epoch": 0.87, "learning_rate": 6.676959960390364e-06, "loss": 1.2385, "step": 332500 }, { "epoch": 0.87, "learning_rate": 6.611812531759372e-06, "loss": 1.2399, "step": 333000 }, { "epoch": 0.87, "learning_rate": 6.54666510312838e-06, "loss": 1.2405, "step": 333500 }, { "epoch": 0.87, "learning_rate": 6.481517674497388e-06, "loss": 1.2354, "step": 334000 }, { "epoch": 0.87, "learning_rate": 6.416370245866396e-06, "loss": 1.2357, "step": 334500 }, { "epoch": 0.87, "learning_rate": 6.351222817235404e-06, "loss": 1.2321, "step": 335000 }, { "epoch": 0.87, "learning_rate": 6.286075388604412e-06, "loss": 1.241, "step": 335500 }, { "epoch": 0.88, "learning_rate": 6.22092795997342e-06, "loss": 1.2333, "step": 336000 }, { "epoch": 0.88, "learning_rate": 6.1557805313424285e-06, "loss": 1.2356, "step": 336500 }, { "epoch": 0.88, "learning_rate": 6.090633102711436e-06, "loss": 1.2372, "step": 337000 }, { "epoch": 0.88, "learning_rate": 6.025485674080444e-06, "loss": 1.238, "step": 337500 }, { "epoch": 0.88, "learning_rate": 5.960338245449453e-06, "loss": 1.2322, "step": 338000 }, { "epoch": 0.88, "learning_rate": 5.8951908168184605e-06, "loss": 1.2388, "step": 338500 }, { "epoch": 0.88, "learning_rate": 5.830043388187468e-06, "loss": 1.2371, "step": 339000 }, { "epoch": 0.88, "learning_rate": 5.764895959556477e-06, "loss": 1.23, "step": 339500 }, { "epoch": 0.89, "learning_rate": 5.699748530925485e-06, "loss": 1.2387, "step": 340000 }, { "epoch": 0.89, "learning_rate": 5.634601102294492e-06, "loss": 1.2379, "step": 340500 }, { "epoch": 0.89, "learning_rate": 5.569453673663501e-06, "loss": 1.2343, "step": 341000 }, { "epoch": 0.89, "learning_rate": 5.504306245032509e-06, "loss": 1.2251, "step": 341500 }, { "epoch": 0.89, "learning_rate": 5.439158816401517e-06, "loss": 1.2316, "step": 342000 }, { "epoch": 0.89, "learning_rate": 5.374011387770525e-06, "loss": 1.2261, "step": 342500 }, { "epoch": 0.89, "learning_rate": 5.308863959139533e-06, "loss": 1.2313, "step": 343000 }, { "epoch": 0.9, "learning_rate": 5.2437165305085415e-06, "loss": 1.2262, "step": 343500 }, { "epoch": 0.9, "learning_rate": 5.178569101877549e-06, "loss": 1.2244, "step": 344000 }, { "epoch": 0.9, "learning_rate": 5.113421673246557e-06, "loss": 1.2219, "step": 344500 }, { "epoch": 0.9, "learning_rate": 5.048274244615566e-06, "loss": 1.2371, "step": 345000 }, { "epoch": 0.9, "learning_rate": 4.9831268159845735e-06, "loss": 1.2361, "step": 345500 }, { "epoch": 0.9, "learning_rate": 4.917979387353581e-06, "loss": 1.2376, "step": 346000 }, { "epoch": 0.9, "learning_rate": 4.85283195872259e-06, "loss": 1.2303, "step": 346500 }, { "epoch": 0.9, "learning_rate": 4.787684530091598e-06, "loss": 1.2284, "step": 347000 }, { "epoch": 0.91, "learning_rate": 4.722537101460605e-06, "loss": 1.2293, "step": 347500 }, { "epoch": 0.91, "learning_rate": 4.657389672829614e-06, "loss": 1.2322, "step": 348000 }, { "epoch": 0.91, "learning_rate": 4.592242244198622e-06, "loss": 1.2377, "step": 348500 }, { "epoch": 0.91, "learning_rate": 4.5270948155676296e-06, "loss": 1.2291, "step": 349000 }, { "epoch": 0.91, "learning_rate": 4.461947386936638e-06, "loss": 1.2351, "step": 349500 }, { "epoch": 0.91, "learning_rate": 4.396799958305646e-06, "loss": 1.2346, "step": 350000 }, { "epoch": 0.91, "learning_rate": 4.331652529674654e-06, "loss": 1.2344, "step": 350500 }, { "epoch": 0.91, "learning_rate": 4.266505101043662e-06, "loss": 1.2364, "step": 351000 }, { "epoch": 0.92, "learning_rate": 4.20135767241267e-06, "loss": 1.2236, "step": 351500 }, { "epoch": 0.92, "learning_rate": 4.136210243781678e-06, "loss": 1.2356, "step": 352000 }, { "epoch": 0.92, "learning_rate": 4.0710628151506865e-06, "loss": 1.2279, "step": 352500 }, { "epoch": 0.92, "learning_rate": 4.005915386519694e-06, "loss": 1.2347, "step": 353000 }, { "epoch": 0.92, "learning_rate": 3.940767957888702e-06, "loss": 1.2281, "step": 353500 }, { "epoch": 0.92, "learning_rate": 3.875620529257711e-06, "loss": 1.2307, "step": 354000 }, { "epoch": 0.92, "learning_rate": 3.8104731006267184e-06, "loss": 1.2296, "step": 354500 }, { "epoch": 0.93, "learning_rate": 3.7453256719957266e-06, "loss": 1.2299, "step": 355000 }, { "epoch": 0.93, "learning_rate": 3.680178243364735e-06, "loss": 1.2335, "step": 355500 }, { "epoch": 0.93, "learning_rate": 3.6150308147337426e-06, "loss": 1.2345, "step": 356000 }, { "epoch": 0.93, "learning_rate": 3.5498833861027508e-06, "loss": 1.2255, "step": 356500 }, { "epoch": 0.93, "learning_rate": 3.484735957471759e-06, "loss": 1.2279, "step": 357000 }, { "epoch": 0.93, "learning_rate": 3.419588528840767e-06, "loss": 1.2318, "step": 357500 }, { "epoch": 0.93, "learning_rate": 3.354441100209775e-06, "loss": 1.2328, "step": 358000 }, { "epoch": 0.93, "learning_rate": 3.289293671578783e-06, "loss": 1.2237, "step": 358500 }, { "epoch": 0.94, "learning_rate": 3.2241462429477913e-06, "loss": 1.2246, "step": 359000 }, { "epoch": 0.94, "learning_rate": 3.158998814316799e-06, "loss": 1.2295, "step": 359500 }, { "epoch": 0.94, "learning_rate": 3.0938513856858073e-06, "loss": 1.2272, "step": 360000 }, { "epoch": 0.94, "learning_rate": 3.0287039570548155e-06, "loss": 1.2275, "step": 360500 }, { "epoch": 0.94, "learning_rate": 2.9635565284238233e-06, "loss": 1.2233, "step": 361000 }, { "epoch": 0.94, "learning_rate": 2.8984090997928315e-06, "loss": 1.2321, "step": 361500 }, { "epoch": 0.94, "learning_rate": 2.8332616711618396e-06, "loss": 1.2314, "step": 362000 }, { "epoch": 0.94, "learning_rate": 2.7681142425308474e-06, "loss": 1.2295, "step": 362500 }, { "epoch": 0.95, "learning_rate": 2.702966813899855e-06, "loss": 1.2241, "step": 363000 }, { "epoch": 0.95, "learning_rate": 2.6378193852688634e-06, "loss": 1.2239, "step": 363500 }, { "epoch": 0.95, "learning_rate": 2.5726719566378716e-06, "loss": 1.224, "step": 364000 }, { "epoch": 0.95, "learning_rate": 2.5075245280068793e-06, "loss": 1.2289, "step": 364500 }, { "epoch": 0.95, "learning_rate": 2.4423770993758875e-06, "loss": 1.2216, "step": 365000 }, { "epoch": 0.95, "learning_rate": 2.3772296707448957e-06, "loss": 1.2295, "step": 365500 }, { "epoch": 0.95, "learning_rate": 2.3120822421139035e-06, "loss": 1.2328, "step": 366000 }, { "epoch": 0.96, "learning_rate": 2.2469348134829117e-06, "loss": 1.2245, "step": 366500 }, { "epoch": 0.96, "learning_rate": 2.18178738485192e-06, "loss": 1.2259, "step": 367000 }, { "epoch": 0.96, "learning_rate": 2.116639956220928e-06, "loss": 1.2159, "step": 367500 }, { "epoch": 0.96, "learning_rate": 2.051492527589936e-06, "loss": 1.2287, "step": 368000 }, { "epoch": 0.96, "learning_rate": 1.986345098958944e-06, "loss": 1.2279, "step": 368500 }, { "epoch": 0.96, "learning_rate": 1.9211976703279522e-06, "loss": 1.23, "step": 369000 }, { "epoch": 0.96, "learning_rate": 1.8560502416969602e-06, "loss": 1.2333, "step": 369500 }, { "epoch": 0.96, "learning_rate": 1.7909028130659682e-06, "loss": 1.2307, "step": 370000 }, { "epoch": 0.97, "learning_rate": 1.7257553844349762e-06, "loss": 1.2207, "step": 370500 }, { "epoch": 0.97, "learning_rate": 1.6606079558039844e-06, "loss": 1.2358, "step": 371000 }, { "epoch": 0.97, "learning_rate": 1.5954605271729924e-06, "loss": 1.2185, "step": 371500 }, { "epoch": 0.97, "learning_rate": 1.5303130985420006e-06, "loss": 1.2283, "step": 372000 }, { "epoch": 0.97, "learning_rate": 1.4651656699110088e-06, "loss": 1.2243, "step": 372500 }, { "epoch": 0.97, "learning_rate": 1.4000182412800167e-06, "loss": 1.2249, "step": 373000 }, { "epoch": 0.97, "learning_rate": 1.334870812649025e-06, "loss": 1.2199, "step": 373500 }, { "epoch": 0.97, "learning_rate": 1.269723384018033e-06, "loss": 1.2299, "step": 374000 }, { "epoch": 0.98, "learning_rate": 1.204575955387041e-06, "loss": 1.2126, "step": 374500 }, { "epoch": 0.98, "learning_rate": 1.139428526756049e-06, "loss": 1.2272, "step": 375000 }, { "epoch": 0.98, "learning_rate": 1.074281098125057e-06, "loss": 1.2325, "step": 375500 }, { "epoch": 0.98, "learning_rate": 1.0091336694940653e-06, "loss": 1.2247, "step": 376000 }, { "epoch": 0.98, "learning_rate": 9.439862408630733e-07, "loss": 1.2243, "step": 376500 }, { "epoch": 0.98, "learning_rate": 8.788388122320813e-07, "loss": 1.2236, "step": 377000 }, { "epoch": 0.98, "learning_rate": 8.136913836010893e-07, "loss": 1.2297, "step": 377500 }, { "epoch": 0.99, "learning_rate": 7.485439549700973e-07, "loss": 1.2215, "step": 378000 }, { "epoch": 0.99, "learning_rate": 6.833965263391054e-07, "loss": 1.2209, "step": 378500 }, { "epoch": 0.99, "learning_rate": 6.182490977081135e-07, "loss": 1.2283, "step": 379000 }, { "epoch": 0.99, "learning_rate": 5.531016690771216e-07, "loss": 1.2304, "step": 379500 }, { "epoch": 0.99, "learning_rate": 4.879542404461296e-07, "loss": 1.2144, "step": 380000 }, { "epoch": 0.99, "learning_rate": 4.2280681181513764e-07, "loss": 1.2267, "step": 380500 }, { "epoch": 0.99, "learning_rate": 3.5765938318414573e-07, "loss": 1.2266, "step": 381000 }, { "epoch": 0.99, "learning_rate": 2.925119545531538e-07, "loss": 1.2236, "step": 381500 }, { "epoch": 1.0, "learning_rate": 2.2736452592216185e-07, "loss": 1.2162, "step": 382000 }, { "epoch": 1.0, "learning_rate": 1.6221709729116992e-07, "loss": 1.2267, "step": 382500 }, { "epoch": 1.0, "learning_rate": 9.706966866017799e-08, "loss": 1.234, "step": 383000 }, { "epoch": 1.0, "learning_rate": 3.192224002918605e-08, "loss": 1.2369, "step": 383500 } ], "max_steps": 383745, "num_train_epochs": 1, "total_flos": 1.61642598748028e+18, "trial_name": null, "trial_params": null }