diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" deleted file mode 100644--- "a/last-checkpoint/trainer_state.json" +++ /dev/null @@ -1,11812 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 14.0, - "global_step": 972622, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01, - "learning_rate": 5.140331037318804e-06, - "loss": 9.929, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 1.0280662074637608e-05, - "loss": 8.8567, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 1.542099311195641e-05, - "loss": 8.1753, - "step": 1500 - }, - { - "epoch": 0.03, - "learning_rate": 2.0561324149275216e-05, - "loss": 7.9004, - "step": 2000 - }, - { - "epoch": 0.04, - "learning_rate": 2.570165518659402e-05, - "loss": 7.6965, - "step": 2500 - }, - { - "epoch": 0.04, - "learning_rate": 3.084198622391282e-05, - "loss": 7.5233, - "step": 3000 - }, - { - "epoch": 0.05, - "learning_rate": 3.5982317261231625e-05, - "loss": 7.344, - "step": 3500 - }, - { - "epoch": 0.06, - "learning_rate": 4.112264829855043e-05, - "loss": 7.1251, - "step": 4000 - }, - { - "epoch": 0.06, - "learning_rate": 4.6262979335869235e-05, - "loss": 6.8775, - "step": 4500 - }, - { - "epoch": 0.07, - "learning_rate": 5.140331037318804e-05, - "loss": 6.6238, - "step": 5000 - }, - { - "epoch": 0.08, - "learning_rate": 5.6543641410506844e-05, - "loss": 6.4148, - "step": 5500 - }, - { - "epoch": 0.09, - "learning_rate": 6.168397244782565e-05, - "loss": 6.226, - "step": 6000 - }, - { - "epoch": 0.09, - "learning_rate": 6.682430348514444e-05, - "loss": 6.0576, - "step": 6500 - }, - { - "epoch": 0.1, - "learning_rate": 7.196463452246325e-05, - "loss": 5.8947, - "step": 7000 - }, - { - "epoch": 0.11, - "learning_rate": 7.710496555978204e-05, - "loss": 5.7397, - "step": 7500 - }, - { - "epoch": 0.12, - "learning_rate": 8.224529659710087e-05, - "loss": 5.6097, - "step": 8000 - }, - { - "epoch": 0.12, - "learning_rate": 8.738562763441966e-05, - "loss": 5.4844, - "step": 8500 - }, - { - "epoch": 0.13, - "learning_rate": 9.252595867173847e-05, - "loss": 5.3823, - "step": 9000 - }, - { - "epoch": 0.14, - "learning_rate": 9.766628970905726e-05, - "loss": 5.2885, - "step": 9500 - }, - { - "epoch": 0.14, - "learning_rate": 9.997164799900302e-05, - "loss": 5.1927, - "step": 10000 - }, - { - "epoch": 0.15, - "learning_rate": 9.99199289642173e-05, - "loss": 5.1158, - "step": 10500 - }, - { - "epoch": 0.16, - "learning_rate": 9.986800222246455e-05, - "loss": 5.0358, - "step": 11000 - }, - { - "epoch": 0.17, - "learning_rate": 9.981607548071182e-05, - "loss": 4.9621, - "step": 11500 - }, - { - "epoch": 0.17, - "learning_rate": 9.976414873895909e-05, - "loss": 4.9065, - "step": 12000 - }, - { - "epoch": 0.18, - "learning_rate": 9.971222199720634e-05, - "loss": 4.8418, - "step": 12500 - }, - { - "epoch": 0.19, - "learning_rate": 9.966029525545361e-05, - "loss": 4.792, - "step": 13000 - }, - { - "epoch": 0.19, - "learning_rate": 9.960836851370088e-05, - "loss": 4.7285, - "step": 13500 - }, - { - "epoch": 0.2, - "learning_rate": 9.955644177194815e-05, - "loss": 4.6777, - "step": 14000 - }, - { - "epoch": 0.21, - "learning_rate": 9.950461888367891e-05, - "loss": 4.6233, - "step": 14500 - }, - { - "epoch": 0.22, - "learning_rate": 9.945269214192618e-05, - "loss": 4.5836, - "step": 15000 - }, - { - "epoch": 0.22, - "learning_rate": 9.940086925365695e-05, - "loss": 4.5417, - "step": 15500 - }, - { - "epoch": 0.23, - "learning_rate": 9.934894251190421e-05, - "loss": 4.4895, - "step": 16000 - }, - { - "epoch": 0.24, - "learning_rate": 9.929701577015148e-05, - "loss": 4.463, - "step": 16500 - }, - { - "epoch": 0.24, - "learning_rate": 9.924508902839874e-05, - "loss": 4.4206, - "step": 17000 - }, - { - "epoch": 0.25, - "learning_rate": 9.9193162286646e-05, - "loss": 4.3836, - "step": 17500 - }, - { - "epoch": 0.26, - "learning_rate": 9.914133939837677e-05, - "loss": 4.3513, - "step": 18000 - }, - { - "epoch": 0.27, - "learning_rate": 9.908941265662404e-05, - "loss": 4.3211, - "step": 18500 - }, - { - "epoch": 0.27, - "learning_rate": 9.903748591487131e-05, - "loss": 4.292, - "step": 19000 - }, - { - "epoch": 0.28, - "learning_rate": 9.898555917311856e-05, - "loss": 4.2664, - "step": 19500 - }, - { - "epoch": 0.29, - "learning_rate": 9.893373628484934e-05, - "loss": 4.2326, - "step": 20000 - }, - { - "epoch": 0.3, - "learning_rate": 9.88818095430966e-05, - "loss": 4.2052, - "step": 20500 - }, - { - "epoch": 0.3, - "learning_rate": 9.882988280134387e-05, - "loss": 4.1833, - "step": 21000 - }, - { - "epoch": 0.31, - "learning_rate": 9.877795605959113e-05, - "loss": 4.1571, - "step": 21500 - }, - { - "epoch": 0.32, - "learning_rate": 9.87260293178384e-05, - "loss": 4.1389, - "step": 22000 - }, - { - "epoch": 0.32, - "learning_rate": 9.867420642956917e-05, - "loss": 4.1185, - "step": 22500 - }, - { - "epoch": 0.33, - "learning_rate": 9.862227968781644e-05, - "loss": 4.0938, - "step": 23000 - }, - { - "epoch": 0.34, - "learning_rate": 9.857035294606369e-05, - "loss": 4.0727, - "step": 23500 - }, - { - "epoch": 0.35, - "learning_rate": 9.851842620431096e-05, - "loss": 4.0493, - "step": 24000 - }, - { - "epoch": 0.35, - "learning_rate": 9.846660331604173e-05, - "loss": 4.0329, - "step": 24500 - }, - { - "epoch": 0.36, - "learning_rate": 9.8414676574289e-05, - "loss": 4.0212, - "step": 25000 - }, - { - "epoch": 0.37, - "learning_rate": 9.836274983253626e-05, - "loss": 3.9938, - "step": 25500 - }, - { - "epoch": 0.37, - "learning_rate": 9.831082309078353e-05, - "loss": 3.9826, - "step": 26000 - }, - { - "epoch": 0.38, - "learning_rate": 9.82590002025143e-05, - "loss": 3.9701, - "step": 26500 - }, - { - "epoch": 0.39, - "learning_rate": 9.820707346076157e-05, - "loss": 3.9476, - "step": 27000 - }, - { - "epoch": 0.4, - "learning_rate": 9.815514671900883e-05, - "loss": 3.9318, - "step": 27500 - }, - { - "epoch": 0.4, - "learning_rate": 9.810321997725609e-05, - "loss": 3.9158, - "step": 28000 - }, - { - "epoch": 0.41, - "learning_rate": 9.805139708898687e-05, - "loss": 3.9113, - "step": 28500 - }, - { - "epoch": 0.42, - "learning_rate": 9.799947034723412e-05, - "loss": 3.8964, - "step": 29000 - }, - { - "epoch": 0.42, - "learning_rate": 9.794754360548139e-05, - "loss": 3.8808, - "step": 29500 - }, - { - "epoch": 0.43, - "learning_rate": 9.789561686372866e-05, - "loss": 3.8692, - "step": 30000 - }, - { - "epoch": 0.44, - "learning_rate": 9.784379397545943e-05, - "loss": 3.8509, - "step": 30500 - }, - { - "epoch": 0.45, - "learning_rate": 9.77918672337067e-05, - "loss": 3.8422, - "step": 31000 - }, - { - "epoch": 0.45, - "learning_rate": 9.773994049195395e-05, - "loss": 3.832, - "step": 31500 - }, - { - "epoch": 0.46, - "learning_rate": 9.768801375020122e-05, - "loss": 3.8167, - "step": 32000 - }, - { - "epoch": 0.47, - "learning_rate": 9.763619086193198e-05, - "loss": 3.807, - "step": 32500 - }, - { - "epoch": 0.48, - "learning_rate": 9.758426412017925e-05, - "loss": 3.7982, - "step": 33000 - }, - { - "epoch": 0.48, - "learning_rate": 9.753233737842652e-05, - "loss": 3.7902, - "step": 33500 - }, - { - "epoch": 0.49, - "learning_rate": 9.748041063667378e-05, - "loss": 3.7848, - "step": 34000 - }, - { - "epoch": 0.5, - "learning_rate": 9.742858774840455e-05, - "loss": 3.7684, - "step": 34500 - }, - { - "epoch": 0.5, - "learning_rate": 9.737676486013532e-05, - "loss": 3.7611, - "step": 35000 - }, - { - "epoch": 0.51, - "learning_rate": 9.732483811838259e-05, - "loss": 3.7457, - "step": 35500 - }, - { - "epoch": 0.52, - "learning_rate": 9.727291137662986e-05, - "loss": 3.7369, - "step": 36000 - }, - { - "epoch": 0.53, - "learning_rate": 9.722098463487713e-05, - "loss": 3.7263, - "step": 36500 - }, - { - "epoch": 0.53, - "learning_rate": 9.716916174660789e-05, - "loss": 3.7241, - "step": 37000 - }, - { - "epoch": 0.54, - "learning_rate": 9.711723500485516e-05, - "loss": 3.7116, - "step": 37500 - }, - { - "epoch": 0.55, - "learning_rate": 9.706530826310242e-05, - "loss": 3.703, - "step": 38000 - }, - { - "epoch": 0.55, - "learning_rate": 9.701338152134968e-05, - "loss": 3.695, - "step": 38500 - }, - { - "epoch": 0.56, - "learning_rate": 9.696155863308045e-05, - "loss": 3.6895, - "step": 39000 - }, - { - "epoch": 0.57, - "learning_rate": 9.690963189132772e-05, - "loss": 3.6811, - "step": 39500 - }, - { - "epoch": 0.58, - "learning_rate": 9.685770514957499e-05, - "loss": 3.6675, - "step": 40000 - }, - { - "epoch": 0.58, - "learning_rate": 9.680577840782224e-05, - "loss": 3.6633, - "step": 40500 - }, - { - "epoch": 0.59, - "learning_rate": 9.675385166606952e-05, - "loss": 3.6563, - "step": 41000 - }, - { - "epoch": 0.6, - "learning_rate": 9.670202877780028e-05, - "loss": 3.6523, - "step": 41500 - }, - { - "epoch": 0.6, - "learning_rate": 9.665010203604756e-05, - "loss": 3.6408, - "step": 42000 - }, - { - "epoch": 0.61, - "learning_rate": 9.659817529429481e-05, - "loss": 3.6388, - "step": 42500 - }, - { - "epoch": 0.62, - "learning_rate": 9.654624855254207e-05, - "loss": 3.6306, - "step": 43000 - }, - { - "epoch": 0.63, - "learning_rate": 9.649442566427285e-05, - "loss": 3.6238, - "step": 43500 - }, - { - "epoch": 0.63, - "learning_rate": 9.64424989225201e-05, - "loss": 3.6138, - "step": 44000 - }, - { - "epoch": 0.64, - "learning_rate": 9.639057218076738e-05, - "loss": 3.6122, - "step": 44500 - }, - { - "epoch": 0.65, - "learning_rate": 9.633864543901464e-05, - "loss": 3.6025, - "step": 45000 - }, - { - "epoch": 0.65, - "learning_rate": 9.628682255074542e-05, - "loss": 3.595, - "step": 45500 - }, - { - "epoch": 0.66, - "learning_rate": 9.623499966247618e-05, - "loss": 3.59, - "step": 46000 - }, - { - "epoch": 0.67, - "learning_rate": 9.618307292072345e-05, - "loss": 3.5841, - "step": 46500 - }, - { - "epoch": 0.68, - "learning_rate": 9.613114617897071e-05, - "loss": 3.5787, - "step": 47000 - }, - { - "epoch": 0.68, - "learning_rate": 9.607921943721798e-05, - "loss": 3.5692, - "step": 47500 - }, - { - "epoch": 0.69, - "learning_rate": 9.602729269546524e-05, - "loss": 3.5626, - "step": 48000 - }, - { - "epoch": 0.7, - "learning_rate": 9.59753659537125e-05, - "loss": 3.5571, - "step": 48500 - }, - { - "epoch": 0.71, - "learning_rate": 9.592343921195978e-05, - "loss": 3.5565, - "step": 49000 - }, - { - "epoch": 0.71, - "learning_rate": 9.587151247020704e-05, - "loss": 3.547, - "step": 49500 - }, - { - "epoch": 0.72, - "learning_rate": 9.581968958193781e-05, - "loss": 3.5455, - "step": 50000 - }, - { - "epoch": 0.73, - "learning_rate": 9.576776284018507e-05, - "loss": 3.5455, - "step": 50500 - }, - { - "epoch": 0.73, - "learning_rate": 9.571583609843234e-05, - "loss": 3.53, - "step": 51000 - }, - { - "epoch": 0.74, - "learning_rate": 9.56639093566796e-05, - "loss": 3.5335, - "step": 51500 - }, - { - "epoch": 0.75, - "learning_rate": 9.561198261492686e-05, - "loss": 3.5245, - "step": 52000 - }, - { - "epoch": 0.76, - "learning_rate": 9.556015972665764e-05, - "loss": 3.523, - "step": 52500 - }, - { - "epoch": 0.76, - "learning_rate": 9.55082329849049e-05, - "loss": 3.5178, - "step": 53000 - }, - { - "epoch": 0.77, - "learning_rate": 9.545630624315216e-05, - "loss": 3.5076, - "step": 53500 - }, - { - "epoch": 0.78, - "learning_rate": 9.540437950139943e-05, - "loss": 3.5016, - "step": 54000 - }, - { - "epoch": 0.78, - "learning_rate": 9.53525566131302e-05, - "loss": 3.5036, - "step": 54500 - }, - { - "epoch": 0.79, - "learning_rate": 9.530062987137747e-05, - "loss": 3.4971, - "step": 55000 - }, - { - "epoch": 0.8, - "learning_rate": 9.524870312962472e-05, - "loss": 3.4954, - "step": 55500 - }, - { - "epoch": 0.81, - "learning_rate": 9.5196776387872e-05, - "loss": 3.4901, - "step": 56000 - }, - { - "epoch": 0.81, - "learning_rate": 9.514495349960276e-05, - "loss": 3.4811, - "step": 56500 - }, - { - "epoch": 0.82, - "learning_rate": 9.509302675785004e-05, - "loss": 3.4849, - "step": 57000 - }, - { - "epoch": 0.83, - "learning_rate": 9.504110001609729e-05, - "loss": 3.476, - "step": 57500 - }, - { - "epoch": 0.83, - "learning_rate": 9.498917327434456e-05, - "loss": 3.4747, - "step": 58000 - }, - { - "epoch": 0.84, - "learning_rate": 9.493735038607533e-05, - "loss": 3.467, - "step": 58500 - }, - { - "epoch": 0.85, - "learning_rate": 9.48854236443226e-05, - "loss": 3.4629, - "step": 59000 - }, - { - "epoch": 0.86, - "learning_rate": 9.483349690256986e-05, - "loss": 3.4556, - "step": 59500 - }, - { - "epoch": 0.86, - "learning_rate": 9.478157016081712e-05, - "loss": 3.4596, - "step": 60000 - }, - { - "epoch": 0.87, - "learning_rate": 9.47297472725479e-05, - "loss": 3.4524, - "step": 60500 - }, - { - "epoch": 0.88, - "learning_rate": 9.467782053079515e-05, - "loss": 3.4456, - "step": 61000 - }, - { - "epoch": 0.89, - "learning_rate": 9.462589378904242e-05, - "loss": 3.4469, - "step": 61500 - }, - { - "epoch": 0.89, - "learning_rate": 9.457396704728969e-05, - "loss": 3.4437, - "step": 62000 - }, - { - "epoch": 0.9, - "learning_rate": 9.452214415902046e-05, - "loss": 3.4429, - "step": 62500 - }, - { - "epoch": 0.91, - "learning_rate": 9.447021741726772e-05, - "loss": 3.4346, - "step": 63000 - }, - { - "epoch": 0.91, - "learning_rate": 9.441829067551499e-05, - "loss": 3.4279, - "step": 63500 - }, - { - "epoch": 0.92, - "learning_rate": 9.436636393376225e-05, - "loss": 3.4271, - "step": 64000 - }, - { - "epoch": 0.93, - "learning_rate": 9.431443719200952e-05, - "loss": 3.4291, - "step": 64500 - }, - { - "epoch": 0.94, - "learning_rate": 9.42626143037403e-05, - "loss": 3.4229, - "step": 65000 - }, - { - "epoch": 0.94, - "learning_rate": 9.421068756198755e-05, - "loss": 3.4233, - "step": 65500 - }, - { - "epoch": 0.95, - "learning_rate": 9.415876082023482e-05, - "loss": 3.4166, - "step": 66000 - }, - { - "epoch": 0.96, - "learning_rate": 9.410683407848209e-05, - "loss": 3.4175, - "step": 66500 - }, - { - "epoch": 0.96, - "learning_rate": 9.405501119021285e-05, - "loss": 3.4086, - "step": 67000 - }, - { - "epoch": 0.97, - "learning_rate": 9.400308444846012e-05, - "loss": 3.4012, - "step": 67500 - }, - { - "epoch": 0.98, - "learning_rate": 9.395115770670738e-05, - "loss": 3.3995, - "step": 68000 - }, - { - "epoch": 0.99, - "learning_rate": 9.389923096495464e-05, - "loss": 3.4001, - "step": 68500 - }, - { - "epoch": 0.99, - "learning_rate": 9.384740807668541e-05, - "loss": 3.3946, - "step": 69000 - }, - { - "epoch": 1.0, - "eval_accuracy": 0.4298818794254001, - "eval_loss": 3.247347354888916, - "eval_runtime": 556.0117, - "eval_samples_per_second": 969.291, - "eval_steps_per_second": 40.388, - "step": 69473 - }, - { - "epoch": 1.0, - "learning_rate": 9.379548133493268e-05, - "loss": 3.3917, - "step": 69500 - }, - { - "epoch": 1.01, - "learning_rate": 9.374355459317995e-05, - "loss": 3.3775, - "step": 70000 - }, - { - "epoch": 1.01, - "learning_rate": 9.369162785142722e-05, - "loss": 3.3849, - "step": 70500 - }, - { - "epoch": 1.02, - "learning_rate": 9.363980496315798e-05, - "loss": 3.375, - "step": 71000 - }, - { - "epoch": 1.03, - "learning_rate": 9.358787822140525e-05, - "loss": 3.377, - "step": 71500 - }, - { - "epoch": 1.04, - "learning_rate": 9.35359514796525e-05, - "loss": 3.3756, - "step": 72000 - }, - { - "epoch": 1.04, - "learning_rate": 9.348402473789977e-05, - "loss": 3.374, - "step": 72500 - }, - { - "epoch": 1.05, - "learning_rate": 9.343209799614704e-05, - "loss": 3.3673, - "step": 73000 - }, - { - "epoch": 1.06, - "learning_rate": 9.338027510787781e-05, - "loss": 3.3712, - "step": 73500 - }, - { - "epoch": 1.07, - "learning_rate": 9.332834836612508e-05, - "loss": 3.3592, - "step": 74000 - }, - { - "epoch": 1.07, - "learning_rate": 9.327642162437234e-05, - "loss": 3.3582, - "step": 74500 - }, - { - "epoch": 1.08, - "learning_rate": 9.322449488261961e-05, - "loss": 3.3591, - "step": 75000 - }, - { - "epoch": 1.09, - "learning_rate": 9.317267199435038e-05, - "loss": 3.3514, - "step": 75500 - }, - { - "epoch": 1.09, - "learning_rate": 9.312074525259765e-05, - "loss": 3.3541, - "step": 76000 - }, - { - "epoch": 1.1, - "learning_rate": 9.30688185108449e-05, - "loss": 3.3484, - "step": 76500 - }, - { - "epoch": 1.11, - "learning_rate": 9.301689176909217e-05, - "loss": 3.3515, - "step": 77000 - }, - { - "epoch": 1.12, - "learning_rate": 9.296506888082294e-05, - "loss": 3.3443, - "step": 77500 - }, - { - "epoch": 1.12, - "learning_rate": 9.29131421390702e-05, - "loss": 3.339, - "step": 78000 - }, - { - "epoch": 1.13, - "learning_rate": 9.286121539731747e-05, - "loss": 3.3435, - "step": 78500 - }, - { - "epoch": 1.14, - "learning_rate": 9.280928865556473e-05, - "loss": 3.3377, - "step": 79000 - }, - { - "epoch": 1.14, - "learning_rate": 9.275746576729551e-05, - "loss": 3.3403, - "step": 79500 - }, - { - "epoch": 1.15, - "learning_rate": 9.270553902554276e-05, - "loss": 3.3369, - "step": 80000 - }, - { - "epoch": 1.16, - "learning_rate": 9.265361228379003e-05, - "loss": 3.335, - "step": 80500 - }, - { - "epoch": 1.17, - "learning_rate": 9.26016855420373e-05, - "loss": 3.3258, - "step": 81000 - }, - { - "epoch": 1.17, - "learning_rate": 9.254975880028455e-05, - "loss": 3.3295, - "step": 81500 - }, - { - "epoch": 1.18, - "learning_rate": 9.249793591201533e-05, - "loss": 3.3281, - "step": 82000 - }, - { - "epoch": 1.19, - "learning_rate": 9.244600917026259e-05, - "loss": 3.3226, - "step": 82500 - }, - { - "epoch": 1.19, - "learning_rate": 9.239408242850987e-05, - "loss": 3.3249, - "step": 83000 - }, - { - "epoch": 1.2, - "learning_rate": 9.234215568675712e-05, - "loss": 3.3193, - "step": 83500 - }, - { - "epoch": 1.21, - "learning_rate": 9.229022894500439e-05, - "loss": 3.3199, - "step": 84000 - }, - { - "epoch": 1.22, - "learning_rate": 9.223840605673516e-05, - "loss": 3.3189, - "step": 84500 - }, - { - "epoch": 1.22, - "learning_rate": 9.218647931498243e-05, - "loss": 3.3151, - "step": 85000 - }, - { - "epoch": 1.23, - "learning_rate": 9.21345525732297e-05, - "loss": 3.3098, - "step": 85500 - }, - { - "epoch": 1.24, - "learning_rate": 9.208262583147695e-05, - "loss": 3.3115, - "step": 86000 - }, - { - "epoch": 1.25, - "learning_rate": 9.203080294320773e-05, - "loss": 3.3078, - "step": 86500 - }, - { - "epoch": 1.25, - "learning_rate": 9.197887620145498e-05, - "loss": 3.3052, - "step": 87000 - }, - { - "epoch": 1.26, - "learning_rate": 9.192694945970227e-05, - "loss": 3.2965, - "step": 87500 - }, - { - "epoch": 1.27, - "learning_rate": 9.187502271794952e-05, - "loss": 3.3016, - "step": 88000 - }, - { - "epoch": 1.27, - "learning_rate": 9.182309597619678e-05, - "loss": 3.2967, - "step": 88500 - }, - { - "epoch": 1.28, - "learning_rate": 9.177127308792756e-05, - "loss": 3.2905, - "step": 89000 - }, - { - "epoch": 1.29, - "learning_rate": 9.171934634617481e-05, - "loss": 3.2908, - "step": 89500 - }, - { - "epoch": 1.3, - "learning_rate": 9.166741960442209e-05, - "loss": 3.2941, - "step": 90000 - }, - { - "epoch": 1.3, - "learning_rate": 9.161549286266935e-05, - "loss": 3.2925, - "step": 90500 - }, - { - "epoch": 1.31, - "learning_rate": 9.156366997440013e-05, - "loss": 3.2903, - "step": 91000 - }, - { - "epoch": 1.32, - "learning_rate": 9.151174323264738e-05, - "loss": 3.2896, - "step": 91500 - }, - { - "epoch": 1.32, - "learning_rate": 9.145981649089465e-05, - "loss": 3.2858, - "step": 92000 - }, - { - "epoch": 1.33, - "learning_rate": 9.140788974914192e-05, - "loss": 3.2812, - "step": 92500 - }, - { - "epoch": 1.34, - "learning_rate": 9.135606686087268e-05, - "loss": 3.2776, - "step": 93000 - }, - { - "epoch": 1.35, - "learning_rate": 9.130414011911995e-05, - "loss": 3.2781, - "step": 93500 - }, - { - "epoch": 1.35, - "learning_rate": 9.125221337736721e-05, - "loss": 3.2809, - "step": 94000 - }, - { - "epoch": 1.36, - "learning_rate": 9.120028663561449e-05, - "loss": 3.2739, - "step": 94500 - }, - { - "epoch": 1.37, - "learning_rate": 9.114846374734524e-05, - "loss": 3.2744, - "step": 95000 - }, - { - "epoch": 1.37, - "learning_rate": 9.109653700559252e-05, - "loss": 3.2751, - "step": 95500 - }, - { - "epoch": 1.38, - "learning_rate": 9.104461026383978e-05, - "loss": 3.2693, - "step": 96000 - }, - { - "epoch": 1.39, - "learning_rate": 9.099268352208705e-05, - "loss": 3.2679, - "step": 96500 - }, - { - "epoch": 1.4, - "learning_rate": 9.094075678033432e-05, - "loss": 3.2681, - "step": 97000 - }, - { - "epoch": 1.4, - "learning_rate": 9.088893389206508e-05, - "loss": 3.2628, - "step": 97500 - }, - { - "epoch": 1.41, - "learning_rate": 9.083700715031235e-05, - "loss": 3.2633, - "step": 98000 - }, - { - "epoch": 1.42, - "learning_rate": 9.07850804085596e-05, - "loss": 3.264, - "step": 98500 - }, - { - "epoch": 1.43, - "learning_rate": 9.073315366680687e-05, - "loss": 3.2599, - "step": 99000 - }, - { - "epoch": 1.43, - "learning_rate": 9.068133077853764e-05, - "loss": 3.2566, - "step": 99500 - }, - { - "epoch": 1.44, - "learning_rate": 9.062940403678491e-05, - "loss": 3.2573, - "step": 100000 - }, - { - "epoch": 1.45, - "learning_rate": 9.057747729503218e-05, - "loss": 3.253, - "step": 100500 - }, - { - "epoch": 1.45, - "learning_rate": 9.052555055327943e-05, - "loss": 3.2557, - "step": 101000 - }, - { - "epoch": 1.46, - "learning_rate": 9.047372766501021e-05, - "loss": 3.2468, - "step": 101500 - }, - { - "epoch": 1.47, - "learning_rate": 9.042180092325746e-05, - "loss": 3.2447, - "step": 102000 - }, - { - "epoch": 1.48, - "learning_rate": 9.036987418150473e-05, - "loss": 3.2485, - "step": 102500 - }, - { - "epoch": 1.48, - "learning_rate": 9.0317947439752e-05, - "loss": 3.2471, - "step": 103000 - }, - { - "epoch": 1.49, - "learning_rate": 9.026612455148277e-05, - "loss": 3.2449, - "step": 103500 - }, - { - "epoch": 1.5, - "learning_rate": 9.021419780973004e-05, - "loss": 3.2421, - "step": 104000 - }, - { - "epoch": 1.5, - "learning_rate": 9.01622710679773e-05, - "loss": 3.2497, - "step": 104500 - }, - { - "epoch": 1.51, - "learning_rate": 9.011034432622457e-05, - "loss": 3.2389, - "step": 105000 - }, - { - "epoch": 1.52, - "learning_rate": 9.005852143795534e-05, - "loss": 3.2306, - "step": 105500 - }, - { - "epoch": 1.53, - "learning_rate": 9.000659469620261e-05, - "loss": 3.234, - "step": 106000 - }, - { - "epoch": 1.53, - "learning_rate": 8.995466795444986e-05, - "loss": 3.2308, - "step": 106500 - }, - { - "epoch": 1.54, - "learning_rate": 8.990274121269713e-05, - "loss": 3.2319, - "step": 107000 - }, - { - "epoch": 1.55, - "learning_rate": 8.98509183244279e-05, - "loss": 3.2277, - "step": 107500 - }, - { - "epoch": 1.55, - "learning_rate": 8.979899158267516e-05, - "loss": 3.232, - "step": 108000 - }, - { - "epoch": 1.56, - "learning_rate": 8.974706484092243e-05, - "loss": 3.2291, - "step": 108500 - }, - { - "epoch": 1.57, - "learning_rate": 8.96951380991697e-05, - "loss": 3.2274, - "step": 109000 - }, - { - "epoch": 1.58, - "learning_rate": 8.964331521090047e-05, - "loss": 3.2276, - "step": 109500 - }, - { - "epoch": 1.58, - "learning_rate": 8.959138846914774e-05, - "loss": 3.2254, - "step": 110000 - }, - { - "epoch": 1.59, - "learning_rate": 8.953946172739499e-05, - "loss": 3.2187, - "step": 110500 - }, - { - "epoch": 1.6, - "learning_rate": 8.948753498564226e-05, - "loss": 3.2199, - "step": 111000 - }, - { - "epoch": 1.6, - "learning_rate": 8.943571209737302e-05, - "loss": 3.2205, - "step": 111500 - }, - { - "epoch": 1.61, - "learning_rate": 8.938378535562029e-05, - "loss": 3.2181, - "step": 112000 - }, - { - "epoch": 1.62, - "learning_rate": 8.933185861386756e-05, - "loss": 3.2149, - "step": 112500 - }, - { - "epoch": 1.63, - "learning_rate": 8.927993187211482e-05, - "loss": 3.2164, - "step": 113000 - }, - { - "epoch": 1.63, - "learning_rate": 8.922800513036208e-05, - "loss": 3.2197, - "step": 113500 - }, - { - "epoch": 1.64, - "learning_rate": 8.917618224209286e-05, - "loss": 3.2177, - "step": 114000 - }, - { - "epoch": 1.65, - "learning_rate": 8.912425550034013e-05, - "loss": 3.2016, - "step": 114500 - }, - { - "epoch": 1.66, - "learning_rate": 8.907232875858739e-05, - "loss": 3.2107, - "step": 115000 - }, - { - "epoch": 1.66, - "learning_rate": 8.902040201683466e-05, - "loss": 3.2063, - "step": 115500 - }, - { - "epoch": 1.67, - "learning_rate": 8.896857912856542e-05, - "loss": 3.2034, - "step": 116000 - }, - { - "epoch": 1.68, - "learning_rate": 8.891665238681269e-05, - "loss": 3.2072, - "step": 116500 - }, - { - "epoch": 1.68, - "learning_rate": 8.886472564505996e-05, - "loss": 3.204, - "step": 117000 - }, - { - "epoch": 1.69, - "learning_rate": 8.881279890330721e-05, - "loss": 3.2109, - "step": 117500 - }, - { - "epoch": 1.7, - "learning_rate": 8.876097601503799e-05, - "loss": 3.2029, - "step": 118000 - }, - { - "epoch": 1.71, - "learning_rate": 8.870904927328525e-05, - "loss": 3.2068, - "step": 118500 - }, - { - "epoch": 1.71, - "learning_rate": 8.865712253153252e-05, - "loss": 3.2024, - "step": 119000 - }, - { - "epoch": 1.72, - "learning_rate": 8.860519578977978e-05, - "loss": 3.1984, - "step": 119500 - }, - { - "epoch": 1.73, - "learning_rate": 8.855326904802704e-05, - "loss": 3.1962, - "step": 120000 - }, - { - "epoch": 1.73, - "learning_rate": 8.850144615975782e-05, - "loss": 3.1989, - "step": 120500 - }, - { - "epoch": 1.74, - "learning_rate": 8.844951941800507e-05, - "loss": 3.1991, - "step": 121000 - }, - { - "epoch": 1.75, - "learning_rate": 8.839759267625236e-05, - "loss": 3.1866, - "step": 121500 - }, - { - "epoch": 1.76, - "learning_rate": 8.834566593449961e-05, - "loss": 3.1931, - "step": 122000 - }, - { - "epoch": 1.76, - "learning_rate": 8.829384304623039e-05, - "loss": 3.1845, - "step": 122500 - }, - { - "epoch": 1.77, - "learning_rate": 8.824191630447764e-05, - "loss": 3.1921, - "step": 123000 - }, - { - "epoch": 1.78, - "learning_rate": 8.818998956272491e-05, - "loss": 3.1959, - "step": 123500 - }, - { - "epoch": 1.78, - "learning_rate": 8.813806282097218e-05, - "loss": 3.1871, - "step": 124000 - }, - { - "epoch": 1.79, - "learning_rate": 8.808623993270295e-05, - "loss": 3.185, - "step": 124500 - }, - { - "epoch": 1.8, - "learning_rate": 8.803431319095022e-05, - "loss": 3.1881, - "step": 125000 - }, - { - "epoch": 1.81, - "learning_rate": 8.798238644919747e-05, - "loss": 3.1883, - "step": 125500 - }, - { - "epoch": 1.81, - "learning_rate": 8.793045970744474e-05, - "loss": 3.1895, - "step": 126000 - }, - { - "epoch": 1.82, - "learning_rate": 8.787853296569201e-05, - "loss": 3.1828, - "step": 126500 - }, - { - "epoch": 1.83, - "learning_rate": 8.782671007742279e-05, - "loss": 3.1821, - "step": 127000 - }, - { - "epoch": 1.84, - "learning_rate": 8.777478333567004e-05, - "loss": 3.1756, - "step": 127500 - }, - { - "epoch": 1.84, - "learning_rate": 8.77228565939173e-05, - "loss": 3.1751, - "step": 128000 - }, - { - "epoch": 1.85, - "learning_rate": 8.767092985216458e-05, - "loss": 3.175, - "step": 128500 - }, - { - "epoch": 1.86, - "learning_rate": 8.761910696389533e-05, - "loss": 3.1728, - "step": 129000 - }, - { - "epoch": 1.86, - "learning_rate": 8.756718022214261e-05, - "loss": 3.1766, - "step": 129500 - }, - { - "epoch": 1.87, - "learning_rate": 8.751525348038987e-05, - "loss": 3.1716, - "step": 130000 - }, - { - "epoch": 1.88, - "learning_rate": 8.746332673863714e-05, - "loss": 3.173, - "step": 130500 - }, - { - "epoch": 1.89, - "learning_rate": 8.74115038503679e-05, - "loss": 3.1739, - "step": 131000 - }, - { - "epoch": 1.89, - "learning_rate": 8.735957710861517e-05, - "loss": 3.1694, - "step": 131500 - }, - { - "epoch": 1.9, - "learning_rate": 8.730765036686244e-05, - "loss": 3.1712, - "step": 132000 - }, - { - "epoch": 1.91, - "learning_rate": 8.72557236251097e-05, - "loss": 3.1677, - "step": 132500 - }, - { - "epoch": 1.91, - "learning_rate": 8.720390073684047e-05, - "loss": 3.1695, - "step": 133000 - }, - { - "epoch": 1.92, - "learning_rate": 8.715197399508773e-05, - "loss": 3.1677, - "step": 133500 - }, - { - "epoch": 1.93, - "learning_rate": 8.710004725333501e-05, - "loss": 3.1673, - "step": 134000 - }, - { - "epoch": 1.94, - "learning_rate": 8.704812051158226e-05, - "loss": 3.164, - "step": 134500 - }, - { - "epoch": 1.94, - "learning_rate": 8.699619376982952e-05, - "loss": 3.1672, - "step": 135000 - }, - { - "epoch": 1.95, - "learning_rate": 8.69443708815603e-05, - "loss": 3.1642, - "step": 135500 - }, - { - "epoch": 1.96, - "learning_rate": 8.689244413980757e-05, - "loss": 3.1599, - "step": 136000 - }, - { - "epoch": 1.96, - "learning_rate": 8.684051739805484e-05, - "loss": 3.1576, - "step": 136500 - }, - { - "epoch": 1.97, - "learning_rate": 8.678859065630209e-05, - "loss": 3.1604, - "step": 137000 - }, - { - "epoch": 1.98, - "learning_rate": 8.673676776803287e-05, - "loss": 3.1623, - "step": 137500 - }, - { - "epoch": 1.99, - "learning_rate": 8.668484102628012e-05, - "loss": 3.1602, - "step": 138000 - }, - { - "epoch": 1.99, - "learning_rate": 8.663291428452739e-05, - "loss": 3.1526, - "step": 138500 - }, - { - "epoch": 2.0, - "eval_accuracy": 0.45829144781113035, - "eval_loss": 2.9986870288848877, - "eval_runtime": 555.6147, - "eval_samples_per_second": 969.983, - "eval_steps_per_second": 40.416, - "step": 138946 - }, - { - "epoch": 2.0, - "learning_rate": 8.658098754277466e-05, - "loss": 3.1508, - "step": 139000 - }, - { - "epoch": 2.01, - "learning_rate": 8.652916465450543e-05, - "loss": 3.1468, - "step": 139500 - }, - { - "epoch": 2.02, - "learning_rate": 8.64772379127527e-05, - "loss": 3.1518, - "step": 140000 - }, - { - "epoch": 2.02, - "learning_rate": 8.642531117099995e-05, - "loss": 3.1466, - "step": 140500 - }, - { - "epoch": 2.03, - "learning_rate": 8.637338442924722e-05, - "loss": 3.1404, - "step": 141000 - }, - { - "epoch": 2.04, - "learning_rate": 8.632156154097799e-05, - "loss": 3.1467, - "step": 141500 - }, - { - "epoch": 2.04, - "learning_rate": 8.626963479922525e-05, - "loss": 3.1403, - "step": 142000 - }, - { - "epoch": 2.05, - "learning_rate": 8.621770805747252e-05, - "loss": 3.149, - "step": 142500 - }, - { - "epoch": 2.06, - "learning_rate": 8.616578131571979e-05, - "loss": 3.1449, - "step": 143000 - }, - { - "epoch": 2.07, - "learning_rate": 8.611395842745056e-05, - "loss": 3.1415, - "step": 143500 - }, - { - "epoch": 2.07, - "learning_rate": 8.606203168569782e-05, - "loss": 3.149, - "step": 144000 - }, - { - "epoch": 2.08, - "learning_rate": 8.601010494394509e-05, - "loss": 3.1419, - "step": 144500 - }, - { - "epoch": 2.09, - "learning_rate": 8.595817820219235e-05, - "loss": 3.1403, - "step": 145000 - }, - { - "epoch": 2.09, - "learning_rate": 8.590635531392313e-05, - "loss": 3.1436, - "step": 145500 - }, - { - "epoch": 2.1, - "learning_rate": 8.585442857217038e-05, - "loss": 3.1436, - "step": 146000 - }, - { - "epoch": 2.11, - "learning_rate": 8.580250183041765e-05, - "loss": 3.1351, - "step": 146500 - }, - { - "epoch": 2.12, - "learning_rate": 8.575057508866492e-05, - "loss": 3.1401, - "step": 147000 - }, - { - "epoch": 2.12, - "learning_rate": 8.569864834691219e-05, - "loss": 3.1371, - "step": 147500 - }, - { - "epoch": 2.13, - "learning_rate": 8.564682545864295e-05, - "loss": 3.134, - "step": 148000 - }, - { - "epoch": 2.14, - "learning_rate": 8.559489871689022e-05, - "loss": 3.1371, - "step": 148500 - }, - { - "epoch": 2.14, - "learning_rate": 8.554297197513748e-05, - "loss": 3.1346, - "step": 149000 - }, - { - "epoch": 2.15, - "learning_rate": 8.549104523338474e-05, - "loss": 3.1367, - "step": 149500 - }, - { - "epoch": 2.16, - "learning_rate": 8.543922234511551e-05, - "loss": 3.1298, - "step": 150000 - }, - { - "epoch": 2.17, - "learning_rate": 8.538729560336278e-05, - "loss": 3.1317, - "step": 150500 - }, - { - "epoch": 2.17, - "learning_rate": 8.533536886161005e-05, - "loss": 3.1318, - "step": 151000 - }, - { - "epoch": 2.18, - "learning_rate": 8.52834421198573e-05, - "loss": 3.1277, - "step": 151500 - }, - { - "epoch": 2.19, - "learning_rate": 8.523161923158808e-05, - "loss": 3.1296, - "step": 152000 - }, - { - "epoch": 2.2, - "learning_rate": 8.517969248983535e-05, - "loss": 3.127, - "step": 152500 - }, - { - "epoch": 2.2, - "learning_rate": 8.51277657480826e-05, - "loss": 3.1285, - "step": 153000 - }, - { - "epoch": 2.21, - "learning_rate": 8.507583900632987e-05, - "loss": 3.1263, - "step": 153500 - }, - { - "epoch": 2.22, - "learning_rate": 8.502401611806064e-05, - "loss": 3.1236, - "step": 154000 - }, - { - "epoch": 2.22, - "learning_rate": 8.497208937630791e-05, - "loss": 3.1239, - "step": 154500 - }, - { - "epoch": 2.23, - "learning_rate": 8.492016263455518e-05, - "loss": 3.1234, - "step": 155000 - }, - { - "epoch": 2.24, - "learning_rate": 8.486823589280244e-05, - "loss": 3.1228, - "step": 155500 - }, - { - "epoch": 2.25, - "learning_rate": 8.481641300453321e-05, - "loss": 3.124, - "step": 156000 - }, - { - "epoch": 2.25, - "learning_rate": 8.476448626278048e-05, - "loss": 3.12, - "step": 156500 - }, - { - "epoch": 2.26, - "learning_rate": 8.471255952102773e-05, - "loss": 3.1252, - "step": 157000 - }, - { - "epoch": 2.27, - "learning_rate": 8.4660632779275e-05, - "loss": 3.1204, - "step": 157500 - }, - { - "epoch": 2.27, - "learning_rate": 8.460870603752227e-05, - "loss": 3.121, - "step": 158000 - }, - { - "epoch": 2.28, - "learning_rate": 8.455688314925304e-05, - "loss": 3.1145, - "step": 158500 - }, - { - "epoch": 2.29, - "learning_rate": 8.45049564075003e-05, - "loss": 3.1222, - "step": 159000 - }, - { - "epoch": 2.3, - "learning_rate": 8.445302966574756e-05, - "loss": 3.114, - "step": 159500 - }, - { - "epoch": 2.3, - "learning_rate": 8.440110292399484e-05, - "loss": 3.1181, - "step": 160000 - }, - { - "epoch": 2.31, - "learning_rate": 8.43492800357256e-05, - "loss": 3.1147, - "step": 160500 - }, - { - "epoch": 2.32, - "learning_rate": 8.429735329397288e-05, - "loss": 3.1156, - "step": 161000 - }, - { - "epoch": 2.32, - "learning_rate": 8.424542655222013e-05, - "loss": 3.1177, - "step": 161500 - }, - { - "epoch": 2.33, - "learning_rate": 8.419349981046739e-05, - "loss": 3.1138, - "step": 162000 - }, - { - "epoch": 2.34, - "learning_rate": 8.414157306871467e-05, - "loss": 3.1093, - "step": 162500 - }, - { - "epoch": 2.35, - "learning_rate": 8.408975018044543e-05, - "loss": 3.1114, - "step": 163000 - }, - { - "epoch": 2.35, - "learning_rate": 8.40378234386927e-05, - "loss": 3.1081, - "step": 163500 - }, - { - "epoch": 2.36, - "learning_rate": 8.398589669693996e-05, - "loss": 3.1095, - "step": 164000 - }, - { - "epoch": 2.37, - "learning_rate": 8.393396995518722e-05, - "loss": 3.1119, - "step": 164500 - }, - { - "epoch": 2.38, - "learning_rate": 8.388204321343449e-05, - "loss": 3.1079, - "step": 165000 - }, - { - "epoch": 2.38, - "learning_rate": 8.383022032516526e-05, - "loss": 3.1074, - "step": 165500 - }, - { - "epoch": 2.39, - "learning_rate": 8.377829358341253e-05, - "loss": 3.1119, - "step": 166000 - }, - { - "epoch": 2.4, - "learning_rate": 8.372636684165978e-05, - "loss": 3.1053, - "step": 166500 - }, - { - "epoch": 2.4, - "learning_rate": 8.367444009990706e-05, - "loss": 3.1072, - "step": 167000 - }, - { - "epoch": 2.41, - "learning_rate": 8.362261721163782e-05, - "loss": 3.1026, - "step": 167500 - }, - { - "epoch": 2.42, - "learning_rate": 8.35706904698851e-05, - "loss": 3.1041, - "step": 168000 - }, - { - "epoch": 2.43, - "learning_rate": 8.351876372813235e-05, - "loss": 3.1049, - "step": 168500 - }, - { - "epoch": 2.43, - "learning_rate": 8.346683698637962e-05, - "loss": 3.1048, - "step": 169000 - }, - { - "epoch": 2.44, - "learning_rate": 8.341501409811039e-05, - "loss": 3.0944, - "step": 169500 - }, - { - "epoch": 2.45, - "learning_rate": 8.336308735635766e-05, - "loss": 3.0945, - "step": 170000 - }, - { - "epoch": 2.45, - "learning_rate": 8.331116061460492e-05, - "loss": 3.102, - "step": 170500 - }, - { - "epoch": 2.46, - "learning_rate": 8.325923387285218e-05, - "loss": 3.1024, - "step": 171000 - }, - { - "epoch": 2.47, - "learning_rate": 8.320741098458296e-05, - "loss": 3.0992, - "step": 171500 - }, - { - "epoch": 2.48, - "learning_rate": 8.315548424283021e-05, - "loss": 3.0986, - "step": 172000 - }, - { - "epoch": 2.48, - "learning_rate": 8.31035575010775e-05, - "loss": 3.0948, - "step": 172500 - }, - { - "epoch": 2.49, - "learning_rate": 8.305163075932475e-05, - "loss": 3.1028, - "step": 173000 - }, - { - "epoch": 2.5, - "learning_rate": 8.299980787105553e-05, - "loss": 3.0961, - "step": 173500 - }, - { - "epoch": 2.5, - "learning_rate": 8.294788112930279e-05, - "loss": 3.0904, - "step": 174000 - }, - { - "epoch": 2.51, - "learning_rate": 8.289595438755004e-05, - "loss": 3.0899, - "step": 174500 - }, - { - "epoch": 2.52, - "learning_rate": 8.284402764579732e-05, - "loss": 3.095, - "step": 175000 - }, - { - "epoch": 2.53, - "learning_rate": 8.279210090404458e-05, - "loss": 3.094, - "step": 175500 - }, - { - "epoch": 2.53, - "learning_rate": 8.274027801577536e-05, - "loss": 3.0944, - "step": 176000 - }, - { - "epoch": 2.54, - "learning_rate": 8.268835127402261e-05, - "loss": 3.0905, - "step": 176500 - }, - { - "epoch": 2.55, - "learning_rate": 8.263642453226988e-05, - "loss": 3.0903, - "step": 177000 - }, - { - "epoch": 2.55, - "learning_rate": 8.258449779051715e-05, - "loss": 3.0916, - "step": 177500 - }, - { - "epoch": 2.56, - "learning_rate": 8.25325710487644e-05, - "loss": 3.09, - "step": 178000 - }, - { - "epoch": 2.57, - "learning_rate": 8.248074816049518e-05, - "loss": 3.0923, - "step": 178500 - }, - { - "epoch": 2.58, - "learning_rate": 8.242882141874244e-05, - "loss": 3.0875, - "step": 179000 - }, - { - "epoch": 2.58, - "learning_rate": 8.23768946769897e-05, - "loss": 3.0869, - "step": 179500 - }, - { - "epoch": 2.59, - "learning_rate": 8.232496793523697e-05, - "loss": 3.0882, - "step": 180000 - }, - { - "epoch": 2.6, - "learning_rate": 8.227314504696774e-05, - "loss": 3.0838, - "step": 180500 - }, - { - "epoch": 2.61, - "learning_rate": 8.222121830521501e-05, - "loss": 3.0862, - "step": 181000 - }, - { - "epoch": 2.61, - "learning_rate": 8.216929156346228e-05, - "loss": 3.0856, - "step": 181500 - }, - { - "epoch": 2.62, - "learning_rate": 8.211736482170954e-05, - "loss": 3.0869, - "step": 182000 - }, - { - "epoch": 2.63, - "learning_rate": 8.20654380799568e-05, - "loss": 3.0862, - "step": 182500 - }, - { - "epoch": 2.63, - "learning_rate": 8.201361519168758e-05, - "loss": 3.0803, - "step": 183000 - }, - { - "epoch": 2.64, - "learning_rate": 8.196168844993483e-05, - "loss": 3.0821, - "step": 183500 - }, - { - "epoch": 2.65, - "learning_rate": 8.19097617081821e-05, - "loss": 3.0882, - "step": 184000 - }, - { - "epoch": 2.66, - "learning_rate": 8.185783496642937e-05, - "loss": 3.0878, - "step": 184500 - }, - { - "epoch": 2.66, - "learning_rate": 8.180601207816014e-05, - "loss": 3.0769, - "step": 185000 - }, - { - "epoch": 2.67, - "learning_rate": 8.17540853364074e-05, - "loss": 3.0818, - "step": 185500 - }, - { - "epoch": 2.68, - "learning_rate": 8.170215859465466e-05, - "loss": 3.0784, - "step": 186000 - }, - { - "epoch": 2.68, - "learning_rate": 8.165023185290193e-05, - "loss": 3.0813, - "step": 186500 - }, - { - "epoch": 2.69, - "learning_rate": 8.15983051111492e-05, - "loss": 3.0778, - "step": 187000 - }, - { - "epoch": 2.7, - "learning_rate": 8.154648222287996e-05, - "loss": 3.0768, - "step": 187500 - }, - { - "epoch": 2.71, - "learning_rate": 8.149455548112723e-05, - "loss": 3.0817, - "step": 188000 - }, - { - "epoch": 2.71, - "learning_rate": 8.14426287393745e-05, - "loss": 3.075, - "step": 188500 - }, - { - "epoch": 2.72, - "learning_rate": 8.139070199762175e-05, - "loss": 3.0792, - "step": 189000 - }, - { - "epoch": 2.73, - "learning_rate": 8.133887910935253e-05, - "loss": 3.0767, - "step": 189500 - }, - { - "epoch": 2.73, - "learning_rate": 8.128695236759979e-05, - "loss": 3.0698, - "step": 190000 - }, - { - "epoch": 2.74, - "learning_rate": 8.123502562584706e-05, - "loss": 3.0778, - "step": 190500 - }, - { - "epoch": 2.75, - "learning_rate": 8.118309888409432e-05, - "loss": 3.0745, - "step": 191000 - }, - { - "epoch": 2.76, - "learning_rate": 8.113127599582509e-05, - "loss": 3.0703, - "step": 191500 - }, - { - "epoch": 2.76, - "learning_rate": 8.107934925407236e-05, - "loss": 3.0746, - "step": 192000 - }, - { - "epoch": 2.77, - "learning_rate": 8.102742251231963e-05, - "loss": 3.0685, - "step": 192500 - }, - { - "epoch": 2.78, - "learning_rate": 8.09754957705669e-05, - "loss": 3.0662, - "step": 193000 - }, - { - "epoch": 2.79, - "learning_rate": 8.092367288229766e-05, - "loss": 3.0712, - "step": 193500 - }, - { - "epoch": 2.79, - "learning_rate": 8.087174614054493e-05, - "loss": 3.0735, - "step": 194000 - }, - { - "epoch": 2.8, - "learning_rate": 8.081981939879219e-05, - "loss": 3.0626, - "step": 194500 - }, - { - "epoch": 2.81, - "learning_rate": 8.076789265703945e-05, - "loss": 3.0676, - "step": 195000 - }, - { - "epoch": 2.81, - "learning_rate": 8.071596591528672e-05, - "loss": 3.0707, - "step": 195500 - }, - { - "epoch": 2.82, - "learning_rate": 8.066414302701749e-05, - "loss": 3.0651, - "step": 196000 - }, - { - "epoch": 2.83, - "learning_rate": 8.061221628526476e-05, - "loss": 3.0687, - "step": 196500 - }, - { - "epoch": 2.84, - "learning_rate": 8.056028954351201e-05, - "loss": 3.0635, - "step": 197000 - }, - { - "epoch": 2.84, - "learning_rate": 8.050836280175928e-05, - "loss": 3.0616, - "step": 197500 - }, - { - "epoch": 2.85, - "learning_rate": 8.045653991349005e-05, - "loss": 3.0603, - "step": 198000 - }, - { - "epoch": 2.86, - "learning_rate": 8.040461317173731e-05, - "loss": 3.0605, - "step": 198500 - }, - { - "epoch": 2.86, - "learning_rate": 8.035268642998458e-05, - "loss": 3.0689, - "step": 199000 - }, - { - "epoch": 2.87, - "learning_rate": 8.030075968823184e-05, - "loss": 3.0626, - "step": 199500 - }, - { - "epoch": 2.88, - "learning_rate": 8.024883294647912e-05, - "loss": 3.0667, - "step": 200000 - }, - { - "epoch": 2.89, - "learning_rate": 8.019701005820987e-05, - "loss": 3.0647, - "step": 200500 - }, - { - "epoch": 2.89, - "learning_rate": 8.014508331645715e-05, - "loss": 3.0613, - "step": 201000 - }, - { - "epoch": 2.9, - "learning_rate": 8.009315657470441e-05, - "loss": 3.0599, - "step": 201500 - }, - { - "epoch": 2.91, - "learning_rate": 8.004122983295168e-05, - "loss": 3.0612, - "step": 202000 - }, - { - "epoch": 2.91, - "learning_rate": 7.998940694468244e-05, - "loss": 3.0604, - "step": 202500 - }, - { - "epoch": 2.92, - "learning_rate": 7.993748020292971e-05, - "loss": 3.061, - "step": 203000 - }, - { - "epoch": 2.93, - "learning_rate": 7.988555346117698e-05, - "loss": 3.0594, - "step": 203500 - }, - { - "epoch": 2.94, - "learning_rate": 7.983362671942423e-05, - "loss": 3.0603, - "step": 204000 - }, - { - "epoch": 2.94, - "learning_rate": 7.978180383115501e-05, - "loss": 3.0604, - "step": 204500 - }, - { - "epoch": 2.95, - "learning_rate": 7.972987708940227e-05, - "loss": 3.0573, - "step": 205000 - }, - { - "epoch": 2.96, - "learning_rate": 7.967795034764955e-05, - "loss": 3.0548, - "step": 205500 - }, - { - "epoch": 2.97, - "learning_rate": 7.96260236058968e-05, - "loss": 3.052, - "step": 206000 - }, - { - "epoch": 2.97, - "learning_rate": 7.957420071762758e-05, - "loss": 3.0582, - "step": 206500 - }, - { - "epoch": 2.98, - "learning_rate": 7.952227397587484e-05, - "loss": 3.056, - "step": 207000 - }, - { - "epoch": 2.99, - "learning_rate": 7.94703472341221e-05, - "loss": 3.0518, - "step": 207500 - }, - { - "epoch": 2.99, - "learning_rate": 7.941842049236938e-05, - "loss": 3.0496, - "step": 208000 - }, - { - "epoch": 3.0, - "eval_accuracy": 0.47146243279201466, - "eval_loss": 2.8874731063842773, - "eval_runtime": 556.6053, - "eval_samples_per_second": 968.257, - "eval_steps_per_second": 40.345, - "step": 208419 - }, - { - "epoch": 3.0, - "learning_rate": 7.936659760410013e-05, - "loss": 3.0577, - "step": 208500 - }, - { - "epoch": 3.01, - "learning_rate": 7.931467086234741e-05, - "loss": 3.0492, - "step": 209000 - }, - { - "epoch": 3.02, - "learning_rate": 7.926274412059467e-05, - "loss": 3.0562, - "step": 209500 - }, - { - "epoch": 3.02, - "learning_rate": 7.921081737884193e-05, - "loss": 3.0496, - "step": 210000 - }, - { - "epoch": 3.03, - "learning_rate": 7.91589944905727e-05, - "loss": 3.0463, - "step": 210500 - }, - { - "epoch": 3.04, - "learning_rate": 7.910706774881997e-05, - "loss": 3.0431, - "step": 211000 - }, - { - "epoch": 3.04, - "learning_rate": 7.905514100706724e-05, - "loss": 3.0444, - "step": 211500 - }, - { - "epoch": 3.05, - "learning_rate": 7.900321426531449e-05, - "loss": 3.0426, - "step": 212000 - }, - { - "epoch": 3.06, - "learning_rate": 7.895139137704527e-05, - "loss": 3.0422, - "step": 212500 - }, - { - "epoch": 3.07, - "learning_rate": 7.889946463529253e-05, - "loss": 3.05, - "step": 213000 - }, - { - "epoch": 3.07, - "learning_rate": 7.884753789353981e-05, - "loss": 3.0402, - "step": 213500 - }, - { - "epoch": 3.08, - "learning_rate": 7.879561115178706e-05, - "loss": 3.0426, - "step": 214000 - }, - { - "epoch": 3.09, - "learning_rate": 7.874378826351784e-05, - "loss": 3.0388, - "step": 214500 - }, - { - "epoch": 3.09, - "learning_rate": 7.86918615217651e-05, - "loss": 3.0422, - "step": 215000 - }, - { - "epoch": 3.1, - "learning_rate": 7.863993478001237e-05, - "loss": 3.0474, - "step": 215500 - }, - { - "epoch": 3.11, - "learning_rate": 7.858800803825963e-05, - "loss": 3.0378, - "step": 216000 - }, - { - "epoch": 3.12, - "learning_rate": 7.85361851499904e-05, - "loss": 3.0379, - "step": 216500 - }, - { - "epoch": 3.12, - "learning_rate": 7.848425840823767e-05, - "loss": 3.0364, - "step": 217000 - }, - { - "epoch": 3.13, - "learning_rate": 7.843233166648492e-05, - "loss": 3.0432, - "step": 217500 - }, - { - "epoch": 3.14, - "learning_rate": 7.838040492473219e-05, - "loss": 3.0397, - "step": 218000 - }, - { - "epoch": 3.15, - "learning_rate": 7.832847818297946e-05, - "loss": 3.0409, - "step": 218500 - }, - { - "epoch": 3.15, - "learning_rate": 7.827665529471023e-05, - "loss": 3.0405, - "step": 219000 - }, - { - "epoch": 3.16, - "learning_rate": 7.82247285529575e-05, - "loss": 3.037, - "step": 219500 - }, - { - "epoch": 3.17, - "learning_rate": 7.817280181120475e-05, - "loss": 3.044, - "step": 220000 - }, - { - "epoch": 3.17, - "learning_rate": 7.812087506945203e-05, - "loss": 3.0299, - "step": 220500 - }, - { - "epoch": 3.18, - "learning_rate": 7.806905218118278e-05, - "loss": 3.042, - "step": 221000 - }, - { - "epoch": 3.19, - "learning_rate": 7.801712543943007e-05, - "loss": 3.0366, - "step": 221500 - }, - { - "epoch": 3.2, - "learning_rate": 7.796519869767732e-05, - "loss": 3.0399, - "step": 222000 - }, - { - "epoch": 3.2, - "learning_rate": 7.791327195592459e-05, - "loss": 3.0334, - "step": 222500 - }, - { - "epoch": 3.21, - "learning_rate": 7.786144906765535e-05, - "loss": 3.0362, - "step": 223000 - }, - { - "epoch": 3.22, - "learning_rate": 7.780952232590262e-05, - "loss": 3.0333, - "step": 223500 - }, - { - "epoch": 3.22, - "learning_rate": 7.775759558414989e-05, - "loss": 3.0333, - "step": 224000 - }, - { - "epoch": 3.23, - "learning_rate": 7.770566884239715e-05, - "loss": 3.0363, - "step": 224500 - }, - { - "epoch": 3.24, - "learning_rate": 7.765384595412793e-05, - "loss": 3.0352, - "step": 225000 - }, - { - "epoch": 3.25, - "learning_rate": 7.760191921237518e-05, - "loss": 3.0354, - "step": 225500 - }, - { - "epoch": 3.25, - "learning_rate": 7.754999247062245e-05, - "loss": 3.0313, - "step": 226000 - }, - { - "epoch": 3.26, - "learning_rate": 7.749806572886972e-05, - "loss": 3.0322, - "step": 226500 - }, - { - "epoch": 3.27, - "learning_rate": 7.744624284060048e-05, - "loss": 3.0338, - "step": 227000 - }, - { - "epoch": 3.27, - "learning_rate": 7.739431609884775e-05, - "loss": 3.0344, - "step": 227500 - }, - { - "epoch": 3.28, - "learning_rate": 7.734238935709502e-05, - "loss": 3.0337, - "step": 228000 - }, - { - "epoch": 3.29, - "learning_rate": 7.729046261534227e-05, - "loss": 3.0325, - "step": 228500 - }, - { - "epoch": 3.3, - "learning_rate": 7.723863972707305e-05, - "loss": 3.0302, - "step": 229000 - }, - { - "epoch": 3.3, - "learning_rate": 7.718671298532031e-05, - "loss": 3.0318, - "step": 229500 - }, - { - "epoch": 3.31, - "learning_rate": 7.713478624356758e-05, - "loss": 3.0278, - "step": 230000 - }, - { - "epoch": 3.32, - "learning_rate": 7.708285950181485e-05, - "loss": 3.0305, - "step": 230500 - }, - { - "epoch": 3.33, - "learning_rate": 7.703103661354561e-05, - "loss": 3.0268, - "step": 231000 - }, - { - "epoch": 3.33, - "learning_rate": 7.697910987179288e-05, - "loss": 3.0302, - "step": 231500 - }, - { - "epoch": 3.34, - "learning_rate": 7.692718313004015e-05, - "loss": 3.0309, - "step": 232000 - }, - { - "epoch": 3.35, - "learning_rate": 7.68752563882874e-05, - "loss": 3.0268, - "step": 232500 - }, - { - "epoch": 3.35, - "learning_rate": 7.682343350001818e-05, - "loss": 3.0218, - "step": 233000 - }, - { - "epoch": 3.36, - "learning_rate": 7.677150675826544e-05, - "loss": 3.034, - "step": 233500 - }, - { - "epoch": 3.37, - "learning_rate": 7.67195800165127e-05, - "loss": 3.026, - "step": 234000 - }, - { - "epoch": 3.38, - "learning_rate": 7.666765327475997e-05, - "loss": 3.0302, - "step": 234500 - }, - { - "epoch": 3.38, - "learning_rate": 7.661572653300724e-05, - "loss": 3.0273, - "step": 235000 - }, - { - "epoch": 3.39, - "learning_rate": 7.656390364473801e-05, - "loss": 3.022, - "step": 235500 - }, - { - "epoch": 3.4, - "learning_rate": 7.651197690298528e-05, - "loss": 3.0208, - "step": 236000 - }, - { - "epoch": 3.4, - "learning_rate": 7.646005016123253e-05, - "loss": 3.0262, - "step": 236500 - }, - { - "epoch": 3.41, - "learning_rate": 7.64081234194798e-05, - "loss": 3.0258, - "step": 237000 - }, - { - "epoch": 3.42, - "learning_rate": 7.635630053121057e-05, - "loss": 3.0246, - "step": 237500 - }, - { - "epoch": 3.43, - "learning_rate": 7.630437378945783e-05, - "loss": 3.0237, - "step": 238000 - }, - { - "epoch": 3.43, - "learning_rate": 7.62524470477051e-05, - "loss": 3.0202, - "step": 238500 - }, - { - "epoch": 3.44, - "learning_rate": 7.620052030595236e-05, - "loss": 3.0234, - "step": 239000 - }, - { - "epoch": 3.45, - "learning_rate": 7.614869741768314e-05, - "loss": 3.0184, - "step": 239500 - }, - { - "epoch": 3.45, - "learning_rate": 7.609677067593039e-05, - "loss": 3.0281, - "step": 240000 - }, - { - "epoch": 3.46, - "learning_rate": 7.604484393417767e-05, - "loss": 3.0167, - "step": 240500 - }, - { - "epoch": 3.47, - "learning_rate": 7.599291719242493e-05, - "loss": 3.0139, - "step": 241000 - }, - { - "epoch": 3.48, - "learning_rate": 7.594109430415571e-05, - "loss": 3.0222, - "step": 241500 - }, - { - "epoch": 3.48, - "learning_rate": 7.588916756240296e-05, - "loss": 3.0181, - "step": 242000 - }, - { - "epoch": 3.49, - "learning_rate": 7.583724082065023e-05, - "loss": 3.0246, - "step": 242500 - }, - { - "epoch": 3.5, - "learning_rate": 7.57853140788975e-05, - "loss": 3.0204, - "step": 243000 - }, - { - "epoch": 3.5, - "learning_rate": 7.573349119062827e-05, - "loss": 3.0188, - "step": 243500 - }, - { - "epoch": 3.51, - "learning_rate": 7.568156444887553e-05, - "loss": 3.0184, - "step": 244000 - }, - { - "epoch": 3.52, - "learning_rate": 7.562963770712279e-05, - "loss": 3.0176, - "step": 244500 - }, - { - "epoch": 3.53, - "learning_rate": 7.557771096537006e-05, - "loss": 3.0113, - "step": 245000 - }, - { - "epoch": 3.53, - "learning_rate": 7.552588807710082e-05, - "loss": 3.0115, - "step": 245500 - }, - { - "epoch": 3.54, - "learning_rate": 7.547396133534809e-05, - "loss": 3.0151, - "step": 246000 - }, - { - "epoch": 3.55, - "learning_rate": 7.542203459359536e-05, - "loss": 3.017, - "step": 246500 - }, - { - "epoch": 3.56, - "learning_rate": 7.537010785184261e-05, - "loss": 3.0165, - "step": 247000 - }, - { - "epoch": 3.56, - "learning_rate": 7.53182849635734e-05, - "loss": 3.0173, - "step": 247500 - }, - { - "epoch": 3.57, - "learning_rate": 7.526635822182065e-05, - "loss": 3.0107, - "step": 248000 - }, - { - "epoch": 3.58, - "learning_rate": 7.521443148006793e-05, - "loss": 3.0137, - "step": 248500 - }, - { - "epoch": 3.58, - "learning_rate": 7.516250473831519e-05, - "loss": 3.0112, - "step": 249000 - }, - { - "epoch": 3.59, - "learning_rate": 7.511068185004597e-05, - "loss": 3.0133, - "step": 249500 - }, - { - "epoch": 3.6, - "learning_rate": 7.505875510829322e-05, - "loss": 3.0133, - "step": 250000 - }, - { - "epoch": 3.61, - "learning_rate": 7.500682836654049e-05, - "loss": 3.0108, - "step": 250500 - }, - { - "epoch": 3.61, - "learning_rate": 7.495490162478776e-05, - "loss": 3.0104, - "step": 251000 - }, - { - "epoch": 3.62, - "learning_rate": 7.490307873651852e-05, - "loss": 3.0029, - "step": 251500 - }, - { - "epoch": 3.63, - "learning_rate": 7.485115199476579e-05, - "loss": 3.0127, - "step": 252000 - }, - { - "epoch": 3.63, - "learning_rate": 7.479922525301305e-05, - "loss": 3.0162, - "step": 252500 - }, - { - "epoch": 3.64, - "learning_rate": 7.474729851126033e-05, - "loss": 3.0193, - "step": 253000 - }, - { - "epoch": 3.65, - "learning_rate": 7.469547562299108e-05, - "loss": 3.0078, - "step": 253500 - }, - { - "epoch": 3.66, - "learning_rate": 7.464354888123836e-05, - "loss": 3.0112, - "step": 254000 - }, - { - "epoch": 3.66, - "learning_rate": 7.459162213948562e-05, - "loss": 3.0066, - "step": 254500 - }, - { - "epoch": 3.67, - "learning_rate": 7.453969539773287e-05, - "loss": 3.0065, - "step": 255000 - }, - { - "epoch": 3.68, - "learning_rate": 7.448787250946365e-05, - "loss": 3.0043, - "step": 255500 - }, - { - "epoch": 3.68, - "learning_rate": 7.443604962119443e-05, - "loss": 3.0053, - "step": 256000 - }, - { - "epoch": 3.69, - "learning_rate": 7.438412287944169e-05, - "loss": 3.0063, - "step": 256500 - }, - { - "epoch": 3.7, - "learning_rate": 7.433219613768894e-05, - "loss": 3.0063, - "step": 257000 - }, - { - "epoch": 3.71, - "learning_rate": 7.428026939593622e-05, - "loss": 3.0053, - "step": 257500 - }, - { - "epoch": 3.71, - "learning_rate": 7.422834265418348e-05, - "loss": 3.0084, - "step": 258000 - }, - { - "epoch": 3.72, - "learning_rate": 7.417641591243075e-05, - "loss": 3.0014, - "step": 258500 - }, - { - "epoch": 3.73, - "learning_rate": 7.412448917067801e-05, - "loss": 3.0082, - "step": 259000 - }, - { - "epoch": 3.74, - "learning_rate": 7.407256242892527e-05, - "loss": 3.0037, - "step": 259500 - }, - { - "epoch": 3.74, - "learning_rate": 7.402073954065605e-05, - "loss": 3.0099, - "step": 260000 - }, - { - "epoch": 3.75, - "learning_rate": 7.39688127989033e-05, - "loss": 3.0042, - "step": 260500 - }, - { - "epoch": 3.76, - "learning_rate": 7.391688605715059e-05, - "loss": 3.0057, - "step": 261000 - }, - { - "epoch": 3.76, - "learning_rate": 7.386495931539784e-05, - "loss": 3.0052, - "step": 261500 - }, - { - "epoch": 3.77, - "learning_rate": 7.381313642712862e-05, - "loss": 3.0107, - "step": 262000 - }, - { - "epoch": 3.78, - "learning_rate": 7.376120968537587e-05, - "loss": 3.0052, - "step": 262500 - }, - { - "epoch": 3.79, - "learning_rate": 7.370928294362314e-05, - "loss": 3.001, - "step": 263000 - }, - { - "epoch": 3.79, - "learning_rate": 7.365735620187041e-05, - "loss": 3.0021, - "step": 263500 - }, - { - "epoch": 3.8, - "learning_rate": 7.360553331360118e-05, - "loss": 3.0031, - "step": 264000 - }, - { - "epoch": 3.81, - "learning_rate": 7.355360657184845e-05, - "loss": 2.9966, - "step": 264500 - }, - { - "epoch": 3.81, - "learning_rate": 7.35016798300957e-05, - "loss": 2.9921, - "step": 265000 - }, - { - "epoch": 3.82, - "learning_rate": 7.344975308834297e-05, - "loss": 3.0004, - "step": 265500 - }, - { - "epoch": 3.83, - "learning_rate": 7.339793020007374e-05, - "loss": 3.0055, - "step": 266000 - }, - { - "epoch": 3.84, - "learning_rate": 7.3346003458321e-05, - "loss": 2.999, - "step": 266500 - }, - { - "epoch": 3.84, - "learning_rate": 7.329407671656827e-05, - "loss": 3.0015, - "step": 267000 - }, - { - "epoch": 3.85, - "learning_rate": 7.324214997481553e-05, - "loss": 3.0025, - "step": 267500 - }, - { - "epoch": 3.86, - "learning_rate": 7.31903270865463e-05, - "loss": 2.9993, - "step": 268000 - }, - { - "epoch": 3.86, - "learning_rate": 7.313840034479356e-05, - "loss": 2.9981, - "step": 268500 - }, - { - "epoch": 3.87, - "learning_rate": 7.308647360304083e-05, - "loss": 2.9977, - "step": 269000 - }, - { - "epoch": 3.88, - "learning_rate": 7.30345468612881e-05, - "loss": 2.9972, - "step": 269500 - }, - { - "epoch": 3.89, - "learning_rate": 7.298272397301888e-05, - "loss": 2.9959, - "step": 270000 - }, - { - "epoch": 3.89, - "learning_rate": 7.293079723126613e-05, - "loss": 2.99, - "step": 270500 - }, - { - "epoch": 3.9, - "learning_rate": 7.28788704895134e-05, - "loss": 2.9956, - "step": 271000 - }, - { - "epoch": 3.91, - "learning_rate": 7.282694374776067e-05, - "loss": 2.9931, - "step": 271500 - }, - { - "epoch": 3.92, - "learning_rate": 7.277512085949144e-05, - "loss": 2.9967, - "step": 272000 - }, - { - "epoch": 3.92, - "learning_rate": 7.27231941177387e-05, - "loss": 2.9981, - "step": 272500 - }, - { - "epoch": 3.93, - "learning_rate": 7.267126737598596e-05, - "loss": 2.9931, - "step": 273000 - }, - { - "epoch": 3.94, - "learning_rate": 7.261934063423323e-05, - "loss": 2.9998, - "step": 273500 - }, - { - "epoch": 3.94, - "learning_rate": 7.256751774596399e-05, - "loss": 2.9912, - "step": 274000 - }, - { - "epoch": 3.95, - "learning_rate": 7.251559100421126e-05, - "loss": 2.9935, - "step": 274500 - }, - { - "epoch": 3.96, - "learning_rate": 7.246366426245853e-05, - "loss": 2.9956, - "step": 275000 - }, - { - "epoch": 3.97, - "learning_rate": 7.24117375207058e-05, - "loss": 2.9943, - "step": 275500 - }, - { - "epoch": 3.97, - "learning_rate": 7.235991463243656e-05, - "loss": 2.9921, - "step": 276000 - }, - { - "epoch": 3.98, - "learning_rate": 7.230798789068383e-05, - "loss": 2.9995, - "step": 276500 - }, - { - "epoch": 3.99, - "learning_rate": 7.225606114893109e-05, - "loss": 2.9887, - "step": 277000 - }, - { - "epoch": 3.99, - "learning_rate": 7.220413440717836e-05, - "loss": 2.9923, - "step": 277500 - }, - { - "epoch": 4.0, - "eval_accuracy": 0.47882795291393787, - "eval_loss": 2.8258414268493652, - "eval_runtime": 554.5752, - "eval_samples_per_second": 971.801, - "eval_steps_per_second": 40.492, - "step": 277892 - }, - { - "epoch": 4.0, - "learning_rate": 7.215231151890912e-05, - "loss": 2.9839, - "step": 278000 - }, - { - "epoch": 4.01, - "learning_rate": 7.210038477715639e-05, - "loss": 2.9855, - "step": 278500 - }, - { - "epoch": 4.02, - "learning_rate": 7.204845803540366e-05, - "loss": 2.9805, - "step": 279000 - }, - { - "epoch": 4.02, - "learning_rate": 7.199653129365093e-05, - "loss": 2.985, - "step": 279500 - }, - { - "epoch": 4.03, - "learning_rate": 7.194470840538169e-05, - "loss": 2.9864, - "step": 280000 - }, - { - "epoch": 4.04, - "learning_rate": 7.189278166362896e-05, - "loss": 2.9829, - "step": 280500 - }, - { - "epoch": 4.04, - "learning_rate": 7.184085492187622e-05, - "loss": 2.9848, - "step": 281000 - }, - { - "epoch": 4.05, - "learning_rate": 7.178892818012348e-05, - "loss": 2.985, - "step": 281500 - }, - { - "epoch": 4.06, - "learning_rate": 7.173700143837075e-05, - "loss": 2.9819, - "step": 282000 - }, - { - "epoch": 4.07, - "learning_rate": 7.168507469661802e-05, - "loss": 2.9875, - "step": 282500 - }, - { - "epoch": 4.07, - "learning_rate": 7.163314795486528e-05, - "loss": 2.9881, - "step": 283000 - }, - { - "epoch": 4.08, - "learning_rate": 7.158122121311254e-05, - "loss": 2.9871, - "step": 283500 - }, - { - "epoch": 4.09, - "learning_rate": 7.152939832484331e-05, - "loss": 2.9817, - "step": 284000 - }, - { - "epoch": 4.1, - "learning_rate": 7.147747158309058e-05, - "loss": 2.9802, - "step": 284500 - }, - { - "epoch": 4.1, - "learning_rate": 7.142554484133785e-05, - "loss": 2.9797, - "step": 285000 - }, - { - "epoch": 4.11, - "learning_rate": 7.13736180995851e-05, - "loss": 2.9804, - "step": 285500 - }, - { - "epoch": 4.12, - "learning_rate": 7.132179521131588e-05, - "loss": 2.9816, - "step": 286000 - }, - { - "epoch": 4.12, - "learning_rate": 7.126986846956314e-05, - "loss": 2.9841, - "step": 286500 - }, - { - "epoch": 4.13, - "learning_rate": 7.121794172781042e-05, - "loss": 2.984, - "step": 287000 - }, - { - "epoch": 4.14, - "learning_rate": 7.116601498605767e-05, - "loss": 2.9751, - "step": 287500 - }, - { - "epoch": 4.15, - "learning_rate": 7.111419209778845e-05, - "loss": 2.9818, - "step": 288000 - }, - { - "epoch": 4.15, - "learning_rate": 7.10622653560357e-05, - "loss": 2.9849, - "step": 288500 - }, - { - "epoch": 4.16, - "learning_rate": 7.101033861428297e-05, - "loss": 2.9847, - "step": 289000 - }, - { - "epoch": 4.17, - "learning_rate": 7.095841187253024e-05, - "loss": 2.9818, - "step": 289500 - }, - { - "epoch": 4.17, - "learning_rate": 7.090658898426101e-05, - "loss": 2.9765, - "step": 290000 - }, - { - "epoch": 4.18, - "learning_rate": 7.085466224250828e-05, - "loss": 2.9795, - "step": 290500 - }, - { - "epoch": 4.19, - "learning_rate": 7.080273550075553e-05, - "loss": 2.9787, - "step": 291000 - }, - { - "epoch": 4.2, - "learning_rate": 7.07508087590028e-05, - "loss": 2.98, - "step": 291500 - }, - { - "epoch": 4.2, - "learning_rate": 7.069898587073357e-05, - "loss": 2.9789, - "step": 292000 - }, - { - "epoch": 4.21, - "learning_rate": 7.064705912898084e-05, - "loss": 2.9762, - "step": 292500 - }, - { - "epoch": 4.22, - "learning_rate": 7.05951323872281e-05, - "loss": 2.9769, - "step": 293000 - }, - { - "epoch": 4.22, - "learning_rate": 7.054320564547536e-05, - "loss": 2.979, - "step": 293500 - }, - { - "epoch": 4.23, - "learning_rate": 7.049138275720614e-05, - "loss": 2.9742, - "step": 294000 - }, - { - "epoch": 4.24, - "learning_rate": 7.043945601545339e-05, - "loss": 2.9813, - "step": 294500 - }, - { - "epoch": 4.25, - "learning_rate": 7.038752927370067e-05, - "loss": 2.9814, - "step": 295000 - }, - { - "epoch": 4.25, - "learning_rate": 7.033560253194793e-05, - "loss": 2.976, - "step": 295500 - }, - { - "epoch": 4.26, - "learning_rate": 7.028377964367871e-05, - "loss": 2.9778, - "step": 296000 - }, - { - "epoch": 4.27, - "learning_rate": 7.023185290192596e-05, - "loss": 2.9772, - "step": 296500 - }, - { - "epoch": 4.28, - "learning_rate": 7.017992616017323e-05, - "loss": 2.976, - "step": 297000 - }, - { - "epoch": 4.28, - "learning_rate": 7.01279994184205e-05, - "loss": 2.9775, - "step": 297500 - }, - { - "epoch": 4.29, - "learning_rate": 7.007617653015127e-05, - "loss": 2.9754, - "step": 298000 - }, - { - "epoch": 4.3, - "learning_rate": 7.002424978839854e-05, - "loss": 2.9779, - "step": 298500 - }, - { - "epoch": 4.3, - "learning_rate": 6.997232304664579e-05, - "loss": 2.9719, - "step": 299000 - }, - { - "epoch": 4.31, - "learning_rate": 6.992039630489307e-05, - "loss": 2.9724, - "step": 299500 - }, - { - "epoch": 4.32, - "learning_rate": 6.986857341662382e-05, - "loss": 2.9752, - "step": 300000 - }, - { - "epoch": 4.33, - "learning_rate": 6.98166466748711e-05, - "loss": 2.9726, - "step": 300500 - }, - { - "epoch": 4.33, - "learning_rate": 6.976471993311836e-05, - "loss": 2.972, - "step": 301000 - }, - { - "epoch": 4.34, - "learning_rate": 6.971279319136562e-05, - "loss": 2.977, - "step": 301500 - }, - { - "epoch": 4.35, - "learning_rate": 6.96609703030964e-05, - "loss": 2.9734, - "step": 302000 - }, - { - "epoch": 4.35, - "learning_rate": 6.960904356134365e-05, - "loss": 2.9772, - "step": 302500 - }, - { - "epoch": 4.36, - "learning_rate": 6.955711681959093e-05, - "loss": 2.9741, - "step": 303000 - }, - { - "epoch": 4.37, - "learning_rate": 6.950519007783819e-05, - "loss": 2.9672, - "step": 303500 - }, - { - "epoch": 4.38, - "learning_rate": 6.945336718956897e-05, - "loss": 2.9734, - "step": 304000 - }, - { - "epoch": 4.38, - "learning_rate": 6.940144044781622e-05, - "loss": 2.9761, - "step": 304500 - }, - { - "epoch": 4.39, - "learning_rate": 6.934951370606349e-05, - "loss": 2.9695, - "step": 305000 - }, - { - "epoch": 4.4, - "learning_rate": 6.929758696431076e-05, - "loss": 2.9699, - "step": 305500 - }, - { - "epoch": 4.4, - "learning_rate": 6.924576407604152e-05, - "loss": 2.9721, - "step": 306000 - }, - { - "epoch": 4.41, - "learning_rate": 6.919383733428879e-05, - "loss": 2.968, - "step": 306500 - }, - { - "epoch": 4.42, - "learning_rate": 6.914191059253605e-05, - "loss": 2.9677, - "step": 307000 - }, - { - "epoch": 4.43, - "learning_rate": 6.908998385078332e-05, - "loss": 2.9723, - "step": 307500 - }, - { - "epoch": 4.43, - "learning_rate": 6.903816096251408e-05, - "loss": 2.9701, - "step": 308000 - }, - { - "epoch": 4.44, - "learning_rate": 6.898623422076135e-05, - "loss": 2.9717, - "step": 308500 - }, - { - "epoch": 4.45, - "learning_rate": 6.893430747900862e-05, - "loss": 2.9691, - "step": 309000 - }, - { - "epoch": 4.45, - "learning_rate": 6.888238073725589e-05, - "loss": 2.9677, - "step": 309500 - }, - { - "epoch": 4.46, - "learning_rate": 6.883055784898665e-05, - "loss": 2.9655, - "step": 310000 - }, - { - "epoch": 4.47, - "learning_rate": 6.877863110723392e-05, - "loss": 2.9741, - "step": 310500 - }, - { - "epoch": 4.48, - "learning_rate": 6.872680821896469e-05, - "loss": 2.9697, - "step": 311000 - }, - { - "epoch": 4.48, - "learning_rate": 6.867488147721196e-05, - "loss": 2.9706, - "step": 311500 - }, - { - "epoch": 4.49, - "learning_rate": 6.862295473545922e-05, - "loss": 2.9717, - "step": 312000 - }, - { - "epoch": 4.5, - "learning_rate": 6.857102799370648e-05, - "loss": 2.9739, - "step": 312500 - }, - { - "epoch": 4.51, - "learning_rate": 6.851910125195375e-05, - "loss": 2.9659, - "step": 313000 - }, - { - "epoch": 4.51, - "learning_rate": 6.846717451020102e-05, - "loss": 2.9669, - "step": 313500 - }, - { - "epoch": 4.52, - "learning_rate": 6.841524776844827e-05, - "loss": 2.9696, - "step": 314000 - }, - { - "epoch": 4.53, - "learning_rate": 6.836332102669554e-05, - "loss": 2.957, - "step": 314500 - }, - { - "epoch": 4.53, - "learning_rate": 6.83114981384263e-05, - "loss": 2.9674, - "step": 315000 - }, - { - "epoch": 4.54, - "learning_rate": 6.825957139667357e-05, - "loss": 2.9649, - "step": 315500 - }, - { - "epoch": 4.55, - "learning_rate": 6.820764465492084e-05, - "loss": 2.9639, - "step": 316000 - }, - { - "epoch": 4.56, - "learning_rate": 6.815571791316811e-05, - "loss": 2.9611, - "step": 316500 - }, - { - "epoch": 4.56, - "learning_rate": 6.810389502489888e-05, - "loss": 2.9636, - "step": 317000 - }, - { - "epoch": 4.57, - "learning_rate": 6.805196828314614e-05, - "loss": 2.9648, - "step": 317500 - }, - { - "epoch": 4.58, - "learning_rate": 6.80000415413934e-05, - "loss": 2.9625, - "step": 318000 - }, - { - "epoch": 4.58, - "learning_rate": 6.794811479964067e-05, - "loss": 2.9621, - "step": 318500 - }, - { - "epoch": 4.59, - "learning_rate": 6.789618805788794e-05, - "loss": 2.965, - "step": 319000 - }, - { - "epoch": 4.6, - "learning_rate": 6.78443651696187e-05, - "loss": 2.9637, - "step": 319500 - }, - { - "epoch": 4.61, - "learning_rate": 6.779243842786597e-05, - "loss": 2.9583, - "step": 320000 - }, - { - "epoch": 4.61, - "learning_rate": 6.774051168611324e-05, - "loss": 2.962, - "step": 320500 - }, - { - "epoch": 4.62, - "learning_rate": 6.76885849443605e-05, - "loss": 2.9632, - "step": 321000 - }, - { - "epoch": 4.63, - "learning_rate": 6.763676205609127e-05, - "loss": 2.9629, - "step": 321500 - }, - { - "epoch": 4.63, - "learning_rate": 6.758483531433854e-05, - "loss": 2.9639, - "step": 322000 - }, - { - "epoch": 4.64, - "learning_rate": 6.75329085725858e-05, - "loss": 2.9658, - "step": 322500 - }, - { - "epoch": 4.65, - "learning_rate": 6.748098183083306e-05, - "loss": 2.9575, - "step": 323000 - }, - { - "epoch": 4.66, - "learning_rate": 6.742915894256383e-05, - "loss": 2.9572, - "step": 323500 - }, - { - "epoch": 4.66, - "learning_rate": 6.73772322008111e-05, - "loss": 2.9583, - "step": 324000 - }, - { - "epoch": 4.67, - "learning_rate": 6.732530545905837e-05, - "loss": 2.9591, - "step": 324500 - }, - { - "epoch": 4.68, - "learning_rate": 6.727348257078913e-05, - "loss": 2.9583, - "step": 325000 - }, - { - "epoch": 4.69, - "learning_rate": 6.72215558290364e-05, - "loss": 2.9602, - "step": 325500 - }, - { - "epoch": 4.69, - "learning_rate": 6.716962908728366e-05, - "loss": 2.9629, - "step": 326000 - }, - { - "epoch": 4.7, - "learning_rate": 6.711770234553092e-05, - "loss": 2.9552, - "step": 326500 - }, - { - "epoch": 4.71, - "learning_rate": 6.706577560377819e-05, - "loss": 2.9538, - "step": 327000 - }, - { - "epoch": 4.71, - "learning_rate": 6.701384886202545e-05, - "loss": 2.9593, - "step": 327500 - }, - { - "epoch": 4.72, - "learning_rate": 6.696192212027273e-05, - "loss": 2.9516, - "step": 328000 - }, - { - "epoch": 4.73, - "learning_rate": 6.690999537851998e-05, - "loss": 2.9581, - "step": 328500 - }, - { - "epoch": 4.74, - "learning_rate": 6.685817249025076e-05, - "loss": 2.9589, - "step": 329000 - }, - { - "epoch": 4.74, - "learning_rate": 6.680624574849802e-05, - "loss": 2.9584, - "step": 329500 - }, - { - "epoch": 4.75, - "learning_rate": 6.675431900674529e-05, - "loss": 2.9634, - "step": 330000 - }, - { - "epoch": 4.76, - "learning_rate": 6.670239226499256e-05, - "loss": 2.9556, - "step": 330500 - }, - { - "epoch": 4.76, - "learning_rate": 6.665056937672332e-05, - "loss": 2.9562, - "step": 331000 - }, - { - "epoch": 4.77, - "learning_rate": 6.659864263497059e-05, - "loss": 2.9592, - "step": 331500 - }, - { - "epoch": 4.78, - "learning_rate": 6.654671589321784e-05, - "loss": 2.9575, - "step": 332000 - }, - { - "epoch": 4.79, - "learning_rate": 6.649478915146513e-05, - "loss": 2.9594, - "step": 332500 - }, - { - "epoch": 4.79, - "learning_rate": 6.644296626319588e-05, - "loss": 2.9524, - "step": 333000 - }, - { - "epoch": 4.8, - "learning_rate": 6.639103952144316e-05, - "loss": 2.9516, - "step": 333500 - }, - { - "epoch": 4.81, - "learning_rate": 6.633911277969042e-05, - "loss": 2.9551, - "step": 334000 - }, - { - "epoch": 4.81, - "learning_rate": 6.628718603793767e-05, - "loss": 2.9525, - "step": 334500 - }, - { - "epoch": 4.82, - "learning_rate": 6.623536314966845e-05, - "loss": 2.9525, - "step": 335000 - }, - { - "epoch": 4.83, - "learning_rate": 6.61834364079157e-05, - "loss": 2.9524, - "step": 335500 - }, - { - "epoch": 4.84, - "learning_rate": 6.613150966616299e-05, - "loss": 2.9569, - "step": 336000 - }, - { - "epoch": 4.84, - "learning_rate": 6.607968677789374e-05, - "loss": 2.9513, - "step": 336500 - }, - { - "epoch": 4.85, - "learning_rate": 6.602776003614102e-05, - "loss": 2.9519, - "step": 337000 - }, - { - "epoch": 4.86, - "learning_rate": 6.597583329438828e-05, - "loss": 2.9558, - "step": 337500 - }, - { - "epoch": 4.87, - "learning_rate": 6.592390655263554e-05, - "loss": 2.9499, - "step": 338000 - }, - { - "epoch": 4.87, - "learning_rate": 6.587208366436631e-05, - "loss": 2.9534, - "step": 338500 - }, - { - "epoch": 4.88, - "learning_rate": 6.582015692261358e-05, - "loss": 2.9502, - "step": 339000 - }, - { - "epoch": 4.89, - "learning_rate": 6.576823018086085e-05, - "loss": 2.9567, - "step": 339500 - }, - { - "epoch": 4.89, - "learning_rate": 6.57163034391081e-05, - "loss": 2.9484, - "step": 340000 - }, - { - "epoch": 4.9, - "learning_rate": 6.566437669735538e-05, - "loss": 2.9445, - "step": 340500 - }, - { - "epoch": 4.91, - "learning_rate": 6.561244995560264e-05, - "loss": 2.947, - "step": 341000 - }, - { - "epoch": 4.92, - "learning_rate": 6.55605232138499e-05, - "loss": 2.9539, - "step": 341500 - }, - { - "epoch": 4.92, - "learning_rate": 6.550859647209717e-05, - "loss": 2.9521, - "step": 342000 - }, - { - "epoch": 4.93, - "learning_rate": 6.545677358382794e-05, - "loss": 2.9462, - "step": 342500 - }, - { - "epoch": 4.94, - "learning_rate": 6.540495069555871e-05, - "loss": 2.9432, - "step": 343000 - }, - { - "epoch": 4.94, - "learning_rate": 6.535302395380598e-05, - "loss": 2.9502, - "step": 343500 - }, - { - "epoch": 4.95, - "learning_rate": 6.530109721205324e-05, - "loss": 2.9523, - "step": 344000 - }, - { - "epoch": 4.96, - "learning_rate": 6.52491704703005e-05, - "loss": 2.9488, - "step": 344500 - }, - { - "epoch": 4.97, - "learning_rate": 6.519724372854777e-05, - "loss": 2.9454, - "step": 345000 - }, - { - "epoch": 4.97, - "learning_rate": 6.514531698679504e-05, - "loss": 2.9517, - "step": 345500 - }, - { - "epoch": 4.98, - "learning_rate": 6.509339024504229e-05, - "loss": 2.9463, - "step": 346000 - }, - { - "epoch": 4.99, - "learning_rate": 6.504146350328956e-05, - "loss": 2.9481, - "step": 346500 - }, - { - "epoch": 4.99, - "learning_rate": 6.498964061502032e-05, - "loss": 2.9429, - "step": 347000 - }, - { - "epoch": 5.0, - "eval_accuracy": 0.4848880222099253, - "eval_loss": 2.776484966278076, - "eval_runtime": 557.0561, - "eval_samples_per_second": 967.474, - "eval_steps_per_second": 40.312, - "step": 347365 - }, - { - "epoch": 5.0, - "learning_rate": 6.49377138732676e-05, - "loss": 2.9526, - "step": 347500 - }, - { - "epoch": 5.01, - "learning_rate": 6.488578713151486e-05, - "loss": 2.9408, - "step": 348000 - }, - { - "epoch": 5.02, - "learning_rate": 6.483386038976213e-05, - "loss": 2.9338, - "step": 348500 - }, - { - "epoch": 5.02, - "learning_rate": 6.47820375014929e-05, - "loss": 2.9403, - "step": 349000 - }, - { - "epoch": 5.03, - "learning_rate": 6.473011075974016e-05, - "loss": 2.943, - "step": 349500 - }, - { - "epoch": 5.04, - "learning_rate": 6.467818401798743e-05, - "loss": 2.9435, - "step": 350000 - }, - { - "epoch": 5.05, - "learning_rate": 6.462625727623469e-05, - "loss": 2.9379, - "step": 350500 - }, - { - "epoch": 5.05, - "learning_rate": 6.457443438796547e-05, - "loss": 2.9415, - "step": 351000 - }, - { - "epoch": 5.06, - "learning_rate": 6.452250764621272e-05, - "loss": 2.939, - "step": 351500 - }, - { - "epoch": 5.07, - "learning_rate": 6.447058090445999e-05, - "loss": 2.9381, - "step": 352000 - }, - { - "epoch": 5.07, - "learning_rate": 6.441865416270726e-05, - "loss": 2.9406, - "step": 352500 - }, - { - "epoch": 5.08, - "learning_rate": 6.436683127443802e-05, - "loss": 2.9368, - "step": 353000 - }, - { - "epoch": 5.09, - "learning_rate": 6.431490453268529e-05, - "loss": 2.9434, - "step": 353500 - }, - { - "epoch": 5.1, - "learning_rate": 6.426308164441606e-05, - "loss": 2.9421, - "step": 354000 - }, - { - "epoch": 5.1, - "learning_rate": 6.421115490266333e-05, - "loss": 2.9441, - "step": 354500 - }, - { - "epoch": 5.11, - "learning_rate": 6.41592281609106e-05, - "loss": 2.9378, - "step": 355000 - }, - { - "epoch": 5.12, - "learning_rate": 6.410730141915785e-05, - "loss": 2.9469, - "step": 355500 - }, - { - "epoch": 5.12, - "learning_rate": 6.405537467740512e-05, - "loss": 2.9419, - "step": 356000 - }, - { - "epoch": 5.13, - "learning_rate": 6.400344793565239e-05, - "loss": 2.9434, - "step": 356500 - }, - { - "epoch": 5.14, - "learning_rate": 6.395152119389964e-05, - "loss": 2.9377, - "step": 357000 - }, - { - "epoch": 5.15, - "learning_rate": 6.389959445214691e-05, - "loss": 2.9334, - "step": 357500 - }, - { - "epoch": 5.15, - "learning_rate": 6.384777156387769e-05, - "loss": 2.9346, - "step": 358000 - }, - { - "epoch": 5.16, - "learning_rate": 6.379584482212494e-05, - "loss": 2.9419, - "step": 358500 - }, - { - "epoch": 5.17, - "learning_rate": 6.374391808037221e-05, - "loss": 2.9414, - "step": 359000 - }, - { - "epoch": 5.17, - "learning_rate": 6.369199133861948e-05, - "loss": 2.9414, - "step": 359500 - }, - { - "epoch": 5.18, - "learning_rate": 6.364016845035025e-05, - "loss": 2.9406, - "step": 360000 - }, - { - "epoch": 5.19, - "learning_rate": 6.358824170859752e-05, - "loss": 2.935, - "step": 360500 - }, - { - "epoch": 5.2, - "learning_rate": 6.353631496684478e-05, - "loss": 2.9327, - "step": 361000 - }, - { - "epoch": 5.2, - "learning_rate": 6.348438822509204e-05, - "loss": 2.9346, - "step": 361500 - }, - { - "epoch": 5.21, - "learning_rate": 6.343256533682282e-05, - "loss": 2.9414, - "step": 362000 - }, - { - "epoch": 5.22, - "learning_rate": 6.338063859507007e-05, - "loss": 2.9342, - "step": 362500 - }, - { - "epoch": 5.23, - "learning_rate": 6.332871185331734e-05, - "loss": 2.935, - "step": 363000 - }, - { - "epoch": 5.23, - "learning_rate": 6.327678511156461e-05, - "loss": 2.9388, - "step": 363500 - }, - { - "epoch": 5.24, - "learning_rate": 6.322496222329538e-05, - "loss": 2.9328, - "step": 364000 - }, - { - "epoch": 5.25, - "learning_rate": 6.317313933502614e-05, - "loss": 2.937, - "step": 364500 - }, - { - "epoch": 5.25, - "learning_rate": 6.312121259327341e-05, - "loss": 2.9353, - "step": 365000 - }, - { - "epoch": 5.26, - "learning_rate": 6.306928585152068e-05, - "loss": 2.933, - "step": 365500 - }, - { - "epoch": 5.27, - "learning_rate": 6.301735910976793e-05, - "loss": 2.9334, - "step": 366000 - }, - { - "epoch": 5.28, - "learning_rate": 6.296543236801522e-05, - "loss": 2.9291, - "step": 366500 - }, - { - "epoch": 5.28, - "learning_rate": 6.291350562626247e-05, - "loss": 2.9314, - "step": 367000 - }, - { - "epoch": 5.29, - "learning_rate": 6.286157888450974e-05, - "loss": 2.9353, - "step": 367500 - }, - { - "epoch": 5.3, - "learning_rate": 6.2809652142757e-05, - "loss": 2.9302, - "step": 368000 - }, - { - "epoch": 5.3, - "learning_rate": 6.275782925448777e-05, - "loss": 2.9355, - "step": 368500 - }, - { - "epoch": 5.31, - "learning_rate": 6.270590251273504e-05, - "loss": 2.9278, - "step": 369000 - }, - { - "epoch": 5.32, - "learning_rate": 6.26539757709823e-05, - "loss": 2.9383, - "step": 369500 - }, - { - "epoch": 5.33, - "learning_rate": 6.260204902922956e-05, - "loss": 2.9385, - "step": 370000 - }, - { - "epoch": 5.33, - "learning_rate": 6.255022614096033e-05, - "loss": 2.9357, - "step": 370500 - }, - { - "epoch": 5.34, - "learning_rate": 6.24982993992076e-05, - "loss": 2.9327, - "step": 371000 - }, - { - "epoch": 5.35, - "learning_rate": 6.244637265745487e-05, - "loss": 2.9336, - "step": 371500 - }, - { - "epoch": 5.35, - "learning_rate": 6.239444591570212e-05, - "loss": 2.9371, - "step": 372000 - }, - { - "epoch": 5.36, - "learning_rate": 6.23426230274329e-05, - "loss": 2.9332, - "step": 372500 - }, - { - "epoch": 5.37, - "learning_rate": 6.229069628568016e-05, - "loss": 2.9298, - "step": 373000 - }, - { - "epoch": 5.38, - "learning_rate": 6.223876954392744e-05, - "loss": 2.934, - "step": 373500 - }, - { - "epoch": 5.38, - "learning_rate": 6.218684280217469e-05, - "loss": 2.9302, - "step": 374000 - }, - { - "epoch": 5.39, - "learning_rate": 6.213512376738897e-05, - "loss": 2.9246, - "step": 374500 - }, - { - "epoch": 5.4, - "learning_rate": 6.208319702563623e-05, - "loss": 2.9322, - "step": 375000 - }, - { - "epoch": 5.4, - "learning_rate": 6.203127028388351e-05, - "loss": 2.9264, - "step": 375500 - }, - { - "epoch": 5.41, - "learning_rate": 6.197934354213076e-05, - "loss": 2.9265, - "step": 376000 - }, - { - "epoch": 5.42, - "learning_rate": 6.192741680037803e-05, - "loss": 2.929, - "step": 376500 - }, - { - "epoch": 5.43, - "learning_rate": 6.18755939121088e-05, - "loss": 2.9327, - "step": 377000 - }, - { - "epoch": 5.43, - "learning_rate": 6.182366717035606e-05, - "loss": 2.9272, - "step": 377500 - }, - { - "epoch": 5.44, - "learning_rate": 6.177174042860333e-05, - "loss": 2.937, - "step": 378000 - }, - { - "epoch": 5.45, - "learning_rate": 6.171981368685059e-05, - "loss": 2.9328, - "step": 378500 - }, - { - "epoch": 5.46, - "learning_rate": 6.166799079858137e-05, - "loss": 2.9304, - "step": 379000 - }, - { - "epoch": 5.46, - "learning_rate": 6.161606405682862e-05, - "loss": 2.9249, - "step": 379500 - }, - { - "epoch": 5.47, - "learning_rate": 6.15641373150759e-05, - "loss": 2.9265, - "step": 380000 - }, - { - "epoch": 5.48, - "learning_rate": 6.151221057332316e-05, - "loss": 2.9285, - "step": 380500 - }, - { - "epoch": 5.48, - "learning_rate": 6.146028383157041e-05, - "loss": 2.9333, - "step": 381000 - }, - { - "epoch": 5.49, - "learning_rate": 6.14083570898177e-05, - "loss": 2.9312, - "step": 381500 - }, - { - "epoch": 5.5, - "learning_rate": 6.135643034806495e-05, - "loss": 2.9301, - "step": 382000 - }, - { - "epoch": 5.51, - "learning_rate": 6.130450360631222e-05, - "loss": 2.9337, - "step": 382500 - }, - { - "epoch": 5.51, - "learning_rate": 6.125268071804298e-05, - "loss": 2.9327, - "step": 383000 - }, - { - "epoch": 5.52, - "learning_rate": 6.120075397629025e-05, - "loss": 2.9299, - "step": 383500 - }, - { - "epoch": 5.53, - "learning_rate": 6.114882723453752e-05, - "loss": 2.9291, - "step": 384000 - }, - { - "epoch": 5.53, - "learning_rate": 6.109690049278478e-05, - "loss": 2.9257, - "step": 384500 - }, - { - "epoch": 5.54, - "learning_rate": 6.104507760451556e-05, - "loss": 2.929, - "step": 385000 - }, - { - "epoch": 5.55, - "learning_rate": 6.099315086276282e-05, - "loss": 2.9248, - "step": 385500 - }, - { - "epoch": 5.56, - "learning_rate": 6.094132797449359e-05, - "loss": 2.9235, - "step": 386000 - }, - { - "epoch": 5.56, - "learning_rate": 6.088940123274085e-05, - "loss": 2.9279, - "step": 386500 - }, - { - "epoch": 5.57, - "learning_rate": 6.083747449098812e-05, - "loss": 2.9208, - "step": 387000 - }, - { - "epoch": 5.58, - "learning_rate": 6.078554774923538e-05, - "loss": 2.9243, - "step": 387500 - }, - { - "epoch": 5.58, - "learning_rate": 6.073362100748264e-05, - "loss": 2.9322, - "step": 388000 - }, - { - "epoch": 5.59, - "learning_rate": 6.068169426572992e-05, - "loss": 2.9229, - "step": 388500 - }, - { - "epoch": 5.6, - "learning_rate": 6.062976752397718e-05, - "loss": 2.9244, - "step": 389000 - }, - { - "epoch": 5.61, - "learning_rate": 6.0577840782224434e-05, - "loss": 2.9246, - "step": 389500 - }, - { - "epoch": 5.61, - "learning_rate": 6.0526017893955214e-05, - "loss": 2.923, - "step": 390000 - }, - { - "epoch": 5.62, - "learning_rate": 6.047409115220247e-05, - "loss": 2.9228, - "step": 390500 - }, - { - "epoch": 5.63, - "learning_rate": 6.0422164410449744e-05, - "loss": 2.9233, - "step": 391000 - }, - { - "epoch": 5.64, - "learning_rate": 6.0370237668697006e-05, - "loss": 2.9257, - "step": 391500 - }, - { - "epoch": 5.64, - "learning_rate": 6.031841478042778e-05, - "loss": 2.9258, - "step": 392000 - }, - { - "epoch": 5.65, - "learning_rate": 6.026648803867504e-05, - "loss": 2.9204, - "step": 392500 - }, - { - "epoch": 5.66, - "learning_rate": 6.02145612969223e-05, - "loss": 2.9237, - "step": 393000 - }, - { - "epoch": 5.66, - "learning_rate": 6.016263455516957e-05, - "loss": 2.9258, - "step": 393500 - }, - { - "epoch": 5.67, - "learning_rate": 6.0110811666900336e-05, - "loss": 2.9218, - "step": 394000 - }, - { - "epoch": 5.68, - "learning_rate": 6.0058884925147604e-05, - "loss": 2.9255, - "step": 394500 - }, - { - "epoch": 5.69, - "learning_rate": 6.000706203687837e-05, - "loss": 2.9159, - "step": 395000 - }, - { - "epoch": 5.69, - "learning_rate": 5.995513529512564e-05, - "loss": 2.9222, - "step": 395500 - }, - { - "epoch": 5.7, - "learning_rate": 5.99032085533729e-05, - "loss": 2.923, - "step": 396000 - }, - { - "epoch": 5.71, - "learning_rate": 5.9851281811620176e-05, - "loss": 2.9256, - "step": 396500 - }, - { - "epoch": 5.71, - "learning_rate": 5.979935506986744e-05, - "loss": 2.9139, - "step": 397000 - }, - { - "epoch": 5.72, - "learning_rate": 5.97474283281147e-05, - "loss": 2.9261, - "step": 397500 - }, - { - "epoch": 5.73, - "learning_rate": 5.969550158636197e-05, - "loss": 2.9281, - "step": 398000 - }, - { - "epoch": 5.74, - "learning_rate": 5.964357484460923e-05, - "loss": 2.9167, - "step": 398500 - }, - { - "epoch": 5.74, - "learning_rate": 5.959175195634e-05, - "loss": 2.9195, - "step": 399000 - }, - { - "epoch": 5.75, - "learning_rate": 5.953982521458726e-05, - "loss": 2.9219, - "step": 399500 - }, - { - "epoch": 5.76, - "learning_rate": 5.9487898472834524e-05, - "loss": 2.9154, - "step": 400000 - }, - { - "epoch": 5.76, - "learning_rate": 5.94359717310818e-05, - "loss": 2.919, - "step": 400500 - }, - { - "epoch": 5.77, - "learning_rate": 5.938414884281256e-05, - "loss": 2.9222, - "step": 401000 - }, - { - "epoch": 5.78, - "learning_rate": 5.933232595454333e-05, - "loss": 2.9198, - "step": 401500 - }, - { - "epoch": 5.79, - "learning_rate": 5.9280399212790594e-05, - "loss": 2.9175, - "step": 402000 - }, - { - "epoch": 5.79, - "learning_rate": 5.922847247103787e-05, - "loss": 2.921, - "step": 402500 - }, - { - "epoch": 5.8, - "learning_rate": 5.917654572928512e-05, - "loss": 2.92, - "step": 403000 - }, - { - "epoch": 5.81, - "learning_rate": 5.9124618987532385e-05, - "loss": 2.9212, - "step": 403500 - }, - { - "epoch": 5.82, - "learning_rate": 5.907269224577966e-05, - "loss": 2.9167, - "step": 404000 - }, - { - "epoch": 5.82, - "learning_rate": 5.902076550402692e-05, - "loss": 2.9267, - "step": 404500 - }, - { - "epoch": 5.83, - "learning_rate": 5.8968942615757694e-05, - "loss": 2.9164, - "step": 405000 - }, - { - "epoch": 5.84, - "learning_rate": 5.8917015874004956e-05, - "loss": 2.9178, - "step": 405500 - }, - { - "epoch": 5.84, - "learning_rate": 5.8865089132252224e-05, - "loss": 2.9217, - "step": 406000 - }, - { - "epoch": 5.85, - "learning_rate": 5.8813162390499486e-05, - "loss": 2.9161, - "step": 406500 - }, - { - "epoch": 5.86, - "learning_rate": 5.876123564874675e-05, - "loss": 2.9165, - "step": 407000 - }, - { - "epoch": 5.87, - "learning_rate": 5.870930890699402e-05, - "loss": 2.9161, - "step": 407500 - }, - { - "epoch": 5.87, - "learning_rate": 5.8657382165241284e-05, - "loss": 2.9128, - "step": 408000 - }, - { - "epoch": 5.88, - "learning_rate": 5.8605455423488545e-05, - "loss": 2.915, - "step": 408500 - }, - { - "epoch": 5.89, - "learning_rate": 5.855363253521932e-05, - "loss": 2.9123, - "step": 409000 - }, - { - "epoch": 5.89, - "learning_rate": 5.850170579346658e-05, - "loss": 2.9184, - "step": 409500 - }, - { - "epoch": 5.9, - "learning_rate": 5.844988290519735e-05, - "loss": 2.9174, - "step": 410000 - }, - { - "epoch": 5.91, - "learning_rate": 5.8397956163444614e-05, - "loss": 2.9165, - "step": 410500 - }, - { - "epoch": 5.92, - "learning_rate": 5.834602942169188e-05, - "loss": 2.92, - "step": 411000 - }, - { - "epoch": 5.92, - "learning_rate": 5.8294102679939144e-05, - "loss": 2.9177, - "step": 411500 - }, - { - "epoch": 5.93, - "learning_rate": 5.8242175938186406e-05, - "loss": 2.9106, - "step": 412000 - }, - { - "epoch": 5.94, - "learning_rate": 5.8190249196433674e-05, - "loss": 2.9112, - "step": 412500 - }, - { - "epoch": 5.94, - "learning_rate": 5.8138322454680936e-05, - "loss": 2.9091, - "step": 413000 - }, - { - "epoch": 5.95, - "learning_rate": 5.80863957129282e-05, - "loss": 2.9134, - "step": 413500 - }, - { - "epoch": 5.96, - "learning_rate": 5.803457282465897e-05, - "loss": 2.9193, - "step": 414000 - }, - { - "epoch": 5.97, - "learning_rate": 5.798264608290623e-05, - "loss": 2.9187, - "step": 414500 - }, - { - "epoch": 5.97, - "learning_rate": 5.793071934115351e-05, - "loss": 2.9112, - "step": 415000 - }, - { - "epoch": 5.98, - "learning_rate": 5.787879259940077e-05, - "loss": 2.9137, - "step": 415500 - }, - { - "epoch": 5.99, - "learning_rate": 5.782696971113154e-05, - "loss": 2.9156, - "step": 416000 - }, - { - "epoch": 6.0, - "learning_rate": 5.77750429693788e-05, - "loss": 2.912, - "step": 416500 - }, - { - "epoch": 6.0, - "eval_accuracy": 0.48902336641864935, - "eval_loss": 2.7482354640960693, - "eval_runtime": 555.619, - "eval_samples_per_second": 969.976, - "eval_steps_per_second": 40.416, - "step": 416838 - }, - { - "epoch": 6.0, - "learning_rate": 5.772311622762607e-05, - "loss": 2.9168, - "step": 417000 - }, - { - "epoch": 6.01, - "learning_rate": 5.767118948587333e-05, - "loss": 2.9066, - "step": 417500 - }, - { - "epoch": 6.02, - "learning_rate": 5.7619366597604106e-05, - "loss": 2.9085, - "step": 418000 - }, - { - "epoch": 6.02, - "learning_rate": 5.756743985585137e-05, - "loss": 2.9115, - "step": 418500 - }, - { - "epoch": 6.03, - "learning_rate": 5.751551311409863e-05, - "loss": 2.9096, - "step": 419000 - }, - { - "epoch": 6.04, - "learning_rate": 5.7463586372345904e-05, - "loss": 2.9079, - "step": 419500 - }, - { - "epoch": 6.05, - "learning_rate": 5.741176348407666e-05, - "loss": 2.908, - "step": 420000 - }, - { - "epoch": 6.05, - "learning_rate": 5.735983674232394e-05, - "loss": 2.9057, - "step": 420500 - }, - { - "epoch": 6.06, - "learning_rate": 5.73079100005712e-05, - "loss": 2.9085, - "step": 421000 - }, - { - "epoch": 6.07, - "learning_rate": 5.7255983258818454e-05, - "loss": 2.91, - "step": 421500 - }, - { - "epoch": 6.07, - "learning_rate": 5.720426422403273e-05, - "loss": 2.9037, - "step": 422000 - }, - { - "epoch": 6.08, - "learning_rate": 5.715233748228001e-05, - "loss": 2.9051, - "step": 422500 - }, - { - "epoch": 6.09, - "learning_rate": 5.710041074052727e-05, - "loss": 2.9055, - "step": 423000 - }, - { - "epoch": 6.1, - "learning_rate": 5.7048483998774524e-05, - "loss": 2.9031, - "step": 423500 - }, - { - "epoch": 6.1, - "learning_rate": 5.69965572570218e-05, - "loss": 2.9107, - "step": 424000 - }, - { - "epoch": 6.11, - "learning_rate": 5.694463051526906e-05, - "loss": 2.9119, - "step": 424500 - }, - { - "epoch": 6.12, - "learning_rate": 5.689270377351632e-05, - "loss": 2.9062, - "step": 425000 - }, - { - "epoch": 6.12, - "learning_rate": 5.6840880885247095e-05, - "loss": 2.9097, - "step": 425500 - }, - { - "epoch": 6.13, - "learning_rate": 5.678895414349436e-05, - "loss": 2.91, - "step": 426000 - }, - { - "epoch": 6.14, - "learning_rate": 5.6737027401741624e-05, - "loss": 2.9084, - "step": 426500 - }, - { - "epoch": 6.15, - "learning_rate": 5.6685100659988886e-05, - "loss": 2.9048, - "step": 427000 - }, - { - "epoch": 6.15, - "learning_rate": 5.663317391823616e-05, - "loss": 2.9078, - "step": 427500 - }, - { - "epoch": 6.16, - "learning_rate": 5.658124717648342e-05, - "loss": 2.9071, - "step": 428000 - }, - { - "epoch": 6.17, - "learning_rate": 5.6529424288214196e-05, - "loss": 2.9054, - "step": 428500 - }, - { - "epoch": 6.18, - "learning_rate": 5.647749754646146e-05, - "loss": 2.9039, - "step": 429000 - }, - { - "epoch": 6.18, - "learning_rate": 5.642557080470872e-05, - "loss": 2.9074, - "step": 429500 - }, - { - "epoch": 6.19, - "learning_rate": 5.637364406295599e-05, - "loss": 2.9046, - "step": 430000 - }, - { - "epoch": 6.2, - "learning_rate": 5.632171732120325e-05, - "loss": 2.9108, - "step": 430500 - }, - { - "epoch": 6.2, - "learning_rate": 5.626979057945051e-05, - "loss": 2.9057, - "step": 431000 - }, - { - "epoch": 6.21, - "learning_rate": 5.6217863837697785e-05, - "loss": 2.9084, - "step": 431500 - }, - { - "epoch": 6.22, - "learning_rate": 5.616593709594504e-05, - "loss": 2.9065, - "step": 432000 - }, - { - "epoch": 6.23, - "learning_rate": 5.611411420767582e-05, - "loss": 2.9067, - "step": 432500 - }, - { - "epoch": 6.23, - "learning_rate": 5.6062187465923074e-05, - "loss": 2.901, - "step": 433000 - }, - { - "epoch": 6.24, - "learning_rate": 5.6010260724170336e-05, - "loss": 2.9072, - "step": 433500 - }, - { - "epoch": 6.25, - "learning_rate": 5.595843783590111e-05, - "loss": 2.9099, - "step": 434000 - }, - { - "epoch": 6.25, - "learning_rate": 5.590651109414837e-05, - "loss": 2.9031, - "step": 434500 - }, - { - "epoch": 6.26, - "learning_rate": 5.5854584352395645e-05, - "loss": 2.9005, - "step": 435000 - }, - { - "epoch": 6.27, - "learning_rate": 5.580265761064291e-05, - "loss": 2.9049, - "step": 435500 - }, - { - "epoch": 6.28, - "learning_rate": 5.5750730868890175e-05, - "loss": 2.8981, - "step": 436000 - }, - { - "epoch": 6.28, - "learning_rate": 5.569880412713744e-05, - "loss": 2.9037, - "step": 436500 - }, - { - "epoch": 6.29, - "learning_rate": 5.56468773853847e-05, - "loss": 2.8981, - "step": 437000 - }, - { - "epoch": 6.3, - "learning_rate": 5.559495064363197e-05, - "loss": 2.8996, - "step": 437500 - }, - { - "epoch": 6.3, - "learning_rate": 5.5543231608846244e-05, - "loss": 2.9009, - "step": 438000 - }, - { - "epoch": 6.31, - "learning_rate": 5.5491304867093506e-05, - "loss": 2.9006, - "step": 438500 - }, - { - "epoch": 6.32, - "learning_rate": 5.543937812534077e-05, - "loss": 2.9065, - "step": 439000 - }, - { - "epoch": 6.33, - "learning_rate": 5.538745138358804e-05, - "loss": 2.9018, - "step": 439500 - }, - { - "epoch": 6.33, - "learning_rate": 5.5335524641835304e-05, - "loss": 2.8954, - "step": 440000 - }, - { - "epoch": 6.34, - "learning_rate": 5.5283597900082565e-05, - "loss": 2.8976, - "step": 440500 - }, - { - "epoch": 6.35, - "learning_rate": 5.523177501181334e-05, - "loss": 2.8944, - "step": 441000 - }, - { - "epoch": 6.35, - "learning_rate": 5.51798482700606e-05, - "loss": 2.8965, - "step": 441500 - }, - { - "epoch": 6.36, - "learning_rate": 5.512792152830787e-05, - "loss": 2.895, - "step": 442000 - }, - { - "epoch": 6.37, - "learning_rate": 5.507599478655513e-05, - "loss": 2.9018, - "step": 442500 - }, - { - "epoch": 6.38, - "learning_rate": 5.502406804480239e-05, - "loss": 2.9048, - "step": 443000 - }, - { - "epoch": 6.38, - "learning_rate": 5.497214130304966e-05, - "loss": 2.8997, - "step": 443500 - }, - { - "epoch": 6.39, - "learning_rate": 5.492021456129692e-05, - "loss": 2.8998, - "step": 444000 - }, - { - "epoch": 6.4, - "learning_rate": 5.4868287819544196e-05, - "loss": 2.9007, - "step": 444500 - }, - { - "epoch": 6.41, - "learning_rate": 5.4816464931274956e-05, - "loss": 2.9055, - "step": 445000 - }, - { - "epoch": 6.41, - "learning_rate": 5.476453818952223e-05, - "loss": 2.8977, - "step": 445500 - }, - { - "epoch": 6.42, - "learning_rate": 5.471261144776949e-05, - "loss": 2.9003, - "step": 446000 - }, - { - "epoch": 6.43, - "learning_rate": 5.4660684706016754e-05, - "loss": 2.8967, - "step": 446500 - }, - { - "epoch": 6.43, - "learning_rate": 5.460886181774753e-05, - "loss": 2.8997, - "step": 447000 - }, - { - "epoch": 6.44, - "learning_rate": 5.455693507599479e-05, - "loss": 2.8943, - "step": 447500 - }, - { - "epoch": 6.45, - "learning_rate": 5.4505008334242056e-05, - "loss": 2.9019, - "step": 448000 - }, - { - "epoch": 6.46, - "learning_rate": 5.445308159248932e-05, - "loss": 2.8945, - "step": 448500 - }, - { - "epoch": 6.46, - "learning_rate": 5.440115485073658e-05, - "loss": 2.9044, - "step": 449000 - }, - { - "epoch": 6.47, - "learning_rate": 5.4349228108983855e-05, - "loss": 2.8998, - "step": 449500 - }, - { - "epoch": 6.48, - "learning_rate": 5.429730136723111e-05, - "loss": 2.9025, - "step": 450000 - }, - { - "epoch": 6.48, - "learning_rate": 5.424537462547837e-05, - "loss": 2.8977, - "step": 450500 - }, - { - "epoch": 6.49, - "learning_rate": 5.419355173720915e-05, - "loss": 2.901, - "step": 451000 - }, - { - "epoch": 6.5, - "learning_rate": 5.4141624995456405e-05, - "loss": 2.9014, - "step": 451500 - }, - { - "epoch": 6.51, - "learning_rate": 5.408969825370368e-05, - "loss": 2.8957, - "step": 452000 - }, - { - "epoch": 6.51, - "learning_rate": 5.403777151195094e-05, - "loss": 2.8948, - "step": 452500 - }, - { - "epoch": 6.52, - "learning_rate": 5.3985948623681715e-05, - "loss": 2.8997, - "step": 453000 - }, - { - "epoch": 6.53, - "learning_rate": 5.3934125735412474e-05, - "loss": 2.9, - "step": 453500 - }, - { - "epoch": 6.53, - "learning_rate": 5.388219899365975e-05, - "loss": 2.8956, - "step": 454000 - }, - { - "epoch": 6.54, - "learning_rate": 5.383027225190701e-05, - "loss": 2.8962, - "step": 454500 - }, - { - "epoch": 6.55, - "learning_rate": 5.377834551015428e-05, - "loss": 2.8925, - "step": 455000 - }, - { - "epoch": 6.56, - "learning_rate": 5.372641876840154e-05, - "loss": 2.893, - "step": 455500 - }, - { - "epoch": 6.56, - "learning_rate": 5.3674595880132314e-05, - "loss": 2.8941, - "step": 456000 - }, - { - "epoch": 6.57, - "learning_rate": 5.3622669138379575e-05, - "loss": 2.8929, - "step": 456500 - }, - { - "epoch": 6.58, - "learning_rate": 5.357074239662684e-05, - "loss": 2.8962, - "step": 457000 - }, - { - "epoch": 6.59, - "learning_rate": 5.351881565487411e-05, - "loss": 2.8941, - "step": 457500 - }, - { - "epoch": 6.59, - "learning_rate": 5.3466888913121373e-05, - "loss": 2.8964, - "step": 458000 - }, - { - "epoch": 6.6, - "learning_rate": 5.3414962171368635e-05, - "loss": 2.8902, - "step": 458500 - }, - { - "epoch": 6.61, - "learning_rate": 5.33630354296159e-05, - "loss": 2.8918, - "step": 459000 - }, - { - "epoch": 6.61, - "learning_rate": 5.331121254134667e-05, - "loss": 2.8899, - "step": 459500 - }, - { - "epoch": 6.62, - "learning_rate": 5.325928579959394e-05, - "loss": 2.892, - "step": 460000 - }, - { - "epoch": 6.63, - "learning_rate": 5.32073590578412e-05, - "loss": 2.8875, - "step": 460500 - }, - { - "epoch": 6.64, - "learning_rate": 5.315543231608846e-05, - "loss": 2.8966, - "step": 461000 - }, - { - "epoch": 6.64, - "learning_rate": 5.310350557433573e-05, - "loss": 2.8965, - "step": 461500 - }, - { - "epoch": 6.65, - "learning_rate": 5.305157883258299e-05, - "loss": 2.8911, - "step": 462000 - }, - { - "epoch": 6.66, - "learning_rate": 5.299965209083025e-05, - "loss": 2.8978, - "step": 462500 - }, - { - "epoch": 6.66, - "learning_rate": 5.294772534907753e-05, - "loss": 2.8879, - "step": 463000 - }, - { - "epoch": 6.67, - "learning_rate": 5.289590246080829e-05, - "loss": 2.8947, - "step": 463500 - }, - { - "epoch": 6.68, - "learning_rate": 5.284397571905556e-05, - "loss": 2.8923, - "step": 464000 - }, - { - "epoch": 6.69, - "learning_rate": 5.2792152830786335e-05, - "loss": 2.8935, - "step": 464500 - }, - { - "epoch": 6.69, - "learning_rate": 5.2740226089033596e-05, - "loss": 2.8976, - "step": 465000 - }, - { - "epoch": 6.7, - "learning_rate": 5.268829934728086e-05, - "loss": 2.8908, - "step": 465500 - }, - { - "epoch": 6.71, - "learning_rate": 5.2636372605528126e-05, - "loss": 2.8968, - "step": 466000 - }, - { - "epoch": 6.71, - "learning_rate": 5.258454971725889e-05, - "loss": 2.8926, - "step": 466500 - }, - { - "epoch": 6.72, - "learning_rate": 5.253262297550616e-05, - "loss": 2.8911, - "step": 467000 - }, - { - "epoch": 6.73, - "learning_rate": 5.248069623375342e-05, - "loss": 2.8876, - "step": 467500 - }, - { - "epoch": 6.74, - "learning_rate": 5.2428769492000684e-05, - "loss": 2.8953, - "step": 468000 - }, - { - "epoch": 6.74, - "learning_rate": 5.237684275024796e-05, - "loss": 2.8906, - "step": 468500 - }, - { - "epoch": 6.75, - "learning_rate": 5.232491600849522e-05, - "loss": 2.8908, - "step": 469000 - }, - { - "epoch": 6.76, - "learning_rate": 5.2272989266742475e-05, - "loss": 2.8863, - "step": 469500 - }, - { - "epoch": 6.77, - "learning_rate": 5.222106252498975e-05, - "loss": 2.893, - "step": 470000 - }, - { - "epoch": 6.77, - "learning_rate": 5.216923963672051e-05, - "loss": 2.8898, - "step": 470500 - }, - { - "epoch": 6.78, - "learning_rate": 5.2117312894967785e-05, - "loss": 2.8915, - "step": 471000 - }, - { - "epoch": 6.79, - "learning_rate": 5.2065386153215046e-05, - "loss": 2.8898, - "step": 471500 - }, - { - "epoch": 6.79, - "learning_rate": 5.201345941146231e-05, - "loss": 2.8905, - "step": 472000 - }, - { - "epoch": 6.8, - "learning_rate": 5.196163652319308e-05, - "loss": 2.8931, - "step": 472500 - }, - { - "epoch": 6.81, - "learning_rate": 5.190970978144034e-05, - "loss": 2.8879, - "step": 473000 - }, - { - "epoch": 6.82, - "learning_rate": 5.1857886893171115e-05, - "loss": 2.8862, - "step": 473500 - }, - { - "epoch": 6.82, - "learning_rate": 5.1805960151418383e-05, - "loss": 2.892, - "step": 474000 - }, - { - "epoch": 6.83, - "learning_rate": 5.1754033409665645e-05, - "loss": 2.8941, - "step": 474500 - }, - { - "epoch": 6.84, - "learning_rate": 5.1702106667912906e-05, - "loss": 2.8931, - "step": 475000 - }, - { - "epoch": 6.84, - "learning_rate": 5.165017992616018e-05, - "loss": 2.8913, - "step": 475500 - }, - { - "epoch": 6.85, - "learning_rate": 5.159825318440744e-05, - "loss": 2.8887, - "step": 476000 - }, - { - "epoch": 6.86, - "learning_rate": 5.1546326442654705e-05, - "loss": 2.8858, - "step": 476500 - }, - { - "epoch": 6.87, - "learning_rate": 5.149439970090197e-05, - "loss": 2.8906, - "step": 477000 - }, - { - "epoch": 6.87, - "learning_rate": 5.144257681263274e-05, - "loss": 2.8896, - "step": 477500 - }, - { - "epoch": 6.88, - "learning_rate": 5.139075392436351e-05, - "loss": 2.8849, - "step": 478000 - }, - { - "epoch": 6.89, - "learning_rate": 5.1338827182610774e-05, - "loss": 2.8898, - "step": 478500 - }, - { - "epoch": 6.89, - "learning_rate": 5.128690044085804e-05, - "loss": 2.8887, - "step": 479000 - }, - { - "epoch": 6.9, - "learning_rate": 5.1234973699105303e-05, - "loss": 2.889, - "step": 479500 - }, - { - "epoch": 6.91, - "learning_rate": 5.1183046957352565e-05, - "loss": 2.8917, - "step": 480000 - }, - { - "epoch": 6.92, - "learning_rate": 5.113112021559984e-05, - "loss": 2.8886, - "step": 480500 - }, - { - "epoch": 6.92, - "learning_rate": 5.1079193473847095e-05, - "loss": 2.8882, - "step": 481000 - }, - { - "epoch": 6.93, - "learning_rate": 5.1027266732094356e-05, - "loss": 2.8889, - "step": 481500 - }, - { - "epoch": 6.94, - "learning_rate": 5.097544384382513e-05, - "loss": 2.8809, - "step": 482000 - }, - { - "epoch": 6.95, - "learning_rate": 5.092351710207239e-05, - "loss": 2.8855, - "step": 482500 - }, - { - "epoch": 6.95, - "learning_rate": 5.0871590360319666e-05, - "loss": 2.89, - "step": 483000 - }, - { - "epoch": 6.96, - "learning_rate": 5.081966361856693e-05, - "loss": 2.887, - "step": 483500 - }, - { - "epoch": 6.97, - "learning_rate": 5.07678407302977e-05, - "loss": 2.8858, - "step": 484000 - }, - { - "epoch": 6.97, - "learning_rate": 5.071591398854496e-05, - "loss": 2.8844, - "step": 484500 - }, - { - "epoch": 6.98, - "learning_rate": 5.066398724679223e-05, - "loss": 2.8914, - "step": 485000 - }, - { - "epoch": 6.99, - "learning_rate": 5.061206050503949e-05, - "loss": 2.8832, - "step": 485500 - }, - { - "epoch": 7.0, - "learning_rate": 5.056034147025377e-05, - "loss": 2.8813, - "step": 486000 - }, - { - "epoch": 7.0, - "eval_accuracy": 0.49375853291067817, - "eval_loss": 2.7103359699249268, - "eval_runtime": 555.3691, - "eval_samples_per_second": 970.412, - "eval_steps_per_second": 40.434, - "step": 486311 - }, - { - "epoch": 7.0, - "learning_rate": 5.050841472850103e-05, - "loss": 2.8813, - "step": 486500 - }, - { - "epoch": 7.01, - "learning_rate": 5.04564879867483e-05, - "loss": 2.8798, - "step": 487000 - }, - { - "epoch": 7.02, - "learning_rate": 5.040456124499556e-05, - "loss": 2.8848, - "step": 487500 - }, - { - "epoch": 7.02, - "learning_rate": 5.035263450324282e-05, - "loss": 2.8811, - "step": 488000 - }, - { - "epoch": 7.03, - "learning_rate": 5.03007077614901e-05, - "loss": 2.8786, - "step": 488500 - }, - { - "epoch": 7.04, - "learning_rate": 5.024878101973736e-05, - "loss": 2.8777, - "step": 489000 - }, - { - "epoch": 7.05, - "learning_rate": 5.019685427798462e-05, - "loss": 2.8776, - "step": 489500 - }, - { - "epoch": 7.05, - "learning_rate": 5.0145031389715393e-05, - "loss": 2.8799, - "step": 490000 - }, - { - "epoch": 7.06, - "learning_rate": 5.0093208501446166e-05, - "loss": 2.883, - "step": 490500 - }, - { - "epoch": 7.07, - "learning_rate": 5.004128175969343e-05, - "loss": 2.8822, - "step": 491000 - }, - { - "epoch": 7.07, - "learning_rate": 4.998935501794069e-05, - "loss": 2.8739, - "step": 491500 - }, - { - "epoch": 7.08, - "learning_rate": 4.993742827618796e-05, - "loss": 2.8832, - "step": 492000 - }, - { - "epoch": 7.09, - "learning_rate": 4.988550153443522e-05, - "loss": 2.8728, - "step": 492500 - }, - { - "epoch": 7.1, - "learning_rate": 4.983357479268249e-05, - "loss": 2.8796, - "step": 493000 - }, - { - "epoch": 7.1, - "learning_rate": 4.978164805092975e-05, - "loss": 2.8773, - "step": 493500 - }, - { - "epoch": 7.11, - "learning_rate": 4.972972130917701e-05, - "loss": 2.8783, - "step": 494000 - }, - { - "epoch": 7.12, - "learning_rate": 4.9677898420907784e-05, - "loss": 2.8848, - "step": 494500 - }, - { - "epoch": 7.13, - "learning_rate": 4.962607553263856e-05, - "loss": 2.879, - "step": 495000 - }, - { - "epoch": 7.13, - "learning_rate": 4.957414879088582e-05, - "loss": 2.879, - "step": 495500 - }, - { - "epoch": 7.14, - "learning_rate": 4.9522222049133086e-05, - "loss": 2.8763, - "step": 496000 - }, - { - "epoch": 7.15, - "learning_rate": 4.947029530738035e-05, - "loss": 2.8815, - "step": 496500 - }, - { - "epoch": 7.15, - "learning_rate": 4.9418368565627616e-05, - "loss": 2.8765, - "step": 497000 - }, - { - "epoch": 7.16, - "learning_rate": 4.9366441823874885e-05, - "loss": 2.8775, - "step": 497500 - }, - { - "epoch": 7.17, - "learning_rate": 4.931451508212214e-05, - "loss": 2.8765, - "step": 498000 - }, - { - "epoch": 7.18, - "learning_rate": 4.926258834036941e-05, - "loss": 2.8789, - "step": 498500 - }, - { - "epoch": 7.18, - "learning_rate": 4.9210765452100174e-05, - "loss": 2.8767, - "step": 499000 - }, - { - "epoch": 7.19, - "learning_rate": 4.915883871034744e-05, - "loss": 2.8755, - "step": 499500 - }, - { - "epoch": 7.2, - "learning_rate": 4.910691196859471e-05, - "loss": 2.8766, - "step": 500000 - }, - { - "epoch": 7.2, - "learning_rate": 4.905498522684198e-05, - "loss": 2.8829, - "step": 500500 - }, - { - "epoch": 7.21, - "learning_rate": 4.9003162338572745e-05, - "loss": 2.8784, - "step": 501000 - }, - { - "epoch": 7.22, - "learning_rate": 4.895123559682001e-05, - "loss": 2.8789, - "step": 501500 - }, - { - "epoch": 7.23, - "learning_rate": 4.889941270855078e-05, - "loss": 2.8769, - "step": 502000 - }, - { - "epoch": 7.23, - "learning_rate": 4.884748596679805e-05, - "loss": 2.8792, - "step": 502500 - }, - { - "epoch": 7.24, - "learning_rate": 4.879555922504531e-05, - "loss": 2.8722, - "step": 503000 - }, - { - "epoch": 7.25, - "learning_rate": 4.874363248329257e-05, - "loss": 2.8766, - "step": 503500 - }, - { - "epoch": 7.25, - "learning_rate": 4.869170574153984e-05, - "loss": 2.8786, - "step": 504000 - }, - { - "epoch": 7.26, - "learning_rate": 4.86397789997871e-05, - "loss": 2.8784, - "step": 504500 - }, - { - "epoch": 7.27, - "learning_rate": 4.858785225803437e-05, - "loss": 2.8774, - "step": 505000 - }, - { - "epoch": 7.28, - "learning_rate": 4.853592551628163e-05, - "loss": 2.8709, - "step": 505500 - }, - { - "epoch": 7.28, - "learning_rate": 4.8484102628012403e-05, - "loss": 2.8734, - "step": 506000 - }, - { - "epoch": 7.29, - "learning_rate": 4.8432175886259665e-05, - "loss": 2.874, - "step": 506500 - }, - { - "epoch": 7.3, - "learning_rate": 4.838024914450693e-05, - "loss": 2.8691, - "step": 507000 - }, - { - "epoch": 7.3, - "learning_rate": 4.8328322402754195e-05, - "loss": 2.8739, - "step": 507500 - }, - { - "epoch": 7.31, - "learning_rate": 4.827649951448497e-05, - "loss": 2.8743, - "step": 508000 - }, - { - "epoch": 7.32, - "learning_rate": 4.8224676626215734e-05, - "loss": 2.8738, - "step": 508500 - }, - { - "epoch": 7.33, - "learning_rate": 4.8172749884463e-05, - "loss": 2.8711, - "step": 509000 - }, - { - "epoch": 7.33, - "learning_rate": 4.8120823142710264e-05, - "loss": 2.8737, - "step": 509500 - }, - { - "epoch": 7.34, - "learning_rate": 4.806889640095753e-05, - "loss": 2.8775, - "step": 510000 - }, - { - "epoch": 7.35, - "learning_rate": 4.8016969659204794e-05, - "loss": 2.8719, - "step": 510500 - }, - { - "epoch": 7.36, - "learning_rate": 4.796504291745206e-05, - "loss": 2.8738, - "step": 511000 - }, - { - "epoch": 7.36, - "learning_rate": 4.791322002918283e-05, - "loss": 2.8789, - "step": 511500 - }, - { - "epoch": 7.37, - "learning_rate": 4.7861293287430096e-05, - "loss": 2.873, - "step": 512000 - }, - { - "epoch": 7.38, - "learning_rate": 4.780936654567736e-05, - "loss": 2.8702, - "step": 512500 - }, - { - "epoch": 7.38, - "learning_rate": 4.7757439803924626e-05, - "loss": 2.8784, - "step": 513000 - }, - { - "epoch": 7.39, - "learning_rate": 4.7705513062171895e-05, - "loss": 2.868, - "step": 513500 - }, - { - "epoch": 7.4, - "learning_rate": 4.765358632041915e-05, - "loss": 2.8754, - "step": 514000 - }, - { - "epoch": 7.41, - "learning_rate": 4.760176343214993e-05, - "loss": 2.8758, - "step": 514500 - }, - { - "epoch": 7.41, - "learning_rate": 4.754983669039719e-05, - "loss": 2.8736, - "step": 515000 - }, - { - "epoch": 7.42, - "learning_rate": 4.749790994864445e-05, - "loss": 2.8699, - "step": 515500 - }, - { - "epoch": 7.43, - "learning_rate": 4.744598320689172e-05, - "loss": 2.8705, - "step": 516000 - }, - { - "epoch": 7.43, - "learning_rate": 4.739405646513899e-05, - "loss": 2.8713, - "step": 516500 - }, - { - "epoch": 7.44, - "learning_rate": 4.734212972338625e-05, - "loss": 2.8763, - "step": 517000 - }, - { - "epoch": 7.45, - "learning_rate": 4.729020298163351e-05, - "loss": 2.8744, - "step": 517500 - }, - { - "epoch": 7.46, - "learning_rate": 4.7238380093364285e-05, - "loss": 2.8726, - "step": 518000 - }, - { - "epoch": 7.46, - "learning_rate": 4.7186453351611546e-05, - "loss": 2.8754, - "step": 518500 - }, - { - "epoch": 7.47, - "learning_rate": 4.7134526609858815e-05, - "loss": 2.8741, - "step": 519000 - }, - { - "epoch": 7.48, - "learning_rate": 4.708259986810608e-05, - "loss": 2.8763, - "step": 519500 - }, - { - "epoch": 7.48, - "learning_rate": 4.7030673126353344e-05, - "loss": 2.8752, - "step": 520000 - }, - { - "epoch": 7.49, - "learning_rate": 4.6978746384600606e-05, - "loss": 2.8681, - "step": 520500 - }, - { - "epoch": 7.5, - "learning_rate": 4.6926819642847874e-05, - "loss": 2.8696, - "step": 521000 - }, - { - "epoch": 7.51, - "learning_rate": 4.6874892901095136e-05, - "loss": 2.8672, - "step": 521500 - }, - { - "epoch": 7.51, - "learning_rate": 4.6823173866309413e-05, - "loss": 2.8729, - "step": 522000 - }, - { - "epoch": 7.52, - "learning_rate": 4.6771247124556675e-05, - "loss": 2.8745, - "step": 522500 - }, - { - "epoch": 7.53, - "learning_rate": 4.671932038280394e-05, - "loss": 2.8692, - "step": 523000 - }, - { - "epoch": 7.54, - "learning_rate": 4.6667393641051205e-05, - "loss": 2.8736, - "step": 523500 - }, - { - "epoch": 7.54, - "learning_rate": 4.661546689929847e-05, - "loss": 2.8702, - "step": 524000 - }, - { - "epoch": 7.55, - "learning_rate": 4.6563540157545735e-05, - "loss": 2.8747, - "step": 524500 - }, - { - "epoch": 7.56, - "learning_rate": 4.6511613415793e-05, - "loss": 2.8673, - "step": 525000 - }, - { - "epoch": 7.56, - "learning_rate": 4.6459686674040264e-05, - "loss": 2.8686, - "step": 525500 - }, - { - "epoch": 7.57, - "learning_rate": 4.640786378577104e-05, - "loss": 2.8671, - "step": 526000 - }, - { - "epoch": 7.58, - "learning_rate": 4.63559370440183e-05, - "loss": 2.8714, - "step": 526500 - }, - { - "epoch": 7.59, - "learning_rate": 4.630401030226557e-05, - "loss": 2.8695, - "step": 527000 - }, - { - "epoch": 7.59, - "learning_rate": 4.6252083560512835e-05, - "loss": 2.8713, - "step": 527500 - }, - { - "epoch": 7.6, - "learning_rate": 4.62002606722436e-05, - "loss": 2.8692, - "step": 528000 - }, - { - "epoch": 7.61, - "learning_rate": 4.614833393049087e-05, - "loss": 2.8701, - "step": 528500 - }, - { - "epoch": 7.61, - "learning_rate": 4.6096511042221636e-05, - "loss": 2.8685, - "step": 529000 - }, - { - "epoch": 7.62, - "learning_rate": 4.6044584300468905e-05, - "loss": 2.8695, - "step": 529500 - }, - { - "epoch": 7.63, - "learning_rate": 4.599276141219967e-05, - "loss": 2.8624, - "step": 530000 - }, - { - "epoch": 7.64, - "learning_rate": 4.594083467044694e-05, - "loss": 2.8658, - "step": 530500 - }, - { - "epoch": 7.64, - "learning_rate": 4.58889079286942e-05, - "loss": 2.8725, - "step": 531000 - }, - { - "epoch": 7.65, - "learning_rate": 4.583698118694146e-05, - "loss": 2.8695, - "step": 531500 - }, - { - "epoch": 7.66, - "learning_rate": 4.578505444518873e-05, - "loss": 2.862, - "step": 532000 - }, - { - "epoch": 7.66, - "learning_rate": 4.5733127703436e-05, - "loss": 2.8737, - "step": 532500 - }, - { - "epoch": 7.67, - "learning_rate": 4.568120096168326e-05, - "loss": 2.8715, - "step": 533000 - }, - { - "epoch": 7.68, - "learning_rate": 4.562927421993052e-05, - "loss": 2.8704, - "step": 533500 - }, - { - "epoch": 7.69, - "learning_rate": 4.557734747817779e-05, - "loss": 2.8641, - "step": 534000 - }, - { - "epoch": 7.69, - "learning_rate": 4.552542073642506e-05, - "loss": 2.8709, - "step": 534500 - }, - { - "epoch": 7.7, - "learning_rate": 4.547349399467232e-05, - "loss": 2.8693, - "step": 535000 - }, - { - "epoch": 7.71, - "learning_rate": 4.542156725291958e-05, - "loss": 2.8679, - "step": 535500 - }, - { - "epoch": 7.72, - "learning_rate": 4.5369744364650354e-05, - "loss": 2.8706, - "step": 536000 - }, - { - "epoch": 7.72, - "learning_rate": 4.5317817622897616e-05, - "loss": 2.8684, - "step": 536500 - }, - { - "epoch": 7.73, - "learning_rate": 4.5265890881144884e-05, - "loss": 2.8682, - "step": 537000 - }, - { - "epoch": 7.74, - "learning_rate": 4.5213964139392146e-05, - "loss": 2.8671, - "step": 537500 - }, - { - "epoch": 7.74, - "learning_rate": 4.516214125112292e-05, - "loss": 2.8749, - "step": 538000 - }, - { - "epoch": 7.75, - "learning_rate": 4.511021450937018e-05, - "loss": 2.8687, - "step": 538500 - }, - { - "epoch": 7.76, - "learning_rate": 4.505828776761745e-05, - "loss": 2.8699, - "step": 539000 - }, - { - "epoch": 7.77, - "learning_rate": 4.500646487934822e-05, - "loss": 2.8659, - "step": 539500 - }, - { - "epoch": 7.77, - "learning_rate": 4.495453813759548e-05, - "loss": 2.8665, - "step": 540000 - }, - { - "epoch": 7.78, - "learning_rate": 4.4902611395842745e-05, - "loss": 2.8668, - "step": 540500 - }, - { - "epoch": 7.79, - "learning_rate": 4.485068465409001e-05, - "loss": 2.872, - "step": 541000 - }, - { - "epoch": 7.79, - "learning_rate": 4.4798757912337274e-05, - "loss": 2.8627, - "step": 541500 - }, - { - "epoch": 7.8, - "learning_rate": 4.474683117058454e-05, - "loss": 2.8638, - "step": 542000 - }, - { - "epoch": 7.81, - "learning_rate": 4.4694904428831804e-05, - "loss": 2.8647, - "step": 542500 - }, - { - "epoch": 7.82, - "learning_rate": 4.4642977687079066e-05, - "loss": 2.8621, - "step": 543000 - }, - { - "epoch": 7.82, - "learning_rate": 4.4591154798809845e-05, - "loss": 2.8669, - "step": 543500 - }, - { - "epoch": 7.83, - "learning_rate": 4.453922805705711e-05, - "loss": 2.8648, - "step": 544000 - }, - { - "epoch": 7.84, - "learning_rate": 4.448730131530437e-05, - "loss": 2.8584, - "step": 544500 - }, - { - "epoch": 7.84, - "learning_rate": 4.443537457355164e-05, - "loss": 2.8608, - "step": 545000 - }, - { - "epoch": 7.85, - "learning_rate": 4.43835516852824e-05, - "loss": 2.8663, - "step": 545500 - }, - { - "epoch": 7.86, - "learning_rate": 4.433162494352967e-05, - "loss": 2.8612, - "step": 546000 - }, - { - "epoch": 7.87, - "learning_rate": 4.427969820177694e-05, - "loss": 2.8626, - "step": 546500 - }, - { - "epoch": 7.87, - "learning_rate": 4.4227771460024194e-05, - "loss": 2.8659, - "step": 547000 - }, - { - "epoch": 7.88, - "learning_rate": 4.4175948571754974e-05, - "loss": 2.8647, - "step": 547500 - }, - { - "epoch": 7.89, - "learning_rate": 4.4124021830002236e-05, - "loss": 2.8618, - "step": 548000 - }, - { - "epoch": 7.9, - "learning_rate": 4.407219894173301e-05, - "loss": 2.8597, - "step": 548500 - }, - { - "epoch": 7.9, - "learning_rate": 4.402027219998027e-05, - "loss": 2.8669, - "step": 549000 - }, - { - "epoch": 7.91, - "learning_rate": 4.396834545822753e-05, - "loss": 2.8605, - "step": 549500 - }, - { - "epoch": 7.92, - "learning_rate": 4.39164187164748e-05, - "loss": 2.8683, - "step": 550000 - }, - { - "epoch": 7.92, - "learning_rate": 4.386449197472207e-05, - "loss": 2.8648, - "step": 550500 - }, - { - "epoch": 7.93, - "learning_rate": 4.381256523296933e-05, - "loss": 2.8607, - "step": 551000 - }, - { - "epoch": 7.94, - "learning_rate": 4.376063849121659e-05, - "loss": 2.8627, - "step": 551500 - }, - { - "epoch": 7.95, - "learning_rate": 4.370871174946386e-05, - "loss": 2.8654, - "step": 552000 - }, - { - "epoch": 7.95, - "learning_rate": 4.3656888861194626e-05, - "loss": 2.8593, - "step": 552500 - }, - { - "epoch": 7.96, - "learning_rate": 4.36050659729254e-05, - "loss": 2.8665, - "step": 553000 - }, - { - "epoch": 7.97, - "learning_rate": 4.355313923117266e-05, - "loss": 2.8612, - "step": 553500 - }, - { - "epoch": 7.97, - "learning_rate": 4.350121248941993e-05, - "loss": 2.8664, - "step": 554000 - }, - { - "epoch": 7.98, - "learning_rate": 4.34492857476672e-05, - "loss": 2.8526, - "step": 554500 - }, - { - "epoch": 7.99, - "learning_rate": 4.339735900591446e-05, - "loss": 2.8589, - "step": 555000 - }, - { - "epoch": 8.0, - "learning_rate": 4.334553611764523e-05, - "loss": 2.8609, - "step": 555500 - }, - { - "epoch": 8.0, - "eval_accuracy": 0.49625970451459295, - "eval_loss": 2.6880686283111572, - "eval_runtime": 555.8516, - "eval_samples_per_second": 969.57, - "eval_steps_per_second": 40.399, - "step": 555784 - }, - { - "epoch": 8.0, - "learning_rate": 4.329360937589249e-05, - "loss": 2.8652, - "step": 556000 - }, - { - "epoch": 8.01, - "learning_rate": 4.3241682634139755e-05, - "loss": 2.86, - "step": 556500 - }, - { - "epoch": 8.02, - "learning_rate": 4.318975589238702e-05, - "loss": 2.8523, - "step": 557000 - }, - { - "epoch": 8.02, - "learning_rate": 4.3137829150634284e-05, - "loss": 2.8513, - "step": 557500 - }, - { - "epoch": 8.03, - "learning_rate": 4.308590240888155e-05, - "loss": 2.8526, - "step": 558000 - }, - { - "epoch": 8.04, - "learning_rate": 4.3033975667128814e-05, - "loss": 2.8663, - "step": 558500 - }, - { - "epoch": 8.05, - "learning_rate": 4.298204892537608e-05, - "loss": 2.8554, - "step": 559000 - }, - { - "epoch": 8.05, - "learning_rate": 4.2930226037106855e-05, - "loss": 2.8556, - "step": 559500 - }, - { - "epoch": 8.06, - "learning_rate": 4.287829929535412e-05, - "loss": 2.856, - "step": 560000 - }, - { - "epoch": 8.07, - "learning_rate": 4.282647640708489e-05, - "loss": 2.8588, - "step": 560500 - }, - { - "epoch": 8.08, - "learning_rate": 4.277454966533215e-05, - "loss": 2.8524, - "step": 561000 - }, - { - "epoch": 8.08, - "learning_rate": 4.272262292357941e-05, - "loss": 2.8519, - "step": 561500 - }, - { - "epoch": 8.09, - "learning_rate": 4.267069618182668e-05, - "loss": 2.8579, - "step": 562000 - }, - { - "epoch": 8.1, - "learning_rate": 4.261876944007395e-05, - "loss": 2.853, - "step": 562500 - }, - { - "epoch": 8.1, - "learning_rate": 4.256684269832121e-05, - "loss": 2.8629, - "step": 563000 - }, - { - "epoch": 8.11, - "learning_rate": 4.251491595656847e-05, - "loss": 2.8584, - "step": 563500 - }, - { - "epoch": 8.12, - "learning_rate": 4.246298921481574e-05, - "loss": 2.853, - "step": 564000 - }, - { - "epoch": 8.13, - "learning_rate": 4.241116632654651e-05, - "loss": 2.8529, - "step": 564500 - }, - { - "epoch": 8.13, - "learning_rate": 4.2359239584793775e-05, - "loss": 2.8497, - "step": 565000 - }, - { - "epoch": 8.14, - "learning_rate": 4.2307312843041044e-05, - "loss": 2.8558, - "step": 565500 - }, - { - "epoch": 8.15, - "learning_rate": 4.2255386101288305e-05, - "loss": 2.8552, - "step": 566000 - }, - { - "epoch": 8.15, - "learning_rate": 4.220356321301908e-05, - "loss": 2.8553, - "step": 566500 - }, - { - "epoch": 8.16, - "learning_rate": 4.215163647126634e-05, - "loss": 2.8547, - "step": 567000 - }, - { - "epoch": 8.17, - "learning_rate": 4.20997097295136e-05, - "loss": 2.8513, - "step": 567500 - }, - { - "epoch": 8.18, - "learning_rate": 4.204778298776087e-05, - "loss": 2.855, - "step": 568000 - }, - { - "epoch": 8.18, - "learning_rate": 4.199585624600814e-05, - "loss": 2.8517, - "step": 568500 - }, - { - "epoch": 8.19, - "learning_rate": 4.1944033357738904e-05, - "loss": 2.8513, - "step": 569000 - }, - { - "epoch": 8.2, - "learning_rate": 4.189210661598617e-05, - "loss": 2.8525, - "step": 569500 - }, - { - "epoch": 8.2, - "learning_rate": 4.184028372771694e-05, - "loss": 2.8587, - "step": 570000 - }, - { - "epoch": 8.21, - "learning_rate": 4.178835698596421e-05, - "loss": 2.8499, - "step": 570500 - }, - { - "epoch": 8.22, - "learning_rate": 4.173643024421147e-05, - "loss": 2.8525, - "step": 571000 - }, - { - "epoch": 8.23, - "learning_rate": 4.168450350245873e-05, - "loss": 2.8523, - "step": 571500 - }, - { - "epoch": 8.23, - "learning_rate": 4.1632576760706e-05, - "loss": 2.851, - "step": 572000 - }, - { - "epoch": 8.24, - "learning_rate": 4.1580650018953267e-05, - "loss": 2.861, - "step": 572500 - }, - { - "epoch": 8.25, - "learning_rate": 4.152872327720053e-05, - "loss": 2.8567, - "step": 573000 - }, - { - "epoch": 8.26, - "learning_rate": 4.147679653544779e-05, - "loss": 2.8534, - "step": 573500 - }, - { - "epoch": 8.26, - "learning_rate": 4.142497364717856e-05, - "loss": 2.8493, - "step": 574000 - }, - { - "epoch": 8.27, - "learning_rate": 4.1373046905425824e-05, - "loss": 2.8514, - "step": 574500 - }, - { - "epoch": 8.28, - "learning_rate": 4.132112016367309e-05, - "loss": 2.8511, - "step": 575000 - }, - { - "epoch": 8.28, - "learning_rate": 4.1269193421920354e-05, - "loss": 2.8514, - "step": 575500 - }, - { - "epoch": 8.29, - "learning_rate": 4.121726668016762e-05, - "loss": 2.854, - "step": 576000 - }, - { - "epoch": 8.3, - "learning_rate": 4.116533993841489e-05, - "loss": 2.8539, - "step": 576500 - }, - { - "epoch": 8.31, - "learning_rate": 4.111351705014566e-05, - "loss": 2.8456, - "step": 577000 - }, - { - "epoch": 8.31, - "learning_rate": 4.1061590308392925e-05, - "loss": 2.8602, - "step": 577500 - }, - { - "epoch": 8.32, - "learning_rate": 4.1009663566640187e-05, - "loss": 2.8506, - "step": 578000 - }, - { - "epoch": 8.33, - "learning_rate": 4.095773682488745e-05, - "loss": 2.8524, - "step": 578500 - }, - { - "epoch": 8.33, - "learning_rate": 4.0905810083134716e-05, - "loss": 2.8504, - "step": 579000 - }, - { - "epoch": 8.34, - "learning_rate": 4.085398719486548e-05, - "loss": 2.8484, - "step": 579500 - }, - { - "epoch": 8.35, - "learning_rate": 4.080206045311275e-05, - "loss": 2.8479, - "step": 580000 - }, - { - "epoch": 8.36, - "learning_rate": 4.075013371136002e-05, - "loss": 2.8561, - "step": 580500 - }, - { - "epoch": 8.36, - "learning_rate": 4.069820696960728e-05, - "loss": 2.85, - "step": 581000 - }, - { - "epoch": 8.37, - "learning_rate": 4.064628022785454e-05, - "loss": 2.8561, - "step": 581500 - }, - { - "epoch": 8.38, - "learning_rate": 4.059435348610181e-05, - "loss": 2.8504, - "step": 582000 - }, - { - "epoch": 8.38, - "learning_rate": 4.054242674434908e-05, - "loss": 2.8555, - "step": 582500 - }, - { - "epoch": 8.39, - "learning_rate": 4.0490603856079845e-05, - "loss": 2.8438, - "step": 583000 - }, - { - "epoch": 8.4, - "learning_rate": 4.043867711432711e-05, - "loss": 2.8494, - "step": 583500 - }, - { - "epoch": 8.41, - "learning_rate": 4.0386750372574375e-05, - "loss": 2.8473, - "step": 584000 - }, - { - "epoch": 8.41, - "learning_rate": 4.0334823630821636e-05, - "loss": 2.8479, - "step": 584500 - }, - { - "epoch": 8.42, - "learning_rate": 4.0282896889068905e-05, - "loss": 2.8488, - "step": 585000 - }, - { - "epoch": 8.43, - "learning_rate": 4.0230970147316166e-05, - "loss": 2.8506, - "step": 585500 - }, - { - "epoch": 8.43, - "learning_rate": 4.0179043405563434e-05, - "loss": 2.8476, - "step": 586000 - }, - { - "epoch": 8.44, - "learning_rate": 4.0127116663810696e-05, - "loss": 2.8448, - "step": 586500 - }, - { - "epoch": 8.45, - "learning_rate": 4.007529377554147e-05, - "loss": 2.8518, - "step": 587000 - }, - { - "epoch": 8.46, - "learning_rate": 4.002336703378873e-05, - "loss": 2.8421, - "step": 587500 - }, - { - "epoch": 8.46, - "learning_rate": 3.9971440292036e-05, - "loss": 2.8544, - "step": 588000 - }, - { - "epoch": 8.47, - "learning_rate": 3.9919617403766765e-05, - "loss": 2.8506, - "step": 588500 - }, - { - "epoch": 8.48, - "learning_rate": 3.986769066201403e-05, - "loss": 2.8495, - "step": 589000 - }, - { - "epoch": 8.49, - "learning_rate": 3.9815763920261295e-05, - "loss": 2.8504, - "step": 589500 - }, - { - "epoch": 8.49, - "learning_rate": 3.976383717850856e-05, - "loss": 2.844, - "step": 590000 - }, - { - "epoch": 8.5, - "learning_rate": 3.971191043675583e-05, - "loss": 2.8472, - "step": 590500 - }, - { - "epoch": 8.51, - "learning_rate": 3.9659983695003086e-05, - "loss": 2.8443, - "step": 591000 - }, - { - "epoch": 8.51, - "learning_rate": 3.9608056953250354e-05, - "loss": 2.8546, - "step": 591500 - }, - { - "epoch": 8.52, - "learning_rate": 3.955623406498113e-05, - "loss": 2.8484, - "step": 592000 - }, - { - "epoch": 8.53, - "learning_rate": 3.950430732322839e-05, - "loss": 2.8438, - "step": 592500 - }, - { - "epoch": 8.54, - "learning_rate": 3.945238058147566e-05, - "loss": 2.8479, - "step": 593000 - }, - { - "epoch": 8.54, - "learning_rate": 3.9400453839722926e-05, - "loss": 2.8455, - "step": 593500 - }, - { - "epoch": 8.55, - "learning_rate": 3.934852709797018e-05, - "loss": 2.8422, - "step": 594000 - }, - { - "epoch": 8.56, - "learning_rate": 3.929660035621745e-05, - "loss": 2.847, - "step": 594500 - }, - { - "epoch": 8.56, - "learning_rate": 3.924467361446472e-05, - "loss": 2.8453, - "step": 595000 - }, - { - "epoch": 8.57, - "learning_rate": 3.919274687271198e-05, - "loss": 2.8533, - "step": 595500 - }, - { - "epoch": 8.58, - "learning_rate": 3.9141027837926256e-05, - "loss": 2.8511, - "step": 596000 - }, - { - "epoch": 8.59, - "learning_rate": 3.908910109617352e-05, - "loss": 2.8456, - "step": 596500 - }, - { - "epoch": 8.59, - "learning_rate": 3.9037174354420786e-05, - "loss": 2.8501, - "step": 597000 - }, - { - "epoch": 8.6, - "learning_rate": 3.8985247612668054e-05, - "loss": 2.8479, - "step": 597500 - }, - { - "epoch": 8.61, - "learning_rate": 3.8933320870915316e-05, - "loss": 2.8488, - "step": 598000 - }, - { - "epoch": 8.61, - "learning_rate": 3.888139412916258e-05, - "loss": 2.8456, - "step": 598500 - }, - { - "epoch": 8.62, - "learning_rate": 3.8829467387409846e-05, - "loss": 2.8479, - "step": 599000 - }, - { - "epoch": 8.63, - "learning_rate": 3.877764449914061e-05, - "loss": 2.8457, - "step": 599500 - }, - { - "epoch": 8.64, - "learning_rate": 3.872571775738788e-05, - "loss": 2.8439, - "step": 600000 - }, - { - "epoch": 8.64, - "learning_rate": 3.867379101563514e-05, - "loss": 2.8488, - "step": 600500 - }, - { - "epoch": 8.65, - "learning_rate": 3.862186427388241e-05, - "loss": 2.8437, - "step": 601000 - }, - { - "epoch": 8.66, - "learning_rate": 3.856993753212967e-05, - "loss": 2.8445, - "step": 601500 - }, - { - "epoch": 8.67, - "learning_rate": 3.851801079037694e-05, - "loss": 2.842, - "step": 602000 - }, - { - "epoch": 8.67, - "learning_rate": 3.8466187902107706e-05, - "loss": 2.8458, - "step": 602500 - }, - { - "epoch": 8.68, - "learning_rate": 3.8414261160354974e-05, - "loss": 2.8489, - "step": 603000 - }, - { - "epoch": 8.69, - "learning_rate": 3.8362334418602236e-05, - "loss": 2.8407, - "step": 603500 - }, - { - "epoch": 8.69, - "learning_rate": 3.8310407676849504e-05, - "loss": 2.8452, - "step": 604000 - }, - { - "epoch": 8.7, - "learning_rate": 3.8258480935096766e-05, - "loss": 2.8495, - "step": 604500 - }, - { - "epoch": 8.71, - "learning_rate": 3.820655419334403e-05, - "loss": 2.8426, - "step": 605000 - }, - { - "epoch": 8.72, - "learning_rate": 3.8154627451591295e-05, - "loss": 2.845, - "step": 605500 - }, - { - "epoch": 8.72, - "learning_rate": 3.810280456332207e-05, - "loss": 2.8437, - "step": 606000 - }, - { - "epoch": 8.73, - "learning_rate": 3.805087782156933e-05, - "loss": 2.8411, - "step": 606500 - }, - { - "epoch": 8.74, - "learning_rate": 3.79989510798166e-05, - "loss": 2.8449, - "step": 607000 - }, - { - "epoch": 8.74, - "learning_rate": 3.7947024338063866e-05, - "loss": 2.8474, - "step": 607500 - }, - { - "epoch": 8.75, - "learning_rate": 3.789520144979463e-05, - "loss": 2.8431, - "step": 608000 - }, - { - "epoch": 8.76, - "learning_rate": 3.78432747080419e-05, - "loss": 2.8437, - "step": 608500 - }, - { - "epoch": 8.77, - "learning_rate": 3.7791347966289156e-05, - "loss": 2.8445, - "step": 609000 - }, - { - "epoch": 8.77, - "learning_rate": 3.7739421224536424e-05, - "loss": 2.8438, - "step": 609500 - }, - { - "epoch": 8.78, - "learning_rate": 3.768749448278369e-05, - "loss": 2.8441, - "step": 610000 - }, - { - "epoch": 8.79, - "learning_rate": 3.763567159451446e-05, - "loss": 2.8497, - "step": 610500 - }, - { - "epoch": 8.79, - "learning_rate": 3.758374485276173e-05, - "loss": 2.8466, - "step": 611000 - }, - { - "epoch": 8.8, - "learning_rate": 3.7531818111008995e-05, - "loss": 2.8451, - "step": 611500 - }, - { - "epoch": 8.81, - "learning_rate": 3.747989136925626e-05, - "loss": 2.8407, - "step": 612000 - }, - { - "epoch": 8.82, - "learning_rate": 3.742806848098703e-05, - "loss": 2.8453, - "step": 612500 - }, - { - "epoch": 8.82, - "learning_rate": 3.737614173923429e-05, - "loss": 2.8403, - "step": 613000 - }, - { - "epoch": 8.83, - "learning_rate": 3.732421499748155e-05, - "loss": 2.8445, - "step": 613500 - }, - { - "epoch": 8.84, - "learning_rate": 3.727228825572882e-05, - "loss": 2.8422, - "step": 614000 - }, - { - "epoch": 8.85, - "learning_rate": 3.722036151397608e-05, - "loss": 2.8411, - "step": 614500 - }, - { - "epoch": 8.85, - "learning_rate": 3.716843477222335e-05, - "loss": 2.8402, - "step": 615000 - }, - { - "epoch": 8.86, - "learning_rate": 3.711650803047061e-05, - "loss": 2.8368, - "step": 615500 - }, - { - "epoch": 8.87, - "learning_rate": 3.706458128871788e-05, - "loss": 2.846, - "step": 616000 - }, - { - "epoch": 8.87, - "learning_rate": 3.701286225393216e-05, - "loss": 2.8407, - "step": 616500 - }, - { - "epoch": 8.88, - "learning_rate": 3.696093551217942e-05, - "loss": 2.8388, - "step": 617000 - }, - { - "epoch": 8.89, - "learning_rate": 3.690900877042668e-05, - "loss": 2.8414, - "step": 617500 - }, - { - "epoch": 8.9, - "learning_rate": 3.685708202867395e-05, - "loss": 2.8437, - "step": 618000 - }, - { - "epoch": 8.9, - "learning_rate": 3.680515528692121e-05, - "loss": 2.8418, - "step": 618500 - }, - { - "epoch": 8.91, - "learning_rate": 3.675322854516848e-05, - "loss": 2.8413, - "step": 619000 - }, - { - "epoch": 8.92, - "learning_rate": 3.6701405656899246e-05, - "loss": 2.8469, - "step": 619500 - }, - { - "epoch": 8.92, - "learning_rate": 3.6649478915146514e-05, - "loss": 2.8402, - "step": 620000 - }, - { - "epoch": 8.93, - "learning_rate": 3.6597552173393776e-05, - "loss": 2.842, - "step": 620500 - }, - { - "epoch": 8.94, - "learning_rate": 3.6545625431641044e-05, - "loss": 2.84, - "step": 621000 - }, - { - "epoch": 8.95, - "learning_rate": 3.6493698689888305e-05, - "loss": 2.8442, - "step": 621500 - }, - { - "epoch": 8.95, - "learning_rate": 3.6441771948135574e-05, - "loss": 2.8432, - "step": 622000 - }, - { - "epoch": 8.96, - "learning_rate": 3.638984520638284e-05, - "loss": 2.8384, - "step": 622500 - }, - { - "epoch": 8.97, - "learning_rate": 3.63379184646301e-05, - "loss": 2.8414, - "step": 623000 - }, - { - "epoch": 8.97, - "learning_rate": 3.6286095576360876e-05, - "loss": 2.8429, - "step": 623500 - }, - { - "epoch": 8.98, - "learning_rate": 3.623416883460813e-05, - "loss": 2.8397, - "step": 624000 - }, - { - "epoch": 8.99, - "learning_rate": 3.61822420928554e-05, - "loss": 2.8377, - "step": 624500 - }, - { - "epoch": 9.0, - "learning_rate": 3.6130419204586166e-05, - "loss": 2.8352, - "step": 625000 - }, - { - "epoch": 9.0, - "eval_accuracy": 0.49907601112677125, - "eval_loss": 2.6701717376708984, - "eval_runtime": 557.6796, - "eval_samples_per_second": 966.392, - "eval_steps_per_second": 40.267, - "step": 625257 - }, - { - "epoch": 9.0, - "learning_rate": 3.6078492462833434e-05, - "loss": 2.84, - "step": 625500 - }, - { - "epoch": 9.01, - "learning_rate": 3.60265657210807e-05, - "loss": 2.8314, - "step": 626000 - }, - { - "epoch": 9.02, - "learning_rate": 3.597463897932797e-05, - "loss": 2.8356, - "step": 626500 - }, - { - "epoch": 9.03, - "learning_rate": 3.592271223757523e-05, - "loss": 2.8391, - "step": 627000 - }, - { - "epoch": 9.03, - "learning_rate": 3.5870785495822494e-05, - "loss": 2.8317, - "step": 627500 - }, - { - "epoch": 9.04, - "learning_rate": 3.581896260755327e-05, - "loss": 2.8298, - "step": 628000 - }, - { - "epoch": 9.05, - "learning_rate": 3.576703586580053e-05, - "loss": 2.8356, - "step": 628500 - }, - { - "epoch": 9.05, - "learning_rate": 3.5715109124047796e-05, - "loss": 2.8319, - "step": 629000 - }, - { - "epoch": 9.06, - "learning_rate": 3.566318238229506e-05, - "loss": 2.8361, - "step": 629500 - }, - { - "epoch": 9.07, - "learning_rate": 3.5611255640542326e-05, - "loss": 2.8352, - "step": 630000 - }, - { - "epoch": 9.08, - "learning_rate": 3.555932889878959e-05, - "loss": 2.8362, - "step": 630500 - }, - { - "epoch": 9.08, - "learning_rate": 3.5507402157036856e-05, - "loss": 2.838, - "step": 631000 - }, - { - "epoch": 9.09, - "learning_rate": 3.545547541528412e-05, - "loss": 2.8323, - "step": 631500 - }, - { - "epoch": 9.1, - "learning_rate": 3.540365252701489e-05, - "loss": 2.8342, - "step": 632000 - }, - { - "epoch": 9.1, - "learning_rate": 3.535172578526215e-05, - "loss": 2.8374, - "step": 632500 - }, - { - "epoch": 9.11, - "learning_rate": 3.5299902896992925e-05, - "loss": 2.835, - "step": 633000 - }, - { - "epoch": 9.12, - "learning_rate": 3.524797615524019e-05, - "loss": 2.8346, - "step": 633500 - }, - { - "epoch": 9.13, - "learning_rate": 3.5196049413487455e-05, - "loss": 2.8382, - "step": 634000 - }, - { - "epoch": 9.13, - "learning_rate": 3.5144122671734716e-05, - "loss": 2.8309, - "step": 634500 - }, - { - "epoch": 9.14, - "learning_rate": 3.5092195929981985e-05, - "loss": 2.8379, - "step": 635000 - }, - { - "epoch": 9.15, - "learning_rate": 3.504037304171275e-05, - "loss": 2.8338, - "step": 635500 - }, - { - "epoch": 9.15, - "learning_rate": 3.498844629996002e-05, - "loss": 2.8332, - "step": 636000 - }, - { - "epoch": 9.16, - "learning_rate": 3.493651955820728e-05, - "loss": 2.8359, - "step": 636500 - }, - { - "epoch": 9.17, - "learning_rate": 3.488459281645455e-05, - "loss": 2.8306, - "step": 637000 - }, - { - "epoch": 9.18, - "learning_rate": 3.483266607470182e-05, - "loss": 2.839, - "step": 637500 - }, - { - "epoch": 9.18, - "learning_rate": 3.478073933294907e-05, - "loss": 2.8349, - "step": 638000 - }, - { - "epoch": 9.19, - "learning_rate": 3.472881259119634e-05, - "loss": 2.8302, - "step": 638500 - }, - { - "epoch": 9.2, - "learning_rate": 3.467698970292711e-05, - "loss": 2.828, - "step": 639000 - }, - { - "epoch": 9.21, - "learning_rate": 3.4625062961174375e-05, - "loss": 2.8344, - "step": 639500 - }, - { - "epoch": 9.21, - "learning_rate": 3.457313621942164e-05, - "loss": 2.8365, - "step": 640000 - }, - { - "epoch": 9.22, - "learning_rate": 3.452120947766891e-05, - "loss": 2.8357, - "step": 640500 - }, - { - "epoch": 9.23, - "learning_rate": 3.4469282735916166e-05, - "loss": 2.8318, - "step": 641000 - }, - { - "epoch": 9.23, - "learning_rate": 3.4417459847646946e-05, - "loss": 2.8369, - "step": 641500 - }, - { - "epoch": 9.24, - "learning_rate": 3.436553310589421e-05, - "loss": 2.8379, - "step": 642000 - }, - { - "epoch": 9.25, - "learning_rate": 3.431360636414147e-05, - "loss": 2.8327, - "step": 642500 - }, - { - "epoch": 9.26, - "learning_rate": 3.426167962238874e-05, - "loss": 2.8311, - "step": 643000 - }, - { - "epoch": 9.26, - "learning_rate": 3.4209752880636e-05, - "loss": 2.8275, - "step": 643500 - }, - { - "epoch": 9.27, - "learning_rate": 3.415782613888327e-05, - "loss": 2.834, - "step": 644000 - }, - { - "epoch": 9.28, - "learning_rate": 3.410589939713053e-05, - "loss": 2.8364, - "step": 644500 - }, - { - "epoch": 9.28, - "learning_rate": 3.40540765088613e-05, - "loss": 2.832, - "step": 645000 - }, - { - "epoch": 9.29, - "learning_rate": 3.400214976710856e-05, - "loss": 2.8355, - "step": 645500 - }, - { - "epoch": 9.3, - "learning_rate": 3.395022302535583e-05, - "loss": 2.8331, - "step": 646000 - }, - { - "epoch": 9.31, - "learning_rate": 3.389829628360309e-05, - "loss": 2.833, - "step": 646500 - }, - { - "epoch": 9.31, - "learning_rate": 3.3846473395333866e-05, - "loss": 2.8313, - "step": 647000 - }, - { - "epoch": 9.32, - "learning_rate": 3.379454665358113e-05, - "loss": 2.8312, - "step": 647500 - }, - { - "epoch": 9.33, - "learning_rate": 3.3742619911828396e-05, - "loss": 2.8333, - "step": 648000 - }, - { - "epoch": 9.33, - "learning_rate": 3.369069317007566e-05, - "loss": 2.8306, - "step": 648500 - }, - { - "epoch": 9.34, - "learning_rate": 3.363887028180643e-05, - "loss": 2.832, - "step": 649000 - }, - { - "epoch": 9.35, - "learning_rate": 3.358694354005369e-05, - "loss": 2.8346, - "step": 649500 - }, - { - "epoch": 9.36, - "learning_rate": 3.353501679830096e-05, - "loss": 2.8321, - "step": 650000 - }, - { - "epoch": 9.36, - "learning_rate": 3.348309005654822e-05, - "loss": 2.8266, - "step": 650500 - }, - { - "epoch": 9.37, - "learning_rate": 3.343116331479549e-05, - "loss": 2.836, - "step": 651000 - }, - { - "epoch": 9.38, - "learning_rate": 3.3379340426526256e-05, - "loss": 2.8332, - "step": 651500 - }, - { - "epoch": 9.38, - "learning_rate": 3.3327413684773525e-05, - "loss": 2.8319, - "step": 652000 - }, - { - "epoch": 9.39, - "learning_rate": 3.3275486943020786e-05, - "loss": 2.8319, - "step": 652500 - }, - { - "epoch": 9.4, - "learning_rate": 3.322356020126805e-05, - "loss": 2.8284, - "step": 653000 - }, - { - "epoch": 9.41, - "learning_rate": 3.3171633459515316e-05, - "loss": 2.8259, - "step": 653500 - }, - { - "epoch": 9.41, - "learning_rate": 3.3119706717762584e-05, - "loss": 2.8288, - "step": 654000 - }, - { - "epoch": 9.42, - "learning_rate": 3.306777997600985e-05, - "loss": 2.8329, - "step": 654500 - }, - { - "epoch": 9.43, - "learning_rate": 3.301585323425711e-05, - "loss": 2.8321, - "step": 655000 - }, - { - "epoch": 9.44, - "learning_rate": 3.296403034598789e-05, - "loss": 2.8312, - "step": 655500 - }, - { - "epoch": 9.44, - "learning_rate": 3.291210360423514e-05, - "loss": 2.8308, - "step": 656000 - }, - { - "epoch": 9.45, - "learning_rate": 3.286017686248241e-05, - "loss": 2.8266, - "step": 656500 - }, - { - "epoch": 9.46, - "learning_rate": 3.280825012072968e-05, - "loss": 2.8317, - "step": 657000 - }, - { - "epoch": 9.46, - "learning_rate": 3.2756427232460445e-05, - "loss": 2.8337, - "step": 657500 - }, - { - "epoch": 9.47, - "learning_rate": 3.270450049070771e-05, - "loss": 2.8302, - "step": 658000 - }, - { - "epoch": 9.48, - "learning_rate": 3.2652573748954974e-05, - "loss": 2.8275, - "step": 658500 - }, - { - "epoch": 9.49, - "learning_rate": 3.260064700720224e-05, - "loss": 2.8284, - "step": 659000 - }, - { - "epoch": 9.49, - "learning_rate": 3.2548824118933016e-05, - "loss": 2.8285, - "step": 659500 - }, - { - "epoch": 9.5, - "learning_rate": 3.249689737718028e-05, - "loss": 2.8306, - "step": 660000 - }, - { - "epoch": 9.51, - "learning_rate": 3.244497063542754e-05, - "loss": 2.8324, - "step": 660500 - }, - { - "epoch": 9.51, - "learning_rate": 3.239304389367481e-05, - "loss": 2.8272, - "step": 661000 - }, - { - "epoch": 9.52, - "learning_rate": 3.234111715192207e-05, - "loss": 2.8337, - "step": 661500 - }, - { - "epoch": 9.53, - "learning_rate": 3.228919041016934e-05, - "loss": 2.8277, - "step": 662000 - }, - { - "epoch": 9.54, - "learning_rate": 3.22372636684166e-05, - "loss": 2.8251, - "step": 662500 - }, - { - "epoch": 9.54, - "learning_rate": 3.218544078014737e-05, - "loss": 2.831, - "step": 663000 - }, - { - "epoch": 9.55, - "learning_rate": 3.213351403839463e-05, - "loss": 2.8286, - "step": 663500 - }, - { - "epoch": 9.56, - "learning_rate": 3.20815872966419e-05, - "loss": 2.8298, - "step": 664000 - }, - { - "epoch": 9.56, - "learning_rate": 3.202966055488916e-05, - "loss": 2.8234, - "step": 664500 - }, - { - "epoch": 9.57, - "learning_rate": 3.197773381313643e-05, - "loss": 2.8285, - "step": 665000 - }, - { - "epoch": 9.58, - "learning_rate": 3.192580707138369e-05, - "loss": 2.8285, - "step": 665500 - }, - { - "epoch": 9.59, - "learning_rate": 3.1873880329630954e-05, - "loss": 2.8288, - "step": 666000 - }, - { - "epoch": 9.59, - "learning_rate": 3.182195358787822e-05, - "loss": 2.8278, - "step": 666500 - }, - { - "epoch": 9.6, - "learning_rate": 3.177013069960899e-05, - "loss": 2.8275, - "step": 667000 - }, - { - "epoch": 9.61, - "learning_rate": 3.171830781133976e-05, - "loss": 2.8216, - "step": 667500 - }, - { - "epoch": 9.62, - "learning_rate": 3.166638106958702e-05, - "loss": 2.8224, - "step": 668000 - }, - { - "epoch": 9.62, - "learning_rate": 3.161445432783429e-05, - "loss": 2.8297, - "step": 668500 - }, - { - "epoch": 9.63, - "learning_rate": 3.156252758608156e-05, - "loss": 2.827, - "step": 669000 - }, - { - "epoch": 9.64, - "learning_rate": 3.151060084432883e-05, - "loss": 2.8302, - "step": 669500 - }, - { - "epoch": 9.64, - "learning_rate": 3.1458777956059594e-05, - "loss": 2.8289, - "step": 670000 - }, - { - "epoch": 9.65, - "learning_rate": 3.140685121430686e-05, - "loss": 2.8283, - "step": 670500 - }, - { - "epoch": 9.66, - "learning_rate": 3.135492447255412e-05, - "loss": 2.824, - "step": 671000 - }, - { - "epoch": 9.67, - "learning_rate": 3.1302997730801385e-05, - "loss": 2.825, - "step": 671500 - }, - { - "epoch": 9.67, - "learning_rate": 3.125117484253215e-05, - "loss": 2.8198, - "step": 672000 - }, - { - "epoch": 9.68, - "learning_rate": 3.119924810077942e-05, - "loss": 2.8254, - "step": 672500 - }, - { - "epoch": 9.69, - "learning_rate": 3.114732135902669e-05, - "loss": 2.8216, - "step": 673000 - }, - { - "epoch": 9.69, - "learning_rate": 3.1095394617273957e-05, - "loss": 2.8273, - "step": 673500 - }, - { - "epoch": 9.7, - "learning_rate": 3.104346787552122e-05, - "loss": 2.8231, - "step": 674000 - }, - { - "epoch": 9.71, - "learning_rate": 3.099154113376848e-05, - "loss": 2.8267, - "step": 674500 - }, - { - "epoch": 9.72, - "learning_rate": 3.093971824549925e-05, - "loss": 2.8241, - "step": 675000 - }, - { - "epoch": 9.72, - "learning_rate": 3.0887791503746514e-05, - "loss": 2.8218, - "step": 675500 - }, - { - "epoch": 9.73, - "learning_rate": 3.083586476199378e-05, - "loss": 2.8262, - "step": 676000 - }, - { - "epoch": 9.74, - "learning_rate": 3.0783938020241044e-05, - "loss": 2.8207, - "step": 676500 - }, - { - "epoch": 9.74, - "learning_rate": 3.073201127848831e-05, - "loss": 2.8265, - "step": 677000 - }, - { - "epoch": 9.75, - "learning_rate": 3.068018839021908e-05, - "loss": 2.8176, - "step": 677500 - }, - { - "epoch": 9.76, - "learning_rate": 3.062826164846635e-05, - "loss": 2.8262, - "step": 678000 - }, - { - "epoch": 9.77, - "learning_rate": 3.057633490671361e-05, - "loss": 2.8195, - "step": 678500 - }, - { - "epoch": 9.77, - "learning_rate": 3.0524408164960877e-05, - "loss": 2.8232, - "step": 679000 - }, - { - "epoch": 9.78, - "learning_rate": 3.0472481423208138e-05, - "loss": 2.8231, - "step": 679500 - }, - { - "epoch": 9.79, - "learning_rate": 3.0420554681455403e-05, - "loss": 2.8215, - "step": 680000 - }, - { - "epoch": 9.8, - "learning_rate": 3.036862793970267e-05, - "loss": 2.825, - "step": 680500 - }, - { - "epoch": 9.8, - "learning_rate": 3.0316701197949933e-05, - "loss": 2.8225, - "step": 681000 - }, - { - "epoch": 9.81, - "learning_rate": 3.0264878309680706e-05, - "loss": 2.8144, - "step": 681500 - }, - { - "epoch": 9.82, - "learning_rate": 3.0212951567927967e-05, - "loss": 2.8204, - "step": 682000 - }, - { - "epoch": 9.82, - "learning_rate": 3.0161024826175232e-05, - "loss": 2.8276, - "step": 682500 - }, - { - "epoch": 9.83, - "learning_rate": 3.01090980844225e-05, - "loss": 2.8234, - "step": 683000 - }, - { - "epoch": 9.84, - "learning_rate": 3.0057379049636775e-05, - "loss": 2.8245, - "step": 683500 - }, - { - "epoch": 9.85, - "learning_rate": 3.000545230788404e-05, - "loss": 2.8231, - "step": 684000 - }, - { - "epoch": 9.85, - "learning_rate": 2.99535255661313e-05, - "loss": 2.8219, - "step": 684500 - }, - { - "epoch": 9.86, - "learning_rate": 2.990159882437857e-05, - "loss": 2.821, - "step": 685000 - }, - { - "epoch": 9.87, - "learning_rate": 2.9849672082625835e-05, - "loss": 2.8236, - "step": 685500 - }, - { - "epoch": 9.87, - "learning_rate": 2.9797745340873096e-05, - "loss": 2.8212, - "step": 686000 - }, - { - "epoch": 9.88, - "learning_rate": 2.974581859912036e-05, - "loss": 2.8231, - "step": 686500 - }, - { - "epoch": 9.89, - "learning_rate": 2.969399571085113e-05, - "loss": 2.8228, - "step": 687000 - }, - { - "epoch": 9.9, - "learning_rate": 2.9642068969098395e-05, - "loss": 2.8205, - "step": 687500 - }, - { - "epoch": 9.9, - "learning_rate": 2.9590142227345664e-05, - "loss": 2.8207, - "step": 688000 - }, - { - "epoch": 9.91, - "learning_rate": 2.953821548559293e-05, - "loss": 2.8209, - "step": 688500 - }, - { - "epoch": 9.92, - "learning_rate": 2.948628874384019e-05, - "loss": 2.8211, - "step": 689000 - }, - { - "epoch": 9.92, - "learning_rate": 2.9434362002087455e-05, - "loss": 2.8211, - "step": 689500 - }, - { - "epoch": 9.93, - "learning_rate": 2.9382435260334723e-05, - "loss": 2.8194, - "step": 690000 - }, - { - "epoch": 9.94, - "learning_rate": 2.9330508518581985e-05, - "loss": 2.8241, - "step": 690500 - }, - { - "epoch": 9.95, - "learning_rate": 2.9278685630312758e-05, - "loss": 2.8171, - "step": 691000 - }, - { - "epoch": 9.95, - "learning_rate": 2.922675888856002e-05, - "loss": 2.8193, - "step": 691500 - }, - { - "epoch": 9.96, - "learning_rate": 2.9174832146807284e-05, - "loss": 2.8213, - "step": 692000 - }, - { - "epoch": 9.97, - "learning_rate": 2.9122905405054553e-05, - "loss": 2.8249, - "step": 692500 - }, - { - "epoch": 9.98, - "learning_rate": 2.9070978663301818e-05, - "loss": 2.8247, - "step": 693000 - }, - { - "epoch": 9.98, - "learning_rate": 2.9019155775032587e-05, - "loss": 2.8203, - "step": 693500 - }, - { - "epoch": 9.99, - "learning_rate": 2.8967229033279852e-05, - "loss": 2.8256, - "step": 694000 - }, - { - "epoch": 10.0, - "learning_rate": 2.8915302291527114e-05, - "loss": 2.8163, - "step": 694500 - }, - { - "epoch": 10.0, - "eval_accuracy": 0.5010026952779273, - "eval_loss": 2.650995969772339, - "eval_runtime": 555.6978, - "eval_samples_per_second": 969.838, - "eval_steps_per_second": 40.41, - "step": 694730 - }, - { - "epoch": 10.0, - "learning_rate": 2.886337554977438e-05, - "loss": 2.8173, - "step": 695000 - }, - { - "epoch": 10.01, - "learning_rate": 2.8811448808021647e-05, - "loss": 2.8197, - "step": 695500 - }, - { - "epoch": 10.02, - "learning_rate": 2.8759522066268908e-05, - "loss": 2.8121, - "step": 696000 - }, - { - "epoch": 10.03, - "learning_rate": 2.8707595324516173e-05, - "loss": 2.8132, - "step": 696500 - }, - { - "epoch": 10.03, - "learning_rate": 2.8655668582763438e-05, - "loss": 2.8159, - "step": 697000 - }, - { - "epoch": 10.04, - "learning_rate": 2.8603949547977716e-05, - "loss": 2.8141, - "step": 697500 - }, - { - "epoch": 10.05, - "learning_rate": 2.855202280622498e-05, - "loss": 2.819, - "step": 698000 - }, - { - "epoch": 10.05, - "learning_rate": 2.8500096064472242e-05, - "loss": 2.8164, - "step": 698500 - }, - { - "epoch": 10.06, - "learning_rate": 2.844816932271951e-05, - "loss": 2.8188, - "step": 699000 - }, - { - "epoch": 10.07, - "learning_rate": 2.8396242580966775e-05, - "loss": 2.8136, - "step": 699500 - }, - { - "epoch": 10.08, - "learning_rate": 2.8344315839214037e-05, - "loss": 2.8099, - "step": 700000 - }, - { - "epoch": 10.08, - "learning_rate": 2.8292389097461302e-05, - "loss": 2.8207, - "step": 700500 - }, - { - "epoch": 10.09, - "learning_rate": 2.824046235570857e-05, - "loss": 2.8109, - "step": 701000 - }, - { - "epoch": 10.1, - "learning_rate": 2.8188639467439336e-05, - "loss": 2.8167, - "step": 701500 - }, - { - "epoch": 10.1, - "learning_rate": 2.8136712725686605e-05, - "loss": 2.8171, - "step": 702000 - }, - { - "epoch": 10.11, - "learning_rate": 2.808478598393387e-05, - "loss": 2.8198, - "step": 702500 - }, - { - "epoch": 10.12, - "learning_rate": 2.803285924218113e-05, - "loss": 2.8165, - "step": 703000 - }, - { - "epoch": 10.13, - "learning_rate": 2.7981036353911904e-05, - "loss": 2.8162, - "step": 703500 - }, - { - "epoch": 10.13, - "learning_rate": 2.7929109612159166e-05, - "loss": 2.8141, - "step": 704000 - }, - { - "epoch": 10.14, - "learning_rate": 2.787718287040643e-05, - "loss": 2.8126, - "step": 704500 - }, - { - "epoch": 10.15, - "learning_rate": 2.78252561286537e-05, - "loss": 2.8126, - "step": 705000 - }, - { - "epoch": 10.16, - "learning_rate": 2.777332938690096e-05, - "loss": 2.8168, - "step": 705500 - }, - { - "epoch": 10.16, - "learning_rate": 2.7721402645148225e-05, - "loss": 2.8162, - "step": 706000 - }, - { - "epoch": 10.17, - "learning_rate": 2.7669579756878995e-05, - "loss": 2.8156, - "step": 706500 - }, - { - "epoch": 10.18, - "learning_rate": 2.761765301512626e-05, - "loss": 2.8092, - "step": 707000 - }, - { - "epoch": 10.18, - "learning_rate": 2.7565726273373528e-05, - "loss": 2.8151, - "step": 707500 - }, - { - "epoch": 10.19, - "learning_rate": 2.7513799531620793e-05, - "loss": 2.8134, - "step": 708000 - }, - { - "epoch": 10.2, - "learning_rate": 2.7461872789868054e-05, - "loss": 2.8125, - "step": 708500 - }, - { - "epoch": 10.21, - "learning_rate": 2.741015375508233e-05, - "loss": 2.8139, - "step": 709000 - }, - { - "epoch": 10.21, - "learning_rate": 2.7358227013329597e-05, - "loss": 2.8176, - "step": 709500 - }, - { - "epoch": 10.22, - "learning_rate": 2.7306300271576862e-05, - "loss": 2.8119, - "step": 710000 - }, - { - "epoch": 10.23, - "learning_rate": 2.7254373529824124e-05, - "loss": 2.8132, - "step": 710500 - }, - { - "epoch": 10.23, - "learning_rate": 2.720244678807139e-05, - "loss": 2.817, - "step": 711000 - }, - { - "epoch": 10.24, - "learning_rate": 2.7150520046318657e-05, - "loss": 2.8143, - "step": 711500 - }, - { - "epoch": 10.25, - "learning_rate": 2.709859330456592e-05, - "loss": 2.8172, - "step": 712000 - }, - { - "epoch": 10.26, - "learning_rate": 2.7046666562813183e-05, - "loss": 2.8124, - "step": 712500 - }, - { - "epoch": 10.26, - "learning_rate": 2.6994739821060448e-05, - "loss": 2.814, - "step": 713000 - }, - { - "epoch": 10.27, - "learning_rate": 2.6942813079307716e-05, - "loss": 2.812, - "step": 713500 - }, - { - "epoch": 10.28, - "learning_rate": 2.6890886337554978e-05, - "loss": 2.8138, - "step": 714000 - }, - { - "epoch": 10.28, - "learning_rate": 2.683906344928575e-05, - "loss": 2.8112, - "step": 714500 - }, - { - "epoch": 10.29, - "learning_rate": 2.6787136707533012e-05, - "loss": 2.811, - "step": 715000 - }, - { - "epoch": 10.3, - "learning_rate": 2.6735209965780277e-05, - "loss": 2.8142, - "step": 715500 - }, - { - "epoch": 10.31, - "learning_rate": 2.6683283224027546e-05, - "loss": 2.812, - "step": 716000 - }, - { - "epoch": 10.31, - "learning_rate": 2.663135648227481e-05, - "loss": 2.8149, - "step": 716500 - }, - { - "epoch": 10.32, - "learning_rate": 2.657953359400558e-05, - "loss": 2.8093, - "step": 717000 - }, - { - "epoch": 10.33, - "learning_rate": 2.6527606852252845e-05, - "loss": 2.8155, - "step": 717500 - }, - { - "epoch": 10.33, - "learning_rate": 2.6475680110500107e-05, - "loss": 2.8115, - "step": 718000 - }, - { - "epoch": 10.34, - "learning_rate": 2.642375336874737e-05, - "loss": 2.8132, - "step": 718500 - }, - { - "epoch": 10.35, - "learning_rate": 2.637182662699464e-05, - "loss": 2.8131, - "step": 719000 - }, - { - "epoch": 10.36, - "learning_rate": 2.63198998852419e-05, - "loss": 2.8088, - "step": 719500 - }, - { - "epoch": 10.36, - "learning_rate": 2.6268076996972674e-05, - "loss": 2.8079, - "step": 720000 - }, - { - "epoch": 10.37, - "learning_rate": 2.6216150255219936e-05, - "loss": 2.8035, - "step": 720500 - }, - { - "epoch": 10.38, - "learning_rate": 2.61642235134672e-05, - "loss": 2.8155, - "step": 721000 - }, - { - "epoch": 10.39, - "learning_rate": 2.611229677171447e-05, - "loss": 2.8114, - "step": 721500 - }, - { - "epoch": 10.39, - "learning_rate": 2.6060473883445235e-05, - "loss": 2.8157, - "step": 722000 - }, - { - "epoch": 10.4, - "learning_rate": 2.6008547141692504e-05, - "loss": 2.8042, - "step": 722500 - }, - { - "epoch": 10.41, - "learning_rate": 2.595662039993977e-05, - "loss": 2.8137, - "step": 723000 - }, - { - "epoch": 10.41, - "learning_rate": 2.590469365818703e-05, - "loss": 2.8136, - "step": 723500 - }, - { - "epoch": 10.42, - "learning_rate": 2.5852766916434295e-05, - "loss": 2.8074, - "step": 724000 - }, - { - "epoch": 10.43, - "learning_rate": 2.5800944028165064e-05, - "loss": 2.8052, - "step": 724500 - }, - { - "epoch": 10.44, - "learning_rate": 2.574901728641233e-05, - "loss": 2.8065, - "step": 725000 - }, - { - "epoch": 10.44, - "learning_rate": 2.5697090544659598e-05, - "loss": 2.814, - "step": 725500 - }, - { - "epoch": 10.45, - "learning_rate": 2.5645163802906856e-05, - "loss": 2.8128, - "step": 726000 - }, - { - "epoch": 10.46, - "learning_rate": 2.5593237061154124e-05, - "loss": 2.8117, - "step": 726500 - }, - { - "epoch": 10.46, - "learning_rate": 2.554131031940139e-05, - "loss": 2.8092, - "step": 727000 - }, - { - "epoch": 10.47, - "learning_rate": 2.5489383577648657e-05, - "loss": 2.8079, - "step": 727500 - }, - { - "epoch": 10.48, - "learning_rate": 2.543745683589592e-05, - "loss": 2.8108, - "step": 728000 - }, - { - "epoch": 10.49, - "learning_rate": 2.5385633947626692e-05, - "loss": 2.8113, - "step": 728500 - }, - { - "epoch": 10.49, - "learning_rate": 2.5333811059357458e-05, - "loss": 2.8091, - "step": 729000 - }, - { - "epoch": 10.5, - "learning_rate": 2.5281884317604726e-05, - "loss": 2.8056, - "step": 729500 - }, - { - "epoch": 10.51, - "learning_rate": 2.5229957575851988e-05, - "loss": 2.8141, - "step": 730000 - }, - { - "epoch": 10.51, - "learning_rate": 2.5178030834099253e-05, - "loss": 2.8058, - "step": 730500 - }, - { - "epoch": 10.52, - "learning_rate": 2.512610409234652e-05, - "loss": 2.8084, - "step": 731000 - }, - { - "epoch": 10.53, - "learning_rate": 2.5074177350593786e-05, - "loss": 2.8066, - "step": 731500 - }, - { - "epoch": 10.54, - "learning_rate": 2.5022250608841047e-05, - "loss": 2.8056, - "step": 732000 - }, - { - "epoch": 10.54, - "learning_rate": 2.4970427720571817e-05, - "loss": 2.8085, - "step": 732500 - }, - { - "epoch": 10.55, - "learning_rate": 2.4918500978819085e-05, - "loss": 2.8108, - "step": 733000 - }, - { - "epoch": 10.56, - "learning_rate": 2.4866574237066347e-05, - "loss": 2.8139, - "step": 733500 - }, - { - "epoch": 10.57, - "learning_rate": 2.4814647495313612e-05, - "loss": 2.8046, - "step": 734000 - }, - { - "epoch": 10.57, - "learning_rate": 2.4762720753560877e-05, - "loss": 2.8119, - "step": 734500 - }, - { - "epoch": 10.58, - "learning_rate": 2.471089786529165e-05, - "loss": 2.8117, - "step": 735000 - }, - { - "epoch": 10.59, - "learning_rate": 2.465897112353891e-05, - "loss": 2.8102, - "step": 735500 - }, - { - "epoch": 10.59, - "learning_rate": 2.4607044381786176e-05, - "loss": 2.8075, - "step": 736000 - }, - { - "epoch": 10.6, - "learning_rate": 2.455511764003344e-05, - "loss": 2.8127, - "step": 736500 - }, - { - "epoch": 10.61, - "learning_rate": 2.450329475176421e-05, - "loss": 2.8076, - "step": 737000 - }, - { - "epoch": 10.62, - "learning_rate": 2.4451368010011476e-05, - "loss": 2.8036, - "step": 737500 - }, - { - "epoch": 10.62, - "learning_rate": 2.439944126825874e-05, - "loss": 2.8055, - "step": 738000 - }, - { - "epoch": 10.63, - "learning_rate": 2.434751452650601e-05, - "loss": 2.8104, - "step": 738500 - }, - { - "epoch": 10.64, - "learning_rate": 2.429558778475327e-05, - "loss": 2.8087, - "step": 739000 - }, - { - "epoch": 10.64, - "learning_rate": 2.4243764896484043e-05, - "loss": 2.8048, - "step": 739500 - }, - { - "epoch": 10.65, - "learning_rate": 2.4191838154731305e-05, - "loss": 2.8087, - "step": 740000 - }, - { - "epoch": 10.66, - "learning_rate": 2.4139911412978573e-05, - "loss": 2.8019, - "step": 740500 - }, - { - "epoch": 10.67, - "learning_rate": 2.4087984671225835e-05, - "loss": 2.8137, - "step": 741000 - }, - { - "epoch": 10.67, - "learning_rate": 2.40360579294731e-05, - "loss": 2.808, - "step": 741500 - }, - { - "epoch": 10.68, - "learning_rate": 2.398423504120387e-05, - "loss": 2.8106, - "step": 742000 - }, - { - "epoch": 10.69, - "learning_rate": 2.3932308299451137e-05, - "loss": 2.8067, - "step": 742500 - }, - { - "epoch": 10.69, - "learning_rate": 2.38803815576984e-05, - "loss": 2.8072, - "step": 743000 - }, - { - "epoch": 10.7, - "learning_rate": 2.3828454815945664e-05, - "loss": 2.8095, - "step": 743500 - }, - { - "epoch": 10.71, - "learning_rate": 2.377652807419293e-05, - "loss": 2.8015, - "step": 744000 - }, - { - "epoch": 10.72, - "learning_rate": 2.3724601332440194e-05, - "loss": 2.8056, - "step": 744500 - }, - { - "epoch": 10.72, - "learning_rate": 2.367267459068746e-05, - "loss": 2.8076, - "step": 745000 - }, - { - "epoch": 10.73, - "learning_rate": 2.3620747848934724e-05, - "loss": 2.8052, - "step": 745500 - }, - { - "epoch": 10.74, - "learning_rate": 2.3568924960665497e-05, - "loss": 2.8051, - "step": 746000 - }, - { - "epoch": 10.75, - "learning_rate": 2.3516998218912758e-05, - "loss": 2.8012, - "step": 746500 - }, - { - "epoch": 10.75, - "learning_rate": 2.346517533064353e-05, - "loss": 2.8097, - "step": 747000 - }, - { - "epoch": 10.76, - "learning_rate": 2.3413248588890793e-05, - "loss": 2.802, - "step": 747500 - }, - { - "epoch": 10.77, - "learning_rate": 2.336132184713806e-05, - "loss": 2.801, - "step": 748000 - }, - { - "epoch": 10.77, - "learning_rate": 2.3309395105385322e-05, - "loss": 2.8061, - "step": 748500 - }, - { - "epoch": 10.78, - "learning_rate": 2.325746836363259e-05, - "loss": 2.8079, - "step": 749000 - }, - { - "epoch": 10.79, - "learning_rate": 2.3205541621879852e-05, - "loss": 2.8062, - "step": 749500 - }, - { - "epoch": 10.8, - "learning_rate": 2.3153614880127117e-05, - "loss": 2.8007, - "step": 750000 - }, - { - "epoch": 10.8, - "learning_rate": 2.3101688138374382e-05, - "loss": 2.8057, - "step": 750500 - }, - { - "epoch": 10.81, - "learning_rate": 2.304986525010515e-05, - "loss": 2.8045, - "step": 751000 - }, - { - "epoch": 10.82, - "learning_rate": 2.2997938508352417e-05, - "loss": 2.8053, - "step": 751500 - }, - { - "epoch": 10.82, - "learning_rate": 2.294601176659968e-05, - "loss": 2.8026, - "step": 752000 - }, - { - "epoch": 10.83, - "learning_rate": 2.2894085024846946e-05, - "loss": 2.805, - "step": 752500 - }, - { - "epoch": 10.84, - "learning_rate": 2.2842262136577716e-05, - "loss": 2.8023, - "step": 753000 - }, - { - "epoch": 10.85, - "learning_rate": 2.2790335394824984e-05, - "loss": 2.8072, - "step": 753500 - }, - { - "epoch": 10.85, - "learning_rate": 2.2738408653072246e-05, - "loss": 2.8021, - "step": 754000 - }, - { - "epoch": 10.86, - "learning_rate": 2.2686481911319514e-05, - "loss": 2.8027, - "step": 754500 - }, - { - "epoch": 10.87, - "learning_rate": 2.2634555169566776e-05, - "loss": 2.8036, - "step": 755000 - }, - { - "epoch": 10.87, - "learning_rate": 2.258273228129755e-05, - "loss": 2.8052, - "step": 755500 - }, - { - "epoch": 10.88, - "learning_rate": 2.253080553954481e-05, - "loss": 2.8014, - "step": 756000 - }, - { - "epoch": 10.89, - "learning_rate": 2.247887879779208e-05, - "loss": 2.8009, - "step": 756500 - }, - { - "epoch": 10.9, - "learning_rate": 2.242695205603934e-05, - "loss": 2.804, - "step": 757000 - }, - { - "epoch": 10.9, - "learning_rate": 2.2375129167770113e-05, - "loss": 2.8077, - "step": 757500 - }, - { - "epoch": 10.91, - "learning_rate": 2.2323202426017374e-05, - "loss": 2.7989, - "step": 758000 - }, - { - "epoch": 10.92, - "learning_rate": 2.227127568426464e-05, - "loss": 2.8051, - "step": 758500 - }, - { - "epoch": 10.93, - "learning_rate": 2.2219348942511904e-05, - "loss": 2.8048, - "step": 759000 - }, - { - "epoch": 10.93, - "learning_rate": 2.216742220075917e-05, - "loss": 2.7965, - "step": 759500 - }, - { - "epoch": 10.94, - "learning_rate": 2.211559931248994e-05, - "loss": 2.801, - "step": 760000 - }, - { - "epoch": 10.95, - "learning_rate": 2.2063672570737204e-05, - "loss": 2.8038, - "step": 760500 - }, - { - "epoch": 10.95, - "learning_rate": 2.201174582898447e-05, - "loss": 2.8041, - "step": 761000 - }, - { - "epoch": 10.96, - "learning_rate": 2.1959819087231733e-05, - "loss": 2.8061, - "step": 761500 - }, - { - "epoch": 10.97, - "learning_rate": 2.1907892345479002e-05, - "loss": 2.8046, - "step": 762000 - }, - { - "epoch": 10.98, - "learning_rate": 2.1856069457209768e-05, - "loss": 2.8002, - "step": 762500 - }, - { - "epoch": 10.98, - "learning_rate": 2.1804142715457036e-05, - "loss": 2.8039, - "step": 763000 - }, - { - "epoch": 10.99, - "learning_rate": 2.1752215973704298e-05, - "loss": 2.802, - "step": 763500 - }, - { - "epoch": 11.0, - "learning_rate": 2.1700289231951566e-05, - "loss": 2.8026, - "step": 764000 - }, - { - "epoch": 11.0, - "eval_accuracy": 0.5046112569907905, - "eval_loss": 2.62461256980896, - "eval_runtime": 555.9604, - "eval_samples_per_second": 969.38, - "eval_steps_per_second": 40.391, - "step": 764203 - }, - { - "epoch": 11.0, - "learning_rate": 2.1648362490198828e-05, - "loss": 2.7987, - "step": 764500 - }, - { - "epoch": 11.01, - "learning_rate": 2.15965396019296e-05, - "loss": 2.7989, - "step": 765000 - }, - { - "epoch": 11.02, - "learning_rate": 2.1544612860176862e-05, - "loss": 2.7949, - "step": 765500 - }, - { - "epoch": 11.03, - "learning_rate": 2.149268611842413e-05, - "loss": 2.8003, - "step": 766000 - }, - { - "epoch": 11.03, - "learning_rate": 2.1440759376671392e-05, - "loss": 2.7988, - "step": 766500 - }, - { - "epoch": 11.04, - "learning_rate": 2.1388936488402165e-05, - "loss": 2.796, - "step": 767000 - }, - { - "epoch": 11.05, - "learning_rate": 2.1337009746649427e-05, - "loss": 2.7986, - "step": 767500 - }, - { - "epoch": 11.05, - "learning_rate": 2.128508300489669e-05, - "loss": 2.7976, - "step": 768000 - }, - { - "epoch": 11.06, - "learning_rate": 2.1233156263143956e-05, - "loss": 2.7985, - "step": 768500 - }, - { - "epoch": 11.07, - "learning_rate": 2.118122952139122e-05, - "loss": 2.7967, - "step": 769000 - }, - { - "epoch": 11.08, - "learning_rate": 2.112930277963849e-05, - "loss": 2.7995, - "step": 769500 - }, - { - "epoch": 11.08, - "learning_rate": 2.1077479891369256e-05, - "loss": 2.7985, - "step": 770000 - }, - { - "epoch": 11.09, - "learning_rate": 2.1025553149616524e-05, - "loss": 2.7961, - "step": 770500 - }, - { - "epoch": 11.1, - "learning_rate": 2.0973626407863786e-05, - "loss": 2.7965, - "step": 771000 - }, - { - "epoch": 11.11, - "learning_rate": 2.0921699666111054e-05, - "loss": 2.7944, - "step": 771500 - }, - { - "epoch": 11.11, - "learning_rate": 2.0869772924358315e-05, - "loss": 2.7952, - "step": 772000 - }, - { - "epoch": 11.12, - "learning_rate": 2.081784618260558e-05, - "loss": 2.7922, - "step": 772500 - }, - { - "epoch": 11.13, - "learning_rate": 2.0765919440852845e-05, - "loss": 2.7979, - "step": 773000 - }, - { - "epoch": 11.13, - "learning_rate": 2.0714096552583618e-05, - "loss": 2.7943, - "step": 773500 - }, - { - "epoch": 11.14, - "learning_rate": 2.066216981083088e-05, - "loss": 2.7962, - "step": 774000 - }, - { - "epoch": 11.15, - "learning_rate": 2.0610243069078145e-05, - "loss": 2.7974, - "step": 774500 - }, - { - "epoch": 11.16, - "learning_rate": 2.055831632732541e-05, - "loss": 2.7969, - "step": 775000 - }, - { - "epoch": 11.16, - "learning_rate": 2.050649343905618e-05, - "loss": 2.7929, - "step": 775500 - }, - { - "epoch": 11.17, - "learning_rate": 2.0454566697303444e-05, - "loss": 2.7957, - "step": 776000 - }, - { - "epoch": 11.18, - "learning_rate": 2.040263995555071e-05, - "loss": 2.7974, - "step": 776500 - }, - { - "epoch": 11.18, - "learning_rate": 2.0350713213797977e-05, - "loss": 2.8017, - "step": 777000 - }, - { - "epoch": 11.19, - "learning_rate": 2.029878647204524e-05, - "loss": 2.7986, - "step": 777500 - }, - { - "epoch": 11.2, - "learning_rate": 2.0246859730292507e-05, - "loss": 2.7928, - "step": 778000 - }, - { - "epoch": 11.21, - "learning_rate": 2.019493298853977e-05, - "loss": 2.7981, - "step": 778500 - }, - { - "epoch": 11.21, - "learning_rate": 2.014311010027054e-05, - "loss": 2.7985, - "step": 779000 - }, - { - "epoch": 11.22, - "learning_rate": 2.0091183358517803e-05, - "loss": 2.7979, - "step": 779500 - }, - { - "epoch": 11.23, - "learning_rate": 2.0039256616765068e-05, - "loss": 2.8037, - "step": 780000 - }, - { - "epoch": 11.23, - "learning_rate": 1.9987329875012333e-05, - "loss": 2.7996, - "step": 780500 - }, - { - "epoch": 11.24, - "learning_rate": 1.9935403133259598e-05, - "loss": 2.7954, - "step": 781000 - }, - { - "epoch": 11.25, - "learning_rate": 1.9883476391506863e-05, - "loss": 2.8006, - "step": 781500 - }, - { - "epoch": 11.26, - "learning_rate": 1.9831549649754128e-05, - "loss": 2.7932, - "step": 782000 - }, - { - "epoch": 11.26, - "learning_rate": 1.9779622908001393e-05, - "loss": 2.7921, - "step": 782500 - }, - { - "epoch": 11.27, - "learning_rate": 1.972790387321567e-05, - "loss": 2.7929, - "step": 783000 - }, - { - "epoch": 11.28, - "learning_rate": 1.9675977131462932e-05, - "loss": 2.7967, - "step": 783500 - }, - { - "epoch": 11.28, - "learning_rate": 1.9624050389710197e-05, - "loss": 2.7964, - "step": 784000 - }, - { - "epoch": 11.29, - "learning_rate": 1.957212364795746e-05, - "loss": 2.7967, - "step": 784500 - }, - { - "epoch": 11.3, - "learning_rate": 1.9520196906204726e-05, - "loss": 2.7948, - "step": 785000 - }, - { - "epoch": 11.31, - "learning_rate": 1.94683740179355e-05, - "loss": 2.7907, - "step": 785500 - }, - { - "epoch": 11.31, - "learning_rate": 1.941644727618276e-05, - "loss": 2.7975, - "step": 786000 - }, - { - "epoch": 11.32, - "learning_rate": 1.936452053443003e-05, - "loss": 2.7976, - "step": 786500 - }, - { - "epoch": 11.33, - "learning_rate": 1.931259379267729e-05, - "loss": 2.797, - "step": 787000 - }, - { - "epoch": 11.34, - "learning_rate": 1.926066705092456e-05, - "loss": 2.7934, - "step": 787500 - }, - { - "epoch": 11.34, - "learning_rate": 1.920874030917182e-05, - "loss": 2.7977, - "step": 788000 - }, - { - "epoch": 11.35, - "learning_rate": 1.9156917420902594e-05, - "loss": 2.7952, - "step": 788500 - }, - { - "epoch": 11.36, - "learning_rate": 1.9104990679149855e-05, - "loss": 2.7913, - "step": 789000 - }, - { - "epoch": 11.36, - "learning_rate": 1.905306393739712e-05, - "loss": 2.7928, - "step": 789500 - }, - { - "epoch": 11.37, - "learning_rate": 1.9001137195644385e-05, - "loss": 2.7925, - "step": 790000 - }, - { - "epoch": 11.38, - "learning_rate": 1.894921045389165e-05, - "loss": 2.7978, - "step": 790500 - }, - { - "epoch": 11.39, - "learning_rate": 1.8897283712138915e-05, - "loss": 2.7974, - "step": 791000 - }, - { - "epoch": 11.39, - "learning_rate": 1.884535697038618e-05, - "loss": 2.7968, - "step": 791500 - }, - { - "epoch": 11.4, - "learning_rate": 1.8793430228633445e-05, - "loss": 2.7926, - "step": 792000 - }, - { - "epoch": 11.41, - "learning_rate": 1.8741607340364214e-05, - "loss": 2.7963, - "step": 792500 - }, - { - "epoch": 11.41, - "learning_rate": 1.8689680598611483e-05, - "loss": 2.7893, - "step": 793000 - }, - { - "epoch": 11.42, - "learning_rate": 1.863785771034225e-05, - "loss": 2.7877, - "step": 793500 - }, - { - "epoch": 11.43, - "learning_rate": 1.8585930968589517e-05, - "loss": 2.796, - "step": 794000 - }, - { - "epoch": 11.44, - "learning_rate": 1.853400422683678e-05, - "loss": 2.7906, - "step": 794500 - }, - { - "epoch": 11.44, - "learning_rate": 1.8482077485084047e-05, - "loss": 2.7987, - "step": 795000 - }, - { - "epoch": 11.45, - "learning_rate": 1.843015074333131e-05, - "loss": 2.7931, - "step": 795500 - }, - { - "epoch": 11.46, - "learning_rate": 1.837832785506208e-05, - "loss": 2.7866, - "step": 796000 - }, - { - "epoch": 11.46, - "learning_rate": 1.8326401113309343e-05, - "loss": 2.7934, - "step": 796500 - }, - { - "epoch": 11.47, - "learning_rate": 1.8274474371556608e-05, - "loss": 2.7929, - "step": 797000 - }, - { - "epoch": 11.48, - "learning_rate": 1.8222547629803873e-05, - "loss": 2.7929, - "step": 797500 - }, - { - "epoch": 11.49, - "learning_rate": 1.8170620888051138e-05, - "loss": 2.7977, - "step": 798000 - }, - { - "epoch": 11.49, - "learning_rate": 1.8118694146298403e-05, - "loss": 2.7898, - "step": 798500 - }, - { - "epoch": 11.5, - "learning_rate": 1.8066871258029172e-05, - "loss": 2.787, - "step": 799000 - }, - { - "epoch": 11.51, - "learning_rate": 1.8014944516276437e-05, - "loss": 2.7933, - "step": 799500 - }, - { - "epoch": 11.52, - "learning_rate": 1.7963017774523702e-05, - "loss": 2.7907, - "step": 800000 - }, - { - "epoch": 11.52, - "learning_rate": 1.791109103277097e-05, - "loss": 2.7924, - "step": 800500 - }, - { - "epoch": 11.53, - "learning_rate": 1.7859164291018232e-05, - "loss": 2.7948, - "step": 801000 - }, - { - "epoch": 11.54, - "learning_rate": 1.7807237549265497e-05, - "loss": 2.7944, - "step": 801500 - }, - { - "epoch": 11.54, - "learning_rate": 1.775531080751276e-05, - "loss": 2.795, - "step": 802000 - }, - { - "epoch": 11.55, - "learning_rate": 1.7703487919243535e-05, - "loss": 2.7972, - "step": 802500 - }, - { - "epoch": 11.56, - "learning_rate": 1.7651561177490796e-05, - "loss": 2.7953, - "step": 803000 - }, - { - "epoch": 11.57, - "learning_rate": 1.759963443573806e-05, - "loss": 2.7893, - "step": 803500 - }, - { - "epoch": 11.57, - "learning_rate": 1.7547707693985326e-05, - "loss": 2.7897, - "step": 804000 - }, - { - "epoch": 11.58, - "learning_rate": 1.749578095223259e-05, - "loss": 2.792, - "step": 804500 - }, - { - "epoch": 11.59, - "learning_rate": 1.7443854210479856e-05, - "loss": 2.791, - "step": 805000 - }, - { - "epoch": 11.59, - "learning_rate": 1.7392031322210625e-05, - "loss": 2.7957, - "step": 805500 - }, - { - "epoch": 11.6, - "learning_rate": 1.734010458045789e-05, - "loss": 2.7909, - "step": 806000 - }, - { - "epoch": 11.61, - "learning_rate": 1.7288177838705155e-05, - "loss": 2.7908, - "step": 806500 - }, - { - "epoch": 11.62, - "learning_rate": 1.723625109695242e-05, - "loss": 2.7928, - "step": 807000 - }, - { - "epoch": 11.62, - "learning_rate": 1.7184324355199685e-05, - "loss": 2.7933, - "step": 807500 - }, - { - "epoch": 11.63, - "learning_rate": 1.713239761344695e-05, - "loss": 2.7879, - "step": 808000 - }, - { - "epoch": 11.64, - "learning_rate": 1.7080470871694215e-05, - "loss": 2.7841, - "step": 808500 - }, - { - "epoch": 11.64, - "learning_rate": 1.7028647983424984e-05, - "loss": 2.7956, - "step": 809000 - }, - { - "epoch": 11.65, - "learning_rate": 1.697672124167225e-05, - "loss": 2.789, - "step": 809500 - }, - { - "epoch": 11.66, - "learning_rate": 1.6924794499919514e-05, - "loss": 2.7915, - "step": 810000 - }, - { - "epoch": 11.67, - "learning_rate": 1.687286775816678e-05, - "loss": 2.7944, - "step": 810500 - }, - { - "epoch": 11.67, - "learning_rate": 1.6820941016414044e-05, - "loss": 2.7884, - "step": 811000 - }, - { - "epoch": 11.68, - "learning_rate": 1.676901427466131e-05, - "loss": 2.786, - "step": 811500 - }, - { - "epoch": 11.69, - "learning_rate": 1.6717087532908574e-05, - "loss": 2.791, - "step": 812000 - }, - { - "epoch": 11.7, - "learning_rate": 1.666516079115584e-05, - "loss": 2.7934, - "step": 812500 - }, - { - "epoch": 11.7, - "learning_rate": 1.661333790288661e-05, - "loss": 2.7899, - "step": 813000 - }, - { - "epoch": 11.71, - "learning_rate": 1.6561515014617378e-05, - "loss": 2.7997, - "step": 813500 - }, - { - "epoch": 11.72, - "learning_rate": 1.6509588272864643e-05, - "loss": 2.7874, - "step": 814000 - }, - { - "epoch": 11.72, - "learning_rate": 1.6457661531111908e-05, - "loss": 2.788, - "step": 814500 - }, - { - "epoch": 11.73, - "learning_rate": 1.6405734789359173e-05, - "loss": 2.7897, - "step": 815000 - }, - { - "epoch": 11.74, - "learning_rate": 1.6353911901089942e-05, - "loss": 2.7917, - "step": 815500 - }, - { - "epoch": 11.75, - "learning_rate": 1.6301985159337207e-05, - "loss": 2.7895, - "step": 816000 - }, - { - "epoch": 11.75, - "learning_rate": 1.6250058417584475e-05, - "loss": 2.7891, - "step": 816500 - }, - { - "epoch": 11.76, - "learning_rate": 1.6198131675831737e-05, - "loss": 2.7911, - "step": 817000 - }, - { - "epoch": 11.77, - "learning_rate": 1.614630878756251e-05, - "loss": 2.7892, - "step": 817500 - }, - { - "epoch": 11.77, - "learning_rate": 1.609438204580977e-05, - "loss": 2.7903, - "step": 818000 - }, - { - "epoch": 11.78, - "learning_rate": 1.6042559157540545e-05, - "loss": 2.7909, - "step": 818500 - }, - { - "epoch": 11.79, - "learning_rate": 1.5990632415787806e-05, - "loss": 2.7937, - "step": 819000 - }, - { - "epoch": 11.8, - "learning_rate": 1.5938705674035074e-05, - "loss": 2.7903, - "step": 819500 - }, - { - "epoch": 11.8, - "learning_rate": 1.5886778932282336e-05, - "loss": 2.7915, - "step": 820000 - }, - { - "epoch": 11.81, - "learning_rate": 1.58348521905296e-05, - "loss": 2.7842, - "step": 820500 - }, - { - "epoch": 11.82, - "learning_rate": 1.5782925448776866e-05, - "loss": 2.7848, - "step": 821000 - }, - { - "epoch": 11.82, - "learning_rate": 1.573099870702413e-05, - "loss": 2.7886, - "step": 821500 - }, - { - "epoch": 11.83, - "learning_rate": 1.5679071965271395e-05, - "loss": 2.7883, - "step": 822000 - }, - { - "epoch": 11.84, - "learning_rate": 1.562714522351866e-05, - "loss": 2.787, - "step": 822500 - }, - { - "epoch": 11.85, - "learning_rate": 1.5575218481765925e-05, - "loss": 2.7858, - "step": 823000 - }, - { - "epoch": 11.85, - "learning_rate": 1.552329174001319e-05, - "loss": 2.7867, - "step": 823500 - }, - { - "epoch": 11.86, - "learning_rate": 1.5471468851743963e-05, - "loss": 2.7886, - "step": 824000 - }, - { - "epoch": 11.87, - "learning_rate": 1.5419542109991225e-05, - "loss": 2.7878, - "step": 824500 - }, - { - "epoch": 11.88, - "learning_rate": 1.536761536823849e-05, - "loss": 2.7858, - "step": 825000 - }, - { - "epoch": 11.88, - "learning_rate": 1.5315688626485755e-05, - "loss": 2.7834, - "step": 825500 - }, - { - "epoch": 11.89, - "learning_rate": 1.5263865738216528e-05, - "loss": 2.7869, - "step": 826000 - }, - { - "epoch": 11.9, - "learning_rate": 1.5211938996463789e-05, - "loss": 2.7848, - "step": 826500 - }, - { - "epoch": 11.9, - "learning_rate": 1.5160012254711054e-05, - "loss": 2.7913, - "step": 827000 - }, - { - "epoch": 11.91, - "learning_rate": 1.510808551295832e-05, - "loss": 2.7855, - "step": 827500 - }, - { - "epoch": 11.92, - "learning_rate": 1.5056158771205584e-05, - "loss": 2.7869, - "step": 828000 - }, - { - "epoch": 11.93, - "learning_rate": 1.5004335882936355e-05, - "loss": 2.7922, - "step": 828500 - }, - { - "epoch": 11.93, - "learning_rate": 1.4952409141183618e-05, - "loss": 2.7892, - "step": 829000 - }, - { - "epoch": 11.94, - "learning_rate": 1.4900482399430885e-05, - "loss": 2.7889, - "step": 829500 - }, - { - "epoch": 11.95, - "learning_rate": 1.4848555657678148e-05, - "loss": 2.7905, - "step": 830000 - }, - { - "epoch": 11.95, - "learning_rate": 1.479673276940892e-05, - "loss": 2.786, - "step": 830500 - }, - { - "epoch": 11.96, - "learning_rate": 1.4744806027656183e-05, - "loss": 2.7905, - "step": 831000 - }, - { - "epoch": 11.97, - "learning_rate": 1.469287928590345e-05, - "loss": 2.7838, - "step": 831500 - }, - { - "epoch": 11.98, - "learning_rate": 1.4640952544150712e-05, - "loss": 2.786, - "step": 832000 - }, - { - "epoch": 11.98, - "learning_rate": 1.4589129655881484e-05, - "loss": 2.7855, - "step": 832500 - }, - { - "epoch": 11.99, - "learning_rate": 1.4537202914128747e-05, - "loss": 2.7836, - "step": 833000 - }, - { - "epoch": 12.0, - "learning_rate": 1.4485276172376014e-05, - "loss": 2.7894, - "step": 833500 - }, - { - "epoch": 12.0, - "eval_accuracy": 0.5055095615843279, - "eval_loss": 2.6172478199005127, - "eval_runtime": 556.0647, - "eval_samples_per_second": 969.198, - "eval_steps_per_second": 40.384, - "step": 833676 - }, - { - "epoch": 12.0, - "learning_rate": 1.4433349430623277e-05, - "loss": 2.7869, - "step": 834000 - }, - { - "epoch": 12.01, - "learning_rate": 1.4381422688870542e-05, - "loss": 2.784, - "step": 834500 - }, - { - "epoch": 12.02, - "learning_rate": 1.4329495947117808e-05, - "loss": 2.7852, - "step": 835000 - }, - { - "epoch": 12.03, - "learning_rate": 1.4277569205365072e-05, - "loss": 2.7812, - "step": 835500 - }, - { - "epoch": 12.03, - "learning_rate": 1.4225746317095843e-05, - "loss": 2.7795, - "step": 836000 - }, - { - "epoch": 12.04, - "learning_rate": 1.4173819575343106e-05, - "loss": 2.7886, - "step": 836500 - }, - { - "epoch": 12.05, - "learning_rate": 1.4121892833590373e-05, - "loss": 2.7786, - "step": 837000 - }, - { - "epoch": 12.06, - "learning_rate": 1.4069966091837636e-05, - "loss": 2.7819, - "step": 837500 - }, - { - "epoch": 12.06, - "learning_rate": 1.4018039350084902e-05, - "loss": 2.7792, - "step": 838000 - }, - { - "epoch": 12.07, - "learning_rate": 1.396621646181567e-05, - "loss": 2.7834, - "step": 838500 - }, - { - "epoch": 12.08, - "learning_rate": 1.3914289720062937e-05, - "loss": 2.7768, - "step": 839000 - }, - { - "epoch": 12.08, - "learning_rate": 1.38623629783102e-05, - "loss": 2.7793, - "step": 839500 - }, - { - "epoch": 12.09, - "learning_rate": 1.3810436236557463e-05, - "loss": 2.7784, - "step": 840000 - }, - { - "epoch": 12.1, - "learning_rate": 1.3758613348288235e-05, - "loss": 2.7776, - "step": 840500 - }, - { - "epoch": 12.11, - "learning_rate": 1.3706686606535501e-05, - "loss": 2.7822, - "step": 841000 - }, - { - "epoch": 12.11, - "learning_rate": 1.3654759864782765e-05, - "loss": 2.7877, - "step": 841500 - }, - { - "epoch": 12.12, - "learning_rate": 1.360283312303003e-05, - "loss": 2.7802, - "step": 842000 - }, - { - "epoch": 12.13, - "learning_rate": 1.3551010234760799e-05, - "loss": 2.7854, - "step": 842500 - }, - { - "epoch": 12.13, - "learning_rate": 1.3499083493008066e-05, - "loss": 2.7821, - "step": 843000 - }, - { - "epoch": 12.14, - "learning_rate": 1.344715675125533e-05, - "loss": 2.7788, - "step": 843500 - }, - { - "epoch": 12.15, - "learning_rate": 1.3395230009502594e-05, - "loss": 2.7802, - "step": 844000 - }, - { - "epoch": 12.16, - "learning_rate": 1.334330326774986e-05, - "loss": 2.7772, - "step": 844500 - }, - { - "epoch": 12.16, - "learning_rate": 1.3291376525997124e-05, - "loss": 2.7777, - "step": 845000 - }, - { - "epoch": 12.17, - "learning_rate": 1.323944978424439e-05, - "loss": 2.7751, - "step": 845500 - }, - { - "epoch": 12.18, - "learning_rate": 1.3187523042491653e-05, - "loss": 2.7846, - "step": 846000 - }, - { - "epoch": 12.18, - "learning_rate": 1.3135700154222425e-05, - "loss": 2.7809, - "step": 846500 - }, - { - "epoch": 12.19, - "learning_rate": 1.3083773412469688e-05, - "loss": 2.776, - "step": 847000 - }, - { - "epoch": 12.2, - "learning_rate": 1.3031846670716955e-05, - "loss": 2.7767, - "step": 847500 - }, - { - "epoch": 12.21, - "learning_rate": 1.2980023782447722e-05, - "loss": 2.7834, - "step": 848000 - }, - { - "epoch": 12.21, - "learning_rate": 1.2928097040694989e-05, - "loss": 2.7801, - "step": 848500 - }, - { - "epoch": 12.22, - "learning_rate": 1.2876170298942252e-05, - "loss": 2.7782, - "step": 849000 - }, - { - "epoch": 12.23, - "learning_rate": 1.2824243557189517e-05, - "loss": 2.7877, - "step": 849500 - }, - { - "epoch": 12.23, - "learning_rate": 1.2772420668920287e-05, - "loss": 2.783, - "step": 850000 - }, - { - "epoch": 12.24, - "learning_rate": 1.2720493927167553e-05, - "loss": 2.7825, - "step": 850500 - }, - { - "epoch": 12.25, - "learning_rate": 1.2668567185414818e-05, - "loss": 2.78, - "step": 851000 - }, - { - "epoch": 12.26, - "learning_rate": 1.2616640443662082e-05, - "loss": 2.7763, - "step": 851500 - }, - { - "epoch": 12.26, - "learning_rate": 1.2564713701909348e-05, - "loss": 2.7805, - "step": 852000 - }, - { - "epoch": 12.27, - "learning_rate": 1.2512890813640118e-05, - "loss": 2.7834, - "step": 852500 - }, - { - "epoch": 12.28, - "learning_rate": 1.2460964071887383e-05, - "loss": 2.7768, - "step": 853000 - }, - { - "epoch": 12.29, - "learning_rate": 1.2409037330134648e-05, - "loss": 2.7782, - "step": 853500 - }, - { - "epoch": 12.29, - "learning_rate": 1.235711058838191e-05, - "loss": 2.7797, - "step": 854000 - }, - { - "epoch": 12.3, - "learning_rate": 1.2305183846629176e-05, - "loss": 2.7775, - "step": 854500 - }, - { - "epoch": 12.31, - "learning_rate": 1.225325710487644e-05, - "loss": 2.7767, - "step": 855000 - }, - { - "epoch": 12.31, - "learning_rate": 1.2201330363123705e-05, - "loss": 2.781, - "step": 855500 - }, - { - "epoch": 12.32, - "learning_rate": 1.214940362137097e-05, - "loss": 2.7836, - "step": 856000 - }, - { - "epoch": 12.33, - "learning_rate": 1.209758073310174e-05, - "loss": 2.7769, - "step": 856500 - }, - { - "epoch": 12.34, - "learning_rate": 1.2045757844832511e-05, - "loss": 2.7768, - "step": 857000 - }, - { - "epoch": 12.34, - "learning_rate": 1.1993831103079775e-05, - "loss": 2.7803, - "step": 857500 - }, - { - "epoch": 12.35, - "learning_rate": 1.194190436132704e-05, - "loss": 2.7746, - "step": 858000 - }, - { - "epoch": 12.36, - "learning_rate": 1.1889977619574304e-05, - "loss": 2.7826, - "step": 858500 - }, - { - "epoch": 12.36, - "learning_rate": 1.1838050877821571e-05, - "loss": 2.7776, - "step": 859000 - }, - { - "epoch": 12.37, - "learning_rate": 1.178622798955234e-05, - "loss": 2.7823, - "step": 859500 - }, - { - "epoch": 12.38, - "learning_rate": 1.1734301247799605e-05, - "loss": 2.7855, - "step": 860000 - }, - { - "epoch": 12.39, - "learning_rate": 1.168237450604687e-05, - "loss": 2.7795, - "step": 860500 - }, - { - "epoch": 12.39, - "learning_rate": 1.1630447764294135e-05, - "loss": 2.7811, - "step": 861000 - }, - { - "epoch": 12.4, - "learning_rate": 1.15785210225414e-05, - "loss": 2.7801, - "step": 861500 - }, - { - "epoch": 12.41, - "learning_rate": 1.1526594280788663e-05, - "loss": 2.773, - "step": 862000 - }, - { - "epoch": 12.41, - "learning_rate": 1.1474771392519435e-05, - "loss": 2.7787, - "step": 862500 - }, - { - "epoch": 12.42, - "learning_rate": 1.14228446507667e-05, - "loss": 2.7825, - "step": 863000 - }, - { - "epoch": 12.43, - "learning_rate": 1.1370917909013963e-05, - "loss": 2.783, - "step": 863500 - }, - { - "epoch": 12.44, - "learning_rate": 1.1318991167261228e-05, - "loss": 2.7753, - "step": 864000 - }, - { - "epoch": 12.44, - "learning_rate": 1.1267064425508493e-05, - "loss": 2.7734, - "step": 864500 - }, - { - "epoch": 12.45, - "learning_rate": 1.1215137683755758e-05, - "loss": 2.7773, - "step": 865000 - }, - { - "epoch": 12.46, - "learning_rate": 1.1163210942003022e-05, - "loss": 2.7752, - "step": 865500 - }, - { - "epoch": 12.47, - "learning_rate": 1.1111284200250287e-05, - "loss": 2.7795, - "step": 866000 - }, - { - "epoch": 12.47, - "learning_rate": 1.1059461311981057e-05, - "loss": 2.7801, - "step": 866500 - }, - { - "epoch": 12.48, - "learning_rate": 1.1007534570228324e-05, - "loss": 2.7781, - "step": 867000 - }, - { - "epoch": 12.49, - "learning_rate": 1.0955711681959093e-05, - "loss": 2.7805, - "step": 867500 - }, - { - "epoch": 12.49, - "learning_rate": 1.0903784940206358e-05, - "loss": 2.7758, - "step": 868000 - }, - { - "epoch": 12.5, - "learning_rate": 1.0851858198453623e-05, - "loss": 2.7806, - "step": 868500 - }, - { - "epoch": 12.51, - "learning_rate": 1.0799931456700888e-05, - "loss": 2.7763, - "step": 869000 - }, - { - "epoch": 12.52, - "learning_rate": 1.0748108568431658e-05, - "loss": 2.7779, - "step": 869500 - }, - { - "epoch": 12.52, - "learning_rate": 1.0696181826678922e-05, - "loss": 2.7746, - "step": 870000 - }, - { - "epoch": 12.53, - "learning_rate": 1.0644255084926187e-05, - "loss": 2.7768, - "step": 870500 - }, - { - "epoch": 12.54, - "learning_rate": 1.059232834317345e-05, - "loss": 2.7761, - "step": 871000 - }, - { - "epoch": 12.54, - "learning_rate": 1.0540401601420715e-05, - "loss": 2.7745, - "step": 871500 - }, - { - "epoch": 12.55, - "learning_rate": 1.048847485966798e-05, - "loss": 2.7766, - "step": 872000 - }, - { - "epoch": 12.56, - "learning_rate": 1.0436548117915245e-05, - "loss": 2.7734, - "step": 872500 - }, - { - "epoch": 12.57, - "learning_rate": 1.038462137616251e-05, - "loss": 2.7794, - "step": 873000 - }, - { - "epoch": 12.57, - "learning_rate": 1.033279848789328e-05, - "loss": 2.7796, - "step": 873500 - }, - { - "epoch": 12.58, - "learning_rate": 1.0280871746140545e-05, - "loss": 2.7727, - "step": 874000 - }, - { - "epoch": 12.59, - "learning_rate": 1.0229048857871314e-05, - "loss": 2.7794, - "step": 874500 - }, - { - "epoch": 12.59, - "learning_rate": 1.0177122116118581e-05, - "loss": 2.7735, - "step": 875000 - }, - { - "epoch": 12.6, - "learning_rate": 1.0125195374365846e-05, - "loss": 2.7776, - "step": 875500 - }, - { - "epoch": 12.61, - "learning_rate": 1.007326863261311e-05, - "loss": 2.774, - "step": 876000 - }, - { - "epoch": 12.62, - "learning_rate": 1.002144574434388e-05, - "loss": 2.7807, - "step": 876500 - }, - { - "epoch": 12.62, - "learning_rate": 9.969519002591145e-06, - "loss": 2.7736, - "step": 877000 - }, - { - "epoch": 12.63, - "learning_rate": 9.91759226083841e-06, - "loss": 2.7706, - "step": 877500 - }, - { - "epoch": 12.64, - "learning_rate": 9.865665519085675e-06, - "loss": 2.774, - "step": 878000 - }, - { - "epoch": 12.65, - "learning_rate": 9.81373877733294e-06, - "loss": 2.7725, - "step": 878500 - }, - { - "epoch": 12.65, - "learning_rate": 9.761812035580203e-06, - "loss": 2.7807, - "step": 879000 - }, - { - "epoch": 12.66, - "learning_rate": 9.709885293827468e-06, - "loss": 2.7738, - "step": 879500 - }, - { - "epoch": 12.67, - "learning_rate": 9.65806240555824e-06, - "loss": 2.7749, - "step": 880000 - }, - { - "epoch": 12.67, - "learning_rate": 9.606135663805503e-06, - "loss": 2.7736, - "step": 880500 - }, - { - "epoch": 12.68, - "learning_rate": 9.554208922052768e-06, - "loss": 2.7764, - "step": 881000 - }, - { - "epoch": 12.69, - "learning_rate": 9.502282180300032e-06, - "loss": 2.7756, - "step": 881500 - }, - { - "epoch": 12.7, - "learning_rate": 9.450459292030802e-06, - "loss": 2.7762, - "step": 882000 - }, - { - "epoch": 12.7, - "learning_rate": 9.398532550278067e-06, - "loss": 2.7755, - "step": 882500 - }, - { - "epoch": 12.71, - "learning_rate": 9.346605808525334e-06, - "loss": 2.7792, - "step": 883000 - }, - { - "epoch": 12.72, - "learning_rate": 9.294679066772598e-06, - "loss": 2.7758, - "step": 883500 - }, - { - "epoch": 12.72, - "learning_rate": 9.242752325019863e-06, - "loss": 2.7782, - "step": 884000 - }, - { - "epoch": 12.73, - "learning_rate": 9.190929436750633e-06, - "loss": 2.7763, - "step": 884500 - }, - { - "epoch": 12.74, - "learning_rate": 9.139002694997898e-06, - "loss": 2.7738, - "step": 885000 - }, - { - "epoch": 12.75, - "learning_rate": 9.087075953245163e-06, - "loss": 2.7739, - "step": 885500 - }, - { - "epoch": 12.75, - "learning_rate": 9.035149211492428e-06, - "loss": 2.7787, - "step": 886000 - }, - { - "epoch": 12.76, - "learning_rate": 8.983222469739691e-06, - "loss": 2.7699, - "step": 886500 - }, - { - "epoch": 12.77, - "learning_rate": 8.931399581470462e-06, - "loss": 2.7746, - "step": 887000 - }, - { - "epoch": 12.77, - "learning_rate": 8.879472839717727e-06, - "loss": 2.774, - "step": 887500 - }, - { - "epoch": 12.78, - "learning_rate": 8.827546097964992e-06, - "loss": 2.7694, - "step": 888000 - }, - { - "epoch": 12.79, - "learning_rate": 8.775619356212255e-06, - "loss": 2.7754, - "step": 888500 - }, - { - "epoch": 12.8, - "learning_rate": 8.72369261445952e-06, - "loss": 2.7787, - "step": 889000 - }, - { - "epoch": 12.8, - "learning_rate": 8.671765872706785e-06, - "loss": 2.7732, - "step": 889500 - }, - { - "epoch": 12.81, - "learning_rate": 8.61983913095405e-06, - "loss": 2.7777, - "step": 890000 - }, - { - "epoch": 12.82, - "learning_rate": 8.56801624268482e-06, - "loss": 2.7704, - "step": 890500 - }, - { - "epoch": 12.83, - "learning_rate": 8.516089500932086e-06, - "loss": 2.7788, - "step": 891000 - }, - { - "epoch": 12.83, - "learning_rate": 8.464162759179351e-06, - "loss": 2.7706, - "step": 891500 - }, - { - "epoch": 12.84, - "learning_rate": 8.412236017426616e-06, - "loss": 2.7709, - "step": 892000 - }, - { - "epoch": 12.85, - "learning_rate": 8.360413129157386e-06, - "loss": 2.7779, - "step": 892500 - }, - { - "epoch": 12.85, - "learning_rate": 8.30848638740465e-06, - "loss": 2.774, - "step": 893000 - }, - { - "epoch": 12.86, - "learning_rate": 8.256559645651915e-06, - "loss": 2.7752, - "step": 893500 - }, - { - "epoch": 12.87, - "learning_rate": 8.20463290389918e-06, - "loss": 2.7729, - "step": 894000 - }, - { - "epoch": 12.88, - "learning_rate": 8.152706162146444e-06, - "loss": 2.7768, - "step": 894500 - }, - { - "epoch": 12.88, - "learning_rate": 8.100779420393708e-06, - "loss": 2.7796, - "step": 895000 - }, - { - "epoch": 12.89, - "learning_rate": 8.048852678640973e-06, - "loss": 2.773, - "step": 895500 - }, - { - "epoch": 12.9, - "learning_rate": 7.997029790371743e-06, - "loss": 2.7731, - "step": 896000 - }, - { - "epoch": 12.9, - "learning_rate": 7.945103048619008e-06, - "loss": 2.7717, - "step": 896500 - }, - { - "epoch": 12.91, - "learning_rate": 7.893176306866273e-06, - "loss": 2.774, - "step": 897000 - }, - { - "epoch": 12.92, - "learning_rate": 7.841249565113538e-06, - "loss": 2.7721, - "step": 897500 - }, - { - "epoch": 12.93, - "learning_rate": 7.789322823360804e-06, - "loss": 2.7745, - "step": 898000 - }, - { - "epoch": 12.93, - "learning_rate": 7.737396081608067e-06, - "loss": 2.7747, - "step": 898500 - }, - { - "epoch": 12.94, - "learning_rate": 7.685469339855332e-06, - "loss": 2.772, - "step": 899000 - }, - { - "epoch": 12.95, - "learning_rate": 7.633542598102597e-06, - "loss": 2.772, - "step": 899500 - }, - { - "epoch": 12.95, - "learning_rate": 7.581719709833368e-06, - "loss": 2.7748, - "step": 900000 - }, - { - "epoch": 12.96, - "learning_rate": 7.529896821564137e-06, - "loss": 2.7667, - "step": 900500 - }, - { - "epoch": 12.97, - "learning_rate": 7.477970079811403e-06, - "loss": 2.7705, - "step": 901000 - }, - { - "epoch": 12.98, - "learning_rate": 7.426043338058668e-06, - "loss": 2.7674, - "step": 901500 - }, - { - "epoch": 12.98, - "learning_rate": 7.374116596305931e-06, - "loss": 2.7729, - "step": 902000 - }, - { - "epoch": 12.99, - "learning_rate": 7.322189854553196e-06, - "loss": 2.7731, - "step": 902500 - }, - { - "epoch": 13.0, - "learning_rate": 7.2703669662839675e-06, - "loss": 2.7728, - "step": 903000 - }, - { - "epoch": 13.0, - "eval_accuracy": 0.5083214034814523, - "eval_loss": 2.5994162559509277, - "eval_runtime": 554.8265, - "eval_samples_per_second": 971.361, - "eval_steps_per_second": 40.474, - "step": 903149 - }, - { - "epoch": 13.01, - "learning_rate": 7.218440224531231e-06, - "loss": 2.7671, - "step": 903500 - }, - { - "epoch": 13.01, - "learning_rate": 7.1665134827784964e-06, - "loss": 2.7743, - "step": 904000 - }, - { - "epoch": 13.02, - "learning_rate": 7.114586741025761e-06, - "loss": 2.7692, - "step": 904500 - }, - { - "epoch": 13.03, - "learning_rate": 7.062659999273026e-06, - "loss": 2.7681, - "step": 905000 - }, - { - "epoch": 13.03, - "learning_rate": 7.010733257520291e-06, - "loss": 2.7708, - "step": 905500 - }, - { - "epoch": 13.04, - "learning_rate": 6.958910369251061e-06, - "loss": 2.7629, - "step": 906000 - }, - { - "epoch": 13.05, - "learning_rate": 6.906983627498326e-06, - "loss": 2.766, - "step": 906500 - }, - { - "epoch": 13.06, - "learning_rate": 6.855056885745591e-06, - "loss": 2.7656, - "step": 907000 - }, - { - "epoch": 13.06, - "learning_rate": 6.8031301439928555e-06, - "loss": 2.7697, - "step": 907500 - }, - { - "epoch": 13.07, - "learning_rate": 6.7512034022401195e-06, - "loss": 2.7668, - "step": 908000 - }, - { - "epoch": 13.08, - "learning_rate": 6.6992766604873845e-06, - "loss": 2.769, - "step": 908500 - }, - { - "epoch": 13.08, - "learning_rate": 6.647453772218156e-06, - "loss": 2.7645, - "step": 909000 - }, - { - "epoch": 13.09, - "learning_rate": 6.595527030465419e-06, - "loss": 2.7676, - "step": 909500 - }, - { - "epoch": 13.1, - "learning_rate": 6.543600288712684e-06, - "loss": 2.7681, - "step": 910000 - }, - { - "epoch": 13.11, - "learning_rate": 6.491673546959949e-06, - "loss": 2.7687, - "step": 910500 - }, - { - "epoch": 13.11, - "learning_rate": 6.43985065869072e-06, - "loss": 2.7705, - "step": 911000 - }, - { - "epoch": 13.12, - "learning_rate": 6.387923916937983e-06, - "loss": 2.7645, - "step": 911500 - }, - { - "epoch": 13.13, - "learning_rate": 6.335997175185249e-06, - "loss": 2.7686, - "step": 912000 - }, - { - "epoch": 13.13, - "learning_rate": 6.284070433432514e-06, - "loss": 2.7711, - "step": 912500 - }, - { - "epoch": 13.14, - "learning_rate": 6.232143691679779e-06, - "loss": 2.7682, - "step": 913000 - }, - { - "epoch": 13.15, - "learning_rate": 6.1803208034105485e-06, - "loss": 2.7709, - "step": 913500 - }, - { - "epoch": 13.16, - "learning_rate": 6.128394061657813e-06, - "loss": 2.7721, - "step": 914000 - }, - { - "epoch": 13.16, - "learning_rate": 6.076467319905078e-06, - "loss": 2.7701, - "step": 914500 - }, - { - "epoch": 13.17, - "learning_rate": 6.024540578152342e-06, - "loss": 2.766, - "step": 915000 - }, - { - "epoch": 13.18, - "learning_rate": 5.972717689883113e-06, - "loss": 2.7641, - "step": 915500 - }, - { - "epoch": 13.18, - "learning_rate": 5.920790948130378e-06, - "loss": 2.7665, - "step": 916000 - }, - { - "epoch": 13.19, - "learning_rate": 5.868864206377643e-06, - "loss": 2.768, - "step": 916500 - }, - { - "epoch": 13.2, - "learning_rate": 5.8169374646249076e-06, - "loss": 2.7703, - "step": 917000 - }, - { - "epoch": 13.21, - "learning_rate": 5.7650107228721725e-06, - "loss": 2.7685, - "step": 917500 - }, - { - "epoch": 13.21, - "learning_rate": 5.7130839811194365e-06, - "loss": 2.7681, - "step": 918000 - }, - { - "epoch": 13.22, - "learning_rate": 5.661261092850207e-06, - "loss": 2.7664, - "step": 918500 - }, - { - "epoch": 13.23, - "learning_rate": 5.609334351097472e-06, - "loss": 2.7648, - "step": 919000 - }, - { - "epoch": 13.24, - "learning_rate": 5.557511462828242e-06, - "loss": 2.7704, - "step": 919500 - }, - { - "epoch": 13.24, - "learning_rate": 5.5055847210755064e-06, - "loss": 2.7646, - "step": 920000 - }, - { - "epoch": 13.25, - "learning_rate": 5.453657979322771e-06, - "loss": 2.7614, - "step": 920500 - }, - { - "epoch": 13.26, - "learning_rate": 5.401731237570037e-06, - "loss": 2.7662, - "step": 921000 - }, - { - "epoch": 13.26, - "learning_rate": 5.349804495817301e-06, - "loss": 2.7648, - "step": 921500 - }, - { - "epoch": 13.27, - "learning_rate": 5.297877754064566e-06, - "loss": 2.7652, - "step": 922000 - }, - { - "epoch": 13.28, - "learning_rate": 5.245951012311831e-06, - "loss": 2.7639, - "step": 922500 - }, - { - "epoch": 13.29, - "learning_rate": 5.194024270559095e-06, - "loss": 2.7601, - "step": 923000 - }, - { - "epoch": 13.29, - "learning_rate": 5.142097528806361e-06, - "loss": 2.7665, - "step": 923500 - }, - { - "epoch": 13.3, - "learning_rate": 5.090170787053625e-06, - "loss": 2.7682, - "step": 924000 - }, - { - "epoch": 13.31, - "learning_rate": 5.038347898784395e-06, - "loss": 2.7652, - "step": 924500 - }, - { - "epoch": 13.31, - "learning_rate": 4.98642115703166e-06, - "loss": 2.7607, - "step": 925000 - }, - { - "epoch": 13.32, - "learning_rate": 4.934494415278925e-06, - "loss": 2.7694, - "step": 925500 - }, - { - "epoch": 13.33, - "learning_rate": 4.882567673526189e-06, - "loss": 2.7618, - "step": 926000 - }, - { - "epoch": 13.34, - "learning_rate": 4.830640931773454e-06, - "loss": 2.7679, - "step": 926500 - }, - { - "epoch": 13.34, - "learning_rate": 4.778714190020719e-06, - "loss": 2.7704, - "step": 927000 - }, - { - "epoch": 13.35, - "learning_rate": 4.7268913017514894e-06, - "loss": 2.7645, - "step": 927500 - }, - { - "epoch": 13.36, - "learning_rate": 4.674964559998754e-06, - "loss": 2.7672, - "step": 928000 - }, - { - "epoch": 13.36, - "learning_rate": 4.623037818246019e-06, - "loss": 2.7705, - "step": 928500 - }, - { - "epoch": 13.37, - "learning_rate": 4.571111076493283e-06, - "loss": 2.758, - "step": 929000 - }, - { - "epoch": 13.38, - "learning_rate": 4.519184334740548e-06, - "loss": 2.7695, - "step": 929500 - }, - { - "epoch": 13.39, - "learning_rate": 4.467361446471319e-06, - "loss": 2.768, - "step": 930000 - }, - { - "epoch": 13.39, - "learning_rate": 4.415434704718583e-06, - "loss": 2.7683, - "step": 930500 - }, - { - "epoch": 13.4, - "learning_rate": 4.363507962965848e-06, - "loss": 2.7695, - "step": 931000 - }, - { - "epoch": 13.41, - "learning_rate": 4.311581221213113e-06, - "loss": 2.7767, - "step": 931500 - }, - { - "epoch": 13.42, - "learning_rate": 4.2596544794603774e-06, - "loss": 2.7615, - "step": 932000 - }, - { - "epoch": 13.42, - "learning_rate": 4.207727737707642e-06, - "loss": 2.761, - "step": 932500 - }, - { - "epoch": 13.43, - "learning_rate": 4.155800995954906e-06, - "loss": 2.767, - "step": 933000 - }, - { - "epoch": 13.44, - "learning_rate": 4.103978107685677e-06, - "loss": 2.7671, - "step": 933500 - }, - { - "epoch": 13.44, - "learning_rate": 4.052051365932942e-06, - "loss": 2.7686, - "step": 934000 - }, - { - "epoch": 13.45, - "learning_rate": 4.000124624180207e-06, - "loss": 2.7656, - "step": 934500 - }, - { - "epoch": 13.46, - "learning_rate": 3.948197882427472e-06, - "loss": 2.7653, - "step": 935000 - }, - { - "epoch": 13.47, - "learning_rate": 3.8962711406747365e-06, - "loss": 2.7629, - "step": 935500 - }, - { - "epoch": 13.47, - "learning_rate": 3.844448252405507e-06, - "loss": 2.7645, - "step": 936000 - }, - { - "epoch": 13.48, - "learning_rate": 3.792521510652771e-06, - "loss": 2.7672, - "step": 936500 - }, - { - "epoch": 13.49, - "learning_rate": 3.740594768900036e-06, - "loss": 2.7698, - "step": 937000 - }, - { - "epoch": 13.49, - "learning_rate": 3.6886680271473013e-06, - "loss": 2.7685, - "step": 937500 - }, - { - "epoch": 13.5, - "learning_rate": 3.6367412853945653e-06, - "loss": 2.7602, - "step": 938000 - }, - { - "epoch": 13.51, - "learning_rate": 3.5849183971253358e-06, - "loss": 2.7635, - "step": 938500 - }, - { - "epoch": 13.52, - "learning_rate": 3.5329916553726007e-06, - "loss": 2.7602, - "step": 939000 - }, - { - "epoch": 13.52, - "learning_rate": 3.481064913619865e-06, - "loss": 2.7568, - "step": 939500 - }, - { - "epoch": 13.53, - "learning_rate": 3.42913817186713e-06, - "loss": 2.7642, - "step": 940000 - }, - { - "epoch": 13.54, - "learning_rate": 3.377211430114395e-06, - "loss": 2.7657, - "step": 940500 - }, - { - "epoch": 13.54, - "learning_rate": 3.3252846883616595e-06, - "loss": 2.7644, - "step": 941000 - }, - { - "epoch": 13.55, - "learning_rate": 3.2734618000924295e-06, - "loss": 2.765, - "step": 941500 - }, - { - "epoch": 13.56, - "learning_rate": 3.221535058339695e-06, - "loss": 2.7656, - "step": 942000 - }, - { - "epoch": 13.57, - "learning_rate": 3.169608316586959e-06, - "loss": 2.7593, - "step": 942500 - }, - { - "epoch": 13.57, - "learning_rate": 3.1176815748342242e-06, - "loss": 2.7662, - "step": 943000 - }, - { - "epoch": 13.58, - "learning_rate": 3.0657548330814887e-06, - "loss": 2.7595, - "step": 943500 - }, - { - "epoch": 13.59, - "learning_rate": 3.013931944812259e-06, - "loss": 2.7605, - "step": 944000 - }, - { - "epoch": 13.6, - "learning_rate": 2.9620052030595237e-06, - "loss": 2.7638, - "step": 944500 - }, - { - "epoch": 13.6, - "learning_rate": 2.9100784613067886e-06, - "loss": 2.7634, - "step": 945000 - }, - { - "epoch": 13.61, - "learning_rate": 2.8581517195540535e-06, - "loss": 2.7629, - "step": 945500 - }, - { - "epoch": 13.62, - "learning_rate": 2.806224977801318e-06, - "loss": 2.7662, - "step": 946000 - }, - { - "epoch": 13.62, - "learning_rate": 2.754298236048583e-06, - "loss": 2.7636, - "step": 946500 - }, - { - "epoch": 13.63, - "learning_rate": 2.7023714942958473e-06, - "loss": 2.7625, - "step": 947000 - }, - { - "epoch": 13.64, - "learning_rate": 2.650548606026618e-06, - "loss": 2.7606, - "step": 947500 - }, - { - "epoch": 13.65, - "learning_rate": 2.5986218642738823e-06, - "loss": 2.7685, - "step": 948000 - }, - { - "epoch": 13.65, - "learning_rate": 2.546695122521147e-06, - "loss": 2.7667, - "step": 948500 - }, - { - "epoch": 13.66, - "learning_rate": 2.494768380768412e-06, - "loss": 2.7613, - "step": 949000 - }, - { - "epoch": 13.67, - "learning_rate": 2.4428416390156766e-06, - "loss": 2.7675, - "step": 949500 - }, - { - "epoch": 13.67, - "learning_rate": 2.391018750746447e-06, - "loss": 2.766, - "step": 950000 - }, - { - "epoch": 13.68, - "learning_rate": 2.3390920089937115e-06, - "loss": 2.7623, - "step": 950500 - }, - { - "epoch": 13.69, - "learning_rate": 2.287165267240977e-06, - "loss": 2.7623, - "step": 951000 - }, - { - "epoch": 13.7, - "learning_rate": 2.2352385254882413e-06, - "loss": 2.7631, - "step": 951500 - }, - { - "epoch": 13.7, - "learning_rate": 2.1833117837355062e-06, - "loss": 2.7559, - "step": 952000 - }, - { - "epoch": 13.71, - "learning_rate": 2.1313850419827707e-06, - "loss": 2.7593, - "step": 952500 - }, - { - "epoch": 13.72, - "learning_rate": 2.0794583002300356e-06, - "loss": 2.7603, - "step": 953000 - }, - { - "epoch": 13.72, - "learning_rate": 2.0276354119608057e-06, - "loss": 2.7611, - "step": 953500 - }, - { - "epoch": 13.73, - "learning_rate": 1.9757086702080706e-06, - "loss": 2.7659, - "step": 954000 - }, - { - "epoch": 13.74, - "learning_rate": 1.9237819284553355e-06, - "loss": 2.7671, - "step": 954500 - }, - { - "epoch": 13.75, - "learning_rate": 1.8718551867026e-06, - "loss": 2.7695, - "step": 955000 - }, - { - "epoch": 13.75, - "learning_rate": 1.8199284449498646e-06, - "loss": 2.7619, - "step": 955500 - }, - { - "epoch": 13.76, - "learning_rate": 1.7681055566806351e-06, - "loss": 2.765, - "step": 956000 - }, - { - "epoch": 13.77, - "learning_rate": 1.7161788149278996e-06, - "loss": 2.7589, - "step": 956500 - }, - { - "epoch": 13.78, - "learning_rate": 1.6642520731751647e-06, - "loss": 2.7621, - "step": 957000 - }, - { - "epoch": 13.78, - "learning_rate": 1.6123253314224294e-06, - "loss": 2.7618, - "step": 957500 - }, - { - "epoch": 13.79, - "learning_rate": 1.560398589669694e-06, - "loss": 2.7611, - "step": 958000 - }, - { - "epoch": 13.8, - "learning_rate": 1.5084718479169588e-06, - "loss": 2.7601, - "step": 958500 - }, - { - "epoch": 13.8, - "learning_rate": 1.456648959647729e-06, - "loss": 2.7613, - "step": 959000 - }, - { - "epoch": 13.81, - "learning_rate": 1.404722217894994e-06, - "loss": 2.76, - "step": 959500 - }, - { - "epoch": 13.82, - "learning_rate": 1.3527954761422584e-06, - "loss": 2.7567, - "step": 960000 - }, - { - "epoch": 13.83, - "learning_rate": 1.3008687343895233e-06, - "loss": 2.7568, - "step": 960500 - }, - { - "epoch": 13.83, - "learning_rate": 1.248941992636788e-06, - "loss": 2.7613, - "step": 961000 - }, - { - "epoch": 13.84, - "learning_rate": 1.197015250884053e-06, - "loss": 2.7679, - "step": 961500 - }, - { - "epoch": 13.85, - "learning_rate": 1.1450885091313176e-06, - "loss": 2.7643, - "step": 962000 - }, - { - "epoch": 13.85, - "learning_rate": 1.0931617673785823e-06, - "loss": 2.7615, - "step": 962500 - }, - { - "epoch": 13.86, - "learning_rate": 1.0413388791093526e-06, - "loss": 2.7609, - "step": 963000 - }, - { - "epoch": 13.87, - "learning_rate": 9.894121373566175e-07, - "loss": 2.7618, - "step": 963500 - }, - { - "epoch": 13.88, - "learning_rate": 9.375892490873875e-07, - "loss": 2.7576, - "step": 964000 - }, - { - "epoch": 13.88, - "learning_rate": 8.856625073346523e-07, - "loss": 2.7619, - "step": 964500 - }, - { - "epoch": 13.89, - "learning_rate": 8.33735765581917e-07, - "loss": 2.7564, - "step": 965000 - }, - { - "epoch": 13.9, - "learning_rate": 7.818090238291818e-07, - "loss": 2.7631, - "step": 965500 - }, - { - "epoch": 13.9, - "learning_rate": 7.298822820764466e-07, - "loss": 2.7709, - "step": 966000 - }, - { - "epoch": 13.91, - "learning_rate": 6.779555403237114e-07, - "loss": 2.7624, - "step": 966500 - }, - { - "epoch": 13.92, - "learning_rate": 6.261326520544816e-07, - "loss": 2.7607, - "step": 967000 - }, - { - "epoch": 13.93, - "learning_rate": 5.742059103017464e-07, - "loss": 2.7559, - "step": 967500 - }, - { - "epoch": 13.93, - "learning_rate": 5.222791685490112e-07, - "loss": 2.7601, - "step": 968000 - }, - { - "epoch": 13.94, - "learning_rate": 4.7035242679627586e-07, - "loss": 2.7625, - "step": 968500 - }, - { - "epoch": 13.95, - "learning_rate": 4.184256850435406e-07, - "loss": 2.7585, - "step": 969000 - }, - { - "epoch": 13.96, - "learning_rate": 3.666027967743108e-07, - "loss": 2.7573, - "step": 969500 - }, - { - "epoch": 13.96, - "learning_rate": 3.1467605502157556e-07, - "loss": 2.7586, - "step": 970000 - }, - { - "epoch": 13.97, - "learning_rate": 2.627493132688403e-07, - "loss": 2.7611, - "step": 970500 - }, - { - "epoch": 13.98, - "learning_rate": 2.1082257151610508e-07, - "loss": 2.7612, - "step": 971000 - }, - { - "epoch": 13.98, - "learning_rate": 1.5899968324687532e-07, - "loss": 2.7539, - "step": 971500 - }, - { - "epoch": 13.99, - "learning_rate": 1.0707294149414008e-07, - "loss": 2.7624, - "step": 972000 - }, - { - "epoch": 14.0, - "learning_rate": 5.5146199741404825e-08, - "loss": 2.761, - "step": 972500 - }, - { - "epoch": 14.0, - "eval_accuracy": 0.509524975254837, - "eval_loss": 2.5919222831726074, - "eval_runtime": 555.1881, - "eval_samples_per_second": 970.729, - "eval_steps_per_second": 40.448, - "step": 972622 - } - ], - "max_steps": 972622, - "num_train_epochs": 14, - "total_flos": 5.9815806927215e+18, - "trial_name": null, - "trial_params": null -}