diff --git "a/perceived/model/trainer_state.json" "b/perceived/model/trainer_state.json" deleted file mode 100644--- "a/perceived/model/trainer_state.json" +++ /dev/null @@ -1,54900 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 50.0, - "global_step": 4544750, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01, - "learning_rate": 5.5008526321579835e-11, - "loss": 4.768, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 1.1001705264315967e-10, - "loss": 4.784, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 1.6502557896473952e-10, - "loss": 4.757, - "step": 1500 - }, - { - "epoch": 0.02, - "learning_rate": 2.2003410528631934e-10, - "loss": 4.7771, - "step": 2000 - }, - { - "epoch": 0.03, - "learning_rate": 2.750426316078992e-10, - "loss": 4.7514, - "step": 2500 - }, - { - "epoch": 0.03, - "learning_rate": 3.3005115792947905e-10, - "loss": 4.7511, - "step": 3000 - }, - { - "epoch": 0.04, - "learning_rate": 3.850596842510589e-10, - "loss": 4.7648, - "step": 3500 - }, - { - "epoch": 0.04, - "learning_rate": 4.400682105726387e-10, - "loss": 4.7847, - "step": 4000 - }, - { - "epoch": 0.05, - "learning_rate": 4.950767368942185e-10, - "loss": 4.7525, - "step": 4500 - }, - { - "epoch": 0.06, - "learning_rate": 5.500852632157984e-10, - "loss": 4.7737, - "step": 5000 - }, - { - "epoch": 0.06, - "learning_rate": 6.050937895373783e-10, - "loss": 4.764, - "step": 5500 - }, - { - "epoch": 0.07, - "learning_rate": 6.601023158589581e-10, - "loss": 4.7703, - "step": 6000 - }, - { - "epoch": 0.07, - "learning_rate": 7.151108421805379e-10, - "loss": 4.7538, - "step": 6500 - }, - { - "epoch": 0.08, - "learning_rate": 7.701193685021178e-10, - "loss": 4.7734, - "step": 7000 - }, - { - "epoch": 0.08, - "learning_rate": 8.251278948236977e-10, - "loss": 4.759, - "step": 7500 - }, - { - "epoch": 0.09, - "learning_rate": 8.801364211452774e-10, - "loss": 4.7597, - "step": 8000 - }, - { - "epoch": 0.09, - "learning_rate": 9.351449474668573e-10, - "loss": 4.7476, - "step": 8500 - }, - { - "epoch": 0.1, - "learning_rate": 9.90153473788437e-10, - "loss": 4.7311, - "step": 9000 - }, - { - "epoch": 0.1, - "learning_rate": 1.045162000110017e-09, - "loss": 4.7374, - "step": 9500 - }, - { - "epoch": 0.11, - "learning_rate": 1.1001705264315968e-09, - "loss": 4.7695, - "step": 10000 - }, - { - "epoch": 0.12, - "learning_rate": 1.1551790527531766e-09, - "loss": 4.7508, - "step": 10500 - }, - { - "epoch": 0.12, - "learning_rate": 1.2101875790747566e-09, - "loss": 4.7638, - "step": 11000 - }, - { - "epoch": 0.13, - "learning_rate": 1.2651961053963364e-09, - "loss": 4.7644, - "step": 11500 - }, - { - "epoch": 0.13, - "learning_rate": 1.3202046317179162e-09, - "loss": 4.7728, - "step": 12000 - }, - { - "epoch": 0.14, - "learning_rate": 1.3752131580394962e-09, - "loss": 4.7544, - "step": 12500 - }, - { - "epoch": 0.14, - "learning_rate": 1.4302216843610758e-09, - "loss": 4.7541, - "step": 13000 - }, - { - "epoch": 0.15, - "learning_rate": 1.4852302106826558e-09, - "loss": 4.7371, - "step": 13500 - }, - { - "epoch": 0.15, - "learning_rate": 1.5402387370042356e-09, - "loss": 4.7518, - "step": 14000 - }, - { - "epoch": 0.16, - "learning_rate": 1.5952472633258156e-09, - "loss": 4.7438, - "step": 14500 - }, - { - "epoch": 0.17, - "learning_rate": 1.6502557896473954e-09, - "loss": 4.7609, - "step": 15000 - }, - { - "epoch": 0.17, - "learning_rate": 1.705264315968975e-09, - "loss": 4.7393, - "step": 15500 - }, - { - "epoch": 0.18, - "learning_rate": 1.7602728422905547e-09, - "loss": 4.7297, - "step": 16000 - }, - { - "epoch": 0.18, - "learning_rate": 1.8152813686121347e-09, - "loss": 4.7173, - "step": 16500 - }, - { - "epoch": 0.19, - "learning_rate": 1.8702898949337145e-09, - "loss": 4.7246, - "step": 17000 - }, - { - "epoch": 0.19, - "learning_rate": 1.9252984212552945e-09, - "loss": 4.762, - "step": 17500 - }, - { - "epoch": 0.2, - "learning_rate": 1.980306947576874e-09, - "loss": 4.7414, - "step": 18000 - }, - { - "epoch": 0.2, - "learning_rate": 2.035315473898454e-09, - "loss": 4.7654, - "step": 18500 - }, - { - "epoch": 0.21, - "learning_rate": 2.090324000220034e-09, - "loss": 4.7208, - "step": 19000 - }, - { - "epoch": 0.21, - "learning_rate": 2.1453325265416137e-09, - "loss": 4.715, - "step": 19500 - }, - { - "epoch": 0.22, - "learning_rate": 2.2003410528631937e-09, - "loss": 4.7226, - "step": 20000 - }, - { - "epoch": 0.23, - "learning_rate": 2.2553495791847737e-09, - "loss": 4.7242, - "step": 20500 - }, - { - "epoch": 0.23, - "learning_rate": 2.3103581055063532e-09, - "loss": 4.7273, - "step": 21000 - }, - { - "epoch": 0.24, - "learning_rate": 2.3653666318279332e-09, - "loss": 4.7234, - "step": 21500 - }, - { - "epoch": 0.24, - "learning_rate": 2.4203751581495132e-09, - "loss": 4.7021, - "step": 22000 - }, - { - "epoch": 0.25, - "learning_rate": 2.475383684471093e-09, - "loss": 4.6943, - "step": 22500 - }, - { - "epoch": 0.25, - "learning_rate": 2.530392210792673e-09, - "loss": 4.6991, - "step": 23000 - }, - { - "epoch": 0.26, - "learning_rate": 2.585400737114253e-09, - "loss": 4.7006, - "step": 23500 - }, - { - "epoch": 0.26, - "learning_rate": 2.6404092634358324e-09, - "loss": 4.7126, - "step": 24000 - }, - { - "epoch": 0.27, - "learning_rate": 2.6954177897574124e-09, - "loss": 4.7212, - "step": 24500 - }, - { - "epoch": 0.28, - "learning_rate": 2.7504263160789924e-09, - "loss": 4.6977, - "step": 25000 - }, - { - "epoch": 0.28, - "learning_rate": 2.805434842400572e-09, - "loss": 4.7103, - "step": 25500 - }, - { - "epoch": 0.29, - "learning_rate": 2.8604433687221516e-09, - "loss": 4.7067, - "step": 26000 - }, - { - "epoch": 0.29, - "learning_rate": 2.9154518950437316e-09, - "loss": 4.6784, - "step": 26500 - }, - { - "epoch": 0.3, - "learning_rate": 2.9704604213653115e-09, - "loss": 4.6981, - "step": 27000 - }, - { - "epoch": 0.3, - "learning_rate": 3.025468947686891e-09, - "loss": 4.6792, - "step": 27500 - }, - { - "epoch": 0.31, - "learning_rate": 3.080477474008471e-09, - "loss": 4.6849, - "step": 28000 - }, - { - "epoch": 0.31, - "learning_rate": 3.135486000330051e-09, - "loss": 4.6782, - "step": 28500 - }, - { - "epoch": 0.32, - "learning_rate": 3.190494526651631e-09, - "loss": 4.6557, - "step": 29000 - }, - { - "epoch": 0.32, - "learning_rate": 3.2455030529732107e-09, - "loss": 4.648, - "step": 29500 - }, - { - "epoch": 0.33, - "learning_rate": 3.3005115792947907e-09, - "loss": 4.6865, - "step": 30000 - }, - { - "epoch": 0.34, - "learning_rate": 3.3555201056163703e-09, - "loss": 4.6514, - "step": 30500 - }, - { - "epoch": 0.34, - "learning_rate": 3.41052863193795e-09, - "loss": 4.6514, - "step": 31000 - }, - { - "epoch": 0.35, - "learning_rate": 3.4655371582595303e-09, - "loss": 4.6764, - "step": 31500 - }, - { - "epoch": 0.35, - "learning_rate": 3.5205456845811094e-09, - "loss": 4.6575, - "step": 32000 - }, - { - "epoch": 0.36, - "learning_rate": 3.57555421090269e-09, - "loss": 4.6421, - "step": 32500 - }, - { - "epoch": 0.36, - "learning_rate": 3.6305627372242694e-09, - "loss": 4.6165, - "step": 33000 - }, - { - "epoch": 0.37, - "learning_rate": 3.6855712635458494e-09, - "loss": 4.6427, - "step": 33500 - }, - { - "epoch": 0.37, - "learning_rate": 3.740579789867429e-09, - "loss": 4.6247, - "step": 34000 - }, - { - "epoch": 0.38, - "learning_rate": 3.795588316189009e-09, - "loss": 4.623, - "step": 34500 - }, - { - "epoch": 0.39, - "learning_rate": 3.850596842510589e-09, - "loss": 4.6339, - "step": 35000 - }, - { - "epoch": 0.39, - "learning_rate": 3.905605368832169e-09, - "loss": 4.6452, - "step": 35500 - }, - { - "epoch": 0.4, - "learning_rate": 3.960613895153748e-09, - "loss": 4.6301, - "step": 36000 - }, - { - "epoch": 0.4, - "learning_rate": 4.015622421475329e-09, - "loss": 4.6345, - "step": 36500 - }, - { - "epoch": 0.41, - "learning_rate": 4.070630947796908e-09, - "loss": 4.6117, - "step": 37000 - }, - { - "epoch": 0.41, - "learning_rate": 4.125639474118488e-09, - "loss": 4.6143, - "step": 37500 - }, - { - "epoch": 0.42, - "learning_rate": 4.180648000440068e-09, - "loss": 4.5898, - "step": 38000 - }, - { - "epoch": 0.42, - "learning_rate": 4.235656526761648e-09, - "loss": 4.6068, - "step": 38500 - }, - { - "epoch": 0.43, - "learning_rate": 4.290665053083227e-09, - "loss": 4.6021, - "step": 39000 - }, - { - "epoch": 0.43, - "learning_rate": 4.345673579404808e-09, - "loss": 4.6156, - "step": 39500 - }, - { - "epoch": 0.44, - "learning_rate": 4.400682105726387e-09, - "loss": 4.5951, - "step": 40000 - }, - { - "epoch": 0.45, - "learning_rate": 4.455690632047967e-09, - "loss": 4.5732, - "step": 40500 - }, - { - "epoch": 0.45, - "learning_rate": 4.510699158369547e-09, - "loss": 4.571, - "step": 41000 - }, - { - "epoch": 0.46, - "learning_rate": 4.5657076846911265e-09, - "loss": 4.5747, - "step": 41500 - }, - { - "epoch": 0.46, - "learning_rate": 4.6207162110127065e-09, - "loss": 4.5716, - "step": 42000 - }, - { - "epoch": 0.47, - "learning_rate": 4.6757247373342865e-09, - "loss": 4.5713, - "step": 42500 - }, - { - "epoch": 0.47, - "learning_rate": 4.7307332636558665e-09, - "loss": 4.5905, - "step": 43000 - }, - { - "epoch": 0.48, - "learning_rate": 4.785741789977446e-09, - "loss": 4.5881, - "step": 43500 - }, - { - "epoch": 0.48, - "learning_rate": 4.8407503162990265e-09, - "loss": 4.5747, - "step": 44000 - }, - { - "epoch": 0.49, - "learning_rate": 4.895758842620606e-09, - "loss": 4.5705, - "step": 44500 - }, - { - "epoch": 0.5, - "learning_rate": 4.950767368942186e-09, - "loss": 4.5505, - "step": 45000 - }, - { - "epoch": 0.5, - "learning_rate": 5.005775895263766e-09, - "loss": 4.5615, - "step": 45500 - }, - { - "epoch": 0.51, - "learning_rate": 5.060784421585346e-09, - "loss": 4.5524, - "step": 46000 - }, - { - "epoch": 0.51, - "learning_rate": 5.115792947906925e-09, - "loss": 4.5373, - "step": 46500 - }, - { - "epoch": 0.52, - "learning_rate": 5.170801474228506e-09, - "loss": 4.5184, - "step": 47000 - }, - { - "epoch": 0.52, - "learning_rate": 5.225810000550085e-09, - "loss": 4.533, - "step": 47500 - }, - { - "epoch": 0.53, - "learning_rate": 5.280818526871665e-09, - "loss": 4.5517, - "step": 48000 - }, - { - "epoch": 0.53, - "learning_rate": 5.335827053193245e-09, - "loss": 4.5389, - "step": 48500 - }, - { - "epoch": 0.54, - "learning_rate": 5.390835579514825e-09, - "loss": 4.5272, - "step": 49000 - }, - { - "epoch": 0.54, - "learning_rate": 5.445844105836404e-09, - "loss": 4.553, - "step": 49500 - }, - { - "epoch": 0.55, - "learning_rate": 5.500852632157985e-09, - "loss": 4.5444, - "step": 50000 - }, - { - "epoch": 0.56, - "learning_rate": 5.555861158479564e-09, - "loss": 4.541, - "step": 50500 - }, - { - "epoch": 0.56, - "learning_rate": 5.610869684801144e-09, - "loss": 4.5163, - "step": 51000 - }, - { - "epoch": 0.57, - "learning_rate": 5.665878211122724e-09, - "loss": 4.5052, - "step": 51500 - }, - { - "epoch": 0.57, - "learning_rate": 5.720886737444303e-09, - "loss": 4.5229, - "step": 52000 - }, - { - "epoch": 0.58, - "learning_rate": 5.775895263765883e-09, - "loss": 4.5193, - "step": 52500 - }, - { - "epoch": 0.58, - "learning_rate": 5.830903790087463e-09, - "loss": 4.5054, - "step": 53000 - }, - { - "epoch": 0.59, - "learning_rate": 5.885912316409043e-09, - "loss": 4.5003, - "step": 53500 - }, - { - "epoch": 0.59, - "learning_rate": 5.940920842730623e-09, - "loss": 4.5198, - "step": 54000 - }, - { - "epoch": 0.6, - "learning_rate": 5.995929369052203e-09, - "loss": 4.5211, - "step": 54500 - }, - { - "epoch": 0.61, - "learning_rate": 6.050937895373782e-09, - "loss": 4.5238, - "step": 55000 - }, - { - "epoch": 0.61, - "learning_rate": 6.105946421695362e-09, - "loss": 4.509, - "step": 55500 - }, - { - "epoch": 0.62, - "learning_rate": 6.160954948016942e-09, - "loss": 4.4888, - "step": 56000 - }, - { - "epoch": 0.62, - "learning_rate": 6.215963474338522e-09, - "loss": 4.4952, - "step": 56500 - }, - { - "epoch": 0.63, - "learning_rate": 6.270972000660102e-09, - "loss": 4.5136, - "step": 57000 - }, - { - "epoch": 0.63, - "learning_rate": 6.325980526981682e-09, - "loss": 4.5058, - "step": 57500 - }, - { - "epoch": 0.64, - "learning_rate": 6.380989053303262e-09, - "loss": 4.4879, - "step": 58000 - }, - { - "epoch": 0.64, - "learning_rate": 6.4359975796248414e-09, - "loss": 4.4921, - "step": 58500 - }, - { - "epoch": 0.65, - "learning_rate": 6.491006105946421e-09, - "loss": 4.4858, - "step": 59000 - }, - { - "epoch": 0.65, - "learning_rate": 6.546014632268001e-09, - "loss": 4.4903, - "step": 59500 - }, - { - "epoch": 0.66, - "learning_rate": 6.601023158589581e-09, - "loss": 4.4859, - "step": 60000 - }, - { - "epoch": 0.67, - "learning_rate": 6.6560316849111606e-09, - "loss": 4.4696, - "step": 60500 - }, - { - "epoch": 0.67, - "learning_rate": 6.7110402112327406e-09, - "loss": 4.4711, - "step": 61000 - }, - { - "epoch": 0.68, - "learning_rate": 6.766048737554321e-09, - "loss": 4.4755, - "step": 61500 - }, - { - "epoch": 0.68, - "learning_rate": 6.8210572638759e-09, - "loss": 4.4784, - "step": 62000 - }, - { - "epoch": 0.69, - "learning_rate": 6.87606579019748e-09, - "loss": 4.4525, - "step": 62500 - }, - { - "epoch": 0.69, - "learning_rate": 6.9310743165190606e-09, - "loss": 4.459, - "step": 63000 - }, - { - "epoch": 0.7, - "learning_rate": 6.9860828428406406e-09, - "loss": 4.4746, - "step": 63500 - }, - { - "epoch": 0.7, - "learning_rate": 7.041091369162219e-09, - "loss": 4.4519, - "step": 64000 - }, - { - "epoch": 0.71, - "learning_rate": 7.0960998954838e-09, - "loss": 4.4633, - "step": 64500 - }, - { - "epoch": 0.72, - "learning_rate": 7.15110842180538e-09, - "loss": 4.4463, - "step": 65000 - }, - { - "epoch": 0.72, - "learning_rate": 7.20611694812696e-09, - "loss": 4.4813, - "step": 65500 - }, - { - "epoch": 0.73, - "learning_rate": 7.261125474448539e-09, - "loss": 4.4452, - "step": 66000 - }, - { - "epoch": 0.73, - "learning_rate": 7.316134000770119e-09, - "loss": 4.4415, - "step": 66500 - }, - { - "epoch": 0.74, - "learning_rate": 7.371142527091699e-09, - "loss": 4.4505, - "step": 67000 - }, - { - "epoch": 0.74, - "learning_rate": 7.42615105341328e-09, - "loss": 4.4413, - "step": 67500 - }, - { - "epoch": 0.75, - "learning_rate": 7.481159579734858e-09, - "loss": 4.4267, - "step": 68000 - }, - { - "epoch": 0.75, - "learning_rate": 7.536168106056439e-09, - "loss": 4.4385, - "step": 68500 - }, - { - "epoch": 0.76, - "learning_rate": 7.591176632378018e-09, - "loss": 4.4343, - "step": 69000 - }, - { - "epoch": 0.76, - "learning_rate": 7.646185158699599e-09, - "loss": 4.419, - "step": 69500 - }, - { - "epoch": 0.77, - "learning_rate": 7.701193685021178e-09, - "loss": 4.4365, - "step": 70000 - }, - { - "epoch": 0.78, - "learning_rate": 7.756202211342757e-09, - "loss": 4.4294, - "step": 70500 - }, - { - "epoch": 0.78, - "learning_rate": 7.811210737664338e-09, - "loss": 4.4121, - "step": 71000 - }, - { - "epoch": 0.79, - "learning_rate": 7.866219263985917e-09, - "loss": 4.419, - "step": 71500 - }, - { - "epoch": 0.79, - "learning_rate": 7.921227790307496e-09, - "loss": 4.4186, - "step": 72000 - }, - { - "epoch": 0.8, - "learning_rate": 7.976236316629077e-09, - "loss": 4.4212, - "step": 72500 - }, - { - "epoch": 0.8, - "learning_rate": 8.031244842950658e-09, - "loss": 4.4224, - "step": 73000 - }, - { - "epoch": 0.81, - "learning_rate": 8.086253369272236e-09, - "loss": 4.4272, - "step": 73500 - }, - { - "epoch": 0.81, - "learning_rate": 8.141261895593816e-09, - "loss": 4.411, - "step": 74000 - }, - { - "epoch": 0.82, - "learning_rate": 8.196270421915397e-09, - "loss": 4.4217, - "step": 74500 - }, - { - "epoch": 0.83, - "learning_rate": 8.251278948236976e-09, - "loss": 4.3936, - "step": 75000 - }, - { - "epoch": 0.83, - "learning_rate": 8.306287474558556e-09, - "loss": 4.4151, - "step": 75500 - }, - { - "epoch": 0.84, - "learning_rate": 8.361296000880136e-09, - "loss": 4.394, - "step": 76000 - }, - { - "epoch": 0.84, - "learning_rate": 8.416304527201716e-09, - "loss": 4.4027, - "step": 76500 - }, - { - "epoch": 0.85, - "learning_rate": 8.471313053523296e-09, - "loss": 4.3853, - "step": 77000 - }, - { - "epoch": 0.85, - "learning_rate": 8.526321579844875e-09, - "loss": 4.3918, - "step": 77500 - }, - { - "epoch": 0.86, - "learning_rate": 8.581330106166455e-09, - "loss": 4.3874, - "step": 78000 - }, - { - "epoch": 0.86, - "learning_rate": 8.636338632488035e-09, - "loss": 4.3888, - "step": 78500 - }, - { - "epoch": 0.87, - "learning_rate": 8.691347158809616e-09, - "loss": 4.4081, - "step": 79000 - }, - { - "epoch": 0.87, - "learning_rate": 8.746355685131194e-09, - "loss": 4.3998, - "step": 79500 - }, - { - "epoch": 0.88, - "learning_rate": 8.801364211452775e-09, - "loss": 4.4128, - "step": 80000 - }, - { - "epoch": 0.89, - "learning_rate": 8.856372737774355e-09, - "loss": 4.4045, - "step": 80500 - }, - { - "epoch": 0.89, - "learning_rate": 8.911381264095935e-09, - "loss": 4.3757, - "step": 81000 - }, - { - "epoch": 0.9, - "learning_rate": 8.966389790417514e-09, - "loss": 4.3741, - "step": 81500 - }, - { - "epoch": 0.9, - "learning_rate": 9.021398316739095e-09, - "loss": 4.3908, - "step": 82000 - }, - { - "epoch": 0.91, - "learning_rate": 9.076406843060674e-09, - "loss": 4.3689, - "step": 82500 - }, - { - "epoch": 0.91, - "learning_rate": 9.131415369382253e-09, - "loss": 4.3711, - "step": 83000 - }, - { - "epoch": 0.92, - "learning_rate": 9.186423895703834e-09, - "loss": 4.3888, - "step": 83500 - }, - { - "epoch": 0.92, - "learning_rate": 9.241432422025413e-09, - "loss": 4.377, - "step": 84000 - }, - { - "epoch": 0.93, - "learning_rate": 9.296440948346994e-09, - "loss": 4.3702, - "step": 84500 - }, - { - "epoch": 0.94, - "learning_rate": 9.351449474668573e-09, - "loss": 4.3912, - "step": 85000 - }, - { - "epoch": 0.94, - "learning_rate": 9.406458000990152e-09, - "loss": 4.3761, - "step": 85500 - }, - { - "epoch": 0.95, - "learning_rate": 9.461466527311733e-09, - "loss": 4.3941, - "step": 86000 - }, - { - "epoch": 0.95, - "learning_rate": 9.516475053633314e-09, - "loss": 4.3642, - "step": 86500 - }, - { - "epoch": 0.96, - "learning_rate": 9.571483579954891e-09, - "loss": 4.354, - "step": 87000 - }, - { - "epoch": 0.96, - "learning_rate": 9.626492106276472e-09, - "loss": 4.3474, - "step": 87500 - }, - { - "epoch": 0.97, - "learning_rate": 9.681500632598053e-09, - "loss": 4.3485, - "step": 88000 - }, - { - "epoch": 0.97, - "learning_rate": 9.736509158919632e-09, - "loss": 4.3483, - "step": 88500 - }, - { - "epoch": 0.98, - "learning_rate": 9.791517685241211e-09, - "loss": 4.35, - "step": 89000 - }, - { - "epoch": 0.98, - "learning_rate": 9.846526211562792e-09, - "loss": 4.3379, - "step": 89500 - }, - { - "epoch": 0.99, - "learning_rate": 9.901534737884371e-09, - "loss": 4.3628, - "step": 90000 - }, - { - "epoch": 1.0, - "learning_rate": 9.956543264205952e-09, - "loss": 4.3589, - "step": 90500 - }, - { - "epoch": 1.0, - "eval_loss": 4.289782524108887, - "eval_runtime": 6.1366, - "eval_samples_per_second": 253.236, - "step": 90895 - }, - { - "epoch": 1.0, - "learning_rate": 1.0011551790527531e-08, - "loss": 4.362, - "step": 91000 - }, - { - "epoch": 1.01, - "learning_rate": 1.006656031684911e-08, - "loss": 4.3533, - "step": 91500 - }, - { - "epoch": 1.01, - "learning_rate": 1.0121568843170691e-08, - "loss": 4.3506, - "step": 92000 - }, - { - "epoch": 1.02, - "learning_rate": 1.017657736949227e-08, - "loss": 4.3542, - "step": 92500 - }, - { - "epoch": 1.02, - "learning_rate": 1.023158589581385e-08, - "loss": 4.3379, - "step": 93000 - }, - { - "epoch": 1.03, - "learning_rate": 1.028659442213543e-08, - "loss": 4.3561, - "step": 93500 - }, - { - "epoch": 1.03, - "learning_rate": 1.0341602948457011e-08, - "loss": 4.3405, - "step": 94000 - }, - { - "epoch": 1.04, - "learning_rate": 1.039661147477859e-08, - "loss": 4.3303, - "step": 94500 - }, - { - "epoch": 1.05, - "learning_rate": 1.045162000110017e-08, - "loss": 4.3082, - "step": 95000 - }, - { - "epoch": 1.05, - "learning_rate": 1.050662852742175e-08, - "loss": 4.343, - "step": 95500 - }, - { - "epoch": 1.06, - "learning_rate": 1.056163705374333e-08, - "loss": 4.3371, - "step": 96000 - }, - { - "epoch": 1.06, - "learning_rate": 1.0616645580064909e-08, - "loss": 4.3386, - "step": 96500 - }, - { - "epoch": 1.07, - "learning_rate": 1.067165410638649e-08, - "loss": 4.2928, - "step": 97000 - }, - { - "epoch": 1.07, - "learning_rate": 1.0726662632708069e-08, - "loss": 4.3474, - "step": 97500 - }, - { - "epoch": 1.08, - "learning_rate": 1.078167115902965e-08, - "loss": 4.3359, - "step": 98000 - }, - { - "epoch": 1.08, - "learning_rate": 1.0836679685351229e-08, - "loss": 4.3129, - "step": 98500 - }, - { - "epoch": 1.09, - "learning_rate": 1.0891688211672808e-08, - "loss": 4.3129, - "step": 99000 - }, - { - "epoch": 1.09, - "learning_rate": 1.0946696737994389e-08, - "loss": 4.3069, - "step": 99500 - }, - { - "epoch": 1.1, - "learning_rate": 1.100170526431597e-08, - "loss": 4.3064, - "step": 100000 - }, - { - "epoch": 1.11, - "learning_rate": 1.1056713790637549e-08, - "loss": 4.3297, - "step": 100500 - }, - { - "epoch": 1.11, - "learning_rate": 1.1111722316959128e-08, - "loss": 4.3102, - "step": 101000 - }, - { - "epoch": 1.12, - "learning_rate": 1.1166730843280709e-08, - "loss": 4.3026, - "step": 101500 - }, - { - "epoch": 1.12, - "learning_rate": 1.1221739369602288e-08, - "loss": 4.3202, - "step": 102000 - }, - { - "epoch": 1.13, - "learning_rate": 1.1276747895923867e-08, - "loss": 4.3356, - "step": 102500 - }, - { - "epoch": 1.13, - "learning_rate": 1.1331756422245448e-08, - "loss": 4.3202, - "step": 103000 - }, - { - "epoch": 1.14, - "learning_rate": 1.1386764948567027e-08, - "loss": 4.3178, - "step": 103500 - }, - { - "epoch": 1.14, - "learning_rate": 1.1441773474888606e-08, - "loss": 4.2979, - "step": 104000 - }, - { - "epoch": 1.15, - "learning_rate": 1.1496782001210187e-08, - "loss": 4.3109, - "step": 104500 - }, - { - "epoch": 1.16, - "learning_rate": 1.1551790527531766e-08, - "loss": 4.3, - "step": 105000 - }, - { - "epoch": 1.16, - "learning_rate": 1.1606799053853347e-08, - "loss": 4.3124, - "step": 105500 - }, - { - "epoch": 1.17, - "learning_rate": 1.1661807580174926e-08, - "loss": 4.2976, - "step": 106000 - }, - { - "epoch": 1.17, - "learning_rate": 1.1716816106496507e-08, - "loss": 4.2828, - "step": 106500 - }, - { - "epoch": 1.18, - "learning_rate": 1.1771824632818086e-08, - "loss": 4.3044, - "step": 107000 - }, - { - "epoch": 1.18, - "learning_rate": 1.1826833159139667e-08, - "loss": 4.2915, - "step": 107500 - }, - { - "epoch": 1.19, - "learning_rate": 1.1881841685461246e-08, - "loss": 4.3062, - "step": 108000 - }, - { - "epoch": 1.19, - "learning_rate": 1.1936850211782825e-08, - "loss": 4.2922, - "step": 108500 - }, - { - "epoch": 1.2, - "learning_rate": 1.1991858738104406e-08, - "loss": 4.2828, - "step": 109000 - }, - { - "epoch": 1.2, - "learning_rate": 1.2046867264425985e-08, - "loss": 4.2928, - "step": 109500 - }, - { - "epoch": 1.21, - "learning_rate": 1.2101875790747565e-08, - "loss": 4.2987, - "step": 110000 - }, - { - "epoch": 1.22, - "learning_rate": 1.2156884317069145e-08, - "loss": 4.2907, - "step": 110500 - }, - { - "epoch": 1.22, - "learning_rate": 1.2211892843390725e-08, - "loss": 4.2922, - "step": 111000 - }, - { - "epoch": 1.23, - "learning_rate": 1.2266901369712304e-08, - "loss": 4.2973, - "step": 111500 - }, - { - "epoch": 1.23, - "learning_rate": 1.2321909896033885e-08, - "loss": 4.2901, - "step": 112000 - }, - { - "epoch": 1.24, - "learning_rate": 1.2376918422355465e-08, - "loss": 4.283, - "step": 112500 - }, - { - "epoch": 1.24, - "learning_rate": 1.2431926948677045e-08, - "loss": 4.2771, - "step": 113000 - }, - { - "epoch": 1.25, - "learning_rate": 1.2486935474998624e-08, - "loss": 4.2953, - "step": 113500 - }, - { - "epoch": 1.25, - "learning_rate": 1.2541944001320205e-08, - "loss": 4.2705, - "step": 114000 - }, - { - "epoch": 1.26, - "learning_rate": 1.2596952527641782e-08, - "loss": 4.2946, - "step": 114500 - }, - { - "epoch": 1.27, - "learning_rate": 1.2651961053963364e-08, - "loss": 4.2666, - "step": 115000 - }, - { - "epoch": 1.27, - "learning_rate": 1.2706969580284944e-08, - "loss": 4.2594, - "step": 115500 - }, - { - "epoch": 1.28, - "learning_rate": 1.2761978106606524e-08, - "loss": 4.2728, - "step": 116000 - }, - { - "epoch": 1.28, - "learning_rate": 1.2816986632928104e-08, - "loss": 4.2912, - "step": 116500 - }, - { - "epoch": 1.29, - "learning_rate": 1.2871995159249683e-08, - "loss": 4.2638, - "step": 117000 - }, - { - "epoch": 1.29, - "learning_rate": 1.2927003685571264e-08, - "loss": 4.254, - "step": 117500 - }, - { - "epoch": 1.3, - "learning_rate": 1.2982012211892843e-08, - "loss": 4.2631, - "step": 118000 - }, - { - "epoch": 1.3, - "learning_rate": 1.3037020738214422e-08, - "loss": 4.2645, - "step": 118500 - }, - { - "epoch": 1.31, - "learning_rate": 1.3092029264536003e-08, - "loss": 4.2674, - "step": 119000 - }, - { - "epoch": 1.31, - "learning_rate": 1.3147037790857582e-08, - "loss": 4.2591, - "step": 119500 - }, - { - "epoch": 1.32, - "learning_rate": 1.3202046317179163e-08, - "loss": 4.2593, - "step": 120000 - }, - { - "epoch": 1.33, - "learning_rate": 1.3257054843500742e-08, - "loss": 4.2737, - "step": 120500 - }, - { - "epoch": 1.33, - "learning_rate": 1.3312063369822321e-08, - "loss": 4.2487, - "step": 121000 - }, - { - "epoch": 1.34, - "learning_rate": 1.3367071896143902e-08, - "loss": 4.2739, - "step": 121500 - }, - { - "epoch": 1.34, - "learning_rate": 1.3422080422465481e-08, - "loss": 4.2685, - "step": 122000 - }, - { - "epoch": 1.35, - "learning_rate": 1.347708894878706e-08, - "loss": 4.248, - "step": 122500 - }, - { - "epoch": 1.35, - "learning_rate": 1.3532097475108643e-08, - "loss": 4.2452, - "step": 123000 - }, - { - "epoch": 1.36, - "learning_rate": 1.358710600143022e-08, - "loss": 4.2696, - "step": 123500 - }, - { - "epoch": 1.36, - "learning_rate": 1.36421145277518e-08, - "loss": 4.2638, - "step": 124000 - }, - { - "epoch": 1.37, - "learning_rate": 1.3697123054073382e-08, - "loss": 4.2639, - "step": 124500 - }, - { - "epoch": 1.38, - "learning_rate": 1.375213158039496e-08, - "loss": 4.2381, - "step": 125000 - }, - { - "epoch": 1.38, - "learning_rate": 1.3807140106716542e-08, - "loss": 4.2466, - "step": 125500 - }, - { - "epoch": 1.39, - "learning_rate": 1.3862148633038121e-08, - "loss": 4.2518, - "step": 126000 - }, - { - "epoch": 1.39, - "learning_rate": 1.3917157159359699e-08, - "loss": 4.2385, - "step": 126500 - }, - { - "epoch": 1.4, - "learning_rate": 1.3972165685681281e-08, - "loss": 4.2427, - "step": 127000 - }, - { - "epoch": 1.4, - "learning_rate": 1.402717421200286e-08, - "loss": 4.2388, - "step": 127500 - }, - { - "epoch": 1.41, - "learning_rate": 1.4082182738324438e-08, - "loss": 4.2439, - "step": 128000 - }, - { - "epoch": 1.41, - "learning_rate": 1.413719126464602e-08, - "loss": 4.2515, - "step": 128500 - }, - { - "epoch": 1.42, - "learning_rate": 1.41921997909676e-08, - "loss": 4.2376, - "step": 129000 - }, - { - "epoch": 1.42, - "learning_rate": 1.424720831728918e-08, - "loss": 4.2544, - "step": 129500 - }, - { - "epoch": 1.43, - "learning_rate": 1.430221684361076e-08, - "loss": 4.2362, - "step": 130000 - }, - { - "epoch": 1.44, - "learning_rate": 1.4357225369932339e-08, - "loss": 4.2354, - "step": 130500 - }, - { - "epoch": 1.44, - "learning_rate": 1.441223389625392e-08, - "loss": 4.2289, - "step": 131000 - }, - { - "epoch": 1.45, - "learning_rate": 1.4467242422575499e-08, - "loss": 4.2352, - "step": 131500 - }, - { - "epoch": 1.45, - "learning_rate": 1.4522250948897078e-08, - "loss": 4.2446, - "step": 132000 - }, - { - "epoch": 1.46, - "learning_rate": 1.4577259475218659e-08, - "loss": 4.2215, - "step": 132500 - }, - { - "epoch": 1.46, - "learning_rate": 1.4632268001540238e-08, - "loss": 4.2311, - "step": 133000 - }, - { - "epoch": 1.47, - "learning_rate": 1.4687276527861817e-08, - "loss": 4.2194, - "step": 133500 - }, - { - "epoch": 1.47, - "learning_rate": 1.4742285054183398e-08, - "loss": 4.2424, - "step": 134000 - }, - { - "epoch": 1.48, - "learning_rate": 1.4797293580504977e-08, - "loss": 4.2409, - "step": 134500 - }, - { - "epoch": 1.49, - "learning_rate": 1.485230210682656e-08, - "loss": 4.2386, - "step": 135000 - }, - { - "epoch": 1.49, - "learning_rate": 1.4907310633148137e-08, - "loss": 4.231, - "step": 135500 - }, - { - "epoch": 1.5, - "learning_rate": 1.4962319159469716e-08, - "loss": 4.2323, - "step": 136000 - }, - { - "epoch": 1.5, - "learning_rate": 1.50173276857913e-08, - "loss": 4.2426, - "step": 136500 - }, - { - "epoch": 1.51, - "learning_rate": 1.5072336212112878e-08, - "loss": 4.2334, - "step": 137000 - }, - { - "epoch": 1.51, - "learning_rate": 1.5127344738434457e-08, - "loss": 4.2404, - "step": 137500 - }, - { - "epoch": 1.52, - "learning_rate": 1.5182353264756036e-08, - "loss": 4.2379, - "step": 138000 - }, - { - "epoch": 1.52, - "learning_rate": 1.5237361791077615e-08, - "loss": 4.2294, - "step": 138500 - }, - { - "epoch": 1.53, - "learning_rate": 1.5292370317399198e-08, - "loss": 4.2298, - "step": 139000 - }, - { - "epoch": 1.53, - "learning_rate": 1.5347378843720777e-08, - "loss": 4.2258, - "step": 139500 - }, - { - "epoch": 1.54, - "learning_rate": 1.5402387370042356e-08, - "loss": 4.2313, - "step": 140000 - }, - { - "epoch": 1.55, - "learning_rate": 1.5457395896363935e-08, - "loss": 4.2273, - "step": 140500 - }, - { - "epoch": 1.55, - "learning_rate": 1.5512404422685514e-08, - "loss": 4.2185, - "step": 141000 - }, - { - "epoch": 1.56, - "learning_rate": 1.5567412949007094e-08, - "loss": 4.2187, - "step": 141500 - }, - { - "epoch": 1.56, - "learning_rate": 1.5622421475328676e-08, - "loss": 4.2292, - "step": 142000 - }, - { - "epoch": 1.57, - "learning_rate": 1.5677430001650255e-08, - "loss": 4.2364, - "step": 142500 - }, - { - "epoch": 1.57, - "learning_rate": 1.5732438527971834e-08, - "loss": 4.1938, - "step": 143000 - }, - { - "epoch": 1.58, - "learning_rate": 1.5787447054293414e-08, - "loss": 4.2166, - "step": 143500 - }, - { - "epoch": 1.58, - "learning_rate": 1.5842455580614993e-08, - "loss": 4.2128, - "step": 144000 - }, - { - "epoch": 1.59, - "learning_rate": 1.5897464106936575e-08, - "loss": 4.2198, - "step": 144500 - }, - { - "epoch": 1.6, - "learning_rate": 1.5952472633258154e-08, - "loss": 4.2407, - "step": 145000 - }, - { - "epoch": 1.6, - "learning_rate": 1.6007481159579734e-08, - "loss": 4.2087, - "step": 145500 - }, - { - "epoch": 1.61, - "learning_rate": 1.6062489685901316e-08, - "loss": 4.2192, - "step": 146000 - }, - { - "epoch": 1.61, - "learning_rate": 1.6117498212222895e-08, - "loss": 4.2053, - "step": 146500 - }, - { - "epoch": 1.62, - "learning_rate": 1.617250673854447e-08, - "loss": 4.2251, - "step": 147000 - }, - { - "epoch": 1.62, - "learning_rate": 1.6227515264866054e-08, - "loss": 4.2181, - "step": 147500 - }, - { - "epoch": 1.63, - "learning_rate": 1.6282523791187633e-08, - "loss": 4.2102, - "step": 148000 - }, - { - "epoch": 1.63, - "learning_rate": 1.6337532317509215e-08, - "loss": 4.2054, - "step": 148500 - }, - { - "epoch": 1.64, - "learning_rate": 1.6392540843830794e-08, - "loss": 4.2147, - "step": 149000 - }, - { - "epoch": 1.64, - "learning_rate": 1.6447549370152374e-08, - "loss": 4.1979, - "step": 149500 - }, - { - "epoch": 1.65, - "learning_rate": 1.6502557896473953e-08, - "loss": 4.1898, - "step": 150000 - }, - { - "epoch": 1.66, - "learning_rate": 1.6557566422795532e-08, - "loss": 4.2091, - "step": 150500 - }, - { - "epoch": 1.66, - "learning_rate": 1.661257494911711e-08, - "loss": 4.193, - "step": 151000 - }, - { - "epoch": 1.67, - "learning_rate": 1.6667583475438694e-08, - "loss": 4.1914, - "step": 151500 - }, - { - "epoch": 1.67, - "learning_rate": 1.6722592001760273e-08, - "loss": 4.2179, - "step": 152000 - }, - { - "epoch": 1.68, - "learning_rate": 1.6777600528081852e-08, - "loss": 4.2042, - "step": 152500 - }, - { - "epoch": 1.68, - "learning_rate": 1.683260905440343e-08, - "loss": 4.2024, - "step": 153000 - }, - { - "epoch": 1.69, - "learning_rate": 1.688761758072501e-08, - "loss": 4.198, - "step": 153500 - }, - { - "epoch": 1.69, - "learning_rate": 1.6942626107046593e-08, - "loss": 4.1979, - "step": 154000 - }, - { - "epoch": 1.7, - "learning_rate": 1.6997634633368172e-08, - "loss": 4.1953, - "step": 154500 - }, - { - "epoch": 1.71, - "learning_rate": 1.705264315968975e-08, - "loss": 4.194, - "step": 155000 - }, - { - "epoch": 1.71, - "learning_rate": 1.710765168601133e-08, - "loss": 4.2025, - "step": 155500 - }, - { - "epoch": 1.72, - "learning_rate": 1.716266021233291e-08, - "loss": 4.2024, - "step": 156000 - }, - { - "epoch": 1.72, - "learning_rate": 1.721766873865449e-08, - "loss": 4.1942, - "step": 156500 - }, - { - "epoch": 1.73, - "learning_rate": 1.727267726497607e-08, - "loss": 4.1708, - "step": 157000 - }, - { - "epoch": 1.73, - "learning_rate": 1.732768579129765e-08, - "loss": 4.1987, - "step": 157500 - }, - { - "epoch": 1.74, - "learning_rate": 1.7382694317619233e-08, - "loss": 4.183, - "step": 158000 - }, - { - "epoch": 1.74, - "learning_rate": 1.7437702843940812e-08, - "loss": 4.1875, - "step": 158500 - }, - { - "epoch": 1.75, - "learning_rate": 1.7492711370262388e-08, - "loss": 4.1796, - "step": 159000 - }, - { - "epoch": 1.75, - "learning_rate": 1.754771989658397e-08, - "loss": 4.1838, - "step": 159500 - }, - { - "epoch": 1.76, - "learning_rate": 1.760272842290555e-08, - "loss": 4.1826, - "step": 160000 - }, - { - "epoch": 1.77, - "learning_rate": 1.765773694922713e-08, - "loss": 4.1893, - "step": 160500 - }, - { - "epoch": 1.77, - "learning_rate": 1.771274547554871e-08, - "loss": 4.1898, - "step": 161000 - }, - { - "epoch": 1.78, - "learning_rate": 1.776775400187029e-08, - "loss": 4.1741, - "step": 161500 - }, - { - "epoch": 1.78, - "learning_rate": 1.782276252819187e-08, - "loss": 4.2011, - "step": 162000 - }, - { - "epoch": 1.79, - "learning_rate": 1.787777105451345e-08, - "loss": 4.2012, - "step": 162500 - }, - { - "epoch": 1.79, - "learning_rate": 1.7932779580835028e-08, - "loss": 4.1947, - "step": 163000 - }, - { - "epoch": 1.8, - "learning_rate": 1.798778810715661e-08, - "loss": 4.1843, - "step": 163500 - }, - { - "epoch": 1.8, - "learning_rate": 1.804279663347819e-08, - "loss": 4.1737, - "step": 164000 - }, - { - "epoch": 1.81, - "learning_rate": 1.809780515979977e-08, - "loss": 4.1953, - "step": 164500 - }, - { - "epoch": 1.82, - "learning_rate": 1.8152813686121348e-08, - "loss": 4.1943, - "step": 165000 - }, - { - "epoch": 1.82, - "learning_rate": 1.8207822212442927e-08, - "loss": 4.1691, - "step": 165500 - }, - { - "epoch": 1.83, - "learning_rate": 1.8262830738764506e-08, - "loss": 4.153, - "step": 166000 - }, - { - "epoch": 1.83, - "learning_rate": 1.831783926508609e-08, - "loss": 4.1801, - "step": 166500 - }, - { - "epoch": 1.84, - "learning_rate": 1.8372847791407668e-08, - "loss": 4.1811, - "step": 167000 - }, - { - "epoch": 1.84, - "learning_rate": 1.8427856317729247e-08, - "loss": 4.1735, - "step": 167500 - }, - { - "epoch": 1.85, - "learning_rate": 1.8482864844050826e-08, - "loss": 4.1614, - "step": 168000 - }, - { - "epoch": 1.85, - "learning_rate": 1.8537873370372405e-08, - "loss": 4.1711, - "step": 168500 - }, - { - "epoch": 1.86, - "learning_rate": 1.8592881896693988e-08, - "loss": 4.1783, - "step": 169000 - }, - { - "epoch": 1.86, - "learning_rate": 1.8647890423015567e-08, - "loss": 4.17, - "step": 169500 - }, - { - "epoch": 1.87, - "learning_rate": 1.8702898949337146e-08, - "loss": 4.1725, - "step": 170000 - }, - { - "epoch": 1.88, - "learning_rate": 1.875790747565873e-08, - "loss": 4.1727, - "step": 170500 - }, - { - "epoch": 1.88, - "learning_rate": 1.8812916001980304e-08, - "loss": 4.17, - "step": 171000 - }, - { - "epoch": 1.89, - "learning_rate": 1.8867924528301887e-08, - "loss": 4.1768, - "step": 171500 - }, - { - "epoch": 1.89, - "learning_rate": 1.8922933054623466e-08, - "loss": 4.1607, - "step": 172000 - }, - { - "epoch": 1.9, - "learning_rate": 1.8977941580945045e-08, - "loss": 4.1826, - "step": 172500 - }, - { - "epoch": 1.9, - "learning_rate": 1.9032950107266628e-08, - "loss": 4.1884, - "step": 173000 - }, - { - "epoch": 1.91, - "learning_rate": 1.9087958633588207e-08, - "loss": 4.1573, - "step": 173500 - }, - { - "epoch": 1.91, - "learning_rate": 1.9142967159909783e-08, - "loss": 4.1502, - "step": 174000 - }, - { - "epoch": 1.92, - "learning_rate": 1.9197975686231365e-08, - "loss": 4.1692, - "step": 174500 - }, - { - "epoch": 1.93, - "learning_rate": 1.9252984212552944e-08, - "loss": 4.1763, - "step": 175000 - }, - { - "epoch": 1.93, - "learning_rate": 1.9307992738874523e-08, - "loss": 4.1595, - "step": 175500 - }, - { - "epoch": 1.94, - "learning_rate": 1.9363001265196106e-08, - "loss": 4.1727, - "step": 176000 - }, - { - "epoch": 1.94, - "learning_rate": 1.9418009791517685e-08, - "loss": 4.1723, - "step": 176500 - }, - { - "epoch": 1.95, - "learning_rate": 1.9473018317839264e-08, - "loss": 4.183, - "step": 177000 - }, - { - "epoch": 1.95, - "learning_rate": 1.9528026844160843e-08, - "loss": 4.1635, - "step": 177500 - }, - { - "epoch": 1.96, - "learning_rate": 1.9583035370482423e-08, - "loss": 4.1573, - "step": 178000 - }, - { - "epoch": 1.96, - "learning_rate": 1.9638043896804005e-08, - "loss": 4.1551, - "step": 178500 - }, - { - "epoch": 1.97, - "learning_rate": 1.9693052423125584e-08, - "loss": 4.1479, - "step": 179000 - }, - { - "epoch": 1.97, - "learning_rate": 1.9748060949447163e-08, - "loss": 4.1719, - "step": 179500 - }, - { - "epoch": 1.98, - "learning_rate": 1.9803069475768743e-08, - "loss": 4.1469, - "step": 180000 - }, - { - "epoch": 1.99, - "learning_rate": 1.9858078002090322e-08, - "loss": 4.1515, - "step": 180500 - }, - { - "epoch": 1.99, - "learning_rate": 1.9913086528411904e-08, - "loss": 4.1634, - "step": 181000 - }, - { - "epoch": 2.0, - "learning_rate": 1.9968095054733483e-08, - "loss": 4.1733, - "step": 181500 - }, - { - "epoch": 2.0, - "eval_loss": 4.123291492462158, - "eval_runtime": 6.1426, - "eval_samples_per_second": 252.988, - "step": 181790 - }, - { - "epoch": 2.0, - "learning_rate": 2.0023103581055063e-08, - "loss": 4.159, - "step": 182000 - }, - { - "epoch": 2.01, - "learning_rate": 2.0078112107376645e-08, - "loss": 4.1671, - "step": 182500 - }, - { - "epoch": 2.01, - "learning_rate": 2.013312063369822e-08, - "loss": 4.157, - "step": 183000 - }, - { - "epoch": 2.02, - "learning_rate": 2.01881291600198e-08, - "loss": 4.1638, - "step": 183500 - }, - { - "epoch": 2.02, - "learning_rate": 2.0243137686341383e-08, - "loss": 4.1781, - "step": 184000 - }, - { - "epoch": 2.03, - "learning_rate": 2.0298146212662962e-08, - "loss": 4.1482, - "step": 184500 - }, - { - "epoch": 2.04, - "learning_rate": 2.035315473898454e-08, - "loss": 4.1569, - "step": 185000 - }, - { - "epoch": 2.04, - "learning_rate": 2.0408163265306123e-08, - "loss": 4.1737, - "step": 185500 - }, - { - "epoch": 2.05, - "learning_rate": 2.04631717916277e-08, - "loss": 4.1626, - "step": 186000 - }, - { - "epoch": 2.05, - "learning_rate": 2.0518180317949282e-08, - "loss": 4.1507, - "step": 186500 - }, - { - "epoch": 2.06, - "learning_rate": 2.057318884427086e-08, - "loss": 4.1541, - "step": 187000 - }, - { - "epoch": 2.06, - "learning_rate": 2.062819737059244e-08, - "loss": 4.1576, - "step": 187500 - }, - { - "epoch": 2.07, - "learning_rate": 2.0683205896914023e-08, - "loss": 4.1452, - "step": 188000 - }, - { - "epoch": 2.07, - "learning_rate": 2.0738214423235602e-08, - "loss": 4.1473, - "step": 188500 - }, - { - "epoch": 2.08, - "learning_rate": 2.079322294955718e-08, - "loss": 4.1498, - "step": 189000 - }, - { - "epoch": 2.08, - "learning_rate": 2.084823147587876e-08, - "loss": 4.1441, - "step": 189500 - }, - { - "epoch": 2.09, - "learning_rate": 2.090324000220034e-08, - "loss": 4.1445, - "step": 190000 - }, - { - "epoch": 2.1, - "learning_rate": 2.0958248528521922e-08, - "loss": 4.1388, - "step": 190500 - }, - { - "epoch": 2.1, - "learning_rate": 2.10132570548435e-08, - "loss": 4.1458, - "step": 191000 - }, - { - "epoch": 2.11, - "learning_rate": 2.106826558116508e-08, - "loss": 4.1566, - "step": 191500 - }, - { - "epoch": 2.11, - "learning_rate": 2.112327410748666e-08, - "loss": 4.1697, - "step": 192000 - }, - { - "epoch": 2.12, - "learning_rate": 2.1178282633808238e-08, - "loss": 4.154, - "step": 192500 - }, - { - "epoch": 2.12, - "learning_rate": 2.1233291160129818e-08, - "loss": 4.1516, - "step": 193000 - }, - { - "epoch": 2.13, - "learning_rate": 2.12882996864514e-08, - "loss": 4.1357, - "step": 193500 - }, - { - "epoch": 2.13, - "learning_rate": 2.134330821277298e-08, - "loss": 4.1551, - "step": 194000 - }, - { - "epoch": 2.14, - "learning_rate": 2.1398316739094558e-08, - "loss": 4.1199, - "step": 194500 - }, - { - "epoch": 2.15, - "learning_rate": 2.1453325265416137e-08, - "loss": 4.1343, - "step": 195000 - }, - { - "epoch": 2.15, - "learning_rate": 2.1508333791737717e-08, - "loss": 4.1348, - "step": 195500 - }, - { - "epoch": 2.16, - "learning_rate": 2.15633423180593e-08, - "loss": 4.1537, - "step": 196000 - }, - { - "epoch": 2.16, - "learning_rate": 2.1618350844380878e-08, - "loss": 4.1408, - "step": 196500 - }, - { - "epoch": 2.17, - "learning_rate": 2.1673359370702457e-08, - "loss": 4.1233, - "step": 197000 - }, - { - "epoch": 2.17, - "learning_rate": 2.172836789702404e-08, - "loss": 4.1434, - "step": 197500 - }, - { - "epoch": 2.18, - "learning_rate": 2.1783376423345616e-08, - "loss": 4.1379, - "step": 198000 - }, - { - "epoch": 2.18, - "learning_rate": 2.1838384949667195e-08, - "loss": 4.1286, - "step": 198500 - }, - { - "epoch": 2.19, - "learning_rate": 2.1893393475988777e-08, - "loss": 4.1609, - "step": 199000 - }, - { - "epoch": 2.19, - "learning_rate": 2.1948402002310357e-08, - "loss": 4.1414, - "step": 199500 - }, - { - "epoch": 2.2, - "learning_rate": 2.200341052863194e-08, - "loss": 4.1278, - "step": 200000 - }, - { - "epoch": 2.21, - "learning_rate": 2.2058419054953518e-08, - "loss": 4.14, - "step": 200500 - }, - { - "epoch": 2.21, - "learning_rate": 2.2113427581275097e-08, - "loss": 4.1567, - "step": 201000 - }, - { - "epoch": 2.22, - "learning_rate": 2.2168436107596677e-08, - "loss": 4.132, - "step": 201500 - }, - { - "epoch": 2.22, - "learning_rate": 2.2223444633918256e-08, - "loss": 4.1439, - "step": 202000 - }, - { - "epoch": 2.23, - "learning_rate": 2.2278453160239835e-08, - "loss": 4.1418, - "step": 202500 - }, - { - "epoch": 2.23, - "learning_rate": 2.2333461686561417e-08, - "loss": 4.1574, - "step": 203000 - }, - { - "epoch": 2.24, - "learning_rate": 2.2388470212882997e-08, - "loss": 4.1295, - "step": 203500 - }, - { - "epoch": 2.24, - "learning_rate": 2.2443478739204576e-08, - "loss": 4.1422, - "step": 204000 - }, - { - "epoch": 2.25, - "learning_rate": 2.2498487265526155e-08, - "loss": 4.1128, - "step": 204500 - }, - { - "epoch": 2.26, - "learning_rate": 2.2553495791847734e-08, - "loss": 4.1337, - "step": 205000 - }, - { - "epoch": 2.26, - "learning_rate": 2.2608504318169317e-08, - "loss": 4.141, - "step": 205500 - }, - { - "epoch": 2.27, - "learning_rate": 2.2663512844490896e-08, - "loss": 4.1178, - "step": 206000 - }, - { - "epoch": 2.27, - "learning_rate": 2.2718521370812475e-08, - "loss": 4.1296, - "step": 206500 - }, - { - "epoch": 2.28, - "learning_rate": 2.2773529897134054e-08, - "loss": 4.1411, - "step": 207000 - }, - { - "epoch": 2.28, - "learning_rate": 2.2828538423455633e-08, - "loss": 4.1232, - "step": 207500 - }, - { - "epoch": 2.29, - "learning_rate": 2.2883546949777212e-08, - "loss": 4.1447, - "step": 208000 - }, - { - "epoch": 2.29, - "learning_rate": 2.2938555476098795e-08, - "loss": 4.152, - "step": 208500 - }, - { - "epoch": 2.3, - "learning_rate": 2.2993564002420374e-08, - "loss": 4.1064, - "step": 209000 - }, - { - "epoch": 2.3, - "learning_rate": 2.3048572528741957e-08, - "loss": 4.1464, - "step": 209500 - }, - { - "epoch": 2.31, - "learning_rate": 2.3103581055063532e-08, - "loss": 4.1286, - "step": 210000 - }, - { - "epoch": 2.32, - "learning_rate": 2.315858958138511e-08, - "loss": 4.1136, - "step": 210500 - }, - { - "epoch": 2.32, - "learning_rate": 2.3213598107706694e-08, - "loss": 4.1417, - "step": 211000 - }, - { - "epoch": 2.33, - "learning_rate": 2.3268606634028273e-08, - "loss": 4.1176, - "step": 211500 - }, - { - "epoch": 2.33, - "learning_rate": 2.3323615160349852e-08, - "loss": 4.1272, - "step": 212000 - }, - { - "epoch": 2.34, - "learning_rate": 2.3378623686671435e-08, - "loss": 4.1374, - "step": 212500 - }, - { - "epoch": 2.34, - "learning_rate": 2.3433632212993014e-08, - "loss": 4.1144, - "step": 213000 - }, - { - "epoch": 2.35, - "learning_rate": 2.348864073931459e-08, - "loss": 4.1247, - "step": 213500 - }, - { - "epoch": 2.35, - "learning_rate": 2.3543649265636172e-08, - "loss": 4.131, - "step": 214000 - }, - { - "epoch": 2.36, - "learning_rate": 2.359865779195775e-08, - "loss": 4.1254, - "step": 214500 - }, - { - "epoch": 2.37, - "learning_rate": 2.3653666318279334e-08, - "loss": 4.1133, - "step": 215000 - }, - { - "epoch": 2.37, - "learning_rate": 2.3708674844600913e-08, - "loss": 4.1169, - "step": 215500 - }, - { - "epoch": 2.38, - "learning_rate": 2.3763683370922492e-08, - "loss": 4.1224, - "step": 216000 - }, - { - "epoch": 2.38, - "learning_rate": 2.381869189724407e-08, - "loss": 4.116, - "step": 216500 - }, - { - "epoch": 2.39, - "learning_rate": 2.387370042356565e-08, - "loss": 4.1025, - "step": 217000 - }, - { - "epoch": 2.39, - "learning_rate": 2.392870894988723e-08, - "loss": 4.1161, - "step": 217500 - }, - { - "epoch": 2.4, - "learning_rate": 2.3983717476208812e-08, - "loss": 4.1103, - "step": 218000 - }, - { - "epoch": 2.4, - "learning_rate": 2.403872600253039e-08, - "loss": 4.1311, - "step": 218500 - }, - { - "epoch": 2.41, - "learning_rate": 2.409373452885197e-08, - "loss": 4.1039, - "step": 219000 - }, - { - "epoch": 2.41, - "learning_rate": 2.414874305517355e-08, - "loss": 4.1102, - "step": 219500 - }, - { - "epoch": 2.42, - "learning_rate": 2.420375158149513e-08, - "loss": 4.1115, - "step": 220000 - }, - { - "epoch": 2.43, - "learning_rate": 2.425876010781671e-08, - "loss": 4.1203, - "step": 220500 - }, - { - "epoch": 2.43, - "learning_rate": 2.431376863413829e-08, - "loss": 4.1062, - "step": 221000 - }, - { - "epoch": 2.44, - "learning_rate": 2.436877716045987e-08, - "loss": 4.1049, - "step": 221500 - }, - { - "epoch": 2.44, - "learning_rate": 2.442378568678145e-08, - "loss": 4.1047, - "step": 222000 - }, - { - "epoch": 2.45, - "learning_rate": 2.4478794213103028e-08, - "loss": 4.0975, - "step": 222500 - }, - { - "epoch": 2.45, - "learning_rate": 2.4533802739424607e-08, - "loss": 4.1031, - "step": 223000 - }, - { - "epoch": 2.46, - "learning_rate": 2.458881126574619e-08, - "loss": 4.1039, - "step": 223500 - }, - { - "epoch": 2.46, - "learning_rate": 2.464381979206777e-08, - "loss": 4.1193, - "step": 224000 - }, - { - "epoch": 2.47, - "learning_rate": 2.469882831838935e-08, - "loss": 4.1225, - "step": 224500 - }, - { - "epoch": 2.48, - "learning_rate": 2.475383684471093e-08, - "loss": 4.1232, - "step": 225000 - }, - { - "epoch": 2.48, - "learning_rate": 2.4808845371032507e-08, - "loss": 4.1206, - "step": 225500 - }, - { - "epoch": 2.49, - "learning_rate": 2.486385389735409e-08, - "loss": 4.12, - "step": 226000 - }, - { - "epoch": 2.49, - "learning_rate": 2.4918862423675668e-08, - "loss": 4.112, - "step": 226500 - }, - { - "epoch": 2.5, - "learning_rate": 2.4973870949997247e-08, - "loss": 4.1199, - "step": 227000 - }, - { - "epoch": 2.5, - "learning_rate": 2.5028879476318827e-08, - "loss": 4.1248, - "step": 227500 - }, - { - "epoch": 2.51, - "learning_rate": 2.508388800264041e-08, - "loss": 4.1182, - "step": 228000 - }, - { - "epoch": 2.51, - "learning_rate": 2.5138896528961988e-08, - "loss": 4.1075, - "step": 228500 - }, - { - "epoch": 2.52, - "learning_rate": 2.5193905055283564e-08, - "loss": 4.1157, - "step": 229000 - }, - { - "epoch": 2.52, - "learning_rate": 2.5248913581605147e-08, - "loss": 4.0953, - "step": 229500 - }, - { - "epoch": 2.53, - "learning_rate": 2.530392210792673e-08, - "loss": 4.089, - "step": 230000 - }, - { - "epoch": 2.54, - "learning_rate": 2.535893063424831e-08, - "loss": 4.0914, - "step": 230500 - }, - { - "epoch": 2.54, - "learning_rate": 2.5413939160569887e-08, - "loss": 4.1025, - "step": 231000 - }, - { - "epoch": 2.55, - "learning_rate": 2.5468947686891466e-08, - "loss": 4.1051, - "step": 231500 - }, - { - "epoch": 2.55, - "learning_rate": 2.552395621321305e-08, - "loss": 4.0837, - "step": 232000 - }, - { - "epoch": 2.56, - "learning_rate": 2.5578964739534625e-08, - "loss": 4.1203, - "step": 232500 - }, - { - "epoch": 2.56, - "learning_rate": 2.5633973265856207e-08, - "loss": 4.098, - "step": 233000 - }, - { - "epoch": 2.57, - "learning_rate": 2.568898179217779e-08, - "loss": 4.0965, - "step": 233500 - }, - { - "epoch": 2.57, - "learning_rate": 2.5743990318499366e-08, - "loss": 4.1057, - "step": 234000 - }, - { - "epoch": 2.58, - "learning_rate": 2.5798998844820945e-08, - "loss": 4.0846, - "step": 234500 - }, - { - "epoch": 2.59, - "learning_rate": 2.5854007371142527e-08, - "loss": 4.1125, - "step": 235000 - }, - { - "epoch": 2.59, - "learning_rate": 2.5909015897464103e-08, - "loss": 4.1225, - "step": 235500 - }, - { - "epoch": 2.6, - "learning_rate": 2.5964024423785686e-08, - "loss": 4.0813, - "step": 236000 - }, - { - "epoch": 2.6, - "learning_rate": 2.6019032950107268e-08, - "loss": 4.1011, - "step": 236500 - }, - { - "epoch": 2.61, - "learning_rate": 2.6074041476428844e-08, - "loss": 4.095, - "step": 237000 - }, - { - "epoch": 2.61, - "learning_rate": 2.6129050002750423e-08, - "loss": 4.0847, - "step": 237500 - }, - { - "epoch": 2.62, - "learning_rate": 2.6184058529072006e-08, - "loss": 4.0935, - "step": 238000 - }, - { - "epoch": 2.62, - "learning_rate": 2.623906705539358e-08, - "loss": 4.0988, - "step": 238500 - }, - { - "epoch": 2.63, - "learning_rate": 2.6294075581715164e-08, - "loss": 4.1106, - "step": 239000 - }, - { - "epoch": 2.63, - "learning_rate": 2.6349084108036746e-08, - "loss": 4.0884, - "step": 239500 - }, - { - "epoch": 2.64, - "learning_rate": 2.6404092634358326e-08, - "loss": 4.1179, - "step": 240000 - }, - { - "epoch": 2.65, - "learning_rate": 2.64591011606799e-08, - "loss": 4.0912, - "step": 240500 - }, - { - "epoch": 2.65, - "learning_rate": 2.6514109687001484e-08, - "loss": 4.0808, - "step": 241000 - }, - { - "epoch": 2.66, - "learning_rate": 2.6569118213323066e-08, - "loss": 4.1192, - "step": 241500 - }, - { - "epoch": 2.66, - "learning_rate": 2.6624126739644642e-08, - "loss": 4.0961, - "step": 242000 - }, - { - "epoch": 2.67, - "learning_rate": 2.6679135265966225e-08, - "loss": 4.0915, - "step": 242500 - }, - { - "epoch": 2.67, - "learning_rate": 2.6734143792287804e-08, - "loss": 4.0894, - "step": 243000 - }, - { - "epoch": 2.68, - "learning_rate": 2.6789152318609383e-08, - "loss": 4.0981, - "step": 243500 - }, - { - "epoch": 2.68, - "learning_rate": 2.6844160844930962e-08, - "loss": 4.0863, - "step": 244000 - }, - { - "epoch": 2.69, - "learning_rate": 2.6899169371252545e-08, - "loss": 4.1067, - "step": 244500 - }, - { - "epoch": 2.7, - "learning_rate": 2.695417789757412e-08, - "loss": 4.1008, - "step": 245000 - }, - { - "epoch": 2.7, - "learning_rate": 2.7009186423895703e-08, - "loss": 4.1062, - "step": 245500 - }, - { - "epoch": 2.71, - "learning_rate": 2.7064194950217286e-08, - "loss": 4.0995, - "step": 246000 - }, - { - "epoch": 2.71, - "learning_rate": 2.711920347653886e-08, - "loss": 4.0928, - "step": 246500 - }, - { - "epoch": 2.72, - "learning_rate": 2.717421200286044e-08, - "loss": 4.11, - "step": 247000 - }, - { - "epoch": 2.72, - "learning_rate": 2.7229220529182023e-08, - "loss": 4.094, - "step": 247500 - }, - { - "epoch": 2.73, - "learning_rate": 2.72842290555036e-08, - "loss": 4.0777, - "step": 248000 - }, - { - "epoch": 2.73, - "learning_rate": 2.733923758182518e-08, - "loss": 4.1137, - "step": 248500 - }, - { - "epoch": 2.74, - "learning_rate": 2.7394246108146764e-08, - "loss": 4.0861, - "step": 249000 - }, - { - "epoch": 2.74, - "learning_rate": 2.7449254634468343e-08, - "loss": 4.0958, - "step": 249500 - }, - { - "epoch": 2.75, - "learning_rate": 2.750426316078992e-08, - "loss": 4.0915, - "step": 250000 - }, - { - "epoch": 2.76, - "learning_rate": 2.75592716871115e-08, - "loss": 4.079, - "step": 250500 - }, - { - "epoch": 2.76, - "learning_rate": 2.7614280213433084e-08, - "loss": 4.0976, - "step": 251000 - }, - { - "epoch": 2.77, - "learning_rate": 2.766928873975466e-08, - "loss": 4.0696, - "step": 251500 - }, - { - "epoch": 2.77, - "learning_rate": 2.7724297266076242e-08, - "loss": 4.0911, - "step": 252000 - }, - { - "epoch": 2.78, - "learning_rate": 2.777930579239782e-08, - "loss": 4.0644, - "step": 252500 - }, - { - "epoch": 2.78, - "learning_rate": 2.7834314318719397e-08, - "loss": 4.0755, - "step": 253000 - }, - { - "epoch": 2.79, - "learning_rate": 2.788932284504098e-08, - "loss": 4.0652, - "step": 253500 - }, - { - "epoch": 2.79, - "learning_rate": 2.7944331371362562e-08, - "loss": 4.0744, - "step": 254000 - }, - { - "epoch": 2.8, - "learning_rate": 2.7999339897684138e-08, - "loss": 4.0751, - "step": 254500 - }, - { - "epoch": 2.81, - "learning_rate": 2.805434842400572e-08, - "loss": 4.0602, - "step": 255000 - }, - { - "epoch": 2.81, - "learning_rate": 2.81093569503273e-08, - "loss": 4.0901, - "step": 255500 - }, - { - "epoch": 2.82, - "learning_rate": 2.8164365476648876e-08, - "loss": 4.0726, - "step": 256000 - }, - { - "epoch": 2.82, - "learning_rate": 2.8219374002970458e-08, - "loss": 4.074, - "step": 256500 - }, - { - "epoch": 2.83, - "learning_rate": 2.827438252929204e-08, - "loss": 4.0823, - "step": 257000 - }, - { - "epoch": 2.83, - "learning_rate": 2.8329391055613616e-08, - "loss": 4.0885, - "step": 257500 - }, - { - "epoch": 2.84, - "learning_rate": 2.83843995819352e-08, - "loss": 4.0899, - "step": 258000 - }, - { - "epoch": 2.84, - "learning_rate": 2.8439408108256778e-08, - "loss": 4.0722, - "step": 258500 - }, - { - "epoch": 2.85, - "learning_rate": 2.849441663457836e-08, - "loss": 4.0792, - "step": 259000 - }, - { - "epoch": 2.85, - "learning_rate": 2.8549425160899936e-08, - "loss": 4.0765, - "step": 259500 - }, - { - "epoch": 2.86, - "learning_rate": 2.860443368722152e-08, - "loss": 4.0724, - "step": 260000 - }, - { - "epoch": 2.87, - "learning_rate": 2.86594422135431e-08, - "loss": 4.0767, - "step": 260500 - }, - { - "epoch": 2.87, - "learning_rate": 2.8714450739864677e-08, - "loss": 4.0731, - "step": 261000 - }, - { - "epoch": 2.88, - "learning_rate": 2.8769459266186256e-08, - "loss": 4.0742, - "step": 261500 - }, - { - "epoch": 2.88, - "learning_rate": 2.882446779250784e-08, - "loss": 4.0782, - "step": 262000 - }, - { - "epoch": 2.89, - "learning_rate": 2.8879476318829415e-08, - "loss": 4.092, - "step": 262500 - }, - { - "epoch": 2.89, - "learning_rate": 2.8934484845150997e-08, - "loss": 4.0789, - "step": 263000 - }, - { - "epoch": 2.9, - "learning_rate": 2.898949337147258e-08, - "loss": 4.0854, - "step": 263500 - }, - { - "epoch": 2.9, - "learning_rate": 2.9044501897794156e-08, - "loss": 4.0897, - "step": 264000 - }, - { - "epoch": 2.91, - "learning_rate": 2.9099510424115735e-08, - "loss": 4.0724, - "step": 264500 - }, - { - "epoch": 2.92, - "learning_rate": 2.9154518950437317e-08, - "loss": 4.0951, - "step": 265000 - }, - { - "epoch": 2.92, - "learning_rate": 2.9209527476758893e-08, - "loss": 4.087, - "step": 265500 - }, - { - "epoch": 2.93, - "learning_rate": 2.9264536003080476e-08, - "loss": 4.0687, - "step": 266000 - }, - { - "epoch": 2.93, - "learning_rate": 2.9319544529402058e-08, - "loss": 4.085, - "step": 266500 - }, - { - "epoch": 2.94, - "learning_rate": 2.9374553055723634e-08, - "loss": 4.0707, - "step": 267000 - }, - { - "epoch": 2.94, - "learning_rate": 2.9429561582045216e-08, - "loss": 4.0662, - "step": 267500 - }, - { - "epoch": 2.95, - "learning_rate": 2.9484570108366796e-08, - "loss": 4.0841, - "step": 268000 - }, - { - "epoch": 2.95, - "learning_rate": 2.9539578634688378e-08, - "loss": 4.0762, - "step": 268500 - }, - { - "epoch": 2.96, - "learning_rate": 2.9594587161009954e-08, - "loss": 4.0742, - "step": 269000 - }, - { - "epoch": 2.96, - "learning_rate": 2.9649595687331536e-08, - "loss": 4.0795, - "step": 269500 - }, - { - "epoch": 2.97, - "learning_rate": 2.970460421365312e-08, - "loss": 4.0605, - "step": 270000 - }, - { - "epoch": 2.98, - "learning_rate": 2.9759612739974695e-08, - "loss": 4.1081, - "step": 270500 - }, - { - "epoch": 2.98, - "learning_rate": 2.9814621266296274e-08, - "loss": 4.0718, - "step": 271000 - }, - { - "epoch": 2.99, - "learning_rate": 2.9869629792617856e-08, - "loss": 4.0627, - "step": 271500 - }, - { - "epoch": 2.99, - "learning_rate": 2.992463831893943e-08, - "loss": 4.092, - "step": 272000 - }, - { - "epoch": 3.0, - "learning_rate": 2.9979646845261015e-08, - "loss": 4.0619, - "step": 272500 - }, - { - "epoch": 3.0, - "eval_loss": 4.056740760803223, - "eval_runtime": 6.1366, - "eval_samples_per_second": 253.236, - "step": 272685 - }, - { - "epoch": 3.0, - "learning_rate": 3.00346553715826e-08, - "loss": 4.0703, - "step": 273000 - }, - { - "epoch": 3.01, - "learning_rate": 3.008966389790417e-08, - "loss": 4.0576, - "step": 273500 - }, - { - "epoch": 3.01, - "learning_rate": 3.0144672424225755e-08, - "loss": 4.0756, - "step": 274000 - }, - { - "epoch": 3.02, - "learning_rate": 3.019968095054734e-08, - "loss": 4.0657, - "step": 274500 - }, - { - "epoch": 3.03, - "learning_rate": 3.0254689476868914e-08, - "loss": 4.0519, - "step": 275000 - }, - { - "epoch": 3.03, - "learning_rate": 3.030969800319049e-08, - "loss": 4.0658, - "step": 275500 - }, - { - "epoch": 3.04, - "learning_rate": 3.036470652951207e-08, - "loss": 4.0678, - "step": 276000 - }, - { - "epoch": 3.04, - "learning_rate": 3.041971505583365e-08, - "loss": 4.0844, - "step": 276500 - }, - { - "epoch": 3.05, - "learning_rate": 3.047472358215523e-08, - "loss": 4.0661, - "step": 277000 - }, - { - "epoch": 3.05, - "learning_rate": 3.052973210847681e-08, - "loss": 4.0723, - "step": 277500 - }, - { - "epoch": 3.06, - "learning_rate": 3.0584740634798395e-08, - "loss": 4.0739, - "step": 278000 - }, - { - "epoch": 3.06, - "learning_rate": 3.063974916111997e-08, - "loss": 4.0801, - "step": 278500 - }, - { - "epoch": 3.07, - "learning_rate": 3.0694757687441554e-08, - "loss": 4.0804, - "step": 279000 - }, - { - "epoch": 3.07, - "learning_rate": 3.0749766213763136e-08, - "loss": 4.0707, - "step": 279500 - }, - { - "epoch": 3.08, - "learning_rate": 3.080477474008471e-08, - "loss": 4.0445, - "step": 280000 - }, - { - "epoch": 3.09, - "learning_rate": 3.0859783266406295e-08, - "loss": 4.046, - "step": 280500 - }, - { - "epoch": 3.09, - "learning_rate": 3.091479179272787e-08, - "loss": 4.0697, - "step": 281000 - }, - { - "epoch": 3.1, - "learning_rate": 3.0969800319049446e-08, - "loss": 4.0503, - "step": 281500 - }, - { - "epoch": 3.1, - "learning_rate": 3.102480884537103e-08, - "loss": 4.038, - "step": 282000 - }, - { - "epoch": 3.11, - "learning_rate": 3.107981737169261e-08, - "loss": 4.0421, - "step": 282500 - }, - { - "epoch": 3.11, - "learning_rate": 3.113482589801419e-08, - "loss": 4.0489, - "step": 283000 - }, - { - "epoch": 3.12, - "learning_rate": 3.118983442433577e-08, - "loss": 4.0613, - "step": 283500 - }, - { - "epoch": 3.12, - "learning_rate": 3.124484295065735e-08, - "loss": 4.0681, - "step": 284000 - }, - { - "epoch": 3.13, - "learning_rate": 3.129985147697893e-08, - "loss": 4.0751, - "step": 284500 - }, - { - "epoch": 3.14, - "learning_rate": 3.135486000330051e-08, - "loss": 4.0464, - "step": 285000 - }, - { - "epoch": 3.14, - "learning_rate": 3.140986852962209e-08, - "loss": 4.0619, - "step": 285500 - }, - { - "epoch": 3.15, - "learning_rate": 3.146487705594367e-08, - "loss": 4.065, - "step": 286000 - }, - { - "epoch": 3.15, - "learning_rate": 3.151988558226525e-08, - "loss": 4.0525, - "step": 286500 - }, - { - "epoch": 3.16, - "learning_rate": 3.157489410858683e-08, - "loss": 4.0533, - "step": 287000 - }, - { - "epoch": 3.16, - "learning_rate": 3.162990263490841e-08, - "loss": 4.0558, - "step": 287500 - }, - { - "epoch": 3.17, - "learning_rate": 3.1684911161229985e-08, - "loss": 4.0897, - "step": 288000 - }, - { - "epoch": 3.17, - "learning_rate": 3.173991968755157e-08, - "loss": 4.0699, - "step": 288500 - }, - { - "epoch": 3.18, - "learning_rate": 3.179492821387315e-08, - "loss": 4.0454, - "step": 289000 - }, - { - "epoch": 3.18, - "learning_rate": 3.1849936740194726e-08, - "loss": 4.073, - "step": 289500 - }, - { - "epoch": 3.19, - "learning_rate": 3.190494526651631e-08, - "loss": 4.0347, - "step": 290000 - }, - { - "epoch": 3.2, - "learning_rate": 3.195995379283789e-08, - "loss": 4.0387, - "step": 290500 - }, - { - "epoch": 3.2, - "learning_rate": 3.201496231915947e-08, - "loss": 4.0866, - "step": 291000 - }, - { - "epoch": 3.21, - "learning_rate": 3.206997084548105e-08, - "loss": 4.0629, - "step": 291500 - }, - { - "epoch": 3.21, - "learning_rate": 3.212497937180263e-08, - "loss": 4.0451, - "step": 292000 - }, - { - "epoch": 3.22, - "learning_rate": 3.217998789812421e-08, - "loss": 4.0698, - "step": 292500 - }, - { - "epoch": 3.22, - "learning_rate": 3.223499642444579e-08, - "loss": 4.06, - "step": 293000 - }, - { - "epoch": 3.23, - "learning_rate": 3.2290004950767366e-08, - "loss": 4.0609, - "step": 293500 - }, - { - "epoch": 3.23, - "learning_rate": 3.234501347708894e-08, - "loss": 4.06, - "step": 294000 - }, - { - "epoch": 3.24, - "learning_rate": 3.2400022003410525e-08, - "loss": 4.0346, - "step": 294500 - }, - { - "epoch": 3.25, - "learning_rate": 3.245503052973211e-08, - "loss": 4.0518, - "step": 295000 - }, - { - "epoch": 3.25, - "learning_rate": 3.251003905605368e-08, - "loss": 4.0434, - "step": 295500 - }, - { - "epoch": 3.26, - "learning_rate": 3.2565047582375265e-08, - "loss": 4.0486, - "step": 296000 - }, - { - "epoch": 3.26, - "learning_rate": 3.262005610869685e-08, - "loss": 4.0567, - "step": 296500 - }, - { - "epoch": 3.27, - "learning_rate": 3.267506463501843e-08, - "loss": 4.0553, - "step": 297000 - }, - { - "epoch": 3.27, - "learning_rate": 3.2730073161340006e-08, - "loss": 4.0523, - "step": 297500 - }, - { - "epoch": 3.28, - "learning_rate": 3.278508168766159e-08, - "loss": 4.041, - "step": 298000 - }, - { - "epoch": 3.28, - "learning_rate": 3.284009021398317e-08, - "loss": 4.0354, - "step": 298500 - }, - { - "epoch": 3.29, - "learning_rate": 3.289509874030475e-08, - "loss": 4.0617, - "step": 299000 - }, - { - "epoch": 3.3, - "learning_rate": 3.295010726662632e-08, - "loss": 4.0707, - "step": 299500 - }, - { - "epoch": 3.3, - "learning_rate": 3.3005115792947905e-08, - "loss": 4.0604, - "step": 300000 - }, - { - "epoch": 3.31, - "learning_rate": 3.306012431926948e-08, - "loss": 4.0456, - "step": 300500 - }, - { - "epoch": 3.31, - "learning_rate": 3.3115132845591064e-08, - "loss": 4.0447, - "step": 301000 - }, - { - "epoch": 3.32, - "learning_rate": 3.3170141371912646e-08, - "loss": 4.0557, - "step": 301500 - }, - { - "epoch": 3.32, - "learning_rate": 3.322514989823422e-08, - "loss": 4.0457, - "step": 302000 - }, - { - "epoch": 3.33, - "learning_rate": 3.3280158424555805e-08, - "loss": 4.0368, - "step": 302500 - }, - { - "epoch": 3.33, - "learning_rate": 3.333516695087739e-08, - "loss": 4.0553, - "step": 303000 - }, - { - "epoch": 3.34, - "learning_rate": 3.339017547719896e-08, - "loss": 4.0562, - "step": 303500 - }, - { - "epoch": 3.34, - "learning_rate": 3.3445184003520545e-08, - "loss": 4.0501, - "step": 304000 - }, - { - "epoch": 3.35, - "learning_rate": 3.350019252984213e-08, - "loss": 4.0603, - "step": 304500 - }, - { - "epoch": 3.36, - "learning_rate": 3.3555201056163704e-08, - "loss": 4.0607, - "step": 305000 - }, - { - "epoch": 3.36, - "learning_rate": 3.361020958248528e-08, - "loss": 4.0504, - "step": 305500 - }, - { - "epoch": 3.37, - "learning_rate": 3.366521810880686e-08, - "loss": 4.0367, - "step": 306000 - }, - { - "epoch": 3.37, - "learning_rate": 3.3720226635128444e-08, - "loss": 4.0498, - "step": 306500 - }, - { - "epoch": 3.38, - "learning_rate": 3.377523516145002e-08, - "loss": 4.0457, - "step": 307000 - }, - { - "epoch": 3.38, - "learning_rate": 3.38302436877716e-08, - "loss": 4.0459, - "step": 307500 - }, - { - "epoch": 3.39, - "learning_rate": 3.3885252214093185e-08, - "loss": 4.0495, - "step": 308000 - }, - { - "epoch": 3.39, - "learning_rate": 3.394026074041476e-08, - "loss": 4.0432, - "step": 308500 - }, - { - "epoch": 3.4, - "learning_rate": 3.3995269266736344e-08, - "loss": 4.0577, - "step": 309000 - }, - { - "epoch": 3.41, - "learning_rate": 3.4050277793057926e-08, - "loss": 4.0555, - "step": 309500 - }, - { - "epoch": 3.41, - "learning_rate": 3.41052863193795e-08, - "loss": 4.0387, - "step": 310000 - }, - { - "epoch": 3.42, - "learning_rate": 3.4160294845701084e-08, - "loss": 4.0537, - "step": 310500 - }, - { - "epoch": 3.42, - "learning_rate": 3.421530337202266e-08, - "loss": 4.0493, - "step": 311000 - }, - { - "epoch": 3.43, - "learning_rate": 3.427031189834424e-08, - "loss": 4.04, - "step": 311500 - }, - { - "epoch": 3.43, - "learning_rate": 3.432532042466582e-08, - "loss": 4.0484, - "step": 312000 - }, - { - "epoch": 3.44, - "learning_rate": 3.43803289509874e-08, - "loss": 4.0613, - "step": 312500 - }, - { - "epoch": 3.44, - "learning_rate": 3.443533747730898e-08, - "loss": 4.0247, - "step": 313000 - }, - { - "epoch": 3.45, - "learning_rate": 3.449034600363056e-08, - "loss": 4.017, - "step": 313500 - }, - { - "epoch": 3.45, - "learning_rate": 3.454535452995214e-08, - "loss": 4.044, - "step": 314000 - }, - { - "epoch": 3.46, - "learning_rate": 3.460036305627372e-08, - "loss": 4.0464, - "step": 314500 - }, - { - "epoch": 3.47, - "learning_rate": 3.46553715825953e-08, - "loss": 4.0469, - "step": 315000 - }, - { - "epoch": 3.47, - "learning_rate": 3.471038010891688e-08, - "loss": 4.0392, - "step": 315500 - }, - { - "epoch": 3.48, - "learning_rate": 3.4765388635238465e-08, - "loss": 4.0456, - "step": 316000 - }, - { - "epoch": 3.48, - "learning_rate": 3.482039716156004e-08, - "loss": 4.0338, - "step": 316500 - }, - { - "epoch": 3.49, - "learning_rate": 3.4875405687881624e-08, - "loss": 4.032, - "step": 317000 - }, - { - "epoch": 3.49, - "learning_rate": 3.49304142142032e-08, - "loss": 4.0359, - "step": 317500 - }, - { - "epoch": 3.5, - "learning_rate": 3.4985422740524775e-08, - "loss": 4.0514, - "step": 318000 - }, - { - "epoch": 3.5, - "learning_rate": 3.504043126684636e-08, - "loss": 4.0503, - "step": 318500 - }, - { - "epoch": 3.51, - "learning_rate": 3.509543979316794e-08, - "loss": 4.0402, - "step": 319000 - }, - { - "epoch": 3.52, - "learning_rate": 3.5150448319489516e-08, - "loss": 4.0275, - "step": 319500 - }, - { - "epoch": 3.52, - "learning_rate": 3.52054568458111e-08, - "loss": 4.06, - "step": 320000 - }, - { - "epoch": 3.53, - "learning_rate": 3.526046537213268e-08, - "loss": 4.0523, - "step": 320500 - }, - { - "epoch": 3.53, - "learning_rate": 3.531547389845426e-08, - "loss": 4.0402, - "step": 321000 - }, - { - "epoch": 3.54, - "learning_rate": 3.537048242477584e-08, - "loss": 4.0494, - "step": 321500 - }, - { - "epoch": 3.54, - "learning_rate": 3.542549095109742e-08, - "loss": 4.0467, - "step": 322000 - }, - { - "epoch": 3.55, - "learning_rate": 3.5480499477419e-08, - "loss": 4.0423, - "step": 322500 - }, - { - "epoch": 3.55, - "learning_rate": 3.553550800374058e-08, - "loss": 4.0594, - "step": 323000 - }, - { - "epoch": 3.56, - "learning_rate": 3.5590516530062156e-08, - "loss": 4.031, - "step": 323500 - }, - { - "epoch": 3.56, - "learning_rate": 3.564552505638374e-08, - "loss": 4.0452, - "step": 324000 - }, - { - "epoch": 3.57, - "learning_rate": 3.5700533582705314e-08, - "loss": 4.0303, - "step": 324500 - }, - { - "epoch": 3.58, - "learning_rate": 3.57555421090269e-08, - "loss": 4.0428, - "step": 325000 - }, - { - "epoch": 3.58, - "learning_rate": 3.581055063534848e-08, - "loss": 4.0235, - "step": 325500 - }, - { - "epoch": 3.59, - "learning_rate": 3.5865559161670055e-08, - "loss": 4.0525, - "step": 326000 - }, - { - "epoch": 3.59, - "learning_rate": 3.592056768799164e-08, - "loss": 4.0301, - "step": 326500 - }, - { - "epoch": 3.6, - "learning_rate": 3.597557621431322e-08, - "loss": 4.0356, - "step": 327000 - }, - { - "epoch": 3.6, - "learning_rate": 3.6030584740634796e-08, - "loss": 4.0469, - "step": 327500 - }, - { - "epoch": 3.61, - "learning_rate": 3.608559326695638e-08, - "loss": 4.0563, - "step": 328000 - }, - { - "epoch": 3.61, - "learning_rate": 3.614060179327796e-08, - "loss": 4.0312, - "step": 328500 - }, - { - "epoch": 3.62, - "learning_rate": 3.619561031959954e-08, - "loss": 4.0429, - "step": 329000 - }, - { - "epoch": 3.63, - "learning_rate": 3.625061884592111e-08, - "loss": 4.0158, - "step": 329500 - }, - { - "epoch": 3.63, - "learning_rate": 3.6305627372242695e-08, - "loss": 4.0303, - "step": 330000 - }, - { - "epoch": 3.64, - "learning_rate": 3.636063589856427e-08, - "loss": 4.026, - "step": 330500 - }, - { - "epoch": 3.64, - "learning_rate": 3.6415644424885854e-08, - "loss": 4.0521, - "step": 331000 - }, - { - "epoch": 3.65, - "learning_rate": 3.6470652951207436e-08, - "loss": 4.0328, - "step": 331500 - }, - { - "epoch": 3.65, - "learning_rate": 3.652566147752901e-08, - "loss": 4.0433, - "step": 332000 - }, - { - "epoch": 3.66, - "learning_rate": 3.6580670003850594e-08, - "loss": 4.0237, - "step": 332500 - }, - { - "epoch": 3.66, - "learning_rate": 3.663567853017218e-08, - "loss": 4.0241, - "step": 333000 - }, - { - "epoch": 3.67, - "learning_rate": 3.669068705649376e-08, - "loss": 4.0381, - "step": 333500 - }, - { - "epoch": 3.67, - "learning_rate": 3.6745695582815335e-08, - "loss": 4.0307, - "step": 334000 - }, - { - "epoch": 3.68, - "learning_rate": 3.680070410913692e-08, - "loss": 4.0286, - "step": 334500 - }, - { - "epoch": 3.69, - "learning_rate": 3.6855712635458494e-08, - "loss": 4.0291, - "step": 335000 - }, - { - "epoch": 3.69, - "learning_rate": 3.6910721161780076e-08, - "loss": 4.0137, - "step": 335500 - }, - { - "epoch": 3.7, - "learning_rate": 3.696572968810165e-08, - "loss": 4.0364, - "step": 336000 - }, - { - "epoch": 3.7, - "learning_rate": 3.7020738214423234e-08, - "loss": 4.0147, - "step": 336500 - }, - { - "epoch": 3.71, - "learning_rate": 3.707574674074481e-08, - "loss": 4.0135, - "step": 337000 - }, - { - "epoch": 3.71, - "learning_rate": 3.713075526706639e-08, - "loss": 4.0234, - "step": 337500 - }, - { - "epoch": 3.72, - "learning_rate": 3.7185763793387975e-08, - "loss": 4.0387, - "step": 338000 - }, - { - "epoch": 3.72, - "learning_rate": 3.724077231970955e-08, - "loss": 4.0183, - "step": 338500 - }, - { - "epoch": 3.73, - "learning_rate": 3.7295780846031134e-08, - "loss": 4.0375, - "step": 339000 - }, - { - "epoch": 3.74, - "learning_rate": 3.7350789372352716e-08, - "loss": 4.0222, - "step": 339500 - }, - { - "epoch": 3.74, - "learning_rate": 3.740579789867429e-08, - "loss": 4.0139, - "step": 340000 - }, - { - "epoch": 3.75, - "learning_rate": 3.7460806424995874e-08, - "loss": 4.0278, - "step": 340500 - }, - { - "epoch": 3.75, - "learning_rate": 3.751581495131746e-08, - "loss": 4.0418, - "step": 341000 - }, - { - "epoch": 3.76, - "learning_rate": 3.757082347763903e-08, - "loss": 4.016, - "step": 341500 - }, - { - "epoch": 3.76, - "learning_rate": 3.762583200396061e-08, - "loss": 4.0273, - "step": 342000 - }, - { - "epoch": 3.77, - "learning_rate": 3.768084053028219e-08, - "loss": 4.0246, - "step": 342500 - }, - { - "epoch": 3.77, - "learning_rate": 3.7735849056603774e-08, - "loss": 4.0259, - "step": 343000 - }, - { - "epoch": 3.78, - "learning_rate": 3.779085758292535e-08, - "loss": 4.0245, - "step": 343500 - }, - { - "epoch": 3.78, - "learning_rate": 3.784586610924693e-08, - "loss": 4.0109, - "step": 344000 - }, - { - "epoch": 3.79, - "learning_rate": 3.7900874635568514e-08, - "loss": 4.0318, - "step": 344500 - }, - { - "epoch": 3.8, - "learning_rate": 3.795588316189009e-08, - "loss": 4.0287, - "step": 345000 - }, - { - "epoch": 3.8, - "learning_rate": 3.801089168821167e-08, - "loss": 4.0193, - "step": 345500 - }, - { - "epoch": 3.81, - "learning_rate": 3.8065900214533255e-08, - "loss": 4.0221, - "step": 346000 - }, - { - "epoch": 3.81, - "learning_rate": 3.812090874085483e-08, - "loss": 4.0096, - "step": 346500 - }, - { - "epoch": 3.82, - "learning_rate": 3.8175917267176413e-08, - "loss": 4.0385, - "step": 347000 - }, - { - "epoch": 3.82, - "learning_rate": 3.823092579349799e-08, - "loss": 4.0195, - "step": 347500 - }, - { - "epoch": 3.83, - "learning_rate": 3.8285934319819565e-08, - "loss": 4.0207, - "step": 348000 - }, - { - "epoch": 3.83, - "learning_rate": 3.834094284614115e-08, - "loss": 4.0367, - "step": 348500 - }, - { - "epoch": 3.84, - "learning_rate": 3.839595137246273e-08, - "loss": 4.0268, - "step": 349000 - }, - { - "epoch": 3.85, - "learning_rate": 3.8450959898784306e-08, - "loss": 4.0501, - "step": 349500 - }, - { - "epoch": 3.85, - "learning_rate": 3.850596842510589e-08, - "loss": 4.0366, - "step": 350000 - }, - { - "epoch": 3.86, - "learning_rate": 3.856097695142747e-08, - "loss": 4.0161, - "step": 350500 - }, - { - "epoch": 3.86, - "learning_rate": 3.861598547774905e-08, - "loss": 4.0321, - "step": 351000 - }, - { - "epoch": 3.87, - "learning_rate": 3.867099400407063e-08, - "loss": 4.0502, - "step": 351500 - }, - { - "epoch": 3.87, - "learning_rate": 3.872600253039221e-08, - "loss": 4.0289, - "step": 352000 - }, - { - "epoch": 3.88, - "learning_rate": 3.8781011056713794e-08, - "loss": 4.0096, - "step": 352500 - }, - { - "epoch": 3.88, - "learning_rate": 3.883601958303537e-08, - "loss": 4.0201, - "step": 353000 - }, - { - "epoch": 3.89, - "learning_rate": 3.8891028109356946e-08, - "loss": 4.0336, - "step": 353500 - }, - { - "epoch": 3.89, - "learning_rate": 3.894603663567853e-08, - "loss": 4.0297, - "step": 354000 - }, - { - "epoch": 3.9, - "learning_rate": 3.9001045162000104e-08, - "loss": 4.0228, - "step": 354500 - }, - { - "epoch": 3.91, - "learning_rate": 3.905605368832169e-08, - "loss": 4.0394, - "step": 355000 - }, - { - "epoch": 3.91, - "learning_rate": 3.911106221464327e-08, - "loss": 4.0329, - "step": 355500 - }, - { - "epoch": 3.92, - "learning_rate": 3.9166070740964845e-08, - "loss": 4.0428, - "step": 356000 - }, - { - "epoch": 3.92, - "learning_rate": 3.922107926728643e-08, - "loss": 4.0207, - "step": 356500 - }, - { - "epoch": 3.93, - "learning_rate": 3.927608779360801e-08, - "loss": 4.0077, - "step": 357000 - }, - { - "epoch": 3.93, - "learning_rate": 3.9331096319929586e-08, - "loss": 4.0057, - "step": 357500 - }, - { - "epoch": 3.94, - "learning_rate": 3.938610484625117e-08, - "loss": 4.0271, - "step": 358000 - }, - { - "epoch": 3.94, - "learning_rate": 3.944111337257275e-08, - "loss": 4.0287, - "step": 358500 - }, - { - "epoch": 3.95, - "learning_rate": 3.949612189889433e-08, - "loss": 4.0055, - "step": 359000 - }, - { - "epoch": 3.96, - "learning_rate": 3.955113042521591e-08, - "loss": 4.008, - "step": 359500 - }, - { - "epoch": 3.96, - "learning_rate": 3.9606138951537485e-08, - "loss": 4.0103, - "step": 360000 - }, - { - "epoch": 3.97, - "learning_rate": 3.966114747785906e-08, - "loss": 4.031, - "step": 360500 - }, - { - "epoch": 3.97, - "learning_rate": 3.9716156004180643e-08, - "loss": 4.0047, - "step": 361000 - }, - { - "epoch": 3.98, - "learning_rate": 3.9771164530502226e-08, - "loss": 4.0167, - "step": 361500 - }, - { - "epoch": 3.98, - "learning_rate": 3.982617305682381e-08, - "loss": 4.0196, - "step": 362000 - }, - { - "epoch": 3.99, - "learning_rate": 3.9881181583145384e-08, - "loss": 4.0136, - "step": 362500 - }, - { - "epoch": 3.99, - "learning_rate": 3.993619010946697e-08, - "loss": 4.0117, - "step": 363000 - }, - { - "epoch": 4.0, - "learning_rate": 3.999119863578855e-08, - "loss": 4.0042, - "step": 363500 - }, - { - "epoch": 4.0, - "eval_loss": 4.0121588706970215, - "eval_runtime": 6.1397, - "eval_samples_per_second": 253.106, - "step": 363580 - }, - { - "epoch": 4.0, - "learning_rate": 4.0046207162110125e-08, - "loss": 4.0071, - "step": 364000 - }, - { - "epoch": 4.01, - "learning_rate": 4.010121568843171e-08, - "loss": 4.033, - "step": 364500 - }, - { - "epoch": 4.02, - "learning_rate": 4.015622421475329e-08, - "loss": 4.0074, - "step": 365000 - }, - { - "epoch": 4.02, - "learning_rate": 4.0211232741074866e-08, - "loss": 4.0232, - "step": 365500 - }, - { - "epoch": 4.03, - "learning_rate": 4.026624126739644e-08, - "loss": 4.0049, - "step": 366000 - }, - { - "epoch": 4.03, - "learning_rate": 4.0321249793718024e-08, - "loss": 4.0017, - "step": 366500 - }, - { - "epoch": 4.04, - "learning_rate": 4.03762583200396e-08, - "loss": 4.0143, - "step": 367000 - }, - { - "epoch": 4.04, - "learning_rate": 4.043126684636118e-08, - "loss": 4.0409, - "step": 367500 - }, - { - "epoch": 4.05, - "learning_rate": 4.0486275372682765e-08, - "loss": 4.0269, - "step": 368000 - }, - { - "epoch": 4.05, - "learning_rate": 4.054128389900434e-08, - "loss": 3.9749, - "step": 368500 - }, - { - "epoch": 4.06, - "learning_rate": 4.0596292425325923e-08, - "loss": 4.0078, - "step": 369000 - }, - { - "epoch": 4.07, - "learning_rate": 4.0651300951647506e-08, - "loss": 4.0135, - "step": 369500 - }, - { - "epoch": 4.07, - "learning_rate": 4.070630947796908e-08, - "loss": 4.0113, - "step": 370000 - }, - { - "epoch": 4.08, - "learning_rate": 4.0761318004290664e-08, - "loss": 4.0086, - "step": 370500 - }, - { - "epoch": 4.08, - "learning_rate": 4.081632653061225e-08, - "loss": 3.9849, - "step": 371000 - }, - { - "epoch": 4.09, - "learning_rate": 4.087133505693382e-08, - "loss": 4.0202, - "step": 371500 - }, - { - "epoch": 4.09, - "learning_rate": 4.09263435832554e-08, - "loss": 4.0197, - "step": 372000 - }, - { - "epoch": 4.1, - "learning_rate": 4.098135210957698e-08, - "loss": 4.0029, - "step": 372500 - }, - { - "epoch": 4.1, - "learning_rate": 4.1036360635898563e-08, - "loss": 3.9933, - "step": 373000 - }, - { - "epoch": 4.11, - "learning_rate": 4.109136916222014e-08, - "loss": 4.0088, - "step": 373500 - }, - { - "epoch": 4.11, - "learning_rate": 4.114637768854172e-08, - "loss": 4.0098, - "step": 374000 - }, - { - "epoch": 4.12, - "learning_rate": 4.1201386214863304e-08, - "loss": 4.0065, - "step": 374500 - }, - { - "epoch": 4.13, - "learning_rate": 4.125639474118488e-08, - "loss": 4.0075, - "step": 375000 - }, - { - "epoch": 4.13, - "learning_rate": 4.131140326750646e-08, - "loss": 4.0129, - "step": 375500 - }, - { - "epoch": 4.14, - "learning_rate": 4.1366411793828045e-08, - "loss": 4.0109, - "step": 376000 - }, - { - "epoch": 4.14, - "learning_rate": 4.142142032014962e-08, - "loss": 4.0215, - "step": 376500 - }, - { - "epoch": 4.15, - "learning_rate": 4.1476428846471203e-08, - "loss": 4.0045, - "step": 377000 - }, - { - "epoch": 4.15, - "learning_rate": 4.153143737279278e-08, - "loss": 4.0102, - "step": 377500 - }, - { - "epoch": 4.16, - "learning_rate": 4.158644589911436e-08, - "loss": 4.0199, - "step": 378000 - }, - { - "epoch": 4.16, - "learning_rate": 4.164145442543594e-08, - "loss": 3.9962, - "step": 378500 - }, - { - "epoch": 4.17, - "learning_rate": 4.169646295175752e-08, - "loss": 4.002, - "step": 379000 - }, - { - "epoch": 4.18, - "learning_rate": 4.1751471478079096e-08, - "loss": 3.9867, - "step": 379500 - }, - { - "epoch": 4.18, - "learning_rate": 4.180648000440068e-08, - "loss": 3.9963, - "step": 380000 - }, - { - "epoch": 4.19, - "learning_rate": 4.186148853072226e-08, - "loss": 4.0038, - "step": 380500 - }, - { - "epoch": 4.19, - "learning_rate": 4.1916497057043843e-08, - "loss": 3.9885, - "step": 381000 - }, - { - "epoch": 4.2, - "learning_rate": 4.197150558336542e-08, - "loss": 4.0027, - "step": 381500 - }, - { - "epoch": 4.2, - "learning_rate": 4.2026514109687e-08, - "loss": 4.0353, - "step": 382000 - }, - { - "epoch": 4.21, - "learning_rate": 4.2081522636008584e-08, - "loss": 4.0029, - "step": 382500 - }, - { - "epoch": 4.21, - "learning_rate": 4.213653116233016e-08, - "loss": 4.005, - "step": 383000 - }, - { - "epoch": 4.22, - "learning_rate": 4.219153968865174e-08, - "loss": 4.0055, - "step": 383500 - }, - { - "epoch": 4.22, - "learning_rate": 4.224654821497332e-08, - "loss": 3.9999, - "step": 384000 - }, - { - "epoch": 4.23, - "learning_rate": 4.2301556741294894e-08, - "loss": 3.9951, - "step": 384500 - }, - { - "epoch": 4.24, - "learning_rate": 4.2356565267616477e-08, - "loss": 4.0008, - "step": 385000 - }, - { - "epoch": 4.24, - "learning_rate": 4.241157379393806e-08, - "loss": 3.9898, - "step": 385500 - }, - { - "epoch": 4.25, - "learning_rate": 4.2466582320259635e-08, - "loss": 4.002, - "step": 386000 - }, - { - "epoch": 4.25, - "learning_rate": 4.252159084658122e-08, - "loss": 4.0249, - "step": 386500 - }, - { - "epoch": 4.26, - "learning_rate": 4.25765993729028e-08, - "loss": 4.0081, - "step": 387000 - }, - { - "epoch": 4.26, - "learning_rate": 4.2631607899224376e-08, - "loss": 4.0123, - "step": 387500 - }, - { - "epoch": 4.27, - "learning_rate": 4.268661642554596e-08, - "loss": 4.0046, - "step": 388000 - }, - { - "epoch": 4.27, - "learning_rate": 4.274162495186754e-08, - "loss": 4.0146, - "step": 388500 - }, - { - "epoch": 4.28, - "learning_rate": 4.2796633478189117e-08, - "loss": 4.0301, - "step": 389000 - }, - { - "epoch": 4.29, - "learning_rate": 4.28516420045107e-08, - "loss": 3.9999, - "step": 389500 - }, - { - "epoch": 4.29, - "learning_rate": 4.2906650530832275e-08, - "loss": 4.0287, - "step": 390000 - }, - { - "epoch": 4.3, - "learning_rate": 4.296165905715386e-08, - "loss": 3.9945, - "step": 390500 - }, - { - "epoch": 4.3, - "learning_rate": 4.3016667583475433e-08, - "loss": 3.9994, - "step": 391000 - }, - { - "epoch": 4.31, - "learning_rate": 4.3071676109797016e-08, - "loss": 4.0263, - "step": 391500 - }, - { - "epoch": 4.31, - "learning_rate": 4.31266846361186e-08, - "loss": 3.9797, - "step": 392000 - }, - { - "epoch": 4.32, - "learning_rate": 4.3181693162440174e-08, - "loss": 4.0001, - "step": 392500 - }, - { - "epoch": 4.32, - "learning_rate": 4.3236701688761757e-08, - "loss": 4.011, - "step": 393000 - }, - { - "epoch": 4.33, - "learning_rate": 4.329171021508334e-08, - "loss": 4.0018, - "step": 393500 - }, - { - "epoch": 4.33, - "learning_rate": 4.3346718741404915e-08, - "loss": 3.9987, - "step": 394000 - }, - { - "epoch": 4.34, - "learning_rate": 4.34017272677265e-08, - "loss": 3.989, - "step": 394500 - }, - { - "epoch": 4.35, - "learning_rate": 4.345673579404808e-08, - "loss": 4.0141, - "step": 395000 - }, - { - "epoch": 4.35, - "learning_rate": 4.3511744320369656e-08, - "loss": 3.9949, - "step": 395500 - }, - { - "epoch": 4.36, - "learning_rate": 4.356675284669123e-08, - "loss": 3.9979, - "step": 396000 - }, - { - "epoch": 4.36, - "learning_rate": 4.3621761373012814e-08, - "loss": 3.9793, - "step": 396500 - }, - { - "epoch": 4.37, - "learning_rate": 4.367676989933439e-08, - "loss": 4.0263, - "step": 397000 - }, - { - "epoch": 4.37, - "learning_rate": 4.373177842565597e-08, - "loss": 4.0045, - "step": 397500 - }, - { - "epoch": 4.38, - "learning_rate": 4.3786786951977555e-08, - "loss": 4.0005, - "step": 398000 - }, - { - "epoch": 4.38, - "learning_rate": 4.384179547829913e-08, - "loss": 4.0029, - "step": 398500 - }, - { - "epoch": 4.39, - "learning_rate": 4.389680400462071e-08, - "loss": 3.9951, - "step": 399000 - }, - { - "epoch": 4.4, - "learning_rate": 4.3951812530942296e-08, - "loss": 4.0049, - "step": 399500 - }, - { - "epoch": 4.4, - "learning_rate": 4.400682105726388e-08, - "loss": 3.9915, - "step": 400000 - }, - { - "epoch": 4.41, - "learning_rate": 4.4061829583585454e-08, - "loss": 4.0023, - "step": 400500 - }, - { - "epoch": 4.41, - "learning_rate": 4.4116838109907037e-08, - "loss": 4.003, - "step": 401000 - }, - { - "epoch": 4.42, - "learning_rate": 4.417184663622861e-08, - "loss": 3.9966, - "step": 401500 - }, - { - "epoch": 4.42, - "learning_rate": 4.4226855162550195e-08, - "loss": 4.0086, - "step": 402000 - }, - { - "epoch": 4.43, - "learning_rate": 4.428186368887177e-08, - "loss": 4.0102, - "step": 402500 - }, - { - "epoch": 4.43, - "learning_rate": 4.433687221519335e-08, - "loss": 4.0021, - "step": 403000 - }, - { - "epoch": 4.44, - "learning_rate": 4.439188074151493e-08, - "loss": 3.9932, - "step": 403500 - }, - { - "epoch": 4.44, - "learning_rate": 4.444688926783651e-08, - "loss": 3.9922, - "step": 404000 - }, - { - "epoch": 4.45, - "learning_rate": 4.4501897794158094e-08, - "loss": 3.9927, - "step": 404500 - }, - { - "epoch": 4.46, - "learning_rate": 4.455690632047967e-08, - "loss": 4.008, - "step": 405000 - }, - { - "epoch": 4.46, - "learning_rate": 4.461191484680125e-08, - "loss": 3.9971, - "step": 405500 - }, - { - "epoch": 4.47, - "learning_rate": 4.4666923373122835e-08, - "loss": 4.0077, - "step": 406000 - }, - { - "epoch": 4.47, - "learning_rate": 4.472193189944441e-08, - "loss": 3.9928, - "step": 406500 - }, - { - "epoch": 4.48, - "learning_rate": 4.477694042576599e-08, - "loss": 3.9987, - "step": 407000 - }, - { - "epoch": 4.48, - "learning_rate": 4.4831948952087576e-08, - "loss": 3.9989, - "step": 407500 - }, - { - "epoch": 4.49, - "learning_rate": 4.488695747840915e-08, - "loss": 4.0057, - "step": 408000 - }, - { - "epoch": 4.49, - "learning_rate": 4.494196600473073e-08, - "loss": 3.9884, - "step": 408500 - }, - { - "epoch": 4.5, - "learning_rate": 4.499697453105231e-08, - "loss": 4.0138, - "step": 409000 - }, - { - "epoch": 4.51, - "learning_rate": 4.505198305737389e-08, - "loss": 4.0081, - "step": 409500 - }, - { - "epoch": 4.51, - "learning_rate": 4.510699158369547e-08, - "loss": 3.9936, - "step": 410000 - }, - { - "epoch": 4.52, - "learning_rate": 4.516200011001705e-08, - "loss": 4.0194, - "step": 410500 - }, - { - "epoch": 4.52, - "learning_rate": 4.521700863633863e-08, - "loss": 3.9893, - "step": 411000 - }, - { - "epoch": 4.53, - "learning_rate": 4.527201716266021e-08, - "loss": 3.9932, - "step": 411500 - }, - { - "epoch": 4.53, - "learning_rate": 4.532702568898179e-08, - "loss": 3.9974, - "step": 412000 - }, - { - "epoch": 4.54, - "learning_rate": 4.5382034215303374e-08, - "loss": 3.9908, - "step": 412500 - }, - { - "epoch": 4.54, - "learning_rate": 4.543704274162495e-08, - "loss": 3.9843, - "step": 413000 - }, - { - "epoch": 4.55, - "learning_rate": 4.549205126794653e-08, - "loss": 4.0037, - "step": 413500 - }, - { - "epoch": 4.55, - "learning_rate": 4.554705979426811e-08, - "loss": 3.9912, - "step": 414000 - }, - { - "epoch": 4.56, - "learning_rate": 4.5602068320589684e-08, - "loss": 4.0016, - "step": 414500 - }, - { - "epoch": 4.57, - "learning_rate": 4.5657076846911267e-08, - "loss": 3.9823, - "step": 415000 - }, - { - "epoch": 4.57, - "learning_rate": 4.571208537323285e-08, - "loss": 3.9904, - "step": 415500 - }, - { - "epoch": 4.58, - "learning_rate": 4.5767093899554425e-08, - "loss": 3.9957, - "step": 416000 - }, - { - "epoch": 4.58, - "learning_rate": 4.582210242587601e-08, - "loss": 3.9918, - "step": 416500 - }, - { - "epoch": 4.59, - "learning_rate": 4.587711095219759e-08, - "loss": 3.981, - "step": 417000 - }, - { - "epoch": 4.59, - "learning_rate": 4.5932119478519166e-08, - "loss": 3.988, - "step": 417500 - }, - { - "epoch": 4.6, - "learning_rate": 4.598712800484075e-08, - "loss": 3.9751, - "step": 418000 - }, - { - "epoch": 4.6, - "learning_rate": 4.604213653116233e-08, - "loss": 3.9723, - "step": 418500 - }, - { - "epoch": 4.61, - "learning_rate": 4.609714505748391e-08, - "loss": 3.9709, - "step": 419000 - }, - { - "epoch": 4.62, - "learning_rate": 4.615215358380549e-08, - "loss": 3.9787, - "step": 419500 - }, - { - "epoch": 4.62, - "learning_rate": 4.6207162110127065e-08, - "loss": 3.988, - "step": 420000 - }, - { - "epoch": 4.63, - "learning_rate": 4.626217063644865e-08, - "loss": 3.9831, - "step": 420500 - }, - { - "epoch": 4.63, - "learning_rate": 4.631717916277022e-08, - "loss": 3.99, - "step": 421000 - }, - { - "epoch": 4.64, - "learning_rate": 4.6372187689091806e-08, - "loss": 3.9824, - "step": 421500 - }, - { - "epoch": 4.64, - "learning_rate": 4.642719621541339e-08, - "loss": 3.9747, - "step": 422000 - }, - { - "epoch": 4.65, - "learning_rate": 4.6482204741734964e-08, - "loss": 3.9933, - "step": 422500 - }, - { - "epoch": 4.65, - "learning_rate": 4.6537213268056547e-08, - "loss": 4.0043, - "step": 423000 - }, - { - "epoch": 4.66, - "learning_rate": 4.659222179437813e-08, - "loss": 3.9815, - "step": 423500 - }, - { - "epoch": 4.66, - "learning_rate": 4.6647230320699705e-08, - "loss": 3.9837, - "step": 424000 - }, - { - "epoch": 4.67, - "learning_rate": 4.670223884702129e-08, - "loss": 3.9899, - "step": 424500 - }, - { - "epoch": 4.68, - "learning_rate": 4.675724737334287e-08, - "loss": 4.0, - "step": 425000 - }, - { - "epoch": 4.68, - "learning_rate": 4.6812255899664446e-08, - "loss": 3.9825, - "step": 425500 - }, - { - "epoch": 4.69, - "learning_rate": 4.686726442598603e-08, - "loss": 3.9884, - "step": 426000 - }, - { - "epoch": 4.69, - "learning_rate": 4.6922272952307604e-08, - "loss": 3.9689, - "step": 426500 - }, - { - "epoch": 4.7, - "learning_rate": 4.697728147862918e-08, - "loss": 3.9748, - "step": 427000 - }, - { - "epoch": 4.7, - "learning_rate": 4.703229000495076e-08, - "loss": 3.9784, - "step": 427500 - }, - { - "epoch": 4.71, - "learning_rate": 4.7087298531272345e-08, - "loss": 3.9905, - "step": 428000 - }, - { - "epoch": 4.71, - "learning_rate": 4.714230705759393e-08, - "loss": 3.974, - "step": 428500 - }, - { - "epoch": 4.72, - "learning_rate": 4.71973155839155e-08, - "loss": 3.9935, - "step": 429000 - }, - { - "epoch": 4.73, - "learning_rate": 4.7252324110237086e-08, - "loss": 3.9933, - "step": 429500 - }, - { - "epoch": 4.73, - "learning_rate": 4.730733263655867e-08, - "loss": 3.973, - "step": 430000 - }, - { - "epoch": 4.74, - "learning_rate": 4.7362341162880244e-08, - "loss": 3.9661, - "step": 430500 - }, - { - "epoch": 4.74, - "learning_rate": 4.7417349689201826e-08, - "loss": 3.9842, - "step": 431000 - }, - { - "epoch": 4.75, - "learning_rate": 4.747235821552341e-08, - "loss": 3.9886, - "step": 431500 - }, - { - "epoch": 4.75, - "learning_rate": 4.7527366741844985e-08, - "loss": 3.9817, - "step": 432000 - }, - { - "epoch": 4.76, - "learning_rate": 4.758237526816656e-08, - "loss": 3.9832, - "step": 432500 - }, - { - "epoch": 4.76, - "learning_rate": 4.763738379448814e-08, - "loss": 3.9892, - "step": 433000 - }, - { - "epoch": 4.77, - "learning_rate": 4.769239232080972e-08, - "loss": 3.9949, - "step": 433500 - }, - { - "epoch": 4.77, - "learning_rate": 4.77474008471313e-08, - "loss": 3.9738, - "step": 434000 - }, - { - "epoch": 4.78, - "learning_rate": 4.7802409373452884e-08, - "loss": 3.9813, - "step": 434500 - }, - { - "epoch": 4.79, - "learning_rate": 4.785741789977446e-08, - "loss": 3.9875, - "step": 435000 - }, - { - "epoch": 4.79, - "learning_rate": 4.791242642609604e-08, - "loss": 4.0019, - "step": 435500 - }, - { - "epoch": 4.8, - "learning_rate": 4.7967434952417625e-08, - "loss": 3.9713, - "step": 436000 - }, - { - "epoch": 4.8, - "learning_rate": 4.80224434787392e-08, - "loss": 3.9735, - "step": 436500 - }, - { - "epoch": 4.81, - "learning_rate": 4.807745200506078e-08, - "loss": 3.995, - "step": 437000 - }, - { - "epoch": 4.81, - "learning_rate": 4.8132460531382366e-08, - "loss": 3.9831, - "step": 437500 - }, - { - "epoch": 4.82, - "learning_rate": 4.818746905770394e-08, - "loss": 3.9909, - "step": 438000 - }, - { - "epoch": 4.82, - "learning_rate": 4.824247758402552e-08, - "loss": 3.9871, - "step": 438500 - }, - { - "epoch": 4.83, - "learning_rate": 4.82974861103471e-08, - "loss": 3.9827, - "step": 439000 - }, - { - "epoch": 4.84, - "learning_rate": 4.835249463666868e-08, - "loss": 3.9868, - "step": 439500 - }, - { - "epoch": 4.84, - "learning_rate": 4.840750316299026e-08, - "loss": 3.9791, - "step": 440000 - }, - { - "epoch": 4.85, - "learning_rate": 4.846251168931184e-08, - "loss": 3.992, - "step": 440500 - }, - { - "epoch": 4.85, - "learning_rate": 4.851752021563342e-08, - "loss": 3.995, - "step": 441000 - }, - { - "epoch": 4.86, - "learning_rate": 4.8572528741955e-08, - "loss": 3.974, - "step": 441500 - }, - { - "epoch": 4.86, - "learning_rate": 4.862753726827658e-08, - "loss": 3.9685, - "step": 442000 - }, - { - "epoch": 4.87, - "learning_rate": 4.8682545794598164e-08, - "loss": 3.994, - "step": 442500 - }, - { - "epoch": 4.87, - "learning_rate": 4.873755432091974e-08, - "loss": 3.9945, - "step": 443000 - }, - { - "epoch": 4.88, - "learning_rate": 4.879256284724132e-08, - "loss": 3.9687, - "step": 443500 - }, - { - "epoch": 4.88, - "learning_rate": 4.88475713735629e-08, - "loss": 3.9684, - "step": 444000 - }, - { - "epoch": 4.89, - "learning_rate": 4.890257989988448e-08, - "loss": 3.9765, - "step": 444500 - }, - { - "epoch": 4.9, - "learning_rate": 4.8957588426206056e-08, - "loss": 3.971, - "step": 445000 - }, - { - "epoch": 4.9, - "learning_rate": 4.901259695252764e-08, - "loss": 3.9697, - "step": 445500 - }, - { - "epoch": 4.91, - "learning_rate": 4.9067605478849215e-08, - "loss": 3.9693, - "step": 446000 - }, - { - "epoch": 4.91, - "learning_rate": 4.91226140051708e-08, - "loss": 3.9733, - "step": 446500 - }, - { - "epoch": 4.92, - "learning_rate": 4.917762253149238e-08, - "loss": 3.9804, - "step": 447000 - }, - { - "epoch": 4.92, - "learning_rate": 4.923263105781396e-08, - "loss": 3.9777, - "step": 447500 - }, - { - "epoch": 4.93, - "learning_rate": 4.928763958413554e-08, - "loss": 3.979, - "step": 448000 - }, - { - "epoch": 4.93, - "learning_rate": 4.934264811045712e-08, - "loss": 3.9641, - "step": 448500 - }, - { - "epoch": 4.94, - "learning_rate": 4.93976566367787e-08, - "loss": 3.9728, - "step": 449000 - }, - { - "epoch": 4.95, - "learning_rate": 4.945266516310028e-08, - "loss": 3.9872, - "step": 449500 - }, - { - "epoch": 4.95, - "learning_rate": 4.950767368942186e-08, - "loss": 3.9514, - "step": 450000 - }, - { - "epoch": 4.96, - "learning_rate": 4.956268221574344e-08, - "loss": 3.974, - "step": 450500 - }, - { - "epoch": 4.96, - "learning_rate": 4.961769074206501e-08, - "loss": 3.9851, - "step": 451000 - }, - { - "epoch": 4.97, - "learning_rate": 4.9672699268386596e-08, - "loss": 3.974, - "step": 451500 - }, - { - "epoch": 4.97, - "learning_rate": 4.972770779470818e-08, - "loss": 3.9641, - "step": 452000 - }, - { - "epoch": 4.98, - "learning_rate": 4.9782716321029754e-08, - "loss": 3.9679, - "step": 452500 - }, - { - "epoch": 4.98, - "learning_rate": 4.9837724847351336e-08, - "loss": 3.9805, - "step": 453000 - }, - { - "epoch": 4.99, - "learning_rate": 4.989273337367292e-08, - "loss": 3.9635, - "step": 453500 - }, - { - "epoch": 4.99, - "learning_rate": 4.9947741899994495e-08, - "loss": 3.9655, - "step": 454000 - }, - { - "epoch": 5.0, - "eval_loss": 3.980429172515869, - "eval_runtime": 6.1307, - "eval_samples_per_second": 253.477, - "step": 454475 - }, - { - "epoch": 5.0, - "learning_rate": 5.000275042631608e-08, - "loss": 3.9819, - "step": 454500 - }, - { - "epoch": 5.01, - "learning_rate": 5.005775895263765e-08, - "loss": 3.9567, - "step": 455000 - }, - { - "epoch": 5.01, - "learning_rate": 5.011276747895924e-08, - "loss": 3.9931, - "step": 455500 - }, - { - "epoch": 5.02, - "learning_rate": 5.016777600528082e-08, - "loss": 3.9623, - "step": 456000 - }, - { - "epoch": 5.02, - "learning_rate": 5.0222784531602394e-08, - "loss": 3.9674, - "step": 456500 - }, - { - "epoch": 5.03, - "learning_rate": 5.0277793057923976e-08, - "loss": 3.955, - "step": 457000 - }, - { - "epoch": 5.03, - "learning_rate": 5.033280158424555e-08, - "loss": 3.9906, - "step": 457500 - }, - { - "epoch": 5.04, - "learning_rate": 5.038781011056713e-08, - "loss": 3.9723, - "step": 458000 - }, - { - "epoch": 5.04, - "learning_rate": 5.044281863688872e-08, - "loss": 3.9704, - "step": 458500 - }, - { - "epoch": 5.05, - "learning_rate": 5.049782716321029e-08, - "loss": 3.9716, - "step": 459000 - }, - { - "epoch": 5.06, - "learning_rate": 5.055283568953187e-08, - "loss": 3.9701, - "step": 459500 - }, - { - "epoch": 5.06, - "learning_rate": 5.060784421585346e-08, - "loss": 3.9642, - "step": 460000 - }, - { - "epoch": 5.07, - "learning_rate": 5.0662852742175034e-08, - "loss": 3.9735, - "step": 460500 - }, - { - "epoch": 5.07, - "learning_rate": 5.071786126849662e-08, - "loss": 3.9809, - "step": 461000 - }, - { - "epoch": 5.08, - "learning_rate": 5.07728697948182e-08, - "loss": 3.9641, - "step": 461500 - }, - { - "epoch": 5.08, - "learning_rate": 5.0827878321139775e-08, - "loss": 3.9653, - "step": 462000 - }, - { - "epoch": 5.09, - "learning_rate": 5.088288684746136e-08, - "loss": 3.9794, - "step": 462500 - }, - { - "epoch": 5.09, - "learning_rate": 5.093789537378293e-08, - "loss": 3.9835, - "step": 463000 - }, - { - "epoch": 5.1, - "learning_rate": 5.099290390010451e-08, - "loss": 3.988, - "step": 463500 - }, - { - "epoch": 5.1, - "learning_rate": 5.10479124264261e-08, - "loss": 3.964, - "step": 464000 - }, - { - "epoch": 5.11, - "learning_rate": 5.1102920952747674e-08, - "loss": 3.9694, - "step": 464500 - }, - { - "epoch": 5.12, - "learning_rate": 5.115792947906925e-08, - "loss": 3.9647, - "step": 465000 - }, - { - "epoch": 5.12, - "learning_rate": 5.121293800539084e-08, - "loss": 3.9671, - "step": 465500 - }, - { - "epoch": 5.13, - "learning_rate": 5.1267946531712415e-08, - "loss": 3.9661, - "step": 466000 - }, - { - "epoch": 5.13, - "learning_rate": 5.132295505803399e-08, - "loss": 3.9645, - "step": 466500 - }, - { - "epoch": 5.14, - "learning_rate": 5.137796358435558e-08, - "loss": 3.9872, - "step": 467000 - }, - { - "epoch": 5.14, - "learning_rate": 5.1432972110677155e-08, - "loss": 3.9663, - "step": 467500 - }, - { - "epoch": 5.15, - "learning_rate": 5.148798063699873e-08, - "loss": 3.9515, - "step": 468000 - }, - { - "epoch": 5.15, - "learning_rate": 5.1542989163320314e-08, - "loss": 3.9619, - "step": 468500 - }, - { - "epoch": 5.16, - "learning_rate": 5.159799768964189e-08, - "loss": 3.9639, - "step": 469000 - }, - { - "epoch": 5.17, - "learning_rate": 5.1653006215963466e-08, - "loss": 3.9766, - "step": 469500 - }, - { - "epoch": 5.17, - "learning_rate": 5.1708014742285055e-08, - "loss": 3.9871, - "step": 470000 - }, - { - "epoch": 5.18, - "learning_rate": 5.176302326860663e-08, - "loss": 3.9717, - "step": 470500 - }, - { - "epoch": 5.18, - "learning_rate": 5.1818031794928206e-08, - "loss": 3.9862, - "step": 471000 - }, - { - "epoch": 5.19, - "learning_rate": 5.1873040321249795e-08, - "loss": 3.9745, - "step": 471500 - }, - { - "epoch": 5.19, - "learning_rate": 5.192804884757137e-08, - "loss": 3.977, - "step": 472000 - }, - { - "epoch": 5.2, - "learning_rate": 5.198305737389295e-08, - "loss": 3.9838, - "step": 472500 - }, - { - "epoch": 5.2, - "learning_rate": 5.2038065900214536e-08, - "loss": 3.9731, - "step": 473000 - }, - { - "epoch": 5.21, - "learning_rate": 5.209307442653611e-08, - "loss": 3.9583, - "step": 473500 - }, - { - "epoch": 5.21, - "learning_rate": 5.214808295285769e-08, - "loss": 3.9543, - "step": 474000 - }, - { - "epoch": 5.22, - "learning_rate": 5.220309147917927e-08, - "loss": 3.974, - "step": 474500 - }, - { - "epoch": 5.23, - "learning_rate": 5.2258100005500846e-08, - "loss": 3.9668, - "step": 475000 - }, - { - "epoch": 5.23, - "learning_rate": 5.231310853182242e-08, - "loss": 3.9816, - "step": 475500 - }, - { - "epoch": 5.24, - "learning_rate": 5.236811705814401e-08, - "loss": 3.9574, - "step": 476000 - }, - { - "epoch": 5.24, - "learning_rate": 5.242312558446559e-08, - "loss": 3.9651, - "step": 476500 - }, - { - "epoch": 5.25, - "learning_rate": 5.247813411078716e-08, - "loss": 3.9695, - "step": 477000 - }, - { - "epoch": 5.25, - "learning_rate": 5.253314263710875e-08, - "loss": 3.9782, - "step": 477500 - }, - { - "epoch": 5.26, - "learning_rate": 5.258815116343033e-08, - "loss": 3.9788, - "step": 478000 - }, - { - "epoch": 5.26, - "learning_rate": 5.2643159689751904e-08, - "loss": 3.9607, - "step": 478500 - }, - { - "epoch": 5.27, - "learning_rate": 5.269816821607349e-08, - "loss": 3.9679, - "step": 479000 - }, - { - "epoch": 5.28, - "learning_rate": 5.275317674239507e-08, - "loss": 3.9604, - "step": 479500 - }, - { - "epoch": 5.28, - "learning_rate": 5.280818526871665e-08, - "loss": 3.9783, - "step": 480000 - }, - { - "epoch": 5.29, - "learning_rate": 5.286319379503823e-08, - "loss": 3.9557, - "step": 480500 - }, - { - "epoch": 5.29, - "learning_rate": 5.29182023213598e-08, - "loss": 3.981, - "step": 481000 - }, - { - "epoch": 5.3, - "learning_rate": 5.297321084768139e-08, - "loss": 3.9644, - "step": 481500 - }, - { - "epoch": 5.3, - "learning_rate": 5.302821937400297e-08, - "loss": 3.9616, - "step": 482000 - }, - { - "epoch": 5.31, - "learning_rate": 5.3083227900324544e-08, - "loss": 3.9705, - "step": 482500 - }, - { - "epoch": 5.31, - "learning_rate": 5.313823642664613e-08, - "loss": 3.9667, - "step": 483000 - }, - { - "epoch": 5.32, - "learning_rate": 5.319324495296771e-08, - "loss": 3.9628, - "step": 483500 - }, - { - "epoch": 5.32, - "learning_rate": 5.3248253479289285e-08, - "loss": 3.9661, - "step": 484000 - }, - { - "epoch": 5.33, - "learning_rate": 5.3303262005610874e-08, - "loss": 3.9593, - "step": 484500 - }, - { - "epoch": 5.34, - "learning_rate": 5.335827053193245e-08, - "loss": 3.9695, - "step": 485000 - }, - { - "epoch": 5.34, - "learning_rate": 5.3413279058254025e-08, - "loss": 3.9734, - "step": 485500 - }, - { - "epoch": 5.35, - "learning_rate": 5.346828758457561e-08, - "loss": 3.9713, - "step": 486000 - }, - { - "epoch": 5.35, - "learning_rate": 5.3523296110897184e-08, - "loss": 3.9626, - "step": 486500 - }, - { - "epoch": 5.36, - "learning_rate": 5.3578304637218766e-08, - "loss": 3.9468, - "step": 487000 - }, - { - "epoch": 5.36, - "learning_rate": 5.363331316354035e-08, - "loss": 3.9713, - "step": 487500 - }, - { - "epoch": 5.37, - "learning_rate": 5.3688321689861925e-08, - "loss": 3.9502, - "step": 488000 - }, - { - "epoch": 5.37, - "learning_rate": 5.37433302161835e-08, - "loss": 3.9582, - "step": 488500 - }, - { - "epoch": 5.38, - "learning_rate": 5.379833874250509e-08, - "loss": 3.9792, - "step": 489000 - }, - { - "epoch": 5.39, - "learning_rate": 5.3853347268826665e-08, - "loss": 3.9665, - "step": 489500 - }, - { - "epoch": 5.39, - "learning_rate": 5.390835579514824e-08, - "loss": 3.9455, - "step": 490000 - }, - { - "epoch": 5.4, - "learning_rate": 5.396336432146983e-08, - "loss": 3.9625, - "step": 490500 - }, - { - "epoch": 5.4, - "learning_rate": 5.4018372847791406e-08, - "loss": 3.967, - "step": 491000 - }, - { - "epoch": 5.41, - "learning_rate": 5.407338137411298e-08, - "loss": 3.9528, - "step": 491500 - }, - { - "epoch": 5.41, - "learning_rate": 5.412838990043457e-08, - "loss": 3.9586, - "step": 492000 - }, - { - "epoch": 5.42, - "learning_rate": 5.418339842675615e-08, - "loss": 3.9547, - "step": 492500 - }, - { - "epoch": 5.42, - "learning_rate": 5.423840695307772e-08, - "loss": 3.9765, - "step": 493000 - }, - { - "epoch": 5.43, - "learning_rate": 5.4293415479399305e-08, - "loss": 3.9561, - "step": 493500 - }, - { - "epoch": 5.43, - "learning_rate": 5.434842400572088e-08, - "loss": 3.9474, - "step": 494000 - }, - { - "epoch": 5.44, - "learning_rate": 5.440343253204246e-08, - "loss": 3.9687, - "step": 494500 - }, - { - "epoch": 5.45, - "learning_rate": 5.4458441058364046e-08, - "loss": 3.9501, - "step": 495000 - }, - { - "epoch": 5.45, - "learning_rate": 5.451344958468562e-08, - "loss": 3.9448, - "step": 495500 - }, - { - "epoch": 5.46, - "learning_rate": 5.45684581110072e-08, - "loss": 3.9588, - "step": 496000 - }, - { - "epoch": 5.46, - "learning_rate": 5.462346663732879e-08, - "loss": 3.9652, - "step": 496500 - }, - { - "epoch": 5.47, - "learning_rate": 5.467847516365036e-08, - "loss": 3.952, - "step": 497000 - }, - { - "epoch": 5.47, - "learning_rate": 5.473348368997194e-08, - "loss": 3.9503, - "step": 497500 - }, - { - "epoch": 5.48, - "learning_rate": 5.478849221629353e-08, - "loss": 3.9532, - "step": 498000 - }, - { - "epoch": 5.48, - "learning_rate": 5.4843500742615104e-08, - "loss": 3.9668, - "step": 498500 - }, - { - "epoch": 5.49, - "learning_rate": 5.4898509268936686e-08, - "loss": 3.9745, - "step": 499000 - }, - { - "epoch": 5.5, - "learning_rate": 5.495351779525826e-08, - "loss": 3.9625, - "step": 499500 - }, - { - "epoch": 5.5, - "learning_rate": 5.500852632157984e-08, - "loss": 3.9581, - "step": 500000 - }, - { - "epoch": 5.51, - "learning_rate": 5.506353484790143e-08, - "loss": 3.9555, - "step": 500500 - }, - { - "epoch": 5.51, - "learning_rate": 5.5118543374223e-08, - "loss": 3.9846, - "step": 501000 - }, - { - "epoch": 5.52, - "learning_rate": 5.517355190054458e-08, - "loss": 3.9805, - "step": 501500 - }, - { - "epoch": 5.52, - "learning_rate": 5.522856042686617e-08, - "loss": 3.9715, - "step": 502000 - }, - { - "epoch": 5.53, - "learning_rate": 5.5283568953187744e-08, - "loss": 3.963, - "step": 502500 - }, - { - "epoch": 5.53, - "learning_rate": 5.533857747950932e-08, - "loss": 3.9431, - "step": 503000 - }, - { - "epoch": 5.54, - "learning_rate": 5.539358600583091e-08, - "loss": 3.9637, - "step": 503500 - }, - { - "epoch": 5.54, - "learning_rate": 5.5448594532152484e-08, - "loss": 3.9447, - "step": 504000 - }, - { - "epoch": 5.55, - "learning_rate": 5.550360305847406e-08, - "loss": 3.9546, - "step": 504500 - }, - { - "epoch": 5.56, - "learning_rate": 5.555861158479564e-08, - "loss": 3.9714, - "step": 505000 - }, - { - "epoch": 5.56, - "learning_rate": 5.561362011111722e-08, - "loss": 3.9531, - "step": 505500 - }, - { - "epoch": 5.57, - "learning_rate": 5.5668628637438795e-08, - "loss": 3.9678, - "step": 506000 - }, - { - "epoch": 5.57, - "learning_rate": 5.5723637163760384e-08, - "loss": 3.9499, - "step": 506500 - }, - { - "epoch": 5.58, - "learning_rate": 5.577864569008196e-08, - "loss": 3.947, - "step": 507000 - }, - { - "epoch": 5.58, - "learning_rate": 5.5833654216403535e-08, - "loss": 3.961, - "step": 507500 - }, - { - "epoch": 5.59, - "learning_rate": 5.5888662742725124e-08, - "loss": 3.95, - "step": 508000 - }, - { - "epoch": 5.59, - "learning_rate": 5.59436712690467e-08, - "loss": 3.9695, - "step": 508500 - }, - { - "epoch": 5.6, - "learning_rate": 5.5998679795368276e-08, - "loss": 3.9634, - "step": 509000 - }, - { - "epoch": 5.61, - "learning_rate": 5.6053688321689865e-08, - "loss": 3.9519, - "step": 509500 - }, - { - "epoch": 5.61, - "learning_rate": 5.610869684801144e-08, - "loss": 3.9536, - "step": 510000 - }, - { - "epoch": 5.62, - "learning_rate": 5.616370537433302e-08, - "loss": 3.9448, - "step": 510500 - }, - { - "epoch": 5.62, - "learning_rate": 5.62187139006546e-08, - "loss": 3.9632, - "step": 511000 - }, - { - "epoch": 5.63, - "learning_rate": 5.6273722426976175e-08, - "loss": 3.9457, - "step": 511500 - }, - { - "epoch": 5.63, - "learning_rate": 5.632873095329775e-08, - "loss": 3.9622, - "step": 512000 - }, - { - "epoch": 5.64, - "learning_rate": 5.638373947961934e-08, - "loss": 3.9404, - "step": 512500 - }, - { - "epoch": 5.64, - "learning_rate": 5.6438748005940916e-08, - "loss": 3.9654, - "step": 513000 - }, - { - "epoch": 5.65, - "learning_rate": 5.649375653226249e-08, - "loss": 3.9565, - "step": 513500 - }, - { - "epoch": 5.65, - "learning_rate": 5.654876505858408e-08, - "loss": 3.9585, - "step": 514000 - }, - { - "epoch": 5.66, - "learning_rate": 5.660377358490566e-08, - "loss": 3.9617, - "step": 514500 - }, - { - "epoch": 5.67, - "learning_rate": 5.665878211122723e-08, - "loss": 3.9598, - "step": 515000 - }, - { - "epoch": 5.67, - "learning_rate": 5.671379063754882e-08, - "loss": 3.9674, - "step": 515500 - }, - { - "epoch": 5.68, - "learning_rate": 5.67687991638704e-08, - "loss": 3.9676, - "step": 516000 - }, - { - "epoch": 5.68, - "learning_rate": 5.682380769019198e-08, - "loss": 3.9224, - "step": 516500 - }, - { - "epoch": 5.69, - "learning_rate": 5.6878816216513556e-08, - "loss": 3.9673, - "step": 517000 - }, - { - "epoch": 5.69, - "learning_rate": 5.693382474283513e-08, - "loss": 3.9491, - "step": 517500 - }, - { - "epoch": 5.7, - "learning_rate": 5.698883326915672e-08, - "loss": 3.9543, - "step": 518000 - }, - { - "epoch": 5.7, - "learning_rate": 5.70438417954783e-08, - "loss": 3.9433, - "step": 518500 - }, - { - "epoch": 5.71, - "learning_rate": 5.709885032179987e-08, - "loss": 3.9444, - "step": 519000 - }, - { - "epoch": 5.72, - "learning_rate": 5.715385884812146e-08, - "loss": 3.9704, - "step": 519500 - }, - { - "epoch": 5.72, - "learning_rate": 5.720886737444304e-08, - "loss": 3.9654, - "step": 520000 - }, - { - "epoch": 5.73, - "learning_rate": 5.7263875900764614e-08, - "loss": 3.9717, - "step": 520500 - }, - { - "epoch": 5.73, - "learning_rate": 5.73188844270862e-08, - "loss": 3.9549, - "step": 521000 - }, - { - "epoch": 5.74, - "learning_rate": 5.737389295340778e-08, - "loss": 3.9556, - "step": 521500 - }, - { - "epoch": 5.74, - "learning_rate": 5.7428901479729354e-08, - "loss": 3.964, - "step": 522000 - }, - { - "epoch": 5.75, - "learning_rate": 5.748391000605094e-08, - "loss": 3.9617, - "step": 522500 - }, - { - "epoch": 5.75, - "learning_rate": 5.753891853237251e-08, - "loss": 3.9238, - "step": 523000 - }, - { - "epoch": 5.76, - "learning_rate": 5.759392705869409e-08, - "loss": 3.974, - "step": 523500 - }, - { - "epoch": 5.76, - "learning_rate": 5.764893558501568e-08, - "loss": 3.9519, - "step": 524000 - }, - { - "epoch": 5.77, - "learning_rate": 5.7703944111337254e-08, - "loss": 3.9472, - "step": 524500 - }, - { - "epoch": 5.78, - "learning_rate": 5.775895263765883e-08, - "loss": 3.9688, - "step": 525000 - }, - { - "epoch": 5.78, - "learning_rate": 5.781396116398042e-08, - "loss": 3.9611, - "step": 525500 - }, - { - "epoch": 5.79, - "learning_rate": 5.7868969690301994e-08, - "loss": 3.9406, - "step": 526000 - }, - { - "epoch": 5.79, - "learning_rate": 5.792397821662357e-08, - "loss": 3.9517, - "step": 526500 - }, - { - "epoch": 5.8, - "learning_rate": 5.797898674294516e-08, - "loss": 3.9668, - "step": 527000 - }, - { - "epoch": 5.8, - "learning_rate": 5.8033995269266735e-08, - "loss": 3.9495, - "step": 527500 - }, - { - "epoch": 5.81, - "learning_rate": 5.808900379558831e-08, - "loss": 3.94, - "step": 528000 - }, - { - "epoch": 5.81, - "learning_rate": 5.8144012321909894e-08, - "loss": 3.9477, - "step": 528500 - }, - { - "epoch": 5.82, - "learning_rate": 5.819902084823147e-08, - "loss": 3.9443, - "step": 529000 - }, - { - "epoch": 5.83, - "learning_rate": 5.825402937455305e-08, - "loss": 3.9334, - "step": 529500 - }, - { - "epoch": 5.83, - "learning_rate": 5.8309037900874634e-08, - "loss": 3.9379, - "step": 530000 - }, - { - "epoch": 5.84, - "learning_rate": 5.836404642719621e-08, - "loss": 3.9645, - "step": 530500 - }, - { - "epoch": 5.84, - "learning_rate": 5.8419054953517786e-08, - "loss": 3.9347, - "step": 531000 - }, - { - "epoch": 5.85, - "learning_rate": 5.8474063479839375e-08, - "loss": 3.9638, - "step": 531500 - }, - { - "epoch": 5.85, - "learning_rate": 5.852907200616095e-08, - "loss": 3.963, - "step": 532000 - }, - { - "epoch": 5.86, - "learning_rate": 5.858408053248253e-08, - "loss": 3.9338, - "step": 532500 - }, - { - "epoch": 5.86, - "learning_rate": 5.8639089058804116e-08, - "loss": 3.9373, - "step": 533000 - }, - { - "epoch": 5.87, - "learning_rate": 5.869409758512569e-08, - "loss": 3.9303, - "step": 533500 - }, - { - "epoch": 5.87, - "learning_rate": 5.874910611144727e-08, - "loss": 3.9594, - "step": 534000 - }, - { - "epoch": 5.88, - "learning_rate": 5.880411463776886e-08, - "loss": 3.9326, - "step": 534500 - }, - { - "epoch": 5.89, - "learning_rate": 5.885912316409043e-08, - "loss": 3.9415, - "step": 535000 - }, - { - "epoch": 5.89, - "learning_rate": 5.8914131690412015e-08, - "loss": 3.9484, - "step": 535500 - }, - { - "epoch": 5.9, - "learning_rate": 5.896914021673359e-08, - "loss": 3.9398, - "step": 536000 - }, - { - "epoch": 5.9, - "learning_rate": 5.902414874305517e-08, - "loss": 3.9405, - "step": 536500 - }, - { - "epoch": 5.91, - "learning_rate": 5.9079157269376756e-08, - "loss": 3.9471, - "step": 537000 - }, - { - "epoch": 5.91, - "learning_rate": 5.913416579569833e-08, - "loss": 3.953, - "step": 537500 - }, - { - "epoch": 5.92, - "learning_rate": 5.918917432201991e-08, - "loss": 3.9506, - "step": 538000 - }, - { - "epoch": 5.92, - "learning_rate": 5.92441828483415e-08, - "loss": 3.9436, - "step": 538500 - }, - { - "epoch": 5.93, - "learning_rate": 5.929919137466307e-08, - "loss": 3.9281, - "step": 539000 - }, - { - "epoch": 5.94, - "learning_rate": 5.935419990098465e-08, - "loss": 3.9576, - "step": 539500 - }, - { - "epoch": 5.94, - "learning_rate": 5.940920842730624e-08, - "loss": 3.9382, - "step": 540000 - }, - { - "epoch": 5.95, - "learning_rate": 5.9464216953627813e-08, - "loss": 3.9441, - "step": 540500 - }, - { - "epoch": 5.95, - "learning_rate": 5.951922547994939e-08, - "loss": 3.9373, - "step": 541000 - }, - { - "epoch": 5.96, - "learning_rate": 5.957423400627097e-08, - "loss": 3.9462, - "step": 541500 - }, - { - "epoch": 5.96, - "learning_rate": 5.962924253259255e-08, - "loss": 3.9592, - "step": 542000 - }, - { - "epoch": 5.97, - "learning_rate": 5.968425105891412e-08, - "loss": 3.941, - "step": 542500 - }, - { - "epoch": 5.97, - "learning_rate": 5.973925958523571e-08, - "loss": 3.9576, - "step": 543000 - }, - { - "epoch": 5.98, - "learning_rate": 5.979426811155729e-08, - "loss": 3.9445, - "step": 543500 - }, - { - "epoch": 5.98, - "learning_rate": 5.984927663787886e-08, - "loss": 3.9583, - "step": 544000 - }, - { - "epoch": 5.99, - "learning_rate": 5.990428516420045e-08, - "loss": 3.9579, - "step": 544500 - }, - { - "epoch": 6.0, - "learning_rate": 5.995929369052203e-08, - "loss": 3.9562, - "step": 545000 - }, - { - "epoch": 6.0, - "eval_loss": 3.9576685428619385, - "eval_runtime": 6.1306, - "eval_samples_per_second": 253.481, - "step": 545370 - }, - { - "epoch": 6.0, - "learning_rate": 6.00143022168436e-08, - "loss": 3.9328, - "step": 545500 - }, - { - "epoch": 6.01, - "learning_rate": 6.00693107431652e-08, - "loss": 3.9461, - "step": 546000 - }, - { - "epoch": 6.01, - "learning_rate": 6.012431926948677e-08, - "loss": 3.9389, - "step": 546500 - }, - { - "epoch": 6.02, - "learning_rate": 6.017932779580835e-08, - "loss": 3.9322, - "step": 547000 - }, - { - "epoch": 6.02, - "learning_rate": 6.023433632212994e-08, - "loss": 3.9461, - "step": 547500 - }, - { - "epoch": 6.03, - "learning_rate": 6.028934484845151e-08, - "loss": 3.9675, - "step": 548000 - }, - { - "epoch": 6.03, - "learning_rate": 6.034435337477309e-08, - "loss": 3.9524, - "step": 548500 - }, - { - "epoch": 6.04, - "learning_rate": 6.039936190109468e-08, - "loss": 3.9484, - "step": 549000 - }, - { - "epoch": 6.05, - "learning_rate": 6.045437042741625e-08, - "loss": 3.9521, - "step": 549500 - }, - { - "epoch": 6.05, - "learning_rate": 6.050937895373783e-08, - "loss": 3.9518, - "step": 550000 - }, - { - "epoch": 6.06, - "learning_rate": 6.05643874800594e-08, - "loss": 3.951, - "step": 550500 - }, - { - "epoch": 6.06, - "learning_rate": 6.061939600638098e-08, - "loss": 3.94, - "step": 551000 - }, - { - "epoch": 6.07, - "learning_rate": 6.067440453270256e-08, - "loss": 3.9571, - "step": 551500 - }, - { - "epoch": 6.07, - "learning_rate": 6.072941305902414e-08, - "loss": 3.9502, - "step": 552000 - }, - { - "epoch": 6.08, - "learning_rate": 6.078442158534572e-08, - "loss": 3.9648, - "step": 552500 - }, - { - "epoch": 6.08, - "learning_rate": 6.08394301116673e-08, - "loss": 3.9206, - "step": 553000 - }, - { - "epoch": 6.09, - "learning_rate": 6.089443863798889e-08, - "loss": 3.9481, - "step": 553500 - }, - { - "epoch": 6.09, - "learning_rate": 6.094944716431046e-08, - "loss": 3.9347, - "step": 554000 - }, - { - "epoch": 6.1, - "learning_rate": 6.100445569063205e-08, - "loss": 3.9768, - "step": 554500 - }, - { - "epoch": 6.11, - "learning_rate": 6.105946421695363e-08, - "loss": 3.9426, - "step": 555000 - }, - { - "epoch": 6.11, - "learning_rate": 6.11144727432752e-08, - "loss": 3.9284, - "step": 555500 - }, - { - "epoch": 6.12, - "learning_rate": 6.116948126959679e-08, - "loss": 3.9415, - "step": 556000 - }, - { - "epoch": 6.12, - "learning_rate": 6.122448979591837e-08, - "loss": 3.9329, - "step": 556500 - }, - { - "epoch": 6.13, - "learning_rate": 6.127949832223994e-08, - "loss": 3.9476, - "step": 557000 - }, - { - "epoch": 6.13, - "learning_rate": 6.133450684856153e-08, - "loss": 3.941, - "step": 557500 - }, - { - "epoch": 6.14, - "learning_rate": 6.138951537488311e-08, - "loss": 3.9352, - "step": 558000 - }, - { - "epoch": 6.14, - "learning_rate": 6.144452390120468e-08, - "loss": 3.9353, - "step": 558500 - }, - { - "epoch": 6.15, - "learning_rate": 6.149953242752627e-08, - "loss": 3.9361, - "step": 559000 - }, - { - "epoch": 6.16, - "learning_rate": 6.155454095384785e-08, - "loss": 3.9676, - "step": 559500 - }, - { - "epoch": 6.16, - "learning_rate": 6.160954948016942e-08, - "loss": 3.9426, - "step": 560000 - }, - { - "epoch": 6.17, - "learning_rate": 6.166455800649101e-08, - "loss": 3.9516, - "step": 560500 - }, - { - "epoch": 6.17, - "learning_rate": 6.171956653281259e-08, - "loss": 3.9637, - "step": 561000 - }, - { - "epoch": 6.18, - "learning_rate": 6.177457505913417e-08, - "loss": 3.9363, - "step": 561500 - }, - { - "epoch": 6.18, - "learning_rate": 6.182958358545574e-08, - "loss": 3.9608, - "step": 562000 - }, - { - "epoch": 6.19, - "learning_rate": 6.188459211177732e-08, - "loss": 3.9412, - "step": 562500 - }, - { - "epoch": 6.19, - "learning_rate": 6.193960063809889e-08, - "loss": 3.9411, - "step": 563000 - }, - { - "epoch": 6.2, - "learning_rate": 6.199460916442048e-08, - "loss": 3.9509, - "step": 563500 - }, - { - "epoch": 6.2, - "learning_rate": 6.204961769074206e-08, - "loss": 3.9322, - "step": 564000 - }, - { - "epoch": 6.21, - "learning_rate": 6.210462621706363e-08, - "loss": 3.9118, - "step": 564500 - }, - { - "epoch": 6.22, - "learning_rate": 6.215963474338522e-08, - "loss": 3.9509, - "step": 565000 - }, - { - "epoch": 6.22, - "learning_rate": 6.22146432697068e-08, - "loss": 3.9439, - "step": 565500 - }, - { - "epoch": 6.23, - "learning_rate": 6.226965179602837e-08, - "loss": 3.9287, - "step": 566000 - }, - { - "epoch": 6.23, - "learning_rate": 6.232466032234996e-08, - "loss": 3.9262, - "step": 566500 - }, - { - "epoch": 6.24, - "learning_rate": 6.237966884867154e-08, - "loss": 3.9332, - "step": 567000 - }, - { - "epoch": 6.24, - "learning_rate": 6.243467737499312e-08, - "loss": 3.9483, - "step": 567500 - }, - { - "epoch": 6.25, - "learning_rate": 6.24896859013147e-08, - "loss": 3.929, - "step": 568000 - }, - { - "epoch": 6.25, - "learning_rate": 6.254469442763628e-08, - "loss": 3.9239, - "step": 568500 - }, - { - "epoch": 6.26, - "learning_rate": 6.259970295395786e-08, - "loss": 3.9452, - "step": 569000 - }, - { - "epoch": 6.27, - "learning_rate": 6.265471148027944e-08, - "loss": 3.9175, - "step": 569500 - }, - { - "epoch": 6.27, - "learning_rate": 6.270972000660102e-08, - "loss": 3.958, - "step": 570000 - }, - { - "epoch": 6.28, - "learning_rate": 6.27647285329226e-08, - "loss": 3.9289, - "step": 570500 - }, - { - "epoch": 6.28, - "learning_rate": 6.281973705924419e-08, - "loss": 3.9381, - "step": 571000 - }, - { - "epoch": 6.29, - "learning_rate": 6.287474558556576e-08, - "loss": 3.9398, - "step": 571500 - }, - { - "epoch": 6.29, - "learning_rate": 6.292975411188734e-08, - "loss": 3.9404, - "step": 572000 - }, - { - "epoch": 6.3, - "learning_rate": 6.298476263820893e-08, - "loss": 3.9404, - "step": 572500 - }, - { - "epoch": 6.3, - "learning_rate": 6.30397711645305e-08, - "loss": 3.935, - "step": 573000 - }, - { - "epoch": 6.31, - "learning_rate": 6.309477969085208e-08, - "loss": 3.9377, - "step": 573500 - }, - { - "epoch": 6.31, - "learning_rate": 6.314978821717365e-08, - "loss": 3.9401, - "step": 574000 - }, - { - "epoch": 6.32, - "learning_rate": 6.320479674349524e-08, - "loss": 3.9466, - "step": 574500 - }, - { - "epoch": 6.33, - "learning_rate": 6.325980526981682e-08, - "loss": 3.9326, - "step": 575000 - }, - { - "epoch": 6.33, - "learning_rate": 6.33148137961384e-08, - "loss": 3.9202, - "step": 575500 - }, - { - "epoch": 6.34, - "learning_rate": 6.336982232245997e-08, - "loss": 3.9389, - "step": 576000 - }, - { - "epoch": 6.34, - "learning_rate": 6.342483084878156e-08, - "loss": 3.9515, - "step": 576500 - }, - { - "epoch": 6.35, - "learning_rate": 6.347983937510314e-08, - "loss": 3.9255, - "step": 577000 - }, - { - "epoch": 6.35, - "learning_rate": 6.353484790142471e-08, - "loss": 3.9249, - "step": 577500 - }, - { - "epoch": 6.36, - "learning_rate": 6.35898564277463e-08, - "loss": 3.9361, - "step": 578000 - }, - { - "epoch": 6.36, - "learning_rate": 6.364486495406788e-08, - "loss": 3.9522, - "step": 578500 - }, - { - "epoch": 6.37, - "learning_rate": 6.369987348038945e-08, - "loss": 3.9233, - "step": 579000 - }, - { - "epoch": 6.38, - "learning_rate": 6.375488200671104e-08, - "loss": 3.9269, - "step": 579500 - }, - { - "epoch": 6.38, - "learning_rate": 6.380989053303262e-08, - "loss": 3.935, - "step": 580000 - }, - { - "epoch": 6.39, - "learning_rate": 6.386489905935419e-08, - "loss": 3.9263, - "step": 580500 - }, - { - "epoch": 6.39, - "learning_rate": 6.391990758567578e-08, - "loss": 3.9252, - "step": 581000 - }, - { - "epoch": 6.4, - "learning_rate": 6.397491611199736e-08, - "loss": 3.931, - "step": 581500 - }, - { - "epoch": 6.4, - "learning_rate": 6.402992463831893e-08, - "loss": 3.9309, - "step": 582000 - }, - { - "epoch": 6.41, - "learning_rate": 6.408493316464052e-08, - "loss": 3.9348, - "step": 582500 - }, - { - "epoch": 6.41, - "learning_rate": 6.41399416909621e-08, - "loss": 3.9203, - "step": 583000 - }, - { - "epoch": 6.42, - "learning_rate": 6.419495021728367e-08, - "loss": 3.9343, - "step": 583500 - }, - { - "epoch": 6.42, - "learning_rate": 6.424995874360526e-08, - "loss": 3.9201, - "step": 584000 - }, - { - "epoch": 6.43, - "learning_rate": 6.430496726992684e-08, - "loss": 3.9472, - "step": 584500 - }, - { - "epoch": 6.44, - "learning_rate": 6.435997579624842e-08, - "loss": 3.9412, - "step": 585000 - }, - { - "epoch": 6.44, - "learning_rate": 6.441498432257e-08, - "loss": 3.9461, - "step": 585500 - }, - { - "epoch": 6.45, - "learning_rate": 6.446999284889158e-08, - "loss": 3.9392, - "step": 586000 - }, - { - "epoch": 6.45, - "learning_rate": 6.452500137521316e-08, - "loss": 3.9402, - "step": 586500 - }, - { - "epoch": 6.46, - "learning_rate": 6.458000990153473e-08, - "loss": 3.9349, - "step": 587000 - }, - { - "epoch": 6.46, - "learning_rate": 6.463501842785631e-08, - "loss": 3.9292, - "step": 587500 - }, - { - "epoch": 6.47, - "learning_rate": 6.469002695417788e-08, - "loss": 3.9409, - "step": 588000 - }, - { - "epoch": 6.47, - "learning_rate": 6.474503548049947e-08, - "loss": 3.9253, - "step": 588500 - }, - { - "epoch": 6.48, - "learning_rate": 6.480004400682105e-08, - "loss": 3.9516, - "step": 589000 - }, - { - "epoch": 6.49, - "learning_rate": 6.485505253314263e-08, - "loss": 3.9209, - "step": 589500 - }, - { - "epoch": 6.49, - "learning_rate": 6.491006105946421e-08, - "loss": 3.9083, - "step": 590000 - }, - { - "epoch": 6.5, - "learning_rate": 6.496506958578579e-08, - "loss": 3.9227, - "step": 590500 - }, - { - "epoch": 6.5, - "learning_rate": 6.502007811210737e-08, - "loss": 3.9363, - "step": 591000 - }, - { - "epoch": 6.51, - "learning_rate": 6.507508663842895e-08, - "loss": 3.9302, - "step": 591500 - }, - { - "epoch": 6.51, - "learning_rate": 6.513009516475053e-08, - "loss": 3.9293, - "step": 592000 - }, - { - "epoch": 6.52, - "learning_rate": 6.518510369107212e-08, - "loss": 3.9304, - "step": 592500 - }, - { - "epoch": 6.52, - "learning_rate": 6.52401122173937e-08, - "loss": 3.9351, - "step": 593000 - }, - { - "epoch": 6.53, - "learning_rate": 6.529512074371527e-08, - "loss": 3.927, - "step": 593500 - }, - { - "epoch": 6.54, - "learning_rate": 6.535012927003686e-08, - "loss": 3.9318, - "step": 594000 - }, - { - "epoch": 6.54, - "learning_rate": 6.540513779635844e-08, - "loss": 3.9341, - "step": 594500 - }, - { - "epoch": 6.55, - "learning_rate": 6.546014632268001e-08, - "loss": 3.9156, - "step": 595000 - }, - { - "epoch": 6.55, - "learning_rate": 6.55151548490016e-08, - "loss": 3.9396, - "step": 595500 - }, - { - "epoch": 6.56, - "learning_rate": 6.557016337532318e-08, - "loss": 3.9281, - "step": 596000 - }, - { - "epoch": 6.56, - "learning_rate": 6.562517190164475e-08, - "loss": 3.9291, - "step": 596500 - }, - { - "epoch": 6.57, - "learning_rate": 6.568018042796634e-08, - "loss": 3.9369, - "step": 597000 - }, - { - "epoch": 6.57, - "learning_rate": 6.573518895428792e-08, - "loss": 3.9168, - "step": 597500 - }, - { - "epoch": 6.58, - "learning_rate": 6.57901974806095e-08, - "loss": 3.9322, - "step": 598000 - }, - { - "epoch": 6.58, - "learning_rate": 6.584520600693107e-08, - "loss": 3.9493, - "step": 598500 - }, - { - "epoch": 6.59, - "learning_rate": 6.590021453325265e-08, - "loss": 3.9286, - "step": 599000 - }, - { - "epoch": 6.6, - "learning_rate": 6.595522305957422e-08, - "loss": 3.9198, - "step": 599500 - }, - { - "epoch": 6.6, - "learning_rate": 6.601023158589581e-08, - "loss": 3.9297, - "step": 600000 - }, - { - "epoch": 6.61, - "learning_rate": 6.606524011221739e-08, - "loss": 3.9379, - "step": 600500 - }, - { - "epoch": 6.61, - "learning_rate": 6.612024863853896e-08, - "loss": 3.9321, - "step": 601000 - }, - { - "epoch": 6.62, - "learning_rate": 6.617525716486055e-08, - "loss": 3.9202, - "step": 601500 - }, - { - "epoch": 6.62, - "learning_rate": 6.623026569118213e-08, - "loss": 3.9229, - "step": 602000 - }, - { - "epoch": 6.63, - "learning_rate": 6.62852742175037e-08, - "loss": 3.9386, - "step": 602500 - }, - { - "epoch": 6.63, - "learning_rate": 6.634028274382529e-08, - "loss": 3.9369, - "step": 603000 - }, - { - "epoch": 6.64, - "learning_rate": 6.639529127014687e-08, - "loss": 3.9228, - "step": 603500 - }, - { - "epoch": 6.65, - "learning_rate": 6.645029979646844e-08, - "loss": 3.9425, - "step": 604000 - }, - { - "epoch": 6.65, - "learning_rate": 6.650530832279003e-08, - "loss": 3.911, - "step": 604500 - }, - { - "epoch": 6.66, - "learning_rate": 6.656031684911161e-08, - "loss": 3.9405, - "step": 605000 - }, - { - "epoch": 6.66, - "learning_rate": 6.661532537543318e-08, - "loss": 3.9182, - "step": 605500 - }, - { - "epoch": 6.67, - "learning_rate": 6.667033390175477e-08, - "loss": 3.9491, - "step": 606000 - }, - { - "epoch": 6.67, - "learning_rate": 6.672534242807635e-08, - "loss": 3.9242, - "step": 606500 - }, - { - "epoch": 6.68, - "learning_rate": 6.678035095439793e-08, - "loss": 3.9226, - "step": 607000 - }, - { - "epoch": 6.68, - "learning_rate": 6.683535948071951e-08, - "loss": 3.9386, - "step": 607500 - }, - { - "epoch": 6.69, - "learning_rate": 6.689036800704109e-08, - "loss": 3.923, - "step": 608000 - }, - { - "epoch": 6.69, - "learning_rate": 6.694537653336267e-08, - "loss": 3.9361, - "step": 608500 - }, - { - "epoch": 6.7, - "learning_rate": 6.700038505968426e-08, - "loss": 3.9267, - "step": 609000 - }, - { - "epoch": 6.71, - "learning_rate": 6.705539358600583e-08, - "loss": 3.9354, - "step": 609500 - }, - { - "epoch": 6.71, - "learning_rate": 6.711040211232741e-08, - "loss": 3.9226, - "step": 610000 - }, - { - "epoch": 6.72, - "learning_rate": 6.716541063864898e-08, - "loss": 3.9149, - "step": 610500 - }, - { - "epoch": 6.72, - "learning_rate": 6.722041916497056e-08, - "loss": 3.93, - "step": 611000 - }, - { - "epoch": 6.73, - "learning_rate": 6.727542769129215e-08, - "loss": 3.9193, - "step": 611500 - }, - { - "epoch": 6.73, - "learning_rate": 6.733043621761372e-08, - "loss": 3.9372, - "step": 612000 - }, - { - "epoch": 6.74, - "learning_rate": 6.73854447439353e-08, - "loss": 3.9306, - "step": 612500 - }, - { - "epoch": 6.74, - "learning_rate": 6.744045327025689e-08, - "loss": 3.9304, - "step": 613000 - }, - { - "epoch": 6.75, - "learning_rate": 6.749546179657846e-08, - "loss": 3.9305, - "step": 613500 - }, - { - "epoch": 6.76, - "learning_rate": 6.755047032290004e-08, - "loss": 3.9368, - "step": 614000 - }, - { - "epoch": 6.76, - "learning_rate": 6.760547884922163e-08, - "loss": 3.9285, - "step": 614500 - }, - { - "epoch": 6.77, - "learning_rate": 6.76604873755432e-08, - "loss": 3.934, - "step": 615000 - }, - { - "epoch": 6.77, - "learning_rate": 6.771549590186478e-08, - "loss": 3.9292, - "step": 615500 - }, - { - "epoch": 6.78, - "learning_rate": 6.777050442818637e-08, - "loss": 3.9402, - "step": 616000 - }, - { - "epoch": 6.78, - "learning_rate": 6.782551295450795e-08, - "loss": 3.947, - "step": 616500 - }, - { - "epoch": 6.79, - "learning_rate": 6.788052148082952e-08, - "loss": 3.9345, - "step": 617000 - }, - { - "epoch": 6.79, - "learning_rate": 6.793553000715111e-08, - "loss": 3.9416, - "step": 617500 - }, - { - "epoch": 6.8, - "learning_rate": 6.799053853347269e-08, - "loss": 3.9245, - "step": 618000 - }, - { - "epoch": 6.8, - "learning_rate": 6.804554705979426e-08, - "loss": 3.919, - "step": 618500 - }, - { - "epoch": 6.81, - "learning_rate": 6.810055558611585e-08, - "loss": 3.904, - "step": 619000 - }, - { - "epoch": 6.82, - "learning_rate": 6.815556411243743e-08, - "loss": 3.9165, - "step": 619500 - }, - { - "epoch": 6.82, - "learning_rate": 6.8210572638759e-08, - "loss": 3.923, - "step": 620000 - }, - { - "epoch": 6.83, - "learning_rate": 6.826558116508059e-08, - "loss": 3.9316, - "step": 620500 - }, - { - "epoch": 6.83, - "learning_rate": 6.832058969140217e-08, - "loss": 3.9293, - "step": 621000 - }, - { - "epoch": 6.84, - "learning_rate": 6.837559821772374e-08, - "loss": 3.9369, - "step": 621500 - }, - { - "epoch": 6.84, - "learning_rate": 6.843060674404532e-08, - "loss": 3.9225, - "step": 622000 - }, - { - "epoch": 6.85, - "learning_rate": 6.848561527036691e-08, - "loss": 3.9419, - "step": 622500 - }, - { - "epoch": 6.85, - "learning_rate": 6.854062379668849e-08, - "loss": 3.9314, - "step": 623000 - }, - { - "epoch": 6.86, - "learning_rate": 6.859563232301006e-08, - "loss": 3.9441, - "step": 623500 - }, - { - "epoch": 6.87, - "learning_rate": 6.865064084933164e-08, - "loss": 3.9144, - "step": 624000 - }, - { - "epoch": 6.87, - "learning_rate": 6.870564937565321e-08, - "loss": 3.9274, - "step": 624500 - }, - { - "epoch": 6.88, - "learning_rate": 6.87606579019748e-08, - "loss": 3.9134, - "step": 625000 - }, - { - "epoch": 6.88, - "learning_rate": 6.881566642829638e-08, - "loss": 3.9179, - "step": 625500 - }, - { - "epoch": 6.89, - "learning_rate": 6.887067495461795e-08, - "loss": 3.9024, - "step": 626000 - }, - { - "epoch": 6.89, - "learning_rate": 6.892568348093954e-08, - "loss": 3.9225, - "step": 626500 - }, - { - "epoch": 6.9, - "learning_rate": 6.898069200726112e-08, - "loss": 3.9324, - "step": 627000 - }, - { - "epoch": 6.9, - "learning_rate": 6.90357005335827e-08, - "loss": 3.9262, - "step": 627500 - }, - { - "epoch": 6.91, - "learning_rate": 6.909070905990428e-08, - "loss": 3.9258, - "step": 628000 - }, - { - "epoch": 6.91, - "learning_rate": 6.914571758622586e-08, - "loss": 3.9372, - "step": 628500 - }, - { - "epoch": 6.92, - "learning_rate": 6.920072611254744e-08, - "loss": 3.9198, - "step": 629000 - }, - { - "epoch": 6.93, - "learning_rate": 6.925573463886902e-08, - "loss": 3.9342, - "step": 629500 - }, - { - "epoch": 6.93, - "learning_rate": 6.93107431651906e-08, - "loss": 3.9058, - "step": 630000 - }, - { - "epoch": 6.94, - "learning_rate": 6.936575169151219e-08, - "loss": 3.9207, - "step": 630500 - }, - { - "epoch": 6.94, - "learning_rate": 6.942076021783377e-08, - "loss": 3.9388, - "step": 631000 - }, - { - "epoch": 6.95, - "learning_rate": 6.947576874415534e-08, - "loss": 3.9307, - "step": 631500 - }, - { - "epoch": 6.95, - "learning_rate": 6.953077727047693e-08, - "loss": 3.9357, - "step": 632000 - }, - { - "epoch": 6.96, - "learning_rate": 6.95857857967985e-08, - "loss": 3.9173, - "step": 632500 - }, - { - "epoch": 6.96, - "learning_rate": 6.964079432312008e-08, - "loss": 3.9104, - "step": 633000 - }, - { - "epoch": 6.97, - "learning_rate": 6.969580284944167e-08, - "loss": 3.9464, - "step": 633500 - }, - { - "epoch": 6.98, - "learning_rate": 6.975081137576325e-08, - "loss": 3.9087, - "step": 634000 - }, - { - "epoch": 6.98, - "learning_rate": 6.980581990208482e-08, - "loss": 3.9162, - "step": 634500 - }, - { - "epoch": 6.99, - "learning_rate": 6.98608284284064e-08, - "loss": 3.9259, - "step": 635000 - }, - { - "epoch": 6.99, - "learning_rate": 6.991583695472797e-08, - "loss": 3.9292, - "step": 635500 - }, - { - "epoch": 7.0, - "learning_rate": 6.997084548104955e-08, - "loss": 3.9056, - "step": 636000 - }, - { - "epoch": 7.0, - "eval_loss": 3.940394878387451, - "eval_runtime": 6.1436, - "eval_samples_per_second": 252.945, - "step": 636265 - }, - { - "epoch": 7.0, - "learning_rate": 7.002585400737114e-08, - "loss": 3.9303, - "step": 636500 - }, - { - "epoch": 7.01, - "learning_rate": 7.008086253369272e-08, - "loss": 3.9269, - "step": 637000 - }, - { - "epoch": 7.01, - "learning_rate": 7.013587106001429e-08, - "loss": 3.9401, - "step": 637500 - }, - { - "epoch": 7.02, - "learning_rate": 7.019087958633588e-08, - "loss": 3.9193, - "step": 638000 - }, - { - "epoch": 7.02, - "learning_rate": 7.024588811265746e-08, - "loss": 3.9222, - "step": 638500 - }, - { - "epoch": 7.03, - "learning_rate": 7.030089663897903e-08, - "loss": 3.9185, - "step": 639000 - }, - { - "epoch": 7.04, - "learning_rate": 7.035590516530062e-08, - "loss": 3.9251, - "step": 639500 - }, - { - "epoch": 7.04, - "learning_rate": 7.04109136916222e-08, - "loss": 3.9128, - "step": 640000 - }, - { - "epoch": 7.05, - "learning_rate": 7.046592221794377e-08, - "loss": 3.9191, - "step": 640500 - }, - { - "epoch": 7.05, - "learning_rate": 7.052093074426536e-08, - "loss": 3.9081, - "step": 641000 - }, - { - "epoch": 7.06, - "learning_rate": 7.057593927058694e-08, - "loss": 3.9258, - "step": 641500 - }, - { - "epoch": 7.06, - "learning_rate": 7.063094779690851e-08, - "loss": 3.9314, - "step": 642000 - }, - { - "epoch": 7.07, - "learning_rate": 7.06859563232301e-08, - "loss": 3.9366, - "step": 642500 - }, - { - "epoch": 7.07, - "learning_rate": 7.074096484955168e-08, - "loss": 3.9039, - "step": 643000 - }, - { - "epoch": 7.08, - "learning_rate": 7.079597337587325e-08, - "loss": 3.918, - "step": 643500 - }, - { - "epoch": 7.09, - "learning_rate": 7.085098190219484e-08, - "loss": 3.9195, - "step": 644000 - }, - { - "epoch": 7.09, - "learning_rate": 7.090599042851642e-08, - "loss": 3.9363, - "step": 644500 - }, - { - "epoch": 7.1, - "learning_rate": 7.0960998954838e-08, - "loss": 3.9192, - "step": 645000 - }, - { - "epoch": 7.1, - "learning_rate": 7.101600748115958e-08, - "loss": 3.9334, - "step": 645500 - }, - { - "epoch": 7.11, - "learning_rate": 7.107101600748116e-08, - "loss": 3.9039, - "step": 646000 - }, - { - "epoch": 7.11, - "learning_rate": 7.112602453380274e-08, - "loss": 3.9188, - "step": 646500 - }, - { - "epoch": 7.12, - "learning_rate": 7.118103306012431e-08, - "loss": 3.9307, - "step": 647000 - }, - { - "epoch": 7.12, - "learning_rate": 7.123604158644589e-08, - "loss": 3.9368, - "step": 647500 - }, - { - "epoch": 7.13, - "learning_rate": 7.129105011276748e-08, - "loss": 3.9192, - "step": 648000 - }, - { - "epoch": 7.13, - "learning_rate": 7.134605863908905e-08, - "loss": 3.9148, - "step": 648500 - }, - { - "epoch": 7.14, - "learning_rate": 7.140106716541063e-08, - "loss": 3.9037, - "step": 649000 - }, - { - "epoch": 7.15, - "learning_rate": 7.145607569173222e-08, - "loss": 3.901, - "step": 649500 - }, - { - "epoch": 7.15, - "learning_rate": 7.15110842180538e-08, - "loss": 3.9074, - "step": 650000 - }, - { - "epoch": 7.16, - "learning_rate": 7.156609274437537e-08, - "loss": 3.9267, - "step": 650500 - }, - { - "epoch": 7.16, - "learning_rate": 7.162110127069696e-08, - "loss": 3.9173, - "step": 651000 - }, - { - "epoch": 7.17, - "learning_rate": 7.167610979701853e-08, - "loss": 3.9384, - "step": 651500 - }, - { - "epoch": 7.17, - "learning_rate": 7.173111832334011e-08, - "loss": 3.9389, - "step": 652000 - }, - { - "epoch": 7.18, - "learning_rate": 7.17861268496617e-08, - "loss": 3.9181, - "step": 652500 - }, - { - "epoch": 7.18, - "learning_rate": 7.184113537598328e-08, - "loss": 3.9165, - "step": 653000 - }, - { - "epoch": 7.19, - "learning_rate": 7.189614390230485e-08, - "loss": 3.8961, - "step": 653500 - }, - { - "epoch": 7.2, - "learning_rate": 7.195115242862644e-08, - "loss": 3.9135, - "step": 654000 - }, - { - "epoch": 7.2, - "learning_rate": 7.200616095494802e-08, - "loss": 3.9025, - "step": 654500 - }, - { - "epoch": 7.21, - "learning_rate": 7.206116948126959e-08, - "loss": 3.9301, - "step": 655000 - }, - { - "epoch": 7.21, - "learning_rate": 7.211617800759118e-08, - "loss": 3.8983, - "step": 655500 - }, - { - "epoch": 7.22, - "learning_rate": 7.217118653391276e-08, - "loss": 3.924, - "step": 656000 - }, - { - "epoch": 7.22, - "learning_rate": 7.222619506023433e-08, - "loss": 3.9168, - "step": 656500 - }, - { - "epoch": 7.23, - "learning_rate": 7.228120358655592e-08, - "loss": 3.9028, - "step": 657000 - }, - { - "epoch": 7.23, - "learning_rate": 7.23362121128775e-08, - "loss": 3.9169, - "step": 657500 - }, - { - "epoch": 7.24, - "learning_rate": 7.239122063919907e-08, - "loss": 3.906, - "step": 658000 - }, - { - "epoch": 7.24, - "learning_rate": 7.244622916552065e-08, - "loss": 3.9178, - "step": 658500 - }, - { - "epoch": 7.25, - "learning_rate": 7.250123769184223e-08, - "loss": 3.923, - "step": 659000 - }, - { - "epoch": 7.26, - "learning_rate": 7.25562462181638e-08, - "loss": 3.9157, - "step": 659500 - }, - { - "epoch": 7.26, - "learning_rate": 7.261125474448539e-08, - "loss": 3.9085, - "step": 660000 - }, - { - "epoch": 7.27, - "learning_rate": 7.266626327080697e-08, - "loss": 3.9407, - "step": 660500 - }, - { - "epoch": 7.27, - "learning_rate": 7.272127179712854e-08, - "loss": 3.9164, - "step": 661000 - }, - { - "epoch": 7.28, - "learning_rate": 7.277628032345013e-08, - "loss": 3.9124, - "step": 661500 - }, - { - "epoch": 7.28, - "learning_rate": 7.283128884977171e-08, - "loss": 3.9136, - "step": 662000 - }, - { - "epoch": 7.29, - "learning_rate": 7.288629737609328e-08, - "loss": 3.9222, - "step": 662500 - }, - { - "epoch": 7.29, - "learning_rate": 7.294130590241487e-08, - "loss": 3.9177, - "step": 663000 - }, - { - "epoch": 7.3, - "learning_rate": 7.299631442873645e-08, - "loss": 3.9184, - "step": 663500 - }, - { - "epoch": 7.31, - "learning_rate": 7.305132295505802e-08, - "loss": 3.9249, - "step": 664000 - }, - { - "epoch": 7.31, - "learning_rate": 7.310633148137961e-08, - "loss": 3.9227, - "step": 664500 - }, - { - "epoch": 7.32, - "learning_rate": 7.316134000770119e-08, - "loss": 3.9117, - "step": 665000 - }, - { - "epoch": 7.32, - "learning_rate": 7.321634853402276e-08, - "loss": 3.9063, - "step": 665500 - }, - { - "epoch": 7.33, - "learning_rate": 7.327135706034435e-08, - "loss": 3.9121, - "step": 666000 - }, - { - "epoch": 7.33, - "learning_rate": 7.332636558666593e-08, - "loss": 3.9133, - "step": 666500 - }, - { - "epoch": 7.34, - "learning_rate": 7.338137411298752e-08, - "loss": 3.9194, - "step": 667000 - }, - { - "epoch": 7.34, - "learning_rate": 7.34363826393091e-08, - "loss": 3.9119, - "step": 667500 - }, - { - "epoch": 7.35, - "learning_rate": 7.349139116563067e-08, - "loss": 3.9068, - "step": 668000 - }, - { - "epoch": 7.35, - "learning_rate": 7.354639969195226e-08, - "loss": 3.8997, - "step": 668500 - }, - { - "epoch": 7.36, - "learning_rate": 7.360140821827384e-08, - "loss": 3.9227, - "step": 669000 - }, - { - "epoch": 7.37, - "learning_rate": 7.365641674459541e-08, - "loss": 3.9088, - "step": 669500 - }, - { - "epoch": 7.37, - "learning_rate": 7.371142527091699e-08, - "loss": 3.8975, - "step": 670000 - }, - { - "epoch": 7.38, - "learning_rate": 7.376643379723858e-08, - "loss": 3.9216, - "step": 670500 - }, - { - "epoch": 7.38, - "learning_rate": 7.382144232356015e-08, - "loss": 3.9021, - "step": 671000 - }, - { - "epoch": 7.39, - "learning_rate": 7.387645084988173e-08, - "loss": 3.9231, - "step": 671500 - }, - { - "epoch": 7.39, - "learning_rate": 7.39314593762033e-08, - "loss": 3.9226, - "step": 672000 - }, - { - "epoch": 7.4, - "learning_rate": 7.398646790252488e-08, - "loss": 3.9387, - "step": 672500 - }, - { - "epoch": 7.4, - "learning_rate": 7.404147642884647e-08, - "loss": 3.9327, - "step": 673000 - }, - { - "epoch": 7.41, - "learning_rate": 7.409648495516804e-08, - "loss": 3.9036, - "step": 673500 - }, - { - "epoch": 7.42, - "learning_rate": 7.415149348148962e-08, - "loss": 3.9243, - "step": 674000 - }, - { - "epoch": 7.42, - "learning_rate": 7.420650200781121e-08, - "loss": 3.9086, - "step": 674500 - }, - { - "epoch": 7.43, - "learning_rate": 7.426151053413279e-08, - "loss": 3.9141, - "step": 675000 - }, - { - "epoch": 7.43, - "learning_rate": 7.431651906045436e-08, - "loss": 3.9254, - "step": 675500 - }, - { - "epoch": 7.44, - "learning_rate": 7.437152758677595e-08, - "loss": 3.906, - "step": 676000 - }, - { - "epoch": 7.44, - "learning_rate": 7.442653611309753e-08, - "loss": 3.9208, - "step": 676500 - }, - { - "epoch": 7.45, - "learning_rate": 7.44815446394191e-08, - "loss": 3.9241, - "step": 677000 - }, - { - "epoch": 7.45, - "learning_rate": 7.453655316574069e-08, - "loss": 3.9148, - "step": 677500 - }, - { - "epoch": 7.46, - "learning_rate": 7.459156169206227e-08, - "loss": 3.9087, - "step": 678000 - }, - { - "epoch": 7.46, - "learning_rate": 7.464657021838384e-08, - "loss": 3.9096, - "step": 678500 - }, - { - "epoch": 7.47, - "learning_rate": 7.470157874470543e-08, - "loss": 3.9391, - "step": 679000 - }, - { - "epoch": 7.48, - "learning_rate": 7.475658727102701e-08, - "loss": 3.9227, - "step": 679500 - }, - { - "epoch": 7.48, - "learning_rate": 7.481159579734858e-08, - "loss": 3.9108, - "step": 680000 - }, - { - "epoch": 7.49, - "learning_rate": 7.486660432367017e-08, - "loss": 3.9147, - "step": 680500 - }, - { - "epoch": 7.49, - "learning_rate": 7.492161284999175e-08, - "loss": 3.9291, - "step": 681000 - }, - { - "epoch": 7.5, - "learning_rate": 7.497662137631332e-08, - "loss": 3.9131, - "step": 681500 - }, - { - "epoch": 7.5, - "learning_rate": 7.503162990263491e-08, - "loss": 3.9299, - "step": 682000 - }, - { - "epoch": 7.51, - "learning_rate": 7.508663842895649e-08, - "loss": 3.9156, - "step": 682500 - }, - { - "epoch": 7.51, - "learning_rate": 7.514164695527807e-08, - "loss": 3.9146, - "step": 683000 - }, - { - "epoch": 7.52, - "learning_rate": 7.519665548159964e-08, - "loss": 3.9249, - "step": 683500 - }, - { - "epoch": 7.53, - "learning_rate": 7.525166400792122e-08, - "loss": 3.9217, - "step": 684000 - }, - { - "epoch": 7.53, - "learning_rate": 7.530667253424279e-08, - "loss": 3.8964, - "step": 684500 - }, - { - "epoch": 7.54, - "learning_rate": 7.536168106056438e-08, - "loss": 3.9092, - "step": 685000 - }, - { - "epoch": 7.54, - "learning_rate": 7.541668958688596e-08, - "loss": 3.8986, - "step": 685500 - }, - { - "epoch": 7.55, - "learning_rate": 7.547169811320755e-08, - "loss": 3.9074, - "step": 686000 - }, - { - "epoch": 7.55, - "learning_rate": 7.552670663952912e-08, - "loss": 3.9234, - "step": 686500 - }, - { - "epoch": 7.56, - "learning_rate": 7.55817151658507e-08, - "loss": 3.8932, - "step": 687000 - }, - { - "epoch": 7.56, - "learning_rate": 7.563672369217229e-08, - "loss": 3.9228, - "step": 687500 - }, - { - "epoch": 7.57, - "learning_rate": 7.569173221849386e-08, - "loss": 3.9148, - "step": 688000 - }, - { - "epoch": 7.57, - "learning_rate": 7.574674074481544e-08, - "loss": 3.9152, - "step": 688500 - }, - { - "epoch": 7.58, - "learning_rate": 7.580174927113703e-08, - "loss": 3.8918, - "step": 689000 - }, - { - "epoch": 7.59, - "learning_rate": 7.58567577974586e-08, - "loss": 3.921, - "step": 689500 - }, - { - "epoch": 7.59, - "learning_rate": 7.591176632378018e-08, - "loss": 3.8818, - "step": 690000 - }, - { - "epoch": 7.6, - "learning_rate": 7.596677485010177e-08, - "loss": 3.93, - "step": 690500 - }, - { - "epoch": 7.6, - "learning_rate": 7.602178337642335e-08, - "loss": 3.9163, - "step": 691000 - }, - { - "epoch": 7.61, - "learning_rate": 7.607679190274492e-08, - "loss": 3.8964, - "step": 691500 - }, - { - "epoch": 7.61, - "learning_rate": 7.613180042906651e-08, - "loss": 3.9159, - "step": 692000 - }, - { - "epoch": 7.62, - "learning_rate": 7.618680895538809e-08, - "loss": 3.9261, - "step": 692500 - }, - { - "epoch": 7.62, - "learning_rate": 7.624181748170966e-08, - "loss": 3.9159, - "step": 693000 - }, - { - "epoch": 7.63, - "learning_rate": 7.629682600803125e-08, - "loss": 3.9089, - "step": 693500 - }, - { - "epoch": 7.64, - "learning_rate": 7.635183453435283e-08, - "loss": 3.8955, - "step": 694000 - }, - { - "epoch": 7.64, - "learning_rate": 7.64068430606744e-08, - "loss": 3.9165, - "step": 694500 - }, - { - "epoch": 7.65, - "learning_rate": 7.646185158699598e-08, - "loss": 3.9015, - "step": 695000 - }, - { - "epoch": 7.65, - "learning_rate": 7.651686011331755e-08, - "loss": 3.9221, - "step": 695500 - }, - { - "epoch": 7.66, - "learning_rate": 7.657186863963913e-08, - "loss": 3.9051, - "step": 696000 - }, - { - "epoch": 7.66, - "learning_rate": 7.662687716596072e-08, - "loss": 3.8975, - "step": 696500 - }, - { - "epoch": 7.67, - "learning_rate": 7.66818856922823e-08, - "loss": 3.897, - "step": 697000 - }, - { - "epoch": 7.67, - "learning_rate": 7.673689421860387e-08, - "loss": 3.8943, - "step": 697500 - }, - { - "epoch": 7.68, - "learning_rate": 7.679190274492546e-08, - "loss": 3.9132, - "step": 698000 - }, - { - "epoch": 7.68, - "learning_rate": 7.684691127124704e-08, - "loss": 3.9125, - "step": 698500 - }, - { - "epoch": 7.69, - "learning_rate": 7.690191979756861e-08, - "loss": 3.8882, - "step": 699000 - }, - { - "epoch": 7.7, - "learning_rate": 7.69569283238902e-08, - "loss": 3.9115, - "step": 699500 - }, - { - "epoch": 7.7, - "learning_rate": 7.701193685021178e-08, - "loss": 3.9181, - "step": 700000 - }, - { - "epoch": 7.71, - "learning_rate": 7.706694537653335e-08, - "loss": 3.9306, - "step": 700500 - }, - { - "epoch": 7.71, - "learning_rate": 7.712195390285494e-08, - "loss": 3.8962, - "step": 701000 - }, - { - "epoch": 7.72, - "learning_rate": 7.717696242917652e-08, - "loss": 3.9223, - "step": 701500 - }, - { - "epoch": 7.72, - "learning_rate": 7.72319709554981e-08, - "loss": 3.8991, - "step": 702000 - }, - { - "epoch": 7.73, - "learning_rate": 7.728697948181968e-08, - "loss": 3.9178, - "step": 702500 - }, - { - "epoch": 7.73, - "learning_rate": 7.734198800814126e-08, - "loss": 3.9014, - "step": 703000 - }, - { - "epoch": 7.74, - "learning_rate": 7.739699653446283e-08, - "loss": 3.9259, - "step": 703500 - }, - { - "epoch": 7.75, - "learning_rate": 7.745200506078442e-08, - "loss": 3.9172, - "step": 704000 - }, - { - "epoch": 7.75, - "learning_rate": 7.7507013587106e-08, - "loss": 3.916, - "step": 704500 - }, - { - "epoch": 7.76, - "learning_rate": 7.756202211342759e-08, - "loss": 3.8928, - "step": 705000 - }, - { - "epoch": 7.76, - "learning_rate": 7.761703063974916e-08, - "loss": 3.8943, - "step": 705500 - }, - { - "epoch": 7.77, - "learning_rate": 7.767203916607074e-08, - "loss": 3.9075, - "step": 706000 - }, - { - "epoch": 7.77, - "learning_rate": 7.772704769239232e-08, - "loss": 3.9107, - "step": 706500 - }, - { - "epoch": 7.78, - "learning_rate": 7.778205621871389e-08, - "loss": 3.9118, - "step": 707000 - }, - { - "epoch": 7.78, - "learning_rate": 7.783706474503548e-08, - "loss": 3.9033, - "step": 707500 - }, - { - "epoch": 7.79, - "learning_rate": 7.789207327135706e-08, - "loss": 3.9055, - "step": 708000 - }, - { - "epoch": 7.79, - "learning_rate": 7.794708179767863e-08, - "loss": 3.9084, - "step": 708500 - }, - { - "epoch": 7.8, - "learning_rate": 7.800209032400021e-08, - "loss": 3.9016, - "step": 709000 - }, - { - "epoch": 7.81, - "learning_rate": 7.80570988503218e-08, - "loss": 3.9138, - "step": 709500 - }, - { - "epoch": 7.81, - "learning_rate": 7.811210737664337e-08, - "loss": 3.9096, - "step": 710000 - }, - { - "epoch": 7.82, - "learning_rate": 7.816711590296495e-08, - "loss": 3.8901, - "step": 710500 - }, - { - "epoch": 7.82, - "learning_rate": 7.822212442928654e-08, - "loss": 3.906, - "step": 711000 - }, - { - "epoch": 7.83, - "learning_rate": 7.827713295560811e-08, - "loss": 3.8939, - "step": 711500 - }, - { - "epoch": 7.83, - "learning_rate": 7.833214148192969e-08, - "loss": 3.9009, - "step": 712000 - }, - { - "epoch": 7.84, - "learning_rate": 7.838715000825128e-08, - "loss": 3.9171, - "step": 712500 - }, - { - "epoch": 7.84, - "learning_rate": 7.844215853457286e-08, - "loss": 3.9023, - "step": 713000 - }, - { - "epoch": 7.85, - "learning_rate": 7.849716706089443e-08, - "loss": 3.9002, - "step": 713500 - }, - { - "epoch": 7.86, - "learning_rate": 7.855217558721602e-08, - "loss": 3.9156, - "step": 714000 - }, - { - "epoch": 7.86, - "learning_rate": 7.86071841135376e-08, - "loss": 3.9115, - "step": 714500 - }, - { - "epoch": 7.87, - "learning_rate": 7.866219263985917e-08, - "loss": 3.9076, - "step": 715000 - }, - { - "epoch": 7.87, - "learning_rate": 7.871720116618076e-08, - "loss": 3.8795, - "step": 715500 - }, - { - "epoch": 7.88, - "learning_rate": 7.877220969250234e-08, - "loss": 3.9118, - "step": 716000 - }, - { - "epoch": 7.88, - "learning_rate": 7.882721821882391e-08, - "loss": 3.8955, - "step": 716500 - }, - { - "epoch": 7.89, - "learning_rate": 7.88822267451455e-08, - "loss": 3.9018, - "step": 717000 - }, - { - "epoch": 7.89, - "learning_rate": 7.893723527146708e-08, - "loss": 3.9139, - "step": 717500 - }, - { - "epoch": 7.9, - "learning_rate": 7.899224379778865e-08, - "loss": 3.9105, - "step": 718000 - }, - { - "epoch": 7.9, - "learning_rate": 7.904725232411024e-08, - "loss": 3.9065, - "step": 718500 - }, - { - "epoch": 7.91, - "learning_rate": 7.910226085043182e-08, - "loss": 3.9276, - "step": 719000 - }, - { - "epoch": 7.92, - "learning_rate": 7.91572693767534e-08, - "loss": 3.9266, - "step": 719500 - }, - { - "epoch": 7.92, - "learning_rate": 7.921227790307497e-08, - "loss": 3.9034, - "step": 720000 - }, - { - "epoch": 7.93, - "learning_rate": 7.926728642939655e-08, - "loss": 3.9024, - "step": 720500 - }, - { - "epoch": 7.93, - "learning_rate": 7.932229495571812e-08, - "loss": 3.9129, - "step": 721000 - }, - { - "epoch": 7.94, - "learning_rate": 7.937730348203971e-08, - "loss": 3.8932, - "step": 721500 - }, - { - "epoch": 7.94, - "learning_rate": 7.943231200836129e-08, - "loss": 3.8973, - "step": 722000 - }, - { - "epoch": 7.95, - "learning_rate": 7.948732053468286e-08, - "loss": 3.8973, - "step": 722500 - }, - { - "epoch": 7.95, - "learning_rate": 7.954232906100445e-08, - "loss": 3.9007, - "step": 723000 - }, - { - "epoch": 7.96, - "learning_rate": 7.959733758732603e-08, - "loss": 3.8944, - "step": 723500 - }, - { - "epoch": 7.97, - "learning_rate": 7.965234611364762e-08, - "loss": 3.9182, - "step": 724000 - }, - { - "epoch": 7.97, - "learning_rate": 7.970735463996919e-08, - "loss": 3.8951, - "step": 724500 - }, - { - "epoch": 7.98, - "learning_rate": 7.976236316629077e-08, - "loss": 3.9124, - "step": 725000 - }, - { - "epoch": 7.98, - "learning_rate": 7.981737169261236e-08, - "loss": 3.9019, - "step": 725500 - }, - { - "epoch": 7.99, - "learning_rate": 7.987238021893393e-08, - "loss": 3.8971, - "step": 726000 - }, - { - "epoch": 7.99, - "learning_rate": 7.992738874525551e-08, - "loss": 3.9142, - "step": 726500 - }, - { - "epoch": 8.0, - "learning_rate": 7.99823972715771e-08, - "loss": 3.8871, - "step": 727000 - }, - { - "epoch": 8.0, - "eval_loss": 3.926957845687866, - "eval_runtime": 6.1331, - "eval_samples_per_second": 253.379, - "step": 727160 - }, - { - "epoch": 8.0, - "learning_rate": 8.003740579789867e-08, - "loss": 3.8918, - "step": 727500 - }, - { - "epoch": 8.01, - "learning_rate": 8.009241432422025e-08, - "loss": 3.9025, - "step": 728000 - }, - { - "epoch": 8.01, - "learning_rate": 8.014742285054184e-08, - "loss": 3.9083, - "step": 728500 - }, - { - "epoch": 8.02, - "learning_rate": 8.020243137686342e-08, - "loss": 3.9043, - "step": 729000 - }, - { - "epoch": 8.03, - "learning_rate": 8.025743990318499e-08, - "loss": 3.8933, - "step": 729500 - }, - { - "epoch": 8.03, - "learning_rate": 8.031244842950658e-08, - "loss": 3.9053, - "step": 730000 - }, - { - "epoch": 8.04, - "learning_rate": 8.036745695582816e-08, - "loss": 3.9155, - "step": 730500 - }, - { - "epoch": 8.04, - "learning_rate": 8.042246548214973e-08, - "loss": 3.9173, - "step": 731000 - }, - { - "epoch": 8.05, - "learning_rate": 8.047747400847131e-08, - "loss": 3.9124, - "step": 731500 - }, - { - "epoch": 8.05, - "learning_rate": 8.053248253479288e-08, - "loss": 3.913, - "step": 732000 - }, - { - "epoch": 8.06, - "learning_rate": 8.058749106111446e-08, - "loss": 3.9008, - "step": 732500 - }, - { - "epoch": 8.06, - "learning_rate": 8.064249958743605e-08, - "loss": 3.901, - "step": 733000 - }, - { - "epoch": 8.07, - "learning_rate": 8.069750811375762e-08, - "loss": 3.88, - "step": 733500 - }, - { - "epoch": 8.08, - "learning_rate": 8.07525166400792e-08, - "loss": 3.9202, - "step": 734000 - }, - { - "epoch": 8.08, - "learning_rate": 8.080752516640079e-08, - "loss": 3.9036, - "step": 734500 - }, - { - "epoch": 8.09, - "learning_rate": 8.086253369272237e-08, - "loss": 3.8783, - "step": 735000 - }, - { - "epoch": 8.09, - "learning_rate": 8.091754221904394e-08, - "loss": 3.9001, - "step": 735500 - }, - { - "epoch": 8.1, - "learning_rate": 8.097255074536553e-08, - "loss": 3.8894, - "step": 736000 - }, - { - "epoch": 8.1, - "learning_rate": 8.10275592716871e-08, - "loss": 3.8991, - "step": 736500 - }, - { - "epoch": 8.11, - "learning_rate": 8.108256779800868e-08, - "loss": 3.894, - "step": 737000 - }, - { - "epoch": 8.11, - "learning_rate": 8.113757632433027e-08, - "loss": 3.8955, - "step": 737500 - }, - { - "epoch": 8.12, - "learning_rate": 8.119258485065185e-08, - "loss": 3.9309, - "step": 738000 - }, - { - "epoch": 8.12, - "learning_rate": 8.124759337697342e-08, - "loss": 3.8995, - "step": 738500 - }, - { - "epoch": 8.13, - "learning_rate": 8.130260190329501e-08, - "loss": 3.9121, - "step": 739000 - }, - { - "epoch": 8.14, - "learning_rate": 8.135761042961659e-08, - "loss": 3.8867, - "step": 739500 - }, - { - "epoch": 8.14, - "learning_rate": 8.141261895593816e-08, - "loss": 3.9147, - "step": 740000 - }, - { - "epoch": 8.15, - "learning_rate": 8.146762748225975e-08, - "loss": 3.8934, - "step": 740500 - }, - { - "epoch": 8.15, - "learning_rate": 8.152263600858133e-08, - "loss": 3.9009, - "step": 741000 - }, - { - "epoch": 8.16, - "learning_rate": 8.15776445349029e-08, - "loss": 3.8814, - "step": 741500 - }, - { - "epoch": 8.16, - "learning_rate": 8.16326530612245e-08, - "loss": 3.8974, - "step": 742000 - }, - { - "epoch": 8.17, - "learning_rate": 8.168766158754607e-08, - "loss": 3.8935, - "step": 742500 - }, - { - "epoch": 8.17, - "learning_rate": 8.174267011386765e-08, - "loss": 3.8936, - "step": 743000 - }, - { - "epoch": 8.18, - "learning_rate": 8.179767864018922e-08, - "loss": 3.9022, - "step": 743500 - }, - { - "epoch": 8.19, - "learning_rate": 8.18526871665108e-08, - "loss": 3.8802, - "step": 744000 - }, - { - "epoch": 8.19, - "learning_rate": 8.190769569283239e-08, - "loss": 3.9066, - "step": 744500 - }, - { - "epoch": 8.2, - "learning_rate": 8.196270421915396e-08, - "loss": 3.886, - "step": 745000 - }, - { - "epoch": 8.2, - "learning_rate": 8.201771274547554e-08, - "loss": 3.8857, - "step": 745500 - }, - { - "epoch": 8.21, - "learning_rate": 8.207272127179713e-08, - "loss": 3.9002, - "step": 746000 - }, - { - "epoch": 8.21, - "learning_rate": 8.21277297981187e-08, - "loss": 3.8968, - "step": 746500 - }, - { - "epoch": 8.22, - "learning_rate": 8.218273832444028e-08, - "loss": 3.8912, - "step": 747000 - }, - { - "epoch": 8.22, - "learning_rate": 8.223774685076187e-08, - "loss": 3.9007, - "step": 747500 - }, - { - "epoch": 8.23, - "learning_rate": 8.229275537708344e-08, - "loss": 3.9005, - "step": 748000 - }, - { - "epoch": 8.23, - "learning_rate": 8.234776390340502e-08, - "loss": 3.8994, - "step": 748500 - }, - { - "epoch": 8.24, - "learning_rate": 8.240277242972661e-08, - "loss": 3.8879, - "step": 749000 - }, - { - "epoch": 8.25, - "learning_rate": 8.245778095604818e-08, - "loss": 3.8951, - "step": 749500 - }, - { - "epoch": 8.25, - "learning_rate": 8.251278948236976e-08, - "loss": 3.9098, - "step": 750000 - }, - { - "epoch": 8.26, - "learning_rate": 8.256779800869135e-08, - "loss": 3.8986, - "step": 750500 - }, - { - "epoch": 8.26, - "learning_rate": 8.262280653501293e-08, - "loss": 3.8956, - "step": 751000 - }, - { - "epoch": 8.27, - "learning_rate": 8.26778150613345e-08, - "loss": 3.8887, - "step": 751500 - }, - { - "epoch": 8.27, - "learning_rate": 8.273282358765609e-08, - "loss": 3.9047, - "step": 752000 - }, - { - "epoch": 8.28, - "learning_rate": 8.278783211397767e-08, - "loss": 3.8919, - "step": 752500 - }, - { - "epoch": 8.28, - "learning_rate": 8.284284064029924e-08, - "loss": 3.9062, - "step": 753000 - }, - { - "epoch": 8.29, - "learning_rate": 8.289784916662083e-08, - "loss": 3.8922, - "step": 753500 - }, - { - "epoch": 8.3, - "learning_rate": 8.295285769294241e-08, - "loss": 3.8911, - "step": 754000 - }, - { - "epoch": 8.3, - "learning_rate": 8.300786621926398e-08, - "loss": 3.8721, - "step": 754500 - }, - { - "epoch": 8.31, - "learning_rate": 8.306287474558556e-08, - "loss": 3.9009, - "step": 755000 - }, - { - "epoch": 8.31, - "learning_rate": 8.311788327190715e-08, - "loss": 3.8953, - "step": 755500 - }, - { - "epoch": 8.32, - "learning_rate": 8.317289179822872e-08, - "loss": 3.9044, - "step": 756000 - }, - { - "epoch": 8.32, - "learning_rate": 8.32279003245503e-08, - "loss": 3.9114, - "step": 756500 - }, - { - "epoch": 8.33, - "learning_rate": 8.328290885087188e-08, - "loss": 3.8909, - "step": 757000 - }, - { - "epoch": 8.33, - "learning_rate": 8.333791737719345e-08, - "loss": 3.9084, - "step": 757500 - }, - { - "epoch": 8.34, - "learning_rate": 8.339292590351504e-08, - "loss": 3.9077, - "step": 758000 - }, - { - "epoch": 8.34, - "learning_rate": 8.344793442983662e-08, - "loss": 3.8981, - "step": 758500 - }, - { - "epoch": 8.35, - "learning_rate": 8.350294295615819e-08, - "loss": 3.9014, - "step": 759000 - }, - { - "epoch": 8.36, - "learning_rate": 8.355795148247978e-08, - "loss": 3.882, - "step": 759500 - }, - { - "epoch": 8.36, - "learning_rate": 8.361296000880136e-08, - "loss": 3.8898, - "step": 760000 - }, - { - "epoch": 8.37, - "learning_rate": 8.366796853512293e-08, - "loss": 3.9083, - "step": 760500 - }, - { - "epoch": 8.37, - "learning_rate": 8.372297706144452e-08, - "loss": 3.8963, - "step": 761000 - }, - { - "epoch": 8.38, - "learning_rate": 8.37779855877661e-08, - "loss": 3.9029, - "step": 761500 - }, - { - "epoch": 8.38, - "learning_rate": 8.383299411408769e-08, - "loss": 3.9072, - "step": 762000 - }, - { - "epoch": 8.39, - "learning_rate": 8.388800264040926e-08, - "loss": 3.8948, - "step": 762500 - }, - { - "epoch": 8.39, - "learning_rate": 8.394301116673084e-08, - "loss": 3.8935, - "step": 763000 - }, - { - "epoch": 8.4, - "learning_rate": 8.399801969305243e-08, - "loss": 3.891, - "step": 763500 - }, - { - "epoch": 8.41, - "learning_rate": 8.4053028219374e-08, - "loss": 3.8859, - "step": 764000 - }, - { - "epoch": 8.41, - "learning_rate": 8.410803674569558e-08, - "loss": 3.8979, - "step": 764500 - }, - { - "epoch": 8.42, - "learning_rate": 8.416304527201717e-08, - "loss": 3.9019, - "step": 765000 - }, - { - "epoch": 8.42, - "learning_rate": 8.421805379833874e-08, - "loss": 3.9051, - "step": 765500 - }, - { - "epoch": 8.43, - "learning_rate": 8.427306232466032e-08, - "loss": 3.8916, - "step": 766000 - }, - { - "epoch": 8.43, - "learning_rate": 8.432807085098191e-08, - "loss": 3.9007, - "step": 766500 - }, - { - "epoch": 8.44, - "learning_rate": 8.438307937730348e-08, - "loss": 3.8985, - "step": 767000 - }, - { - "epoch": 8.44, - "learning_rate": 8.443808790362506e-08, - "loss": 3.8977, - "step": 767500 - }, - { - "epoch": 8.45, - "learning_rate": 8.449309642994664e-08, - "loss": 3.8982, - "step": 768000 - }, - { - "epoch": 8.45, - "learning_rate": 8.454810495626821e-08, - "loss": 3.9051, - "step": 768500 - }, - { - "epoch": 8.46, - "learning_rate": 8.460311348258979e-08, - "loss": 3.8998, - "step": 769000 - }, - { - "epoch": 8.47, - "learning_rate": 8.465812200891138e-08, - "loss": 3.8904, - "step": 769500 - }, - { - "epoch": 8.47, - "learning_rate": 8.471313053523295e-08, - "loss": 3.8885, - "step": 770000 - }, - { - "epoch": 8.48, - "learning_rate": 8.476813906155453e-08, - "loss": 3.8949, - "step": 770500 - }, - { - "epoch": 8.48, - "learning_rate": 8.482314758787612e-08, - "loss": 3.8934, - "step": 771000 - }, - { - "epoch": 8.49, - "learning_rate": 8.48781561141977e-08, - "loss": 3.896, - "step": 771500 - }, - { - "epoch": 8.49, - "learning_rate": 8.493316464051927e-08, - "loss": 3.8946, - "step": 772000 - }, - { - "epoch": 8.5, - "learning_rate": 8.498817316684086e-08, - "loss": 3.8903, - "step": 772500 - }, - { - "epoch": 8.5, - "learning_rate": 8.504318169316243e-08, - "loss": 3.9084, - "step": 773000 - }, - { - "epoch": 8.51, - "learning_rate": 8.509819021948401e-08, - "loss": 3.8922, - "step": 773500 - }, - { - "epoch": 8.52, - "learning_rate": 8.51531987458056e-08, - "loss": 3.9014, - "step": 774000 - }, - { - "epoch": 8.52, - "learning_rate": 8.520820727212718e-08, - "loss": 3.8909, - "step": 774500 - }, - { - "epoch": 8.53, - "learning_rate": 8.526321579844875e-08, - "loss": 3.893, - "step": 775000 - }, - { - "epoch": 8.53, - "learning_rate": 8.531822432477034e-08, - "loss": 3.8987, - "step": 775500 - }, - { - "epoch": 8.54, - "learning_rate": 8.537323285109192e-08, - "loss": 3.8829, - "step": 776000 - }, - { - "epoch": 8.54, - "learning_rate": 8.542824137741349e-08, - "loss": 3.8944, - "step": 776500 - }, - { - "epoch": 8.55, - "learning_rate": 8.548324990373508e-08, - "loss": 3.8891, - "step": 777000 - }, - { - "epoch": 8.55, - "learning_rate": 8.553825843005666e-08, - "loss": 3.8932, - "step": 777500 - }, - { - "epoch": 8.56, - "learning_rate": 8.559326695637823e-08, - "loss": 3.9029, - "step": 778000 - }, - { - "epoch": 8.56, - "learning_rate": 8.564827548269982e-08, - "loss": 3.8749, - "step": 778500 - }, - { - "epoch": 8.57, - "learning_rate": 8.57032840090214e-08, - "loss": 3.9014, - "step": 779000 - }, - { - "epoch": 8.58, - "learning_rate": 8.575829253534297e-08, - "loss": 3.8893, - "step": 779500 - }, - { - "epoch": 8.58, - "learning_rate": 8.581330106166455e-08, - "loss": 3.8894, - "step": 780000 - }, - { - "epoch": 8.59, - "learning_rate": 8.586830958798613e-08, - "loss": 3.9082, - "step": 780500 - }, - { - "epoch": 8.59, - "learning_rate": 8.592331811430771e-08, - "loss": 3.8854, - "step": 781000 - }, - { - "epoch": 8.6, - "learning_rate": 8.597832664062929e-08, - "loss": 3.8751, - "step": 781500 - }, - { - "epoch": 8.6, - "learning_rate": 8.603333516695087e-08, - "loss": 3.8986, - "step": 782000 - }, - { - "epoch": 8.61, - "learning_rate": 8.608834369327246e-08, - "loss": 3.889, - "step": 782500 - }, - { - "epoch": 8.61, - "learning_rate": 8.614335221959403e-08, - "loss": 3.9028, - "step": 783000 - }, - { - "epoch": 8.62, - "learning_rate": 8.619836074591561e-08, - "loss": 3.9117, - "step": 783500 - }, - { - "epoch": 8.63, - "learning_rate": 8.62533692722372e-08, - "loss": 3.8982, - "step": 784000 - }, - { - "epoch": 8.63, - "learning_rate": 8.630837779855877e-08, - "loss": 3.879, - "step": 784500 - }, - { - "epoch": 8.64, - "learning_rate": 8.636338632488035e-08, - "loss": 3.8861, - "step": 785000 - }, - { - "epoch": 8.64, - "learning_rate": 8.641839485120194e-08, - "loss": 3.8881, - "step": 785500 - }, - { - "epoch": 8.65, - "learning_rate": 8.647340337752351e-08, - "loss": 3.8935, - "step": 786000 - }, - { - "epoch": 8.65, - "learning_rate": 8.652841190384509e-08, - "loss": 3.8953, - "step": 786500 - }, - { - "epoch": 8.66, - "learning_rate": 8.658342043016668e-08, - "loss": 3.8813, - "step": 787000 - }, - { - "epoch": 8.66, - "learning_rate": 8.663842895648825e-08, - "loss": 3.9062, - "step": 787500 - }, - { - "epoch": 8.67, - "learning_rate": 8.669343748280983e-08, - "loss": 3.8921, - "step": 788000 - }, - { - "epoch": 8.67, - "learning_rate": 8.674844600913142e-08, - "loss": 3.8953, - "step": 788500 - }, - { - "epoch": 8.68, - "learning_rate": 8.6803454535453e-08, - "loss": 3.8898, - "step": 789000 - }, - { - "epoch": 8.69, - "learning_rate": 8.685846306177457e-08, - "loss": 3.8872, - "step": 789500 - }, - { - "epoch": 8.69, - "learning_rate": 8.691347158809616e-08, - "loss": 3.8914, - "step": 790000 - }, - { - "epoch": 8.7, - "learning_rate": 8.696848011441774e-08, - "loss": 3.8833, - "step": 790500 - }, - { - "epoch": 8.7, - "learning_rate": 8.702348864073931e-08, - "loss": 3.8932, - "step": 791000 - }, - { - "epoch": 8.71, - "learning_rate": 8.707849716706089e-08, - "loss": 3.8925, - "step": 791500 - }, - { - "epoch": 8.71, - "learning_rate": 8.713350569338246e-08, - "loss": 3.8878, - "step": 792000 - }, - { - "epoch": 8.72, - "learning_rate": 8.718851421970404e-08, - "loss": 3.9026, - "step": 792500 - }, - { - "epoch": 8.72, - "learning_rate": 8.724352274602563e-08, - "loss": 3.91, - "step": 793000 - }, - { - "epoch": 8.73, - "learning_rate": 8.72985312723472e-08, - "loss": 3.8778, - "step": 793500 - }, - { - "epoch": 8.74, - "learning_rate": 8.735353979866878e-08, - "loss": 3.8947, - "step": 794000 - }, - { - "epoch": 8.74, - "learning_rate": 8.740854832499037e-08, - "loss": 3.8945, - "step": 794500 - }, - { - "epoch": 8.75, - "learning_rate": 8.746355685131194e-08, - "loss": 3.8887, - "step": 795000 - }, - { - "epoch": 8.75, - "learning_rate": 8.751856537763352e-08, - "loss": 3.8829, - "step": 795500 - }, - { - "epoch": 8.76, - "learning_rate": 8.757357390395511e-08, - "loss": 3.8842, - "step": 796000 - }, - { - "epoch": 8.76, - "learning_rate": 8.762858243027669e-08, - "loss": 3.8862, - "step": 796500 - }, - { - "epoch": 8.77, - "learning_rate": 8.768359095659826e-08, - "loss": 3.8884, - "step": 797000 - }, - { - "epoch": 8.77, - "learning_rate": 8.773859948291985e-08, - "loss": 3.8705, - "step": 797500 - }, - { - "epoch": 8.78, - "learning_rate": 8.779360800924143e-08, - "loss": 3.8802, - "step": 798000 - }, - { - "epoch": 8.78, - "learning_rate": 8.784861653556302e-08, - "loss": 3.898, - "step": 798500 - }, - { - "epoch": 8.79, - "learning_rate": 8.790362506188459e-08, - "loss": 3.9, - "step": 799000 - }, - { - "epoch": 8.8, - "learning_rate": 8.795863358820617e-08, - "loss": 3.8913, - "step": 799500 - }, - { - "epoch": 8.8, - "learning_rate": 8.801364211452776e-08, - "loss": 3.9104, - "step": 800000 - }, - { - "epoch": 8.81, - "learning_rate": 8.806865064084933e-08, - "loss": 3.8795, - "step": 800500 - }, - { - "epoch": 8.81, - "learning_rate": 8.812365916717091e-08, - "loss": 3.8898, - "step": 801000 - }, - { - "epoch": 8.82, - "learning_rate": 8.81786676934925e-08, - "loss": 3.9109, - "step": 801500 - }, - { - "epoch": 8.82, - "learning_rate": 8.823367621981407e-08, - "loss": 3.8936, - "step": 802000 - }, - { - "epoch": 8.83, - "learning_rate": 8.828868474613565e-08, - "loss": 3.8818, - "step": 802500 - }, - { - "epoch": 8.83, - "learning_rate": 8.834369327245722e-08, - "loss": 3.8757, - "step": 803000 - }, - { - "epoch": 8.84, - "learning_rate": 8.839870179877881e-08, - "loss": 3.8839, - "step": 803500 - }, - { - "epoch": 8.85, - "learning_rate": 8.845371032510039e-08, - "loss": 3.8843, - "step": 804000 - }, - { - "epoch": 8.85, - "learning_rate": 8.850871885142197e-08, - "loss": 3.8792, - "step": 804500 - }, - { - "epoch": 8.86, - "learning_rate": 8.856372737774354e-08, - "loss": 3.8905, - "step": 805000 - }, - { - "epoch": 8.86, - "learning_rate": 8.861873590406512e-08, - "loss": 3.8775, - "step": 805500 - }, - { - "epoch": 8.87, - "learning_rate": 8.86737444303867e-08, - "loss": 3.879, - "step": 806000 - }, - { - "epoch": 8.87, - "learning_rate": 8.872875295670828e-08, - "loss": 3.8906, - "step": 806500 - }, - { - "epoch": 8.88, - "learning_rate": 8.878376148302986e-08, - "loss": 3.9063, - "step": 807000 - }, - { - "epoch": 8.88, - "learning_rate": 8.883877000935145e-08, - "loss": 3.8853, - "step": 807500 - }, - { - "epoch": 8.89, - "learning_rate": 8.889377853567302e-08, - "loss": 3.8906, - "step": 808000 - }, - { - "epoch": 8.89, - "learning_rate": 8.89487870619946e-08, - "loss": 3.8863, - "step": 808500 - }, - { - "epoch": 8.9, - "learning_rate": 8.900379558831619e-08, - "loss": 3.889, - "step": 809000 - }, - { - "epoch": 8.91, - "learning_rate": 8.905880411463776e-08, - "loss": 3.8813, - "step": 809500 - }, - { - "epoch": 8.91, - "learning_rate": 8.911381264095934e-08, - "loss": 3.8928, - "step": 810000 - }, - { - "epoch": 8.92, - "learning_rate": 8.916882116728093e-08, - "loss": 3.9089, - "step": 810500 - }, - { - "epoch": 8.92, - "learning_rate": 8.92238296936025e-08, - "loss": 3.8768, - "step": 811000 - }, - { - "epoch": 8.93, - "learning_rate": 8.927883821992408e-08, - "loss": 3.9027, - "step": 811500 - }, - { - "epoch": 8.93, - "learning_rate": 8.933384674624567e-08, - "loss": 3.8835, - "step": 812000 - }, - { - "epoch": 8.94, - "learning_rate": 8.938885527256725e-08, - "loss": 3.8869, - "step": 812500 - }, - { - "epoch": 8.94, - "learning_rate": 8.944386379888882e-08, - "loss": 3.8905, - "step": 813000 - }, - { - "epoch": 8.95, - "learning_rate": 8.949887232521041e-08, - "loss": 3.8784, - "step": 813500 - }, - { - "epoch": 8.96, - "learning_rate": 8.955388085153199e-08, - "loss": 3.8828, - "step": 814000 - }, - { - "epoch": 8.96, - "learning_rate": 8.960888937785356e-08, - "loss": 3.8996, - "step": 814500 - }, - { - "epoch": 8.97, - "learning_rate": 8.966389790417515e-08, - "loss": 3.9052, - "step": 815000 - }, - { - "epoch": 8.97, - "learning_rate": 8.971890643049673e-08, - "loss": 3.8879, - "step": 815500 - }, - { - "epoch": 8.98, - "learning_rate": 8.97739149568183e-08, - "loss": 3.8851, - "step": 816000 - }, - { - "epoch": 8.98, - "learning_rate": 8.982892348313988e-08, - "loss": 3.9055, - "step": 816500 - }, - { - "epoch": 8.99, - "learning_rate": 8.988393200946145e-08, - "loss": 3.8858, - "step": 817000 - }, - { - "epoch": 8.99, - "learning_rate": 8.993894053578304e-08, - "loss": 3.881, - "step": 817500 - }, - { - "epoch": 9.0, - "learning_rate": 8.999394906210462e-08, - "loss": 3.9195, - "step": 818000 - }, - { - "epoch": 9.0, - "eval_loss": 3.91526460647583, - "eval_runtime": 6.1324, - "eval_samples_per_second": 253.406, - "step": 818055 - }, - { - "epoch": 9.0, - "learning_rate": 9.00489575884262e-08, - "loss": 3.8931, - "step": 818500 - }, - { - "epoch": 9.01, - "learning_rate": 9.010396611474778e-08, - "loss": 3.8859, - "step": 819000 - }, - { - "epoch": 9.02, - "learning_rate": 9.015897464106936e-08, - "loss": 3.8897, - "step": 819500 - }, - { - "epoch": 9.02, - "learning_rate": 9.021398316739094e-08, - "loss": 3.8872, - "step": 820000 - }, - { - "epoch": 9.03, - "learning_rate": 9.026899169371253e-08, - "loss": 3.8912, - "step": 820500 - }, - { - "epoch": 9.03, - "learning_rate": 9.03240002200341e-08, - "loss": 3.8934, - "step": 821000 - }, - { - "epoch": 9.04, - "learning_rate": 9.037900874635568e-08, - "loss": 3.8843, - "step": 821500 - }, - { - "epoch": 9.04, - "learning_rate": 9.043401727267727e-08, - "loss": 3.8948, - "step": 822000 - }, - { - "epoch": 9.05, - "learning_rate": 9.048902579899884e-08, - "loss": 3.866, - "step": 822500 - }, - { - "epoch": 9.05, - "learning_rate": 9.054403432532042e-08, - "loss": 3.8823, - "step": 823000 - }, - { - "epoch": 9.06, - "learning_rate": 9.059904285164201e-08, - "loss": 3.8994, - "step": 823500 - }, - { - "epoch": 9.07, - "learning_rate": 9.065405137796358e-08, - "loss": 3.881, - "step": 824000 - }, - { - "epoch": 9.07, - "learning_rate": 9.070905990428516e-08, - "loss": 3.8865, - "step": 824500 - }, - { - "epoch": 9.08, - "learning_rate": 9.076406843060675e-08, - "loss": 3.8763, - "step": 825000 - }, - { - "epoch": 9.08, - "learning_rate": 9.081907695692832e-08, - "loss": 3.9057, - "step": 825500 - }, - { - "epoch": 9.09, - "learning_rate": 9.08740854832499e-08, - "loss": 3.8752, - "step": 826000 - }, - { - "epoch": 9.09, - "learning_rate": 9.092909400957149e-08, - "loss": 3.8823, - "step": 826500 - }, - { - "epoch": 9.1, - "learning_rate": 9.098410253589306e-08, - "loss": 3.8974, - "step": 827000 - }, - { - "epoch": 9.1, - "learning_rate": 9.103911106221464e-08, - "loss": 3.8731, - "step": 827500 - }, - { - "epoch": 9.11, - "learning_rate": 9.109411958853622e-08, - "loss": 3.887, - "step": 828000 - }, - { - "epoch": 9.11, - "learning_rate": 9.114912811485779e-08, - "loss": 3.8689, - "step": 828500 - }, - { - "epoch": 9.12, - "learning_rate": 9.120413664117937e-08, - "loss": 3.9069, - "step": 829000 - }, - { - "epoch": 9.13, - "learning_rate": 9.125914516750096e-08, - "loss": 3.8876, - "step": 829500 - }, - { - "epoch": 9.13, - "learning_rate": 9.131415369382253e-08, - "loss": 3.8935, - "step": 830000 - }, - { - "epoch": 9.14, - "learning_rate": 9.136916222014411e-08, - "loss": 3.8853, - "step": 830500 - }, - { - "epoch": 9.14, - "learning_rate": 9.14241707464657e-08, - "loss": 3.8828, - "step": 831000 - }, - { - "epoch": 9.15, - "learning_rate": 9.147917927278727e-08, - "loss": 3.8912, - "step": 831500 - }, - { - "epoch": 9.15, - "learning_rate": 9.153418779910885e-08, - "loss": 3.8987, - "step": 832000 - }, - { - "epoch": 9.16, - "learning_rate": 9.158919632543044e-08, - "loss": 3.8755, - "step": 832500 - }, - { - "epoch": 9.16, - "learning_rate": 9.164420485175201e-08, - "loss": 3.8698, - "step": 833000 - }, - { - "epoch": 9.17, - "learning_rate": 9.169921337807359e-08, - "loss": 3.8809, - "step": 833500 - }, - { - "epoch": 9.18, - "learning_rate": 9.175422190439518e-08, - "loss": 3.8716, - "step": 834000 - }, - { - "epoch": 9.18, - "learning_rate": 9.180923043071676e-08, - "loss": 3.8921, - "step": 834500 - }, - { - "epoch": 9.19, - "learning_rate": 9.186423895703833e-08, - "loss": 3.8844, - "step": 835000 - }, - { - "epoch": 9.19, - "learning_rate": 9.191924748335992e-08, - "loss": 3.879, - "step": 835500 - }, - { - "epoch": 9.2, - "learning_rate": 9.19742560096815e-08, - "loss": 3.8695, - "step": 836000 - }, - { - "epoch": 9.2, - "learning_rate": 9.202926453600309e-08, - "loss": 3.8923, - "step": 836500 - }, - { - "epoch": 9.21, - "learning_rate": 9.208427306232466e-08, - "loss": 3.8497, - "step": 837000 - }, - { - "epoch": 9.21, - "learning_rate": 9.213928158864624e-08, - "loss": 3.8885, - "step": 837500 - }, - { - "epoch": 9.22, - "learning_rate": 9.219429011496783e-08, - "loss": 3.8694, - "step": 838000 - }, - { - "epoch": 9.22, - "learning_rate": 9.22492986412894e-08, - "loss": 3.8745, - "step": 838500 - }, - { - "epoch": 9.23, - "learning_rate": 9.230430716761098e-08, - "loss": 3.8806, - "step": 839000 - }, - { - "epoch": 9.24, - "learning_rate": 9.235931569393255e-08, - "loss": 3.8787, - "step": 839500 - }, - { - "epoch": 9.24, - "learning_rate": 9.241432422025413e-08, - "loss": 3.8871, - "step": 840000 - }, - { - "epoch": 9.25, - "learning_rate": 9.246933274657572e-08, - "loss": 3.8722, - "step": 840500 - }, - { - "epoch": 9.25, - "learning_rate": 9.25243412728973e-08, - "loss": 3.8858, - "step": 841000 - }, - { - "epoch": 9.26, - "learning_rate": 9.257934979921887e-08, - "loss": 3.8961, - "step": 841500 - }, - { - "epoch": 9.26, - "learning_rate": 9.263435832554045e-08, - "loss": 3.8958, - "step": 842000 - }, - { - "epoch": 9.27, - "learning_rate": 9.268936685186204e-08, - "loss": 3.8843, - "step": 842500 - }, - { - "epoch": 9.27, - "learning_rate": 9.274437537818361e-08, - "loss": 3.872, - "step": 843000 - }, - { - "epoch": 9.28, - "learning_rate": 9.279938390450519e-08, - "loss": 3.8803, - "step": 843500 - }, - { - "epoch": 9.29, - "learning_rate": 9.285439243082678e-08, - "loss": 3.8821, - "step": 844000 - }, - { - "epoch": 9.29, - "learning_rate": 9.290940095714835e-08, - "loss": 3.8959, - "step": 844500 - }, - { - "epoch": 9.3, - "learning_rate": 9.296440948346993e-08, - "loss": 3.8836, - "step": 845000 - }, - { - "epoch": 9.3, - "learning_rate": 9.301941800979152e-08, - "loss": 3.8783, - "step": 845500 - }, - { - "epoch": 9.31, - "learning_rate": 9.307442653611309e-08, - "loss": 3.8917, - "step": 846000 - }, - { - "epoch": 9.31, - "learning_rate": 9.312943506243467e-08, - "loss": 3.9004, - "step": 846500 - }, - { - "epoch": 9.32, - "learning_rate": 9.318444358875626e-08, - "loss": 3.8687, - "step": 847000 - }, - { - "epoch": 9.32, - "learning_rate": 9.323945211507783e-08, - "loss": 3.8708, - "step": 847500 - }, - { - "epoch": 9.33, - "learning_rate": 9.329446064139941e-08, - "loss": 3.8681, - "step": 848000 - }, - { - "epoch": 9.33, - "learning_rate": 9.3349469167721e-08, - "loss": 3.8812, - "step": 848500 - }, - { - "epoch": 9.34, - "learning_rate": 9.340447769404257e-08, - "loss": 3.889, - "step": 849000 - }, - { - "epoch": 9.35, - "learning_rate": 9.345948622036415e-08, - "loss": 3.905, - "step": 849500 - }, - { - "epoch": 9.35, - "learning_rate": 9.351449474668574e-08, - "loss": 3.8923, - "step": 850000 - }, - { - "epoch": 9.36, - "learning_rate": 9.356950327300732e-08, - "loss": 3.8838, - "step": 850500 - }, - { - "epoch": 9.36, - "learning_rate": 9.362451179932889e-08, - "loss": 3.8681, - "step": 851000 - }, - { - "epoch": 9.37, - "learning_rate": 9.367952032565048e-08, - "loss": 3.8939, - "step": 851500 - }, - { - "epoch": 9.37, - "learning_rate": 9.373452885197206e-08, - "loss": 3.8911, - "step": 852000 - }, - { - "epoch": 9.38, - "learning_rate": 9.378953737829363e-08, - "loss": 3.8759, - "step": 852500 - }, - { - "epoch": 9.38, - "learning_rate": 9.384454590461521e-08, - "loss": 3.8749, - "step": 853000 - }, - { - "epoch": 9.39, - "learning_rate": 9.389955443093678e-08, - "loss": 3.8796, - "step": 853500 - }, - { - "epoch": 9.4, - "learning_rate": 9.395456295725836e-08, - "loss": 3.8843, - "step": 854000 - }, - { - "epoch": 9.4, - "learning_rate": 9.400957148357995e-08, - "loss": 3.8791, - "step": 854500 - }, - { - "epoch": 9.41, - "learning_rate": 9.406458000990152e-08, - "loss": 3.8743, - "step": 855000 - }, - { - "epoch": 9.41, - "learning_rate": 9.411958853622311e-08, - "loss": 3.8948, - "step": 855500 - }, - { - "epoch": 9.42, - "learning_rate": 9.417459706254469e-08, - "loss": 3.8576, - "step": 856000 - }, - { - "epoch": 9.42, - "learning_rate": 9.422960558886627e-08, - "loss": 3.8904, - "step": 856500 - }, - { - "epoch": 9.43, - "learning_rate": 9.428461411518785e-08, - "loss": 3.8703, - "step": 857000 - }, - { - "epoch": 9.43, - "learning_rate": 9.433962264150943e-08, - "loss": 3.8838, - "step": 857500 - }, - { - "epoch": 9.44, - "learning_rate": 9.4394631167831e-08, - "loss": 3.8972, - "step": 858000 - }, - { - "epoch": 9.44, - "learning_rate": 9.44496396941526e-08, - "loss": 3.876, - "step": 858500 - }, - { - "epoch": 9.45, - "learning_rate": 9.450464822047417e-08, - "loss": 3.8738, - "step": 859000 - }, - { - "epoch": 9.46, - "learning_rate": 9.455965674679575e-08, - "loss": 3.8905, - "step": 859500 - }, - { - "epoch": 9.46, - "learning_rate": 9.461466527311734e-08, - "loss": 3.8696, - "step": 860000 - }, - { - "epoch": 9.47, - "learning_rate": 9.466967379943891e-08, - "loss": 3.8876, - "step": 860500 - }, - { - "epoch": 9.47, - "learning_rate": 9.472468232576049e-08, - "loss": 3.8806, - "step": 861000 - }, - { - "epoch": 9.48, - "learning_rate": 9.477969085208208e-08, - "loss": 3.8752, - "step": 861500 - }, - { - "epoch": 9.48, - "learning_rate": 9.483469937840365e-08, - "loss": 3.8944, - "step": 862000 - }, - { - "epoch": 9.49, - "learning_rate": 9.488970790472523e-08, - "loss": 3.8768, - "step": 862500 - }, - { - "epoch": 9.49, - "learning_rate": 9.494471643104682e-08, - "loss": 3.8779, - "step": 863000 - }, - { - "epoch": 9.5, - "learning_rate": 9.49997249573684e-08, - "loss": 3.8678, - "step": 863500 - }, - { - "epoch": 9.51, - "learning_rate": 9.505473348368997e-08, - "loss": 3.8701, - "step": 864000 - }, - { - "epoch": 9.51, - "learning_rate": 9.510974201001155e-08, - "loss": 3.8868, - "step": 864500 - }, - { - "epoch": 9.52, - "learning_rate": 9.516475053633312e-08, - "loss": 3.877, - "step": 865000 - }, - { - "epoch": 9.52, - "learning_rate": 9.52197590626547e-08, - "loss": 3.863, - "step": 865500 - }, - { - "epoch": 9.53, - "learning_rate": 9.527476758897629e-08, - "loss": 3.881, - "step": 866000 - }, - { - "epoch": 9.53, - "learning_rate": 9.532977611529786e-08, - "loss": 3.8917, - "step": 866500 - }, - { - "epoch": 9.54, - "learning_rate": 9.538478464161944e-08, - "loss": 3.8803, - "step": 867000 - }, - { - "epoch": 9.54, - "learning_rate": 9.543979316794103e-08, - "loss": 3.8846, - "step": 867500 - }, - { - "epoch": 9.55, - "learning_rate": 9.54948016942626e-08, - "loss": 3.8789, - "step": 868000 - }, - { - "epoch": 9.55, - "learning_rate": 9.554981022058418e-08, - "loss": 3.8757, - "step": 868500 - }, - { - "epoch": 9.56, - "learning_rate": 9.560481874690577e-08, - "loss": 3.8924, - "step": 869000 - }, - { - "epoch": 9.57, - "learning_rate": 9.565982727322734e-08, - "loss": 3.8773, - "step": 869500 - }, - { - "epoch": 9.57, - "learning_rate": 9.571483579954892e-08, - "loss": 3.8751, - "step": 870000 - }, - { - "epoch": 9.58, - "learning_rate": 9.576984432587051e-08, - "loss": 3.8848, - "step": 870500 - }, - { - "epoch": 9.58, - "learning_rate": 9.582485285219208e-08, - "loss": 3.863, - "step": 871000 - }, - { - "epoch": 9.59, - "learning_rate": 9.587986137851366e-08, - "loss": 3.8641, - "step": 871500 - }, - { - "epoch": 9.59, - "learning_rate": 9.593486990483525e-08, - "loss": 3.8632, - "step": 872000 - }, - { - "epoch": 9.6, - "learning_rate": 9.598987843115683e-08, - "loss": 3.8858, - "step": 872500 - }, - { - "epoch": 9.6, - "learning_rate": 9.60448869574784e-08, - "loss": 3.8765, - "step": 873000 - }, - { - "epoch": 9.61, - "learning_rate": 9.609989548379999e-08, - "loss": 3.8702, - "step": 873500 - }, - { - "epoch": 9.62, - "learning_rate": 9.615490401012157e-08, - "loss": 3.8629, - "step": 874000 - }, - { - "epoch": 9.62, - "learning_rate": 9.620991253644316e-08, - "loss": 3.8748, - "step": 874500 - }, - { - "epoch": 9.63, - "learning_rate": 9.626492106276473e-08, - "loss": 3.8688, - "step": 875000 - }, - { - "epoch": 9.63, - "learning_rate": 9.631992958908631e-08, - "loss": 3.8858, - "step": 875500 - }, - { - "epoch": 9.64, - "learning_rate": 9.637493811540788e-08, - "loss": 3.8649, - "step": 876000 - }, - { - "epoch": 9.64, - "learning_rate": 9.642994664172946e-08, - "loss": 3.893, - "step": 876500 - }, - { - "epoch": 9.65, - "learning_rate": 9.648495516805103e-08, - "loss": 3.8773, - "step": 877000 - }, - { - "epoch": 9.65, - "learning_rate": 9.653996369437262e-08, - "loss": 3.8813, - "step": 877500 - }, - { - "epoch": 9.66, - "learning_rate": 9.65949722206942e-08, - "loss": 3.8754, - "step": 878000 - }, - { - "epoch": 9.66, - "learning_rate": 9.664998074701578e-08, - "loss": 3.8753, - "step": 878500 - }, - { - "epoch": 9.67, - "learning_rate": 9.670498927333736e-08, - "loss": 3.8904, - "step": 879000 - }, - { - "epoch": 9.68, - "learning_rate": 9.675999779965894e-08, - "loss": 3.9013, - "step": 879500 - }, - { - "epoch": 9.68, - "learning_rate": 9.681500632598052e-08, - "loss": 3.8645, - "step": 880000 - }, - { - "epoch": 9.69, - "learning_rate": 9.68700148523021e-08, - "loss": 3.875, - "step": 880500 - }, - { - "epoch": 9.69, - "learning_rate": 9.692502337862368e-08, - "loss": 3.8759, - "step": 881000 - }, - { - "epoch": 9.7, - "learning_rate": 9.698003190494526e-08, - "loss": 3.8723, - "step": 881500 - }, - { - "epoch": 9.7, - "learning_rate": 9.703504043126685e-08, - "loss": 3.8717, - "step": 882000 - }, - { - "epoch": 9.71, - "learning_rate": 9.709004895758842e-08, - "loss": 3.8703, - "step": 882500 - }, - { - "epoch": 9.71, - "learning_rate": 9.714505748391e-08, - "loss": 3.8701, - "step": 883000 - }, - { - "epoch": 9.72, - "learning_rate": 9.720006601023159e-08, - "loss": 3.8526, - "step": 883500 - }, - { - "epoch": 9.73, - "learning_rate": 9.725507453655316e-08, - "loss": 3.8551, - "step": 884000 - }, - { - "epoch": 9.73, - "learning_rate": 9.731008306287474e-08, - "loss": 3.8786, - "step": 884500 - }, - { - "epoch": 9.74, - "learning_rate": 9.736509158919633e-08, - "loss": 3.8725, - "step": 885000 - }, - { - "epoch": 9.74, - "learning_rate": 9.74201001155179e-08, - "loss": 3.8839, - "step": 885500 - }, - { - "epoch": 9.75, - "learning_rate": 9.747510864183948e-08, - "loss": 3.8654, - "step": 886000 - }, - { - "epoch": 9.75, - "learning_rate": 9.753011716816107e-08, - "loss": 3.8946, - "step": 886500 - }, - { - "epoch": 9.76, - "learning_rate": 9.758512569448264e-08, - "loss": 3.8675, - "step": 887000 - }, - { - "epoch": 9.76, - "learning_rate": 9.764013422080422e-08, - "loss": 3.8737, - "step": 887500 - }, - { - "epoch": 9.77, - "learning_rate": 9.76951427471258e-08, - "loss": 3.876, - "step": 888000 - }, - { - "epoch": 9.78, - "learning_rate": 9.775015127344739e-08, - "loss": 3.8848, - "step": 888500 - }, - { - "epoch": 9.78, - "learning_rate": 9.780515979976896e-08, - "loss": 3.8654, - "step": 889000 - }, - { - "epoch": 9.79, - "learning_rate": 9.786016832609054e-08, - "loss": 3.8669, - "step": 889500 - }, - { - "epoch": 9.79, - "learning_rate": 9.791517685241211e-08, - "loss": 3.8551, - "step": 890000 - }, - { - "epoch": 9.8, - "learning_rate": 9.797018537873369e-08, - "loss": 3.8773, - "step": 890500 - }, - { - "epoch": 9.8, - "learning_rate": 9.802519390505528e-08, - "loss": 3.8834, - "step": 891000 - }, - { - "epoch": 9.81, - "learning_rate": 9.808020243137685e-08, - "loss": 3.8759, - "step": 891500 - }, - { - "epoch": 9.81, - "learning_rate": 9.813521095769843e-08, - "loss": 3.864, - "step": 892000 - }, - { - "epoch": 9.82, - "learning_rate": 9.819021948402002e-08, - "loss": 3.8698, - "step": 892500 - }, - { - "epoch": 9.82, - "learning_rate": 9.82452280103416e-08, - "loss": 3.8783, - "step": 893000 - }, - { - "epoch": 9.83, - "learning_rate": 9.830023653666318e-08, - "loss": 3.8792, - "step": 893500 - }, - { - "epoch": 9.84, - "learning_rate": 9.835524506298476e-08, - "loss": 3.8725, - "step": 894000 - }, - { - "epoch": 9.84, - "learning_rate": 9.841025358930634e-08, - "loss": 3.8855, - "step": 894500 - }, - { - "epoch": 9.85, - "learning_rate": 9.846526211562792e-08, - "loss": 3.8717, - "step": 895000 - }, - { - "epoch": 9.85, - "learning_rate": 9.85202706419495e-08, - "loss": 3.8812, - "step": 895500 - }, - { - "epoch": 9.86, - "learning_rate": 9.857527916827108e-08, - "loss": 3.8659, - "step": 896000 - }, - { - "epoch": 9.86, - "learning_rate": 9.863028769459267e-08, - "loss": 3.8765, - "step": 896500 - }, - { - "epoch": 9.87, - "learning_rate": 9.868529622091424e-08, - "loss": 3.8749, - "step": 897000 - }, - { - "epoch": 9.87, - "learning_rate": 9.874030474723582e-08, - "loss": 3.8877, - "step": 897500 - }, - { - "epoch": 9.88, - "learning_rate": 9.87953132735574e-08, - "loss": 3.8622, - "step": 898000 - }, - { - "epoch": 9.89, - "learning_rate": 9.885032179987898e-08, - "loss": 3.8751, - "step": 898500 - }, - { - "epoch": 9.89, - "learning_rate": 9.890533032620056e-08, - "loss": 3.8655, - "step": 899000 - }, - { - "epoch": 9.9, - "learning_rate": 9.896033885252215e-08, - "loss": 3.895, - "step": 899500 - }, - { - "epoch": 9.9, - "learning_rate": 9.901534737884372e-08, - "loss": 3.888, - "step": 900000 - }, - { - "epoch": 9.91, - "learning_rate": 9.90703559051653e-08, - "loss": 3.8801, - "step": 900500 - }, - { - "epoch": 9.91, - "learning_rate": 9.912536443148687e-08, - "loss": 3.8707, - "step": 901000 - }, - { - "epoch": 9.92, - "learning_rate": 9.918037295780845e-08, - "loss": 3.89, - "step": 901500 - }, - { - "epoch": 9.92, - "learning_rate": 9.923538148413003e-08, - "loss": 3.8869, - "step": 902000 - }, - { - "epoch": 9.93, - "learning_rate": 9.929039001045162e-08, - "loss": 3.8724, - "step": 902500 - }, - { - "epoch": 9.93, - "learning_rate": 9.934539853677319e-08, - "loss": 3.8889, - "step": 903000 - }, - { - "epoch": 9.94, - "learning_rate": 9.940040706309477e-08, - "loss": 3.8517, - "step": 903500 - }, - { - "epoch": 9.95, - "learning_rate": 9.945541558941636e-08, - "loss": 3.8743, - "step": 904000 - }, - { - "epoch": 9.95, - "learning_rate": 9.951042411573793e-08, - "loss": 3.8948, - "step": 904500 - }, - { - "epoch": 9.96, - "learning_rate": 9.956543264205951e-08, - "loss": 3.8811, - "step": 905000 - }, - { - "epoch": 9.96, - "learning_rate": 9.96204411683811e-08, - "loss": 3.8664, - "step": 905500 - }, - { - "epoch": 9.97, - "learning_rate": 9.967544969470267e-08, - "loss": 3.8681, - "step": 906000 - }, - { - "epoch": 9.97, - "learning_rate": 9.973045822102425e-08, - "loss": 3.8666, - "step": 906500 - }, - { - "epoch": 9.98, - "learning_rate": 9.978546674734584e-08, - "loss": 3.8888, - "step": 907000 - }, - { - "epoch": 9.98, - "learning_rate": 9.984047527366741e-08, - "loss": 3.8618, - "step": 907500 - }, - { - "epoch": 9.99, - "learning_rate": 9.989548379998899e-08, - "loss": 3.8665, - "step": 908000 - }, - { - "epoch": 10.0, - "learning_rate": 9.995049232631058e-08, - "loss": 3.8727, - "step": 908500 - }, - { - "epoch": 10.0, - "eval_loss": 3.9042184352874756, - "eval_runtime": 6.1344, - "eval_samples_per_second": 253.326, - "step": 908950 - }, - { - "epoch": 10.0, - "learning_rate": 9.999862478684196e-08, - "loss": 3.8701, - "step": 909000 - }, - { - "epoch": 10.01, - "learning_rate": 9.998487265526157e-08, - "loss": 3.8771, - "step": 909500 - }, - { - "epoch": 10.01, - "learning_rate": 9.997112052368116e-08, - "loss": 3.8777, - "step": 910000 - }, - { - "epoch": 10.02, - "learning_rate": 9.995736839210077e-08, - "loss": 3.8868, - "step": 910500 - }, - { - "epoch": 10.02, - "learning_rate": 9.994361626052038e-08, - "loss": 3.8467, - "step": 911000 - }, - { - "epoch": 10.03, - "learning_rate": 9.992986412893998e-08, - "loss": 3.8789, - "step": 911500 - }, - { - "epoch": 10.03, - "learning_rate": 9.991611199735959e-08, - "loss": 3.8684, - "step": 912000 - }, - { - "epoch": 10.04, - "learning_rate": 9.99023598657792e-08, - "loss": 3.8753, - "step": 912500 - }, - { - "epoch": 10.04, - "learning_rate": 9.98886077341988e-08, - "loss": 3.8548, - "step": 913000 - }, - { - "epoch": 10.05, - "learning_rate": 9.98748556026184e-08, - "loss": 3.8719, - "step": 913500 - }, - { - "epoch": 10.06, - "learning_rate": 9.986110347103801e-08, - "loss": 3.8869, - "step": 914000 - }, - { - "epoch": 10.06, - "learning_rate": 9.984735133945761e-08, - "loss": 3.8617, - "step": 914500 - }, - { - "epoch": 10.07, - "learning_rate": 9.983359920787722e-08, - "loss": 3.878, - "step": 915000 - }, - { - "epoch": 10.07, - "learning_rate": 9.981984707629683e-08, - "loss": 3.864, - "step": 915500 - }, - { - "epoch": 10.08, - "learning_rate": 9.980609494471642e-08, - "loss": 3.8714, - "step": 916000 - }, - { - "epoch": 10.08, - "learning_rate": 9.979234281313603e-08, - "loss": 3.8534, - "step": 916500 - }, - { - "epoch": 10.09, - "learning_rate": 9.977859068155564e-08, - "loss": 3.8678, - "step": 917000 - }, - { - "epoch": 10.09, - "learning_rate": 9.976483854997524e-08, - "loss": 3.8713, - "step": 917500 - }, - { - "epoch": 10.1, - "learning_rate": 9.975108641839485e-08, - "loss": 3.8683, - "step": 918000 - }, - { - "epoch": 10.11, - "learning_rate": 9.973733428681446e-08, - "loss": 3.8884, - "step": 918500 - }, - { - "epoch": 10.11, - "learning_rate": 9.972358215523405e-08, - "loss": 3.8626, - "step": 919000 - }, - { - "epoch": 10.12, - "learning_rate": 9.970983002365366e-08, - "loss": 3.873, - "step": 919500 - }, - { - "epoch": 10.12, - "learning_rate": 9.969607789207327e-08, - "loss": 3.8694, - "step": 920000 - }, - { - "epoch": 10.13, - "learning_rate": 9.968232576049287e-08, - "loss": 3.8691, - "step": 920500 - }, - { - "epoch": 10.13, - "learning_rate": 9.966857362891248e-08, - "loss": 3.8766, - "step": 921000 - }, - { - "epoch": 10.14, - "learning_rate": 9.965482149733209e-08, - "loss": 3.8944, - "step": 921500 - }, - { - "epoch": 10.14, - "learning_rate": 9.964106936575168e-08, - "loss": 3.8497, - "step": 922000 - }, - { - "epoch": 10.15, - "learning_rate": 9.962731723417129e-08, - "loss": 3.8611, - "step": 922500 - }, - { - "epoch": 10.15, - "learning_rate": 9.96135651025909e-08, - "loss": 3.8727, - "step": 923000 - }, - { - "epoch": 10.16, - "learning_rate": 9.95998129710105e-08, - "loss": 3.8585, - "step": 923500 - }, - { - "epoch": 10.17, - "learning_rate": 9.958606083943011e-08, - "loss": 3.8616, - "step": 924000 - }, - { - "epoch": 10.17, - "learning_rate": 9.957230870784972e-08, - "loss": 3.8469, - "step": 924500 - }, - { - "epoch": 10.18, - "learning_rate": 9.955855657626931e-08, - "loss": 3.8673, - "step": 925000 - }, - { - "epoch": 10.18, - "learning_rate": 9.954480444468892e-08, - "loss": 3.8815, - "step": 925500 - }, - { - "epoch": 10.19, - "learning_rate": 9.953105231310853e-08, - "loss": 3.8865, - "step": 926000 - }, - { - "epoch": 10.19, - "learning_rate": 9.951730018152813e-08, - "loss": 3.8616, - "step": 926500 - }, - { - "epoch": 10.2, - "learning_rate": 9.950354804994774e-08, - "loss": 3.8531, - "step": 927000 - }, - { - "epoch": 10.2, - "learning_rate": 9.948979591836735e-08, - "loss": 3.8767, - "step": 927500 - }, - { - "epoch": 10.21, - "learning_rate": 9.947604378678694e-08, - "loss": 3.878, - "step": 928000 - }, - { - "epoch": 10.22, - "learning_rate": 9.946229165520655e-08, - "loss": 3.8771, - "step": 928500 - }, - { - "epoch": 10.22, - "learning_rate": 9.944853952362616e-08, - "loss": 3.8742, - "step": 929000 - }, - { - "epoch": 10.23, - "learning_rate": 9.943478739204576e-08, - "loss": 3.8609, - "step": 929500 - }, - { - "epoch": 10.23, - "learning_rate": 9.942103526046537e-08, - "loss": 3.867, - "step": 930000 - }, - { - "epoch": 10.24, - "learning_rate": 9.940728312888498e-08, - "loss": 3.8851, - "step": 930500 - }, - { - "epoch": 10.24, - "learning_rate": 9.939353099730457e-08, - "loss": 3.8482, - "step": 931000 - }, - { - "epoch": 10.25, - "learning_rate": 9.937977886572418e-08, - "loss": 3.8677, - "step": 931500 - }, - { - "epoch": 10.25, - "learning_rate": 9.936602673414379e-08, - "loss": 3.8713, - "step": 932000 - }, - { - "epoch": 10.26, - "learning_rate": 9.935227460256339e-08, - "loss": 3.8697, - "step": 932500 - }, - { - "epoch": 10.26, - "learning_rate": 9.9338522470983e-08, - "loss": 3.8768, - "step": 933000 - }, - { - "epoch": 10.27, - "learning_rate": 9.93247703394026e-08, - "loss": 3.8682, - "step": 933500 - }, - { - "epoch": 10.28, - "learning_rate": 9.93110182078222e-08, - "loss": 3.8627, - "step": 934000 - }, - { - "epoch": 10.28, - "learning_rate": 9.929726607624181e-08, - "loss": 3.8777, - "step": 934500 - }, - { - "epoch": 10.29, - "learning_rate": 9.928351394466142e-08, - "loss": 3.8933, - "step": 935000 - }, - { - "epoch": 10.29, - "learning_rate": 9.926976181308102e-08, - "loss": 3.8538, - "step": 935500 - }, - { - "epoch": 10.3, - "learning_rate": 9.925600968150063e-08, - "loss": 3.8656, - "step": 936000 - }, - { - "epoch": 10.3, - "learning_rate": 9.924225754992023e-08, - "loss": 3.8555, - "step": 936500 - }, - { - "epoch": 10.31, - "learning_rate": 9.922850541833983e-08, - "loss": 3.8903, - "step": 937000 - }, - { - "epoch": 10.31, - "learning_rate": 9.921475328675944e-08, - "loss": 3.8552, - "step": 937500 - }, - { - "epoch": 10.32, - "learning_rate": 9.920100115517905e-08, - "loss": 3.8667, - "step": 938000 - }, - { - "epoch": 10.33, - "learning_rate": 9.918724902359865e-08, - "loss": 3.8532, - "step": 938500 - }, - { - "epoch": 10.33, - "learning_rate": 9.917349689201825e-08, - "loss": 3.842, - "step": 939000 - }, - { - "epoch": 10.34, - "learning_rate": 9.915974476043786e-08, - "loss": 3.8711, - "step": 939500 - }, - { - "epoch": 10.34, - "learning_rate": 9.914599262885746e-08, - "loss": 3.8498, - "step": 940000 - }, - { - "epoch": 10.35, - "learning_rate": 9.913224049727707e-08, - "loss": 3.8502, - "step": 940500 - }, - { - "epoch": 10.35, - "learning_rate": 9.911848836569668e-08, - "loss": 3.8692, - "step": 941000 - }, - { - "epoch": 10.36, - "learning_rate": 9.910473623411628e-08, - "loss": 3.8643, - "step": 941500 - }, - { - "epoch": 10.36, - "learning_rate": 9.909098410253588e-08, - "loss": 3.8646, - "step": 942000 - }, - { - "epoch": 10.37, - "learning_rate": 9.90772319709555e-08, - "loss": 3.8596, - "step": 942500 - }, - { - "epoch": 10.37, - "learning_rate": 9.90634798393751e-08, - "loss": 3.8669, - "step": 943000 - }, - { - "epoch": 10.38, - "learning_rate": 9.90497277077947e-08, - "loss": 3.8637, - "step": 943500 - }, - { - "epoch": 10.39, - "learning_rate": 9.903597557621431e-08, - "loss": 3.8543, - "step": 944000 - }, - { - "epoch": 10.39, - "learning_rate": 9.902222344463392e-08, - "loss": 3.8847, - "step": 944500 - }, - { - "epoch": 10.4, - "learning_rate": 9.900847131305353e-08, - "loss": 3.8667, - "step": 945000 - }, - { - "epoch": 10.4, - "learning_rate": 9.899471918147312e-08, - "loss": 3.8794, - "step": 945500 - }, - { - "epoch": 10.41, - "learning_rate": 9.898096704989273e-08, - "loss": 3.8772, - "step": 946000 - }, - { - "epoch": 10.41, - "learning_rate": 9.896721491831234e-08, - "loss": 3.8748, - "step": 946500 - }, - { - "epoch": 10.42, - "learning_rate": 9.895346278673194e-08, - "loss": 3.8735, - "step": 947000 - }, - { - "epoch": 10.42, - "learning_rate": 9.893971065515155e-08, - "loss": 3.8614, - "step": 947500 - }, - { - "epoch": 10.43, - "learning_rate": 9.892595852357116e-08, - "loss": 3.8754, - "step": 948000 - }, - { - "epoch": 10.44, - "learning_rate": 9.891220639199075e-08, - "loss": 3.8776, - "step": 948500 - }, - { - "epoch": 10.44, - "learning_rate": 9.889845426041036e-08, - "loss": 3.8592, - "step": 949000 - }, - { - "epoch": 10.45, - "learning_rate": 9.888470212882997e-08, - "loss": 3.8724, - "step": 949500 - }, - { - "epoch": 10.45, - "learning_rate": 9.887094999724957e-08, - "loss": 3.874, - "step": 950000 - }, - { - "epoch": 10.46, - "learning_rate": 9.885719786566918e-08, - "loss": 3.8758, - "step": 950500 - }, - { - "epoch": 10.46, - "learning_rate": 9.884344573408879e-08, - "loss": 3.8518, - "step": 951000 - }, - { - "epoch": 10.47, - "learning_rate": 9.882969360250838e-08, - "loss": 3.8717, - "step": 951500 - }, - { - "epoch": 10.47, - "learning_rate": 9.881594147092799e-08, - "loss": 3.8688, - "step": 952000 - }, - { - "epoch": 10.48, - "learning_rate": 9.88021893393476e-08, - "loss": 3.8604, - "step": 952500 - }, - { - "epoch": 10.48, - "learning_rate": 9.87884372077672e-08, - "loss": 3.86, - "step": 953000 - }, - { - "epoch": 10.49, - "learning_rate": 9.877468507618681e-08, - "loss": 3.8734, - "step": 953500 - }, - { - "epoch": 10.5, - "learning_rate": 9.876093294460642e-08, - "loss": 3.8658, - "step": 954000 - }, - { - "epoch": 10.5, - "learning_rate": 9.874718081302601e-08, - "loss": 3.8501, - "step": 954500 - }, - { - "epoch": 10.51, - "learning_rate": 9.873342868144562e-08, - "loss": 3.8703, - "step": 955000 - }, - { - "epoch": 10.51, - "learning_rate": 9.871967654986523e-08, - "loss": 3.8658, - "step": 955500 - }, - { - "epoch": 10.52, - "learning_rate": 9.870592441828483e-08, - "loss": 3.8704, - "step": 956000 - }, - { - "epoch": 10.52, - "learning_rate": 9.869217228670444e-08, - "loss": 3.883, - "step": 956500 - }, - { - "epoch": 10.53, - "learning_rate": 9.867842015512405e-08, - "loss": 3.8654, - "step": 957000 - }, - { - "epoch": 10.53, - "learning_rate": 9.866466802354364e-08, - "loss": 3.8446, - "step": 957500 - }, - { - "epoch": 10.54, - "learning_rate": 9.865091589196325e-08, - "loss": 3.8637, - "step": 958000 - }, - { - "epoch": 10.55, - "learning_rate": 9.863716376038286e-08, - "loss": 3.8563, - "step": 958500 - }, - { - "epoch": 10.55, - "learning_rate": 9.862341162880246e-08, - "loss": 3.8579, - "step": 959000 - }, - { - "epoch": 10.56, - "learning_rate": 9.860965949722207e-08, - "loss": 3.8813, - "step": 959500 - }, - { - "epoch": 10.56, - "learning_rate": 9.859590736564168e-08, - "loss": 3.8509, - "step": 960000 - }, - { - "epoch": 10.57, - "learning_rate": 9.858215523406127e-08, - "loss": 3.8784, - "step": 960500 - }, - { - "epoch": 10.57, - "learning_rate": 9.856840310248088e-08, - "loss": 3.8797, - "step": 961000 - }, - { - "epoch": 10.58, - "learning_rate": 9.855465097090049e-08, - "loss": 3.8665, - "step": 961500 - }, - { - "epoch": 10.58, - "learning_rate": 9.854089883932009e-08, - "loss": 3.8666, - "step": 962000 - }, - { - "epoch": 10.59, - "learning_rate": 9.85271467077397e-08, - "loss": 3.8468, - "step": 962500 - }, - { - "epoch": 10.59, - "learning_rate": 9.85133945761593e-08, - "loss": 3.8637, - "step": 963000 - }, - { - "epoch": 10.6, - "learning_rate": 9.84996424445789e-08, - "loss": 3.8757, - "step": 963500 - }, - { - "epoch": 10.61, - "learning_rate": 9.848589031299851e-08, - "loss": 3.864, - "step": 964000 - }, - { - "epoch": 10.61, - "learning_rate": 9.847213818141812e-08, - "loss": 3.8569, - "step": 964500 - }, - { - "epoch": 10.62, - "learning_rate": 9.845838604983772e-08, - "loss": 3.8758, - "step": 965000 - }, - { - "epoch": 10.62, - "learning_rate": 9.844463391825733e-08, - "loss": 3.8931, - "step": 965500 - }, - { - "epoch": 10.63, - "learning_rate": 9.843088178667693e-08, - "loss": 3.8637, - "step": 966000 - }, - { - "epoch": 10.63, - "learning_rate": 9.841712965509653e-08, - "loss": 3.8612, - "step": 966500 - }, - { - "epoch": 10.64, - "learning_rate": 9.840337752351614e-08, - "loss": 3.8609, - "step": 967000 - }, - { - "epoch": 10.64, - "learning_rate": 9.838962539193575e-08, - "loss": 3.8681, - "step": 967500 - }, - { - "epoch": 10.65, - "learning_rate": 9.837587326035535e-08, - "loss": 3.8522, - "step": 968000 - }, - { - "epoch": 10.66, - "learning_rate": 9.836212112877495e-08, - "loss": 3.8582, - "step": 968500 - }, - { - "epoch": 10.66, - "learning_rate": 9.834836899719456e-08, - "loss": 3.8594, - "step": 969000 - }, - { - "epoch": 10.67, - "learning_rate": 9.833461686561416e-08, - "loss": 3.8481, - "step": 969500 - }, - { - "epoch": 10.67, - "learning_rate": 9.832086473403377e-08, - "loss": 3.8386, - "step": 970000 - }, - { - "epoch": 10.68, - "learning_rate": 9.830711260245338e-08, - "loss": 3.8644, - "step": 970500 - }, - { - "epoch": 10.68, - "learning_rate": 9.829336047087298e-08, - "loss": 3.8727, - "step": 971000 - }, - { - "epoch": 10.69, - "learning_rate": 9.827960833929258e-08, - "loss": 3.8721, - "step": 971500 - }, - { - "epoch": 10.69, - "learning_rate": 9.82658562077122e-08, - "loss": 3.8689, - "step": 972000 - }, - { - "epoch": 10.7, - "learning_rate": 9.825210407613179e-08, - "loss": 3.8601, - "step": 972500 - }, - { - "epoch": 10.7, - "learning_rate": 9.82383519445514e-08, - "loss": 3.8681, - "step": 973000 - }, - { - "epoch": 10.71, - "learning_rate": 9.822459981297101e-08, - "loss": 3.8565, - "step": 973500 - }, - { - "epoch": 10.72, - "learning_rate": 9.82108476813906e-08, - "loss": 3.8703, - "step": 974000 - }, - { - "epoch": 10.72, - "learning_rate": 9.819709554981021e-08, - "loss": 3.8704, - "step": 974500 - }, - { - "epoch": 10.73, - "learning_rate": 9.818334341822982e-08, - "loss": 3.8545, - "step": 975000 - }, - { - "epoch": 10.73, - "learning_rate": 9.816959128664942e-08, - "loss": 3.8456, - "step": 975500 - }, - { - "epoch": 10.74, - "learning_rate": 9.815583915506903e-08, - "loss": 3.8623, - "step": 976000 - }, - { - "epoch": 10.74, - "learning_rate": 9.814208702348864e-08, - "loss": 3.863, - "step": 976500 - }, - { - "epoch": 10.75, - "learning_rate": 9.812833489190823e-08, - "loss": 3.8543, - "step": 977000 - }, - { - "epoch": 10.75, - "learning_rate": 9.811458276032784e-08, - "loss": 3.8538, - "step": 977500 - }, - { - "epoch": 10.76, - "learning_rate": 9.810083062874745e-08, - "loss": 3.856, - "step": 978000 - }, - { - "epoch": 10.77, - "learning_rate": 9.808707849716705e-08, - "loss": 3.867, - "step": 978500 - }, - { - "epoch": 10.77, - "learning_rate": 9.807332636558666e-08, - "loss": 3.8787, - "step": 979000 - }, - { - "epoch": 10.78, - "learning_rate": 9.805957423400627e-08, - "loss": 3.8575, - "step": 979500 - }, - { - "epoch": 10.78, - "learning_rate": 9.804582210242586e-08, - "loss": 3.8658, - "step": 980000 - }, - { - "epoch": 10.79, - "learning_rate": 9.803206997084547e-08, - "loss": 3.8507, - "step": 980500 - }, - { - "epoch": 10.79, - "learning_rate": 9.801831783926508e-08, - "loss": 3.8549, - "step": 981000 - }, - { - "epoch": 10.8, - "learning_rate": 9.800456570768468e-08, - "loss": 3.88, - "step": 981500 - }, - { - "epoch": 10.8, - "learning_rate": 9.799081357610429e-08, - "loss": 3.8592, - "step": 982000 - }, - { - "epoch": 10.81, - "learning_rate": 9.79770614445239e-08, - "loss": 3.8429, - "step": 982500 - }, - { - "epoch": 10.81, - "learning_rate": 9.79633093129435e-08, - "loss": 3.8448, - "step": 983000 - }, - { - "epoch": 10.82, - "learning_rate": 9.79495571813631e-08, - "loss": 3.8565, - "step": 983500 - }, - { - "epoch": 10.83, - "learning_rate": 9.793580504978271e-08, - "loss": 3.8517, - "step": 984000 - }, - { - "epoch": 10.83, - "learning_rate": 9.792205291820231e-08, - "loss": 3.8523, - "step": 984500 - }, - { - "epoch": 10.84, - "learning_rate": 9.790830078662192e-08, - "loss": 3.8534, - "step": 985000 - }, - { - "epoch": 10.84, - "learning_rate": 9.789454865504153e-08, - "loss": 3.8734, - "step": 985500 - }, - { - "epoch": 10.85, - "learning_rate": 9.788079652346112e-08, - "loss": 3.8567, - "step": 986000 - }, - { - "epoch": 10.85, - "learning_rate": 9.786704439188073e-08, - "loss": 3.8718, - "step": 986500 - }, - { - "epoch": 10.86, - "learning_rate": 9.785329226030034e-08, - "loss": 3.8782, - "step": 987000 - }, - { - "epoch": 10.86, - "learning_rate": 9.783954012871994e-08, - "loss": 3.8746, - "step": 987500 - }, - { - "epoch": 10.87, - "learning_rate": 9.782578799713955e-08, - "loss": 3.8683, - "step": 988000 - }, - { - "epoch": 10.88, - "learning_rate": 9.781203586555916e-08, - "loss": 3.8782, - "step": 988500 - }, - { - "epoch": 10.88, - "learning_rate": 9.779828373397877e-08, - "loss": 3.8757, - "step": 989000 - }, - { - "epoch": 10.89, - "learning_rate": 9.778453160239836e-08, - "loss": 3.8643, - "step": 989500 - }, - { - "epoch": 10.89, - "learning_rate": 9.777077947081797e-08, - "loss": 3.8603, - "step": 990000 - }, - { - "epoch": 10.9, - "learning_rate": 9.775702733923758e-08, - "loss": 3.8593, - "step": 990500 - }, - { - "epoch": 10.9, - "learning_rate": 9.774327520765718e-08, - "loss": 3.8638, - "step": 991000 - }, - { - "epoch": 10.91, - "learning_rate": 9.772952307607679e-08, - "loss": 3.8731, - "step": 991500 - }, - { - "epoch": 10.91, - "learning_rate": 9.77157709444964e-08, - "loss": 3.8735, - "step": 992000 - }, - { - "epoch": 10.92, - "learning_rate": 9.7702018812916e-08, - "loss": 3.859, - "step": 992500 - }, - { - "epoch": 10.92, - "learning_rate": 9.76882666813356e-08, - "loss": 3.8499, - "step": 993000 - }, - { - "epoch": 10.93, - "learning_rate": 9.767451454975521e-08, - "loss": 3.858, - "step": 993500 - }, - { - "epoch": 10.94, - "learning_rate": 9.766076241817482e-08, - "loss": 3.8575, - "step": 994000 - }, - { - "epoch": 10.94, - "learning_rate": 9.764701028659443e-08, - "loss": 3.8623, - "step": 994500 - }, - { - "epoch": 10.95, - "learning_rate": 9.763325815501402e-08, - "loss": 3.8699, - "step": 995000 - }, - { - "epoch": 10.95, - "learning_rate": 9.761950602343363e-08, - "loss": 3.8741, - "step": 995500 - }, - { - "epoch": 10.96, - "learning_rate": 9.760575389185324e-08, - "loss": 3.8574, - "step": 996000 - }, - { - "epoch": 10.96, - "learning_rate": 9.759200176027284e-08, - "loss": 3.8764, - "step": 996500 - }, - { - "epoch": 10.97, - "learning_rate": 9.757824962869245e-08, - "loss": 3.8547, - "step": 997000 - }, - { - "epoch": 10.97, - "learning_rate": 9.756449749711206e-08, - "loss": 3.8585, - "step": 997500 - }, - { - "epoch": 10.98, - "learning_rate": 9.755074536553165e-08, - "loss": 3.8546, - "step": 998000 - }, - { - "epoch": 10.99, - "learning_rate": 9.753699323395126e-08, - "loss": 3.8531, - "step": 998500 - }, - { - "epoch": 10.99, - "learning_rate": 9.752324110237087e-08, - "loss": 3.8498, - "step": 999000 - }, - { - "epoch": 11.0, - "learning_rate": 9.750948897079047e-08, - "loss": 3.8766, - "step": 999500 - }, - { - "epoch": 11.0, - "eval_loss": 3.895866870880127, - "eval_runtime": 6.1351, - "eval_samples_per_second": 253.299, - "step": 999845 - }, - { - "epoch": 11.0, - "learning_rate": 9.749573683921008e-08, - "loss": 3.8554, - "step": 1000000 - }, - { - "epoch": 11.01, - "learning_rate": 9.748198470762969e-08, - "loss": 3.8874, - "step": 1000500 - }, - { - "epoch": 11.01, - "learning_rate": 9.746823257604928e-08, - "loss": 3.8435, - "step": 1001000 - }, - { - "epoch": 11.02, - "learning_rate": 9.74544804444689e-08, - "loss": 3.8729, - "step": 1001500 - }, - { - "epoch": 11.02, - "learning_rate": 9.74407283128885e-08, - "loss": 3.8792, - "step": 1002000 - }, - { - "epoch": 11.03, - "learning_rate": 9.74269761813081e-08, - "loss": 3.8472, - "step": 1002500 - }, - { - "epoch": 11.03, - "learning_rate": 9.741322404972771e-08, - "loss": 3.8571, - "step": 1003000 - }, - { - "epoch": 11.04, - "learning_rate": 9.739947191814732e-08, - "loss": 3.8463, - "step": 1003500 - }, - { - "epoch": 11.05, - "learning_rate": 9.738571978656691e-08, - "loss": 3.8666, - "step": 1004000 - }, - { - "epoch": 11.05, - "learning_rate": 9.737196765498652e-08, - "loss": 3.8495, - "step": 1004500 - }, - { - "epoch": 11.06, - "learning_rate": 9.735821552340613e-08, - "loss": 3.869, - "step": 1005000 - }, - { - "epoch": 11.06, - "learning_rate": 9.734446339182573e-08, - "loss": 3.8619, - "step": 1005500 - }, - { - "epoch": 11.07, - "learning_rate": 9.733071126024534e-08, - "loss": 3.8636, - "step": 1006000 - }, - { - "epoch": 11.07, - "learning_rate": 9.731695912866495e-08, - "loss": 3.8448, - "step": 1006500 - }, - { - "epoch": 11.08, - "learning_rate": 9.730320699708454e-08, - "loss": 3.8611, - "step": 1007000 - }, - { - "epoch": 11.08, - "learning_rate": 9.728945486550415e-08, - "loss": 3.8451, - "step": 1007500 - }, - { - "epoch": 11.09, - "learning_rate": 9.727570273392376e-08, - "loss": 3.875, - "step": 1008000 - }, - { - "epoch": 11.1, - "learning_rate": 9.726195060234336e-08, - "loss": 3.8701, - "step": 1008500 - }, - { - "epoch": 11.1, - "learning_rate": 9.724819847076297e-08, - "loss": 3.8766, - "step": 1009000 - }, - { - "epoch": 11.11, - "learning_rate": 9.723444633918258e-08, - "loss": 3.8508, - "step": 1009500 - }, - { - "epoch": 11.11, - "learning_rate": 9.722069420760217e-08, - "loss": 3.8487, - "step": 1010000 - }, - { - "epoch": 11.12, - "learning_rate": 9.720694207602178e-08, - "loss": 3.8628, - "step": 1010500 - }, - { - "epoch": 11.12, - "learning_rate": 9.719318994444139e-08, - "loss": 3.8508, - "step": 1011000 - }, - { - "epoch": 11.13, - "learning_rate": 9.717943781286099e-08, - "loss": 3.8699, - "step": 1011500 - }, - { - "epoch": 11.13, - "learning_rate": 9.71656856812806e-08, - "loss": 3.8638, - "step": 1012000 - }, - { - "epoch": 11.14, - "learning_rate": 9.71519335497002e-08, - "loss": 3.8429, - "step": 1012500 - }, - { - "epoch": 11.14, - "learning_rate": 9.71381814181198e-08, - "loss": 3.8388, - "step": 1013000 - }, - { - "epoch": 11.15, - "learning_rate": 9.712442928653941e-08, - "loss": 3.8491, - "step": 1013500 - }, - { - "epoch": 11.16, - "learning_rate": 9.711067715495902e-08, - "loss": 3.8575, - "step": 1014000 - }, - { - "epoch": 11.16, - "learning_rate": 9.709692502337862e-08, - "loss": 3.8515, - "step": 1014500 - }, - { - "epoch": 11.17, - "learning_rate": 9.708317289179823e-08, - "loss": 3.8726, - "step": 1015000 - }, - { - "epoch": 11.17, - "learning_rate": 9.706942076021782e-08, - "loss": 3.8666, - "step": 1015500 - }, - { - "epoch": 11.18, - "learning_rate": 9.705566862863743e-08, - "loss": 3.8772, - "step": 1016000 - }, - { - "epoch": 11.18, - "learning_rate": 9.704191649705704e-08, - "loss": 3.8504, - "step": 1016500 - }, - { - "epoch": 11.19, - "learning_rate": 9.702816436547664e-08, - "loss": 3.8383, - "step": 1017000 - }, - { - "epoch": 11.19, - "learning_rate": 9.701441223389625e-08, - "loss": 3.8617, - "step": 1017500 - }, - { - "epoch": 11.2, - "learning_rate": 9.700066010231586e-08, - "loss": 3.842, - "step": 1018000 - }, - { - "epoch": 11.21, - "learning_rate": 9.698690797073545e-08, - "loss": 3.8596, - "step": 1018500 - }, - { - "epoch": 11.21, - "learning_rate": 9.697315583915506e-08, - "loss": 3.8662, - "step": 1019000 - }, - { - "epoch": 11.22, - "learning_rate": 9.695940370757467e-08, - "loss": 3.8617, - "step": 1019500 - }, - { - "epoch": 11.22, - "learning_rate": 9.694565157599427e-08, - "loss": 3.8479, - "step": 1020000 - }, - { - "epoch": 11.23, - "learning_rate": 9.693189944441388e-08, - "loss": 3.8726, - "step": 1020500 - }, - { - "epoch": 11.23, - "learning_rate": 9.691814731283349e-08, - "loss": 3.8639, - "step": 1021000 - }, - { - "epoch": 11.24, - "learning_rate": 9.690439518125308e-08, - "loss": 3.8646, - "step": 1021500 - }, - { - "epoch": 11.24, - "learning_rate": 9.689064304967269e-08, - "loss": 3.8641, - "step": 1022000 - }, - { - "epoch": 11.25, - "learning_rate": 9.68768909180923e-08, - "loss": 3.8669, - "step": 1022500 - }, - { - "epoch": 11.25, - "learning_rate": 9.68631387865119e-08, - "loss": 3.8502, - "step": 1023000 - }, - { - "epoch": 11.26, - "learning_rate": 9.68493866549315e-08, - "loss": 3.8813, - "step": 1023500 - }, - { - "epoch": 11.27, - "learning_rate": 9.683563452335112e-08, - "loss": 3.8592, - "step": 1024000 - }, - { - "epoch": 11.27, - "learning_rate": 9.682188239177071e-08, - "loss": 3.8553, - "step": 1024500 - }, - { - "epoch": 11.28, - "learning_rate": 9.680813026019032e-08, - "loss": 3.8477, - "step": 1025000 - }, - { - "epoch": 11.28, - "learning_rate": 9.679437812860993e-08, - "loss": 3.8418, - "step": 1025500 - }, - { - "epoch": 11.29, - "learning_rate": 9.678062599702953e-08, - "loss": 3.8389, - "step": 1026000 - }, - { - "epoch": 11.29, - "learning_rate": 9.676687386544914e-08, - "loss": 3.8577, - "step": 1026500 - }, - { - "epoch": 11.3, - "learning_rate": 9.675312173386875e-08, - "loss": 3.8561, - "step": 1027000 - }, - { - "epoch": 11.3, - "learning_rate": 9.673936960228834e-08, - "loss": 3.8564, - "step": 1027500 - }, - { - "epoch": 11.31, - "learning_rate": 9.672561747070795e-08, - "loss": 3.8499, - "step": 1028000 - }, - { - "epoch": 11.32, - "learning_rate": 9.671186533912756e-08, - "loss": 3.8395, - "step": 1028500 - }, - { - "epoch": 11.32, - "learning_rate": 9.669811320754716e-08, - "loss": 3.8522, - "step": 1029000 - }, - { - "epoch": 11.33, - "learning_rate": 9.668436107596677e-08, - "loss": 3.8551, - "step": 1029500 - }, - { - "epoch": 11.33, - "learning_rate": 9.667060894438637e-08, - "loss": 3.8493, - "step": 1030000 - }, - { - "epoch": 11.34, - "learning_rate": 9.665685681280597e-08, - "loss": 3.8701, - "step": 1030500 - }, - { - "epoch": 11.34, - "learning_rate": 9.664310468122558e-08, - "loss": 3.8614, - "step": 1031000 - }, - { - "epoch": 11.35, - "learning_rate": 9.662935254964519e-08, - "loss": 3.8588, - "step": 1031500 - }, - { - "epoch": 11.35, - "learning_rate": 9.661560041806479e-08, - "loss": 3.8544, - "step": 1032000 - }, - { - "epoch": 11.36, - "learning_rate": 9.66018482864844e-08, - "loss": 3.8473, - "step": 1032500 - }, - { - "epoch": 11.36, - "learning_rate": 9.6588096154904e-08, - "loss": 3.8432, - "step": 1033000 - }, - { - "epoch": 11.37, - "learning_rate": 9.65743440233236e-08, - "loss": 3.8371, - "step": 1033500 - }, - { - "epoch": 11.38, - "learning_rate": 9.656059189174321e-08, - "loss": 3.8503, - "step": 1034000 - }, - { - "epoch": 11.38, - "learning_rate": 9.654683976016282e-08, - "loss": 3.8621, - "step": 1034500 - }, - { - "epoch": 11.39, - "learning_rate": 9.653308762858242e-08, - "loss": 3.8454, - "step": 1035000 - }, - { - "epoch": 11.39, - "learning_rate": 9.651933549700202e-08, - "loss": 3.8663, - "step": 1035500 - }, - { - "epoch": 11.4, - "learning_rate": 9.650558336542163e-08, - "loss": 3.8392, - "step": 1036000 - }, - { - "epoch": 11.4, - "learning_rate": 9.649183123384124e-08, - "loss": 3.8508, - "step": 1036500 - }, - { - "epoch": 11.41, - "learning_rate": 9.647807910226084e-08, - "loss": 3.849, - "step": 1037000 - }, - { - "epoch": 11.41, - "learning_rate": 9.646432697068045e-08, - "loss": 3.8781, - "step": 1037500 - }, - { - "epoch": 11.42, - "learning_rate": 9.645057483910006e-08, - "loss": 3.8599, - "step": 1038000 - }, - { - "epoch": 11.43, - "learning_rate": 9.643682270751965e-08, - "loss": 3.8613, - "step": 1038500 - }, - { - "epoch": 11.43, - "learning_rate": 9.642307057593926e-08, - "loss": 3.853, - "step": 1039000 - }, - { - "epoch": 11.44, - "learning_rate": 9.640931844435887e-08, - "loss": 3.8524, - "step": 1039500 - }, - { - "epoch": 11.44, - "learning_rate": 9.639556631277848e-08, - "loss": 3.8483, - "step": 1040000 - }, - { - "epoch": 11.45, - "learning_rate": 9.638181418119808e-08, - "loss": 3.8465, - "step": 1040500 - }, - { - "epoch": 11.45, - "learning_rate": 9.636806204961769e-08, - "loss": 3.8485, - "step": 1041000 - }, - { - "epoch": 11.46, - "learning_rate": 9.63543099180373e-08, - "loss": 3.8652, - "step": 1041500 - }, - { - "epoch": 11.46, - "learning_rate": 9.63405577864569e-08, - "loss": 3.8547, - "step": 1042000 - }, - { - "epoch": 11.47, - "learning_rate": 9.63268056548765e-08, - "loss": 3.8508, - "step": 1042500 - }, - { - "epoch": 11.47, - "learning_rate": 9.631305352329611e-08, - "loss": 3.8684, - "step": 1043000 - }, - { - "epoch": 11.48, - "learning_rate": 9.629930139171572e-08, - "loss": 3.8516, - "step": 1043500 - }, - { - "epoch": 11.49, - "learning_rate": 9.628554926013532e-08, - "loss": 3.8452, - "step": 1044000 - }, - { - "epoch": 11.49, - "learning_rate": 9.627179712855493e-08, - "loss": 3.8465, - "step": 1044500 - }, - { - "epoch": 11.5, - "learning_rate": 9.625804499697454e-08, - "loss": 3.8386, - "step": 1045000 - }, - { - "epoch": 11.5, - "learning_rate": 9.624429286539413e-08, - "loss": 3.8555, - "step": 1045500 - }, - { - "epoch": 11.51, - "learning_rate": 9.623054073381374e-08, - "loss": 3.8634, - "step": 1046000 - }, - { - "epoch": 11.51, - "learning_rate": 9.621678860223335e-08, - "loss": 3.8556, - "step": 1046500 - }, - { - "epoch": 11.52, - "learning_rate": 9.620303647065295e-08, - "loss": 3.8601, - "step": 1047000 - }, - { - "epoch": 11.52, - "learning_rate": 9.618928433907256e-08, - "loss": 3.8625, - "step": 1047500 - }, - { - "epoch": 11.53, - "learning_rate": 9.617553220749217e-08, - "loss": 3.8585, - "step": 1048000 - }, - { - "epoch": 11.54, - "learning_rate": 9.616178007591176e-08, - "loss": 3.8576, - "step": 1048500 - }, - { - "epoch": 11.54, - "learning_rate": 9.614802794433137e-08, - "loss": 3.853, - "step": 1049000 - }, - { - "epoch": 11.55, - "learning_rate": 9.613427581275098e-08, - "loss": 3.8545, - "step": 1049500 - }, - { - "epoch": 11.55, - "learning_rate": 9.612052368117058e-08, - "loss": 3.85, - "step": 1050000 - }, - { - "epoch": 11.56, - "learning_rate": 9.610677154959019e-08, - "loss": 3.8578, - "step": 1050500 - }, - { - "epoch": 11.56, - "learning_rate": 9.60930194180098e-08, - "loss": 3.8581, - "step": 1051000 - }, - { - "epoch": 11.57, - "learning_rate": 9.607926728642939e-08, - "loss": 3.8568, - "step": 1051500 - }, - { - "epoch": 11.57, - "learning_rate": 9.6065515154849e-08, - "loss": 3.8609, - "step": 1052000 - }, - { - "epoch": 11.58, - "learning_rate": 9.605176302326861e-08, - "loss": 3.8379, - "step": 1052500 - }, - { - "epoch": 11.58, - "learning_rate": 9.60380108916882e-08, - "loss": 3.8529, - "step": 1053000 - }, - { - "epoch": 11.59, - "learning_rate": 9.602425876010782e-08, - "loss": 3.8429, - "step": 1053500 - }, - { - "epoch": 11.6, - "learning_rate": 9.601050662852742e-08, - "loss": 3.8343, - "step": 1054000 - }, - { - "epoch": 11.6, - "learning_rate": 9.599675449694702e-08, - "loss": 3.8305, - "step": 1054500 - }, - { - "epoch": 11.61, - "learning_rate": 9.598300236536663e-08, - "loss": 3.8729, - "step": 1055000 - }, - { - "epoch": 11.61, - "learning_rate": 9.596925023378624e-08, - "loss": 3.8549, - "step": 1055500 - }, - { - "epoch": 11.62, - "learning_rate": 9.595549810220584e-08, - "loss": 3.8515, - "step": 1056000 - }, - { - "epoch": 11.62, - "learning_rate": 9.594174597062544e-08, - "loss": 3.8538, - "step": 1056500 - }, - { - "epoch": 11.63, - "learning_rate": 9.592799383904505e-08, - "loss": 3.8531, - "step": 1057000 - }, - { - "epoch": 11.63, - "learning_rate": 9.591424170746465e-08, - "loss": 3.8471, - "step": 1057500 - }, - { - "epoch": 11.64, - "learning_rate": 9.590048957588426e-08, - "loss": 3.8409, - "step": 1058000 - }, - { - "epoch": 11.65, - "learning_rate": 9.588673744430387e-08, - "loss": 3.8684, - "step": 1058500 - }, - { - "epoch": 11.65, - "learning_rate": 9.587298531272347e-08, - "loss": 3.8542, - "step": 1059000 - }, - { - "epoch": 11.66, - "learning_rate": 9.585923318114307e-08, - "loss": 3.8459, - "step": 1059500 - }, - { - "epoch": 11.66, - "learning_rate": 9.584548104956268e-08, - "loss": 3.8347, - "step": 1060000 - }, - { - "epoch": 11.67, - "learning_rate": 9.583172891798228e-08, - "loss": 3.8486, - "step": 1060500 - }, - { - "epoch": 11.67, - "learning_rate": 9.581797678640189e-08, - "loss": 3.8488, - "step": 1061000 - }, - { - "epoch": 11.68, - "learning_rate": 9.58042246548215e-08, - "loss": 3.8714, - "step": 1061500 - }, - { - "epoch": 11.68, - "learning_rate": 9.57904725232411e-08, - "loss": 3.8863, - "step": 1062000 - }, - { - "epoch": 11.69, - "learning_rate": 9.57767203916607e-08, - "loss": 3.8484, - "step": 1062500 - }, - { - "epoch": 11.69, - "learning_rate": 9.576296826008031e-08, - "loss": 3.853, - "step": 1063000 - }, - { - "epoch": 11.7, - "learning_rate": 9.574921612849991e-08, - "loss": 3.8334, - "step": 1063500 - }, - { - "epoch": 11.71, - "learning_rate": 9.573546399691952e-08, - "loss": 3.8506, - "step": 1064000 - }, - { - "epoch": 11.71, - "learning_rate": 9.572171186533913e-08, - "loss": 3.8327, - "step": 1064500 - }, - { - "epoch": 11.72, - "learning_rate": 9.570795973375872e-08, - "loss": 3.8461, - "step": 1065000 - }, - { - "epoch": 11.72, - "learning_rate": 9.569420760217833e-08, - "loss": 3.8453, - "step": 1065500 - }, - { - "epoch": 11.73, - "learning_rate": 9.568045547059794e-08, - "loss": 3.8445, - "step": 1066000 - }, - { - "epoch": 11.73, - "learning_rate": 9.566670333901754e-08, - "loss": 3.8568, - "step": 1066500 - }, - { - "epoch": 11.74, - "learning_rate": 9.565295120743715e-08, - "loss": 3.8575, - "step": 1067000 - }, - { - "epoch": 11.74, - "learning_rate": 9.563919907585676e-08, - "loss": 3.8499, - "step": 1067500 - }, - { - "epoch": 11.75, - "learning_rate": 9.562544694427635e-08, - "loss": 3.8452, - "step": 1068000 - }, - { - "epoch": 11.76, - "learning_rate": 9.561169481269596e-08, - "loss": 3.8769, - "step": 1068500 - }, - { - "epoch": 11.76, - "learning_rate": 9.559794268111557e-08, - "loss": 3.8759, - "step": 1069000 - }, - { - "epoch": 11.77, - "learning_rate": 9.558419054953517e-08, - "loss": 3.8518, - "step": 1069500 - }, - { - "epoch": 11.77, - "learning_rate": 9.557043841795478e-08, - "loss": 3.8602, - "step": 1070000 - }, - { - "epoch": 11.78, - "learning_rate": 9.555668628637439e-08, - "loss": 3.8502, - "step": 1070500 - }, - { - "epoch": 11.78, - "learning_rate": 9.554293415479398e-08, - "loss": 3.851, - "step": 1071000 - }, - { - "epoch": 11.79, - "learning_rate": 9.552918202321359e-08, - "loss": 3.8361, - "step": 1071500 - }, - { - "epoch": 11.79, - "learning_rate": 9.55154298916332e-08, - "loss": 3.854, - "step": 1072000 - }, - { - "epoch": 11.8, - "learning_rate": 9.55016777600528e-08, - "loss": 3.8515, - "step": 1072500 - }, - { - "epoch": 11.8, - "learning_rate": 9.548792562847241e-08, - "loss": 3.8648, - "step": 1073000 - }, - { - "epoch": 11.81, - "learning_rate": 9.547417349689202e-08, - "loss": 3.842, - "step": 1073500 - }, - { - "epoch": 11.82, - "learning_rate": 9.546042136531161e-08, - "loss": 3.861, - "step": 1074000 - }, - { - "epoch": 11.82, - "learning_rate": 9.544666923373122e-08, - "loss": 3.845, - "step": 1074500 - }, - { - "epoch": 11.83, - "learning_rate": 9.543291710215083e-08, - "loss": 3.8699, - "step": 1075000 - }, - { - "epoch": 11.83, - "learning_rate": 9.541916497057043e-08, - "loss": 3.8418, - "step": 1075500 - }, - { - "epoch": 11.84, - "learning_rate": 9.540541283899004e-08, - "loss": 3.852, - "step": 1076000 - }, - { - "epoch": 11.84, - "learning_rate": 9.539166070740965e-08, - "loss": 3.851, - "step": 1076500 - }, - { - "epoch": 11.85, - "learning_rate": 9.537790857582924e-08, - "loss": 3.8313, - "step": 1077000 - }, - { - "epoch": 11.85, - "learning_rate": 9.536415644424885e-08, - "loss": 3.8516, - "step": 1077500 - }, - { - "epoch": 11.86, - "learning_rate": 9.535040431266846e-08, - "loss": 3.8357, - "step": 1078000 - }, - { - "epoch": 11.87, - "learning_rate": 9.533665218108806e-08, - "loss": 3.8461, - "step": 1078500 - }, - { - "epoch": 11.87, - "learning_rate": 9.532290004950767e-08, - "loss": 3.8608, - "step": 1079000 - }, - { - "epoch": 11.88, - "learning_rate": 9.530914791792728e-08, - "loss": 3.8533, - "step": 1079500 - }, - { - "epoch": 11.88, - "learning_rate": 9.529539578634687e-08, - "loss": 3.8361, - "step": 1080000 - }, - { - "epoch": 11.89, - "learning_rate": 9.528164365476648e-08, - "loss": 3.8455, - "step": 1080500 - }, - { - "epoch": 11.89, - "learning_rate": 9.526789152318609e-08, - "loss": 3.8398, - "step": 1081000 - }, - { - "epoch": 11.9, - "learning_rate": 9.525413939160569e-08, - "loss": 3.8553, - "step": 1081500 - }, - { - "epoch": 11.9, - "learning_rate": 9.52403872600253e-08, - "loss": 3.8275, - "step": 1082000 - }, - { - "epoch": 11.91, - "learning_rate": 9.52266351284449e-08, - "loss": 3.8512, - "step": 1082500 - }, - { - "epoch": 11.91, - "learning_rate": 9.52128829968645e-08, - "loss": 3.8606, - "step": 1083000 - }, - { - "epoch": 11.92, - "learning_rate": 9.519913086528411e-08, - "loss": 3.858, - "step": 1083500 - }, - { - "epoch": 11.93, - "learning_rate": 9.518537873370372e-08, - "loss": 3.8465, - "step": 1084000 - }, - { - "epoch": 11.93, - "learning_rate": 9.517162660212332e-08, - "loss": 3.8482, - "step": 1084500 - }, - { - "epoch": 11.94, - "learning_rate": 9.515787447054293e-08, - "loss": 3.8532, - "step": 1085000 - }, - { - "epoch": 11.94, - "learning_rate": 9.514412233896254e-08, - "loss": 3.843, - "step": 1085500 - }, - { - "epoch": 11.95, - "learning_rate": 9.513037020738213e-08, - "loss": 3.85, - "step": 1086000 - }, - { - "epoch": 11.95, - "learning_rate": 9.511661807580174e-08, - "loss": 3.8532, - "step": 1086500 - }, - { - "epoch": 11.96, - "learning_rate": 9.510286594422135e-08, - "loss": 3.8317, - "step": 1087000 - }, - { - "epoch": 11.96, - "learning_rate": 9.508911381264096e-08, - "loss": 3.8499, - "step": 1087500 - }, - { - "epoch": 11.97, - "learning_rate": 9.507536168106056e-08, - "loss": 3.8736, - "step": 1088000 - }, - { - "epoch": 11.98, - "learning_rate": 9.506160954948016e-08, - "loss": 3.8361, - "step": 1088500 - }, - { - "epoch": 11.98, - "learning_rate": 9.504785741789977e-08, - "loss": 3.851, - "step": 1089000 - }, - { - "epoch": 11.99, - "learning_rate": 9.503410528631938e-08, - "loss": 3.8527, - "step": 1089500 - }, - { - "epoch": 11.99, - "learning_rate": 9.502035315473898e-08, - "loss": 3.8495, - "step": 1090000 - }, - { - "epoch": 12.0, - "learning_rate": 9.500660102315859e-08, - "loss": 3.8611, - "step": 1090500 - }, - { - "epoch": 12.0, - "eval_loss": 3.8887219429016113, - "eval_runtime": 6.1376, - "eval_samples_per_second": 253.195, - "step": 1090740 - }, - { - "epoch": 12.0, - "learning_rate": 9.49928488915782e-08, - "loss": 3.8234, - "step": 1091000 - }, - { - "epoch": 12.01, - "learning_rate": 9.49790967599978e-08, - "loss": 3.8437, - "step": 1091500 - }, - { - "epoch": 12.01, - "learning_rate": 9.49653446284174e-08, - "loss": 3.8494, - "step": 1092000 - }, - { - "epoch": 12.02, - "learning_rate": 9.495159249683701e-08, - "loss": 3.8533, - "step": 1092500 - }, - { - "epoch": 12.02, - "learning_rate": 9.493784036525661e-08, - "loss": 3.8646, - "step": 1093000 - }, - { - "epoch": 12.03, - "learning_rate": 9.492408823367622e-08, - "loss": 3.8291, - "step": 1093500 - }, - { - "epoch": 12.04, - "learning_rate": 9.491033610209583e-08, - "loss": 3.8515, - "step": 1094000 - }, - { - "epoch": 12.04, - "learning_rate": 9.489658397051542e-08, - "loss": 3.85, - "step": 1094500 - }, - { - "epoch": 12.05, - "learning_rate": 9.488283183893503e-08, - "loss": 3.8505, - "step": 1095000 - }, - { - "epoch": 12.05, - "learning_rate": 9.486907970735464e-08, - "loss": 3.8356, - "step": 1095500 - }, - { - "epoch": 12.06, - "learning_rate": 9.485532757577424e-08, - "loss": 3.8407, - "step": 1096000 - }, - { - "epoch": 12.06, - "learning_rate": 9.484157544419385e-08, - "loss": 3.8344, - "step": 1096500 - }, - { - "epoch": 12.07, - "learning_rate": 9.482782331261346e-08, - "loss": 3.8445, - "step": 1097000 - }, - { - "epoch": 12.07, - "learning_rate": 9.481407118103305e-08, - "loss": 3.8602, - "step": 1097500 - }, - { - "epoch": 12.08, - "learning_rate": 9.480031904945266e-08, - "loss": 3.8586, - "step": 1098000 - }, - { - "epoch": 12.09, - "learning_rate": 9.478656691787227e-08, - "loss": 3.8459, - "step": 1098500 - }, - { - "epoch": 12.09, - "learning_rate": 9.477281478629187e-08, - "loss": 3.8466, - "step": 1099000 - }, - { - "epoch": 12.1, - "learning_rate": 9.475906265471148e-08, - "loss": 3.8512, - "step": 1099500 - }, - { - "epoch": 12.1, - "learning_rate": 9.474531052313109e-08, - "loss": 3.8545, - "step": 1100000 - }, - { - "epoch": 12.11, - "learning_rate": 9.473155839155068e-08, - "loss": 3.8633, - "step": 1100500 - }, - { - "epoch": 12.11, - "learning_rate": 9.471780625997029e-08, - "loss": 3.8301, - "step": 1101000 - }, - { - "epoch": 12.12, - "learning_rate": 9.47040541283899e-08, - "loss": 3.8708, - "step": 1101500 - }, - { - "epoch": 12.12, - "learning_rate": 9.46903019968095e-08, - "loss": 3.8366, - "step": 1102000 - }, - { - "epoch": 12.13, - "learning_rate": 9.467654986522911e-08, - "loss": 3.8454, - "step": 1102500 - }, - { - "epoch": 12.13, - "learning_rate": 9.466279773364872e-08, - "loss": 3.8364, - "step": 1103000 - }, - { - "epoch": 12.14, - "learning_rate": 9.464904560206831e-08, - "loss": 3.8277, - "step": 1103500 - }, - { - "epoch": 12.15, - "learning_rate": 9.463529347048792e-08, - "loss": 3.8566, - "step": 1104000 - }, - { - "epoch": 12.15, - "learning_rate": 9.462154133890753e-08, - "loss": 3.8562, - "step": 1104500 - }, - { - "epoch": 12.16, - "learning_rate": 9.460778920732713e-08, - "loss": 3.8211, - "step": 1105000 - }, - { - "epoch": 12.16, - "learning_rate": 9.459403707574674e-08, - "loss": 3.8784, - "step": 1105500 - }, - { - "epoch": 12.17, - "learning_rate": 9.458028494416635e-08, - "loss": 3.8409, - "step": 1106000 - }, - { - "epoch": 12.17, - "learning_rate": 9.456653281258594e-08, - "loss": 3.8558, - "step": 1106500 - }, - { - "epoch": 12.18, - "learning_rate": 9.455278068100555e-08, - "loss": 3.8583, - "step": 1107000 - }, - { - "epoch": 12.18, - "learning_rate": 9.453902854942516e-08, - "loss": 3.8558, - "step": 1107500 - }, - { - "epoch": 12.19, - "learning_rate": 9.452527641784476e-08, - "loss": 3.8456, - "step": 1108000 - }, - { - "epoch": 12.2, - "learning_rate": 9.451152428626437e-08, - "loss": 3.8566, - "step": 1108500 - }, - { - "epoch": 12.2, - "learning_rate": 9.449777215468398e-08, - "loss": 3.8432, - "step": 1109000 - }, - { - "epoch": 12.21, - "learning_rate": 9.448402002310357e-08, - "loss": 3.8462, - "step": 1109500 - }, - { - "epoch": 12.21, - "learning_rate": 9.447026789152318e-08, - "loss": 3.8483, - "step": 1110000 - }, - { - "epoch": 12.22, - "learning_rate": 9.445651575994279e-08, - "loss": 3.8542, - "step": 1110500 - }, - { - "epoch": 12.22, - "learning_rate": 9.444276362836239e-08, - "loss": 3.8275, - "step": 1111000 - }, - { - "epoch": 12.23, - "learning_rate": 9.4429011496782e-08, - "loss": 3.8589, - "step": 1111500 - }, - { - "epoch": 12.23, - "learning_rate": 9.44152593652016e-08, - "loss": 3.8548, - "step": 1112000 - }, - { - "epoch": 12.24, - "learning_rate": 9.44015072336212e-08, - "loss": 3.8475, - "step": 1112500 - }, - { - "epoch": 12.24, - "learning_rate": 9.438775510204081e-08, - "loss": 3.8447, - "step": 1113000 - }, - { - "epoch": 12.25, - "learning_rate": 9.437400297046042e-08, - "loss": 3.8368, - "step": 1113500 - }, - { - "epoch": 12.26, - "learning_rate": 9.436025083888002e-08, - "loss": 3.8414, - "step": 1114000 - }, - { - "epoch": 12.26, - "learning_rate": 9.434649870729963e-08, - "loss": 3.8453, - "step": 1114500 - }, - { - "epoch": 12.27, - "learning_rate": 9.433274657571924e-08, - "loss": 3.8514, - "step": 1115000 - }, - { - "epoch": 12.27, - "learning_rate": 9.431899444413883e-08, - "loss": 3.8609, - "step": 1115500 - }, - { - "epoch": 12.28, - "learning_rate": 9.430524231255844e-08, - "loss": 3.8691, - "step": 1116000 - }, - { - "epoch": 12.28, - "learning_rate": 9.429149018097805e-08, - "loss": 3.8496, - "step": 1116500 - }, - { - "epoch": 12.29, - "learning_rate": 9.427773804939765e-08, - "loss": 3.8407, - "step": 1117000 - }, - { - "epoch": 12.29, - "learning_rate": 9.426398591781726e-08, - "loss": 3.854, - "step": 1117500 - }, - { - "epoch": 12.3, - "learning_rate": 9.425023378623686e-08, - "loss": 3.8544, - "step": 1118000 - }, - { - "epoch": 12.31, - "learning_rate": 9.423648165465646e-08, - "loss": 3.8364, - "step": 1118500 - }, - { - "epoch": 12.31, - "learning_rate": 9.422272952307607e-08, - "loss": 3.8483, - "step": 1119000 - }, - { - "epoch": 12.32, - "learning_rate": 9.420897739149568e-08, - "loss": 3.8348, - "step": 1119500 - }, - { - "epoch": 12.32, - "learning_rate": 9.419522525991528e-08, - "loss": 3.8491, - "step": 1120000 - }, - { - "epoch": 12.33, - "learning_rate": 9.418147312833488e-08, - "loss": 3.8641, - "step": 1120500 - }, - { - "epoch": 12.33, - "learning_rate": 9.41677209967545e-08, - "loss": 3.8354, - "step": 1121000 - }, - { - "epoch": 12.34, - "learning_rate": 9.415396886517409e-08, - "loss": 3.8441, - "step": 1121500 - }, - { - "epoch": 12.34, - "learning_rate": 9.41402167335937e-08, - "loss": 3.8482, - "step": 1122000 - }, - { - "epoch": 12.35, - "learning_rate": 9.412646460201331e-08, - "loss": 3.8572, - "step": 1122500 - }, - { - "epoch": 12.35, - "learning_rate": 9.41127124704329e-08, - "loss": 3.8433, - "step": 1123000 - }, - { - "epoch": 12.36, - "learning_rate": 9.409896033885251e-08, - "loss": 3.8443, - "step": 1123500 - }, - { - "epoch": 12.37, - "learning_rate": 9.408520820727212e-08, - "loss": 3.8391, - "step": 1124000 - }, - { - "epoch": 12.37, - "learning_rate": 9.407145607569172e-08, - "loss": 3.8617, - "step": 1124500 - }, - { - "epoch": 12.38, - "learning_rate": 9.405770394411133e-08, - "loss": 3.8388, - "step": 1125000 - }, - { - "epoch": 12.38, - "learning_rate": 9.404395181253094e-08, - "loss": 3.8581, - "step": 1125500 - }, - { - "epoch": 12.39, - "learning_rate": 9.403019968095053e-08, - "loss": 3.8504, - "step": 1126000 - }, - { - "epoch": 12.39, - "learning_rate": 9.401644754937014e-08, - "loss": 3.8322, - "step": 1126500 - }, - { - "epoch": 12.4, - "learning_rate": 9.400269541778975e-08, - "loss": 3.8383, - "step": 1127000 - }, - { - "epoch": 12.4, - "learning_rate": 9.398894328620935e-08, - "loss": 3.8522, - "step": 1127500 - }, - { - "epoch": 12.41, - "learning_rate": 9.397519115462896e-08, - "loss": 3.8329, - "step": 1128000 - }, - { - "epoch": 12.42, - "learning_rate": 9.396143902304857e-08, - "loss": 3.8356, - "step": 1128500 - }, - { - "epoch": 12.42, - "learning_rate": 9.394768689146816e-08, - "loss": 3.8424, - "step": 1129000 - }, - { - "epoch": 12.43, - "learning_rate": 9.393393475988777e-08, - "loss": 3.8158, - "step": 1129500 - }, - { - "epoch": 12.43, - "learning_rate": 9.392018262830738e-08, - "loss": 3.8494, - "step": 1130000 - }, - { - "epoch": 12.44, - "learning_rate": 9.390643049672698e-08, - "loss": 3.8515, - "step": 1130500 - }, - { - "epoch": 12.44, - "learning_rate": 9.389267836514659e-08, - "loss": 3.8338, - "step": 1131000 - }, - { - "epoch": 12.45, - "learning_rate": 9.38789262335662e-08, - "loss": 3.8524, - "step": 1131500 - }, - { - "epoch": 12.45, - "learning_rate": 9.38651741019858e-08, - "loss": 3.8483, - "step": 1132000 - }, - { - "epoch": 12.46, - "learning_rate": 9.38514219704054e-08, - "loss": 3.8332, - "step": 1132500 - }, - { - "epoch": 12.46, - "learning_rate": 9.383766983882501e-08, - "loss": 3.8514, - "step": 1133000 - }, - { - "epoch": 12.47, - "learning_rate": 9.382391770724461e-08, - "loss": 3.8435, - "step": 1133500 - }, - { - "epoch": 12.48, - "learning_rate": 9.381016557566422e-08, - "loss": 3.8634, - "step": 1134000 - }, - { - "epoch": 12.48, - "learning_rate": 9.379641344408383e-08, - "loss": 3.8614, - "step": 1134500 - }, - { - "epoch": 12.49, - "learning_rate": 9.378266131250344e-08, - "loss": 3.8595, - "step": 1135000 - }, - { - "epoch": 12.49, - "learning_rate": 9.376890918092303e-08, - "loss": 3.8436, - "step": 1135500 - }, - { - "epoch": 12.5, - "learning_rate": 9.375515704934264e-08, - "loss": 3.8546, - "step": 1136000 - }, - { - "epoch": 12.5, - "learning_rate": 9.374140491776225e-08, - "loss": 3.8584, - "step": 1136500 - }, - { - "epoch": 12.51, - "learning_rate": 9.372765278618186e-08, - "loss": 3.847, - "step": 1137000 - }, - { - "epoch": 12.51, - "learning_rate": 9.371390065460146e-08, - "loss": 3.8429, - "step": 1137500 - }, - { - "epoch": 12.52, - "learning_rate": 9.370014852302107e-08, - "loss": 3.8421, - "step": 1138000 - }, - { - "epoch": 12.53, - "learning_rate": 9.368639639144068e-08, - "loss": 3.8301, - "step": 1138500 - }, - { - "epoch": 12.53, - "learning_rate": 9.367264425986027e-08, - "loss": 3.8522, - "step": 1139000 - }, - { - "epoch": 12.54, - "learning_rate": 9.365889212827988e-08, - "loss": 3.8434, - "step": 1139500 - }, - { - "epoch": 12.54, - "learning_rate": 9.364513999669949e-08, - "loss": 3.8342, - "step": 1140000 - }, - { - "epoch": 12.55, - "learning_rate": 9.36313878651191e-08, - "loss": 3.8511, - "step": 1140500 - }, - { - "epoch": 12.55, - "learning_rate": 9.36176357335387e-08, - "loss": 3.8365, - "step": 1141000 - }, - { - "epoch": 12.56, - "learning_rate": 9.36038836019583e-08, - "loss": 3.8405, - "step": 1141500 - }, - { - "epoch": 12.56, - "learning_rate": 9.359013147037791e-08, - "loss": 3.8439, - "step": 1142000 - }, - { - "epoch": 12.57, - "learning_rate": 9.357637933879751e-08, - "loss": 3.856, - "step": 1142500 - }, - { - "epoch": 12.57, - "learning_rate": 9.356262720721712e-08, - "loss": 3.8466, - "step": 1143000 - }, - { - "epoch": 12.58, - "learning_rate": 9.354887507563673e-08, - "loss": 3.8499, - "step": 1143500 - }, - { - "epoch": 12.59, - "learning_rate": 9.353512294405633e-08, - "loss": 3.8465, - "step": 1144000 - }, - { - "epoch": 12.59, - "learning_rate": 9.352137081247593e-08, - "loss": 3.8395, - "step": 1144500 - }, - { - "epoch": 12.6, - "learning_rate": 9.350761868089554e-08, - "loss": 3.8371, - "step": 1145000 - }, - { - "epoch": 12.6, - "learning_rate": 9.349386654931514e-08, - "loss": 3.8399, - "step": 1145500 - }, - { - "epoch": 12.61, - "learning_rate": 9.348011441773475e-08, - "loss": 3.8291, - "step": 1146000 - }, - { - "epoch": 12.61, - "learning_rate": 9.346636228615436e-08, - "loss": 3.8304, - "step": 1146500 - }, - { - "epoch": 12.62, - "learning_rate": 9.345261015457396e-08, - "loss": 3.8305, - "step": 1147000 - }, - { - "epoch": 12.62, - "learning_rate": 9.343885802299356e-08, - "loss": 3.8472, - "step": 1147500 - }, - { - "epoch": 12.63, - "learning_rate": 9.342510589141317e-08, - "loss": 3.8415, - "step": 1148000 - }, - { - "epoch": 12.64, - "learning_rate": 9.341135375983277e-08, - "loss": 3.8467, - "step": 1148500 - }, - { - "epoch": 12.64, - "learning_rate": 9.339760162825238e-08, - "loss": 3.8542, - "step": 1149000 - }, - { - "epoch": 12.65, - "learning_rate": 9.338384949667199e-08, - "loss": 3.8424, - "step": 1149500 - }, - { - "epoch": 12.65, - "learning_rate": 9.337009736509158e-08, - "loss": 3.8254, - "step": 1150000 - }, - { - "epoch": 12.66, - "learning_rate": 9.33563452335112e-08, - "loss": 3.8499, - "step": 1150500 - }, - { - "epoch": 12.66, - "learning_rate": 9.33425931019308e-08, - "loss": 3.8562, - "step": 1151000 - }, - { - "epoch": 12.67, - "learning_rate": 9.33288409703504e-08, - "loss": 3.8467, - "step": 1151500 - }, - { - "epoch": 12.67, - "learning_rate": 9.331508883877001e-08, - "loss": 3.8452, - "step": 1152000 - }, - { - "epoch": 12.68, - "learning_rate": 9.330133670718962e-08, - "loss": 3.8336, - "step": 1152500 - }, - { - "epoch": 12.68, - "learning_rate": 9.328758457560921e-08, - "loss": 3.823, - "step": 1153000 - }, - { - "epoch": 12.69, - "learning_rate": 9.327383244402882e-08, - "loss": 3.8553, - "step": 1153500 - }, - { - "epoch": 12.7, - "learning_rate": 9.326008031244843e-08, - "loss": 3.8567, - "step": 1154000 - }, - { - "epoch": 12.7, - "learning_rate": 9.324632818086803e-08, - "loss": 3.8446, - "step": 1154500 - }, - { - "epoch": 12.71, - "learning_rate": 9.323257604928764e-08, - "loss": 3.8422, - "step": 1155000 - }, - { - "epoch": 12.71, - "learning_rate": 9.321882391770725e-08, - "loss": 3.8255, - "step": 1155500 - }, - { - "epoch": 12.72, - "learning_rate": 9.320507178612684e-08, - "loss": 3.8435, - "step": 1156000 - }, - { - "epoch": 12.72, - "learning_rate": 9.319131965454645e-08, - "loss": 3.849, - "step": 1156500 - }, - { - "epoch": 12.73, - "learning_rate": 9.317756752296606e-08, - "loss": 3.8247, - "step": 1157000 - }, - { - "epoch": 12.73, - "learning_rate": 9.316381539138566e-08, - "loss": 3.8498, - "step": 1157500 - }, - { - "epoch": 12.74, - "learning_rate": 9.315006325980527e-08, - "loss": 3.8387, - "step": 1158000 - }, - { - "epoch": 12.75, - "learning_rate": 9.313631112822488e-08, - "loss": 3.851, - "step": 1158500 - }, - { - "epoch": 12.75, - "learning_rate": 9.312255899664447e-08, - "loss": 3.8589, - "step": 1159000 - }, - { - "epoch": 12.76, - "learning_rate": 9.310880686506408e-08, - "loss": 3.8516, - "step": 1159500 - }, - { - "epoch": 12.76, - "learning_rate": 9.309505473348369e-08, - "loss": 3.8563, - "step": 1160000 - }, - { - "epoch": 12.77, - "learning_rate": 9.308130260190329e-08, - "loss": 3.8329, - "step": 1160500 - }, - { - "epoch": 12.77, - "learning_rate": 9.30675504703229e-08, - "loss": 3.8258, - "step": 1161000 - }, - { - "epoch": 12.78, - "learning_rate": 9.305379833874251e-08, - "loss": 3.8369, - "step": 1161500 - }, - { - "epoch": 12.78, - "learning_rate": 9.30400462071621e-08, - "loss": 3.8504, - "step": 1162000 - }, - { - "epoch": 12.79, - "learning_rate": 9.302629407558171e-08, - "loss": 3.853, - "step": 1162500 - }, - { - "epoch": 12.79, - "learning_rate": 9.301254194400132e-08, - "loss": 3.8362, - "step": 1163000 - }, - { - "epoch": 12.8, - "learning_rate": 9.299878981242092e-08, - "loss": 3.8469, - "step": 1163500 - }, - { - "epoch": 12.81, - "learning_rate": 9.298503768084053e-08, - "loss": 3.8514, - "step": 1164000 - }, - { - "epoch": 12.81, - "learning_rate": 9.297128554926012e-08, - "loss": 3.8272, - "step": 1164500 - }, - { - "epoch": 12.82, - "learning_rate": 9.295753341767973e-08, - "loss": 3.8303, - "step": 1165000 - }, - { - "epoch": 12.82, - "learning_rate": 9.294378128609934e-08, - "loss": 3.8392, - "step": 1165500 - }, - { - "epoch": 12.83, - "learning_rate": 9.293002915451894e-08, - "loss": 3.8377, - "step": 1166000 - }, - { - "epoch": 12.83, - "learning_rate": 9.291627702293855e-08, - "loss": 3.8435, - "step": 1166500 - }, - { - "epoch": 12.84, - "learning_rate": 9.290252489135816e-08, - "loss": 3.8245, - "step": 1167000 - }, - { - "epoch": 12.84, - "learning_rate": 9.288877275977775e-08, - "loss": 3.8507, - "step": 1167500 - }, - { - "epoch": 12.85, - "learning_rate": 9.287502062819736e-08, - "loss": 3.8472, - "step": 1168000 - }, - { - "epoch": 12.86, - "learning_rate": 9.286126849661697e-08, - "loss": 3.8193, - "step": 1168500 - }, - { - "epoch": 12.86, - "learning_rate": 9.284751636503657e-08, - "loss": 3.837, - "step": 1169000 - }, - { - "epoch": 12.87, - "learning_rate": 9.283376423345618e-08, - "loss": 3.8436, - "step": 1169500 - }, - { - "epoch": 12.87, - "learning_rate": 9.282001210187579e-08, - "loss": 3.839, - "step": 1170000 - }, - { - "epoch": 12.88, - "learning_rate": 9.280625997029538e-08, - "loss": 3.8458, - "step": 1170500 - }, - { - "epoch": 12.88, - "learning_rate": 9.279250783871499e-08, - "loss": 3.8394, - "step": 1171000 - }, - { - "epoch": 12.89, - "learning_rate": 9.27787557071346e-08, - "loss": 3.853, - "step": 1171500 - }, - { - "epoch": 12.89, - "learning_rate": 9.27650035755542e-08, - "loss": 3.8484, - "step": 1172000 - }, - { - "epoch": 12.9, - "learning_rate": 9.275125144397381e-08, - "loss": 3.8281, - "step": 1172500 - }, - { - "epoch": 12.91, - "learning_rate": 9.273749931239342e-08, - "loss": 3.8462, - "step": 1173000 - }, - { - "epoch": 12.91, - "learning_rate": 9.272374718081301e-08, - "loss": 3.8299, - "step": 1173500 - }, - { - "epoch": 12.92, - "learning_rate": 9.270999504923262e-08, - "loss": 3.833, - "step": 1174000 - }, - { - "epoch": 12.92, - "learning_rate": 9.269624291765223e-08, - "loss": 3.8459, - "step": 1174500 - }, - { - "epoch": 12.93, - "learning_rate": 9.268249078607183e-08, - "loss": 3.8403, - "step": 1175000 - }, - { - "epoch": 12.93, - "learning_rate": 9.266873865449144e-08, - "loss": 3.8356, - "step": 1175500 - }, - { - "epoch": 12.94, - "learning_rate": 9.265498652291105e-08, - "loss": 3.8462, - "step": 1176000 - }, - { - "epoch": 12.94, - "learning_rate": 9.264123439133064e-08, - "loss": 3.8358, - "step": 1176500 - }, - { - "epoch": 12.95, - "learning_rate": 9.262748225975025e-08, - "loss": 3.8372, - "step": 1177000 - }, - { - "epoch": 12.95, - "learning_rate": 9.261373012816986e-08, - "loss": 3.8385, - "step": 1177500 - }, - { - "epoch": 12.96, - "learning_rate": 9.259997799658946e-08, - "loss": 3.8371, - "step": 1178000 - }, - { - "epoch": 12.97, - "learning_rate": 9.258622586500907e-08, - "loss": 3.8283, - "step": 1178500 - }, - { - "epoch": 12.97, - "learning_rate": 9.257247373342868e-08, - "loss": 3.8499, - "step": 1179000 - }, - { - "epoch": 12.98, - "learning_rate": 9.255872160184827e-08, - "loss": 3.8439, - "step": 1179500 - }, - { - "epoch": 12.98, - "learning_rate": 9.254496947026788e-08, - "loss": 3.8382, - "step": 1180000 - }, - { - "epoch": 12.99, - "learning_rate": 9.253121733868749e-08, - "loss": 3.8405, - "step": 1180500 - }, - { - "epoch": 12.99, - "learning_rate": 9.251746520710709e-08, - "loss": 3.8276, - "step": 1181000 - }, - { - "epoch": 13.0, - "learning_rate": 9.25037130755267e-08, - "loss": 3.8529, - "step": 1181500 - }, - { - "epoch": 13.0, - "eval_loss": 3.883164405822754, - "eval_runtime": 6.142, - "eval_samples_per_second": 253.014, - "step": 1181635 - }, - { - "epoch": 13.0, - "learning_rate": 9.24899609439463e-08, - "loss": 3.8435, - "step": 1182000 - }, - { - "epoch": 13.01, - "learning_rate": 9.247620881236591e-08, - "loss": 3.826, - "step": 1182500 - }, - { - "epoch": 13.02, - "learning_rate": 9.246245668078551e-08, - "loss": 3.8357, - "step": 1183000 - }, - { - "epoch": 13.02, - "learning_rate": 9.244870454920512e-08, - "loss": 3.8327, - "step": 1183500 - }, - { - "epoch": 13.03, - "learning_rate": 9.243495241762473e-08, - "loss": 3.8556, - "step": 1184000 - }, - { - "epoch": 13.03, - "learning_rate": 9.242120028604434e-08, - "loss": 3.8313, - "step": 1184500 - }, - { - "epoch": 13.04, - "learning_rate": 9.240744815446393e-08, - "loss": 3.858, - "step": 1185000 - }, - { - "epoch": 13.04, - "learning_rate": 9.239369602288354e-08, - "loss": 3.854, - "step": 1185500 - }, - { - "epoch": 13.05, - "learning_rate": 9.237994389130315e-08, - "loss": 3.846, - "step": 1186000 - }, - { - "epoch": 13.05, - "learning_rate": 9.236619175972275e-08, - "loss": 3.8472, - "step": 1186500 - }, - { - "epoch": 13.06, - "learning_rate": 9.235243962814236e-08, - "loss": 3.8436, - "step": 1187000 - }, - { - "epoch": 13.06, - "learning_rate": 9.233868749656197e-08, - "loss": 3.8398, - "step": 1187500 - }, - { - "epoch": 13.07, - "learning_rate": 9.232493536498158e-08, - "loss": 3.8173, - "step": 1188000 - }, - { - "epoch": 13.08, - "learning_rate": 9.231118323340117e-08, - "loss": 3.8494, - "step": 1188500 - }, - { - "epoch": 13.08, - "learning_rate": 9.229743110182078e-08, - "loss": 3.836, - "step": 1189000 - }, - { - "epoch": 13.09, - "learning_rate": 9.228367897024039e-08, - "loss": 3.8333, - "step": 1189500 - }, - { - "epoch": 13.09, - "learning_rate": 9.226992683865999e-08, - "loss": 3.842, - "step": 1190000 - }, - { - "epoch": 13.1, - "learning_rate": 9.22561747070796e-08, - "loss": 3.8228, - "step": 1190500 - }, - { - "epoch": 13.1, - "learning_rate": 9.224242257549921e-08, - "loss": 3.8329, - "step": 1191000 - }, - { - "epoch": 13.11, - "learning_rate": 9.22286704439188e-08, - "loss": 3.8283, - "step": 1191500 - }, - { - "epoch": 13.11, - "learning_rate": 9.221491831233841e-08, - "loss": 3.8551, - "step": 1192000 - }, - { - "epoch": 13.12, - "learning_rate": 9.220116618075802e-08, - "loss": 3.8317, - "step": 1192500 - }, - { - "epoch": 13.13, - "learning_rate": 9.218741404917762e-08, - "loss": 3.8255, - "step": 1193000 - }, - { - "epoch": 13.13, - "learning_rate": 9.217366191759723e-08, - "loss": 3.8387, - "step": 1193500 - }, - { - "epoch": 13.14, - "learning_rate": 9.215990978601684e-08, - "loss": 3.8346, - "step": 1194000 - }, - { - "epoch": 13.14, - "learning_rate": 9.214615765443643e-08, - "loss": 3.8476, - "step": 1194500 - }, - { - "epoch": 13.15, - "learning_rate": 9.213240552285604e-08, - "loss": 3.8553, - "step": 1195000 - }, - { - "epoch": 13.15, - "learning_rate": 9.211865339127565e-08, - "loss": 3.8554, - "step": 1195500 - }, - { - "epoch": 13.16, - "learning_rate": 9.210490125969525e-08, - "loss": 3.8496, - "step": 1196000 - }, - { - "epoch": 13.16, - "learning_rate": 9.209114912811486e-08, - "loss": 3.859, - "step": 1196500 - }, - { - "epoch": 13.17, - "learning_rate": 9.207739699653447e-08, - "loss": 3.8418, - "step": 1197000 - }, - { - "epoch": 13.17, - "learning_rate": 9.206364486495406e-08, - "loss": 3.824, - "step": 1197500 - }, - { - "epoch": 13.18, - "learning_rate": 9.204989273337367e-08, - "loss": 3.8371, - "step": 1198000 - }, - { - "epoch": 13.19, - "learning_rate": 9.203614060179328e-08, - "loss": 3.8478, - "step": 1198500 - }, - { - "epoch": 13.19, - "learning_rate": 9.202238847021288e-08, - "loss": 3.8318, - "step": 1199000 - }, - { - "epoch": 13.2, - "learning_rate": 9.200863633863249e-08, - "loss": 3.8384, - "step": 1199500 - }, - { - "epoch": 13.2, - "learning_rate": 9.19948842070521e-08, - "loss": 3.8368, - "step": 1200000 - }, - { - "epoch": 13.21, - "learning_rate": 9.198113207547169e-08, - "loss": 3.8501, - "step": 1200500 - }, - { - "epoch": 13.21, - "learning_rate": 9.19673799438913e-08, - "loss": 3.8391, - "step": 1201000 - }, - { - "epoch": 13.22, - "learning_rate": 9.195362781231091e-08, - "loss": 3.8356, - "step": 1201500 - }, - { - "epoch": 13.22, - "learning_rate": 9.19398756807305e-08, - "loss": 3.8369, - "step": 1202000 - }, - { - "epoch": 13.23, - "learning_rate": 9.192612354915012e-08, - "loss": 3.826, - "step": 1202500 - }, - { - "epoch": 13.24, - "learning_rate": 9.191237141756973e-08, - "loss": 3.8188, - "step": 1203000 - }, - { - "epoch": 13.24, - "learning_rate": 9.189861928598932e-08, - "loss": 3.8421, - "step": 1203500 - }, - { - "epoch": 13.25, - "learning_rate": 9.188486715440893e-08, - "loss": 3.8414, - "step": 1204000 - }, - { - "epoch": 13.25, - "learning_rate": 9.187111502282854e-08, - "loss": 3.8285, - "step": 1204500 - }, - { - "epoch": 13.26, - "learning_rate": 9.185736289124814e-08, - "loss": 3.8447, - "step": 1205000 - }, - { - "epoch": 13.26, - "learning_rate": 9.184361075966775e-08, - "loss": 3.83, - "step": 1205500 - }, - { - "epoch": 13.27, - "learning_rate": 9.182985862808735e-08, - "loss": 3.8213, - "step": 1206000 - }, - { - "epoch": 13.27, - "learning_rate": 9.181610649650695e-08, - "loss": 3.8473, - "step": 1206500 - }, - { - "epoch": 13.28, - "learning_rate": 9.180235436492656e-08, - "loss": 3.8394, - "step": 1207000 - }, - { - "epoch": 13.28, - "learning_rate": 9.178860223334617e-08, - "loss": 3.8413, - "step": 1207500 - }, - { - "epoch": 13.29, - "learning_rate": 9.177485010176577e-08, - "loss": 3.8429, - "step": 1208000 - }, - { - "epoch": 13.3, - "learning_rate": 9.176109797018538e-08, - "loss": 3.8222, - "step": 1208500 - }, - { - "epoch": 13.3, - "learning_rate": 9.174734583860498e-08, - "loss": 3.833, - "step": 1209000 - }, - { - "epoch": 13.31, - "learning_rate": 9.173359370702458e-08, - "loss": 3.8409, - "step": 1209500 - }, - { - "epoch": 13.31, - "learning_rate": 9.171984157544419e-08, - "loss": 3.8438, - "step": 1210000 - }, - { - "epoch": 13.32, - "learning_rate": 9.17060894438638e-08, - "loss": 3.8277, - "step": 1210500 - }, - { - "epoch": 13.32, - "learning_rate": 9.16923373122834e-08, - "loss": 3.8349, - "step": 1211000 - }, - { - "epoch": 13.33, - "learning_rate": 9.1678585180703e-08, - "loss": 3.8444, - "step": 1211500 - }, - { - "epoch": 13.33, - "learning_rate": 9.166483304912261e-08, - "loss": 3.8565, - "step": 1212000 - }, - { - "epoch": 13.34, - "learning_rate": 9.165108091754221e-08, - "loss": 3.8534, - "step": 1212500 - }, - { - "epoch": 13.35, - "learning_rate": 9.163732878596182e-08, - "loss": 3.8493, - "step": 1213000 - }, - { - "epoch": 13.35, - "learning_rate": 9.162357665438143e-08, - "loss": 3.8298, - "step": 1213500 - }, - { - "epoch": 13.36, - "learning_rate": 9.160982452280102e-08, - "loss": 3.8399, - "step": 1214000 - }, - { - "epoch": 13.36, - "learning_rate": 9.159607239122063e-08, - "loss": 3.8411, - "step": 1214500 - }, - { - "epoch": 13.37, - "learning_rate": 9.158232025964024e-08, - "loss": 3.8151, - "step": 1215000 - }, - { - "epoch": 13.37, - "learning_rate": 9.156856812805984e-08, - "loss": 3.8279, - "step": 1215500 - }, - { - "epoch": 13.38, - "learning_rate": 9.155481599647945e-08, - "loss": 3.8292, - "step": 1216000 - }, - { - "epoch": 13.38, - "learning_rate": 9.154106386489906e-08, - "loss": 3.8525, - "step": 1216500 - }, - { - "epoch": 13.39, - "learning_rate": 9.152731173331865e-08, - "loss": 3.8336, - "step": 1217000 - }, - { - "epoch": 13.39, - "learning_rate": 9.151355960173826e-08, - "loss": 3.8598, - "step": 1217500 - }, - { - "epoch": 13.4, - "learning_rate": 9.149980747015787e-08, - "loss": 3.8331, - "step": 1218000 - }, - { - "epoch": 13.41, - "learning_rate": 9.148605533857747e-08, - "loss": 3.8237, - "step": 1218500 - }, - { - "epoch": 13.41, - "learning_rate": 9.147230320699708e-08, - "loss": 3.8359, - "step": 1219000 - }, - { - "epoch": 13.42, - "learning_rate": 9.145855107541669e-08, - "loss": 3.8295, - "step": 1219500 - }, - { - "epoch": 13.42, - "learning_rate": 9.144479894383628e-08, - "loss": 3.8374, - "step": 1220000 - }, - { - "epoch": 13.43, - "learning_rate": 9.14310468122559e-08, - "loss": 3.8434, - "step": 1220500 - }, - { - "epoch": 13.43, - "learning_rate": 9.14172946806755e-08, - "loss": 3.8352, - "step": 1221000 - }, - { - "epoch": 13.44, - "learning_rate": 9.14035425490951e-08, - "loss": 3.8392, - "step": 1221500 - }, - { - "epoch": 13.44, - "learning_rate": 9.138979041751471e-08, - "loss": 3.842, - "step": 1222000 - }, - { - "epoch": 13.45, - "learning_rate": 9.137603828593432e-08, - "loss": 3.8447, - "step": 1222500 - }, - { - "epoch": 13.46, - "learning_rate": 9.136228615435391e-08, - "loss": 3.8526, - "step": 1223000 - }, - { - "epoch": 13.46, - "learning_rate": 9.134853402277352e-08, - "loss": 3.8412, - "step": 1223500 - }, - { - "epoch": 13.47, - "learning_rate": 9.133478189119313e-08, - "loss": 3.836, - "step": 1224000 - }, - { - "epoch": 13.47, - "learning_rate": 9.132102975961273e-08, - "loss": 3.8414, - "step": 1224500 - }, - { - "epoch": 13.48, - "learning_rate": 9.130727762803234e-08, - "loss": 3.8434, - "step": 1225000 - }, - { - "epoch": 13.48, - "learning_rate": 9.129352549645195e-08, - "loss": 3.8256, - "step": 1225500 - }, - { - "epoch": 13.49, - "learning_rate": 9.127977336487154e-08, - "loss": 3.8252, - "step": 1226000 - }, - { - "epoch": 13.49, - "learning_rate": 9.126602123329115e-08, - "loss": 3.8345, - "step": 1226500 - }, - { - "epoch": 13.5, - "learning_rate": 9.125226910171076e-08, - "loss": 3.8522, - "step": 1227000 - }, - { - "epoch": 13.5, - "learning_rate": 9.123851697013036e-08, - "loss": 3.8464, - "step": 1227500 - }, - { - "epoch": 13.51, - "learning_rate": 9.122476483854997e-08, - "loss": 3.8329, - "step": 1228000 - }, - { - "epoch": 13.52, - "learning_rate": 9.121101270696958e-08, - "loss": 3.8437, - "step": 1228500 - }, - { - "epoch": 13.52, - "learning_rate": 9.119726057538917e-08, - "loss": 3.8389, - "step": 1229000 - }, - { - "epoch": 13.53, - "learning_rate": 9.118350844380878e-08, - "loss": 3.8353, - "step": 1229500 - }, - { - "epoch": 13.53, - "learning_rate": 9.116975631222839e-08, - "loss": 3.8294, - "step": 1230000 - }, - { - "epoch": 13.54, - "learning_rate": 9.115600418064799e-08, - "loss": 3.8433, - "step": 1230500 - }, - { - "epoch": 13.54, - "learning_rate": 9.11422520490676e-08, - "loss": 3.8331, - "step": 1231000 - }, - { - "epoch": 13.55, - "learning_rate": 9.11284999174872e-08, - "loss": 3.8299, - "step": 1231500 - }, - { - "epoch": 13.55, - "learning_rate": 9.111474778590682e-08, - "loss": 3.8405, - "step": 1232000 - }, - { - "epoch": 13.56, - "learning_rate": 9.110099565432641e-08, - "loss": 3.8342, - "step": 1232500 - }, - { - "epoch": 13.57, - "learning_rate": 9.108724352274602e-08, - "loss": 3.8416, - "step": 1233000 - }, - { - "epoch": 13.57, - "learning_rate": 9.107349139116563e-08, - "loss": 3.837, - "step": 1233500 - }, - { - "epoch": 13.58, - "learning_rate": 9.105973925958524e-08, - "loss": 3.8327, - "step": 1234000 - }, - { - "epoch": 13.58, - "learning_rate": 9.104598712800484e-08, - "loss": 3.8413, - "step": 1234500 - }, - { - "epoch": 13.59, - "learning_rate": 9.103223499642445e-08, - "loss": 3.8333, - "step": 1235000 - }, - { - "epoch": 13.59, - "learning_rate": 9.101848286484405e-08, - "loss": 3.8149, - "step": 1235500 - }, - { - "epoch": 13.6, - "learning_rate": 9.100473073326365e-08, - "loss": 3.8377, - "step": 1236000 - }, - { - "epoch": 13.6, - "learning_rate": 9.099097860168326e-08, - "loss": 3.8467, - "step": 1236500 - }, - { - "epoch": 13.61, - "learning_rate": 9.097722647010287e-08, - "loss": 3.8293, - "step": 1237000 - }, - { - "epoch": 13.61, - "learning_rate": 9.096347433852248e-08, - "loss": 3.837, - "step": 1237500 - }, - { - "epoch": 13.62, - "learning_rate": 9.094972220694207e-08, - "loss": 3.8359, - "step": 1238000 - }, - { - "epoch": 13.63, - "learning_rate": 9.093597007536168e-08, - "loss": 3.824, - "step": 1238500 - }, - { - "epoch": 13.63, - "learning_rate": 9.09222179437813e-08, - "loss": 3.8252, - "step": 1239000 - }, - { - "epoch": 13.64, - "learning_rate": 9.090846581220089e-08, - "loss": 3.8431, - "step": 1239500 - }, - { - "epoch": 13.64, - "learning_rate": 9.08947136806205e-08, - "loss": 3.855, - "step": 1240000 - }, - { - "epoch": 13.65, - "learning_rate": 9.08809615490401e-08, - "loss": 3.8327, - "step": 1240500 - }, - { - "epoch": 13.65, - "learning_rate": 9.08672094174597e-08, - "loss": 3.8392, - "step": 1241000 - }, - { - "epoch": 13.66, - "learning_rate": 9.085345728587931e-08, - "loss": 3.8306, - "step": 1241500 - }, - { - "epoch": 13.66, - "learning_rate": 9.083970515429891e-08, - "loss": 3.8445, - "step": 1242000 - }, - { - "epoch": 13.67, - "learning_rate": 9.082595302271852e-08, - "loss": 3.8407, - "step": 1242500 - }, - { - "epoch": 13.68, - "learning_rate": 9.081220089113813e-08, - "loss": 3.8331, - "step": 1243000 - }, - { - "epoch": 13.68, - "learning_rate": 9.079844875955772e-08, - "loss": 3.8528, - "step": 1243500 - }, - { - "epoch": 13.69, - "learning_rate": 9.078469662797733e-08, - "loss": 3.8492, - "step": 1244000 - }, - { - "epoch": 13.69, - "learning_rate": 9.077094449639694e-08, - "loss": 3.835, - "step": 1244500 - }, - { - "epoch": 13.7, - "learning_rate": 9.075719236481654e-08, - "loss": 3.8104, - "step": 1245000 - }, - { - "epoch": 13.7, - "learning_rate": 9.074344023323615e-08, - "loss": 3.8294, - "step": 1245500 - }, - { - "epoch": 13.71, - "learning_rate": 9.072968810165576e-08, - "loss": 3.8243, - "step": 1246000 - }, - { - "epoch": 13.71, - "learning_rate": 9.071593597007535e-08, - "loss": 3.822, - "step": 1246500 - }, - { - "epoch": 13.72, - "learning_rate": 9.070218383849496e-08, - "loss": 3.8266, - "step": 1247000 - }, - { - "epoch": 13.72, - "learning_rate": 9.068843170691457e-08, - "loss": 3.8195, - "step": 1247500 - }, - { - "epoch": 13.73, - "learning_rate": 9.067467957533417e-08, - "loss": 3.8157, - "step": 1248000 - }, - { - "epoch": 13.74, - "learning_rate": 9.066092744375378e-08, - "loss": 3.8359, - "step": 1248500 - }, - { - "epoch": 13.74, - "learning_rate": 9.064717531217339e-08, - "loss": 3.8356, - "step": 1249000 - }, - { - "epoch": 13.75, - "learning_rate": 9.063342318059298e-08, - "loss": 3.8221, - "step": 1249500 - }, - { - "epoch": 13.75, - "learning_rate": 9.061967104901259e-08, - "loss": 3.8219, - "step": 1250000 - }, - { - "epoch": 13.76, - "learning_rate": 9.06059189174322e-08, - "loss": 3.8214, - "step": 1250500 - }, - { - "epoch": 13.76, - "learning_rate": 9.05921667858518e-08, - "loss": 3.8301, - "step": 1251000 - }, - { - "epoch": 13.77, - "learning_rate": 9.057841465427141e-08, - "loss": 3.8324, - "step": 1251500 - }, - { - "epoch": 13.77, - "learning_rate": 9.056466252269102e-08, - "loss": 3.8312, - "step": 1252000 - }, - { - "epoch": 13.78, - "learning_rate": 9.055091039111061e-08, - "loss": 3.8204, - "step": 1252500 - }, - { - "epoch": 13.79, - "learning_rate": 9.053715825953022e-08, - "loss": 3.8401, - "step": 1253000 - }, - { - "epoch": 13.79, - "learning_rate": 9.052340612794983e-08, - "loss": 3.8298, - "step": 1253500 - }, - { - "epoch": 13.8, - "learning_rate": 9.050965399636943e-08, - "loss": 3.839, - "step": 1254000 - }, - { - "epoch": 13.8, - "learning_rate": 9.049590186478904e-08, - "loss": 3.85, - "step": 1254500 - }, - { - "epoch": 13.81, - "learning_rate": 9.048214973320865e-08, - "loss": 3.8128, - "step": 1255000 - }, - { - "epoch": 13.81, - "learning_rate": 9.046839760162824e-08, - "loss": 3.8177, - "step": 1255500 - }, - { - "epoch": 13.82, - "learning_rate": 9.045464547004785e-08, - "loss": 3.8591, - "step": 1256000 - }, - { - "epoch": 13.82, - "learning_rate": 9.044089333846746e-08, - "loss": 3.8283, - "step": 1256500 - }, - { - "epoch": 13.83, - "learning_rate": 9.042714120688706e-08, - "loss": 3.8344, - "step": 1257000 - }, - { - "epoch": 13.83, - "learning_rate": 9.041338907530667e-08, - "loss": 3.8203, - "step": 1257500 - }, - { - "epoch": 13.84, - "learning_rate": 9.039963694372628e-08, - "loss": 3.816, - "step": 1258000 - }, - { - "epoch": 13.85, - "learning_rate": 9.038588481214587e-08, - "loss": 3.8433, - "step": 1258500 - }, - { - "epoch": 13.85, - "learning_rate": 9.037213268056548e-08, - "loss": 3.8174, - "step": 1259000 - }, - { - "epoch": 13.86, - "learning_rate": 9.035838054898509e-08, - "loss": 3.8359, - "step": 1259500 - }, - { - "epoch": 13.86, - "learning_rate": 9.034462841740469e-08, - "loss": 3.8325, - "step": 1260000 - }, - { - "epoch": 13.87, - "learning_rate": 9.03308762858243e-08, - "loss": 3.8263, - "step": 1260500 - }, - { - "epoch": 13.87, - "learning_rate": 9.03171241542439e-08, - "loss": 3.8161, - "step": 1261000 - }, - { - "epoch": 13.88, - "learning_rate": 9.03033720226635e-08, - "loss": 3.851, - "step": 1261500 - }, - { - "epoch": 13.88, - "learning_rate": 9.028961989108311e-08, - "loss": 3.8266, - "step": 1262000 - }, - { - "epoch": 13.89, - "learning_rate": 9.027586775950272e-08, - "loss": 3.8362, - "step": 1262500 - }, - { - "epoch": 13.9, - "learning_rate": 9.026211562792232e-08, - "loss": 3.814, - "step": 1263000 - }, - { - "epoch": 13.9, - "learning_rate": 9.024836349634193e-08, - "loss": 3.8394, - "step": 1263500 - }, - { - "epoch": 13.91, - "learning_rate": 9.023461136476154e-08, - "loss": 3.8369, - "step": 1264000 - }, - { - "epoch": 13.91, - "learning_rate": 9.022085923318113e-08, - "loss": 3.8324, - "step": 1264500 - }, - { - "epoch": 13.92, - "learning_rate": 9.020710710160074e-08, - "loss": 3.847, - "step": 1265000 - }, - { - "epoch": 13.92, - "learning_rate": 9.019335497002035e-08, - "loss": 3.859, - "step": 1265500 - }, - { - "epoch": 13.93, - "learning_rate": 9.017960283843995e-08, - "loss": 3.8282, - "step": 1266000 - }, - { - "epoch": 13.93, - "learning_rate": 9.016585070685956e-08, - "loss": 3.8372, - "step": 1266500 - }, - { - "epoch": 13.94, - "learning_rate": 9.015209857527917e-08, - "loss": 3.8276, - "step": 1267000 - }, - { - "epoch": 13.94, - "learning_rate": 9.013834644369876e-08, - "loss": 3.8427, - "step": 1267500 - }, - { - "epoch": 13.95, - "learning_rate": 9.012459431211837e-08, - "loss": 3.8229, - "step": 1268000 - }, - { - "epoch": 13.96, - "learning_rate": 9.011084218053798e-08, - "loss": 3.8208, - "step": 1268500 - }, - { - "epoch": 13.96, - "learning_rate": 9.009709004895758e-08, - "loss": 3.8364, - "step": 1269000 - }, - { - "epoch": 13.97, - "learning_rate": 9.008333791737719e-08, - "loss": 3.8278, - "step": 1269500 - }, - { - "epoch": 13.97, - "learning_rate": 9.00695857857968e-08, - "loss": 3.8244, - "step": 1270000 - }, - { - "epoch": 13.98, - "learning_rate": 9.005583365421639e-08, - "loss": 3.849, - "step": 1270500 - }, - { - "epoch": 13.98, - "learning_rate": 9.0042081522636e-08, - "loss": 3.8212, - "step": 1271000 - }, - { - "epoch": 13.99, - "learning_rate": 9.002832939105561e-08, - "loss": 3.8329, - "step": 1271500 - }, - { - "epoch": 13.99, - "learning_rate": 9.00145772594752e-08, - "loss": 3.8335, - "step": 1272000 - }, - { - "epoch": 14.0, - "learning_rate": 9.000082512789482e-08, - "loss": 3.8233, - "step": 1272500 - }, - { - "epoch": 14.0, - "eval_loss": 3.8761990070343018, - "eval_runtime": 6.1312, - "eval_samples_per_second": 253.458, - "step": 1272530 - }, - { - "epoch": 14.01, - "learning_rate": 8.998707299631442e-08, - "loss": 3.8429, - "step": 1273000 - }, - { - "epoch": 14.01, - "learning_rate": 8.997332086473402e-08, - "loss": 3.8518, - "step": 1273500 - }, - { - "epoch": 14.02, - "learning_rate": 8.995956873315363e-08, - "loss": 3.8246, - "step": 1274000 - }, - { - "epoch": 14.02, - "learning_rate": 8.994581660157324e-08, - "loss": 3.8219, - "step": 1274500 - }, - { - "epoch": 14.03, - "learning_rate": 8.993206446999284e-08, - "loss": 3.8167, - "step": 1275000 - }, - { - "epoch": 14.03, - "learning_rate": 8.991831233841244e-08, - "loss": 3.8455, - "step": 1275500 - }, - { - "epoch": 14.04, - "learning_rate": 8.990456020683205e-08, - "loss": 3.814, - "step": 1276000 - }, - { - "epoch": 14.04, - "learning_rate": 8.989080807525165e-08, - "loss": 3.8528, - "step": 1276500 - }, - { - "epoch": 14.05, - "learning_rate": 8.987705594367126e-08, - "loss": 3.8351, - "step": 1277000 - }, - { - "epoch": 14.05, - "learning_rate": 8.986330381209087e-08, - "loss": 3.8345, - "step": 1277500 - }, - { - "epoch": 14.06, - "learning_rate": 8.984955168051046e-08, - "loss": 3.8136, - "step": 1278000 - }, - { - "epoch": 14.07, - "learning_rate": 8.983579954893007e-08, - "loss": 3.8299, - "step": 1278500 - }, - { - "epoch": 14.07, - "learning_rate": 8.982204741734968e-08, - "loss": 3.8173, - "step": 1279000 - }, - { - "epoch": 14.08, - "learning_rate": 8.980829528576929e-08, - "loss": 3.8066, - "step": 1279500 - }, - { - "epoch": 14.08, - "learning_rate": 8.979454315418889e-08, - "loss": 3.8486, - "step": 1280000 - }, - { - "epoch": 14.09, - "learning_rate": 8.97807910226085e-08, - "loss": 3.8196, - "step": 1280500 - }, - { - "epoch": 14.09, - "learning_rate": 8.976703889102811e-08, - "loss": 3.8276, - "step": 1281000 - }, - { - "epoch": 14.1, - "learning_rate": 8.975328675944772e-08, - "loss": 3.8144, - "step": 1281500 - }, - { - "epoch": 14.1, - "learning_rate": 8.973953462786731e-08, - "loss": 3.8227, - "step": 1282000 - }, - { - "epoch": 14.11, - "learning_rate": 8.972578249628692e-08, - "loss": 3.8422, - "step": 1282500 - }, - { - "epoch": 14.12, - "learning_rate": 8.971203036470653e-08, - "loss": 3.8273, - "step": 1283000 - }, - { - "epoch": 14.12, - "learning_rate": 8.969827823312613e-08, - "loss": 3.8154, - "step": 1283500 - }, - { - "epoch": 14.13, - "learning_rate": 8.968452610154574e-08, - "loss": 3.8165, - "step": 1284000 - }, - { - "epoch": 14.13, - "learning_rate": 8.967077396996535e-08, - "loss": 3.8358, - "step": 1284500 - }, - { - "epoch": 14.14, - "learning_rate": 8.965702183838496e-08, - "loss": 3.8182, - "step": 1285000 - }, - { - "epoch": 14.14, - "learning_rate": 8.964326970680455e-08, - "loss": 3.8434, - "step": 1285500 - }, - { - "epoch": 14.15, - "learning_rate": 8.962951757522416e-08, - "loss": 3.8237, - "step": 1286000 - }, - { - "epoch": 14.15, - "learning_rate": 8.961576544364377e-08, - "loss": 3.8382, - "step": 1286500 - }, - { - "epoch": 14.16, - "learning_rate": 8.960201331206337e-08, - "loss": 3.8302, - "step": 1287000 - }, - { - "epoch": 14.16, - "learning_rate": 8.958826118048298e-08, - "loss": 3.8336, - "step": 1287500 - }, - { - "epoch": 14.17, - "learning_rate": 8.957450904890259e-08, - "loss": 3.8055, - "step": 1288000 - }, - { - "epoch": 14.18, - "learning_rate": 8.956075691732218e-08, - "loss": 3.8228, - "step": 1288500 - }, - { - "epoch": 14.18, - "learning_rate": 8.954700478574179e-08, - "loss": 3.8555, - "step": 1289000 - }, - { - "epoch": 14.19, - "learning_rate": 8.95332526541614e-08, - "loss": 3.8289, - "step": 1289500 - }, - { - "epoch": 14.19, - "learning_rate": 8.9519500522581e-08, - "loss": 3.8338, - "step": 1290000 - }, - { - "epoch": 14.2, - "learning_rate": 8.95057483910006e-08, - "loss": 3.8181, - "step": 1290500 - }, - { - "epoch": 14.2, - "learning_rate": 8.949199625942022e-08, - "loss": 3.8304, - "step": 1291000 - }, - { - "epoch": 14.21, - "learning_rate": 8.947824412783981e-08, - "loss": 3.8418, - "step": 1291500 - }, - { - "epoch": 14.21, - "learning_rate": 8.946449199625942e-08, - "loss": 3.8576, - "step": 1292000 - }, - { - "epoch": 14.22, - "learning_rate": 8.945073986467903e-08, - "loss": 3.8289, - "step": 1292500 - }, - { - "epoch": 14.23, - "learning_rate": 8.943698773309863e-08, - "loss": 3.8199, - "step": 1293000 - }, - { - "epoch": 14.23, - "learning_rate": 8.942323560151824e-08, - "loss": 3.8392, - "step": 1293500 - }, - { - "epoch": 14.24, - "learning_rate": 8.940948346993784e-08, - "loss": 3.8288, - "step": 1294000 - }, - { - "epoch": 14.24, - "learning_rate": 8.939573133835744e-08, - "loss": 3.8401, - "step": 1294500 - }, - { - "epoch": 14.25, - "learning_rate": 8.938197920677705e-08, - "loss": 3.8449, - "step": 1295000 - }, - { - "epoch": 14.25, - "learning_rate": 8.936822707519666e-08, - "loss": 3.8269, - "step": 1295500 - }, - { - "epoch": 14.26, - "learning_rate": 8.935447494361626e-08, - "loss": 3.8395, - "step": 1296000 - }, - { - "epoch": 14.26, - "learning_rate": 8.934072281203587e-08, - "loss": 3.8267, - "step": 1296500 - }, - { - "epoch": 14.27, - "learning_rate": 8.932697068045547e-08, - "loss": 3.839, - "step": 1297000 - }, - { - "epoch": 14.27, - "learning_rate": 8.931321854887507e-08, - "loss": 3.8286, - "step": 1297500 - }, - { - "epoch": 14.28, - "learning_rate": 8.929946641729468e-08, - "loss": 3.8479, - "step": 1298000 - }, - { - "epoch": 14.29, - "learning_rate": 8.928571428571429e-08, - "loss": 3.8156, - "step": 1298500 - }, - { - "epoch": 14.29, - "learning_rate": 8.927196215413389e-08, - "loss": 3.8269, - "step": 1299000 - }, - { - "epoch": 14.3, - "learning_rate": 8.92582100225535e-08, - "loss": 3.8423, - "step": 1299500 - }, - { - "epoch": 14.3, - "learning_rate": 8.92444578909731e-08, - "loss": 3.8312, - "step": 1300000 - }, - { - "epoch": 14.31, - "learning_rate": 8.92307057593927e-08, - "loss": 3.8257, - "step": 1300500 - }, - { - "epoch": 14.31, - "learning_rate": 8.921695362781231e-08, - "loss": 3.8231, - "step": 1301000 - }, - { - "epoch": 14.32, - "learning_rate": 8.920320149623192e-08, - "loss": 3.8352, - "step": 1301500 - }, - { - "epoch": 14.32, - "learning_rate": 8.918944936465151e-08, - "loss": 3.8058, - "step": 1302000 - }, - { - "epoch": 14.33, - "learning_rate": 8.917569723307112e-08, - "loss": 3.8538, - "step": 1302500 - }, - { - "epoch": 14.34, - "learning_rate": 8.916194510149073e-08, - "loss": 3.819, - "step": 1303000 - }, - { - "epoch": 14.34, - "learning_rate": 8.914819296991033e-08, - "loss": 3.8431, - "step": 1303500 - }, - { - "epoch": 14.35, - "learning_rate": 8.913444083832994e-08, - "loss": 3.8362, - "step": 1304000 - }, - { - "epoch": 14.35, - "learning_rate": 8.912068870674955e-08, - "loss": 3.8366, - "step": 1304500 - }, - { - "epoch": 14.36, - "learning_rate": 8.910693657516914e-08, - "loss": 3.8475, - "step": 1305000 - }, - { - "epoch": 14.36, - "learning_rate": 8.909318444358875e-08, - "loss": 3.8437, - "step": 1305500 - }, - { - "epoch": 14.37, - "learning_rate": 8.907943231200836e-08, - "loss": 3.827, - "step": 1306000 - }, - { - "epoch": 14.37, - "learning_rate": 8.906568018042796e-08, - "loss": 3.8453, - "step": 1306500 - }, - { - "epoch": 14.38, - "learning_rate": 8.905192804884757e-08, - "loss": 3.8273, - "step": 1307000 - }, - { - "epoch": 14.38, - "learning_rate": 8.903817591726718e-08, - "loss": 3.8175, - "step": 1307500 - }, - { - "epoch": 14.39, - "learning_rate": 8.902442378568677e-08, - "loss": 3.8297, - "step": 1308000 - }, - { - "epoch": 14.4, - "learning_rate": 8.901067165410638e-08, - "loss": 3.823, - "step": 1308500 - }, - { - "epoch": 14.4, - "learning_rate": 8.899691952252599e-08, - "loss": 3.83, - "step": 1309000 - }, - { - "epoch": 14.41, - "learning_rate": 8.898316739094559e-08, - "loss": 3.8234, - "step": 1309500 - }, - { - "epoch": 14.41, - "learning_rate": 8.89694152593652e-08, - "loss": 3.8344, - "step": 1310000 - }, - { - "epoch": 14.42, - "learning_rate": 8.895566312778481e-08, - "loss": 3.8282, - "step": 1310500 - }, - { - "epoch": 14.42, - "learning_rate": 8.89419109962044e-08, - "loss": 3.8331, - "step": 1311000 - }, - { - "epoch": 14.43, - "learning_rate": 8.892815886462401e-08, - "loss": 3.8289, - "step": 1311500 - }, - { - "epoch": 14.43, - "learning_rate": 8.891440673304362e-08, - "loss": 3.82, - "step": 1312000 - }, - { - "epoch": 14.44, - "learning_rate": 8.890065460146322e-08, - "loss": 3.8294, - "step": 1312500 - }, - { - "epoch": 14.45, - "learning_rate": 8.888690246988283e-08, - "loss": 3.8244, - "step": 1313000 - }, - { - "epoch": 14.45, - "learning_rate": 8.887315033830244e-08, - "loss": 3.8232, - "step": 1313500 - }, - { - "epoch": 14.46, - "learning_rate": 8.885939820672203e-08, - "loss": 3.8166, - "step": 1314000 - }, - { - "epoch": 14.46, - "learning_rate": 8.884564607514164e-08, - "loss": 3.8492, - "step": 1314500 - }, - { - "epoch": 14.47, - "learning_rate": 8.883189394356124e-08, - "loss": 3.8119, - "step": 1315000 - }, - { - "epoch": 14.47, - "learning_rate": 8.881814181198085e-08, - "loss": 3.8253, - "step": 1315500 - }, - { - "epoch": 14.48, - "learning_rate": 8.880438968040046e-08, - "loss": 3.8214, - "step": 1316000 - }, - { - "epoch": 14.48, - "learning_rate": 8.879063754882005e-08, - "loss": 3.8221, - "step": 1316500 - }, - { - "epoch": 14.49, - "learning_rate": 8.877688541723966e-08, - "loss": 3.8234, - "step": 1317000 - }, - { - "epoch": 14.49, - "learning_rate": 8.876313328565927e-08, - "loss": 3.8347, - "step": 1317500 - }, - { - "epoch": 14.5, - "learning_rate": 8.874938115407887e-08, - "loss": 3.8159, - "step": 1318000 - }, - { - "epoch": 14.51, - "learning_rate": 8.873562902249848e-08, - "loss": 3.847, - "step": 1318500 - }, - { - "epoch": 14.51, - "learning_rate": 8.872187689091809e-08, - "loss": 3.824, - "step": 1319000 - }, - { - "epoch": 14.52, - "learning_rate": 8.870812475933768e-08, - "loss": 3.8217, - "step": 1319500 - }, - { - "epoch": 14.52, - "learning_rate": 8.869437262775729e-08, - "loss": 3.8344, - "step": 1320000 - }, - { - "epoch": 14.53, - "learning_rate": 8.86806204961769e-08, - "loss": 3.8279, - "step": 1320500 - }, - { - "epoch": 14.53, - "learning_rate": 8.86668683645965e-08, - "loss": 3.8317, - "step": 1321000 - }, - { - "epoch": 14.54, - "learning_rate": 8.865311623301611e-08, - "loss": 3.8273, - "step": 1321500 - }, - { - "epoch": 14.54, - "learning_rate": 8.863936410143572e-08, - "loss": 3.8307, - "step": 1322000 - }, - { - "epoch": 14.55, - "learning_rate": 8.862561196985531e-08, - "loss": 3.8392, - "step": 1322500 - }, - { - "epoch": 14.56, - "learning_rate": 8.861185983827492e-08, - "loss": 3.8205, - "step": 1323000 - }, - { - "epoch": 14.56, - "learning_rate": 8.859810770669453e-08, - "loss": 3.836, - "step": 1323500 - }, - { - "epoch": 14.57, - "learning_rate": 8.858435557511413e-08, - "loss": 3.8433, - "step": 1324000 - }, - { - "epoch": 14.57, - "learning_rate": 8.857060344353374e-08, - "loss": 3.8135, - "step": 1324500 - }, - { - "epoch": 14.58, - "learning_rate": 8.855685131195335e-08, - "loss": 3.8281, - "step": 1325000 - }, - { - "epoch": 14.58, - "learning_rate": 8.854309918037294e-08, - "loss": 3.8375, - "step": 1325500 - }, - { - "epoch": 14.59, - "learning_rate": 8.852934704879255e-08, - "loss": 3.8107, - "step": 1326000 - }, - { - "epoch": 14.59, - "learning_rate": 8.851559491721216e-08, - "loss": 3.8195, - "step": 1326500 - }, - { - "epoch": 14.6, - "learning_rate": 8.850184278563177e-08, - "loss": 3.8231, - "step": 1327000 - }, - { - "epoch": 14.6, - "learning_rate": 8.848809065405137e-08, - "loss": 3.8288, - "step": 1327500 - }, - { - "epoch": 14.61, - "learning_rate": 8.847433852247098e-08, - "loss": 3.8235, - "step": 1328000 - }, - { - "epoch": 14.62, - "learning_rate": 8.846058639089059e-08, - "loss": 3.8334, - "step": 1328500 - }, - { - "epoch": 14.62, - "learning_rate": 8.84468342593102e-08, - "loss": 3.8329, - "step": 1329000 - }, - { - "epoch": 14.63, - "learning_rate": 8.843308212772979e-08, - "loss": 3.8304, - "step": 1329500 - }, - { - "epoch": 14.63, - "learning_rate": 8.84193299961494e-08, - "loss": 3.8205, - "step": 1330000 - }, - { - "epoch": 14.64, - "learning_rate": 8.840557786456901e-08, - "loss": 3.8253, - "step": 1330500 - }, - { - "epoch": 14.64, - "learning_rate": 8.83918257329886e-08, - "loss": 3.8297, - "step": 1331000 - }, - { - "epoch": 14.65, - "learning_rate": 8.837807360140821e-08, - "loss": 3.8213, - "step": 1331500 - }, - { - "epoch": 14.65, - "learning_rate": 8.836432146982782e-08, - "loss": 3.8419, - "step": 1332000 - }, - { - "epoch": 14.66, - "learning_rate": 8.835056933824743e-08, - "loss": 3.8432, - "step": 1332500 - }, - { - "epoch": 14.67, - "learning_rate": 8.833681720666703e-08, - "loss": 3.8321, - "step": 1333000 - }, - { - "epoch": 14.67, - "learning_rate": 8.832306507508664e-08, - "loss": 3.8338, - "step": 1333500 - }, - { - "epoch": 14.68, - "learning_rate": 8.830931294350625e-08, - "loss": 3.8249, - "step": 1334000 - }, - { - "epoch": 14.68, - "learning_rate": 8.829556081192584e-08, - "loss": 3.8469, - "step": 1334500 - }, - { - "epoch": 14.69, - "learning_rate": 8.828180868034545e-08, - "loss": 3.81, - "step": 1335000 - }, - { - "epoch": 14.69, - "learning_rate": 8.826805654876506e-08, - "loss": 3.8178, - "step": 1335500 - }, - { - "epoch": 14.7, - "learning_rate": 8.825430441718466e-08, - "loss": 3.8459, - "step": 1336000 - }, - { - "epoch": 14.7, - "learning_rate": 8.824055228560427e-08, - "loss": 3.8249, - "step": 1336500 - }, - { - "epoch": 14.71, - "learning_rate": 8.822680015402388e-08, - "loss": 3.8528, - "step": 1337000 - }, - { - "epoch": 14.71, - "learning_rate": 8.821304802244347e-08, - "loss": 3.8539, - "step": 1337500 - }, - { - "epoch": 14.72, - "learning_rate": 8.819929589086308e-08, - "loss": 3.8353, - "step": 1338000 - }, - { - "epoch": 14.73, - "learning_rate": 8.818554375928269e-08, - "loss": 3.8222, - "step": 1338500 - }, - { - "epoch": 14.73, - "learning_rate": 8.817179162770229e-08, - "loss": 3.8415, - "step": 1339000 - }, - { - "epoch": 14.74, - "learning_rate": 8.81580394961219e-08, - "loss": 3.8315, - "step": 1339500 - }, - { - "epoch": 14.74, - "learning_rate": 8.814428736454151e-08, - "loss": 3.8287, - "step": 1340000 - }, - { - "epoch": 14.75, - "learning_rate": 8.81305352329611e-08, - "loss": 3.811, - "step": 1340500 - }, - { - "epoch": 14.75, - "learning_rate": 8.811678310138071e-08, - "loss": 3.8284, - "step": 1341000 - }, - { - "epoch": 14.76, - "learning_rate": 8.810303096980032e-08, - "loss": 3.8333, - "step": 1341500 - }, - { - "epoch": 14.76, - "learning_rate": 8.808927883821992e-08, - "loss": 3.8285, - "step": 1342000 - }, - { - "epoch": 14.77, - "learning_rate": 8.807552670663953e-08, - "loss": 3.8316, - "step": 1342500 - }, - { - "epoch": 14.78, - "learning_rate": 8.806177457505914e-08, - "loss": 3.8229, - "step": 1343000 - }, - { - "epoch": 14.78, - "learning_rate": 8.804802244347873e-08, - "loss": 3.8303, - "step": 1343500 - }, - { - "epoch": 14.79, - "learning_rate": 8.803427031189834e-08, - "loss": 3.8332, - "step": 1344000 - }, - { - "epoch": 14.79, - "learning_rate": 8.802051818031795e-08, - "loss": 3.8348, - "step": 1344500 - }, - { - "epoch": 14.8, - "learning_rate": 8.800676604873755e-08, - "loss": 3.8206, - "step": 1345000 - }, - { - "epoch": 14.8, - "learning_rate": 8.799301391715716e-08, - "loss": 3.811, - "step": 1345500 - }, - { - "epoch": 14.81, - "learning_rate": 8.797926178557677e-08, - "loss": 3.8215, - "step": 1346000 - }, - { - "epoch": 14.81, - "learning_rate": 8.796550965399636e-08, - "loss": 3.8161, - "step": 1346500 - }, - { - "epoch": 14.82, - "learning_rate": 8.795175752241597e-08, - "loss": 3.8191, - "step": 1347000 - }, - { - "epoch": 14.82, - "learning_rate": 8.793800539083558e-08, - "loss": 3.8497, - "step": 1347500 - }, - { - "epoch": 14.83, - "learning_rate": 8.792425325925518e-08, - "loss": 3.8152, - "step": 1348000 - }, - { - "epoch": 14.84, - "learning_rate": 8.791050112767479e-08, - "loss": 3.8222, - "step": 1348500 - }, - { - "epoch": 14.84, - "learning_rate": 8.78967489960944e-08, - "loss": 3.8175, - "step": 1349000 - }, - { - "epoch": 14.85, - "learning_rate": 8.788299686451399e-08, - "loss": 3.8339, - "step": 1349500 - }, - { - "epoch": 14.85, - "learning_rate": 8.78692447329336e-08, - "loss": 3.8322, - "step": 1350000 - }, - { - "epoch": 14.86, - "learning_rate": 8.785549260135321e-08, - "loss": 3.8071, - "step": 1350500 - }, - { - "epoch": 14.86, - "learning_rate": 8.784174046977281e-08, - "loss": 3.8367, - "step": 1351000 - }, - { - "epoch": 14.87, - "learning_rate": 8.782798833819242e-08, - "loss": 3.8149, - "step": 1351500 - }, - { - "epoch": 14.87, - "learning_rate": 8.781423620661203e-08, - "loss": 3.8086, - "step": 1352000 - }, - { - "epoch": 14.88, - "learning_rate": 8.780048407503162e-08, - "loss": 3.8263, - "step": 1352500 - }, - { - "epoch": 14.89, - "learning_rate": 8.778673194345123e-08, - "loss": 3.8311, - "step": 1353000 - }, - { - "epoch": 14.89, - "learning_rate": 8.777297981187084e-08, - "loss": 3.823, - "step": 1353500 - }, - { - "epoch": 14.9, - "learning_rate": 8.775922768029044e-08, - "loss": 3.8287, - "step": 1354000 - }, - { - "epoch": 14.9, - "learning_rate": 8.774547554871005e-08, - "loss": 3.8153, - "step": 1354500 - }, - { - "epoch": 14.91, - "learning_rate": 8.773172341712966e-08, - "loss": 3.8134, - "step": 1355000 - }, - { - "epoch": 14.91, - "learning_rate": 8.771797128554925e-08, - "loss": 3.8061, - "step": 1355500 - }, - { - "epoch": 14.92, - "learning_rate": 8.770421915396886e-08, - "loss": 3.8165, - "step": 1356000 - }, - { - "epoch": 14.92, - "learning_rate": 8.769046702238847e-08, - "loss": 3.8212, - "step": 1356500 - }, - { - "epoch": 14.93, - "learning_rate": 8.767671489080807e-08, - "loss": 3.8316, - "step": 1357000 - }, - { - "epoch": 14.93, - "learning_rate": 8.766296275922768e-08, - "loss": 3.8384, - "step": 1357500 - }, - { - "epoch": 14.94, - "learning_rate": 8.764921062764728e-08, - "loss": 3.8277, - "step": 1358000 - }, - { - "epoch": 14.95, - "learning_rate": 8.763545849606688e-08, - "loss": 3.8165, - "step": 1358500 - }, - { - "epoch": 14.95, - "learning_rate": 8.762170636448649e-08, - "loss": 3.8392, - "step": 1359000 - }, - { - "epoch": 14.96, - "learning_rate": 8.76079542329061e-08, - "loss": 3.8291, - "step": 1359500 - }, - { - "epoch": 14.96, - "learning_rate": 8.75942021013257e-08, - "loss": 3.824, - "step": 1360000 - }, - { - "epoch": 14.97, - "learning_rate": 8.75804499697453e-08, - "loss": 3.8107, - "step": 1360500 - }, - { - "epoch": 14.97, - "learning_rate": 8.756669783816491e-08, - "loss": 3.8143, - "step": 1361000 - }, - { - "epoch": 14.98, - "learning_rate": 8.755294570658451e-08, - "loss": 3.8302, - "step": 1361500 - }, - { - "epoch": 14.98, - "learning_rate": 8.753919357500412e-08, - "loss": 3.8302, - "step": 1362000 - }, - { - "epoch": 14.99, - "learning_rate": 8.752544144342373e-08, - "loss": 3.8267, - "step": 1362500 - }, - { - "epoch": 15.0, - "learning_rate": 8.751168931184333e-08, - "loss": 3.8379, - "step": 1363000 - }, - { - "epoch": 15.0, - "eval_loss": 3.8711750507354736, - "eval_runtime": 6.1393, - "eval_samples_per_second": 253.125, - "step": 1363425 - }, - { - "epoch": 15.0, - "learning_rate": 8.749793718026293e-08, - "loss": 3.819, - "step": 1363500 - }, - { - "epoch": 15.01, - "learning_rate": 8.748418504868254e-08, - "loss": 3.8339, - "step": 1364000 - }, - { - "epoch": 15.01, - "learning_rate": 8.747043291710214e-08, - "loss": 3.8204, - "step": 1364500 - }, - { - "epoch": 15.02, - "learning_rate": 8.745668078552175e-08, - "loss": 3.8354, - "step": 1365000 - }, - { - "epoch": 15.02, - "learning_rate": 8.744292865394136e-08, - "loss": 3.8203, - "step": 1365500 - }, - { - "epoch": 15.03, - "learning_rate": 8.742917652236096e-08, - "loss": 3.8207, - "step": 1366000 - }, - { - "epoch": 15.03, - "learning_rate": 8.741542439078056e-08, - "loss": 3.8104, - "step": 1366500 - }, - { - "epoch": 15.04, - "learning_rate": 8.740167225920017e-08, - "loss": 3.8263, - "step": 1367000 - }, - { - "epoch": 15.04, - "learning_rate": 8.738792012761977e-08, - "loss": 3.8139, - "step": 1367500 - }, - { - "epoch": 15.05, - "learning_rate": 8.737416799603938e-08, - "loss": 3.8409, - "step": 1368000 - }, - { - "epoch": 15.06, - "learning_rate": 8.736041586445899e-08, - "loss": 3.7963, - "step": 1368500 - }, - { - "epoch": 15.06, - "learning_rate": 8.734666373287858e-08, - "loss": 3.8098, - "step": 1369000 - }, - { - "epoch": 15.07, - "learning_rate": 8.73329116012982e-08, - "loss": 3.8256, - "step": 1369500 - }, - { - "epoch": 15.07, - "learning_rate": 8.73191594697178e-08, - "loss": 3.8247, - "step": 1370000 - }, - { - "epoch": 15.08, - "learning_rate": 8.73054073381374e-08, - "loss": 3.8166, - "step": 1370500 - }, - { - "epoch": 15.08, - "learning_rate": 8.729165520655701e-08, - "loss": 3.8518, - "step": 1371000 - }, - { - "epoch": 15.09, - "learning_rate": 8.727790307497662e-08, - "loss": 3.8313, - "step": 1371500 - }, - { - "epoch": 15.09, - "learning_rate": 8.726415094339621e-08, - "loss": 3.8232, - "step": 1372000 - }, - { - "epoch": 15.1, - "learning_rate": 8.725039881181582e-08, - "loss": 3.8179, - "step": 1372500 - }, - { - "epoch": 15.11, - "learning_rate": 8.723664668023543e-08, - "loss": 3.8299, - "step": 1373000 - }, - { - "epoch": 15.11, - "learning_rate": 8.722289454865503e-08, - "loss": 3.8266, - "step": 1373500 - }, - { - "epoch": 15.12, - "learning_rate": 8.720914241707464e-08, - "loss": 3.8215, - "step": 1374000 - }, - { - "epoch": 15.12, - "learning_rate": 8.719539028549425e-08, - "loss": 3.8317, - "step": 1374500 - }, - { - "epoch": 15.13, - "learning_rate": 8.718163815391384e-08, - "loss": 3.8269, - "step": 1375000 - }, - { - "epoch": 15.13, - "learning_rate": 8.716788602233345e-08, - "loss": 3.84, - "step": 1375500 - }, - { - "epoch": 15.14, - "learning_rate": 8.715413389075306e-08, - "loss": 3.8286, - "step": 1376000 - }, - { - "epoch": 15.14, - "learning_rate": 8.714038175917267e-08, - "loss": 3.8404, - "step": 1376500 - }, - { - "epoch": 15.15, - "learning_rate": 8.712662962759227e-08, - "loss": 3.8253, - "step": 1377000 - }, - { - "epoch": 15.15, - "learning_rate": 8.711287749601188e-08, - "loss": 3.824, - "step": 1377500 - }, - { - "epoch": 15.16, - "learning_rate": 8.709912536443149e-08, - "loss": 3.8241, - "step": 1378000 - }, - { - "epoch": 15.17, - "learning_rate": 8.708537323285108e-08, - "loss": 3.803, - "step": 1378500 - }, - { - "epoch": 15.17, - "learning_rate": 8.707162110127069e-08, - "loss": 3.8223, - "step": 1379000 - }, - { - "epoch": 15.18, - "learning_rate": 8.70578689696903e-08, - "loss": 3.8301, - "step": 1379500 - }, - { - "epoch": 15.18, - "learning_rate": 8.704411683810991e-08, - "loss": 3.8231, - "step": 1380000 - }, - { - "epoch": 15.19, - "learning_rate": 8.703036470652951e-08, - "loss": 3.8293, - "step": 1380500 - }, - { - "epoch": 15.19, - "learning_rate": 8.701661257494912e-08, - "loss": 3.8172, - "step": 1381000 - }, - { - "epoch": 15.2, - "learning_rate": 8.700286044336873e-08, - "loss": 3.8155, - "step": 1381500 - }, - { - "epoch": 15.2, - "learning_rate": 8.698910831178833e-08, - "loss": 3.8332, - "step": 1382000 - }, - { - "epoch": 15.21, - "learning_rate": 8.697535618020793e-08, - "loss": 3.8303, - "step": 1382500 - }, - { - "epoch": 15.22, - "learning_rate": 8.696160404862754e-08, - "loss": 3.834, - "step": 1383000 - }, - { - "epoch": 15.22, - "learning_rate": 8.694785191704715e-08, - "loss": 3.831, - "step": 1383500 - }, - { - "epoch": 15.23, - "learning_rate": 8.693409978546675e-08, - "loss": 3.8489, - "step": 1384000 - }, - { - "epoch": 15.23, - "learning_rate": 8.692034765388636e-08, - "loss": 3.8309, - "step": 1384500 - }, - { - "epoch": 15.24, - "learning_rate": 8.690659552230596e-08, - "loss": 3.8224, - "step": 1385000 - }, - { - "epoch": 15.24, - "learning_rate": 8.689284339072556e-08, - "loss": 3.8189, - "step": 1385500 - }, - { - "epoch": 15.25, - "learning_rate": 8.687909125914517e-08, - "loss": 3.8271, - "step": 1386000 - }, - { - "epoch": 15.25, - "learning_rate": 8.686533912756478e-08, - "loss": 3.7946, - "step": 1386500 - }, - { - "epoch": 15.26, - "learning_rate": 8.685158699598438e-08, - "loss": 3.8178, - "step": 1387000 - }, - { - "epoch": 15.26, - "learning_rate": 8.683783486440398e-08, - "loss": 3.8126, - "step": 1387500 - }, - { - "epoch": 15.27, - "learning_rate": 8.68240827328236e-08, - "loss": 3.8171, - "step": 1388000 - }, - { - "epoch": 15.28, - "learning_rate": 8.681033060124319e-08, - "loss": 3.8246, - "step": 1388500 - }, - { - "epoch": 15.28, - "learning_rate": 8.67965784696628e-08, - "loss": 3.8354, - "step": 1389000 - }, - { - "epoch": 15.29, - "learning_rate": 8.678282633808241e-08, - "loss": 3.8337, - "step": 1389500 - }, - { - "epoch": 15.29, - "learning_rate": 8.6769074206502e-08, - "loss": 3.8168, - "step": 1390000 - }, - { - "epoch": 15.3, - "learning_rate": 8.675532207492161e-08, - "loss": 3.8245, - "step": 1390500 - }, - { - "epoch": 15.3, - "learning_rate": 8.674156994334121e-08, - "loss": 3.815, - "step": 1391000 - }, - { - "epoch": 15.31, - "learning_rate": 8.672781781176082e-08, - "loss": 3.8064, - "step": 1391500 - }, - { - "epoch": 15.31, - "learning_rate": 8.671406568018043e-08, - "loss": 3.8283, - "step": 1392000 - }, - { - "epoch": 15.32, - "learning_rate": 8.670031354860003e-08, - "loss": 3.8198, - "step": 1392500 - }, - { - "epoch": 15.33, - "learning_rate": 8.668656141701963e-08, - "loss": 3.8265, - "step": 1393000 - }, - { - "epoch": 15.33, - "learning_rate": 8.667280928543924e-08, - "loss": 3.8383, - "step": 1393500 - }, - { - "epoch": 15.34, - "learning_rate": 8.665905715385884e-08, - "loss": 3.8405, - "step": 1394000 - }, - { - "epoch": 15.34, - "learning_rate": 8.664530502227845e-08, - "loss": 3.8221, - "step": 1394500 - }, - { - "epoch": 15.35, - "learning_rate": 8.663155289069806e-08, - "loss": 3.8046, - "step": 1395000 - }, - { - "epoch": 15.35, - "learning_rate": 8.661780075911765e-08, - "loss": 3.8153, - "step": 1395500 - }, - { - "epoch": 15.36, - "learning_rate": 8.660404862753726e-08, - "loss": 3.8158, - "step": 1396000 - }, - { - "epoch": 15.36, - "learning_rate": 8.659029649595687e-08, - "loss": 3.8261, - "step": 1396500 - }, - { - "epoch": 15.37, - "learning_rate": 8.657654436437647e-08, - "loss": 3.8058, - "step": 1397000 - }, - { - "epoch": 15.37, - "learning_rate": 8.656279223279608e-08, - "loss": 3.8047, - "step": 1397500 - }, - { - "epoch": 15.38, - "learning_rate": 8.654904010121569e-08, - "loss": 3.8213, - "step": 1398000 - }, - { - "epoch": 15.39, - "learning_rate": 8.653528796963528e-08, - "loss": 3.8377, - "step": 1398500 - }, - { - "epoch": 15.39, - "learning_rate": 8.65215358380549e-08, - "loss": 3.8303, - "step": 1399000 - }, - { - "epoch": 15.4, - "learning_rate": 8.65077837064745e-08, - "loss": 3.8232, - "step": 1399500 - }, - { - "epoch": 15.4, - "learning_rate": 8.64940315748941e-08, - "loss": 3.8248, - "step": 1400000 - }, - { - "epoch": 15.41, - "learning_rate": 8.648027944331371e-08, - "loss": 3.8455, - "step": 1400500 - }, - { - "epoch": 15.41, - "learning_rate": 8.646652731173332e-08, - "loss": 3.8225, - "step": 1401000 - }, - { - "epoch": 15.42, - "learning_rate": 8.645277518015291e-08, - "loss": 3.8282, - "step": 1401500 - }, - { - "epoch": 15.42, - "learning_rate": 8.643902304857252e-08, - "loss": 3.8275, - "step": 1402000 - }, - { - "epoch": 15.43, - "learning_rate": 8.642527091699213e-08, - "loss": 3.8292, - "step": 1402500 - }, - { - "epoch": 15.44, - "learning_rate": 8.641151878541173e-08, - "loss": 3.8259, - "step": 1403000 - }, - { - "epoch": 15.44, - "learning_rate": 8.639776665383134e-08, - "loss": 3.83, - "step": 1403500 - }, - { - "epoch": 15.45, - "learning_rate": 8.638401452225095e-08, - "loss": 3.8268, - "step": 1404000 - }, - { - "epoch": 15.45, - "learning_rate": 8.637026239067054e-08, - "loss": 3.8071, - "step": 1404500 - }, - { - "epoch": 15.46, - "learning_rate": 8.635651025909015e-08, - "loss": 3.8132, - "step": 1405000 - }, - { - "epoch": 15.46, - "learning_rate": 8.634275812750976e-08, - "loss": 3.8084, - "step": 1405500 - }, - { - "epoch": 15.47, - "learning_rate": 8.632900599592936e-08, - "loss": 3.8209, - "step": 1406000 - }, - { - "epoch": 15.47, - "learning_rate": 8.631525386434897e-08, - "loss": 3.825, - "step": 1406500 - }, - { - "epoch": 15.48, - "learning_rate": 8.630150173276858e-08, - "loss": 3.8182, - "step": 1407000 - }, - { - "epoch": 15.48, - "learning_rate": 8.628774960118817e-08, - "loss": 3.8156, - "step": 1407500 - }, - { - "epoch": 15.49, - "learning_rate": 8.627399746960778e-08, - "loss": 3.802, - "step": 1408000 - }, - { - "epoch": 15.5, - "learning_rate": 8.626024533802739e-08, - "loss": 3.8365, - "step": 1408500 - }, - { - "epoch": 15.5, - "learning_rate": 8.624649320644699e-08, - "loss": 3.815, - "step": 1409000 - }, - { - "epoch": 15.51, - "learning_rate": 8.62327410748666e-08, - "loss": 3.8232, - "step": 1409500 - }, - { - "epoch": 15.51, - "learning_rate": 8.621898894328621e-08, - "loss": 3.8285, - "step": 1410000 - }, - { - "epoch": 15.52, - "learning_rate": 8.62052368117058e-08, - "loss": 3.825, - "step": 1410500 - }, - { - "epoch": 15.52, - "learning_rate": 8.619148468012541e-08, - "loss": 3.8201, - "step": 1411000 - }, - { - "epoch": 15.53, - "learning_rate": 8.617773254854502e-08, - "loss": 3.8256, - "step": 1411500 - }, - { - "epoch": 15.53, - "learning_rate": 8.616398041696462e-08, - "loss": 3.8158, - "step": 1412000 - }, - { - "epoch": 15.54, - "learning_rate": 8.615022828538423e-08, - "loss": 3.8097, - "step": 1412500 - }, - { - "epoch": 15.55, - "learning_rate": 8.613647615380384e-08, - "loss": 3.8391, - "step": 1413000 - }, - { - "epoch": 15.55, - "learning_rate": 8.612272402222343e-08, - "loss": 3.8291, - "step": 1413500 - }, - { - "epoch": 15.56, - "learning_rate": 8.610897189064304e-08, - "loss": 3.8295, - "step": 1414000 - }, - { - "epoch": 15.56, - "learning_rate": 8.609521975906265e-08, - "loss": 3.8199, - "step": 1414500 - }, - { - "epoch": 15.57, - "learning_rate": 8.608146762748225e-08, - "loss": 3.8236, - "step": 1415000 - }, - { - "epoch": 15.57, - "learning_rate": 8.606771549590186e-08, - "loss": 3.8074, - "step": 1415500 - }, - { - "epoch": 15.58, - "learning_rate": 8.605396336432147e-08, - "loss": 3.8209, - "step": 1416000 - }, - { - "epoch": 15.58, - "learning_rate": 8.604021123274106e-08, - "loss": 3.8179, - "step": 1416500 - }, - { - "epoch": 15.59, - "learning_rate": 8.602645910116067e-08, - "loss": 3.8325, - "step": 1417000 - }, - { - "epoch": 15.59, - "learning_rate": 8.601270696958028e-08, - "loss": 3.8338, - "step": 1417500 - }, - { - "epoch": 15.6, - "learning_rate": 8.599895483799988e-08, - "loss": 3.8088, - "step": 1418000 - }, - { - "epoch": 15.61, - "learning_rate": 8.598520270641949e-08, - "loss": 3.8141, - "step": 1418500 - }, - { - "epoch": 15.61, - "learning_rate": 8.59714505748391e-08, - "loss": 3.8325, - "step": 1419000 - }, - { - "epoch": 15.62, - "learning_rate": 8.595769844325869e-08, - "loss": 3.8181, - "step": 1419500 - }, - { - "epoch": 15.62, - "learning_rate": 8.59439463116783e-08, - "loss": 3.8324, - "step": 1420000 - }, - { - "epoch": 15.63, - "learning_rate": 8.593019418009791e-08, - "loss": 3.8067, - "step": 1420500 - }, - { - "epoch": 15.63, - "learning_rate": 8.59164420485175e-08, - "loss": 3.7967, - "step": 1421000 - }, - { - "epoch": 15.64, - "learning_rate": 8.590268991693712e-08, - "loss": 3.816, - "step": 1421500 - }, - { - "epoch": 15.64, - "learning_rate": 8.588893778535673e-08, - "loss": 3.82, - "step": 1422000 - }, - { - "epoch": 15.65, - "learning_rate": 8.587518565377632e-08, - "loss": 3.8263, - "step": 1422500 - }, - { - "epoch": 15.66, - "learning_rate": 8.586143352219593e-08, - "loss": 3.8351, - "step": 1423000 - }, - { - "epoch": 15.66, - "learning_rate": 8.584768139061554e-08, - "loss": 3.8124, - "step": 1423500 - }, - { - "epoch": 15.67, - "learning_rate": 8.583392925903515e-08, - "loss": 3.8133, - "step": 1424000 - }, - { - "epoch": 15.67, - "learning_rate": 8.582017712745475e-08, - "loss": 3.8214, - "step": 1424500 - }, - { - "epoch": 15.68, - "learning_rate": 8.580642499587435e-08, - "loss": 3.8209, - "step": 1425000 - }, - { - "epoch": 15.68, - "learning_rate": 8.579267286429396e-08, - "loss": 3.8368, - "step": 1425500 - }, - { - "epoch": 15.69, - "learning_rate": 8.577892073271356e-08, - "loss": 3.8159, - "step": 1426000 - }, - { - "epoch": 15.69, - "learning_rate": 8.576516860113317e-08, - "loss": 3.7983, - "step": 1426500 - }, - { - "epoch": 15.7, - "learning_rate": 8.575141646955278e-08, - "loss": 3.8144, - "step": 1427000 - }, - { - "epoch": 15.7, - "learning_rate": 8.573766433797239e-08, - "loss": 3.8218, - "step": 1427500 - }, - { - "epoch": 15.71, - "learning_rate": 8.572391220639198e-08, - "loss": 3.799, - "step": 1428000 - }, - { - "epoch": 15.72, - "learning_rate": 8.57101600748116e-08, - "loss": 3.8186, - "step": 1428500 - }, - { - "epoch": 15.72, - "learning_rate": 8.56964079432312e-08, - "loss": 3.8238, - "step": 1429000 - }, - { - "epoch": 15.73, - "learning_rate": 8.568265581165081e-08, - "loss": 3.8201, - "step": 1429500 - }, - { - "epoch": 15.73, - "learning_rate": 8.566890368007041e-08, - "loss": 3.8304, - "step": 1430000 - }, - { - "epoch": 15.74, - "learning_rate": 8.565515154849002e-08, - "loss": 3.8215, - "step": 1430500 - }, - { - "epoch": 15.74, - "learning_rate": 8.564139941690963e-08, - "loss": 3.8245, - "step": 1431000 - }, - { - "epoch": 15.75, - "learning_rate": 8.562764728532922e-08, - "loss": 3.8111, - "step": 1431500 - }, - { - "epoch": 15.75, - "learning_rate": 8.561389515374883e-08, - "loss": 3.8059, - "step": 1432000 - }, - { - "epoch": 15.76, - "learning_rate": 8.560014302216844e-08, - "loss": 3.8297, - "step": 1432500 - }, - { - "epoch": 15.77, - "learning_rate": 8.558639089058804e-08, - "loss": 3.815, - "step": 1433000 - }, - { - "epoch": 15.77, - "learning_rate": 8.557263875900765e-08, - "loss": 3.816, - "step": 1433500 - }, - { - "epoch": 15.78, - "learning_rate": 8.555888662742726e-08, - "loss": 3.8185, - "step": 1434000 - }, - { - "epoch": 15.78, - "learning_rate": 8.554513449584685e-08, - "loss": 3.8295, - "step": 1434500 - }, - { - "epoch": 15.79, - "learning_rate": 8.553138236426646e-08, - "loss": 3.8245, - "step": 1435000 - }, - { - "epoch": 15.79, - "learning_rate": 8.551763023268607e-08, - "loss": 3.8294, - "step": 1435500 - }, - { - "epoch": 15.8, - "learning_rate": 8.550387810110567e-08, - "loss": 3.8202, - "step": 1436000 - }, - { - "epoch": 15.8, - "learning_rate": 8.549012596952528e-08, - "loss": 3.8467, - "step": 1436500 - }, - { - "epoch": 15.81, - "learning_rate": 8.547637383794489e-08, - "loss": 3.8229, - "step": 1437000 - }, - { - "epoch": 15.81, - "learning_rate": 8.546262170636448e-08, - "loss": 3.8318, - "step": 1437500 - }, - { - "epoch": 15.82, - "learning_rate": 8.544886957478409e-08, - "loss": 3.8036, - "step": 1438000 - }, - { - "epoch": 15.83, - "learning_rate": 8.54351174432037e-08, - "loss": 3.8267, - "step": 1438500 - }, - { - "epoch": 15.83, - "learning_rate": 8.54213653116233e-08, - "loss": 3.841, - "step": 1439000 - }, - { - "epoch": 15.84, - "learning_rate": 8.54076131800429e-08, - "loss": 3.8275, - "step": 1439500 - }, - { - "epoch": 15.84, - "learning_rate": 8.539386104846252e-08, - "loss": 3.8073, - "step": 1440000 - }, - { - "epoch": 15.85, - "learning_rate": 8.538010891688211e-08, - "loss": 3.8203, - "step": 1440500 - }, - { - "epoch": 15.85, - "learning_rate": 8.536635678530172e-08, - "loss": 3.8233, - "step": 1441000 - }, - { - "epoch": 15.86, - "learning_rate": 8.535260465372133e-08, - "loss": 3.828, - "step": 1441500 - }, - { - "epoch": 15.86, - "learning_rate": 8.533885252214093e-08, - "loss": 3.8295, - "step": 1442000 - }, - { - "epoch": 15.87, - "learning_rate": 8.532510039056054e-08, - "loss": 3.8235, - "step": 1442500 - }, - { - "epoch": 15.88, - "learning_rate": 8.531134825898015e-08, - "loss": 3.814, - "step": 1443000 - }, - { - "epoch": 15.88, - "learning_rate": 8.529759612739974e-08, - "loss": 3.8075, - "step": 1443500 - }, - { - "epoch": 15.89, - "learning_rate": 8.528384399581935e-08, - "loss": 3.8301, - "step": 1444000 - }, - { - "epoch": 15.89, - "learning_rate": 8.527009186423896e-08, - "loss": 3.8131, - "step": 1444500 - }, - { - "epoch": 15.9, - "learning_rate": 8.525633973265856e-08, - "loss": 3.815, - "step": 1445000 - }, - { - "epoch": 15.9, - "learning_rate": 8.524258760107817e-08, - "loss": 3.826, - "step": 1445500 - }, - { - "epoch": 15.91, - "learning_rate": 8.522883546949778e-08, - "loss": 3.7978, - "step": 1446000 - }, - { - "epoch": 15.91, - "learning_rate": 8.521508333791737e-08, - "loss": 3.8198, - "step": 1446500 - }, - { - "epoch": 15.92, - "learning_rate": 8.520133120633698e-08, - "loss": 3.8134, - "step": 1447000 - }, - { - "epoch": 15.92, - "learning_rate": 8.518757907475659e-08, - "loss": 3.8186, - "step": 1447500 - }, - { - "epoch": 15.93, - "learning_rate": 8.517382694317619e-08, - "loss": 3.818, - "step": 1448000 - }, - { - "epoch": 15.94, - "learning_rate": 8.51600748115958e-08, - "loss": 3.8001, - "step": 1448500 - }, - { - "epoch": 15.94, - "learning_rate": 8.51463226800154e-08, - "loss": 3.8267, - "step": 1449000 - }, - { - "epoch": 15.95, - "learning_rate": 8.5132570548435e-08, - "loss": 3.8269, - "step": 1449500 - }, - { - "epoch": 15.95, - "learning_rate": 8.511881841685461e-08, - "loss": 3.8159, - "step": 1450000 - }, - { - "epoch": 15.96, - "learning_rate": 8.510506628527422e-08, - "loss": 3.815, - "step": 1450500 - }, - { - "epoch": 15.96, - "learning_rate": 8.509131415369382e-08, - "loss": 3.8191, - "step": 1451000 - }, - { - "epoch": 15.97, - "learning_rate": 8.507756202211342e-08, - "loss": 3.8218, - "step": 1451500 - }, - { - "epoch": 15.97, - "learning_rate": 8.506380989053303e-08, - "loss": 3.8213, - "step": 1452000 - }, - { - "epoch": 15.98, - "learning_rate": 8.505005775895263e-08, - "loss": 3.8265, - "step": 1452500 - }, - { - "epoch": 15.99, - "learning_rate": 8.503630562737224e-08, - "loss": 3.8337, - "step": 1453000 - }, - { - "epoch": 15.99, - "learning_rate": 8.502255349579185e-08, - "loss": 3.8047, - "step": 1453500 - }, - { - "epoch": 16.0, - "learning_rate": 8.500880136421145e-08, - "loss": 3.8296, - "step": 1454000 - }, - { - "epoch": 16.0, - "eval_loss": 3.867938995361328, - "eval_runtime": 6.1324, - "eval_samples_per_second": 253.41, - "step": 1454320 - }, - { - "epoch": 16.0, - "learning_rate": 8.499504923263105e-08, - "loss": 3.8063, - "step": 1454500 - }, - { - "epoch": 16.01, - "learning_rate": 8.498129710105066e-08, - "loss": 3.8215, - "step": 1455000 - }, - { - "epoch": 16.01, - "learning_rate": 8.496754496947026e-08, - "loss": 3.8266, - "step": 1455500 - }, - { - "epoch": 16.02, - "learning_rate": 8.495379283788987e-08, - "loss": 3.8106, - "step": 1456000 - }, - { - "epoch": 16.02, - "learning_rate": 8.494004070630948e-08, - "loss": 3.811, - "step": 1456500 - }, - { - "epoch": 16.03, - "learning_rate": 8.492628857472907e-08, - "loss": 3.8191, - "step": 1457000 - }, - { - "epoch": 16.03, - "learning_rate": 8.491253644314868e-08, - "loss": 3.8184, - "step": 1457500 - }, - { - "epoch": 16.04, - "learning_rate": 8.48987843115683e-08, - "loss": 3.7913, - "step": 1458000 - }, - { - "epoch": 16.05, - "learning_rate": 8.488503217998789e-08, - "loss": 3.827, - "step": 1458500 - }, - { - "epoch": 16.05, - "learning_rate": 8.48712800484075e-08, - "loss": 3.8153, - "step": 1459000 - }, - { - "epoch": 16.06, - "learning_rate": 8.485752791682711e-08, - "loss": 3.8196, - "step": 1459500 - }, - { - "epoch": 16.06, - "learning_rate": 8.48437757852467e-08, - "loss": 3.8183, - "step": 1460000 - }, - { - "epoch": 16.07, - "learning_rate": 8.483002365366631e-08, - "loss": 3.8109, - "step": 1460500 - }, - { - "epoch": 16.07, - "learning_rate": 8.481627152208592e-08, - "loss": 3.8173, - "step": 1461000 - }, - { - "epoch": 16.08, - "learning_rate": 8.480251939050552e-08, - "loss": 3.8081, - "step": 1461500 - }, - { - "epoch": 16.08, - "learning_rate": 8.478876725892513e-08, - "loss": 3.8132, - "step": 1462000 - }, - { - "epoch": 16.09, - "learning_rate": 8.477501512734474e-08, - "loss": 3.8262, - "step": 1462500 - }, - { - "epoch": 16.1, - "learning_rate": 8.476126299576433e-08, - "loss": 3.8229, - "step": 1463000 - }, - { - "epoch": 16.1, - "learning_rate": 8.474751086418394e-08, - "loss": 3.8127, - "step": 1463500 - }, - { - "epoch": 16.11, - "learning_rate": 8.473375873260355e-08, - "loss": 3.8149, - "step": 1464000 - }, - { - "epoch": 16.11, - "learning_rate": 8.472000660102315e-08, - "loss": 3.7961, - "step": 1464500 - }, - { - "epoch": 16.12, - "learning_rate": 8.470625446944276e-08, - "loss": 3.8306, - "step": 1465000 - }, - { - "epoch": 16.12, - "learning_rate": 8.469250233786235e-08, - "loss": 3.8204, - "step": 1465500 - }, - { - "epoch": 16.13, - "learning_rate": 8.467875020628196e-08, - "loss": 3.8135, - "step": 1466000 - }, - { - "epoch": 16.13, - "learning_rate": 8.466499807470157e-08, - "loss": 3.8081, - "step": 1466500 - }, - { - "epoch": 16.14, - "learning_rate": 8.465124594312117e-08, - "loss": 3.8348, - "step": 1467000 - }, - { - "epoch": 16.15, - "learning_rate": 8.463749381154078e-08, - "loss": 3.8197, - "step": 1467500 - }, - { - "epoch": 16.15, - "learning_rate": 8.462374167996039e-08, - "loss": 3.8224, - "step": 1468000 - }, - { - "epoch": 16.16, - "learning_rate": 8.460998954837998e-08, - "loss": 3.8009, - "step": 1468500 - }, - { - "epoch": 16.16, - "learning_rate": 8.459623741679959e-08, - "loss": 3.8142, - "step": 1469000 - }, - { - "epoch": 16.17, - "learning_rate": 8.45824852852192e-08, - "loss": 3.8072, - "step": 1469500 - }, - { - "epoch": 16.17, - "learning_rate": 8.45687331536388e-08, - "loss": 3.8411, - "step": 1470000 - }, - { - "epoch": 16.18, - "learning_rate": 8.455498102205841e-08, - "loss": 3.8058, - "step": 1470500 - }, - { - "epoch": 16.18, - "learning_rate": 8.454122889047802e-08, - "loss": 3.82, - "step": 1471000 - }, - { - "epoch": 16.19, - "learning_rate": 8.452747675889763e-08, - "loss": 3.8322, - "step": 1471500 - }, - { - "epoch": 16.19, - "learning_rate": 8.451372462731722e-08, - "loss": 3.8097, - "step": 1472000 - }, - { - "epoch": 16.2, - "learning_rate": 8.449997249573683e-08, - "loss": 3.8139, - "step": 1472500 - }, - { - "epoch": 16.21, - "learning_rate": 8.448622036415644e-08, - "loss": 3.8154, - "step": 1473000 - }, - { - "epoch": 16.21, - "learning_rate": 8.447246823257604e-08, - "loss": 3.8077, - "step": 1473500 - }, - { - "epoch": 16.22, - "learning_rate": 8.445871610099565e-08, - "loss": 3.8279, - "step": 1474000 - }, - { - "epoch": 16.22, - "learning_rate": 8.444496396941526e-08, - "loss": 3.8178, - "step": 1474500 - }, - { - "epoch": 16.23, - "learning_rate": 8.443121183783487e-08, - "loss": 3.8317, - "step": 1475000 - }, - { - "epoch": 16.23, - "learning_rate": 8.441745970625446e-08, - "loss": 3.8131, - "step": 1475500 - }, - { - "epoch": 16.24, - "learning_rate": 8.440370757467407e-08, - "loss": 3.814, - "step": 1476000 - }, - { - "epoch": 16.24, - "learning_rate": 8.438995544309368e-08, - "loss": 3.8155, - "step": 1476500 - }, - { - "epoch": 16.25, - "learning_rate": 8.437620331151329e-08, - "loss": 3.8094, - "step": 1477000 - }, - { - "epoch": 16.26, - "learning_rate": 8.436245117993289e-08, - "loss": 3.8151, - "step": 1477500 - }, - { - "epoch": 16.26, - "learning_rate": 8.43486990483525e-08, - "loss": 3.8262, - "step": 1478000 - }, - { - "epoch": 16.27, - "learning_rate": 8.43349469167721e-08, - "loss": 3.8172, - "step": 1478500 - }, - { - "epoch": 16.27, - "learning_rate": 8.43211947851917e-08, - "loss": 3.811, - "step": 1479000 - }, - { - "epoch": 16.28, - "learning_rate": 8.430744265361131e-08, - "loss": 3.8097, - "step": 1479500 - }, - { - "epoch": 16.28, - "learning_rate": 8.429369052203092e-08, - "loss": 3.8284, - "step": 1480000 - }, - { - "epoch": 16.29, - "learning_rate": 8.427993839045052e-08, - "loss": 3.8205, - "step": 1480500 - }, - { - "epoch": 16.29, - "learning_rate": 8.426618625887012e-08, - "loss": 3.8206, - "step": 1481000 - }, - { - "epoch": 16.3, - "learning_rate": 8.425243412728973e-08, - "loss": 3.805, - "step": 1481500 - }, - { - "epoch": 16.3, - "learning_rate": 8.423868199570933e-08, - "loss": 3.8002, - "step": 1482000 - }, - { - "epoch": 16.31, - "learning_rate": 8.422492986412894e-08, - "loss": 3.826, - "step": 1482500 - }, - { - "epoch": 16.32, - "learning_rate": 8.421117773254855e-08, - "loss": 3.8249, - "step": 1483000 - }, - { - "epoch": 16.32, - "learning_rate": 8.419742560096814e-08, - "loss": 3.8073, - "step": 1483500 - }, - { - "epoch": 16.33, - "learning_rate": 8.418367346938775e-08, - "loss": 3.8248, - "step": 1484000 - }, - { - "epoch": 16.33, - "learning_rate": 8.416992133780736e-08, - "loss": 3.8351, - "step": 1484500 - }, - { - "epoch": 16.34, - "learning_rate": 8.415616920622696e-08, - "loss": 3.839, - "step": 1485000 - }, - { - "epoch": 16.34, - "learning_rate": 8.414241707464657e-08, - "loss": 3.8135, - "step": 1485500 - }, - { - "epoch": 16.35, - "learning_rate": 8.412866494306618e-08, - "loss": 3.8355, - "step": 1486000 - }, - { - "epoch": 16.35, - "learning_rate": 8.411491281148577e-08, - "loss": 3.8078, - "step": 1486500 - }, - { - "epoch": 16.36, - "learning_rate": 8.410116067990538e-08, - "loss": 3.8177, - "step": 1487000 - }, - { - "epoch": 16.37, - "learning_rate": 8.408740854832499e-08, - "loss": 3.8221, - "step": 1487500 - }, - { - "epoch": 16.37, - "learning_rate": 8.407365641674459e-08, - "loss": 3.8262, - "step": 1488000 - }, - { - "epoch": 16.38, - "learning_rate": 8.40599042851642e-08, - "loss": 3.8176, - "step": 1488500 - }, - { - "epoch": 16.38, - "learning_rate": 8.404615215358381e-08, - "loss": 3.8234, - "step": 1489000 - }, - { - "epoch": 16.39, - "learning_rate": 8.40324000220034e-08, - "loss": 3.8211, - "step": 1489500 - }, - { - "epoch": 16.39, - "learning_rate": 8.401864789042301e-08, - "loss": 3.8328, - "step": 1490000 - }, - { - "epoch": 16.4, - "learning_rate": 8.400489575884262e-08, - "loss": 3.8035, - "step": 1490500 - }, - { - "epoch": 16.4, - "learning_rate": 8.399114362726222e-08, - "loss": 3.8202, - "step": 1491000 - }, - { - "epoch": 16.41, - "learning_rate": 8.397739149568183e-08, - "loss": 3.7977, - "step": 1491500 - }, - { - "epoch": 16.41, - "learning_rate": 8.396363936410144e-08, - "loss": 3.8308, - "step": 1492000 - }, - { - "epoch": 16.42, - "learning_rate": 8.394988723252103e-08, - "loss": 3.8182, - "step": 1492500 - }, - { - "epoch": 16.43, - "learning_rate": 8.393613510094064e-08, - "loss": 3.8137, - "step": 1493000 - }, - { - "epoch": 16.43, - "learning_rate": 8.392238296936025e-08, - "loss": 3.7955, - "step": 1493500 - }, - { - "epoch": 16.44, - "learning_rate": 8.390863083777985e-08, - "loss": 3.8029, - "step": 1494000 - }, - { - "epoch": 16.44, - "learning_rate": 8.389487870619946e-08, - "loss": 3.8189, - "step": 1494500 - }, - { - "epoch": 16.45, - "learning_rate": 8.388112657461907e-08, - "loss": 3.8303, - "step": 1495000 - }, - { - "epoch": 16.45, - "learning_rate": 8.386737444303866e-08, - "loss": 3.8199, - "step": 1495500 - }, - { - "epoch": 16.46, - "learning_rate": 8.385362231145827e-08, - "loss": 3.8183, - "step": 1496000 - }, - { - "epoch": 16.46, - "learning_rate": 8.383987017987788e-08, - "loss": 3.8091, - "step": 1496500 - }, - { - "epoch": 16.47, - "learning_rate": 8.382611804829748e-08, - "loss": 3.8141, - "step": 1497000 - }, - { - "epoch": 16.48, - "learning_rate": 8.381236591671709e-08, - "loss": 3.8107, - "step": 1497500 - }, - { - "epoch": 16.48, - "learning_rate": 8.37986137851367e-08, - "loss": 3.8077, - "step": 1498000 - }, - { - "epoch": 16.49, - "learning_rate": 8.378486165355629e-08, - "loss": 3.8042, - "step": 1498500 - }, - { - "epoch": 16.49, - "learning_rate": 8.37711095219759e-08, - "loss": 3.8148, - "step": 1499000 - }, - { - "epoch": 16.5, - "learning_rate": 8.375735739039551e-08, - "loss": 3.8195, - "step": 1499500 - }, - { - "epoch": 16.5, - "learning_rate": 8.374360525881511e-08, - "loss": 3.8122, - "step": 1500000 - }, - { - "epoch": 16.51, - "learning_rate": 8.372985312723472e-08, - "loss": 3.8212, - "step": 1500500 - }, - { - "epoch": 16.51, - "learning_rate": 8.371610099565433e-08, - "loss": 3.8143, - "step": 1501000 - }, - { - "epoch": 16.52, - "learning_rate": 8.370234886407392e-08, - "loss": 3.8178, - "step": 1501500 - }, - { - "epoch": 16.52, - "learning_rate": 8.368859673249353e-08, - "loss": 3.8171, - "step": 1502000 - }, - { - "epoch": 16.53, - "learning_rate": 8.367484460091314e-08, - "loss": 3.7978, - "step": 1502500 - }, - { - "epoch": 16.54, - "learning_rate": 8.366109246933274e-08, - "loss": 3.8218, - "step": 1503000 - }, - { - "epoch": 16.54, - "learning_rate": 8.364734033775235e-08, - "loss": 3.802, - "step": 1503500 - }, - { - "epoch": 16.55, - "learning_rate": 8.363358820617196e-08, - "loss": 3.8081, - "step": 1504000 - }, - { - "epoch": 16.55, - "learning_rate": 8.361983607459155e-08, - "loss": 3.8245, - "step": 1504500 - }, - { - "epoch": 16.56, - "learning_rate": 8.360608394301116e-08, - "loss": 3.8207, - "step": 1505000 - }, - { - "epoch": 16.56, - "learning_rate": 8.359233181143077e-08, - "loss": 3.8157, - "step": 1505500 - }, - { - "epoch": 16.57, - "learning_rate": 8.357857967985037e-08, - "loss": 3.822, - "step": 1506000 - }, - { - "epoch": 16.57, - "learning_rate": 8.356482754826998e-08, - "loss": 3.8244, - "step": 1506500 - }, - { - "epoch": 16.58, - "learning_rate": 8.355107541668959e-08, - "loss": 3.8074, - "step": 1507000 - }, - { - "epoch": 16.59, - "learning_rate": 8.353732328510918e-08, - "loss": 3.8139, - "step": 1507500 - }, - { - "epoch": 16.59, - "learning_rate": 8.352357115352879e-08, - "loss": 3.8167, - "step": 1508000 - }, - { - "epoch": 16.6, - "learning_rate": 8.35098190219484e-08, - "loss": 3.7909, - "step": 1508500 - }, - { - "epoch": 16.6, - "learning_rate": 8.3496066890368e-08, - "loss": 3.809, - "step": 1509000 - }, - { - "epoch": 16.61, - "learning_rate": 8.34823147587876e-08, - "loss": 3.8155, - "step": 1509500 - }, - { - "epoch": 16.61, - "learning_rate": 8.346856262720722e-08, - "loss": 3.838, - "step": 1510000 - }, - { - "epoch": 16.62, - "learning_rate": 8.345481049562681e-08, - "loss": 3.81, - "step": 1510500 - }, - { - "epoch": 16.62, - "learning_rate": 8.344105836404642e-08, - "loss": 3.8336, - "step": 1511000 - }, - { - "epoch": 16.63, - "learning_rate": 8.342730623246603e-08, - "loss": 3.8026, - "step": 1511500 - }, - { - "epoch": 16.63, - "learning_rate": 8.341355410088563e-08, - "loss": 3.8077, - "step": 1512000 - }, - { - "epoch": 16.64, - "learning_rate": 8.339980196930524e-08, - "loss": 3.8022, - "step": 1512500 - }, - { - "epoch": 16.65, - "learning_rate": 8.338604983772484e-08, - "loss": 3.8183, - "step": 1513000 - }, - { - "epoch": 16.65, - "learning_rate": 8.337229770614444e-08, - "loss": 3.8237, - "step": 1513500 - }, - { - "epoch": 16.66, - "learning_rate": 8.335854557456405e-08, - "loss": 3.8232, - "step": 1514000 - }, - { - "epoch": 16.66, - "learning_rate": 8.334479344298366e-08, - "loss": 3.8215, - "step": 1514500 - }, - { - "epoch": 16.67, - "learning_rate": 8.333104131140326e-08, - "loss": 3.8177, - "step": 1515000 - }, - { - "epoch": 16.67, - "learning_rate": 8.331728917982286e-08, - "loss": 3.8132, - "step": 1515500 - }, - { - "epoch": 16.68, - "learning_rate": 8.330353704824247e-08, - "loss": 3.815, - "step": 1516000 - }, - { - "epoch": 16.68, - "learning_rate": 8.328978491666207e-08, - "loss": 3.8021, - "step": 1516500 - }, - { - "epoch": 16.69, - "learning_rate": 8.327603278508168e-08, - "loss": 3.8201, - "step": 1517000 - }, - { - "epoch": 16.7, - "learning_rate": 8.326228065350129e-08, - "loss": 3.8174, - "step": 1517500 - }, - { - "epoch": 16.7, - "learning_rate": 8.324852852192089e-08, - "loss": 3.8086, - "step": 1518000 - }, - { - "epoch": 16.71, - "learning_rate": 8.32347763903405e-08, - "loss": 3.7831, - "step": 1518500 - }, - { - "epoch": 16.71, - "learning_rate": 8.32210242587601e-08, - "loss": 3.8169, - "step": 1519000 - }, - { - "epoch": 16.72, - "learning_rate": 8.32072721271797e-08, - "loss": 3.8041, - "step": 1519500 - }, - { - "epoch": 16.72, - "learning_rate": 8.319351999559931e-08, - "loss": 3.8056, - "step": 1520000 - }, - { - "epoch": 16.73, - "learning_rate": 8.317976786401892e-08, - "loss": 3.8088, - "step": 1520500 - }, - { - "epoch": 16.73, - "learning_rate": 8.316601573243851e-08, - "loss": 3.8156, - "step": 1521000 - }, - { - "epoch": 16.74, - "learning_rate": 8.315226360085812e-08, - "loss": 3.8172, - "step": 1521500 - }, - { - "epoch": 16.74, - "learning_rate": 8.313851146927773e-08, - "loss": 3.8142, - "step": 1522000 - }, - { - "epoch": 16.75, - "learning_rate": 8.312475933769734e-08, - "loss": 3.7944, - "step": 1522500 - }, - { - "epoch": 16.76, - "learning_rate": 8.311100720611694e-08, - "loss": 3.8203, - "step": 1523000 - }, - { - "epoch": 16.76, - "learning_rate": 8.309725507453655e-08, - "loss": 3.8098, - "step": 1523500 - }, - { - "epoch": 16.77, - "learning_rate": 8.308350294295616e-08, - "loss": 3.8026, - "step": 1524000 - }, - { - "epoch": 16.77, - "learning_rate": 8.306975081137577e-08, - "loss": 3.8283, - "step": 1524500 - }, - { - "epoch": 16.78, - "learning_rate": 8.305599867979536e-08, - "loss": 3.7991, - "step": 1525000 - }, - { - "epoch": 16.78, - "learning_rate": 8.304224654821497e-08, - "loss": 3.801, - "step": 1525500 - }, - { - "epoch": 16.79, - "learning_rate": 8.302849441663458e-08, - "loss": 3.8296, - "step": 1526000 - }, - { - "epoch": 16.79, - "learning_rate": 8.301474228505419e-08, - "loss": 3.8056, - "step": 1526500 - }, - { - "epoch": 16.8, - "learning_rate": 8.300099015347379e-08, - "loss": 3.8248, - "step": 1527000 - }, - { - "epoch": 16.81, - "learning_rate": 8.29872380218934e-08, - "loss": 3.8135, - "step": 1527500 - }, - { - "epoch": 16.81, - "learning_rate": 8.2973485890313e-08, - "loss": 3.8361, - "step": 1528000 - }, - { - "epoch": 16.82, - "learning_rate": 8.29597337587326e-08, - "loss": 3.8299, - "step": 1528500 - }, - { - "epoch": 16.82, - "learning_rate": 8.294598162715221e-08, - "loss": 3.8374, - "step": 1529000 - }, - { - "epoch": 16.83, - "learning_rate": 8.293222949557182e-08, - "loss": 3.8095, - "step": 1529500 - }, - { - "epoch": 16.83, - "learning_rate": 8.291847736399142e-08, - "loss": 3.8198, - "step": 1530000 - }, - { - "epoch": 16.84, - "learning_rate": 8.290472523241103e-08, - "loss": 3.8068, - "step": 1530500 - }, - { - "epoch": 16.84, - "learning_rate": 8.289097310083064e-08, - "loss": 3.8264, - "step": 1531000 - }, - { - "epoch": 16.85, - "learning_rate": 8.287722096925023e-08, - "loss": 3.8222, - "step": 1531500 - }, - { - "epoch": 16.85, - "learning_rate": 8.286346883766984e-08, - "loss": 3.824, - "step": 1532000 - }, - { - "epoch": 16.86, - "learning_rate": 8.284971670608945e-08, - "loss": 3.8305, - "step": 1532500 - }, - { - "epoch": 16.87, - "learning_rate": 8.283596457450905e-08, - "loss": 3.8065, - "step": 1533000 - }, - { - "epoch": 16.87, - "learning_rate": 8.282221244292866e-08, - "loss": 3.8001, - "step": 1533500 - }, - { - "epoch": 16.88, - "learning_rate": 8.280846031134827e-08, - "loss": 3.8058, - "step": 1534000 - }, - { - "epoch": 16.88, - "learning_rate": 8.279470817976786e-08, - "loss": 3.8292, - "step": 1534500 - }, - { - "epoch": 16.89, - "learning_rate": 8.278095604818747e-08, - "loss": 3.8223, - "step": 1535000 - }, - { - "epoch": 16.89, - "learning_rate": 8.276720391660708e-08, - "loss": 3.8232, - "step": 1535500 - }, - { - "epoch": 16.9, - "learning_rate": 8.275345178502668e-08, - "loss": 3.8234, - "step": 1536000 - }, - { - "epoch": 16.9, - "learning_rate": 8.273969965344629e-08, - "loss": 3.8135, - "step": 1536500 - }, - { - "epoch": 16.91, - "learning_rate": 8.27259475218659e-08, - "loss": 3.7976, - "step": 1537000 - }, - { - "epoch": 16.92, - "learning_rate": 8.271219539028549e-08, - "loss": 3.813, - "step": 1537500 - }, - { - "epoch": 16.92, - "learning_rate": 8.26984432587051e-08, - "loss": 3.8222, - "step": 1538000 - }, - { - "epoch": 16.93, - "learning_rate": 8.268469112712471e-08, - "loss": 3.8058, - "step": 1538500 - }, - { - "epoch": 16.93, - "learning_rate": 8.26709389955443e-08, - "loss": 3.797, - "step": 1539000 - }, - { - "epoch": 16.94, - "learning_rate": 8.265718686396391e-08, - "loss": 3.8028, - "step": 1539500 - }, - { - "epoch": 16.94, - "learning_rate": 8.264343473238351e-08, - "loss": 3.8118, - "step": 1540000 - }, - { - "epoch": 16.95, - "learning_rate": 8.262968260080312e-08, - "loss": 3.8241, - "step": 1540500 - }, - { - "epoch": 16.95, - "learning_rate": 8.261593046922273e-08, - "loss": 3.8202, - "step": 1541000 - }, - { - "epoch": 16.96, - "learning_rate": 8.260217833764233e-08, - "loss": 3.8157, - "step": 1541500 - }, - { - "epoch": 16.96, - "learning_rate": 8.258842620606194e-08, - "loss": 3.8186, - "step": 1542000 - }, - { - "epoch": 16.97, - "learning_rate": 8.257467407448154e-08, - "loss": 3.7993, - "step": 1542500 - }, - { - "epoch": 16.98, - "learning_rate": 8.256092194290114e-08, - "loss": 3.8206, - "step": 1543000 - }, - { - "epoch": 16.98, - "learning_rate": 8.254716981132075e-08, - "loss": 3.8121, - "step": 1543500 - }, - { - "epoch": 16.99, - "learning_rate": 8.253341767974036e-08, - "loss": 3.8117, - "step": 1544000 - }, - { - "epoch": 16.99, - "learning_rate": 8.251966554815996e-08, - "loss": 3.8319, - "step": 1544500 - }, - { - "epoch": 17.0, - "learning_rate": 8.250591341657956e-08, - "loss": 3.8055, - "step": 1545000 - }, - { - "epoch": 17.0, - "eval_loss": 3.863797903060913, - "eval_runtime": 6.1486, - "eval_samples_per_second": 252.74, - "step": 1545215 - }, - { - "epoch": 17.0, - "learning_rate": 8.249216128499917e-08, - "loss": 3.8056, - "step": 1545500 - }, - { - "epoch": 17.01, - "learning_rate": 8.247840915341877e-08, - "loss": 3.8228, - "step": 1546000 - }, - { - "epoch": 17.01, - "learning_rate": 8.246465702183838e-08, - "loss": 3.814, - "step": 1546500 - }, - { - "epoch": 17.02, - "learning_rate": 8.245090489025799e-08, - "loss": 3.8356, - "step": 1547000 - }, - { - "epoch": 17.03, - "learning_rate": 8.243715275867759e-08, - "loss": 3.8054, - "step": 1547500 - }, - { - "epoch": 17.03, - "learning_rate": 8.24234006270972e-08, - "loss": 3.8348, - "step": 1548000 - }, - { - "epoch": 17.04, - "learning_rate": 8.24096484955168e-08, - "loss": 3.8178, - "step": 1548500 - }, - { - "epoch": 17.04, - "learning_rate": 8.23958963639364e-08, - "loss": 3.8074, - "step": 1549000 - }, - { - "epoch": 17.05, - "learning_rate": 8.238214423235601e-08, - "loss": 3.8183, - "step": 1549500 - }, - { - "epoch": 17.05, - "learning_rate": 8.236839210077562e-08, - "loss": 3.8092, - "step": 1550000 - }, - { - "epoch": 17.06, - "learning_rate": 8.235463996919521e-08, - "loss": 3.82, - "step": 1550500 - }, - { - "epoch": 17.06, - "learning_rate": 8.234088783761482e-08, - "loss": 3.8148, - "step": 1551000 - }, - { - "epoch": 17.07, - "learning_rate": 8.232713570603443e-08, - "loss": 3.8271, - "step": 1551500 - }, - { - "epoch": 17.07, - "learning_rate": 8.231338357445403e-08, - "loss": 3.7969, - "step": 1552000 - }, - { - "epoch": 17.08, - "learning_rate": 8.229963144287364e-08, - "loss": 3.8158, - "step": 1552500 - }, - { - "epoch": 17.09, - "learning_rate": 8.228587931129325e-08, - "loss": 3.7993, - "step": 1553000 - }, - { - "epoch": 17.09, - "learning_rate": 8.227212717971284e-08, - "loss": 3.8134, - "step": 1553500 - }, - { - "epoch": 17.1, - "learning_rate": 8.225837504813245e-08, - "loss": 3.808, - "step": 1554000 - }, - { - "epoch": 17.1, - "learning_rate": 8.224462291655206e-08, - "loss": 3.8102, - "step": 1554500 - }, - { - "epoch": 17.11, - "learning_rate": 8.223087078497166e-08, - "loss": 3.8079, - "step": 1555000 - }, - { - "epoch": 17.11, - "learning_rate": 8.221711865339127e-08, - "loss": 3.7953, - "step": 1555500 - }, - { - "epoch": 17.12, - "learning_rate": 8.220336652181088e-08, - "loss": 3.8064, - "step": 1556000 - }, - { - "epoch": 17.12, - "learning_rate": 8.218961439023047e-08, - "loss": 3.8192, - "step": 1556500 - }, - { - "epoch": 17.13, - "learning_rate": 8.217586225865008e-08, - "loss": 3.8122, - "step": 1557000 - }, - { - "epoch": 17.14, - "learning_rate": 8.216211012706969e-08, - "loss": 3.8117, - "step": 1557500 - }, - { - "epoch": 17.14, - "learning_rate": 8.214835799548929e-08, - "loss": 3.8003, - "step": 1558000 - }, - { - "epoch": 17.15, - "learning_rate": 8.21346058639089e-08, - "loss": 3.8231, - "step": 1558500 - }, - { - "epoch": 17.15, - "learning_rate": 8.212085373232851e-08, - "loss": 3.8048, - "step": 1559000 - }, - { - "epoch": 17.16, - "learning_rate": 8.21071016007481e-08, - "loss": 3.8077, - "step": 1559500 - }, - { - "epoch": 17.16, - "learning_rate": 8.209334946916771e-08, - "loss": 3.8346, - "step": 1560000 - }, - { - "epoch": 17.17, - "learning_rate": 8.207959733758732e-08, - "loss": 3.806, - "step": 1560500 - }, - { - "epoch": 17.17, - "learning_rate": 8.206584520600692e-08, - "loss": 3.8002, - "step": 1561000 - }, - { - "epoch": 17.18, - "learning_rate": 8.205209307442653e-08, - "loss": 3.8166, - "step": 1561500 - }, - { - "epoch": 17.18, - "learning_rate": 8.203834094284614e-08, - "loss": 3.8159, - "step": 1562000 - }, - { - "epoch": 17.19, - "learning_rate": 8.202458881126573e-08, - "loss": 3.8072, - "step": 1562500 - }, - { - "epoch": 17.2, - "learning_rate": 8.201083667968534e-08, - "loss": 3.817, - "step": 1563000 - }, - { - "epoch": 17.2, - "learning_rate": 8.199708454810495e-08, - "loss": 3.8219, - "step": 1563500 - }, - { - "epoch": 17.21, - "learning_rate": 8.198333241652455e-08, - "loss": 3.8106, - "step": 1564000 - }, - { - "epoch": 17.21, - "learning_rate": 8.196958028494416e-08, - "loss": 3.8281, - "step": 1564500 - }, - { - "epoch": 17.22, - "learning_rate": 8.195582815336377e-08, - "loss": 3.8231, - "step": 1565000 - }, - { - "epoch": 17.22, - "learning_rate": 8.194207602178336e-08, - "loss": 3.8251, - "step": 1565500 - }, - { - "epoch": 17.23, - "learning_rate": 8.192832389020297e-08, - "loss": 3.7855, - "step": 1566000 - }, - { - "epoch": 17.23, - "learning_rate": 8.191457175862258e-08, - "loss": 3.8052, - "step": 1566500 - }, - { - "epoch": 17.24, - "learning_rate": 8.190081962704218e-08, - "loss": 3.804, - "step": 1567000 - }, - { - "epoch": 17.25, - "learning_rate": 8.188706749546179e-08, - "loss": 3.8133, - "step": 1567500 - }, - { - "epoch": 17.25, - "learning_rate": 8.18733153638814e-08, - "loss": 3.8133, - "step": 1568000 - }, - { - "epoch": 17.26, - "learning_rate": 8.1859563232301e-08, - "loss": 3.8117, - "step": 1568500 - }, - { - "epoch": 17.26, - "learning_rate": 8.18458111007206e-08, - "loss": 3.8164, - "step": 1569000 - }, - { - "epoch": 17.27, - "learning_rate": 8.183205896914021e-08, - "loss": 3.8293, - "step": 1569500 - }, - { - "epoch": 17.27, - "learning_rate": 8.181830683755982e-08, - "loss": 3.8111, - "step": 1570000 - }, - { - "epoch": 17.28, - "learning_rate": 8.180455470597942e-08, - "loss": 3.8175, - "step": 1570500 - }, - { - "epoch": 17.28, - "learning_rate": 8.179080257439903e-08, - "loss": 3.8184, - "step": 1571000 - }, - { - "epoch": 17.29, - "learning_rate": 8.177705044281863e-08, - "loss": 3.809, - "step": 1571500 - }, - { - "epoch": 17.29, - "learning_rate": 8.176329831123824e-08, - "loss": 3.8166, - "step": 1572000 - }, - { - "epoch": 17.3, - "learning_rate": 8.174954617965784e-08, - "loss": 3.7916, - "step": 1572500 - }, - { - "epoch": 17.31, - "learning_rate": 8.173579404807745e-08, - "loss": 3.8001, - "step": 1573000 - }, - { - "epoch": 17.31, - "learning_rate": 8.172204191649706e-08, - "loss": 3.7957, - "step": 1573500 - }, - { - "epoch": 17.32, - "learning_rate": 8.170828978491667e-08, - "loss": 3.8287, - "step": 1574000 - }, - { - "epoch": 17.32, - "learning_rate": 8.169453765333626e-08, - "loss": 3.7886, - "step": 1574500 - }, - { - "epoch": 17.33, - "learning_rate": 8.168078552175587e-08, - "loss": 3.7996, - "step": 1575000 - }, - { - "epoch": 17.33, - "learning_rate": 8.166703339017548e-08, - "loss": 3.8156, - "step": 1575500 - }, - { - "epoch": 17.34, - "learning_rate": 8.165328125859508e-08, - "loss": 3.8299, - "step": 1576000 - }, - { - "epoch": 17.34, - "learning_rate": 8.163952912701469e-08, - "loss": 3.7958, - "step": 1576500 - }, - { - "epoch": 17.35, - "learning_rate": 8.16257769954343e-08, - "loss": 3.8098, - "step": 1577000 - }, - { - "epoch": 17.36, - "learning_rate": 8.16120248638539e-08, - "loss": 3.8173, - "step": 1577500 - }, - { - "epoch": 17.36, - "learning_rate": 8.15982727322735e-08, - "loss": 3.8085, - "step": 1578000 - }, - { - "epoch": 17.37, - "learning_rate": 8.158452060069311e-08, - "loss": 3.8139, - "step": 1578500 - }, - { - "epoch": 17.37, - "learning_rate": 8.157076846911271e-08, - "loss": 3.8063, - "step": 1579000 - }, - { - "epoch": 17.38, - "learning_rate": 8.155701633753232e-08, - "loss": 3.812, - "step": 1579500 - }, - { - "epoch": 17.38, - "learning_rate": 8.154326420595193e-08, - "loss": 3.822, - "step": 1580000 - }, - { - "epoch": 17.39, - "learning_rate": 8.152951207437152e-08, - "loss": 3.803, - "step": 1580500 - }, - { - "epoch": 17.39, - "learning_rate": 8.151575994279113e-08, - "loss": 3.8241, - "step": 1581000 - }, - { - "epoch": 17.4, - "learning_rate": 8.150200781121074e-08, - "loss": 3.822, - "step": 1581500 - }, - { - "epoch": 17.4, - "learning_rate": 8.148825567963034e-08, - "loss": 3.8209, - "step": 1582000 - }, - { - "epoch": 17.41, - "learning_rate": 8.147450354804995e-08, - "loss": 3.8123, - "step": 1582500 - }, - { - "epoch": 17.42, - "learning_rate": 8.146075141646956e-08, - "loss": 3.8178, - "step": 1583000 - }, - { - "epoch": 17.42, - "learning_rate": 8.144699928488915e-08, - "loss": 3.8181, - "step": 1583500 - }, - { - "epoch": 17.43, - "learning_rate": 8.143324715330876e-08, - "loss": 3.8058, - "step": 1584000 - }, - { - "epoch": 17.43, - "learning_rate": 8.141949502172837e-08, - "loss": 3.8149, - "step": 1584500 - }, - { - "epoch": 17.44, - "learning_rate": 8.140574289014797e-08, - "loss": 3.7947, - "step": 1585000 - }, - { - "epoch": 17.44, - "learning_rate": 8.139199075856758e-08, - "loss": 3.7961, - "step": 1585500 - }, - { - "epoch": 17.45, - "learning_rate": 8.137823862698719e-08, - "loss": 3.8096, - "step": 1586000 - }, - { - "epoch": 17.45, - "learning_rate": 8.136448649540678e-08, - "loss": 3.8186, - "step": 1586500 - }, - { - "epoch": 17.46, - "learning_rate": 8.135073436382639e-08, - "loss": 3.7836, - "step": 1587000 - }, - { - "epoch": 17.47, - "learning_rate": 8.1336982232246e-08, - "loss": 3.7944, - "step": 1587500 - }, - { - "epoch": 17.47, - "learning_rate": 8.13232301006656e-08, - "loss": 3.8094, - "step": 1588000 - }, - { - "epoch": 17.48, - "learning_rate": 8.130947796908521e-08, - "loss": 3.8001, - "step": 1588500 - }, - { - "epoch": 17.48, - "learning_rate": 8.129572583750482e-08, - "loss": 3.8086, - "step": 1589000 - }, - { - "epoch": 17.49, - "learning_rate": 8.128197370592441e-08, - "loss": 3.8057, - "step": 1589500 - }, - { - "epoch": 17.49, - "learning_rate": 8.126822157434402e-08, - "loss": 3.8218, - "step": 1590000 - }, - { - "epoch": 17.5, - "learning_rate": 8.125446944276363e-08, - "loss": 3.8295, - "step": 1590500 - }, - { - "epoch": 17.5, - "learning_rate": 8.124071731118323e-08, - "loss": 3.8025, - "step": 1591000 - }, - { - "epoch": 17.51, - "learning_rate": 8.122696517960284e-08, - "loss": 3.8178, - "step": 1591500 - }, - { - "epoch": 17.51, - "learning_rate": 8.121321304802245e-08, - "loss": 3.8006, - "step": 1592000 - }, - { - "epoch": 17.52, - "learning_rate": 8.119946091644204e-08, - "loss": 3.8002, - "step": 1592500 - }, - { - "epoch": 17.53, - "learning_rate": 8.118570878486165e-08, - "loss": 3.8318, - "step": 1593000 - }, - { - "epoch": 17.53, - "learning_rate": 8.117195665328126e-08, - "loss": 3.8271, - "step": 1593500 - }, - { - "epoch": 17.54, - "learning_rate": 8.115820452170086e-08, - "loss": 3.8159, - "step": 1594000 - }, - { - "epoch": 17.54, - "learning_rate": 8.114445239012047e-08, - "loss": 3.8124, - "step": 1594500 - }, - { - "epoch": 17.55, - "learning_rate": 8.113070025854008e-08, - "loss": 3.7887, - "step": 1595000 - }, - { - "epoch": 17.55, - "learning_rate": 8.111694812695967e-08, - "loss": 3.7857, - "step": 1595500 - }, - { - "epoch": 17.56, - "learning_rate": 8.110319599537928e-08, - "loss": 3.8085, - "step": 1596000 - }, - { - "epoch": 17.56, - "learning_rate": 8.108944386379889e-08, - "loss": 3.8354, - "step": 1596500 - }, - { - "epoch": 17.57, - "learning_rate": 8.107569173221849e-08, - "loss": 3.8247, - "step": 1597000 - }, - { - "epoch": 17.58, - "learning_rate": 8.10619396006381e-08, - "loss": 3.8248, - "step": 1597500 - }, - { - "epoch": 17.58, - "learning_rate": 8.10481874690577e-08, - "loss": 3.8029, - "step": 1598000 - }, - { - "epoch": 17.59, - "learning_rate": 8.10344353374773e-08, - "loss": 3.8099, - "step": 1598500 - }, - { - "epoch": 17.59, - "learning_rate": 8.102068320589691e-08, - "loss": 3.8066, - "step": 1599000 - }, - { - "epoch": 17.6, - "learning_rate": 8.100693107431652e-08, - "loss": 3.8289, - "step": 1599500 - }, - { - "epoch": 17.6, - "learning_rate": 8.099317894273612e-08, - "loss": 3.8304, - "step": 1600000 - }, - { - "epoch": 17.61, - "learning_rate": 8.097942681115573e-08, - "loss": 3.8041, - "step": 1600500 - }, - { - "epoch": 17.61, - "learning_rate": 8.096567467957533e-08, - "loss": 3.8128, - "step": 1601000 - }, - { - "epoch": 17.62, - "learning_rate": 8.095192254799493e-08, - "loss": 3.821, - "step": 1601500 - }, - { - "epoch": 17.62, - "learning_rate": 8.093817041641454e-08, - "loss": 3.8065, - "step": 1602000 - }, - { - "epoch": 17.63, - "learning_rate": 8.092441828483415e-08, - "loss": 3.7951, - "step": 1602500 - }, - { - "epoch": 17.64, - "learning_rate": 8.091066615325375e-08, - "loss": 3.8105, - "step": 1603000 - }, - { - "epoch": 17.64, - "learning_rate": 8.089691402167336e-08, - "loss": 3.7891, - "step": 1603500 - }, - { - "epoch": 17.65, - "learning_rate": 8.088316189009296e-08, - "loss": 3.8157, - "step": 1604000 - }, - { - "epoch": 17.65, - "learning_rate": 8.086940975851256e-08, - "loss": 3.8124, - "step": 1604500 - }, - { - "epoch": 17.66, - "learning_rate": 8.085565762693217e-08, - "loss": 3.7946, - "step": 1605000 - }, - { - "epoch": 17.66, - "learning_rate": 8.084190549535178e-08, - "loss": 3.8254, - "step": 1605500 - }, - { - "epoch": 17.67, - "learning_rate": 8.082815336377138e-08, - "loss": 3.8205, - "step": 1606000 - }, - { - "epoch": 17.67, - "learning_rate": 8.081440123219098e-08, - "loss": 3.7953, - "step": 1606500 - }, - { - "epoch": 17.68, - "learning_rate": 8.08006491006106e-08, - "loss": 3.7984, - "step": 1607000 - }, - { - "epoch": 17.69, - "learning_rate": 8.078689696903019e-08, - "loss": 3.8071, - "step": 1607500 - }, - { - "epoch": 17.69, - "learning_rate": 8.07731448374498e-08, - "loss": 3.7946, - "step": 1608000 - }, - { - "epoch": 17.7, - "learning_rate": 8.075939270586941e-08, - "loss": 3.8044, - "step": 1608500 - }, - { - "epoch": 17.7, - "learning_rate": 8.0745640574289e-08, - "loss": 3.8063, - "step": 1609000 - }, - { - "epoch": 17.71, - "learning_rate": 8.073188844270861e-08, - "loss": 3.8125, - "step": 1609500 - }, - { - "epoch": 17.71, - "learning_rate": 8.071813631112822e-08, - "loss": 3.8094, - "step": 1610000 - }, - { - "epoch": 17.72, - "learning_rate": 8.070438417954782e-08, - "loss": 3.7871, - "step": 1610500 - }, - { - "epoch": 17.72, - "learning_rate": 8.069063204796743e-08, - "loss": 3.7944, - "step": 1611000 - }, - { - "epoch": 17.73, - "learning_rate": 8.067687991638704e-08, - "loss": 3.8184, - "step": 1611500 - }, - { - "epoch": 17.73, - "learning_rate": 8.066312778480663e-08, - "loss": 3.8204, - "step": 1612000 - }, - { - "epoch": 17.74, - "learning_rate": 8.064937565322624e-08, - "loss": 3.798, - "step": 1612500 - }, - { - "epoch": 17.75, - "learning_rate": 8.063562352164585e-08, - "loss": 3.804, - "step": 1613000 - }, - { - "epoch": 17.75, - "learning_rate": 8.062187139006545e-08, - "loss": 3.8104, - "step": 1613500 - }, - { - "epoch": 17.76, - "learning_rate": 8.060811925848506e-08, - "loss": 3.831, - "step": 1614000 - }, - { - "epoch": 17.76, - "learning_rate": 8.059436712690467e-08, - "loss": 3.8131, - "step": 1614500 - }, - { - "epoch": 17.77, - "learning_rate": 8.058061499532426e-08, - "loss": 3.8057, - "step": 1615000 - }, - { - "epoch": 17.77, - "learning_rate": 8.056686286374387e-08, - "loss": 3.7959, - "step": 1615500 - }, - { - "epoch": 17.78, - "learning_rate": 8.055311073216348e-08, - "loss": 3.8168, - "step": 1616000 - }, - { - "epoch": 17.78, - "learning_rate": 8.053935860058308e-08, - "loss": 3.8002, - "step": 1616500 - }, - { - "epoch": 17.79, - "learning_rate": 8.052560646900269e-08, - "loss": 3.8248, - "step": 1617000 - }, - { - "epoch": 17.8, - "learning_rate": 8.05118543374223e-08, - "loss": 3.8025, - "step": 1617500 - }, - { - "epoch": 17.8, - "learning_rate": 8.04981022058419e-08, - "loss": 3.7936, - "step": 1618000 - }, - { - "epoch": 17.81, - "learning_rate": 8.04843500742615e-08, - "loss": 3.8057, - "step": 1618500 - }, - { - "epoch": 17.81, - "learning_rate": 8.047059794268111e-08, - "loss": 3.8193, - "step": 1619000 - }, - { - "epoch": 17.82, - "learning_rate": 8.045684581110072e-08, - "loss": 3.8163, - "step": 1619500 - }, - { - "epoch": 17.82, - "learning_rate": 8.044309367952032e-08, - "loss": 3.8261, - "step": 1620000 - }, - { - "epoch": 17.83, - "learning_rate": 8.042934154793993e-08, - "loss": 3.8164, - "step": 1620500 - }, - { - "epoch": 17.83, - "learning_rate": 8.041558941635954e-08, - "loss": 3.8159, - "step": 1621000 - }, - { - "epoch": 17.84, - "learning_rate": 8.040183728477915e-08, - "loss": 3.8192, - "step": 1621500 - }, - { - "epoch": 17.84, - "learning_rate": 8.038808515319874e-08, - "loss": 3.8154, - "step": 1622000 - }, - { - "epoch": 17.85, - "learning_rate": 8.037433302161835e-08, - "loss": 3.8084, - "step": 1622500 - }, - { - "epoch": 17.86, - "learning_rate": 8.036058089003796e-08, - "loss": 3.826, - "step": 1623000 - }, - { - "epoch": 17.86, - "learning_rate": 8.034682875845756e-08, - "loss": 3.7953, - "step": 1623500 - }, - { - "epoch": 17.87, - "learning_rate": 8.033307662687717e-08, - "loss": 3.8011, - "step": 1624000 - }, - { - "epoch": 17.87, - "learning_rate": 8.031932449529678e-08, - "loss": 3.8136, - "step": 1624500 - }, - { - "epoch": 17.88, - "learning_rate": 8.030557236371637e-08, - "loss": 3.8168, - "step": 1625000 - }, - { - "epoch": 17.88, - "learning_rate": 8.029182023213598e-08, - "loss": 3.817, - "step": 1625500 - }, - { - "epoch": 17.89, - "learning_rate": 8.027806810055559e-08, - "loss": 3.7977, - "step": 1626000 - }, - { - "epoch": 17.89, - "learning_rate": 8.026431596897519e-08, - "loss": 3.8016, - "step": 1626500 - }, - { - "epoch": 17.9, - "learning_rate": 8.02505638373948e-08, - "loss": 3.8335, - "step": 1627000 - }, - { - "epoch": 17.91, - "learning_rate": 8.02368117058144e-08, - "loss": 3.8079, - "step": 1627500 - }, - { - "epoch": 17.91, - "learning_rate": 8.0223059574234e-08, - "loss": 3.7859, - "step": 1628000 - }, - { - "epoch": 17.92, - "learning_rate": 8.020930744265361e-08, - "loss": 3.8093, - "step": 1628500 - }, - { - "epoch": 17.92, - "learning_rate": 8.019555531107322e-08, - "loss": 3.8239, - "step": 1629000 - }, - { - "epoch": 17.93, - "learning_rate": 8.018180317949282e-08, - "loss": 3.7991, - "step": 1629500 - }, - { - "epoch": 17.93, - "learning_rate": 8.016805104791243e-08, - "loss": 3.8086, - "step": 1630000 - }, - { - "epoch": 17.94, - "learning_rate": 8.015429891633203e-08, - "loss": 3.8006, - "step": 1630500 - }, - { - "epoch": 17.94, - "learning_rate": 8.014054678475163e-08, - "loss": 3.8033, - "step": 1631000 - }, - { - "epoch": 17.95, - "learning_rate": 8.012679465317124e-08, - "loss": 3.8026, - "step": 1631500 - }, - { - "epoch": 17.95, - "learning_rate": 8.011304252159085e-08, - "loss": 3.8134, - "step": 1632000 - }, - { - "epoch": 17.96, - "learning_rate": 8.009929039001045e-08, - "loss": 3.8328, - "step": 1632500 - }, - { - "epoch": 17.97, - "learning_rate": 8.008553825843005e-08, - "loss": 3.8086, - "step": 1633000 - }, - { - "epoch": 17.97, - "learning_rate": 8.007178612684966e-08, - "loss": 3.7903, - "step": 1633500 - }, - { - "epoch": 17.98, - "learning_rate": 8.005803399526926e-08, - "loss": 3.8028, - "step": 1634000 - }, - { - "epoch": 17.98, - "learning_rate": 8.004428186368887e-08, - "loss": 3.8028, - "step": 1634500 - }, - { - "epoch": 17.99, - "learning_rate": 8.003052973210848e-08, - "loss": 3.7952, - "step": 1635000 - }, - { - "epoch": 17.99, - "learning_rate": 8.001677760052808e-08, - "loss": 3.7984, - "step": 1635500 - }, - { - "epoch": 18.0, - "learning_rate": 8.000302546894768e-08, - "loss": 3.788, - "step": 1636000 - }, - { - "epoch": 18.0, - "eval_loss": 3.860872507095337, - "eval_runtime": 6.1352, - "eval_samples_per_second": 253.291, - "step": 1636110 - }, - { - "epoch": 18.0, - "learning_rate": 7.99892733373673e-08, - "loss": 3.8161, - "step": 1636500 - }, - { - "epoch": 18.01, - "learning_rate": 7.997552120578689e-08, - "loss": 3.8062, - "step": 1637000 - }, - { - "epoch": 18.02, - "learning_rate": 7.99617690742065e-08, - "loss": 3.8121, - "step": 1637500 - }, - { - "epoch": 18.02, - "learning_rate": 7.994801694262611e-08, - "loss": 3.8058, - "step": 1638000 - }, - { - "epoch": 18.03, - "learning_rate": 7.99342648110457e-08, - "loss": 3.8118, - "step": 1638500 - }, - { - "epoch": 18.03, - "learning_rate": 7.992051267946531e-08, - "loss": 3.8155, - "step": 1639000 - }, - { - "epoch": 18.04, - "learning_rate": 7.990676054788492e-08, - "loss": 3.8114, - "step": 1639500 - }, - { - "epoch": 18.04, - "learning_rate": 7.989300841630452e-08, - "loss": 3.8172, - "step": 1640000 - }, - { - "epoch": 18.05, - "learning_rate": 7.987925628472413e-08, - "loss": 3.7924, - "step": 1640500 - }, - { - "epoch": 18.05, - "learning_rate": 7.986550415314374e-08, - "loss": 3.8207, - "step": 1641000 - }, - { - "epoch": 18.06, - "learning_rate": 7.985175202156333e-08, - "loss": 3.8103, - "step": 1641500 - }, - { - "epoch": 18.06, - "learning_rate": 7.983799988998294e-08, - "loss": 3.7955, - "step": 1642000 - }, - { - "epoch": 18.07, - "learning_rate": 7.982424775840255e-08, - "loss": 3.8059, - "step": 1642500 - }, - { - "epoch": 18.08, - "learning_rate": 7.981049562682215e-08, - "loss": 3.8079, - "step": 1643000 - }, - { - "epoch": 18.08, - "learning_rate": 7.979674349524176e-08, - "loss": 3.807, - "step": 1643500 - }, - { - "epoch": 18.09, - "learning_rate": 7.978299136366137e-08, - "loss": 3.8145, - "step": 1644000 - }, - { - "epoch": 18.09, - "learning_rate": 7.976923923208096e-08, - "loss": 3.7965, - "step": 1644500 - }, - { - "epoch": 18.1, - "learning_rate": 7.975548710050057e-08, - "loss": 3.8198, - "step": 1645000 - }, - { - "epoch": 18.1, - "learning_rate": 7.974173496892018e-08, - "loss": 3.8106, - "step": 1645500 - }, - { - "epoch": 18.11, - "learning_rate": 7.972798283733978e-08, - "loss": 3.8196, - "step": 1646000 - }, - { - "epoch": 18.11, - "learning_rate": 7.971423070575939e-08, - "loss": 3.8105, - "step": 1646500 - }, - { - "epoch": 18.12, - "learning_rate": 7.9700478574179e-08, - "loss": 3.8011, - "step": 1647000 - }, - { - "epoch": 18.13, - "learning_rate": 7.96867264425986e-08, - "loss": 3.7978, - "step": 1647500 - }, - { - "epoch": 18.13, - "learning_rate": 7.96729743110182e-08, - "loss": 3.8097, - "step": 1648000 - }, - { - "epoch": 18.14, - "learning_rate": 7.965922217943781e-08, - "loss": 3.7914, - "step": 1648500 - }, - { - "epoch": 18.14, - "learning_rate": 7.964547004785741e-08, - "loss": 3.8115, - "step": 1649000 - }, - { - "epoch": 18.15, - "learning_rate": 7.963171791627702e-08, - "loss": 3.8168, - "step": 1649500 - }, - { - "epoch": 18.15, - "learning_rate": 7.961796578469663e-08, - "loss": 3.8022, - "step": 1650000 - }, - { - "epoch": 18.16, - "learning_rate": 7.960421365311622e-08, - "loss": 3.8098, - "step": 1650500 - }, - { - "epoch": 18.16, - "learning_rate": 7.959046152153583e-08, - "loss": 3.8304, - "step": 1651000 - }, - { - "epoch": 18.17, - "learning_rate": 7.957670938995544e-08, - "loss": 3.812, - "step": 1651500 - }, - { - "epoch": 18.17, - "learning_rate": 7.956295725837504e-08, - "loss": 3.8087, - "step": 1652000 - }, - { - "epoch": 18.18, - "learning_rate": 7.954920512679465e-08, - "loss": 3.8073, - "step": 1652500 - }, - { - "epoch": 18.19, - "learning_rate": 7.953545299521426e-08, - "loss": 3.8192, - "step": 1653000 - }, - { - "epoch": 18.19, - "learning_rate": 7.952170086363385e-08, - "loss": 3.7984, - "step": 1653500 - }, - { - "epoch": 18.2, - "learning_rate": 7.950794873205346e-08, - "loss": 3.8063, - "step": 1654000 - }, - { - "epoch": 18.2, - "learning_rate": 7.949419660047307e-08, - "loss": 3.8025, - "step": 1654500 - }, - { - "epoch": 18.21, - "learning_rate": 7.948044446889267e-08, - "loss": 3.8158, - "step": 1655000 - }, - { - "epoch": 18.21, - "learning_rate": 7.946669233731228e-08, - "loss": 3.8049, - "step": 1655500 - }, - { - "epoch": 18.22, - "learning_rate": 7.945294020573189e-08, - "loss": 3.8024, - "step": 1656000 - }, - { - "epoch": 18.22, - "learning_rate": 7.943918807415148e-08, - "loss": 3.8011, - "step": 1656500 - }, - { - "epoch": 18.23, - "learning_rate": 7.942543594257109e-08, - "loss": 3.802, - "step": 1657000 - }, - { - "epoch": 18.24, - "learning_rate": 7.94116838109907e-08, - "loss": 3.8148, - "step": 1657500 - }, - { - "epoch": 18.24, - "learning_rate": 7.93979316794103e-08, - "loss": 3.8231, - "step": 1658000 - }, - { - "epoch": 18.25, - "learning_rate": 7.93841795478299e-08, - "loss": 3.7882, - "step": 1658500 - }, - { - "epoch": 18.25, - "learning_rate": 7.937042741624952e-08, - "loss": 3.8075, - "step": 1659000 - }, - { - "epoch": 18.26, - "learning_rate": 7.935667528466911e-08, - "loss": 3.8046, - "step": 1659500 - }, - { - "epoch": 18.26, - "learning_rate": 7.934292315308872e-08, - "loss": 3.7936, - "step": 1660000 - }, - { - "epoch": 18.27, - "learning_rate": 7.932917102150833e-08, - "loss": 3.8076, - "step": 1660500 - }, - { - "epoch": 18.27, - "learning_rate": 7.931541888992793e-08, - "loss": 3.7951, - "step": 1661000 - }, - { - "epoch": 18.28, - "learning_rate": 7.930166675834754e-08, - "loss": 3.8149, - "step": 1661500 - }, - { - "epoch": 18.28, - "learning_rate": 7.928791462676715e-08, - "loss": 3.8178, - "step": 1662000 - }, - { - "epoch": 18.29, - "learning_rate": 7.927416249518674e-08, - "loss": 3.8022, - "step": 1662500 - }, - { - "epoch": 18.3, - "learning_rate": 7.926041036360635e-08, - "loss": 3.7946, - "step": 1663000 - }, - { - "epoch": 18.3, - "learning_rate": 7.924665823202596e-08, - "loss": 3.8025, - "step": 1663500 - }, - { - "epoch": 18.31, - "learning_rate": 7.923290610044556e-08, - "loss": 3.7899, - "step": 1664000 - }, - { - "epoch": 18.31, - "learning_rate": 7.921915396886517e-08, - "loss": 3.8124, - "step": 1664500 - }, - { - "epoch": 18.32, - "learning_rate": 7.920540183728477e-08, - "loss": 3.8166, - "step": 1665000 - }, - { - "epoch": 18.32, - "learning_rate": 7.919164970570437e-08, - "loss": 3.801, - "step": 1665500 - }, - { - "epoch": 18.33, - "learning_rate": 7.917789757412398e-08, - "loss": 3.7891, - "step": 1666000 - }, - { - "epoch": 18.33, - "learning_rate": 7.916414544254359e-08, - "loss": 3.8237, - "step": 1666500 - }, - { - "epoch": 18.34, - "learning_rate": 7.91503933109632e-08, - "loss": 3.8199, - "step": 1667000 - }, - { - "epoch": 18.35, - "learning_rate": 7.91366411793828e-08, - "loss": 3.7858, - "step": 1667500 - }, - { - "epoch": 18.35, - "learning_rate": 7.91228890478024e-08, - "loss": 3.7956, - "step": 1668000 - }, - { - "epoch": 18.36, - "learning_rate": 7.910913691622201e-08, - "loss": 3.806, - "step": 1668500 - }, - { - "epoch": 18.36, - "learning_rate": 7.909538478464162e-08, - "loss": 3.8147, - "step": 1669000 - }, - { - "epoch": 18.37, - "learning_rate": 7.908163265306122e-08, - "loss": 3.8237, - "step": 1669500 - }, - { - "epoch": 18.37, - "learning_rate": 7.906788052148083e-08, - "loss": 3.8001, - "step": 1670000 - }, - { - "epoch": 18.38, - "learning_rate": 7.905412838990044e-08, - "loss": 3.8098, - "step": 1670500 - }, - { - "epoch": 18.38, - "learning_rate": 7.904037625832003e-08, - "loss": 3.7977, - "step": 1671000 - }, - { - "epoch": 18.39, - "learning_rate": 7.902662412673964e-08, - "loss": 3.8142, - "step": 1671500 - }, - { - "epoch": 18.39, - "learning_rate": 7.901287199515925e-08, - "loss": 3.8138, - "step": 1672000 - }, - { - "epoch": 18.4, - "learning_rate": 7.899911986357886e-08, - "loss": 3.8075, - "step": 1672500 - }, - { - "epoch": 18.41, - "learning_rate": 7.898536773199846e-08, - "loss": 3.7988, - "step": 1673000 - }, - { - "epoch": 18.41, - "learning_rate": 7.897161560041807e-08, - "loss": 3.787, - "step": 1673500 - }, - { - "epoch": 18.42, - "learning_rate": 7.895786346883768e-08, - "loss": 3.8045, - "step": 1674000 - }, - { - "epoch": 18.42, - "learning_rate": 7.894411133725727e-08, - "loss": 3.8195, - "step": 1674500 - }, - { - "epoch": 18.43, - "learning_rate": 7.893035920567688e-08, - "loss": 3.8074, - "step": 1675000 - }, - { - "epoch": 18.43, - "learning_rate": 7.891660707409649e-08, - "loss": 3.7948, - "step": 1675500 - }, - { - "epoch": 18.44, - "learning_rate": 7.890285494251609e-08, - "loss": 3.7984, - "step": 1676000 - }, - { - "epoch": 18.44, - "learning_rate": 7.88891028109357e-08, - "loss": 3.81, - "step": 1676500 - }, - { - "epoch": 18.45, - "learning_rate": 7.88753506793553e-08, - "loss": 3.8049, - "step": 1677000 - }, - { - "epoch": 18.46, - "learning_rate": 7.88615985477749e-08, - "loss": 3.8051, - "step": 1677500 - }, - { - "epoch": 18.46, - "learning_rate": 7.884784641619451e-08, - "loss": 3.8276, - "step": 1678000 - }, - { - "epoch": 18.47, - "learning_rate": 7.883409428461412e-08, - "loss": 3.8282, - "step": 1678500 - }, - { - "epoch": 18.47, - "learning_rate": 7.882034215303372e-08, - "loss": 3.7945, - "step": 1679000 - }, - { - "epoch": 18.48, - "learning_rate": 7.880659002145333e-08, - "loss": 3.7994, - "step": 1679500 - }, - { - "epoch": 18.48, - "learning_rate": 7.879283788987294e-08, - "loss": 3.8124, - "step": 1680000 - }, - { - "epoch": 18.49, - "learning_rate": 7.877908575829253e-08, - "loss": 3.8033, - "step": 1680500 - }, - { - "epoch": 18.49, - "learning_rate": 7.876533362671214e-08, - "loss": 3.7845, - "step": 1681000 - }, - { - "epoch": 18.5, - "learning_rate": 7.875158149513175e-08, - "loss": 3.7968, - "step": 1681500 - }, - { - "epoch": 18.5, - "learning_rate": 7.873782936355135e-08, - "loss": 3.7885, - "step": 1682000 - }, - { - "epoch": 18.51, - "learning_rate": 7.872407723197096e-08, - "loss": 3.8202, - "step": 1682500 - }, - { - "epoch": 18.52, - "learning_rate": 7.871032510039057e-08, - "loss": 3.8037, - "step": 1683000 - }, - { - "epoch": 18.52, - "learning_rate": 7.869657296881016e-08, - "loss": 3.8026, - "step": 1683500 - }, - { - "epoch": 18.53, - "learning_rate": 7.868282083722977e-08, - "loss": 3.7969, - "step": 1684000 - }, - { - "epoch": 18.53, - "learning_rate": 7.866906870564938e-08, - "loss": 3.8033, - "step": 1684500 - }, - { - "epoch": 18.54, - "learning_rate": 7.865531657406898e-08, - "loss": 3.8002, - "step": 1685000 - }, - { - "epoch": 18.54, - "learning_rate": 7.864156444248859e-08, - "loss": 3.8247, - "step": 1685500 - }, - { - "epoch": 18.55, - "learning_rate": 7.86278123109082e-08, - "loss": 3.8131, - "step": 1686000 - }, - { - "epoch": 18.55, - "learning_rate": 7.861406017932779e-08, - "loss": 3.7968, - "step": 1686500 - }, - { - "epoch": 18.56, - "learning_rate": 7.86003080477474e-08, - "loss": 3.8057, - "step": 1687000 - }, - { - "epoch": 18.57, - "learning_rate": 7.858655591616701e-08, - "loss": 3.7945, - "step": 1687500 - }, - { - "epoch": 18.57, - "learning_rate": 7.85728037845866e-08, - "loss": 3.7944, - "step": 1688000 - }, - { - "epoch": 18.58, - "learning_rate": 7.855905165300622e-08, - "loss": 3.8053, - "step": 1688500 - }, - { - "epoch": 18.58, - "learning_rate": 7.854529952142582e-08, - "loss": 3.7992, - "step": 1689000 - }, - { - "epoch": 18.59, - "learning_rate": 7.853154738984542e-08, - "loss": 3.809, - "step": 1689500 - }, - { - "epoch": 18.59, - "learning_rate": 7.851779525826503e-08, - "loss": 3.8041, - "step": 1690000 - }, - { - "epoch": 18.6, - "learning_rate": 7.850404312668463e-08, - "loss": 3.8037, - "step": 1690500 - }, - { - "epoch": 18.6, - "learning_rate": 7.849029099510424e-08, - "loss": 3.8158, - "step": 1691000 - }, - { - "epoch": 18.61, - "learning_rate": 7.847653886352385e-08, - "loss": 3.8153, - "step": 1691500 - }, - { - "epoch": 18.61, - "learning_rate": 7.846278673194344e-08, - "loss": 3.8137, - "step": 1692000 - }, - { - "epoch": 18.62, - "learning_rate": 7.844903460036305e-08, - "loss": 3.8136, - "step": 1692500 - }, - { - "epoch": 18.63, - "learning_rate": 7.843528246878266e-08, - "loss": 3.8069, - "step": 1693000 - }, - { - "epoch": 18.63, - "learning_rate": 7.842153033720226e-08, - "loss": 3.8125, - "step": 1693500 - }, - { - "epoch": 18.64, - "learning_rate": 7.840777820562187e-08, - "loss": 3.8106, - "step": 1694000 - }, - { - "epoch": 18.64, - "learning_rate": 7.839402607404147e-08, - "loss": 3.8094, - "step": 1694500 - }, - { - "epoch": 18.65, - "learning_rate": 7.838027394246107e-08, - "loss": 3.8034, - "step": 1695000 - }, - { - "epoch": 18.65, - "learning_rate": 7.836652181088068e-08, - "loss": 3.8082, - "step": 1695500 - }, - { - "epoch": 18.66, - "learning_rate": 7.835276967930029e-08, - "loss": 3.7996, - "step": 1696000 - }, - { - "epoch": 18.66, - "learning_rate": 7.833901754771989e-08, - "loss": 3.7986, - "step": 1696500 - }, - { - "epoch": 18.67, - "learning_rate": 7.83252654161395e-08, - "loss": 3.8118, - "step": 1697000 - }, - { - "epoch": 18.68, - "learning_rate": 7.83115132845591e-08, - "loss": 3.8101, - "step": 1697500 - }, - { - "epoch": 18.68, - "learning_rate": 7.82977611529787e-08, - "loss": 3.822, - "step": 1698000 - }, - { - "epoch": 18.69, - "learning_rate": 7.828400902139831e-08, - "loss": 3.8212, - "step": 1698500 - }, - { - "epoch": 18.69, - "learning_rate": 7.827025688981792e-08, - "loss": 3.8175, - "step": 1699000 - }, - { - "epoch": 18.7, - "learning_rate": 7.825650475823752e-08, - "loss": 3.7896, - "step": 1699500 - }, - { - "epoch": 18.7, - "learning_rate": 7.824275262665712e-08, - "loss": 3.7976, - "step": 1700000 - }, - { - "epoch": 18.71, - "learning_rate": 7.822900049507673e-08, - "loss": 3.8041, - "step": 1700500 - }, - { - "epoch": 18.71, - "learning_rate": 7.821524836349633e-08, - "loss": 3.8006, - "step": 1701000 - }, - { - "epoch": 18.72, - "learning_rate": 7.820149623191594e-08, - "loss": 3.8091, - "step": 1701500 - }, - { - "epoch": 18.72, - "learning_rate": 7.818774410033555e-08, - "loss": 3.8033, - "step": 1702000 - }, - { - "epoch": 18.73, - "learning_rate": 7.817399196875514e-08, - "loss": 3.8165, - "step": 1702500 - }, - { - "epoch": 18.74, - "learning_rate": 7.816023983717475e-08, - "loss": 3.8281, - "step": 1703000 - }, - { - "epoch": 18.74, - "learning_rate": 7.814648770559436e-08, - "loss": 3.7862, - "step": 1703500 - }, - { - "epoch": 18.75, - "learning_rate": 7.813273557401396e-08, - "loss": 3.798, - "step": 1704000 - }, - { - "epoch": 18.75, - "learning_rate": 7.811898344243357e-08, - "loss": 3.7944, - "step": 1704500 - }, - { - "epoch": 18.76, - "learning_rate": 7.810523131085318e-08, - "loss": 3.7783, - "step": 1705000 - }, - { - "epoch": 18.76, - "learning_rate": 7.809147917927277e-08, - "loss": 3.8174, - "step": 1705500 - }, - { - "epoch": 18.77, - "learning_rate": 7.807772704769238e-08, - "loss": 3.8093, - "step": 1706000 - }, - { - "epoch": 18.77, - "learning_rate": 7.806397491611199e-08, - "loss": 3.812, - "step": 1706500 - }, - { - "epoch": 18.78, - "learning_rate": 7.805022278453159e-08, - "loss": 3.8109, - "step": 1707000 - }, - { - "epoch": 18.79, - "learning_rate": 7.80364706529512e-08, - "loss": 3.7995, - "step": 1707500 - }, - { - "epoch": 18.79, - "learning_rate": 7.802271852137081e-08, - "loss": 3.8057, - "step": 1708000 - }, - { - "epoch": 18.8, - "learning_rate": 7.80089663897904e-08, - "loss": 3.8048, - "step": 1708500 - }, - { - "epoch": 18.8, - "learning_rate": 7.799521425821001e-08, - "loss": 3.792, - "step": 1709000 - }, - { - "epoch": 18.81, - "learning_rate": 7.798146212662962e-08, - "loss": 3.8111, - "step": 1709500 - }, - { - "epoch": 18.81, - "learning_rate": 7.796770999504922e-08, - "loss": 3.8003, - "step": 1710000 - }, - { - "epoch": 18.82, - "learning_rate": 7.795395786346883e-08, - "loss": 3.8219, - "step": 1710500 - }, - { - "epoch": 18.82, - "learning_rate": 7.794020573188844e-08, - "loss": 3.7904, - "step": 1711000 - }, - { - "epoch": 18.83, - "learning_rate": 7.792645360030803e-08, - "loss": 3.8104, - "step": 1711500 - }, - { - "epoch": 18.83, - "learning_rate": 7.791270146872764e-08, - "loss": 3.7934, - "step": 1712000 - }, - { - "epoch": 18.84, - "learning_rate": 7.789894933714725e-08, - "loss": 3.7794, - "step": 1712500 - }, - { - "epoch": 18.85, - "learning_rate": 7.788519720556685e-08, - "loss": 3.8252, - "step": 1713000 - }, - { - "epoch": 18.85, - "learning_rate": 7.787144507398646e-08, - "loss": 3.7968, - "step": 1713500 - }, - { - "epoch": 18.86, - "learning_rate": 7.785769294240607e-08, - "loss": 3.8034, - "step": 1714000 - }, - { - "epoch": 18.86, - "learning_rate": 7.784394081082568e-08, - "loss": 3.7927, - "step": 1714500 - }, - { - "epoch": 18.87, - "learning_rate": 7.783018867924527e-08, - "loss": 3.7913, - "step": 1715000 - }, - { - "epoch": 18.87, - "learning_rate": 7.781643654766488e-08, - "loss": 3.8069, - "step": 1715500 - }, - { - "epoch": 18.88, - "learning_rate": 7.780268441608449e-08, - "loss": 3.8055, - "step": 1716000 - }, - { - "epoch": 18.88, - "learning_rate": 7.77889322845041e-08, - "loss": 3.7979, - "step": 1716500 - }, - { - "epoch": 18.89, - "learning_rate": 7.77751801529237e-08, - "loss": 3.8133, - "step": 1717000 - }, - { - "epoch": 18.9, - "learning_rate": 7.77614280213433e-08, - "loss": 3.8077, - "step": 1717500 - }, - { - "epoch": 18.9, - "learning_rate": 7.774767588976292e-08, - "loss": 3.8178, - "step": 1718000 - }, - { - "epoch": 18.91, - "learning_rate": 7.773392375818251e-08, - "loss": 3.8209, - "step": 1718500 - }, - { - "epoch": 18.91, - "learning_rate": 7.772017162660212e-08, - "loss": 3.8106, - "step": 1719000 - }, - { - "epoch": 18.92, - "learning_rate": 7.770641949502173e-08, - "loss": 3.8102, - "step": 1719500 - }, - { - "epoch": 18.92, - "learning_rate": 7.769266736344134e-08, - "loss": 3.8029, - "step": 1720000 - }, - { - "epoch": 18.93, - "learning_rate": 7.767891523186094e-08, - "loss": 3.7924, - "step": 1720500 - }, - { - "epoch": 18.93, - "learning_rate": 7.766516310028054e-08, - "loss": 3.8019, - "step": 1721000 - }, - { - "epoch": 18.94, - "learning_rate": 7.765141096870015e-08, - "loss": 3.7882, - "step": 1721500 - }, - { - "epoch": 18.94, - "learning_rate": 7.763765883711975e-08, - "loss": 3.813, - "step": 1722000 - }, - { - "epoch": 18.95, - "learning_rate": 7.762390670553936e-08, - "loss": 3.8002, - "step": 1722500 - }, - { - "epoch": 18.96, - "learning_rate": 7.761015457395897e-08, - "loss": 3.791, - "step": 1723000 - }, - { - "epoch": 18.96, - "learning_rate": 7.759640244237857e-08, - "loss": 3.8016, - "step": 1723500 - }, - { - "epoch": 18.97, - "learning_rate": 7.758265031079817e-08, - "loss": 3.7868, - "step": 1724000 - }, - { - "epoch": 18.97, - "learning_rate": 7.756889817921778e-08, - "loss": 3.786, - "step": 1724500 - }, - { - "epoch": 18.98, - "learning_rate": 7.755514604763738e-08, - "loss": 3.8076, - "step": 1725000 - }, - { - "epoch": 18.98, - "learning_rate": 7.754139391605699e-08, - "loss": 3.7932, - "step": 1725500 - }, - { - "epoch": 18.99, - "learning_rate": 7.75276417844766e-08, - "loss": 3.7891, - "step": 1726000 - }, - { - "epoch": 18.99, - "learning_rate": 7.75138896528962e-08, - "loss": 3.8037, - "step": 1726500 - }, - { - "epoch": 19.0, - "learning_rate": 7.75001375213158e-08, - "loss": 3.8056, - "step": 1727000 - }, - { - "epoch": 19.0, - "eval_loss": 3.857905387878418, - "eval_runtime": 6.1406, - "eval_samples_per_second": 253.07, - "step": 1727005 - }, - { - "epoch": 19.01, - "learning_rate": 7.748638538973541e-08, - "loss": 3.7989, - "step": 1727500 - }, - { - "epoch": 19.01, - "learning_rate": 7.747263325815501e-08, - "loss": 3.8012, - "step": 1728000 - }, - { - "epoch": 19.02, - "learning_rate": 7.745888112657462e-08, - "loss": 3.7847, - "step": 1728500 - }, - { - "epoch": 19.02, - "learning_rate": 7.744512899499423e-08, - "loss": 3.8137, - "step": 1729000 - }, - { - "epoch": 19.03, - "learning_rate": 7.743137686341382e-08, - "loss": 3.7922, - "step": 1729500 - }, - { - "epoch": 19.03, - "learning_rate": 7.741762473183343e-08, - "loss": 3.7999, - "step": 1730000 - }, - { - "epoch": 19.04, - "learning_rate": 7.740387260025304e-08, - "loss": 3.7989, - "step": 1730500 - }, - { - "epoch": 19.04, - "learning_rate": 7.739012046867264e-08, - "loss": 3.7866, - "step": 1731000 - }, - { - "epoch": 19.05, - "learning_rate": 7.737636833709225e-08, - "loss": 3.7825, - "step": 1731500 - }, - { - "epoch": 19.05, - "learning_rate": 7.736261620551186e-08, - "loss": 3.7951, - "step": 1732000 - }, - { - "epoch": 19.06, - "learning_rate": 7.734886407393145e-08, - "loss": 3.8076, - "step": 1732500 - }, - { - "epoch": 19.07, - "learning_rate": 7.733511194235106e-08, - "loss": 3.8111, - "step": 1733000 - }, - { - "epoch": 19.07, - "learning_rate": 7.732135981077067e-08, - "loss": 3.8063, - "step": 1733500 - }, - { - "epoch": 19.08, - "learning_rate": 7.730760767919027e-08, - "loss": 3.8003, - "step": 1734000 - }, - { - "epoch": 19.08, - "learning_rate": 7.729385554760988e-08, - "loss": 3.8058, - "step": 1734500 - }, - { - "epoch": 19.09, - "learning_rate": 7.728010341602949e-08, - "loss": 3.7894, - "step": 1735000 - }, - { - "epoch": 19.09, - "learning_rate": 7.726635128444908e-08, - "loss": 3.8014, - "step": 1735500 - }, - { - "epoch": 19.1, - "learning_rate": 7.725259915286869e-08, - "loss": 3.7958, - "step": 1736000 - }, - { - "epoch": 19.1, - "learning_rate": 7.72388470212883e-08, - "loss": 3.7969, - "step": 1736500 - }, - { - "epoch": 19.11, - "learning_rate": 7.72250948897079e-08, - "loss": 3.7957, - "step": 1737000 - }, - { - "epoch": 19.12, - "learning_rate": 7.721134275812751e-08, - "loss": 3.7896, - "step": 1737500 - }, - { - "epoch": 19.12, - "learning_rate": 7.719759062654712e-08, - "loss": 3.802, - "step": 1738000 - }, - { - "epoch": 19.13, - "learning_rate": 7.718383849496671e-08, - "loss": 3.8054, - "step": 1738500 - }, - { - "epoch": 19.13, - "learning_rate": 7.717008636338632e-08, - "loss": 3.817, - "step": 1739000 - }, - { - "epoch": 19.14, - "learning_rate": 7.715633423180593e-08, - "loss": 3.7997, - "step": 1739500 - }, - { - "epoch": 19.14, - "learning_rate": 7.714258210022553e-08, - "loss": 3.805, - "step": 1740000 - }, - { - "epoch": 19.15, - "learning_rate": 7.712882996864514e-08, - "loss": 3.7882, - "step": 1740500 - }, - { - "epoch": 19.15, - "learning_rate": 7.711507783706475e-08, - "loss": 3.7908, - "step": 1741000 - }, - { - "epoch": 19.16, - "learning_rate": 7.710132570548434e-08, - "loss": 3.8155, - "step": 1741500 - }, - { - "epoch": 19.16, - "learning_rate": 7.708757357390395e-08, - "loss": 3.7997, - "step": 1742000 - }, - { - "epoch": 19.17, - "learning_rate": 7.707382144232356e-08, - "loss": 3.7989, - "step": 1742500 - }, - { - "epoch": 19.18, - "learning_rate": 7.706006931074316e-08, - "loss": 3.8028, - "step": 1743000 - }, - { - "epoch": 19.18, - "learning_rate": 7.704631717916277e-08, - "loss": 3.7905, - "step": 1743500 - }, - { - "epoch": 19.19, - "learning_rate": 7.703256504758238e-08, - "loss": 3.79, - "step": 1744000 - }, - { - "epoch": 19.19, - "learning_rate": 7.701881291600197e-08, - "loss": 3.7958, - "step": 1744500 - }, - { - "epoch": 19.2, - "learning_rate": 7.700506078442158e-08, - "loss": 3.7968, - "step": 1745000 - }, - { - "epoch": 19.2, - "learning_rate": 7.699130865284119e-08, - "loss": 3.8083, - "step": 1745500 - }, - { - "epoch": 19.21, - "learning_rate": 7.697755652126079e-08, - "loss": 3.8037, - "step": 1746000 - }, - { - "epoch": 19.21, - "learning_rate": 7.69638043896804e-08, - "loss": 3.7927, - "step": 1746500 - }, - { - "epoch": 19.22, - "learning_rate": 7.69500522581e-08, - "loss": 3.8105, - "step": 1747000 - }, - { - "epoch": 19.23, - "learning_rate": 7.69363001265196e-08, - "loss": 3.8142, - "step": 1747500 - }, - { - "epoch": 19.23, - "learning_rate": 7.692254799493921e-08, - "loss": 3.7748, - "step": 1748000 - }, - { - "epoch": 19.24, - "learning_rate": 7.690879586335882e-08, - "loss": 3.8161, - "step": 1748500 - }, - { - "epoch": 19.24, - "learning_rate": 7.689504373177842e-08, - "loss": 3.8144, - "step": 1749000 - }, - { - "epoch": 19.25, - "learning_rate": 7.688129160019803e-08, - "loss": 3.7775, - "step": 1749500 - }, - { - "epoch": 19.25, - "learning_rate": 7.686753946861764e-08, - "loss": 3.8036, - "step": 1750000 - }, - { - "epoch": 19.26, - "learning_rate": 7.685378733703723e-08, - "loss": 3.8006, - "step": 1750500 - }, - { - "epoch": 19.26, - "learning_rate": 7.684003520545684e-08, - "loss": 3.7953, - "step": 1751000 - }, - { - "epoch": 19.27, - "learning_rate": 7.682628307387645e-08, - "loss": 3.8102, - "step": 1751500 - }, - { - "epoch": 19.27, - "learning_rate": 7.681253094229605e-08, - "loss": 3.7986, - "step": 1752000 - }, - { - "epoch": 19.28, - "learning_rate": 7.679877881071566e-08, - "loss": 3.7997, - "step": 1752500 - }, - { - "epoch": 19.29, - "learning_rate": 7.678502667913526e-08, - "loss": 3.8148, - "step": 1753000 - }, - { - "epoch": 19.29, - "learning_rate": 7.677127454755486e-08, - "loss": 3.7945, - "step": 1753500 - }, - { - "epoch": 19.3, - "learning_rate": 7.675752241597447e-08, - "loss": 3.7945, - "step": 1754000 - }, - { - "epoch": 19.3, - "learning_rate": 7.674377028439408e-08, - "loss": 3.7781, - "step": 1754500 - }, - { - "epoch": 19.31, - "learning_rate": 7.673001815281368e-08, - "loss": 3.7944, - "step": 1755000 - }, - { - "epoch": 19.31, - "learning_rate": 7.671626602123329e-08, - "loss": 3.8031, - "step": 1755500 - }, - { - "epoch": 19.32, - "learning_rate": 7.67025138896529e-08, - "loss": 3.7659, - "step": 1756000 - }, - { - "epoch": 19.32, - "learning_rate": 7.668876175807249e-08, - "loss": 3.8018, - "step": 1756500 - }, - { - "epoch": 19.33, - "learning_rate": 7.66750096264921e-08, - "loss": 3.8116, - "step": 1757000 - }, - { - "epoch": 19.34, - "learning_rate": 7.666125749491171e-08, - "loss": 3.7869, - "step": 1757500 - }, - { - "epoch": 19.34, - "learning_rate": 7.66475053633313e-08, - "loss": 3.8056, - "step": 1758000 - }, - { - "epoch": 19.35, - "learning_rate": 7.663375323175091e-08, - "loss": 3.8048, - "step": 1758500 - }, - { - "epoch": 19.35, - "learning_rate": 7.662000110017052e-08, - "loss": 3.8022, - "step": 1759000 - }, - { - "epoch": 19.36, - "learning_rate": 7.660624896859012e-08, - "loss": 3.8281, - "step": 1759500 - }, - { - "epoch": 19.36, - "learning_rate": 7.659249683700973e-08, - "loss": 3.8001, - "step": 1760000 - }, - { - "epoch": 19.37, - "learning_rate": 7.657874470542934e-08, - "loss": 3.8216, - "step": 1760500 - }, - { - "epoch": 19.37, - "learning_rate": 7.656499257384894e-08, - "loss": 3.7881, - "step": 1761000 - }, - { - "epoch": 19.38, - "learning_rate": 7.655124044226854e-08, - "loss": 3.7993, - "step": 1761500 - }, - { - "epoch": 19.39, - "learning_rate": 7.653748831068815e-08, - "loss": 3.8056, - "step": 1762000 - }, - { - "epoch": 19.39, - "learning_rate": 7.652373617910775e-08, - "loss": 3.8083, - "step": 1762500 - }, - { - "epoch": 19.4, - "learning_rate": 7.650998404752736e-08, - "loss": 3.7851, - "step": 1763000 - }, - { - "epoch": 19.4, - "learning_rate": 7.649623191594697e-08, - "loss": 3.783, - "step": 1763500 - }, - { - "epoch": 19.41, - "learning_rate": 7.648247978436658e-08, - "loss": 3.8046, - "step": 1764000 - }, - { - "epoch": 19.41, - "learning_rate": 7.646872765278617e-08, - "loss": 3.8168, - "step": 1764500 - }, - { - "epoch": 19.42, - "learning_rate": 7.645497552120578e-08, - "loss": 3.8018, - "step": 1765000 - }, - { - "epoch": 19.42, - "learning_rate": 7.644122338962539e-08, - "loss": 3.8224, - "step": 1765500 - }, - { - "epoch": 19.43, - "learning_rate": 7.642747125804499e-08, - "loss": 3.8096, - "step": 1766000 - }, - { - "epoch": 19.43, - "learning_rate": 7.64137191264646e-08, - "loss": 3.8257, - "step": 1766500 - }, - { - "epoch": 19.44, - "learning_rate": 7.639996699488421e-08, - "loss": 3.8101, - "step": 1767000 - }, - { - "epoch": 19.45, - "learning_rate": 7.638621486330382e-08, - "loss": 3.7982, - "step": 1767500 - }, - { - "epoch": 19.45, - "learning_rate": 7.637246273172341e-08, - "loss": 3.8054, - "step": 1768000 - }, - { - "epoch": 19.46, - "learning_rate": 7.635871060014302e-08, - "loss": 3.8039, - "step": 1768500 - }, - { - "epoch": 19.46, - "learning_rate": 7.634495846856263e-08, - "loss": 3.8004, - "step": 1769000 - }, - { - "epoch": 19.47, - "learning_rate": 7.633120633698223e-08, - "loss": 3.8, - "step": 1769500 - }, - { - "epoch": 19.47, - "learning_rate": 7.631745420540184e-08, - "loss": 3.7962, - "step": 1770000 - }, - { - "epoch": 19.48, - "learning_rate": 7.630370207382145e-08, - "loss": 3.781, - "step": 1770500 - }, - { - "epoch": 19.48, - "learning_rate": 7.628994994224104e-08, - "loss": 3.8127, - "step": 1771000 - }, - { - "epoch": 19.49, - "learning_rate": 7.627619781066065e-08, - "loss": 3.8074, - "step": 1771500 - }, - { - "epoch": 19.5, - "learning_rate": 7.626244567908026e-08, - "loss": 3.8072, - "step": 1772000 - }, - { - "epoch": 19.5, - "learning_rate": 7.624869354749986e-08, - "loss": 3.809, - "step": 1772500 - }, - { - "epoch": 19.51, - "learning_rate": 7.623494141591947e-08, - "loss": 3.8032, - "step": 1773000 - }, - { - "epoch": 19.51, - "learning_rate": 7.622118928433908e-08, - "loss": 3.8006, - "step": 1773500 - }, - { - "epoch": 19.52, - "learning_rate": 7.620743715275867e-08, - "loss": 3.7921, - "step": 1774000 - }, - { - "epoch": 19.52, - "learning_rate": 7.619368502117828e-08, - "loss": 3.8094, - "step": 1774500 - }, - { - "epoch": 19.53, - "learning_rate": 7.617993288959789e-08, - "loss": 3.8035, - "step": 1775000 - }, - { - "epoch": 19.53, - "learning_rate": 7.616618075801749e-08, - "loss": 3.8032, - "step": 1775500 - }, - { - "epoch": 19.54, - "learning_rate": 7.61524286264371e-08, - "loss": 3.8109, - "step": 1776000 - }, - { - "epoch": 19.54, - "learning_rate": 7.61386764948567e-08, - "loss": 3.8038, - "step": 1776500 - }, - { - "epoch": 19.55, - "learning_rate": 7.61249243632763e-08, - "loss": 3.809, - "step": 1777000 - }, - { - "epoch": 19.56, - "learning_rate": 7.611117223169591e-08, - "loss": 3.7974, - "step": 1777500 - }, - { - "epoch": 19.56, - "learning_rate": 7.609742010011552e-08, - "loss": 3.7956, - "step": 1778000 - }, - { - "epoch": 19.57, - "learning_rate": 7.608366796853512e-08, - "loss": 3.8194, - "step": 1778500 - }, - { - "epoch": 19.57, - "learning_rate": 7.606991583695473e-08, - "loss": 3.7878, - "step": 1779000 - }, - { - "epoch": 19.58, - "learning_rate": 7.605616370537434e-08, - "loss": 3.8054, - "step": 1779500 - }, - { - "epoch": 19.58, - "learning_rate": 7.604241157379393e-08, - "loss": 3.8027, - "step": 1780000 - }, - { - "epoch": 19.59, - "learning_rate": 7.602865944221354e-08, - "loss": 3.793, - "step": 1780500 - }, - { - "epoch": 19.59, - "learning_rate": 7.601490731063315e-08, - "loss": 3.8004, - "step": 1781000 - }, - { - "epoch": 19.6, - "learning_rate": 7.600115517905275e-08, - "loss": 3.7821, - "step": 1781500 - }, - { - "epoch": 19.61, - "learning_rate": 7.598740304747236e-08, - "loss": 3.7983, - "step": 1782000 - }, - { - "epoch": 19.61, - "learning_rate": 7.597365091589196e-08, - "loss": 3.8056, - "step": 1782500 - }, - { - "epoch": 19.62, - "learning_rate": 7.595989878431156e-08, - "loss": 3.792, - "step": 1783000 - }, - { - "epoch": 19.62, - "learning_rate": 7.594614665273117e-08, - "loss": 3.8047, - "step": 1783500 - }, - { - "epoch": 19.63, - "learning_rate": 7.593239452115078e-08, - "loss": 3.8029, - "step": 1784000 - }, - { - "epoch": 19.63, - "learning_rate": 7.591864238957038e-08, - "loss": 3.8079, - "step": 1784500 - }, - { - "epoch": 19.64, - "learning_rate": 7.590489025798999e-08, - "loss": 3.7968, - "step": 1785000 - }, - { - "epoch": 19.64, - "learning_rate": 7.58911381264096e-08, - "loss": 3.8104, - "step": 1785500 - }, - { - "epoch": 19.65, - "learning_rate": 7.587738599482919e-08, - "loss": 3.7925, - "step": 1786000 - }, - { - "epoch": 19.65, - "learning_rate": 7.58636338632488e-08, - "loss": 3.785, - "step": 1786500 - }, - { - "epoch": 19.66, - "learning_rate": 7.584988173166841e-08, - "loss": 3.7858, - "step": 1787000 - }, - { - "epoch": 19.67, - "learning_rate": 7.5836129600088e-08, - "loss": 3.8069, - "step": 1787500 - }, - { - "epoch": 19.67, - "learning_rate": 7.582237746850761e-08, - "loss": 3.782, - "step": 1788000 - }, - { - "epoch": 19.68, - "learning_rate": 7.580862533692722e-08, - "loss": 3.8182, - "step": 1788500 - }, - { - "epoch": 19.68, - "learning_rate": 7.579487320534682e-08, - "loss": 3.7936, - "step": 1789000 - }, - { - "epoch": 19.69, - "learning_rate": 7.578112107376643e-08, - "loss": 3.8008, - "step": 1789500 - }, - { - "epoch": 19.69, - "learning_rate": 7.576736894218604e-08, - "loss": 3.7869, - "step": 1790000 - }, - { - "epoch": 19.7, - "learning_rate": 7.575361681060563e-08, - "loss": 3.7896, - "step": 1790500 - }, - { - "epoch": 19.7, - "learning_rate": 7.573986467902524e-08, - "loss": 3.8021, - "step": 1791000 - }, - { - "epoch": 19.71, - "learning_rate": 7.572611254744485e-08, - "loss": 3.8002, - "step": 1791500 - }, - { - "epoch": 19.72, - "learning_rate": 7.571236041586445e-08, - "loss": 3.7978, - "step": 1792000 - }, - { - "epoch": 19.72, - "learning_rate": 7.569860828428406e-08, - "loss": 3.7956, - "step": 1792500 - }, - { - "epoch": 19.73, - "learning_rate": 7.568485615270367e-08, - "loss": 3.8255, - "step": 1793000 - }, - { - "epoch": 19.73, - "learning_rate": 7.567110402112326e-08, - "loss": 3.8056, - "step": 1793500 - }, - { - "epoch": 19.74, - "learning_rate": 7.565735188954287e-08, - "loss": 3.7751, - "step": 1794000 - }, - { - "epoch": 19.74, - "learning_rate": 7.564359975796248e-08, - "loss": 3.8257, - "step": 1794500 - }, - { - "epoch": 19.75, - "learning_rate": 7.562984762638208e-08, - "loss": 3.8093, - "step": 1795000 - }, - { - "epoch": 19.75, - "learning_rate": 7.561609549480169e-08, - "loss": 3.7838, - "step": 1795500 - }, - { - "epoch": 19.76, - "learning_rate": 7.56023433632213e-08, - "loss": 3.787, - "step": 1796000 - }, - { - "epoch": 19.76, - "learning_rate": 7.55885912316409e-08, - "loss": 3.8141, - "step": 1796500 - }, - { - "epoch": 19.77, - "learning_rate": 7.55748391000605e-08, - "loss": 3.7984, - "step": 1797000 - }, - { - "epoch": 19.78, - "learning_rate": 7.556108696848011e-08, - "loss": 3.7842, - "step": 1797500 - }, - { - "epoch": 19.78, - "learning_rate": 7.554733483689971e-08, - "loss": 3.7862, - "step": 1798000 - }, - { - "epoch": 19.79, - "learning_rate": 7.553358270531932e-08, - "loss": 3.8035, - "step": 1798500 - }, - { - "epoch": 19.79, - "learning_rate": 7.551983057373893e-08, - "loss": 3.8226, - "step": 1799000 - }, - { - "epoch": 19.8, - "learning_rate": 7.550607844215852e-08, - "loss": 3.7921, - "step": 1799500 - }, - { - "epoch": 19.8, - "learning_rate": 7.549232631057813e-08, - "loss": 3.8111, - "step": 1800000 - }, - { - "epoch": 19.81, - "learning_rate": 7.547857417899774e-08, - "loss": 3.8098, - "step": 1800500 - }, - { - "epoch": 19.81, - "learning_rate": 7.546482204741734e-08, - "loss": 3.8149, - "step": 1801000 - }, - { - "epoch": 19.82, - "learning_rate": 7.545106991583695e-08, - "loss": 3.8072, - "step": 1801500 - }, - { - "epoch": 19.83, - "learning_rate": 7.543731778425656e-08, - "loss": 3.8003, - "step": 1802000 - }, - { - "epoch": 19.83, - "learning_rate": 7.542356565267615e-08, - "loss": 3.8052, - "step": 1802500 - }, - { - "epoch": 19.84, - "learning_rate": 7.540981352109576e-08, - "loss": 3.8257, - "step": 1803000 - }, - { - "epoch": 19.84, - "learning_rate": 7.539606138951537e-08, - "loss": 3.8208, - "step": 1803500 - }, - { - "epoch": 19.85, - "learning_rate": 7.538230925793497e-08, - "loss": 3.8083, - "step": 1804000 - }, - { - "epoch": 19.85, - "learning_rate": 7.536855712635458e-08, - "loss": 3.7994, - "step": 1804500 - }, - { - "epoch": 19.86, - "learning_rate": 7.535480499477419e-08, - "loss": 3.7928, - "step": 1805000 - }, - { - "epoch": 19.86, - "learning_rate": 7.534105286319378e-08, - "loss": 3.8015, - "step": 1805500 - }, - { - "epoch": 19.87, - "learning_rate": 7.532730073161339e-08, - "loss": 3.7857, - "step": 1806000 - }, - { - "epoch": 19.87, - "learning_rate": 7.5313548600033e-08, - "loss": 3.7995, - "step": 1806500 - }, - { - "epoch": 19.88, - "learning_rate": 7.52997964684526e-08, - "loss": 3.7964, - "step": 1807000 - }, - { - "epoch": 19.89, - "learning_rate": 7.528604433687221e-08, - "loss": 3.8072, - "step": 1807500 - }, - { - "epoch": 19.89, - "learning_rate": 7.527229220529182e-08, - "loss": 3.8056, - "step": 1808000 - }, - { - "epoch": 19.9, - "learning_rate": 7.525854007371141e-08, - "loss": 3.8157, - "step": 1808500 - }, - { - "epoch": 19.9, - "learning_rate": 7.524478794213102e-08, - "loss": 3.8169, - "step": 1809000 - }, - { - "epoch": 19.91, - "learning_rate": 7.523103581055063e-08, - "loss": 3.7919, - "step": 1809500 - }, - { - "epoch": 19.91, - "learning_rate": 7.521728367897023e-08, - "loss": 3.8037, - "step": 1810000 - }, - { - "epoch": 19.92, - "learning_rate": 7.520353154738984e-08, - "loss": 3.7931, - "step": 1810500 - }, - { - "epoch": 19.92, - "learning_rate": 7.518977941580945e-08, - "loss": 3.8172, - "step": 1811000 - }, - { - "epoch": 19.93, - "learning_rate": 7.517602728422906e-08, - "loss": 3.7923, - "step": 1811500 - }, - { - "epoch": 19.94, - "learning_rate": 7.516227515264865e-08, - "loss": 3.789, - "step": 1812000 - }, - { - "epoch": 19.94, - "learning_rate": 7.514852302106826e-08, - "loss": 3.7948, - "step": 1812500 - }, - { - "epoch": 19.95, - "learning_rate": 7.513477088948787e-08, - "loss": 3.8161, - "step": 1813000 - }, - { - "epoch": 19.95, - "learning_rate": 7.512101875790748e-08, - "loss": 3.7805, - "step": 1813500 - }, - { - "epoch": 19.96, - "learning_rate": 7.510726662632708e-08, - "loss": 3.8168, - "step": 1814000 - }, - { - "epoch": 19.96, - "learning_rate": 7.509351449474668e-08, - "loss": 3.8, - "step": 1814500 - }, - { - "epoch": 19.97, - "learning_rate": 7.50797623631663e-08, - "loss": 3.7967, - "step": 1815000 - }, - { - "epoch": 19.97, - "learning_rate": 7.506601023158589e-08, - "loss": 3.7892, - "step": 1815500 - }, - { - "epoch": 19.98, - "learning_rate": 7.50522581000055e-08, - "loss": 3.8025, - "step": 1816000 - }, - { - "epoch": 19.98, - "learning_rate": 7.503850596842511e-08, - "loss": 3.7884, - "step": 1816500 - }, - { - "epoch": 19.99, - "learning_rate": 7.502475383684472e-08, - "loss": 3.7893, - "step": 1817000 - }, - { - "epoch": 20.0, - "learning_rate": 7.501100170526431e-08, - "loss": 3.8053, - "step": 1817500 - }, - { - "epoch": 20.0, - "eval_loss": 3.8551576137542725, - "eval_runtime": 6.137, - "eval_samples_per_second": 253.22, - "step": 1817900 - }, - { - "epoch": 20.0, - "learning_rate": 7.499724957368392e-08, - "loss": 3.8178, - "step": 1818000 - }, - { - "epoch": 20.01, - "learning_rate": 7.498349744210353e-08, - "loss": 3.7973, - "step": 1818500 - }, - { - "epoch": 20.01, - "learning_rate": 7.496974531052313e-08, - "loss": 3.8044, - "step": 1819000 - }, - { - "epoch": 20.02, - "learning_rate": 7.495599317894274e-08, - "loss": 3.8117, - "step": 1819500 - }, - { - "epoch": 20.02, - "learning_rate": 7.494224104736235e-08, - "loss": 3.8076, - "step": 1820000 - }, - { - "epoch": 20.03, - "learning_rate": 7.492848891578194e-08, - "loss": 3.7841, - "step": 1820500 - }, - { - "epoch": 20.03, - "learning_rate": 7.491473678420155e-08, - "loss": 3.7897, - "step": 1821000 - }, - { - "epoch": 20.04, - "learning_rate": 7.490098465262116e-08, - "loss": 3.7963, - "step": 1821500 - }, - { - "epoch": 20.05, - "learning_rate": 7.488723252104076e-08, - "loss": 3.799, - "step": 1822000 - }, - { - "epoch": 20.05, - "learning_rate": 7.487348038946037e-08, - "loss": 3.8141, - "step": 1822500 - }, - { - "epoch": 20.06, - "learning_rate": 7.485972825787998e-08, - "loss": 3.7983, - "step": 1823000 - }, - { - "epoch": 20.06, - "learning_rate": 7.484597612629957e-08, - "loss": 3.7868, - "step": 1823500 - }, - { - "epoch": 20.07, - "learning_rate": 7.483222399471918e-08, - "loss": 3.8005, - "step": 1824000 - }, - { - "epoch": 20.07, - "learning_rate": 7.481847186313879e-08, - "loss": 3.7943, - "step": 1824500 - }, - { - "epoch": 20.08, - "learning_rate": 7.480471973155839e-08, - "loss": 3.8048, - "step": 1825000 - }, - { - "epoch": 20.08, - "learning_rate": 7.4790967599978e-08, - "loss": 3.774, - "step": 1825500 - }, - { - "epoch": 20.09, - "learning_rate": 7.477721546839761e-08, - "loss": 3.7974, - "step": 1826000 - }, - { - "epoch": 20.09, - "learning_rate": 7.47634633368172e-08, - "loss": 3.8042, - "step": 1826500 - }, - { - "epoch": 20.1, - "learning_rate": 7.474971120523681e-08, - "loss": 3.7929, - "step": 1827000 - }, - { - "epoch": 20.11, - "learning_rate": 7.473595907365642e-08, - "loss": 3.7979, - "step": 1827500 - }, - { - "epoch": 20.11, - "learning_rate": 7.472220694207602e-08, - "loss": 3.7783, - "step": 1828000 - }, - { - "epoch": 20.12, - "learning_rate": 7.470845481049563e-08, - "loss": 3.8265, - "step": 1828500 - }, - { - "epoch": 20.12, - "learning_rate": 7.469470267891524e-08, - "loss": 3.7892, - "step": 1829000 - }, - { - "epoch": 20.13, - "learning_rate": 7.468095054733483e-08, - "loss": 3.8005, - "step": 1829500 - }, - { - "epoch": 20.13, - "learning_rate": 7.466719841575444e-08, - "loss": 3.8023, - "step": 1830000 - }, - { - "epoch": 20.14, - "learning_rate": 7.465344628417405e-08, - "loss": 3.8144, - "step": 1830500 - }, - { - "epoch": 20.14, - "learning_rate": 7.463969415259365e-08, - "loss": 3.7976, - "step": 1831000 - }, - { - "epoch": 20.15, - "learning_rate": 7.462594202101326e-08, - "loss": 3.8036, - "step": 1831500 - }, - { - "epoch": 20.16, - "learning_rate": 7.461218988943287e-08, - "loss": 3.794, - "step": 1832000 - }, - { - "epoch": 20.16, - "learning_rate": 7.459843775785246e-08, - "loss": 3.8083, - "step": 1832500 - }, - { - "epoch": 20.17, - "learning_rate": 7.458468562627207e-08, - "loss": 3.7875, - "step": 1833000 - }, - { - "epoch": 20.17, - "learning_rate": 7.457093349469168e-08, - "loss": 3.8107, - "step": 1833500 - }, - { - "epoch": 20.18, - "learning_rate": 7.455718136311128e-08, - "loss": 3.7959, - "step": 1834000 - }, - { - "epoch": 20.18, - "learning_rate": 7.454342923153089e-08, - "loss": 3.7881, - "step": 1834500 - }, - { - "epoch": 20.19, - "learning_rate": 7.45296770999505e-08, - "loss": 3.7743, - "step": 1835000 - }, - { - "epoch": 20.19, - "learning_rate": 7.451592496837009e-08, - "loss": 3.7963, - "step": 1835500 - }, - { - "epoch": 20.2, - "learning_rate": 7.45021728367897e-08, - "loss": 3.809, - "step": 1836000 - }, - { - "epoch": 20.2, - "learning_rate": 7.448842070520931e-08, - "loss": 3.7948, - "step": 1836500 - }, - { - "epoch": 20.21, - "learning_rate": 7.447466857362891e-08, - "loss": 3.8011, - "step": 1837000 - }, - { - "epoch": 20.22, - "learning_rate": 7.446091644204852e-08, - "loss": 3.7751, - "step": 1837500 - }, - { - "epoch": 20.22, - "learning_rate": 7.444716431046813e-08, - "loss": 3.8016, - "step": 1838000 - }, - { - "epoch": 20.23, - "learning_rate": 7.443341217888772e-08, - "loss": 3.8048, - "step": 1838500 - }, - { - "epoch": 20.23, - "learning_rate": 7.441966004730733e-08, - "loss": 3.7817, - "step": 1839000 - }, - { - "epoch": 20.24, - "learning_rate": 7.440590791572694e-08, - "loss": 3.7809, - "step": 1839500 - }, - { - "epoch": 20.24, - "learning_rate": 7.439215578414654e-08, - "loss": 3.8087, - "step": 1840000 - }, - { - "epoch": 20.25, - "learning_rate": 7.437840365256615e-08, - "loss": 3.7912, - "step": 1840500 - }, - { - "epoch": 20.25, - "learning_rate": 7.436465152098574e-08, - "loss": 3.8013, - "step": 1841000 - }, - { - "epoch": 20.26, - "learning_rate": 7.435089938940535e-08, - "loss": 3.8082, - "step": 1841500 - }, - { - "epoch": 20.27, - "learning_rate": 7.433714725782496e-08, - "loss": 3.7888, - "step": 1842000 - }, - { - "epoch": 20.27, - "learning_rate": 7.432339512624456e-08, - "loss": 3.8055, - "step": 1842500 - }, - { - "epoch": 20.28, - "learning_rate": 7.430964299466417e-08, - "loss": 3.8234, - "step": 1843000 - }, - { - "epoch": 20.28, - "learning_rate": 7.429589086308378e-08, - "loss": 3.8088, - "step": 1843500 - }, - { - "epoch": 20.29, - "learning_rate": 7.428213873150337e-08, - "loss": 3.8143, - "step": 1844000 - }, - { - "epoch": 20.29, - "learning_rate": 7.426838659992298e-08, - "loss": 3.8115, - "step": 1844500 - }, - { - "epoch": 20.3, - "learning_rate": 7.425463446834259e-08, - "loss": 3.7793, - "step": 1845000 - }, - { - "epoch": 20.3, - "learning_rate": 7.424088233676219e-08, - "loss": 3.8052, - "step": 1845500 - }, - { - "epoch": 20.31, - "learning_rate": 7.42271302051818e-08, - "loss": 3.7956, - "step": 1846000 - }, - { - "epoch": 20.31, - "learning_rate": 7.42133780736014e-08, - "loss": 3.7983, - "step": 1846500 - }, - { - "epoch": 20.32, - "learning_rate": 7.4199625942021e-08, - "loss": 3.8051, - "step": 1847000 - }, - { - "epoch": 20.33, - "learning_rate": 7.418587381044061e-08, - "loss": 3.8185, - "step": 1847500 - }, - { - "epoch": 20.33, - "learning_rate": 7.417212167886022e-08, - "loss": 3.7892, - "step": 1848000 - }, - { - "epoch": 20.34, - "learning_rate": 7.415836954727982e-08, - "loss": 3.7841, - "step": 1848500 - }, - { - "epoch": 20.34, - "learning_rate": 7.414461741569943e-08, - "loss": 3.8014, - "step": 1849000 - }, - { - "epoch": 20.35, - "learning_rate": 7.413086528411903e-08, - "loss": 3.7852, - "step": 1849500 - }, - { - "epoch": 20.35, - "learning_rate": 7.411711315253863e-08, - "loss": 3.8016, - "step": 1850000 - }, - { - "epoch": 20.36, - "learning_rate": 7.410336102095824e-08, - "loss": 3.776, - "step": 1850500 - }, - { - "epoch": 20.36, - "learning_rate": 7.408960888937785e-08, - "loss": 3.7866, - "step": 1851000 - }, - { - "epoch": 20.37, - "learning_rate": 7.407585675779745e-08, - "loss": 3.8026, - "step": 1851500 - }, - { - "epoch": 20.38, - "learning_rate": 7.406210462621705e-08, - "loss": 3.788, - "step": 1852000 - }, - { - "epoch": 20.38, - "learning_rate": 7.404835249463666e-08, - "loss": 3.8102, - "step": 1852500 - }, - { - "epoch": 20.39, - "learning_rate": 7.403460036305626e-08, - "loss": 3.8028, - "step": 1853000 - }, - { - "epoch": 20.39, - "learning_rate": 7.402084823147587e-08, - "loss": 3.8203, - "step": 1853500 - }, - { - "epoch": 20.4, - "learning_rate": 7.400709609989548e-08, - "loss": 3.7897, - "step": 1854000 - }, - { - "epoch": 20.4, - "learning_rate": 7.399334396831507e-08, - "loss": 3.8082, - "step": 1854500 - }, - { - "epoch": 20.41, - "learning_rate": 7.397959183673468e-08, - "loss": 3.8066, - "step": 1855000 - }, - { - "epoch": 20.41, - "learning_rate": 7.39658397051543e-08, - "loss": 3.797, - "step": 1855500 - }, - { - "epoch": 20.42, - "learning_rate": 7.395208757357389e-08, - "loss": 3.7882, - "step": 1856000 - }, - { - "epoch": 20.42, - "learning_rate": 7.39383354419935e-08, - "loss": 3.7947, - "step": 1856500 - }, - { - "epoch": 20.43, - "learning_rate": 7.392458331041311e-08, - "loss": 3.8023, - "step": 1857000 - }, - { - "epoch": 20.44, - "learning_rate": 7.39108311788327e-08, - "loss": 3.7946, - "step": 1857500 - }, - { - "epoch": 20.44, - "learning_rate": 7.389707904725231e-08, - "loss": 3.7923, - "step": 1858000 - }, - { - "epoch": 20.45, - "learning_rate": 7.388332691567192e-08, - "loss": 3.7877, - "step": 1858500 - }, - { - "epoch": 20.45, - "learning_rate": 7.386957478409153e-08, - "loss": 3.7872, - "step": 1859000 - }, - { - "epoch": 20.46, - "learning_rate": 7.385582265251113e-08, - "loss": 3.8052, - "step": 1859500 - }, - { - "epoch": 20.46, - "learning_rate": 7.384207052093074e-08, - "loss": 3.8038, - "step": 1860000 - }, - { - "epoch": 20.47, - "learning_rate": 7.382831838935035e-08, - "loss": 3.8155, - "step": 1860500 - }, - { - "epoch": 20.47, - "learning_rate": 7.381456625776996e-08, - "loss": 3.8019, - "step": 1861000 - }, - { - "epoch": 20.48, - "learning_rate": 7.380081412618955e-08, - "loss": 3.7902, - "step": 1861500 - }, - { - "epoch": 20.49, - "learning_rate": 7.378706199460916e-08, - "loss": 3.7872, - "step": 1862000 - }, - { - "epoch": 20.49, - "learning_rate": 7.377330986302877e-08, - "loss": 3.8149, - "step": 1862500 - }, - { - "epoch": 20.5, - "learning_rate": 7.375955773144837e-08, - "loss": 3.791, - "step": 1863000 - }, - { - "epoch": 20.5, - "learning_rate": 7.374580559986798e-08, - "loss": 3.8104, - "step": 1863500 - }, - { - "epoch": 20.51, - "learning_rate": 7.373205346828759e-08, - "loss": 3.7963, - "step": 1864000 - }, - { - "epoch": 20.51, - "learning_rate": 7.37183013367072e-08, - "loss": 3.7954, - "step": 1864500 - }, - { - "epoch": 20.52, - "learning_rate": 7.370454920512679e-08, - "loss": 3.7881, - "step": 1865000 - }, - { - "epoch": 20.52, - "learning_rate": 7.36907970735464e-08, - "loss": 3.7903, - "step": 1865500 - }, - { - "epoch": 20.53, - "learning_rate": 7.367704494196601e-08, - "loss": 3.7818, - "step": 1866000 - }, - { - "epoch": 20.53, - "learning_rate": 7.36632928103856e-08, - "loss": 3.8289, - "step": 1866500 - }, - { - "epoch": 20.54, - "learning_rate": 7.364954067880522e-08, - "loss": 3.7697, - "step": 1867000 - }, - { - "epoch": 20.55, - "learning_rate": 7.363578854722483e-08, - "loss": 3.7891, - "step": 1867500 - }, - { - "epoch": 20.55, - "learning_rate": 7.362203641564442e-08, - "loss": 3.7982, - "step": 1868000 - }, - { - "epoch": 20.56, - "learning_rate": 7.360828428406403e-08, - "loss": 3.793, - "step": 1868500 - }, - { - "epoch": 20.56, - "learning_rate": 7.359453215248364e-08, - "loss": 3.8045, - "step": 1869000 - }, - { - "epoch": 20.57, - "learning_rate": 7.358078002090324e-08, - "loss": 3.7826, - "step": 1869500 - }, - { - "epoch": 20.57, - "learning_rate": 7.356702788932285e-08, - "loss": 3.8052, - "step": 1870000 - }, - { - "epoch": 20.58, - "learning_rate": 7.355327575774245e-08, - "loss": 3.7954, - "step": 1870500 - }, - { - "epoch": 20.58, - "learning_rate": 7.353952362616205e-08, - "loss": 3.8003, - "step": 1871000 - }, - { - "epoch": 20.59, - "learning_rate": 7.352577149458166e-08, - "loss": 3.8054, - "step": 1871500 - }, - { - "epoch": 20.6, - "learning_rate": 7.351201936300127e-08, - "loss": 3.8011, - "step": 1872000 - }, - { - "epoch": 20.6, - "learning_rate": 7.349826723142087e-08, - "loss": 3.8103, - "step": 1872500 - }, - { - "epoch": 20.61, - "learning_rate": 7.348451509984048e-08, - "loss": 3.8004, - "step": 1873000 - }, - { - "epoch": 20.61, - "learning_rate": 7.347076296826008e-08, - "loss": 3.7807, - "step": 1873500 - }, - { - "epoch": 20.62, - "learning_rate": 7.345701083667968e-08, - "loss": 3.8311, - "step": 1874000 - }, - { - "epoch": 20.62, - "learning_rate": 7.344325870509929e-08, - "loss": 3.8053, - "step": 1874500 - }, - { - "epoch": 20.63, - "learning_rate": 7.34295065735189e-08, - "loss": 3.7859, - "step": 1875000 - }, - { - "epoch": 20.63, - "learning_rate": 7.34157544419385e-08, - "loss": 3.7986, - "step": 1875500 - }, - { - "epoch": 20.64, - "learning_rate": 7.34020023103581e-08, - "loss": 3.8077, - "step": 1876000 - }, - { - "epoch": 20.64, - "learning_rate": 7.338825017877771e-08, - "loss": 3.7949, - "step": 1876500 - }, - { - "epoch": 20.65, - "learning_rate": 7.337449804719731e-08, - "loss": 3.7979, - "step": 1877000 - }, - { - "epoch": 20.66, - "learning_rate": 7.336074591561692e-08, - "loss": 3.7861, - "step": 1877500 - }, - { - "epoch": 20.66, - "learning_rate": 7.334699378403653e-08, - "loss": 3.7987, - "step": 1878000 - }, - { - "epoch": 20.67, - "learning_rate": 7.333324165245612e-08, - "loss": 3.7809, - "step": 1878500 - }, - { - "epoch": 20.67, - "learning_rate": 7.331948952087573e-08, - "loss": 3.7997, - "step": 1879000 - }, - { - "epoch": 20.68, - "learning_rate": 7.330573738929534e-08, - "loss": 3.802, - "step": 1879500 - }, - { - "epoch": 20.68, - "learning_rate": 7.329198525771494e-08, - "loss": 3.8025, - "step": 1880000 - }, - { - "epoch": 20.69, - "learning_rate": 7.327823312613455e-08, - "loss": 3.7878, - "step": 1880500 - }, - { - "epoch": 20.69, - "learning_rate": 7.326448099455416e-08, - "loss": 3.7954, - "step": 1881000 - }, - { - "epoch": 20.7, - "learning_rate": 7.325072886297375e-08, - "loss": 3.7928, - "step": 1881500 - }, - { - "epoch": 20.71, - "learning_rate": 7.323697673139336e-08, - "loss": 3.7754, - "step": 1882000 - }, - { - "epoch": 20.71, - "learning_rate": 7.322322459981297e-08, - "loss": 3.7879, - "step": 1882500 - }, - { - "epoch": 20.72, - "learning_rate": 7.320947246823257e-08, - "loss": 3.8079, - "step": 1883000 - }, - { - "epoch": 20.72, - "learning_rate": 7.319572033665218e-08, - "loss": 3.8016, - "step": 1883500 - }, - { - "epoch": 20.73, - "learning_rate": 7.318196820507179e-08, - "loss": 3.8313, - "step": 1884000 - }, - { - "epoch": 20.73, - "learning_rate": 7.316821607349138e-08, - "loss": 3.7935, - "step": 1884500 - }, - { - "epoch": 20.74, - "learning_rate": 7.3154463941911e-08, - "loss": 3.8102, - "step": 1885000 - }, - { - "epoch": 20.74, - "learning_rate": 7.31407118103306e-08, - "loss": 3.7591, - "step": 1885500 - }, - { - "epoch": 20.75, - "learning_rate": 7.31269596787502e-08, - "loss": 3.795, - "step": 1886000 - }, - { - "epoch": 20.75, - "learning_rate": 7.311320754716981e-08, - "loss": 3.7998, - "step": 1886500 - }, - { - "epoch": 20.76, - "learning_rate": 7.309945541558942e-08, - "loss": 3.7898, - "step": 1887000 - }, - { - "epoch": 20.77, - "learning_rate": 7.308570328400901e-08, - "loss": 3.8012, - "step": 1887500 - }, - { - "epoch": 20.77, - "learning_rate": 7.307195115242862e-08, - "loss": 3.7926, - "step": 1888000 - }, - { - "epoch": 20.78, - "learning_rate": 7.305819902084823e-08, - "loss": 3.7982, - "step": 1888500 - }, - { - "epoch": 20.78, - "learning_rate": 7.304444688926783e-08, - "loss": 3.7926, - "step": 1889000 - }, - { - "epoch": 20.79, - "learning_rate": 7.303069475768744e-08, - "loss": 3.7974, - "step": 1889500 - }, - { - "epoch": 20.79, - "learning_rate": 7.301694262610705e-08, - "loss": 3.7811, - "step": 1890000 - }, - { - "epoch": 20.8, - "learning_rate": 7.300319049452664e-08, - "loss": 3.7749, - "step": 1890500 - }, - { - "epoch": 20.8, - "learning_rate": 7.298943836294625e-08, - "loss": 3.8088, - "step": 1891000 - }, - { - "epoch": 20.81, - "learning_rate": 7.297568623136586e-08, - "loss": 3.7975, - "step": 1891500 - }, - { - "epoch": 20.82, - "learning_rate": 7.296193409978546e-08, - "loss": 3.7964, - "step": 1892000 - }, - { - "epoch": 20.82, - "learning_rate": 7.294818196820507e-08, - "loss": 3.8075, - "step": 1892500 - }, - { - "epoch": 20.83, - "learning_rate": 7.293442983662468e-08, - "loss": 3.8135, - "step": 1893000 - }, - { - "epoch": 20.83, - "learning_rate": 7.292067770504427e-08, - "loss": 3.7881, - "step": 1893500 - }, - { - "epoch": 20.84, - "learning_rate": 7.290692557346388e-08, - "loss": 3.7959, - "step": 1894000 - }, - { - "epoch": 20.84, - "learning_rate": 7.289317344188349e-08, - "loss": 3.798, - "step": 1894500 - }, - { - "epoch": 20.85, - "learning_rate": 7.287942131030309e-08, - "loss": 3.7938, - "step": 1895000 - }, - { - "epoch": 20.85, - "learning_rate": 7.28656691787227e-08, - "loss": 3.7948, - "step": 1895500 - }, - { - "epoch": 20.86, - "learning_rate": 7.28519170471423e-08, - "loss": 3.7893, - "step": 1896000 - }, - { - "epoch": 20.86, - "learning_rate": 7.28381649155619e-08, - "loss": 3.8041, - "step": 1896500 - }, - { - "epoch": 20.87, - "learning_rate": 7.282441278398151e-08, - "loss": 3.7813, - "step": 1897000 - }, - { - "epoch": 20.88, - "learning_rate": 7.281066065240112e-08, - "loss": 3.7873, - "step": 1897500 - }, - { - "epoch": 20.88, - "learning_rate": 7.279690852082072e-08, - "loss": 3.7765, - "step": 1898000 - }, - { - "epoch": 20.89, - "learning_rate": 7.278315638924033e-08, - "loss": 3.7836, - "step": 1898500 - }, - { - "epoch": 20.89, - "learning_rate": 7.276940425765994e-08, - "loss": 3.7898, - "step": 1899000 - }, - { - "epoch": 20.9, - "learning_rate": 7.275565212607953e-08, - "loss": 3.7897, - "step": 1899500 - }, - { - "epoch": 20.9, - "learning_rate": 7.274189999449914e-08, - "loss": 3.7884, - "step": 1900000 - }, - { - "epoch": 20.91, - "learning_rate": 7.272814786291875e-08, - "loss": 3.7891, - "step": 1900500 - }, - { - "epoch": 20.91, - "learning_rate": 7.271439573133835e-08, - "loss": 3.7833, - "step": 1901000 - }, - { - "epoch": 20.92, - "learning_rate": 7.270064359975796e-08, - "loss": 3.8108, - "step": 1901500 - }, - { - "epoch": 20.93, - "learning_rate": 7.268689146817757e-08, - "loss": 3.7944, - "step": 1902000 - }, - { - "epoch": 20.93, - "learning_rate": 7.267313933659716e-08, - "loss": 3.7908, - "step": 1902500 - }, - { - "epoch": 20.94, - "learning_rate": 7.265938720501677e-08, - "loss": 3.7982, - "step": 1903000 - }, - { - "epoch": 20.94, - "learning_rate": 7.264563507343638e-08, - "loss": 3.7978, - "step": 1903500 - }, - { - "epoch": 20.95, - "learning_rate": 7.263188294185598e-08, - "loss": 3.7832, - "step": 1904000 - }, - { - "epoch": 20.95, - "learning_rate": 7.261813081027559e-08, - "loss": 3.7935, - "step": 1904500 - }, - { - "epoch": 20.96, - "learning_rate": 7.26043786786952e-08, - "loss": 3.7906, - "step": 1905000 - }, - { - "epoch": 20.96, - "learning_rate": 7.259062654711479e-08, - "loss": 3.7975, - "step": 1905500 - }, - { - "epoch": 20.97, - "learning_rate": 7.25768744155344e-08, - "loss": 3.8038, - "step": 1906000 - }, - { - "epoch": 20.97, - "learning_rate": 7.256312228395401e-08, - "loss": 3.8055, - "step": 1906500 - }, - { - "epoch": 20.98, - "learning_rate": 7.25493701523736e-08, - "loss": 3.7944, - "step": 1907000 - }, - { - "epoch": 20.99, - "learning_rate": 7.253561802079322e-08, - "loss": 3.7839, - "step": 1907500 - }, - { - "epoch": 20.99, - "learning_rate": 7.252186588921282e-08, - "loss": 3.7735, - "step": 1908000 - }, - { - "epoch": 21.0, - "learning_rate": 7.250811375763243e-08, - "loss": 3.7982, - "step": 1908500 - }, - { - "epoch": 21.0, - "eval_loss": 3.8521878719329834, - "eval_runtime": 6.1908, - "eval_samples_per_second": 251.018, - "step": 1908795 - }, - { - "epoch": 21.0, - "learning_rate": 7.249436162605203e-08, - "loss": 3.7915, - "step": 1909000 - }, - { - "epoch": 21.01, - "learning_rate": 7.248060949447164e-08, - "loss": 3.8036, - "step": 1909500 - }, - { - "epoch": 21.01, - "learning_rate": 7.246685736289125e-08, - "loss": 3.7984, - "step": 1910000 - }, - { - "epoch": 21.02, - "learning_rate": 7.245310523131084e-08, - "loss": 3.8054, - "step": 1910500 - }, - { - "epoch": 21.02, - "learning_rate": 7.243935309973045e-08, - "loss": 3.8057, - "step": 1911000 - }, - { - "epoch": 21.03, - "learning_rate": 7.242560096815006e-08, - "loss": 3.7774, - "step": 1911500 - }, - { - "epoch": 21.04, - "learning_rate": 7.241184883656967e-08, - "loss": 3.7822, - "step": 1912000 - }, - { - "epoch": 21.04, - "learning_rate": 7.239809670498927e-08, - "loss": 3.8008, - "step": 1912500 - }, - { - "epoch": 21.05, - "learning_rate": 7.238434457340888e-08, - "loss": 3.7832, - "step": 1913000 - }, - { - "epoch": 21.05, - "learning_rate": 7.237059244182849e-08, - "loss": 3.7805, - "step": 1913500 - }, - { - "epoch": 21.06, - "learning_rate": 7.23568403102481e-08, - "loss": 3.7764, - "step": 1914000 - }, - { - "epoch": 21.06, - "learning_rate": 7.234308817866769e-08, - "loss": 3.7821, - "step": 1914500 - }, - { - "epoch": 21.07, - "learning_rate": 7.23293360470873e-08, - "loss": 3.797, - "step": 1915000 - }, - { - "epoch": 21.07, - "learning_rate": 7.231558391550691e-08, - "loss": 3.802, - "step": 1915500 - }, - { - "epoch": 21.08, - "learning_rate": 7.230183178392651e-08, - "loss": 3.789, - "step": 1916000 - }, - { - "epoch": 21.08, - "learning_rate": 7.228807965234612e-08, - "loss": 3.787, - "step": 1916500 - }, - { - "epoch": 21.09, - "learning_rate": 7.227432752076571e-08, - "loss": 3.8015, - "step": 1917000 - }, - { - "epoch": 21.1, - "learning_rate": 7.226057538918532e-08, - "loss": 3.8019, - "step": 1917500 - }, - { - "epoch": 21.1, - "learning_rate": 7.224682325760493e-08, - "loss": 3.7979, - "step": 1918000 - }, - { - "epoch": 21.11, - "learning_rate": 7.223307112602453e-08, - "loss": 3.8036, - "step": 1918500 - }, - { - "epoch": 21.11, - "learning_rate": 7.221931899444414e-08, - "loss": 3.7861, - "step": 1919000 - }, - { - "epoch": 21.12, - "learning_rate": 7.220556686286375e-08, - "loss": 3.7904, - "step": 1919500 - }, - { - "epoch": 21.12, - "learning_rate": 7.219181473128334e-08, - "loss": 3.7712, - "step": 1920000 - }, - { - "epoch": 21.13, - "learning_rate": 7.217806259970295e-08, - "loss": 3.8042, - "step": 1920500 - }, - { - "epoch": 21.13, - "learning_rate": 7.216431046812256e-08, - "loss": 3.7906, - "step": 1921000 - }, - { - "epoch": 21.14, - "learning_rate": 7.215055833654216e-08, - "loss": 3.8046, - "step": 1921500 - }, - { - "epoch": 21.15, - "learning_rate": 7.213680620496177e-08, - "loss": 3.7991, - "step": 1922000 - }, - { - "epoch": 21.15, - "learning_rate": 7.212305407338138e-08, - "loss": 3.7979, - "step": 1922500 - }, - { - "epoch": 21.16, - "learning_rate": 7.210930194180097e-08, - "loss": 3.7979, - "step": 1923000 - }, - { - "epoch": 21.16, - "learning_rate": 7.209554981022058e-08, - "loss": 3.8059, - "step": 1923500 - }, - { - "epoch": 21.17, - "learning_rate": 7.208179767864019e-08, - "loss": 3.7963, - "step": 1924000 - }, - { - "epoch": 21.17, - "learning_rate": 7.206804554705979e-08, - "loss": 3.7989, - "step": 1924500 - }, - { - "epoch": 21.18, - "learning_rate": 7.20542934154794e-08, - "loss": 3.7878, - "step": 1925000 - }, - { - "epoch": 21.18, - "learning_rate": 7.2040541283899e-08, - "loss": 3.7871, - "step": 1925500 - }, - { - "epoch": 21.19, - "learning_rate": 7.20267891523186e-08, - "loss": 3.8068, - "step": 1926000 - }, - { - "epoch": 21.19, - "learning_rate": 7.201303702073821e-08, - "loss": 3.7973, - "step": 1926500 - }, - { - "epoch": 21.2, - "learning_rate": 7.199928488915782e-08, - "loss": 3.7968, - "step": 1927000 - }, - { - "epoch": 21.21, - "learning_rate": 7.198553275757742e-08, - "loss": 3.7848, - "step": 1927500 - }, - { - "epoch": 21.21, - "learning_rate": 7.197178062599703e-08, - "loss": 3.7986, - "step": 1928000 - }, - { - "epoch": 21.22, - "learning_rate": 7.195802849441664e-08, - "loss": 3.7859, - "step": 1928500 - }, - { - "epoch": 21.22, - "learning_rate": 7.194427636283623e-08, - "loss": 3.7906, - "step": 1929000 - }, - { - "epoch": 21.23, - "learning_rate": 7.193052423125584e-08, - "loss": 3.8065, - "step": 1929500 - }, - { - "epoch": 21.23, - "learning_rate": 7.191677209967545e-08, - "loss": 3.7865, - "step": 1930000 - }, - { - "epoch": 21.24, - "learning_rate": 7.190301996809505e-08, - "loss": 3.8025, - "step": 1930500 - }, - { - "epoch": 21.24, - "learning_rate": 7.188926783651466e-08, - "loss": 3.7834, - "step": 1931000 - }, - { - "epoch": 21.25, - "learning_rate": 7.187551570493427e-08, - "loss": 3.8058, - "step": 1931500 - }, - { - "epoch": 21.26, - "learning_rate": 7.186176357335386e-08, - "loss": 3.8074, - "step": 1932000 - }, - { - "epoch": 21.26, - "learning_rate": 7.184801144177347e-08, - "loss": 3.8105, - "step": 1932500 - }, - { - "epoch": 21.27, - "learning_rate": 7.183425931019308e-08, - "loss": 3.8106, - "step": 1933000 - }, - { - "epoch": 21.27, - "learning_rate": 7.182050717861268e-08, - "loss": 3.7753, - "step": 1933500 - }, - { - "epoch": 21.28, - "learning_rate": 7.180675504703229e-08, - "loss": 3.7976, - "step": 1934000 - }, - { - "epoch": 21.28, - "learning_rate": 7.17930029154519e-08, - "loss": 3.7777, - "step": 1934500 - }, - { - "epoch": 21.29, - "learning_rate": 7.177925078387149e-08, - "loss": 3.7885, - "step": 1935000 - }, - { - "epoch": 21.29, - "learning_rate": 7.17654986522911e-08, - "loss": 3.8018, - "step": 1935500 - }, - { - "epoch": 21.3, - "learning_rate": 7.175174652071071e-08, - "loss": 3.7808, - "step": 1936000 - }, - { - "epoch": 21.3, - "learning_rate": 7.17379943891303e-08, - "loss": 3.7829, - "step": 1936500 - }, - { - "epoch": 21.31, - "learning_rate": 7.172424225754992e-08, - "loss": 3.8059, - "step": 1937000 - }, - { - "epoch": 21.32, - "learning_rate": 7.171049012596952e-08, - "loss": 3.7811, - "step": 1937500 - }, - { - "epoch": 21.32, - "learning_rate": 7.169673799438912e-08, - "loss": 3.799, - "step": 1938000 - }, - { - "epoch": 21.33, - "learning_rate": 7.168298586280873e-08, - "loss": 3.7879, - "step": 1938500 - }, - { - "epoch": 21.33, - "learning_rate": 7.166923373122834e-08, - "loss": 3.7984, - "step": 1939000 - }, - { - "epoch": 21.34, - "learning_rate": 7.165548159964794e-08, - "loss": 3.7884, - "step": 1939500 - }, - { - "epoch": 21.34, - "learning_rate": 7.164172946806754e-08, - "loss": 3.7958, - "step": 1940000 - }, - { - "epoch": 21.35, - "learning_rate": 7.162797733648715e-08, - "loss": 3.7943, - "step": 1940500 - }, - { - "epoch": 21.35, - "learning_rate": 7.161422520490675e-08, - "loss": 3.7958, - "step": 1941000 - }, - { - "epoch": 21.36, - "learning_rate": 7.160047307332636e-08, - "loss": 3.7818, - "step": 1941500 - }, - { - "epoch": 21.37, - "learning_rate": 7.158672094174597e-08, - "loss": 3.7927, - "step": 1942000 - }, - { - "epoch": 21.37, - "learning_rate": 7.157296881016557e-08, - "loss": 3.8043, - "step": 1942500 - }, - { - "epoch": 21.38, - "learning_rate": 7.155921667858517e-08, - "loss": 3.7694, - "step": 1943000 - }, - { - "epoch": 21.38, - "learning_rate": 7.154546454700478e-08, - "loss": 3.7858, - "step": 1943500 - }, - { - "epoch": 21.39, - "learning_rate": 7.153171241542438e-08, - "loss": 3.7929, - "step": 1944000 - }, - { - "epoch": 21.39, - "learning_rate": 7.151796028384399e-08, - "loss": 3.7837, - "step": 1944500 - }, - { - "epoch": 21.4, - "learning_rate": 7.15042081522636e-08, - "loss": 3.809, - "step": 1945000 - }, - { - "epoch": 21.4, - "learning_rate": 7.14904560206832e-08, - "loss": 3.793, - "step": 1945500 - }, - { - "epoch": 21.41, - "learning_rate": 7.14767038891028e-08, - "loss": 3.7922, - "step": 1946000 - }, - { - "epoch": 21.41, - "learning_rate": 7.146295175752241e-08, - "loss": 3.7974, - "step": 1946500 - }, - { - "epoch": 21.42, - "learning_rate": 7.144919962594201e-08, - "loss": 3.7915, - "step": 1947000 - }, - { - "epoch": 21.43, - "learning_rate": 7.143544749436162e-08, - "loss": 3.7852, - "step": 1947500 - }, - { - "epoch": 21.43, - "learning_rate": 7.142169536278123e-08, - "loss": 3.793, - "step": 1948000 - }, - { - "epoch": 21.44, - "learning_rate": 7.140794323120082e-08, - "loss": 3.7972, - "step": 1948500 - }, - { - "epoch": 21.44, - "learning_rate": 7.139419109962043e-08, - "loss": 3.7753, - "step": 1949000 - }, - { - "epoch": 21.45, - "learning_rate": 7.138043896804004e-08, - "loss": 3.8026, - "step": 1949500 - }, - { - "epoch": 21.45, - "learning_rate": 7.136668683645964e-08, - "loss": 3.8111, - "step": 1950000 - }, - { - "epoch": 21.46, - "learning_rate": 7.135293470487925e-08, - "loss": 3.789, - "step": 1950500 - }, - { - "epoch": 21.46, - "learning_rate": 7.133918257329886e-08, - "loss": 3.8035, - "step": 1951000 - }, - { - "epoch": 21.47, - "learning_rate": 7.132543044171845e-08, - "loss": 3.8021, - "step": 1951500 - }, - { - "epoch": 21.48, - "learning_rate": 7.131167831013806e-08, - "loss": 3.792, - "step": 1952000 - }, - { - "epoch": 21.48, - "learning_rate": 7.129792617855767e-08, - "loss": 3.7927, - "step": 1952500 - }, - { - "epoch": 21.49, - "learning_rate": 7.128417404697727e-08, - "loss": 3.8073, - "step": 1953000 - }, - { - "epoch": 21.49, - "learning_rate": 7.127042191539688e-08, - "loss": 3.7969, - "step": 1953500 - }, - { - "epoch": 21.5, - "learning_rate": 7.125666978381649e-08, - "loss": 3.7974, - "step": 1954000 - }, - { - "epoch": 21.5, - "learning_rate": 7.124291765223608e-08, - "loss": 3.7906, - "step": 1954500 - }, - { - "epoch": 21.51, - "learning_rate": 7.122916552065569e-08, - "loss": 3.8093, - "step": 1955000 - }, - { - "epoch": 21.51, - "learning_rate": 7.12154133890753e-08, - "loss": 3.7916, - "step": 1955500 - }, - { - "epoch": 21.52, - "learning_rate": 7.120166125749491e-08, - "loss": 3.791, - "step": 1956000 - }, - { - "epoch": 21.52, - "learning_rate": 7.118790912591451e-08, - "loss": 3.7791, - "step": 1956500 - }, - { - "epoch": 21.53, - "learning_rate": 7.117415699433412e-08, - "loss": 3.7935, - "step": 1957000 - }, - { - "epoch": 21.54, - "learning_rate": 7.116040486275373e-08, - "loss": 3.8089, - "step": 1957500 - }, - { - "epoch": 21.54, - "learning_rate": 7.114665273117332e-08, - "loss": 3.7956, - "step": 1958000 - }, - { - "epoch": 21.55, - "learning_rate": 7.113290059959293e-08, - "loss": 3.7878, - "step": 1958500 - }, - { - "epoch": 21.55, - "learning_rate": 7.111914846801254e-08, - "loss": 3.7664, - "step": 1959000 - }, - { - "epoch": 21.56, - "learning_rate": 7.110539633643215e-08, - "loss": 3.8112, - "step": 1959500 - }, - { - "epoch": 21.56, - "learning_rate": 7.109164420485175e-08, - "loss": 3.7953, - "step": 1960000 - }, - { - "epoch": 21.57, - "learning_rate": 7.107789207327136e-08, - "loss": 3.7952, - "step": 1960500 - }, - { - "epoch": 21.57, - "learning_rate": 7.106413994169097e-08, - "loss": 3.7973, - "step": 1961000 - }, - { - "epoch": 21.58, - "learning_rate": 7.105038781011057e-08, - "loss": 3.8128, - "step": 1961500 - }, - { - "epoch": 21.59, - "learning_rate": 7.103663567853017e-08, - "loss": 3.7756, - "step": 1962000 - }, - { - "epoch": 21.59, - "learning_rate": 7.102288354694978e-08, - "loss": 3.7989, - "step": 1962500 - }, - { - "epoch": 21.6, - "learning_rate": 7.100913141536939e-08, - "loss": 3.7933, - "step": 1963000 - }, - { - "epoch": 21.6, - "learning_rate": 7.099537928378899e-08, - "loss": 3.7988, - "step": 1963500 - }, - { - "epoch": 21.61, - "learning_rate": 7.09816271522086e-08, - "loss": 3.7644, - "step": 1964000 - }, - { - "epoch": 21.61, - "learning_rate": 7.09678750206282e-08, - "loss": 3.7737, - "step": 1964500 - }, - { - "epoch": 21.62, - "learning_rate": 7.09541228890478e-08, - "loss": 3.8122, - "step": 1965000 - }, - { - "epoch": 21.62, - "learning_rate": 7.094037075746741e-08, - "loss": 3.8006, - "step": 1965500 - }, - { - "epoch": 21.63, - "learning_rate": 7.092661862588702e-08, - "loss": 3.7809, - "step": 1966000 - }, - { - "epoch": 21.63, - "learning_rate": 7.091286649430662e-08, - "loss": 3.7782, - "step": 1966500 - }, - { - "epoch": 21.64, - "learning_rate": 7.089911436272622e-08, - "loss": 3.7981, - "step": 1967000 - }, - { - "epoch": 21.65, - "learning_rate": 7.088536223114583e-08, - "loss": 3.7923, - "step": 1967500 - }, - { - "epoch": 21.65, - "learning_rate": 7.087161009956543e-08, - "loss": 3.7902, - "step": 1968000 - }, - { - "epoch": 21.66, - "learning_rate": 7.085785796798504e-08, - "loss": 3.7796, - "step": 1968500 - }, - { - "epoch": 21.66, - "learning_rate": 7.084410583640465e-08, - "loss": 3.7889, - "step": 1969000 - }, - { - "epoch": 21.67, - "learning_rate": 7.083035370482424e-08, - "loss": 3.7837, - "step": 1969500 - }, - { - "epoch": 21.67, - "learning_rate": 7.081660157324385e-08, - "loss": 3.7844, - "step": 1970000 - }, - { - "epoch": 21.68, - "learning_rate": 7.080284944166346e-08, - "loss": 3.7851, - "step": 1970500 - }, - { - "epoch": 21.68, - "learning_rate": 7.078909731008306e-08, - "loss": 3.8011, - "step": 1971000 - }, - { - "epoch": 21.69, - "learning_rate": 7.077534517850267e-08, - "loss": 3.7916, - "step": 1971500 - }, - { - "epoch": 21.7, - "learning_rate": 7.076159304692228e-08, - "loss": 3.79, - "step": 1972000 - }, - { - "epoch": 21.7, - "learning_rate": 7.074784091534187e-08, - "loss": 3.7911, - "step": 1972500 - }, - { - "epoch": 21.71, - "learning_rate": 7.073408878376148e-08, - "loss": 3.7921, - "step": 1973000 - }, - { - "epoch": 21.71, - "learning_rate": 7.072033665218109e-08, - "loss": 3.7653, - "step": 1973500 - }, - { - "epoch": 21.72, - "learning_rate": 7.070658452060069e-08, - "loss": 3.8045, - "step": 1974000 - }, - { - "epoch": 21.72, - "learning_rate": 7.06928323890203e-08, - "loss": 3.8068, - "step": 1974500 - }, - { - "epoch": 21.73, - "learning_rate": 7.067908025743991e-08, - "loss": 3.7931, - "step": 1975000 - }, - { - "epoch": 21.73, - "learning_rate": 7.06653281258595e-08, - "loss": 3.79, - "step": 1975500 - }, - { - "epoch": 21.74, - "learning_rate": 7.065157599427911e-08, - "loss": 3.8049, - "step": 1976000 - }, - { - "epoch": 21.74, - "learning_rate": 7.063782386269872e-08, - "loss": 3.7892, - "step": 1976500 - }, - { - "epoch": 21.75, - "learning_rate": 7.062407173111832e-08, - "loss": 3.7835, - "step": 1977000 - }, - { - "epoch": 21.76, - "learning_rate": 7.061031959953793e-08, - "loss": 3.7738, - "step": 1977500 - }, - { - "epoch": 21.76, - "learning_rate": 7.059656746795754e-08, - "loss": 3.7954, - "step": 1978000 - }, - { - "epoch": 21.77, - "learning_rate": 7.058281533637713e-08, - "loss": 3.8024, - "step": 1978500 - }, - { - "epoch": 21.77, - "learning_rate": 7.056906320479674e-08, - "loss": 3.786, - "step": 1979000 - }, - { - "epoch": 21.78, - "learning_rate": 7.055531107321635e-08, - "loss": 3.8177, - "step": 1979500 - }, - { - "epoch": 21.78, - "learning_rate": 7.054155894163595e-08, - "loss": 3.7887, - "step": 1980000 - }, - { - "epoch": 21.79, - "learning_rate": 7.052780681005556e-08, - "loss": 3.7662, - "step": 1980500 - }, - { - "epoch": 21.79, - "learning_rate": 7.051405467847517e-08, - "loss": 3.7833, - "step": 1981000 - }, - { - "epoch": 21.8, - "learning_rate": 7.050030254689476e-08, - "loss": 3.7735, - "step": 1981500 - }, - { - "epoch": 21.81, - "learning_rate": 7.048655041531437e-08, - "loss": 3.7807, - "step": 1982000 - }, - { - "epoch": 21.81, - "learning_rate": 7.047279828373398e-08, - "loss": 3.8062, - "step": 1982500 - }, - { - "epoch": 21.82, - "learning_rate": 7.045904615215358e-08, - "loss": 3.7878, - "step": 1983000 - }, - { - "epoch": 21.82, - "learning_rate": 7.044529402057319e-08, - "loss": 3.7903, - "step": 1983500 - }, - { - "epoch": 21.83, - "learning_rate": 7.04315418889928e-08, - "loss": 3.8164, - "step": 1984000 - }, - { - "epoch": 21.83, - "learning_rate": 7.041778975741239e-08, - "loss": 3.8074, - "step": 1984500 - }, - { - "epoch": 21.84, - "learning_rate": 7.0404037625832e-08, - "loss": 3.779, - "step": 1985000 - }, - { - "epoch": 21.84, - "learning_rate": 7.039028549425161e-08, - "loss": 3.7793, - "step": 1985500 - }, - { - "epoch": 21.85, - "learning_rate": 7.037653336267121e-08, - "loss": 3.7971, - "step": 1986000 - }, - { - "epoch": 21.85, - "learning_rate": 7.036278123109082e-08, - "loss": 3.8091, - "step": 1986500 - }, - { - "epoch": 21.86, - "learning_rate": 7.034902909951043e-08, - "loss": 3.7856, - "step": 1987000 - }, - { - "epoch": 21.87, - "learning_rate": 7.033527696793002e-08, - "loss": 3.7919, - "step": 1987500 - }, - { - "epoch": 21.87, - "learning_rate": 7.032152483634963e-08, - "loss": 3.7837, - "step": 1988000 - }, - { - "epoch": 21.88, - "learning_rate": 7.030777270476924e-08, - "loss": 3.8052, - "step": 1988500 - }, - { - "epoch": 21.88, - "learning_rate": 7.029402057318884e-08, - "loss": 3.8023, - "step": 1989000 - }, - { - "epoch": 21.89, - "learning_rate": 7.028026844160845e-08, - "loss": 3.7983, - "step": 1989500 - }, - { - "epoch": 21.89, - "learning_rate": 7.026651631002806e-08, - "loss": 3.7899, - "step": 1990000 - }, - { - "epoch": 21.9, - "learning_rate": 7.025276417844765e-08, - "loss": 3.7778, - "step": 1990500 - }, - { - "epoch": 21.9, - "learning_rate": 7.023901204686726e-08, - "loss": 3.8032, - "step": 1991000 - }, - { - "epoch": 21.91, - "learning_rate": 7.022525991528686e-08, - "loss": 3.7824, - "step": 1991500 - }, - { - "epoch": 21.92, - "learning_rate": 7.021150778370647e-08, - "loss": 3.7982, - "step": 1992000 - }, - { - "epoch": 21.92, - "learning_rate": 7.019775565212608e-08, - "loss": 3.7951, - "step": 1992500 - }, - { - "epoch": 21.93, - "learning_rate": 7.018400352054567e-08, - "loss": 3.7974, - "step": 1993000 - }, - { - "epoch": 21.93, - "learning_rate": 7.017025138896528e-08, - "loss": 3.7859, - "step": 1993500 - }, - { - "epoch": 21.94, - "learning_rate": 7.015649925738489e-08, - "loss": 3.8058, - "step": 1994000 - }, - { - "epoch": 21.94, - "learning_rate": 7.014274712580449e-08, - "loss": 3.7919, - "step": 1994500 - }, - { - "epoch": 21.95, - "learning_rate": 7.01289949942241e-08, - "loss": 3.7757, - "step": 1995000 - }, - { - "epoch": 21.95, - "learning_rate": 7.01152428626437e-08, - "loss": 3.7913, - "step": 1995500 - }, - { - "epoch": 21.96, - "learning_rate": 7.01014907310633e-08, - "loss": 3.7847, - "step": 1996000 - }, - { - "epoch": 21.96, - "learning_rate": 7.008773859948291e-08, - "loss": 3.7816, - "step": 1996500 - }, - { - "epoch": 21.97, - "learning_rate": 7.007398646790252e-08, - "loss": 3.7847, - "step": 1997000 - }, - { - "epoch": 21.98, - "learning_rate": 7.006023433632212e-08, - "loss": 3.8077, - "step": 1997500 - }, - { - "epoch": 21.98, - "learning_rate": 7.004648220474173e-08, - "loss": 3.7798, - "step": 1998000 - }, - { - "epoch": 21.99, - "learning_rate": 7.003273007316134e-08, - "loss": 3.8163, - "step": 1998500 - }, - { - "epoch": 21.99, - "learning_rate": 7.001897794158093e-08, - "loss": 3.7825, - "step": 1999000 - }, - { - "epoch": 22.0, - "learning_rate": 7.000522581000054e-08, - "loss": 3.7664, - "step": 1999500 - }, - { - "epoch": 22.0, - "eval_loss": 3.850017786026001, - "eval_runtime": 6.1404, - "eval_samples_per_second": 253.077, - "step": 1999690 - }, - { - "epoch": 22.0, - "learning_rate": 6.999147367842015e-08, - "loss": 3.8066, - "step": 2000000 - }, - { - "epoch": 22.01, - "learning_rate": 6.997772154683975e-08, - "loss": 3.7788, - "step": 2000500 - }, - { - "epoch": 22.01, - "learning_rate": 6.996396941525936e-08, - "loss": 3.7936, - "step": 2001000 - }, - { - "epoch": 22.02, - "learning_rate": 6.995021728367896e-08, - "loss": 3.7973, - "step": 2001500 - }, - { - "epoch": 22.03, - "learning_rate": 6.993646515209856e-08, - "loss": 3.7848, - "step": 2002000 - }, - { - "epoch": 22.03, - "learning_rate": 6.992271302051817e-08, - "loss": 3.7862, - "step": 2002500 - }, - { - "epoch": 22.04, - "learning_rate": 6.990896088893778e-08, - "loss": 3.796, - "step": 2003000 - }, - { - "epoch": 22.04, - "learning_rate": 6.989520875735739e-08, - "loss": 3.8068, - "step": 2003500 - }, - { - "epoch": 22.05, - "learning_rate": 6.988145662577698e-08, - "loss": 3.7887, - "step": 2004000 - }, - { - "epoch": 22.05, - "learning_rate": 6.98677044941966e-08, - "loss": 3.7964, - "step": 2004500 - }, - { - "epoch": 22.06, - "learning_rate": 6.98539523626162e-08, - "loss": 3.79, - "step": 2005000 - }, - { - "epoch": 22.06, - "learning_rate": 6.98402002310358e-08, - "loss": 3.8109, - "step": 2005500 - }, - { - "epoch": 22.07, - "learning_rate": 6.982644809945541e-08, - "loss": 3.8085, - "step": 2006000 - }, - { - "epoch": 22.07, - "learning_rate": 6.981269596787502e-08, - "loss": 3.7893, - "step": 2006500 - }, - { - "epoch": 22.08, - "learning_rate": 6.979894383629463e-08, - "loss": 3.8047, - "step": 2007000 - }, - { - "epoch": 22.09, - "learning_rate": 6.978519170471422e-08, - "loss": 3.7857, - "step": 2007500 - }, - { - "epoch": 22.09, - "learning_rate": 6.977143957313383e-08, - "loss": 3.7952, - "step": 2008000 - }, - { - "epoch": 22.1, - "learning_rate": 6.975768744155344e-08, - "loss": 3.792, - "step": 2008500 - }, - { - "epoch": 22.1, - "learning_rate": 6.974393530997305e-08, - "loss": 3.7971, - "step": 2009000 - }, - { - "epoch": 22.11, - "learning_rate": 6.973018317839265e-08, - "loss": 3.7849, - "step": 2009500 - }, - { - "epoch": 22.11, - "learning_rate": 6.971643104681226e-08, - "loss": 3.7939, - "step": 2010000 - }, - { - "epoch": 22.12, - "learning_rate": 6.970267891523187e-08, - "loss": 3.7788, - "step": 2010500 - }, - { - "epoch": 22.12, - "learning_rate": 6.968892678365146e-08, - "loss": 3.779, - "step": 2011000 - }, - { - "epoch": 22.13, - "learning_rate": 6.967517465207107e-08, - "loss": 3.8088, - "step": 2011500 - }, - { - "epoch": 22.14, - "learning_rate": 6.966142252049068e-08, - "loss": 3.7793, - "step": 2012000 - }, - { - "epoch": 22.14, - "learning_rate": 6.964767038891028e-08, - "loss": 3.8087, - "step": 2012500 - }, - { - "epoch": 22.15, - "learning_rate": 6.963391825732989e-08, - "loss": 3.7913, - "step": 2013000 - }, - { - "epoch": 22.15, - "learning_rate": 6.96201661257495e-08, - "loss": 3.7824, - "step": 2013500 - }, - { - "epoch": 22.16, - "learning_rate": 6.960641399416909e-08, - "loss": 3.7825, - "step": 2014000 - }, - { - "epoch": 22.16, - "learning_rate": 6.95926618625887e-08, - "loss": 3.7882, - "step": 2014500 - }, - { - "epoch": 22.17, - "learning_rate": 6.957890973100831e-08, - "loss": 3.8101, - "step": 2015000 - }, - { - "epoch": 22.17, - "learning_rate": 6.956515759942791e-08, - "loss": 3.8003, - "step": 2015500 - }, - { - "epoch": 22.18, - "learning_rate": 6.955140546784752e-08, - "loss": 3.8157, - "step": 2016000 - }, - { - "epoch": 22.18, - "learning_rate": 6.953765333626713e-08, - "loss": 3.7551, - "step": 2016500 - }, - { - "epoch": 22.19, - "learning_rate": 6.952390120468672e-08, - "loss": 3.7764, - "step": 2017000 - }, - { - "epoch": 22.2, - "learning_rate": 6.951014907310633e-08, - "loss": 3.7947, - "step": 2017500 - }, - { - "epoch": 22.2, - "learning_rate": 6.949639694152594e-08, - "loss": 3.778, - "step": 2018000 - }, - { - "epoch": 22.21, - "learning_rate": 6.948264480994554e-08, - "loss": 3.7788, - "step": 2018500 - }, - { - "epoch": 22.21, - "learning_rate": 6.946889267836515e-08, - "loss": 3.7832, - "step": 2019000 - }, - { - "epoch": 22.22, - "learning_rate": 6.945514054678476e-08, - "loss": 3.8007, - "step": 2019500 - }, - { - "epoch": 22.22, - "learning_rate": 6.944138841520435e-08, - "loss": 3.7839, - "step": 2020000 - }, - { - "epoch": 22.23, - "learning_rate": 6.942763628362396e-08, - "loss": 3.8046, - "step": 2020500 - }, - { - "epoch": 22.23, - "learning_rate": 6.941388415204357e-08, - "loss": 3.7683, - "step": 2021000 - }, - { - "epoch": 22.24, - "learning_rate": 6.940013202046317e-08, - "loss": 3.7778, - "step": 2021500 - }, - { - "epoch": 22.25, - "learning_rate": 6.938637988888278e-08, - "loss": 3.7763, - "step": 2022000 - }, - { - "epoch": 22.25, - "learning_rate": 6.937262775730239e-08, - "loss": 3.8052, - "step": 2022500 - }, - { - "epoch": 22.26, - "learning_rate": 6.935887562572198e-08, - "loss": 3.7951, - "step": 2023000 - }, - { - "epoch": 22.26, - "learning_rate": 6.934512349414159e-08, - "loss": 3.7712, - "step": 2023500 - }, - { - "epoch": 22.27, - "learning_rate": 6.93313713625612e-08, - "loss": 3.7888, - "step": 2024000 - }, - { - "epoch": 22.27, - "learning_rate": 6.93176192309808e-08, - "loss": 3.7814, - "step": 2024500 - }, - { - "epoch": 22.28, - "learning_rate": 6.93038670994004e-08, - "loss": 3.7712, - "step": 2025000 - }, - { - "epoch": 22.28, - "learning_rate": 6.929011496782001e-08, - "loss": 3.7829, - "step": 2025500 - }, - { - "epoch": 22.29, - "learning_rate": 6.927636283623961e-08, - "loss": 3.8138, - "step": 2026000 - }, - { - "epoch": 22.29, - "learning_rate": 6.926261070465922e-08, - "loss": 3.7753, - "step": 2026500 - }, - { - "epoch": 22.3, - "learning_rate": 6.924885857307883e-08, - "loss": 3.7798, - "step": 2027000 - }, - { - "epoch": 22.31, - "learning_rate": 6.923510644149843e-08, - "loss": 3.8074, - "step": 2027500 - }, - { - "epoch": 22.31, - "learning_rate": 6.922135430991803e-08, - "loss": 3.7888, - "step": 2028000 - }, - { - "epoch": 22.32, - "learning_rate": 6.920760217833764e-08, - "loss": 3.7851, - "step": 2028500 - }, - { - "epoch": 22.32, - "learning_rate": 6.919385004675724e-08, - "loss": 3.7909, - "step": 2029000 - }, - { - "epoch": 22.33, - "learning_rate": 6.918009791517685e-08, - "loss": 3.7922, - "step": 2029500 - }, - { - "epoch": 22.33, - "learning_rate": 6.916634578359646e-08, - "loss": 3.8047, - "step": 2030000 - }, - { - "epoch": 22.34, - "learning_rate": 6.915259365201606e-08, - "loss": 3.8004, - "step": 2030500 - }, - { - "epoch": 22.34, - "learning_rate": 6.913884152043566e-08, - "loss": 3.7928, - "step": 2031000 - }, - { - "epoch": 22.35, - "learning_rate": 6.912508938885527e-08, - "loss": 3.7868, - "step": 2031500 - }, - { - "epoch": 22.36, - "learning_rate": 6.911133725727487e-08, - "loss": 3.7918, - "step": 2032000 - }, - { - "epoch": 22.36, - "learning_rate": 6.909758512569448e-08, - "loss": 3.797, - "step": 2032500 - }, - { - "epoch": 22.37, - "learning_rate": 6.908383299411409e-08, - "loss": 3.793, - "step": 2033000 - }, - { - "epoch": 22.37, - "learning_rate": 6.907008086253368e-08, - "loss": 3.7841, - "step": 2033500 - }, - { - "epoch": 22.38, - "learning_rate": 6.90563287309533e-08, - "loss": 3.7899, - "step": 2034000 - }, - { - "epoch": 22.38, - "learning_rate": 6.90425765993729e-08, - "loss": 3.7759, - "step": 2034500 - }, - { - "epoch": 22.39, - "learning_rate": 6.90288244677925e-08, - "loss": 3.7857, - "step": 2035000 - }, - { - "epoch": 22.39, - "learning_rate": 6.901507233621211e-08, - "loss": 3.7883, - "step": 2035500 - }, - { - "epoch": 22.4, - "learning_rate": 6.900132020463172e-08, - "loss": 3.7867, - "step": 2036000 - }, - { - "epoch": 22.4, - "learning_rate": 6.898756807305131e-08, - "loss": 3.767, - "step": 2036500 - }, - { - "epoch": 22.41, - "learning_rate": 6.897381594147092e-08, - "loss": 3.7827, - "step": 2037000 - }, - { - "epoch": 22.42, - "learning_rate": 6.896006380989053e-08, - "loss": 3.7938, - "step": 2037500 - }, - { - "epoch": 22.42, - "learning_rate": 6.894631167831013e-08, - "loss": 3.7737, - "step": 2038000 - }, - { - "epoch": 22.43, - "learning_rate": 6.893255954672974e-08, - "loss": 3.8141, - "step": 2038500 - }, - { - "epoch": 22.43, - "learning_rate": 6.891880741514935e-08, - "loss": 3.7932, - "step": 2039000 - }, - { - "epoch": 22.44, - "learning_rate": 6.890505528356894e-08, - "loss": 3.809, - "step": 2039500 - }, - { - "epoch": 22.44, - "learning_rate": 6.889130315198855e-08, - "loss": 3.7865, - "step": 2040000 - }, - { - "epoch": 22.45, - "learning_rate": 6.887755102040816e-08, - "loss": 3.8069, - "step": 2040500 - }, - { - "epoch": 22.45, - "learning_rate": 6.886379888882776e-08, - "loss": 3.7876, - "step": 2041000 - }, - { - "epoch": 22.46, - "learning_rate": 6.885004675724737e-08, - "loss": 3.7873, - "step": 2041500 - }, - { - "epoch": 22.47, - "learning_rate": 6.883629462566698e-08, - "loss": 3.788, - "step": 2042000 - }, - { - "epoch": 22.47, - "learning_rate": 6.882254249408657e-08, - "loss": 3.7951, - "step": 2042500 - }, - { - "epoch": 22.48, - "learning_rate": 6.880879036250618e-08, - "loss": 3.7851, - "step": 2043000 - }, - { - "epoch": 22.48, - "learning_rate": 6.879503823092579e-08, - "loss": 3.7649, - "step": 2043500 - }, - { - "epoch": 22.49, - "learning_rate": 6.878128609934539e-08, - "loss": 3.7786, - "step": 2044000 - }, - { - "epoch": 22.49, - "learning_rate": 6.8767533967765e-08, - "loss": 3.8037, - "step": 2044500 - }, - { - "epoch": 22.5, - "learning_rate": 6.875378183618461e-08, - "loss": 3.8096, - "step": 2045000 - }, - { - "epoch": 22.5, - "learning_rate": 6.87400297046042e-08, - "loss": 3.7658, - "step": 2045500 - }, - { - "epoch": 22.51, - "learning_rate": 6.872627757302381e-08, - "loss": 3.79, - "step": 2046000 - }, - { - "epoch": 22.51, - "learning_rate": 6.871252544144342e-08, - "loss": 3.7836, - "step": 2046500 - }, - { - "epoch": 22.52, - "learning_rate": 6.869877330986302e-08, - "loss": 3.7934, - "step": 2047000 - }, - { - "epoch": 22.53, - "learning_rate": 6.868502117828263e-08, - "loss": 3.7855, - "step": 2047500 - }, - { - "epoch": 22.53, - "learning_rate": 6.867126904670224e-08, - "loss": 3.7741, - "step": 2048000 - }, - { - "epoch": 22.54, - "learning_rate": 6.865751691512183e-08, - "loss": 3.8051, - "step": 2048500 - }, - { - "epoch": 22.54, - "learning_rate": 6.864376478354144e-08, - "loss": 3.7978, - "step": 2049000 - }, - { - "epoch": 22.55, - "learning_rate": 6.863001265196105e-08, - "loss": 3.7771, - "step": 2049500 - }, - { - "epoch": 22.55, - "learning_rate": 6.861626052038065e-08, - "loss": 3.8046, - "step": 2050000 - }, - { - "epoch": 22.56, - "learning_rate": 6.860250838880026e-08, - "loss": 3.7837, - "step": 2050500 - }, - { - "epoch": 22.56, - "learning_rate": 6.858875625721987e-08, - "loss": 3.7748, - "step": 2051000 - }, - { - "epoch": 22.57, - "learning_rate": 6.857500412563946e-08, - "loss": 3.7899, - "step": 2051500 - }, - { - "epoch": 22.58, - "learning_rate": 6.856125199405907e-08, - "loss": 3.775, - "step": 2052000 - }, - { - "epoch": 22.58, - "learning_rate": 6.854749986247868e-08, - "loss": 3.7796, - "step": 2052500 - }, - { - "epoch": 22.59, - "learning_rate": 6.853374773089828e-08, - "loss": 3.8067, - "step": 2053000 - }, - { - "epoch": 22.59, - "learning_rate": 6.851999559931789e-08, - "loss": 3.8023, - "step": 2053500 - }, - { - "epoch": 22.6, - "learning_rate": 6.85062434677375e-08, - "loss": 3.7928, - "step": 2054000 - }, - { - "epoch": 22.6, - "learning_rate": 6.84924913361571e-08, - "loss": 3.7919, - "step": 2054500 - }, - { - "epoch": 22.61, - "learning_rate": 6.84787392045767e-08, - "loss": 3.7914, - "step": 2055000 - }, - { - "epoch": 22.61, - "learning_rate": 6.846498707299631e-08, - "loss": 3.7881, - "step": 2055500 - }, - { - "epoch": 22.62, - "learning_rate": 6.845123494141592e-08, - "loss": 3.771, - "step": 2056000 - }, - { - "epoch": 22.63, - "learning_rate": 6.843748280983553e-08, - "loss": 3.7893, - "step": 2056500 - }, - { - "epoch": 22.63, - "learning_rate": 6.842373067825513e-08, - "loss": 3.7695, - "step": 2057000 - }, - { - "epoch": 22.64, - "learning_rate": 6.840997854667473e-08, - "loss": 3.7846, - "step": 2057500 - }, - { - "epoch": 22.64, - "learning_rate": 6.839622641509434e-08, - "loss": 3.7742, - "step": 2058000 - }, - { - "epoch": 22.65, - "learning_rate": 6.838247428351395e-08, - "loss": 3.7913, - "step": 2058500 - }, - { - "epoch": 22.65, - "learning_rate": 6.836872215193355e-08, - "loss": 3.7827, - "step": 2059000 - }, - { - "epoch": 22.66, - "learning_rate": 6.835497002035316e-08, - "loss": 3.7851, - "step": 2059500 - }, - { - "epoch": 22.66, - "learning_rate": 6.834121788877277e-08, - "loss": 3.7711, - "step": 2060000 - }, - { - "epoch": 22.67, - "learning_rate": 6.832746575719236e-08, - "loss": 3.8043, - "step": 2060500 - }, - { - "epoch": 22.67, - "learning_rate": 6.831371362561197e-08, - "loss": 3.7923, - "step": 2061000 - }, - { - "epoch": 22.68, - "learning_rate": 6.829996149403158e-08, - "loss": 3.8001, - "step": 2061500 - }, - { - "epoch": 22.69, - "learning_rate": 6.828620936245118e-08, - "loss": 3.7818, - "step": 2062000 - }, - { - "epoch": 22.69, - "learning_rate": 6.827245723087079e-08, - "loss": 3.8019, - "step": 2062500 - }, - { - "epoch": 22.7, - "learning_rate": 6.82587050992904e-08, - "loss": 3.7833, - "step": 2063000 - }, - { - "epoch": 22.7, - "learning_rate": 6.824495296771e-08, - "loss": 3.7806, - "step": 2063500 - }, - { - "epoch": 22.71, - "learning_rate": 6.82312008361296e-08, - "loss": 3.788, - "step": 2064000 - }, - { - "epoch": 22.71, - "learning_rate": 6.821744870454921e-08, - "loss": 3.7813, - "step": 2064500 - }, - { - "epoch": 22.72, - "learning_rate": 6.820369657296881e-08, - "loss": 3.802, - "step": 2065000 - }, - { - "epoch": 22.72, - "learning_rate": 6.818994444138842e-08, - "loss": 3.8017, - "step": 2065500 - }, - { - "epoch": 22.73, - "learning_rate": 6.817619230980801e-08, - "loss": 3.7946, - "step": 2066000 - }, - { - "epoch": 22.74, - "learning_rate": 6.816244017822762e-08, - "loss": 3.7915, - "step": 2066500 - }, - { - "epoch": 22.74, - "learning_rate": 6.814868804664723e-08, - "loss": 3.7797, - "step": 2067000 - }, - { - "epoch": 22.75, - "learning_rate": 6.813493591506683e-08, - "loss": 3.7801, - "step": 2067500 - }, - { - "epoch": 22.75, - "learning_rate": 6.812118378348644e-08, - "loss": 3.7907, - "step": 2068000 - }, - { - "epoch": 22.76, - "learning_rate": 6.810743165190605e-08, - "loss": 3.7919, - "step": 2068500 - }, - { - "epoch": 22.76, - "learning_rate": 6.809367952032564e-08, - "loss": 3.7985, - "step": 2069000 - }, - { - "epoch": 22.77, - "learning_rate": 6.807992738874525e-08, - "loss": 3.7606, - "step": 2069500 - }, - { - "epoch": 22.77, - "learning_rate": 6.806617525716486e-08, - "loss": 3.7897, - "step": 2070000 - }, - { - "epoch": 22.78, - "learning_rate": 6.805242312558446e-08, - "loss": 3.7761, - "step": 2070500 - }, - { - "epoch": 22.78, - "learning_rate": 6.803867099400407e-08, - "loss": 3.7869, - "step": 2071000 - }, - { - "epoch": 22.79, - "learning_rate": 6.802491886242368e-08, - "loss": 3.787, - "step": 2071500 - }, - { - "epoch": 22.8, - "learning_rate": 6.801116673084327e-08, - "loss": 3.7958, - "step": 2072000 - }, - { - "epoch": 22.8, - "learning_rate": 6.799741459926288e-08, - "loss": 3.803, - "step": 2072500 - }, - { - "epoch": 22.81, - "learning_rate": 6.798366246768249e-08, - "loss": 3.7943, - "step": 2073000 - }, - { - "epoch": 22.81, - "learning_rate": 6.796991033610209e-08, - "loss": 3.7805, - "step": 2073500 - }, - { - "epoch": 22.82, - "learning_rate": 6.79561582045217e-08, - "loss": 3.8007, - "step": 2074000 - }, - { - "epoch": 22.82, - "learning_rate": 6.794240607294131e-08, - "loss": 3.7836, - "step": 2074500 - }, - { - "epoch": 22.83, - "learning_rate": 6.79286539413609e-08, - "loss": 3.7707, - "step": 2075000 - }, - { - "epoch": 22.83, - "learning_rate": 6.791490180978051e-08, - "loss": 3.7915, - "step": 2075500 - }, - { - "epoch": 22.84, - "learning_rate": 6.790114967820012e-08, - "loss": 3.7768, - "step": 2076000 - }, - { - "epoch": 22.85, - "learning_rate": 6.788739754661972e-08, - "loss": 3.7799, - "step": 2076500 - }, - { - "epoch": 22.85, - "learning_rate": 6.787364541503933e-08, - "loss": 3.79, - "step": 2077000 - }, - { - "epoch": 22.86, - "learning_rate": 6.785989328345894e-08, - "loss": 3.7838, - "step": 2077500 - }, - { - "epoch": 22.86, - "learning_rate": 6.784614115187853e-08, - "loss": 3.7975, - "step": 2078000 - }, - { - "epoch": 22.87, - "learning_rate": 6.783238902029814e-08, - "loss": 3.7785, - "step": 2078500 - }, - { - "epoch": 22.87, - "learning_rate": 6.781863688871775e-08, - "loss": 3.7954, - "step": 2079000 - }, - { - "epoch": 22.88, - "learning_rate": 6.780488475713735e-08, - "loss": 3.77, - "step": 2079500 - }, - { - "epoch": 22.88, - "learning_rate": 6.779113262555696e-08, - "loss": 3.7757, - "step": 2080000 - }, - { - "epoch": 22.89, - "learning_rate": 6.777738049397657e-08, - "loss": 3.806, - "step": 2080500 - }, - { - "epoch": 22.89, - "learning_rate": 6.776362836239616e-08, - "loss": 3.7922, - "step": 2081000 - }, - { - "epoch": 22.9, - "learning_rate": 6.774987623081577e-08, - "loss": 3.7956, - "step": 2081500 - }, - { - "epoch": 22.91, - "learning_rate": 6.773612409923538e-08, - "loss": 3.8087, - "step": 2082000 - }, - { - "epoch": 22.91, - "learning_rate": 6.772237196765498e-08, - "loss": 3.7701, - "step": 2082500 - }, - { - "epoch": 22.92, - "learning_rate": 6.770861983607459e-08, - "loss": 3.7702, - "step": 2083000 - }, - { - "epoch": 22.92, - "learning_rate": 6.76948677044942e-08, - "loss": 3.7899, - "step": 2083500 - }, - { - "epoch": 22.93, - "learning_rate": 6.768111557291379e-08, - "loss": 3.7991, - "step": 2084000 - }, - { - "epoch": 22.93, - "learning_rate": 6.76673634413334e-08, - "loss": 3.7842, - "step": 2084500 - }, - { - "epoch": 22.94, - "learning_rate": 6.765361130975301e-08, - "loss": 3.7864, - "step": 2085000 - }, - { - "epoch": 22.94, - "learning_rate": 6.76398591781726e-08, - "loss": 3.7759, - "step": 2085500 - }, - { - "epoch": 22.95, - "learning_rate": 6.762610704659222e-08, - "loss": 3.8131, - "step": 2086000 - }, - { - "epoch": 22.96, - "learning_rate": 6.761235491501183e-08, - "loss": 3.7874, - "step": 2086500 - }, - { - "epoch": 22.96, - "learning_rate": 6.759860278343142e-08, - "loss": 3.7895, - "step": 2087000 - }, - { - "epoch": 22.97, - "learning_rate": 6.758485065185103e-08, - "loss": 3.7987, - "step": 2087500 - }, - { - "epoch": 22.97, - "learning_rate": 6.757109852027064e-08, - "loss": 3.7952, - "step": 2088000 - }, - { - "epoch": 22.98, - "learning_rate": 6.755734638869024e-08, - "loss": 3.7918, - "step": 2088500 - }, - { - "epoch": 22.98, - "learning_rate": 6.754359425710985e-08, - "loss": 3.7925, - "step": 2089000 - }, - { - "epoch": 22.99, - "learning_rate": 6.752984212552945e-08, - "loss": 3.7983, - "step": 2089500 - }, - { - "epoch": 22.99, - "learning_rate": 6.751608999394905e-08, - "loss": 3.803, - "step": 2090000 - }, - { - "epoch": 23.0, - "learning_rate": 6.750233786236866e-08, - "loss": 3.7918, - "step": 2090500 - }, - { - "epoch": 23.0, - "eval_loss": 3.8476030826568604, - "eval_runtime": 6.1381, - "eval_samples_per_second": 253.175, - "step": 2090585 - }, - { - "epoch": 23.0, - "learning_rate": 6.748858573078827e-08, - "loss": 3.7923, - "step": 2091000 - }, - { - "epoch": 23.01, - "learning_rate": 6.747483359920787e-08, - "loss": 3.7865, - "step": 2091500 - }, - { - "epoch": 23.02, - "learning_rate": 6.746108146762747e-08, - "loss": 3.8029, - "step": 2092000 - }, - { - "epoch": 23.02, - "learning_rate": 6.744732933604708e-08, - "loss": 3.7909, - "step": 2092500 - }, - { - "epoch": 23.03, - "learning_rate": 6.743357720446668e-08, - "loss": 3.7795, - "step": 2093000 - }, - { - "epoch": 23.03, - "learning_rate": 6.741982507288629e-08, - "loss": 3.7981, - "step": 2093500 - }, - { - "epoch": 23.04, - "learning_rate": 6.74060729413059e-08, - "loss": 3.7653, - "step": 2094000 - }, - { - "epoch": 23.04, - "learning_rate": 6.73923208097255e-08, - "loss": 3.7908, - "step": 2094500 - }, - { - "epoch": 23.05, - "learning_rate": 6.73785686781451e-08, - "loss": 3.8037, - "step": 2095000 - }, - { - "epoch": 23.05, - "learning_rate": 6.736481654656471e-08, - "loss": 3.8039, - "step": 2095500 - }, - { - "epoch": 23.06, - "learning_rate": 6.735106441498431e-08, - "loss": 3.7688, - "step": 2096000 - }, - { - "epoch": 23.07, - "learning_rate": 6.733731228340392e-08, - "loss": 3.7864, - "step": 2096500 - }, - { - "epoch": 23.07, - "learning_rate": 6.732356015182353e-08, - "loss": 3.8128, - "step": 2097000 - }, - { - "epoch": 23.08, - "learning_rate": 6.730980802024312e-08, - "loss": 3.8003, - "step": 2097500 - }, - { - "epoch": 23.08, - "learning_rate": 6.729605588866273e-08, - "loss": 3.7882, - "step": 2098000 - }, - { - "epoch": 23.09, - "learning_rate": 6.728230375708234e-08, - "loss": 3.7851, - "step": 2098500 - }, - { - "epoch": 23.09, - "learning_rate": 6.726855162550194e-08, - "loss": 3.804, - "step": 2099000 - }, - { - "epoch": 23.1, - "learning_rate": 6.725479949392155e-08, - "loss": 3.7985, - "step": 2099500 - }, - { - "epoch": 23.1, - "learning_rate": 6.724104736234116e-08, - "loss": 3.792, - "step": 2100000 - }, - { - "epoch": 23.11, - "learning_rate": 6.722729523076077e-08, - "loss": 3.7817, - "step": 2100500 - }, - { - "epoch": 23.11, - "learning_rate": 6.721354309918036e-08, - "loss": 3.7726, - "step": 2101000 - }, - { - "epoch": 23.12, - "learning_rate": 6.719979096759997e-08, - "loss": 3.7833, - "step": 2101500 - }, - { - "epoch": 23.13, - "learning_rate": 6.718603883601958e-08, - "loss": 3.7814, - "step": 2102000 - }, - { - "epoch": 23.13, - "learning_rate": 6.717228670443918e-08, - "loss": 3.798, - "step": 2102500 - }, - { - "epoch": 23.14, - "learning_rate": 6.715853457285879e-08, - "loss": 3.7779, - "step": 2103000 - }, - { - "epoch": 23.14, - "learning_rate": 6.71447824412784e-08, - "loss": 3.7791, - "step": 2103500 - }, - { - "epoch": 23.15, - "learning_rate": 6.7131030309698e-08, - "loss": 3.7828, - "step": 2104000 - }, - { - "epoch": 23.15, - "learning_rate": 6.71172781781176e-08, - "loss": 3.7968, - "step": 2104500 - }, - { - "epoch": 23.16, - "learning_rate": 6.710352604653721e-08, - "loss": 3.7775, - "step": 2105000 - }, - { - "epoch": 23.16, - "learning_rate": 6.708977391495682e-08, - "loss": 3.7672, - "step": 2105500 - }, - { - "epoch": 23.17, - "learning_rate": 6.707602178337643e-08, - "loss": 3.7741, - "step": 2106000 - }, - { - "epoch": 23.18, - "learning_rate": 6.706226965179603e-08, - "loss": 3.7777, - "step": 2106500 - }, - { - "epoch": 23.18, - "learning_rate": 6.704851752021564e-08, - "loss": 3.8072, - "step": 2107000 - }, - { - "epoch": 23.19, - "learning_rate": 6.703476538863525e-08, - "loss": 3.7885, - "step": 2107500 - }, - { - "epoch": 23.19, - "learning_rate": 6.702101325705484e-08, - "loss": 3.7898, - "step": 2108000 - }, - { - "epoch": 23.2, - "learning_rate": 6.700726112547445e-08, - "loss": 3.7962, - "step": 2108500 - }, - { - "epoch": 23.2, - "learning_rate": 6.699350899389406e-08, - "loss": 3.7817, - "step": 2109000 - }, - { - "epoch": 23.21, - "learning_rate": 6.697975686231366e-08, - "loss": 3.7932, - "step": 2109500 - }, - { - "epoch": 23.21, - "learning_rate": 6.696600473073327e-08, - "loss": 3.7787, - "step": 2110000 - }, - { - "epoch": 23.22, - "learning_rate": 6.695225259915288e-08, - "loss": 3.7838, - "step": 2110500 - }, - { - "epoch": 23.22, - "learning_rate": 6.693850046757247e-08, - "loss": 3.7731, - "step": 2111000 - }, - { - "epoch": 23.23, - "learning_rate": 6.692474833599208e-08, - "loss": 3.7806, - "step": 2111500 - }, - { - "epoch": 23.24, - "learning_rate": 6.691099620441169e-08, - "loss": 3.779, - "step": 2112000 - }, - { - "epoch": 23.24, - "learning_rate": 6.689724407283129e-08, - "loss": 3.7721, - "step": 2112500 - }, - { - "epoch": 23.25, - "learning_rate": 6.68834919412509e-08, - "loss": 3.7955, - "step": 2113000 - }, - { - "epoch": 23.25, - "learning_rate": 6.68697398096705e-08, - "loss": 3.7856, - "step": 2113500 - }, - { - "epoch": 23.26, - "learning_rate": 6.68559876780901e-08, - "loss": 3.7926, - "step": 2114000 - }, - { - "epoch": 23.26, - "learning_rate": 6.684223554650971e-08, - "loss": 3.7946, - "step": 2114500 - }, - { - "epoch": 23.27, - "learning_rate": 6.682848341492932e-08, - "loss": 3.7947, - "step": 2115000 - }, - { - "epoch": 23.27, - "learning_rate": 6.681473128334892e-08, - "loss": 3.7704, - "step": 2115500 - }, - { - "epoch": 23.28, - "learning_rate": 6.680097915176852e-08, - "loss": 3.7955, - "step": 2116000 - }, - { - "epoch": 23.29, - "learning_rate": 6.678722702018813e-08, - "loss": 3.7845, - "step": 2116500 - }, - { - "epoch": 23.29, - "learning_rate": 6.677347488860773e-08, - "loss": 3.7842, - "step": 2117000 - }, - { - "epoch": 23.3, - "learning_rate": 6.675972275702734e-08, - "loss": 3.7909, - "step": 2117500 - }, - { - "epoch": 23.3, - "learning_rate": 6.674597062544695e-08, - "loss": 3.7703, - "step": 2118000 - }, - { - "epoch": 23.31, - "learning_rate": 6.673221849386655e-08, - "loss": 3.7604, - "step": 2118500 - }, - { - "epoch": 23.31, - "learning_rate": 6.671846636228615e-08, - "loss": 3.782, - "step": 2119000 - }, - { - "epoch": 23.32, - "learning_rate": 6.670471423070576e-08, - "loss": 3.7961, - "step": 2119500 - }, - { - "epoch": 23.32, - "learning_rate": 6.669096209912536e-08, - "loss": 3.7941, - "step": 2120000 - }, - { - "epoch": 23.33, - "learning_rate": 6.667720996754497e-08, - "loss": 3.7854, - "step": 2120500 - }, - { - "epoch": 23.33, - "learning_rate": 6.666345783596458e-08, - "loss": 3.8026, - "step": 2121000 - }, - { - "epoch": 23.34, - "learning_rate": 6.664970570438417e-08, - "loss": 3.7924, - "step": 2121500 - }, - { - "epoch": 23.35, - "learning_rate": 6.663595357280378e-08, - "loss": 3.7815, - "step": 2122000 - }, - { - "epoch": 23.35, - "learning_rate": 6.66222014412234e-08, - "loss": 3.7973, - "step": 2122500 - }, - { - "epoch": 23.36, - "learning_rate": 6.660844930964299e-08, - "loss": 3.7851, - "step": 2123000 - }, - { - "epoch": 23.36, - "learning_rate": 6.65946971780626e-08, - "loss": 3.7672, - "step": 2123500 - }, - { - "epoch": 23.37, - "learning_rate": 6.658094504648221e-08, - "loss": 3.792, - "step": 2124000 - }, - { - "epoch": 23.37, - "learning_rate": 6.65671929149018e-08, - "loss": 3.791, - "step": 2124500 - }, - { - "epoch": 23.38, - "learning_rate": 6.655344078332141e-08, - "loss": 3.7732, - "step": 2125000 - }, - { - "epoch": 23.38, - "learning_rate": 6.653968865174102e-08, - "loss": 3.7763, - "step": 2125500 - }, - { - "epoch": 23.39, - "learning_rate": 6.652593652016062e-08, - "loss": 3.7699, - "step": 2126000 - }, - { - "epoch": 23.4, - "learning_rate": 6.651218438858023e-08, - "loss": 3.8097, - "step": 2126500 - }, - { - "epoch": 23.4, - "learning_rate": 6.649843225699984e-08, - "loss": 3.8056, - "step": 2127000 - }, - { - "epoch": 23.41, - "learning_rate": 6.648468012541943e-08, - "loss": 3.7866, - "step": 2127500 - }, - { - "epoch": 23.41, - "learning_rate": 6.647092799383904e-08, - "loss": 3.7866, - "step": 2128000 - }, - { - "epoch": 23.42, - "learning_rate": 6.645717586225865e-08, - "loss": 3.7972, - "step": 2128500 - }, - { - "epoch": 23.42, - "learning_rate": 6.644342373067825e-08, - "loss": 3.7713, - "step": 2129000 - }, - { - "epoch": 23.43, - "learning_rate": 6.642967159909786e-08, - "loss": 3.7773, - "step": 2129500 - }, - { - "epoch": 23.43, - "learning_rate": 6.641591946751747e-08, - "loss": 3.7944, - "step": 2130000 - }, - { - "epoch": 23.44, - "learning_rate": 6.640216733593706e-08, - "loss": 3.796, - "step": 2130500 - }, - { - "epoch": 23.44, - "learning_rate": 6.638841520435667e-08, - "loss": 3.78, - "step": 2131000 - }, - { - "epoch": 23.45, - "learning_rate": 6.637466307277628e-08, - "loss": 3.7835, - "step": 2131500 - }, - { - "epoch": 23.46, - "learning_rate": 6.636091094119588e-08, - "loss": 3.8021, - "step": 2132000 - }, - { - "epoch": 23.46, - "learning_rate": 6.634715880961549e-08, - "loss": 3.7906, - "step": 2132500 - }, - { - "epoch": 23.47, - "learning_rate": 6.63334066780351e-08, - "loss": 3.77, - "step": 2133000 - }, - { - "epoch": 23.47, - "learning_rate": 6.631965454645469e-08, - "loss": 3.7904, - "step": 2133500 - }, - { - "epoch": 23.48, - "learning_rate": 6.63059024148743e-08, - "loss": 3.7869, - "step": 2134000 - }, - { - "epoch": 23.48, - "learning_rate": 6.629215028329391e-08, - "loss": 3.8014, - "step": 2134500 - }, - { - "epoch": 23.49, - "learning_rate": 6.627839815171351e-08, - "loss": 3.801, - "step": 2135000 - }, - { - "epoch": 23.49, - "learning_rate": 6.626464602013312e-08, - "loss": 3.8017, - "step": 2135500 - }, - { - "epoch": 23.5, - "learning_rate": 6.625089388855273e-08, - "loss": 3.7829, - "step": 2136000 - }, - { - "epoch": 23.51, - "learning_rate": 6.623714175697232e-08, - "loss": 3.7841, - "step": 2136500 - }, - { - "epoch": 23.51, - "learning_rate": 6.622338962539193e-08, - "loss": 3.7978, - "step": 2137000 - }, - { - "epoch": 23.52, - "learning_rate": 6.620963749381154e-08, - "loss": 3.7788, - "step": 2137500 - }, - { - "epoch": 23.52, - "learning_rate": 6.619588536223114e-08, - "loss": 3.7865, - "step": 2138000 - }, - { - "epoch": 23.53, - "learning_rate": 6.618213323065075e-08, - "loss": 3.7776, - "step": 2138500 - }, - { - "epoch": 23.53, - "learning_rate": 6.616838109907036e-08, - "loss": 3.7965, - "step": 2139000 - }, - { - "epoch": 23.54, - "learning_rate": 6.615462896748995e-08, - "loss": 3.7865, - "step": 2139500 - }, - { - "epoch": 23.54, - "learning_rate": 6.614087683590956e-08, - "loss": 3.7859, - "step": 2140000 - }, - { - "epoch": 23.55, - "learning_rate": 6.612712470432917e-08, - "loss": 3.7904, - "step": 2140500 - }, - { - "epoch": 23.55, - "learning_rate": 6.611337257274877e-08, - "loss": 3.7755, - "step": 2141000 - }, - { - "epoch": 23.56, - "learning_rate": 6.609962044116838e-08, - "loss": 3.7803, - "step": 2141500 - }, - { - "epoch": 23.57, - "learning_rate": 6.608586830958797e-08, - "loss": 3.7904, - "step": 2142000 - }, - { - "epoch": 23.57, - "learning_rate": 6.607211617800758e-08, - "loss": 3.7813, - "step": 2142500 - }, - { - "epoch": 23.58, - "learning_rate": 6.605836404642719e-08, - "loss": 3.7805, - "step": 2143000 - }, - { - "epoch": 23.58, - "learning_rate": 6.604461191484679e-08, - "loss": 3.8034, - "step": 2143500 - }, - { - "epoch": 23.59, - "learning_rate": 6.60308597832664e-08, - "loss": 3.7744, - "step": 2144000 - }, - { - "epoch": 23.59, - "learning_rate": 6.6017107651686e-08, - "loss": 3.7755, - "step": 2144500 - }, - { - "epoch": 23.6, - "learning_rate": 6.60033555201056e-08, - "loss": 3.7819, - "step": 2145000 - }, - { - "epoch": 23.6, - "learning_rate": 6.598960338852521e-08, - "loss": 3.7644, - "step": 2145500 - }, - { - "epoch": 23.61, - "learning_rate": 6.597585125694482e-08, - "loss": 3.7802, - "step": 2146000 - }, - { - "epoch": 23.62, - "learning_rate": 6.596209912536442e-08, - "loss": 3.7869, - "step": 2146500 - }, - { - "epoch": 23.62, - "learning_rate": 6.594834699378403e-08, - "loss": 3.7928, - "step": 2147000 - }, - { - "epoch": 23.63, - "learning_rate": 6.593459486220364e-08, - "loss": 3.7752, - "step": 2147500 - }, - { - "epoch": 23.63, - "learning_rate": 6.592084273062324e-08, - "loss": 3.7859, - "step": 2148000 - }, - { - "epoch": 23.64, - "learning_rate": 6.590709059904284e-08, - "loss": 3.7979, - "step": 2148500 - }, - { - "epoch": 23.64, - "learning_rate": 6.589333846746245e-08, - "loss": 3.7873, - "step": 2149000 - }, - { - "epoch": 23.65, - "learning_rate": 6.587958633588206e-08, - "loss": 3.7993, - "step": 2149500 - }, - { - "epoch": 23.65, - "learning_rate": 6.586583420430166e-08, - "loss": 3.7738, - "step": 2150000 - }, - { - "epoch": 23.66, - "learning_rate": 6.585208207272127e-08, - "loss": 3.7902, - "step": 2150500 - }, - { - "epoch": 23.66, - "learning_rate": 6.583832994114087e-08, - "loss": 3.7978, - "step": 2151000 - }, - { - "epoch": 23.67, - "learning_rate": 6.582457780956048e-08, - "loss": 3.7804, - "step": 2151500 - }, - { - "epoch": 23.68, - "learning_rate": 6.581082567798008e-08, - "loss": 3.7932, - "step": 2152000 - }, - { - "epoch": 23.68, - "learning_rate": 6.579707354639969e-08, - "loss": 3.7693, - "step": 2152500 - }, - { - "epoch": 23.69, - "learning_rate": 6.57833214148193e-08, - "loss": 3.787, - "step": 2153000 - }, - { - "epoch": 23.69, - "learning_rate": 6.576956928323891e-08, - "loss": 3.7787, - "step": 2153500 - }, - { - "epoch": 23.7, - "learning_rate": 6.57558171516585e-08, - "loss": 3.7675, - "step": 2154000 - }, - { - "epoch": 23.7, - "learning_rate": 6.574206502007811e-08, - "loss": 3.7807, - "step": 2154500 - }, - { - "epoch": 23.71, - "learning_rate": 6.572831288849772e-08, - "loss": 3.7665, - "step": 2155000 - }, - { - "epoch": 23.71, - "learning_rate": 6.571456075691732e-08, - "loss": 3.8003, - "step": 2155500 - }, - { - "epoch": 23.72, - "learning_rate": 6.570080862533693e-08, - "loss": 3.7891, - "step": 2156000 - }, - { - "epoch": 23.73, - "learning_rate": 6.568705649375654e-08, - "loss": 3.7925, - "step": 2156500 - }, - { - "epoch": 23.73, - "learning_rate": 6.567330436217613e-08, - "loss": 3.7628, - "step": 2157000 - }, - { - "epoch": 23.74, - "learning_rate": 6.565955223059574e-08, - "loss": 3.7741, - "step": 2157500 - }, - { - "epoch": 23.74, - "learning_rate": 6.564580009901535e-08, - "loss": 3.7682, - "step": 2158000 - }, - { - "epoch": 23.75, - "learning_rate": 6.563204796743495e-08, - "loss": 3.7939, - "step": 2158500 - }, - { - "epoch": 23.75, - "learning_rate": 6.561829583585456e-08, - "loss": 3.7871, - "step": 2159000 - }, - { - "epoch": 23.76, - "learning_rate": 6.560454370427417e-08, - "loss": 3.7872, - "step": 2159500 - }, - { - "epoch": 23.76, - "learning_rate": 6.559079157269376e-08, - "loss": 3.7955, - "step": 2160000 - }, - { - "epoch": 23.77, - "learning_rate": 6.557703944111337e-08, - "loss": 3.771, - "step": 2160500 - }, - { - "epoch": 23.77, - "learning_rate": 6.556328730953298e-08, - "loss": 3.7673, - "step": 2161000 - }, - { - "epoch": 23.78, - "learning_rate": 6.554953517795258e-08, - "loss": 3.7961, - "step": 2161500 - }, - { - "epoch": 23.79, - "learning_rate": 6.553578304637219e-08, - "loss": 3.7783, - "step": 2162000 - }, - { - "epoch": 23.79, - "learning_rate": 6.55220309147918e-08, - "loss": 3.7833, - "step": 2162500 - }, - { - "epoch": 23.8, - "learning_rate": 6.550827878321139e-08, - "loss": 3.7698, - "step": 2163000 - }, - { - "epoch": 23.8, - "learning_rate": 6.5494526651631e-08, - "loss": 3.7815, - "step": 2163500 - }, - { - "epoch": 23.81, - "learning_rate": 6.548077452005061e-08, - "loss": 3.7752, - "step": 2164000 - }, - { - "epoch": 23.81, - "learning_rate": 6.546702238847021e-08, - "loss": 3.778, - "step": 2164500 - }, - { - "epoch": 23.82, - "learning_rate": 6.545327025688982e-08, - "loss": 3.7862, - "step": 2165000 - }, - { - "epoch": 23.82, - "learning_rate": 6.543951812530943e-08, - "loss": 3.7898, - "step": 2165500 - }, - { - "epoch": 23.83, - "learning_rate": 6.542576599372902e-08, - "loss": 3.777, - "step": 2166000 - }, - { - "epoch": 23.84, - "learning_rate": 6.541201386214863e-08, - "loss": 3.7993, - "step": 2166500 - }, - { - "epoch": 23.84, - "learning_rate": 6.539826173056824e-08, - "loss": 3.77, - "step": 2167000 - }, - { - "epoch": 23.85, - "learning_rate": 6.538450959898784e-08, - "loss": 3.7857, - "step": 2167500 - }, - { - "epoch": 23.85, - "learning_rate": 6.537075746740745e-08, - "loss": 3.7901, - "step": 2168000 - }, - { - "epoch": 23.86, - "learning_rate": 6.535700533582706e-08, - "loss": 3.7658, - "step": 2168500 - }, - { - "epoch": 23.86, - "learning_rate": 6.534325320424665e-08, - "loss": 3.8108, - "step": 2169000 - }, - { - "epoch": 23.87, - "learning_rate": 6.532950107266626e-08, - "loss": 3.7875, - "step": 2169500 - }, - { - "epoch": 23.87, - "learning_rate": 6.531574894108587e-08, - "loss": 3.7712, - "step": 2170000 - }, - { - "epoch": 23.88, - "learning_rate": 6.530199680950547e-08, - "loss": 3.7845, - "step": 2170500 - }, - { - "epoch": 23.88, - "learning_rate": 6.528824467792508e-08, - "loss": 3.7862, - "step": 2171000 - }, - { - "epoch": 23.89, - "learning_rate": 6.527449254634469e-08, - "loss": 3.7958, - "step": 2171500 - }, - { - "epoch": 23.9, - "learning_rate": 6.526074041476428e-08, - "loss": 3.785, - "step": 2172000 - }, - { - "epoch": 23.9, - "learning_rate": 6.524698828318389e-08, - "loss": 3.784, - "step": 2172500 - }, - { - "epoch": 23.91, - "learning_rate": 6.52332361516035e-08, - "loss": 3.7767, - "step": 2173000 - }, - { - "epoch": 23.91, - "learning_rate": 6.52194840200231e-08, - "loss": 3.7828, - "step": 2173500 - }, - { - "epoch": 23.92, - "learning_rate": 6.52057318884427e-08, - "loss": 3.7802, - "step": 2174000 - }, - { - "epoch": 23.92, - "learning_rate": 6.519197975686232e-08, - "loss": 3.7831, - "step": 2174500 - }, - { - "epoch": 23.93, - "learning_rate": 6.517822762528191e-08, - "loss": 3.7759, - "step": 2175000 - }, - { - "epoch": 23.93, - "learning_rate": 6.516447549370152e-08, - "loss": 3.8091, - "step": 2175500 - }, - { - "epoch": 23.94, - "learning_rate": 6.515072336212113e-08, - "loss": 3.7912, - "step": 2176000 - }, - { - "epoch": 23.95, - "learning_rate": 6.513697123054073e-08, - "loss": 3.7834, - "step": 2176500 - }, - { - "epoch": 23.95, - "learning_rate": 6.512321909896034e-08, - "loss": 3.7651, - "step": 2177000 - }, - { - "epoch": 23.96, - "learning_rate": 6.510946696737994e-08, - "loss": 3.8108, - "step": 2177500 - }, - { - "epoch": 23.96, - "learning_rate": 6.509571483579954e-08, - "loss": 3.7605, - "step": 2178000 - }, - { - "epoch": 23.97, - "learning_rate": 6.508196270421915e-08, - "loss": 3.7836, - "step": 2178500 - }, - { - "epoch": 23.97, - "learning_rate": 6.506821057263876e-08, - "loss": 3.803, - "step": 2179000 - }, - { - "epoch": 23.98, - "learning_rate": 6.505445844105836e-08, - "loss": 3.7886, - "step": 2179500 - }, - { - "epoch": 23.98, - "learning_rate": 6.504070630947797e-08, - "loss": 3.7975, - "step": 2180000 - }, - { - "epoch": 23.99, - "learning_rate": 6.502695417789757e-08, - "loss": 3.7913, - "step": 2180500 - }, - { - "epoch": 23.99, - "learning_rate": 6.501320204631717e-08, - "loss": 3.7933, - "step": 2181000 - }, - { - "epoch": 24.0, - "eval_loss": 3.8459019660949707, - "eval_runtime": 6.1331, - "eval_samples_per_second": 253.378, - "step": 2181480 - }, - { - "epoch": 24.0, - "learning_rate": 6.499944991473678e-08, - "loss": 3.7727, - "step": 2181500 - }, - { - "epoch": 24.01, - "learning_rate": 6.498569778315639e-08, - "loss": 3.7902, - "step": 2182000 - }, - { - "epoch": 24.01, - "learning_rate": 6.497194565157599e-08, - "loss": 3.8013, - "step": 2182500 - }, - { - "epoch": 24.02, - "learning_rate": 6.49581935199956e-08, - "loss": 3.7888, - "step": 2183000 - }, - { - "epoch": 24.02, - "learning_rate": 6.49444413884152e-08, - "loss": 3.7796, - "step": 2183500 - }, - { - "epoch": 24.03, - "learning_rate": 6.49306892568348e-08, - "loss": 3.7925, - "step": 2184000 - }, - { - "epoch": 24.03, - "learning_rate": 6.491693712525441e-08, - "loss": 3.7916, - "step": 2184500 - }, - { - "epoch": 24.04, - "learning_rate": 6.490318499367402e-08, - "loss": 3.7738, - "step": 2185000 - }, - { - "epoch": 24.04, - "learning_rate": 6.488943286209361e-08, - "loss": 3.785, - "step": 2185500 - }, - { - "epoch": 24.05, - "learning_rate": 6.487568073051322e-08, - "loss": 3.7681, - "step": 2186000 - }, - { - "epoch": 24.06, - "learning_rate": 6.486192859893283e-08, - "loss": 3.7897, - "step": 2186500 - }, - { - "epoch": 24.06, - "learning_rate": 6.484817646735243e-08, - "loss": 3.7961, - "step": 2187000 - }, - { - "epoch": 24.07, - "learning_rate": 6.483442433577204e-08, - "loss": 3.7765, - "step": 2187500 - }, - { - "epoch": 24.07, - "learning_rate": 6.482067220419165e-08, - "loss": 3.7851, - "step": 2188000 - }, - { - "epoch": 24.08, - "learning_rate": 6.480692007261124e-08, - "loss": 3.7912, - "step": 2188500 - }, - { - "epoch": 24.08, - "learning_rate": 6.479316794103085e-08, - "loss": 3.8018, - "step": 2189000 - }, - { - "epoch": 24.09, - "learning_rate": 6.477941580945046e-08, - "loss": 3.7736, - "step": 2189500 - }, - { - "epoch": 24.09, - "learning_rate": 6.476566367787006e-08, - "loss": 3.8048, - "step": 2190000 - }, - { - "epoch": 24.1, - "learning_rate": 6.475191154628967e-08, - "loss": 3.7711, - "step": 2190500 - }, - { - "epoch": 24.1, - "learning_rate": 6.473815941470928e-08, - "loss": 3.7748, - "step": 2191000 - }, - { - "epoch": 24.11, - "learning_rate": 6.472440728312887e-08, - "loss": 3.7644, - "step": 2191500 - }, - { - "epoch": 24.12, - "learning_rate": 6.471065515154848e-08, - "loss": 3.7776, - "step": 2192000 - }, - { - "epoch": 24.12, - "learning_rate": 6.469690301996809e-08, - "loss": 3.7648, - "step": 2192500 - }, - { - "epoch": 24.13, - "learning_rate": 6.468315088838769e-08, - "loss": 3.7914, - "step": 2193000 - }, - { - "epoch": 24.13, - "learning_rate": 6.46693987568073e-08, - "loss": 3.7858, - "step": 2193500 - }, - { - "epoch": 24.14, - "learning_rate": 6.465564662522691e-08, - "loss": 3.7813, - "step": 2194000 - }, - { - "epoch": 24.14, - "learning_rate": 6.46418944936465e-08, - "loss": 3.7772, - "step": 2194500 - }, - { - "epoch": 24.15, - "learning_rate": 6.462814236206611e-08, - "loss": 3.7733, - "step": 2195000 - }, - { - "epoch": 24.15, - "learning_rate": 6.461439023048572e-08, - "loss": 3.7552, - "step": 2195500 - }, - { - "epoch": 24.16, - "learning_rate": 6.460063809890532e-08, - "loss": 3.7866, - "step": 2196000 - }, - { - "epoch": 24.17, - "learning_rate": 6.458688596732493e-08, - "loss": 3.7733, - "step": 2196500 - }, - { - "epoch": 24.17, - "learning_rate": 6.457313383574454e-08, - "loss": 3.7834, - "step": 2197000 - }, - { - "epoch": 24.18, - "learning_rate": 6.455938170416413e-08, - "loss": 3.7992, - "step": 2197500 - }, - { - "epoch": 24.18, - "learning_rate": 6.454562957258374e-08, - "loss": 3.7752, - "step": 2198000 - }, - { - "epoch": 24.19, - "learning_rate": 6.453187744100335e-08, - "loss": 3.7766, - "step": 2198500 - }, - { - "epoch": 24.19, - "learning_rate": 6.451812530942296e-08, - "loss": 3.7866, - "step": 2199000 - }, - { - "epoch": 24.2, - "learning_rate": 6.450437317784256e-08, - "loss": 3.7986, - "step": 2199500 - }, - { - "epoch": 24.2, - "learning_rate": 6.449062104626217e-08, - "loss": 3.8012, - "step": 2200000 - }, - { - "epoch": 24.21, - "learning_rate": 6.447686891468178e-08, - "loss": 3.7978, - "step": 2200500 - }, - { - "epoch": 24.21, - "learning_rate": 6.446311678310139e-08, - "loss": 3.7853, - "step": 2201000 - }, - { - "epoch": 24.22, - "learning_rate": 6.444936465152098e-08, - "loss": 3.7722, - "step": 2201500 - }, - { - "epoch": 24.23, - "learning_rate": 6.443561251994059e-08, - "loss": 3.7814, - "step": 2202000 - }, - { - "epoch": 24.23, - "learning_rate": 6.44218603883602e-08, - "loss": 3.8017, - "step": 2202500 - }, - { - "epoch": 24.24, - "learning_rate": 6.44081082567798e-08, - "loss": 3.7893, - "step": 2203000 - }, - { - "epoch": 24.24, - "learning_rate": 6.43943561251994e-08, - "loss": 3.7741, - "step": 2203500 - }, - { - "epoch": 24.25, - "learning_rate": 6.438060399361902e-08, - "loss": 3.7874, - "step": 2204000 - }, - { - "epoch": 24.25, - "learning_rate": 6.436685186203862e-08, - "loss": 3.7698, - "step": 2204500 - }, - { - "epoch": 24.26, - "learning_rate": 6.435309973045822e-08, - "loss": 3.7822, - "step": 2205000 - }, - { - "epoch": 24.26, - "learning_rate": 6.433934759887783e-08, - "loss": 3.7918, - "step": 2205500 - }, - { - "epoch": 24.27, - "learning_rate": 6.432559546729744e-08, - "loss": 3.7934, - "step": 2206000 - }, - { - "epoch": 24.28, - "learning_rate": 6.431184333571704e-08, - "loss": 3.8104, - "step": 2206500 - }, - { - "epoch": 24.28, - "learning_rate": 6.429809120413664e-08, - "loss": 3.782, - "step": 2207000 - }, - { - "epoch": 24.29, - "learning_rate": 6.428433907255625e-08, - "loss": 3.7724, - "step": 2207500 - }, - { - "epoch": 24.29, - "learning_rate": 6.427058694097585e-08, - "loss": 3.7681, - "step": 2208000 - }, - { - "epoch": 24.3, - "learning_rate": 6.425683480939546e-08, - "loss": 3.7791, - "step": 2208500 - }, - { - "epoch": 24.3, - "learning_rate": 6.424308267781507e-08, - "loss": 3.7833, - "step": 2209000 - }, - { - "epoch": 24.31, - "learning_rate": 6.422933054623466e-08, - "loss": 3.7919, - "step": 2209500 - }, - { - "epoch": 24.31, - "learning_rate": 6.421557841465427e-08, - "loss": 3.7678, - "step": 2210000 - }, - { - "epoch": 24.32, - "learning_rate": 6.420182628307388e-08, - "loss": 3.7824, - "step": 2210500 - }, - { - "epoch": 24.32, - "learning_rate": 6.418807415149348e-08, - "loss": 3.7914, - "step": 2211000 - }, - { - "epoch": 24.33, - "learning_rate": 6.417432201991309e-08, - "loss": 3.785, - "step": 2211500 - }, - { - "epoch": 24.34, - "learning_rate": 6.41605698883327e-08, - "loss": 3.7755, - "step": 2212000 - }, - { - "epoch": 24.34, - "learning_rate": 6.41468177567523e-08, - "loss": 3.7889, - "step": 2212500 - }, - { - "epoch": 24.35, - "learning_rate": 6.41330656251719e-08, - "loss": 3.7747, - "step": 2213000 - }, - { - "epoch": 24.35, - "learning_rate": 6.411931349359151e-08, - "loss": 3.7758, - "step": 2213500 - }, - { - "epoch": 24.36, - "learning_rate": 6.410556136201111e-08, - "loss": 3.7724, - "step": 2214000 - }, - { - "epoch": 24.36, - "learning_rate": 6.409180923043072e-08, - "loss": 3.7604, - "step": 2214500 - }, - { - "epoch": 24.37, - "learning_rate": 6.407805709885033e-08, - "loss": 3.8005, - "step": 2215000 - }, - { - "epoch": 24.37, - "learning_rate": 6.406430496726992e-08, - "loss": 3.7671, - "step": 2215500 - }, - { - "epoch": 24.38, - "learning_rate": 6.405055283568953e-08, - "loss": 3.8044, - "step": 2216000 - }, - { - "epoch": 24.39, - "learning_rate": 6.403680070410913e-08, - "loss": 3.7939, - "step": 2216500 - }, - { - "epoch": 24.39, - "learning_rate": 6.402304857252874e-08, - "loss": 3.7879, - "step": 2217000 - }, - { - "epoch": 24.4, - "learning_rate": 6.400929644094835e-08, - "loss": 3.7789, - "step": 2217500 - }, - { - "epoch": 24.4, - "learning_rate": 6.399554430936794e-08, - "loss": 3.7805, - "step": 2218000 - }, - { - "epoch": 24.41, - "learning_rate": 6.398179217778755e-08, - "loss": 3.782, - "step": 2218500 - }, - { - "epoch": 24.41, - "learning_rate": 6.396804004620716e-08, - "loss": 3.7824, - "step": 2219000 - }, - { - "epoch": 24.42, - "learning_rate": 6.395428791462676e-08, - "loss": 3.7862, - "step": 2219500 - }, - { - "epoch": 24.42, - "learning_rate": 6.394053578304637e-08, - "loss": 3.783, - "step": 2220000 - }, - { - "epoch": 24.43, - "learning_rate": 6.392678365146598e-08, - "loss": 3.7777, - "step": 2220500 - }, - { - "epoch": 24.43, - "learning_rate": 6.391303151988557e-08, - "loss": 3.793, - "step": 2221000 - }, - { - "epoch": 24.44, - "learning_rate": 6.389927938830518e-08, - "loss": 3.7864, - "step": 2221500 - }, - { - "epoch": 24.45, - "learning_rate": 6.388552725672479e-08, - "loss": 3.7589, - "step": 2222000 - }, - { - "epoch": 24.45, - "learning_rate": 6.387177512514439e-08, - "loss": 3.7835, - "step": 2222500 - }, - { - "epoch": 24.46, - "learning_rate": 6.3858022993564e-08, - "loss": 3.8005, - "step": 2223000 - }, - { - "epoch": 24.46, - "learning_rate": 6.384427086198361e-08, - "loss": 3.7688, - "step": 2223500 - }, - { - "epoch": 24.47, - "learning_rate": 6.38305187304032e-08, - "loss": 3.7897, - "step": 2224000 - }, - { - "epoch": 24.47, - "learning_rate": 6.381676659882281e-08, - "loss": 3.7793, - "step": 2224500 - }, - { - "epoch": 24.48, - "learning_rate": 6.380301446724242e-08, - "loss": 3.7966, - "step": 2225000 - }, - { - "epoch": 24.48, - "learning_rate": 6.378926233566202e-08, - "loss": 3.7577, - "step": 2225500 - }, - { - "epoch": 24.49, - "learning_rate": 6.377551020408163e-08, - "loss": 3.7956, - "step": 2226000 - }, - { - "epoch": 24.5, - "learning_rate": 6.376175807250124e-08, - "loss": 3.792, - "step": 2226500 - }, - { - "epoch": 24.5, - "learning_rate": 6.374800594092083e-08, - "loss": 3.7719, - "step": 2227000 - }, - { - "epoch": 24.51, - "learning_rate": 6.373425380934044e-08, - "loss": 3.8003, - "step": 2227500 - }, - { - "epoch": 24.51, - "learning_rate": 6.372050167776005e-08, - "loss": 3.7854, - "step": 2228000 - }, - { - "epoch": 24.52, - "learning_rate": 6.370674954617965e-08, - "loss": 3.7709, - "step": 2228500 - }, - { - "epoch": 24.52, - "learning_rate": 6.369299741459926e-08, - "loss": 3.7885, - "step": 2229000 - }, - { - "epoch": 24.53, - "learning_rate": 6.367924528301887e-08, - "loss": 3.7725, - "step": 2229500 - }, - { - "epoch": 24.53, - "learning_rate": 6.366549315143846e-08, - "loss": 3.773, - "step": 2230000 - }, - { - "epoch": 24.54, - "learning_rate": 6.365174101985807e-08, - "loss": 3.8165, - "step": 2230500 - }, - { - "epoch": 24.54, - "learning_rate": 6.363798888827768e-08, - "loss": 3.7903, - "step": 2231000 - }, - { - "epoch": 24.55, - "learning_rate": 6.362423675669728e-08, - "loss": 3.7867, - "step": 2231500 - }, - { - "epoch": 24.56, - "learning_rate": 6.361048462511689e-08, - "loss": 3.7889, - "step": 2232000 - }, - { - "epoch": 24.56, - "learning_rate": 6.35967324935365e-08, - "loss": 3.7864, - "step": 2232500 - }, - { - "epoch": 24.57, - "learning_rate": 6.358298036195609e-08, - "loss": 3.7649, - "step": 2233000 - }, - { - "epoch": 24.57, - "learning_rate": 6.35692282303757e-08, - "loss": 3.7964, - "step": 2233500 - }, - { - "epoch": 24.58, - "learning_rate": 6.355547609879531e-08, - "loss": 3.7768, - "step": 2234000 - }, - { - "epoch": 24.58, - "learning_rate": 6.354172396721491e-08, - "loss": 3.7996, - "step": 2234500 - }, - { - "epoch": 24.59, - "learning_rate": 6.352797183563452e-08, - "loss": 3.7782, - "step": 2235000 - }, - { - "epoch": 24.59, - "learning_rate": 6.351421970405413e-08, - "loss": 3.7833, - "step": 2235500 - }, - { - "epoch": 24.6, - "learning_rate": 6.350046757247372e-08, - "loss": 3.7789, - "step": 2236000 - }, - { - "epoch": 24.61, - "learning_rate": 6.348671544089333e-08, - "loss": 3.7888, - "step": 2236500 - }, - { - "epoch": 24.61, - "learning_rate": 6.347296330931294e-08, - "loss": 3.7814, - "step": 2237000 - }, - { - "epoch": 24.62, - "learning_rate": 6.345921117773254e-08, - "loss": 3.7624, - "step": 2237500 - }, - { - "epoch": 24.62, - "learning_rate": 6.344545904615215e-08, - "loss": 3.7913, - "step": 2238000 - }, - { - "epoch": 24.63, - "learning_rate": 6.343170691457176e-08, - "loss": 3.7761, - "step": 2238500 - }, - { - "epoch": 24.63, - "learning_rate": 6.341795478299135e-08, - "loss": 3.7954, - "step": 2239000 - }, - { - "epoch": 24.64, - "learning_rate": 6.340420265141096e-08, - "loss": 3.7944, - "step": 2239500 - }, - { - "epoch": 24.64, - "learning_rate": 6.339045051983057e-08, - "loss": 3.7627, - "step": 2240000 - }, - { - "epoch": 24.65, - "learning_rate": 6.337669838825017e-08, - "loss": 3.7854, - "step": 2240500 - }, - { - "epoch": 24.65, - "learning_rate": 6.336294625666978e-08, - "loss": 3.7795, - "step": 2241000 - }, - { - "epoch": 24.66, - "learning_rate": 6.334919412508938e-08, - "loss": 3.7945, - "step": 2241500 - }, - { - "epoch": 24.67, - "learning_rate": 6.333544199350898e-08, - "loss": 3.7853, - "step": 2242000 - }, - { - "epoch": 24.67, - "learning_rate": 6.332168986192859e-08, - "loss": 3.7593, - "step": 2242500 - }, - { - "epoch": 24.68, - "learning_rate": 6.33079377303482e-08, - "loss": 3.7882, - "step": 2243000 - }, - { - "epoch": 24.68, - "learning_rate": 6.32941855987678e-08, - "loss": 3.7964, - "step": 2243500 - }, - { - "epoch": 24.69, - "learning_rate": 6.32804334671874e-08, - "loss": 3.7842, - "step": 2244000 - }, - { - "epoch": 24.69, - "learning_rate": 6.326668133560701e-08, - "loss": 3.7538, - "step": 2244500 - }, - { - "epoch": 24.7, - "learning_rate": 6.325292920402661e-08, - "loss": 3.7671, - "step": 2245000 - }, - { - "epoch": 24.7, - "learning_rate": 6.323917707244622e-08, - "loss": 3.7769, - "step": 2245500 - }, - { - "epoch": 24.71, - "learning_rate": 6.322542494086583e-08, - "loss": 3.7886, - "step": 2246000 - }, - { - "epoch": 24.72, - "learning_rate": 6.321167280928544e-08, - "loss": 3.7843, - "step": 2246500 - }, - { - "epoch": 24.72, - "learning_rate": 6.319792067770503e-08, - "loss": 3.7952, - "step": 2247000 - }, - { - "epoch": 24.73, - "learning_rate": 6.318416854612464e-08, - "loss": 3.7913, - "step": 2247500 - }, - { - "epoch": 24.73, - "learning_rate": 6.317041641454425e-08, - "loss": 3.7783, - "step": 2248000 - }, - { - "epoch": 24.74, - "learning_rate": 6.315666428296386e-08, - "loss": 3.781, - "step": 2248500 - }, - { - "epoch": 24.74, - "learning_rate": 6.314291215138346e-08, - "loss": 3.7849, - "step": 2249000 - }, - { - "epoch": 24.75, - "learning_rate": 6.312916001980307e-08, - "loss": 3.7823, - "step": 2249500 - }, - { - "epoch": 24.75, - "learning_rate": 6.311540788822268e-08, - "loss": 3.7945, - "step": 2250000 - }, - { - "epoch": 24.76, - "learning_rate": 6.310165575664227e-08, - "loss": 3.772, - "step": 2250500 - }, - { - "epoch": 24.76, - "learning_rate": 6.308790362506188e-08, - "loss": 3.7838, - "step": 2251000 - }, - { - "epoch": 24.77, - "learning_rate": 6.307415149348149e-08, - "loss": 3.784, - "step": 2251500 - }, - { - "epoch": 24.78, - "learning_rate": 6.30603993619011e-08, - "loss": 3.7837, - "step": 2252000 - }, - { - "epoch": 24.78, - "learning_rate": 6.30466472303207e-08, - "loss": 3.7702, - "step": 2252500 - }, - { - "epoch": 24.79, - "learning_rate": 6.303289509874031e-08, - "loss": 3.7945, - "step": 2253000 - }, - { - "epoch": 24.79, - "learning_rate": 6.301914296715992e-08, - "loss": 3.7806, - "step": 2253500 - }, - { - "epoch": 24.8, - "learning_rate": 6.300539083557951e-08, - "loss": 3.789, - "step": 2254000 - }, - { - "epoch": 24.8, - "learning_rate": 6.299163870399912e-08, - "loss": 3.7947, - "step": 2254500 - }, - { - "epoch": 24.81, - "learning_rate": 6.297788657241873e-08, - "loss": 3.7788, - "step": 2255000 - }, - { - "epoch": 24.81, - "learning_rate": 6.296413444083833e-08, - "loss": 3.7698, - "step": 2255500 - }, - { - "epoch": 24.82, - "learning_rate": 6.295038230925794e-08, - "loss": 3.791, - "step": 2256000 - }, - { - "epoch": 24.83, - "learning_rate": 6.293663017767755e-08, - "loss": 3.7946, - "step": 2256500 - }, - { - "epoch": 24.83, - "learning_rate": 6.292287804609714e-08, - "loss": 3.7807, - "step": 2257000 - }, - { - "epoch": 24.84, - "learning_rate": 6.290912591451675e-08, - "loss": 3.7608, - "step": 2257500 - }, - { - "epoch": 24.84, - "learning_rate": 6.289537378293636e-08, - "loss": 3.7852, - "step": 2258000 - }, - { - "epoch": 24.85, - "learning_rate": 6.288162165135596e-08, - "loss": 3.7601, - "step": 2258500 - }, - { - "epoch": 24.85, - "learning_rate": 6.286786951977557e-08, - "loss": 3.7698, - "step": 2259000 - }, - { - "epoch": 24.86, - "learning_rate": 6.285411738819518e-08, - "loss": 3.7934, - "step": 2259500 - }, - { - "epoch": 24.86, - "learning_rate": 6.284036525661477e-08, - "loss": 3.7819, - "step": 2260000 - }, - { - "epoch": 24.87, - "learning_rate": 6.282661312503438e-08, - "loss": 3.7845, - "step": 2260500 - }, - { - "epoch": 24.87, - "learning_rate": 6.281286099345399e-08, - "loss": 3.7846, - "step": 2261000 - }, - { - "epoch": 24.88, - "learning_rate": 6.279910886187359e-08, - "loss": 3.7928, - "step": 2261500 - }, - { - "epoch": 24.89, - "learning_rate": 6.27853567302932e-08, - "loss": 3.788, - "step": 2262000 - }, - { - "epoch": 24.89, - "learning_rate": 6.27716045987128e-08, - "loss": 3.7788, - "step": 2262500 - }, - { - "epoch": 24.9, - "learning_rate": 6.27578524671324e-08, - "loss": 3.7765, - "step": 2263000 - }, - { - "epoch": 24.9, - "learning_rate": 6.274410033555201e-08, - "loss": 3.7985, - "step": 2263500 - }, - { - "epoch": 24.91, - "learning_rate": 6.273034820397162e-08, - "loss": 3.7623, - "step": 2264000 - }, - { - "epoch": 24.91, - "learning_rate": 6.271659607239122e-08, - "loss": 3.7797, - "step": 2264500 - }, - { - "epoch": 24.92, - "learning_rate": 6.270284394081083e-08, - "loss": 3.7569, - "step": 2265000 - }, - { - "epoch": 24.92, - "learning_rate": 6.268909180923043e-08, - "loss": 3.7794, - "step": 2265500 - }, - { - "epoch": 24.93, - "learning_rate": 6.267533967765003e-08, - "loss": 3.7702, - "step": 2266000 - }, - { - "epoch": 24.94, - "learning_rate": 6.266158754606964e-08, - "loss": 3.7864, - "step": 2266500 - }, - { - "epoch": 24.94, - "learning_rate": 6.264783541448925e-08, - "loss": 3.7948, - "step": 2267000 - }, - { - "epoch": 24.95, - "learning_rate": 6.263408328290885e-08, - "loss": 3.7727, - "step": 2267500 - }, - { - "epoch": 24.95, - "learning_rate": 6.262033115132846e-08, - "loss": 3.7818, - "step": 2268000 - }, - { - "epoch": 24.96, - "learning_rate": 6.260657901974806e-08, - "loss": 3.7972, - "step": 2268500 - }, - { - "epoch": 24.96, - "learning_rate": 6.259282688816766e-08, - "loss": 3.7806, - "step": 2269000 - }, - { - "epoch": 24.97, - "learning_rate": 6.257907475658727e-08, - "loss": 3.7722, - "step": 2269500 - }, - { - "epoch": 24.97, - "learning_rate": 6.256532262500688e-08, - "loss": 3.7786, - "step": 2270000 - }, - { - "epoch": 24.98, - "learning_rate": 6.255157049342648e-08, - "loss": 3.7767, - "step": 2270500 - }, - { - "epoch": 24.98, - "learning_rate": 6.253781836184608e-08, - "loss": 3.787, - "step": 2271000 - }, - { - "epoch": 24.99, - "learning_rate": 6.25240662302657e-08, - "loss": 3.7705, - "step": 2271500 - }, - { - "epoch": 25.0, - "learning_rate": 6.251031409868529e-08, - "loss": 3.7566, - "step": 2272000 - }, - { - "epoch": 25.0, - "eval_loss": 3.8438775539398193, - "eval_runtime": 6.1326, - "eval_samples_per_second": 253.401, - "step": 2272375 - }, - { - "epoch": 25.0, - "learning_rate": 6.24965619671049e-08, - "loss": 3.7873, - "step": 2272500 - }, - { - "epoch": 25.01, - "learning_rate": 6.248280983552451e-08, - "loss": 3.7669, - "step": 2273000 - }, - { - "epoch": 25.01, - "learning_rate": 6.24690577039441e-08, - "loss": 3.7735, - "step": 2273500 - }, - { - "epoch": 25.02, - "learning_rate": 6.245530557236371e-08, - "loss": 3.7649, - "step": 2274000 - }, - { - "epoch": 25.02, - "learning_rate": 6.244155344078332e-08, - "loss": 3.7931, - "step": 2274500 - }, - { - "epoch": 25.03, - "learning_rate": 6.242780130920292e-08, - "loss": 3.7839, - "step": 2275000 - }, - { - "epoch": 25.03, - "learning_rate": 6.241404917762253e-08, - "loss": 3.8016, - "step": 2275500 - }, - { - "epoch": 25.04, - "learning_rate": 6.240029704604214e-08, - "loss": 3.7919, - "step": 2276000 - }, - { - "epoch": 25.05, - "learning_rate": 6.238654491446173e-08, - "loss": 3.752, - "step": 2276500 - }, - { - "epoch": 25.05, - "learning_rate": 6.237279278288134e-08, - "loss": 3.7826, - "step": 2277000 - }, - { - "epoch": 25.06, - "learning_rate": 6.235904065130095e-08, - "loss": 3.7753, - "step": 2277500 - }, - { - "epoch": 25.06, - "learning_rate": 6.234528851972055e-08, - "loss": 3.7775, - "step": 2278000 - }, - { - "epoch": 25.07, - "learning_rate": 6.233153638814016e-08, - "loss": 3.7891, - "step": 2278500 - }, - { - "epoch": 25.07, - "learning_rate": 6.231778425655977e-08, - "loss": 3.7868, - "step": 2279000 - }, - { - "epoch": 25.08, - "learning_rate": 6.230403212497936e-08, - "loss": 3.7845, - "step": 2279500 - }, - { - "epoch": 25.08, - "learning_rate": 6.229027999339897e-08, - "loss": 3.7854, - "step": 2280000 - }, - { - "epoch": 25.09, - "learning_rate": 6.227652786181858e-08, - "loss": 3.7873, - "step": 2280500 - }, - { - "epoch": 25.09, - "learning_rate": 6.226277573023818e-08, - "loss": 3.788, - "step": 2281000 - }, - { - "epoch": 25.1, - "learning_rate": 6.224902359865779e-08, - "loss": 3.7844, - "step": 2281500 - }, - { - "epoch": 25.11, - "learning_rate": 6.22352714670774e-08, - "loss": 3.7674, - "step": 2282000 - }, - { - "epoch": 25.11, - "learning_rate": 6.2221519335497e-08, - "loss": 3.7884, - "step": 2282500 - }, - { - "epoch": 25.12, - "learning_rate": 6.22077672039166e-08, - "loss": 3.7587, - "step": 2283000 - }, - { - "epoch": 25.12, - "learning_rate": 6.219401507233621e-08, - "loss": 3.7868, - "step": 2283500 - }, - { - "epoch": 25.13, - "learning_rate": 6.218026294075581e-08, - "loss": 3.774, - "step": 2284000 - }, - { - "epoch": 25.13, - "learning_rate": 6.216651080917542e-08, - "loss": 3.7733, - "step": 2284500 - }, - { - "epoch": 25.14, - "learning_rate": 6.215275867759503e-08, - "loss": 3.7893, - "step": 2285000 - }, - { - "epoch": 25.14, - "learning_rate": 6.213900654601462e-08, - "loss": 3.7815, - "step": 2285500 - }, - { - "epoch": 25.15, - "learning_rate": 6.212525441443423e-08, - "loss": 3.7774, - "step": 2286000 - }, - { - "epoch": 25.16, - "learning_rate": 6.211150228285384e-08, - "loss": 3.7847, - "step": 2286500 - }, - { - "epoch": 25.16, - "learning_rate": 6.209775015127344e-08, - "loss": 3.7967, - "step": 2287000 - }, - { - "epoch": 25.17, - "learning_rate": 6.208399801969305e-08, - "loss": 3.7801, - "step": 2287500 - }, - { - "epoch": 25.17, - "learning_rate": 6.207024588811266e-08, - "loss": 3.7852, - "step": 2288000 - }, - { - "epoch": 25.18, - "learning_rate": 6.205649375653225e-08, - "loss": 3.7871, - "step": 2288500 - }, - { - "epoch": 25.18, - "learning_rate": 6.204274162495186e-08, - "loss": 3.7657, - "step": 2289000 - }, - { - "epoch": 25.19, - "learning_rate": 6.202898949337147e-08, - "loss": 3.7727, - "step": 2289500 - }, - { - "epoch": 25.19, - "learning_rate": 6.201523736179107e-08, - "loss": 3.7613, - "step": 2290000 - }, - { - "epoch": 25.2, - "learning_rate": 6.200148523021068e-08, - "loss": 3.7813, - "step": 2290500 - }, - { - "epoch": 25.2, - "learning_rate": 6.198773309863029e-08, - "loss": 3.7844, - "step": 2291000 - }, - { - "epoch": 25.21, - "learning_rate": 6.197398096704988e-08, - "loss": 3.7852, - "step": 2291500 - }, - { - "epoch": 25.22, - "learning_rate": 6.196022883546949e-08, - "loss": 3.7711, - "step": 2292000 - }, - { - "epoch": 25.22, - "learning_rate": 6.194647670388909e-08, - "loss": 3.7706, - "step": 2292500 - }, - { - "epoch": 25.23, - "learning_rate": 6.19327245723087e-08, - "loss": 3.7737, - "step": 2293000 - }, - { - "epoch": 25.23, - "learning_rate": 6.191897244072831e-08, - "loss": 3.7703, - "step": 2293500 - }, - { - "epoch": 25.24, - "learning_rate": 6.190522030914792e-08, - "loss": 3.8006, - "step": 2294000 - }, - { - "epoch": 25.24, - "learning_rate": 6.189146817756751e-08, - "loss": 3.7648, - "step": 2294500 - }, - { - "epoch": 25.25, - "learning_rate": 6.187771604598712e-08, - "loss": 3.7828, - "step": 2295000 - }, - { - "epoch": 25.25, - "learning_rate": 6.186396391440673e-08, - "loss": 3.771, - "step": 2295500 - }, - { - "epoch": 25.26, - "learning_rate": 6.185021178282634e-08, - "loss": 3.7829, - "step": 2296000 - }, - { - "epoch": 25.27, - "learning_rate": 6.183645965124594e-08, - "loss": 3.7817, - "step": 2296500 - }, - { - "epoch": 25.27, - "learning_rate": 6.182270751966555e-08, - "loss": 3.7844, - "step": 2297000 - }, - { - "epoch": 25.28, - "learning_rate": 6.180895538808515e-08, - "loss": 3.7805, - "step": 2297500 - }, - { - "epoch": 25.28, - "learning_rate": 6.179520325650475e-08, - "loss": 3.773, - "step": 2298000 - }, - { - "epoch": 25.29, - "learning_rate": 6.178145112492436e-08, - "loss": 3.7693, - "step": 2298500 - }, - { - "epoch": 25.29, - "learning_rate": 6.176769899334397e-08, - "loss": 3.7696, - "step": 2299000 - }, - { - "epoch": 25.3, - "learning_rate": 6.175394686176358e-08, - "loss": 3.7855, - "step": 2299500 - }, - { - "epoch": 25.3, - "learning_rate": 6.174019473018318e-08, - "loss": 3.7852, - "step": 2300000 - }, - { - "epoch": 25.31, - "learning_rate": 6.172644259860278e-08, - "loss": 3.7705, - "step": 2300500 - }, - { - "epoch": 25.31, - "learning_rate": 6.17126904670224e-08, - "loss": 3.7708, - "step": 2301000 - }, - { - "epoch": 25.32, - "learning_rate": 6.169893833544199e-08, - "loss": 3.7663, - "step": 2301500 - }, - { - "epoch": 25.33, - "learning_rate": 6.16851862038616e-08, - "loss": 3.7739, - "step": 2302000 - }, - { - "epoch": 25.33, - "learning_rate": 6.167143407228121e-08, - "loss": 3.7757, - "step": 2302500 - }, - { - "epoch": 25.34, - "learning_rate": 6.16576819407008e-08, - "loss": 3.7826, - "step": 2303000 - }, - { - "epoch": 25.34, - "learning_rate": 6.164392980912041e-08, - "loss": 3.8014, - "step": 2303500 - }, - { - "epoch": 25.35, - "learning_rate": 6.163017767754002e-08, - "loss": 3.7793, - "step": 2304000 - }, - { - "epoch": 25.35, - "learning_rate": 6.161642554595962e-08, - "loss": 3.7651, - "step": 2304500 - }, - { - "epoch": 25.36, - "learning_rate": 6.160267341437923e-08, - "loss": 3.7919, - "step": 2305000 - }, - { - "epoch": 25.36, - "learning_rate": 6.158892128279884e-08, - "loss": 3.7844, - "step": 2305500 - }, - { - "epoch": 25.37, - "learning_rate": 6.157516915121843e-08, - "loss": 3.7775, - "step": 2306000 - }, - { - "epoch": 25.38, - "learning_rate": 6.156141701963804e-08, - "loss": 3.7775, - "step": 2306500 - }, - { - "epoch": 25.38, - "learning_rate": 6.154766488805765e-08, - "loss": 3.782, - "step": 2307000 - }, - { - "epoch": 25.39, - "learning_rate": 6.153391275647725e-08, - "loss": 3.7555, - "step": 2307500 - }, - { - "epoch": 25.39, - "learning_rate": 6.152016062489686e-08, - "loss": 3.8002, - "step": 2308000 - }, - { - "epoch": 25.4, - "learning_rate": 6.150640849331647e-08, - "loss": 3.7858, - "step": 2308500 - }, - { - "epoch": 25.4, - "learning_rate": 6.149265636173606e-08, - "loss": 3.7874, - "step": 2309000 - }, - { - "epoch": 25.41, - "learning_rate": 6.147890423015567e-08, - "loss": 3.7941, - "step": 2309500 - }, - { - "epoch": 25.41, - "learning_rate": 6.146515209857528e-08, - "loss": 3.7707, - "step": 2310000 - }, - { - "epoch": 25.42, - "learning_rate": 6.145139996699488e-08, - "loss": 3.7573, - "step": 2310500 - }, - { - "epoch": 25.42, - "learning_rate": 6.143764783541449e-08, - "loss": 3.7707, - "step": 2311000 - }, - { - "epoch": 25.43, - "learning_rate": 6.14238957038341e-08, - "loss": 3.7959, - "step": 2311500 - }, - { - "epoch": 25.44, - "learning_rate": 6.14101435722537e-08, - "loss": 3.7978, - "step": 2312000 - }, - { - "epoch": 25.44, - "learning_rate": 6.13963914406733e-08, - "loss": 3.7869, - "step": 2312500 - }, - { - "epoch": 25.45, - "learning_rate": 6.138263930909291e-08, - "loss": 3.7771, - "step": 2313000 - }, - { - "epoch": 25.45, - "learning_rate": 6.136888717751251e-08, - "loss": 3.7888, - "step": 2313500 - }, - { - "epoch": 25.46, - "learning_rate": 6.135513504593212e-08, - "loss": 3.7739, - "step": 2314000 - }, - { - "epoch": 25.46, - "learning_rate": 6.134138291435173e-08, - "loss": 3.7694, - "step": 2314500 - }, - { - "epoch": 25.47, - "learning_rate": 6.132763078277132e-08, - "loss": 3.7725, - "step": 2315000 - }, - { - "epoch": 25.47, - "learning_rate": 6.131387865119093e-08, - "loss": 3.7655, - "step": 2315500 - }, - { - "epoch": 25.48, - "learning_rate": 6.130012651961054e-08, - "loss": 3.7822, - "step": 2316000 - }, - { - "epoch": 25.49, - "learning_rate": 6.128637438803014e-08, - "loss": 3.7942, - "step": 2316500 - }, - { - "epoch": 25.49, - "learning_rate": 6.127262225644975e-08, - "loss": 3.7736, - "step": 2317000 - }, - { - "epoch": 25.5, - "learning_rate": 6.125887012486936e-08, - "loss": 3.795, - "step": 2317500 - }, - { - "epoch": 25.5, - "learning_rate": 6.124511799328895e-08, - "loss": 3.7659, - "step": 2318000 - }, - { - "epoch": 25.51, - "learning_rate": 6.123136586170856e-08, - "loss": 3.7899, - "step": 2318500 - }, - { - "epoch": 25.51, - "learning_rate": 6.121761373012817e-08, - "loss": 3.7795, - "step": 2319000 - }, - { - "epoch": 25.52, - "learning_rate": 6.120386159854777e-08, - "loss": 3.7932, - "step": 2319500 - }, - { - "epoch": 25.52, - "learning_rate": 6.119010946696738e-08, - "loss": 3.7747, - "step": 2320000 - }, - { - "epoch": 25.53, - "learning_rate": 6.117635733538699e-08, - "loss": 3.7987, - "step": 2320500 - }, - { - "epoch": 25.53, - "learning_rate": 6.116260520380658e-08, - "loss": 3.7687, - "step": 2321000 - }, - { - "epoch": 25.54, - "learning_rate": 6.114885307222619e-08, - "loss": 3.7621, - "step": 2321500 - }, - { - "epoch": 25.55, - "learning_rate": 6.11351009406458e-08, - "loss": 3.7836, - "step": 2322000 - }, - { - "epoch": 25.55, - "learning_rate": 6.11213488090654e-08, - "loss": 3.7598, - "step": 2322500 - }, - { - "epoch": 25.56, - "learning_rate": 6.1107596677485e-08, - "loss": 3.7756, - "step": 2323000 - }, - { - "epoch": 25.56, - "learning_rate": 6.109384454590462e-08, - "loss": 3.7616, - "step": 2323500 - }, - { - "epoch": 25.57, - "learning_rate": 6.108009241432421e-08, - "loss": 3.7796, - "step": 2324000 - }, - { - "epoch": 25.57, - "learning_rate": 6.106634028274382e-08, - "loss": 3.7985, - "step": 2324500 - }, - { - "epoch": 25.58, - "learning_rate": 6.105258815116343e-08, - "loss": 3.7815, - "step": 2325000 - }, - { - "epoch": 25.58, - "learning_rate": 6.103883601958303e-08, - "loss": 3.7842, - "step": 2325500 - }, - { - "epoch": 25.59, - "learning_rate": 6.102508388800264e-08, - "loss": 3.7893, - "step": 2326000 - }, - { - "epoch": 25.6, - "learning_rate": 6.101133175642225e-08, - "loss": 3.7794, - "step": 2326500 - }, - { - "epoch": 25.6, - "learning_rate": 6.099757962484184e-08, - "loss": 3.7716, - "step": 2327000 - }, - { - "epoch": 25.61, - "learning_rate": 6.098382749326145e-08, - "loss": 3.7854, - "step": 2327500 - }, - { - "epoch": 25.61, - "learning_rate": 6.097007536168106e-08, - "loss": 3.7813, - "step": 2328000 - }, - { - "epoch": 25.62, - "learning_rate": 6.095632323010066e-08, - "loss": 3.7766, - "step": 2328500 - }, - { - "epoch": 25.62, - "learning_rate": 6.094257109852027e-08, - "loss": 3.7656, - "step": 2329000 - }, - { - "epoch": 25.63, - "learning_rate": 6.092881896693987e-08, - "loss": 3.7788, - "step": 2329500 - }, - { - "epoch": 25.63, - "learning_rate": 6.091506683535947e-08, - "loss": 3.7611, - "step": 2330000 - }, - { - "epoch": 25.64, - "learning_rate": 6.090131470377908e-08, - "loss": 3.7729, - "step": 2330500 - }, - { - "epoch": 25.64, - "learning_rate": 6.088756257219869e-08, - "loss": 3.7794, - "step": 2331000 - }, - { - "epoch": 25.65, - "learning_rate": 6.087381044061829e-08, - "loss": 3.8079, - "step": 2331500 - }, - { - "epoch": 25.66, - "learning_rate": 6.08600583090379e-08, - "loss": 3.7762, - "step": 2332000 - }, - { - "epoch": 25.66, - "learning_rate": 6.08463061774575e-08, - "loss": 3.7987, - "step": 2332500 - }, - { - "epoch": 25.67, - "learning_rate": 6.08325540458771e-08, - "loss": 3.7761, - "step": 2333000 - }, - { - "epoch": 25.67, - "learning_rate": 6.081880191429671e-08, - "loss": 3.7683, - "step": 2333500 - }, - { - "epoch": 25.68, - "learning_rate": 6.080504978271632e-08, - "loss": 3.777, - "step": 2334000 - }, - { - "epoch": 25.68, - "learning_rate": 6.079129765113592e-08, - "loss": 3.7876, - "step": 2334500 - }, - { - "epoch": 25.69, - "learning_rate": 6.077754551955552e-08, - "loss": 3.7628, - "step": 2335000 - }, - { - "epoch": 25.69, - "learning_rate": 6.076379338797513e-08, - "loss": 3.7726, - "step": 2335500 - }, - { - "epoch": 25.7, - "learning_rate": 6.075004125639473e-08, - "loss": 3.7645, - "step": 2336000 - }, - { - "epoch": 25.71, - "learning_rate": 6.073628912481434e-08, - "loss": 3.7705, - "step": 2336500 - }, - { - "epoch": 25.71, - "learning_rate": 6.072253699323395e-08, - "loss": 3.7557, - "step": 2337000 - }, - { - "epoch": 25.72, - "learning_rate": 6.070878486165355e-08, - "loss": 3.788, - "step": 2337500 - }, - { - "epoch": 25.72, - "learning_rate": 6.069503273007315e-08, - "loss": 3.7836, - "step": 2338000 - }, - { - "epoch": 25.73, - "learning_rate": 6.068128059849276e-08, - "loss": 3.7669, - "step": 2338500 - }, - { - "epoch": 25.73, - "learning_rate": 6.066752846691236e-08, - "loss": 3.7823, - "step": 2339000 - }, - { - "epoch": 25.74, - "learning_rate": 6.065377633533197e-08, - "loss": 3.7789, - "step": 2339500 - }, - { - "epoch": 25.74, - "learning_rate": 6.064002420375158e-08, - "loss": 3.7757, - "step": 2340000 - }, - { - "epoch": 25.75, - "learning_rate": 6.062627207217117e-08, - "loss": 3.7749, - "step": 2340500 - }, - { - "epoch": 25.75, - "learning_rate": 6.061251994059078e-08, - "loss": 3.7677, - "step": 2341000 - }, - { - "epoch": 25.76, - "learning_rate": 6.059876780901039e-08, - "loss": 3.7828, - "step": 2341500 - }, - { - "epoch": 25.77, - "learning_rate": 6.058501567742999e-08, - "loss": 3.7845, - "step": 2342000 - }, - { - "epoch": 25.77, - "learning_rate": 6.05712635458496e-08, - "loss": 3.7801, - "step": 2342500 - }, - { - "epoch": 25.78, - "learning_rate": 6.055751141426921e-08, - "loss": 3.7764, - "step": 2343000 - }, - { - "epoch": 25.78, - "learning_rate": 6.054375928268882e-08, - "loss": 3.7942, - "step": 2343500 - }, - { - "epoch": 25.79, - "learning_rate": 6.053000715110841e-08, - "loss": 3.7978, - "step": 2344000 - }, - { - "epoch": 25.79, - "learning_rate": 6.051625501952802e-08, - "loss": 3.7915, - "step": 2344500 - }, - { - "epoch": 25.8, - "learning_rate": 6.050250288794763e-08, - "loss": 3.7939, - "step": 2345000 - }, - { - "epoch": 25.8, - "learning_rate": 6.048875075636724e-08, - "loss": 3.7668, - "step": 2345500 - }, - { - "epoch": 25.81, - "learning_rate": 6.047499862478684e-08, - "loss": 3.7792, - "step": 2346000 - }, - { - "epoch": 25.82, - "learning_rate": 6.046124649320645e-08, - "loss": 3.779, - "step": 2346500 - }, - { - "epoch": 25.82, - "learning_rate": 6.044749436162606e-08, - "loss": 3.7851, - "step": 2347000 - }, - { - "epoch": 25.83, - "learning_rate": 6.043374223004565e-08, - "loss": 3.7814, - "step": 2347500 - }, - { - "epoch": 25.83, - "learning_rate": 6.041999009846526e-08, - "loss": 3.7667, - "step": 2348000 - }, - { - "epoch": 25.84, - "learning_rate": 6.040623796688487e-08, - "loss": 3.7815, - "step": 2348500 - }, - { - "epoch": 25.84, - "learning_rate": 6.039248583530448e-08, - "loss": 3.8019, - "step": 2349000 - }, - { - "epoch": 25.85, - "learning_rate": 6.037873370372408e-08, - "loss": 3.7686, - "step": 2349500 - }, - { - "epoch": 25.85, - "learning_rate": 6.036498157214369e-08, - "loss": 3.7932, - "step": 2350000 - }, - { - "epoch": 25.86, - "learning_rate": 6.03512294405633e-08, - "loss": 3.8067, - "step": 2350500 - }, - { - "epoch": 25.87, - "learning_rate": 6.033747730898289e-08, - "loss": 3.7819, - "step": 2351000 - }, - { - "epoch": 25.87, - "learning_rate": 6.03237251774025e-08, - "loss": 3.7826, - "step": 2351500 - }, - { - "epoch": 25.88, - "learning_rate": 6.030997304582211e-08, - "loss": 3.7856, - "step": 2352000 - }, - { - "epoch": 25.88, - "learning_rate": 6.02962209142417e-08, - "loss": 3.784, - "step": 2352500 - }, - { - "epoch": 25.89, - "learning_rate": 6.028246878266132e-08, - "loss": 3.7675, - "step": 2353000 - }, - { - "epoch": 25.89, - "learning_rate": 6.026871665108092e-08, - "loss": 3.7782, - "step": 2353500 - }, - { - "epoch": 25.9, - "learning_rate": 6.025496451950052e-08, - "loss": 3.7778, - "step": 2354000 - }, - { - "epoch": 25.9, - "learning_rate": 6.024121238792013e-08, - "loss": 3.7635, - "step": 2354500 - }, - { - "epoch": 25.91, - "learning_rate": 6.022746025633974e-08, - "loss": 3.7648, - "step": 2355000 - }, - { - "epoch": 25.91, - "learning_rate": 6.021370812475934e-08, - "loss": 3.7992, - "step": 2355500 - }, - { - "epoch": 25.92, - "learning_rate": 6.019995599317895e-08, - "loss": 3.7981, - "step": 2356000 - }, - { - "epoch": 25.93, - "learning_rate": 6.018620386159855e-08, - "loss": 3.7838, - "step": 2356500 - }, - { - "epoch": 25.93, - "learning_rate": 6.017245173001815e-08, - "loss": 3.7791, - "step": 2357000 - }, - { - "epoch": 25.94, - "learning_rate": 6.015869959843776e-08, - "loss": 3.7912, - "step": 2357500 - }, - { - "epoch": 25.94, - "learning_rate": 6.014494746685737e-08, - "loss": 3.7875, - "step": 2358000 - }, - { - "epoch": 25.95, - "learning_rate": 6.013119533527697e-08, - "loss": 3.7618, - "step": 2358500 - }, - { - "epoch": 25.95, - "learning_rate": 6.011744320369657e-08, - "loss": 3.7864, - "step": 2359000 - }, - { - "epoch": 25.96, - "learning_rate": 6.010369107211618e-08, - "loss": 3.7703, - "step": 2359500 - }, - { - "epoch": 25.96, - "learning_rate": 6.008993894053578e-08, - "loss": 3.762, - "step": 2360000 - }, - { - "epoch": 25.97, - "learning_rate": 6.007618680895539e-08, - "loss": 3.7799, - "step": 2360500 - }, - { - "epoch": 25.98, - "learning_rate": 6.0062434677375e-08, - "loss": 3.7896, - "step": 2361000 - }, - { - "epoch": 25.98, - "learning_rate": 6.00486825457946e-08, - "loss": 3.7912, - "step": 2361500 - }, - { - "epoch": 25.99, - "learning_rate": 6.00349304142142e-08, - "loss": 3.7781, - "step": 2362000 - }, - { - "epoch": 25.99, - "learning_rate": 6.002117828263381e-08, - "loss": 3.7658, - "step": 2362500 - }, - { - "epoch": 26.0, - "learning_rate": 6.000742615105341e-08, - "loss": 3.7737, - "step": 2363000 - }, - { - "epoch": 26.0, - "eval_loss": 3.842207193374634, - "eval_runtime": 6.1338, - "eval_samples_per_second": 253.352, - "step": 2363270 - }, - { - "epoch": 26.0, - "learning_rate": 5.999367401947302e-08, - "loss": 3.7872, - "step": 2363500 - }, - { - "epoch": 26.01, - "learning_rate": 5.997992188789263e-08, - "loss": 3.7856, - "step": 2364000 - }, - { - "epoch": 26.01, - "learning_rate": 5.996616975631222e-08, - "loss": 3.7764, - "step": 2364500 - }, - { - "epoch": 26.02, - "learning_rate": 5.995241762473183e-08, - "loss": 3.7921, - "step": 2365000 - }, - { - "epoch": 26.02, - "learning_rate": 5.993866549315144e-08, - "loss": 3.7861, - "step": 2365500 - }, - { - "epoch": 26.03, - "learning_rate": 5.992491336157104e-08, - "loss": 3.7851, - "step": 2366000 - }, - { - "epoch": 26.04, - "learning_rate": 5.991116122999065e-08, - "loss": 3.7833, - "step": 2366500 - }, - { - "epoch": 26.04, - "learning_rate": 5.989740909841024e-08, - "loss": 3.7879, - "step": 2367000 - }, - { - "epoch": 26.05, - "learning_rate": 5.988365696682985e-08, - "loss": 3.7698, - "step": 2367500 - }, - { - "epoch": 26.05, - "learning_rate": 5.986990483524946e-08, - "loss": 3.7789, - "step": 2368000 - }, - { - "epoch": 26.06, - "learning_rate": 5.985615270366906e-08, - "loss": 3.7655, - "step": 2368500 - }, - { - "epoch": 26.06, - "learning_rate": 5.984240057208867e-08, - "loss": 3.7814, - "step": 2369000 - }, - { - "epoch": 26.07, - "learning_rate": 5.982864844050828e-08, - "loss": 3.7753, - "step": 2369500 - }, - { - "epoch": 26.07, - "learning_rate": 5.981489630892787e-08, - "loss": 3.777, - "step": 2370000 - }, - { - "epoch": 26.08, - "learning_rate": 5.980114417734748e-08, - "loss": 3.7726, - "step": 2370500 - }, - { - "epoch": 26.09, - "learning_rate": 5.978739204576709e-08, - "loss": 3.7943, - "step": 2371000 - }, - { - "epoch": 26.09, - "learning_rate": 5.977363991418669e-08, - "loss": 3.7709, - "step": 2371500 - }, - { - "epoch": 26.1, - "learning_rate": 5.97598877826063e-08, - "loss": 3.7716, - "step": 2372000 - }, - { - "epoch": 26.1, - "learning_rate": 5.974613565102591e-08, - "loss": 3.7818, - "step": 2372500 - }, - { - "epoch": 26.11, - "learning_rate": 5.97323835194455e-08, - "loss": 3.794, - "step": 2373000 - }, - { - "epoch": 26.11, - "learning_rate": 5.971863138786511e-08, - "loss": 3.7818, - "step": 2373500 - }, - { - "epoch": 26.12, - "learning_rate": 5.970487925628472e-08, - "loss": 3.7946, - "step": 2374000 - }, - { - "epoch": 26.12, - "learning_rate": 5.969112712470432e-08, - "loss": 3.7899, - "step": 2374500 - }, - { - "epoch": 26.13, - "learning_rate": 5.967737499312393e-08, - "loss": 3.7919, - "step": 2375000 - }, - { - "epoch": 26.13, - "learning_rate": 5.966362286154354e-08, - "loss": 3.7795, - "step": 2375500 - }, - { - "epoch": 26.14, - "learning_rate": 5.964987072996313e-08, - "loss": 3.7831, - "step": 2376000 - }, - { - "epoch": 26.15, - "learning_rate": 5.963611859838274e-08, - "loss": 3.7806, - "step": 2376500 - }, - { - "epoch": 26.15, - "learning_rate": 5.962236646680235e-08, - "loss": 3.7736, - "step": 2377000 - }, - { - "epoch": 26.16, - "learning_rate": 5.960861433522195e-08, - "loss": 3.763, - "step": 2377500 - }, - { - "epoch": 26.16, - "learning_rate": 5.959486220364156e-08, - "loss": 3.7864, - "step": 2378000 - }, - { - "epoch": 26.17, - "learning_rate": 5.958111007206117e-08, - "loss": 3.7685, - "step": 2378500 - }, - { - "epoch": 26.17, - "learning_rate": 5.956735794048077e-08, - "loss": 3.7795, - "step": 2379000 - }, - { - "epoch": 26.18, - "learning_rate": 5.955360580890037e-08, - "loss": 3.7827, - "step": 2379500 - }, - { - "epoch": 26.18, - "learning_rate": 5.953985367731998e-08, - "loss": 3.7858, - "step": 2380000 - }, - { - "epoch": 26.19, - "learning_rate": 5.9526101545739585e-08, - "loss": 3.7505, - "step": 2380500 - }, - { - "epoch": 26.2, - "learning_rate": 5.9512349414159194e-08, - "loss": 3.7931, - "step": 2381000 - }, - { - "epoch": 26.2, - "learning_rate": 5.94985972825788e-08, - "loss": 3.7812, - "step": 2381500 - }, - { - "epoch": 26.21, - "learning_rate": 5.94848451509984e-08, - "loss": 3.7708, - "step": 2382000 - }, - { - "epoch": 26.21, - "learning_rate": 5.947109301941801e-08, - "loss": 3.7741, - "step": 2382500 - }, - { - "epoch": 26.22, - "learning_rate": 5.945734088783761e-08, - "loss": 3.7581, - "step": 2383000 - }, - { - "epoch": 26.22, - "learning_rate": 5.9443588756257214e-08, - "loss": 3.7857, - "step": 2383500 - }, - { - "epoch": 26.23, - "learning_rate": 5.9429836624676824e-08, - "loss": 3.7701, - "step": 2384000 - }, - { - "epoch": 26.23, - "learning_rate": 5.941608449309643e-08, - "loss": 3.7712, - "step": 2384500 - }, - { - "epoch": 26.24, - "learning_rate": 5.940233236151603e-08, - "loss": 3.7747, - "step": 2385000 - }, - { - "epoch": 26.24, - "learning_rate": 5.938858022993564e-08, - "loss": 3.7815, - "step": 2385500 - }, - { - "epoch": 26.25, - "learning_rate": 5.937482809835525e-08, - "loss": 3.7773, - "step": 2386000 - }, - { - "epoch": 26.26, - "learning_rate": 5.9361075966774844e-08, - "loss": 3.7711, - "step": 2386500 - }, - { - "epoch": 26.26, - "learning_rate": 5.934732383519445e-08, - "loss": 3.7903, - "step": 2387000 - }, - { - "epoch": 26.27, - "learning_rate": 5.933357170361406e-08, - "loss": 3.7735, - "step": 2387500 - }, - { - "epoch": 26.27, - "learning_rate": 5.931981957203366e-08, - "loss": 3.7697, - "step": 2388000 - }, - { - "epoch": 26.28, - "learning_rate": 5.930606744045327e-08, - "loss": 3.7602, - "step": 2388500 - }, - { - "epoch": 26.28, - "learning_rate": 5.929231530887288e-08, - "loss": 3.7904, - "step": 2389000 - }, - { - "epoch": 26.29, - "learning_rate": 5.9278563177292473e-08, - "loss": 3.7805, - "step": 2389500 - }, - { - "epoch": 26.29, - "learning_rate": 5.926481104571208e-08, - "loss": 3.7785, - "step": 2390000 - }, - { - "epoch": 26.3, - "learning_rate": 5.925105891413169e-08, - "loss": 3.7777, - "step": 2390500 - }, - { - "epoch": 26.31, - "learning_rate": 5.923730678255129e-08, - "loss": 3.7741, - "step": 2391000 - }, - { - "epoch": 26.31, - "learning_rate": 5.92235546509709e-08, - "loss": 3.7773, - "step": 2391500 - }, - { - "epoch": 26.32, - "learning_rate": 5.920980251939051e-08, - "loss": 3.7667, - "step": 2392000 - }, - { - "epoch": 26.32, - "learning_rate": 5.91960503878101e-08, - "loss": 3.7742, - "step": 2392500 - }, - { - "epoch": 26.33, - "learning_rate": 5.918229825622971e-08, - "loss": 3.7687, - "step": 2393000 - }, - { - "epoch": 26.33, - "learning_rate": 5.916854612464932e-08, - "loss": 3.7527, - "step": 2393500 - }, - { - "epoch": 26.34, - "learning_rate": 5.915479399306892e-08, - "loss": 3.774, - "step": 2394000 - }, - { - "epoch": 26.34, - "learning_rate": 5.914104186148853e-08, - "loss": 3.7849, - "step": 2394500 - }, - { - "epoch": 26.35, - "learning_rate": 5.9127289729908136e-08, - "loss": 3.7754, - "step": 2395000 - }, - { - "epoch": 26.35, - "learning_rate": 5.911353759832773e-08, - "loss": 3.7695, - "step": 2395500 - }, - { - "epoch": 26.36, - "learning_rate": 5.909978546674734e-08, - "loss": 3.7717, - "step": 2396000 - }, - { - "epoch": 26.37, - "learning_rate": 5.908603333516695e-08, - "loss": 3.7749, - "step": 2396500 - }, - { - "epoch": 26.37, - "learning_rate": 5.907228120358655e-08, - "loss": 3.7851, - "step": 2397000 - }, - { - "epoch": 26.38, - "learning_rate": 5.905852907200616e-08, - "loss": 3.7749, - "step": 2397500 - }, - { - "epoch": 26.38, - "learning_rate": 5.9044776940425766e-08, - "loss": 3.8007, - "step": 2398000 - }, - { - "epoch": 26.39, - "learning_rate": 5.903102480884536e-08, - "loss": 3.7722, - "step": 2398500 - }, - { - "epoch": 26.39, - "learning_rate": 5.901727267726497e-08, - "loss": 3.7644, - "step": 2399000 - }, - { - "epoch": 26.4, - "learning_rate": 5.900352054568458e-08, - "loss": 3.7884, - "step": 2399500 - }, - { - "epoch": 26.4, - "learning_rate": 5.898976841410418e-08, - "loss": 3.7847, - "step": 2400000 - }, - { - "epoch": 26.41, - "learning_rate": 5.8976016282523786e-08, - "loss": 3.7774, - "step": 2400500 - }, - { - "epoch": 26.42, - "learning_rate": 5.8962264150943396e-08, - "loss": 3.7701, - "step": 2401000 - }, - { - "epoch": 26.42, - "learning_rate": 5.894851201936299e-08, - "loss": 3.7751, - "step": 2401500 - }, - { - "epoch": 26.43, - "learning_rate": 5.89347598877826e-08, - "loss": 3.7587, - "step": 2402000 - }, - { - "epoch": 26.43, - "learning_rate": 5.892100775620221e-08, - "loss": 3.7736, - "step": 2402500 - }, - { - "epoch": 26.44, - "learning_rate": 5.890725562462181e-08, - "loss": 3.7764, - "step": 2403000 - }, - { - "epoch": 26.44, - "learning_rate": 5.8893503493041416e-08, - "loss": 3.7746, - "step": 2403500 - }, - { - "epoch": 26.45, - "learning_rate": 5.8879751361461025e-08, - "loss": 3.7611, - "step": 2404000 - }, - { - "epoch": 26.45, - "learning_rate": 5.886599922988063e-08, - "loss": 3.7949, - "step": 2404500 - }, - { - "epoch": 26.46, - "learning_rate": 5.885224709830023e-08, - "loss": 3.7623, - "step": 2405000 - }, - { - "epoch": 26.46, - "learning_rate": 5.883849496671984e-08, - "loss": 3.7845, - "step": 2405500 - }, - { - "epoch": 26.47, - "learning_rate": 5.882474283513944e-08, - "loss": 3.7728, - "step": 2406000 - }, - { - "epoch": 26.48, - "learning_rate": 5.881099070355905e-08, - "loss": 3.7767, - "step": 2406500 - }, - { - "epoch": 26.48, - "learning_rate": 5.8797238571978655e-08, - "loss": 3.7894, - "step": 2407000 - }, - { - "epoch": 26.49, - "learning_rate": 5.878348644039826e-08, - "loss": 3.7906, - "step": 2407500 - }, - { - "epoch": 26.49, - "learning_rate": 5.876973430881787e-08, - "loss": 3.7745, - "step": 2408000 - }, - { - "epoch": 26.5, - "learning_rate": 5.8755982177237476e-08, - "loss": 3.7815, - "step": 2408500 - }, - { - "epoch": 26.5, - "learning_rate": 5.874223004565707e-08, - "loss": 3.7855, - "step": 2409000 - }, - { - "epoch": 26.51, - "learning_rate": 5.872847791407668e-08, - "loss": 3.7589, - "step": 2409500 - }, - { - "epoch": 26.51, - "learning_rate": 5.871472578249629e-08, - "loss": 3.7803, - "step": 2410000 - }, - { - "epoch": 26.52, - "learning_rate": 5.870097365091589e-08, - "loss": 3.7816, - "step": 2410500 - }, - { - "epoch": 26.53, - "learning_rate": 5.8687221519335497e-08, - "loss": 3.7804, - "step": 2411000 - }, - { - "epoch": 26.53, - "learning_rate": 5.8673469387755106e-08, - "loss": 3.7652, - "step": 2411500 - }, - { - "epoch": 26.54, - "learning_rate": 5.86597172561747e-08, - "loss": 3.7813, - "step": 2412000 - }, - { - "epoch": 26.54, - "learning_rate": 5.864596512459431e-08, - "loss": 3.7654, - "step": 2412500 - }, - { - "epoch": 26.55, - "learning_rate": 5.863221299301392e-08, - "loss": 3.7867, - "step": 2413000 - }, - { - "epoch": 26.55, - "learning_rate": 5.861846086143352e-08, - "loss": 3.7677, - "step": 2413500 - }, - { - "epoch": 26.56, - "learning_rate": 5.8604708729853126e-08, - "loss": 3.761, - "step": 2414000 - }, - { - "epoch": 26.56, - "learning_rate": 5.8590956598272735e-08, - "loss": 3.77, - "step": 2414500 - }, - { - "epoch": 26.57, - "learning_rate": 5.857720446669233e-08, - "loss": 3.7598, - "step": 2415000 - }, - { - "epoch": 26.57, - "learning_rate": 5.856345233511194e-08, - "loss": 3.7889, - "step": 2415500 - }, - { - "epoch": 26.58, - "learning_rate": 5.854970020353155e-08, - "loss": 3.7803, - "step": 2416000 - }, - { - "epoch": 26.59, - "learning_rate": 5.8535948071951146e-08, - "loss": 3.7704, - "step": 2416500 - }, - { - "epoch": 26.59, - "learning_rate": 5.8522195940370756e-08, - "loss": 3.7908, - "step": 2417000 - }, - { - "epoch": 26.6, - "learning_rate": 5.8508443808790365e-08, - "loss": 3.7793, - "step": 2417500 - }, - { - "epoch": 26.6, - "learning_rate": 5.849469167720996e-08, - "loss": 3.7613, - "step": 2418000 - }, - { - "epoch": 26.61, - "learning_rate": 5.848093954562957e-08, - "loss": 3.759, - "step": 2418500 - }, - { - "epoch": 26.61, - "learning_rate": 5.846718741404918e-08, - "loss": 3.7916, - "step": 2419000 - }, - { - "epoch": 26.62, - "learning_rate": 5.8453435282468776e-08, - "loss": 3.7645, - "step": 2419500 - }, - { - "epoch": 26.62, - "learning_rate": 5.8439683150888385e-08, - "loss": 3.7631, - "step": 2420000 - }, - { - "epoch": 26.63, - "learning_rate": 5.8425931019307995e-08, - "loss": 3.7749, - "step": 2420500 - }, - { - "epoch": 26.64, - "learning_rate": 5.841217888772759e-08, - "loss": 3.7805, - "step": 2421000 - }, - { - "epoch": 26.64, - "learning_rate": 5.83984267561472e-08, - "loss": 3.7797, - "step": 2421500 - }, - { - "epoch": 26.65, - "learning_rate": 5.838467462456681e-08, - "loss": 3.7695, - "step": 2422000 - }, - { - "epoch": 26.65, - "learning_rate": 5.8370922492986406e-08, - "loss": 3.7667, - "step": 2422500 - }, - { - "epoch": 26.66, - "learning_rate": 5.8357170361406015e-08, - "loss": 3.7953, - "step": 2423000 - }, - { - "epoch": 26.66, - "learning_rate": 5.8343418229825624e-08, - "loss": 3.7707, - "step": 2423500 - }, - { - "epoch": 26.67, - "learning_rate": 5.832966609824522e-08, - "loss": 3.7805, - "step": 2424000 - }, - { - "epoch": 26.67, - "learning_rate": 5.831591396666483e-08, - "loss": 3.7897, - "step": 2424500 - }, - { - "epoch": 26.68, - "learning_rate": 5.830216183508444e-08, - "loss": 3.7988, - "step": 2425000 - }, - { - "epoch": 26.68, - "learning_rate": 5.8288409703504035e-08, - "loss": 3.7814, - "step": 2425500 - }, - { - "epoch": 26.69, - "learning_rate": 5.8274657571923644e-08, - "loss": 3.7742, - "step": 2426000 - }, - { - "epoch": 26.7, - "learning_rate": 5.8260905440343254e-08, - "loss": 3.7692, - "step": 2426500 - }, - { - "epoch": 26.7, - "learning_rate": 5.824715330876285e-08, - "loss": 3.786, - "step": 2427000 - }, - { - "epoch": 26.71, - "learning_rate": 5.823340117718246e-08, - "loss": 3.7545, - "step": 2427500 - }, - { - "epoch": 26.71, - "learning_rate": 5.821964904560207e-08, - "loss": 3.7692, - "step": 2428000 - }, - { - "epoch": 26.72, - "learning_rate": 5.820589691402167e-08, - "loss": 3.7482, - "step": 2428500 - }, - { - "epoch": 26.72, - "learning_rate": 5.8192144782441274e-08, - "loss": 3.7687, - "step": 2429000 - }, - { - "epoch": 26.73, - "learning_rate": 5.8178392650860883e-08, - "loss": 3.7766, - "step": 2429500 - }, - { - "epoch": 26.73, - "learning_rate": 5.8164640519280486e-08, - "loss": 3.7693, - "step": 2430000 - }, - { - "epoch": 26.74, - "learning_rate": 5.8150888387700095e-08, - "loss": 3.7813, - "step": 2430500 - }, - { - "epoch": 26.75, - "learning_rate": 5.81371362561197e-08, - "loss": 3.7796, - "step": 2431000 - }, - { - "epoch": 26.75, - "learning_rate": 5.81233841245393e-08, - "loss": 3.7788, - "step": 2431500 - }, - { - "epoch": 26.76, - "learning_rate": 5.810963199295891e-08, - "loss": 3.7897, - "step": 2432000 - }, - { - "epoch": 26.76, - "learning_rate": 5.809587986137851e-08, - "loss": 3.7686, - "step": 2432500 - }, - { - "epoch": 26.77, - "learning_rate": 5.8082127729798116e-08, - "loss": 3.7768, - "step": 2433000 - }, - { - "epoch": 26.77, - "learning_rate": 5.8068375598217725e-08, - "loss": 3.7731, - "step": 2433500 - }, - { - "epoch": 26.78, - "learning_rate": 5.8054623466637334e-08, - "loss": 3.7948, - "step": 2434000 - }, - { - "epoch": 26.78, - "learning_rate": 5.804087133505693e-08, - "loss": 3.7717, - "step": 2434500 - }, - { - "epoch": 26.79, - "learning_rate": 5.802711920347654e-08, - "loss": 3.7593, - "step": 2435000 - }, - { - "epoch": 26.79, - "learning_rate": 5.801336707189615e-08, - "loss": 3.7777, - "step": 2435500 - }, - { - "epoch": 26.8, - "learning_rate": 5.7999614940315745e-08, - "loss": 3.772, - "step": 2436000 - }, - { - "epoch": 26.81, - "learning_rate": 5.7985862808735355e-08, - "loss": 3.7756, - "step": 2436500 - }, - { - "epoch": 26.81, - "learning_rate": 5.7972110677154964e-08, - "loss": 3.7812, - "step": 2437000 - }, - { - "epoch": 26.82, - "learning_rate": 5.795835854557456e-08, - "loss": 3.7801, - "step": 2437500 - }, - { - "epoch": 26.82, - "learning_rate": 5.794460641399417e-08, - "loss": 3.7796, - "step": 2438000 - }, - { - "epoch": 26.83, - "learning_rate": 5.793085428241378e-08, - "loss": 3.7765, - "step": 2438500 - }, - { - "epoch": 26.83, - "learning_rate": 5.7917102150833375e-08, - "loss": 3.7594, - "step": 2439000 - }, - { - "epoch": 26.84, - "learning_rate": 5.7903350019252984e-08, - "loss": 3.7695, - "step": 2439500 - }, - { - "epoch": 26.84, - "learning_rate": 5.7889597887672594e-08, - "loss": 3.7611, - "step": 2440000 - }, - { - "epoch": 26.85, - "learning_rate": 5.787584575609219e-08, - "loss": 3.7929, - "step": 2440500 - }, - { - "epoch": 26.86, - "learning_rate": 5.78620936245118e-08, - "loss": 3.7723, - "step": 2441000 - }, - { - "epoch": 26.86, - "learning_rate": 5.7848341492931395e-08, - "loss": 3.7771, - "step": 2441500 - }, - { - "epoch": 26.87, - "learning_rate": 5.7834589361351005e-08, - "loss": 3.7813, - "step": 2442000 - }, - { - "epoch": 26.87, - "learning_rate": 5.7820837229770614e-08, - "loss": 3.7879, - "step": 2442500 - }, - { - "epoch": 26.88, - "learning_rate": 5.780708509819021e-08, - "loss": 3.7668, - "step": 2443000 - }, - { - "epoch": 26.88, - "learning_rate": 5.779333296660982e-08, - "loss": 3.7743, - "step": 2443500 - }, - { - "epoch": 26.89, - "learning_rate": 5.777958083502943e-08, - "loss": 3.7716, - "step": 2444000 - }, - { - "epoch": 26.89, - "learning_rate": 5.7765828703449025e-08, - "loss": 3.7499, - "step": 2444500 - }, - { - "epoch": 26.9, - "learning_rate": 5.7752076571868634e-08, - "loss": 3.785, - "step": 2445000 - }, - { - "epoch": 26.9, - "learning_rate": 5.7738324440288243e-08, - "loss": 3.7754, - "step": 2445500 - }, - { - "epoch": 26.91, - "learning_rate": 5.772457230870784e-08, - "loss": 3.7717, - "step": 2446000 - }, - { - "epoch": 26.92, - "learning_rate": 5.771082017712745e-08, - "loss": 3.7622, - "step": 2446500 - }, - { - "epoch": 26.92, - "learning_rate": 5.769706804554706e-08, - "loss": 3.7753, - "step": 2447000 - }, - { - "epoch": 26.93, - "learning_rate": 5.7683315913966654e-08, - "loss": 3.7747, - "step": 2447500 - }, - { - "epoch": 26.93, - "learning_rate": 5.7669563782386264e-08, - "loss": 3.7903, - "step": 2448000 - }, - { - "epoch": 26.94, - "learning_rate": 5.765581165080587e-08, - "loss": 3.7934, - "step": 2448500 - }, - { - "epoch": 26.94, - "learning_rate": 5.764205951922547e-08, - "loss": 3.7794, - "step": 2449000 - }, - { - "epoch": 26.95, - "learning_rate": 5.762830738764508e-08, - "loss": 3.7702, - "step": 2449500 - }, - { - "epoch": 26.95, - "learning_rate": 5.761455525606469e-08, - "loss": 3.7847, - "step": 2450000 - }, - { - "epoch": 26.96, - "learning_rate": 5.760080312448429e-08, - "loss": 3.7946, - "step": 2450500 - }, - { - "epoch": 26.97, - "learning_rate": 5.758705099290389e-08, - "loss": 3.7673, - "step": 2451000 - }, - { - "epoch": 26.97, - "learning_rate": 5.75732988613235e-08, - "loss": 3.7819, - "step": 2451500 - }, - { - "epoch": 26.98, - "learning_rate": 5.7559546729743105e-08, - "loss": 3.7858, - "step": 2452000 - }, - { - "epoch": 26.98, - "learning_rate": 5.754579459816271e-08, - "loss": 3.7811, - "step": 2452500 - }, - { - "epoch": 26.99, - "learning_rate": 5.753204246658232e-08, - "loss": 3.7859, - "step": 2453000 - }, - { - "epoch": 26.99, - "learning_rate": 5.751829033500192e-08, - "loss": 3.7775, - "step": 2453500 - }, - { - "epoch": 27.0, - "learning_rate": 5.750453820342153e-08, - "loss": 3.7925, - "step": 2454000 - }, - { - "epoch": 27.0, - "eval_loss": 3.840773105621338, - "eval_runtime": 6.1339, - "eval_samples_per_second": 253.348, - "step": 2454165 - }, - { - "epoch": 27.0, - "learning_rate": 5.749078607184113e-08, - "loss": 3.7784, - "step": 2454500 - }, - { - "epoch": 27.01, - "learning_rate": 5.7477033940260735e-08, - "loss": 3.7795, - "step": 2455000 - }, - { - "epoch": 27.01, - "learning_rate": 5.7463281808680344e-08, - "loss": 3.7811, - "step": 2455500 - }, - { - "epoch": 27.02, - "learning_rate": 5.7449529677099954e-08, - "loss": 3.7829, - "step": 2456000 - }, - { - "epoch": 27.03, - "learning_rate": 5.743577754551955e-08, - "loss": 3.779, - "step": 2456500 - }, - { - "epoch": 27.03, - "learning_rate": 5.742202541393916e-08, - "loss": 3.7681, - "step": 2457000 - }, - { - "epoch": 27.04, - "learning_rate": 5.740827328235877e-08, - "loss": 3.782, - "step": 2457500 - }, - { - "epoch": 27.04, - "learning_rate": 5.7394521150778365e-08, - "loss": 3.7715, - "step": 2458000 - }, - { - "epoch": 27.05, - "learning_rate": 5.7380769019197974e-08, - "loss": 3.7683, - "step": 2458500 - }, - { - "epoch": 27.05, - "learning_rate": 5.736701688761758e-08, - "loss": 3.7647, - "step": 2459000 - }, - { - "epoch": 27.06, - "learning_rate": 5.735326475603718e-08, - "loss": 3.7747, - "step": 2459500 - }, - { - "epoch": 27.06, - "learning_rate": 5.733951262445679e-08, - "loss": 3.7878, - "step": 2460000 - }, - { - "epoch": 27.07, - "learning_rate": 5.73257604928764e-08, - "loss": 3.7735, - "step": 2460500 - }, - { - "epoch": 27.08, - "learning_rate": 5.7312008361295994e-08, - "loss": 3.7622, - "step": 2461000 - }, - { - "epoch": 27.08, - "learning_rate": 5.7298256229715603e-08, - "loss": 3.773, - "step": 2461500 - }, - { - "epoch": 27.09, - "learning_rate": 5.728450409813521e-08, - "loss": 3.7763, - "step": 2462000 - }, - { - "epoch": 27.09, - "learning_rate": 5.727075196655481e-08, - "loss": 3.7688, - "step": 2462500 - }, - { - "epoch": 27.1, - "learning_rate": 5.725699983497442e-08, - "loss": 3.7821, - "step": 2463000 - }, - { - "epoch": 27.1, - "learning_rate": 5.724324770339403e-08, - "loss": 3.7752, - "step": 2463500 - }, - { - "epoch": 27.11, - "learning_rate": 5.7229495571813624e-08, - "loss": 3.7674, - "step": 2464000 - }, - { - "epoch": 27.11, - "learning_rate": 5.721574344023323e-08, - "loss": 3.7745, - "step": 2464500 - }, - { - "epoch": 27.12, - "learning_rate": 5.720199130865284e-08, - "loss": 3.7602, - "step": 2465000 - }, - { - "epoch": 27.12, - "learning_rate": 5.718823917707244e-08, - "loss": 3.7678, - "step": 2465500 - }, - { - "epoch": 27.13, - "learning_rate": 5.717448704549205e-08, - "loss": 3.7643, - "step": 2466000 - }, - { - "epoch": 27.14, - "learning_rate": 5.716073491391166e-08, - "loss": 3.7778, - "step": 2466500 - }, - { - "epoch": 27.14, - "learning_rate": 5.7146982782331253e-08, - "loss": 3.7777, - "step": 2467000 - }, - { - "epoch": 27.15, - "learning_rate": 5.713323065075086e-08, - "loss": 3.7687, - "step": 2467500 - }, - { - "epoch": 27.15, - "learning_rate": 5.711947851917047e-08, - "loss": 3.7464, - "step": 2468000 - }, - { - "epoch": 27.16, - "learning_rate": 5.710572638759007e-08, - "loss": 3.77, - "step": 2468500 - }, - { - "epoch": 27.16, - "learning_rate": 5.709197425600968e-08, - "loss": 3.7767, - "step": 2469000 - }, - { - "epoch": 27.17, - "learning_rate": 5.707822212442929e-08, - "loss": 3.7757, - "step": 2469500 - }, - { - "epoch": 27.17, - "learning_rate": 5.706446999284888e-08, - "loss": 3.7763, - "step": 2470000 - }, - { - "epoch": 27.18, - "learning_rate": 5.705071786126849e-08, - "loss": 3.7761, - "step": 2470500 - }, - { - "epoch": 27.19, - "learning_rate": 5.70369657296881e-08, - "loss": 3.7845, - "step": 2471000 - }, - { - "epoch": 27.19, - "learning_rate": 5.70232135981077e-08, - "loss": 3.7746, - "step": 2471500 - }, - { - "epoch": 27.2, - "learning_rate": 5.700946146652731e-08, - "loss": 3.788, - "step": 2472000 - }, - { - "epoch": 27.2, - "learning_rate": 5.6995709334946916e-08, - "loss": 3.7648, - "step": 2472500 - }, - { - "epoch": 27.21, - "learning_rate": 5.698195720336651e-08, - "loss": 3.784, - "step": 2473000 - }, - { - "epoch": 27.21, - "learning_rate": 5.696820507178612e-08, - "loss": 3.7723, - "step": 2473500 - }, - { - "epoch": 27.22, - "learning_rate": 5.695445294020573e-08, - "loss": 3.7671, - "step": 2474000 - }, - { - "epoch": 27.22, - "learning_rate": 5.694070080862533e-08, - "loss": 3.7665, - "step": 2474500 - }, - { - "epoch": 27.23, - "learning_rate": 5.6926948677044937e-08, - "loss": 3.7761, - "step": 2475000 - }, - { - "epoch": 27.23, - "learning_rate": 5.6913196545464546e-08, - "loss": 3.7808, - "step": 2475500 - }, - { - "epoch": 27.24, - "learning_rate": 5.689944441388415e-08, - "loss": 3.7728, - "step": 2476000 - }, - { - "epoch": 27.25, - "learning_rate": 5.688569228230375e-08, - "loss": 3.7791, - "step": 2476500 - }, - { - "epoch": 27.25, - "learning_rate": 5.687194015072336e-08, - "loss": 3.7714, - "step": 2477000 - }, - { - "epoch": 27.26, - "learning_rate": 5.6858188019142963e-08, - "loss": 3.7593, - "step": 2477500 - }, - { - "epoch": 27.26, - "learning_rate": 5.684443588756257e-08, - "loss": 3.7739, - "step": 2478000 - }, - { - "epoch": 27.27, - "learning_rate": 5.6830683755982176e-08, - "loss": 3.7801, - "step": 2478500 - }, - { - "epoch": 27.27, - "learning_rate": 5.681693162440178e-08, - "loss": 3.7791, - "step": 2479000 - }, - { - "epoch": 27.28, - "learning_rate": 5.680317949282139e-08, - "loss": 3.7957, - "step": 2479500 - }, - { - "epoch": 27.28, - "learning_rate": 5.678942736124099e-08, - "loss": 3.8024, - "step": 2480000 - }, - { - "epoch": 27.29, - "learning_rate": 5.677567522966059e-08, - "loss": 3.7799, - "step": 2480500 - }, - { - "epoch": 27.3, - "learning_rate": 5.67619230980802e-08, - "loss": 3.7696, - "step": 2481000 - }, - { - "epoch": 27.3, - "learning_rate": 5.674817096649981e-08, - "loss": 3.7921, - "step": 2481500 - }, - { - "epoch": 27.31, - "learning_rate": 5.673441883491941e-08, - "loss": 3.7616, - "step": 2482000 - }, - { - "epoch": 27.31, - "learning_rate": 5.672066670333902e-08, - "loss": 3.7644, - "step": 2482500 - }, - { - "epoch": 27.32, - "learning_rate": 5.6706914571758627e-08, - "loss": 3.7694, - "step": 2483000 - }, - { - "epoch": 27.32, - "learning_rate": 5.669316244017822e-08, - "loss": 3.8009, - "step": 2483500 - }, - { - "epoch": 27.33, - "learning_rate": 5.667941030859783e-08, - "loss": 3.7742, - "step": 2484000 - }, - { - "epoch": 27.33, - "learning_rate": 5.666565817701744e-08, - "loss": 3.7777, - "step": 2484500 - }, - { - "epoch": 27.34, - "learning_rate": 5.665190604543704e-08, - "loss": 3.7744, - "step": 2485000 - }, - { - "epoch": 27.34, - "learning_rate": 5.663815391385665e-08, - "loss": 3.7674, - "step": 2485500 - }, - { - "epoch": 27.35, - "learning_rate": 5.6624401782276256e-08, - "loss": 3.76, - "step": 2486000 - }, - { - "epoch": 27.36, - "learning_rate": 5.661064965069585e-08, - "loss": 3.7629, - "step": 2486500 - }, - { - "epoch": 27.36, - "learning_rate": 5.659689751911546e-08, - "loss": 3.7733, - "step": 2487000 - }, - { - "epoch": 27.37, - "learning_rate": 5.658314538753507e-08, - "loss": 3.7698, - "step": 2487500 - }, - { - "epoch": 27.37, - "learning_rate": 5.656939325595467e-08, - "loss": 3.7703, - "step": 2488000 - }, - { - "epoch": 27.38, - "learning_rate": 5.6555641124374276e-08, - "loss": 3.7668, - "step": 2488500 - }, - { - "epoch": 27.38, - "learning_rate": 5.6541888992793886e-08, - "loss": 3.7885, - "step": 2489000 - }, - { - "epoch": 27.39, - "learning_rate": 5.652813686121348e-08, - "loss": 3.745, - "step": 2489500 - }, - { - "epoch": 27.39, - "learning_rate": 5.651438472963309e-08, - "loss": 3.7769, - "step": 2490000 - }, - { - "epoch": 27.4, - "learning_rate": 5.65006325980527e-08, - "loss": 3.7878, - "step": 2490500 - }, - { - "epoch": 27.41, - "learning_rate": 5.6486880466472297e-08, - "loss": 3.773, - "step": 2491000 - }, - { - "epoch": 27.41, - "learning_rate": 5.6473128334891906e-08, - "loss": 3.7826, - "step": 2491500 - }, - { - "epoch": 27.42, - "learning_rate": 5.6459376203311515e-08, - "loss": 3.7878, - "step": 2492000 - }, - { - "epoch": 27.42, - "learning_rate": 5.644562407173111e-08, - "loss": 3.7658, - "step": 2492500 - }, - { - "epoch": 27.43, - "learning_rate": 5.643187194015072e-08, - "loss": 3.7805, - "step": 2493000 - }, - { - "epoch": 27.43, - "learning_rate": 5.641811980857033e-08, - "loss": 3.7743, - "step": 2493500 - }, - { - "epoch": 27.44, - "learning_rate": 5.6404367676989926e-08, - "loss": 3.7648, - "step": 2494000 - }, - { - "epoch": 27.44, - "learning_rate": 5.6390615545409536e-08, - "loss": 3.7586, - "step": 2494500 - }, - { - "epoch": 27.45, - "learning_rate": 5.6376863413829145e-08, - "loss": 3.7699, - "step": 2495000 - }, - { - "epoch": 27.45, - "learning_rate": 5.636311128224874e-08, - "loss": 3.7685, - "step": 2495500 - }, - { - "epoch": 27.46, - "learning_rate": 5.634935915066835e-08, - "loss": 3.7542, - "step": 2496000 - }, - { - "epoch": 27.47, - "learning_rate": 5.633560701908796e-08, - "loss": 3.7903, - "step": 2496500 - }, - { - "epoch": 27.47, - "learning_rate": 5.6321854887507556e-08, - "loss": 3.7841, - "step": 2497000 - }, - { - "epoch": 27.48, - "learning_rate": 5.6308102755927165e-08, - "loss": 3.7718, - "step": 2497500 - }, - { - "epoch": 27.48, - "learning_rate": 5.6294350624346775e-08, - "loss": 3.7784, - "step": 2498000 - }, - { - "epoch": 27.49, - "learning_rate": 5.628059849276637e-08, - "loss": 3.7739, - "step": 2498500 - }, - { - "epoch": 27.49, - "learning_rate": 5.626684636118598e-08, - "loss": 3.7789, - "step": 2499000 - }, - { - "epoch": 27.5, - "learning_rate": 5.625309422960559e-08, - "loss": 3.7729, - "step": 2499500 - }, - { - "epoch": 27.5, - "learning_rate": 5.6239342098025185e-08, - "loss": 3.776, - "step": 2500000 - }, - { - "epoch": 27.51, - "learning_rate": 5.6225589966444795e-08, - "loss": 3.7615, - "step": 2500500 - }, - { - "epoch": 27.52, - "learning_rate": 5.6211837834864404e-08, - "loss": 3.7569, - "step": 2501000 - }, - { - "epoch": 27.52, - "learning_rate": 5.619808570328401e-08, - "loss": 3.7833, - "step": 2501500 - }, - { - "epoch": 27.53, - "learning_rate": 5.618433357170361e-08, - "loss": 3.7894, - "step": 2502000 - }, - { - "epoch": 27.53, - "learning_rate": 5.617058144012322e-08, - "loss": 3.7727, - "step": 2502500 - }, - { - "epoch": 27.54, - "learning_rate": 5.615682930854282e-08, - "loss": 3.7861, - "step": 2503000 - }, - { - "epoch": 27.54, - "learning_rate": 5.614307717696243e-08, - "loss": 3.7824, - "step": 2503500 - }, - { - "epoch": 27.55, - "learning_rate": 5.6129325045382034e-08, - "loss": 3.7856, - "step": 2504000 - }, - { - "epoch": 27.55, - "learning_rate": 5.6115572913801636e-08, - "loss": 3.7811, - "step": 2504500 - }, - { - "epoch": 27.56, - "learning_rate": 5.6101820782221246e-08, - "loss": 3.7596, - "step": 2505000 - }, - { - "epoch": 27.56, - "learning_rate": 5.608806865064085e-08, - "loss": 3.768, - "step": 2505500 - }, - { - "epoch": 27.57, - "learning_rate": 5.607431651906045e-08, - "loss": 3.7423, - "step": 2506000 - }, - { - "epoch": 27.58, - "learning_rate": 5.606056438748006e-08, - "loss": 3.749, - "step": 2506500 - }, - { - "epoch": 27.58, - "learning_rate": 5.604681225589967e-08, - "loss": 3.7694, - "step": 2507000 - }, - { - "epoch": 27.59, - "learning_rate": 5.6033060124319266e-08, - "loss": 3.7716, - "step": 2507500 - }, - { - "epoch": 27.59, - "learning_rate": 5.6019307992738875e-08, - "loss": 3.7752, - "step": 2508000 - }, - { - "epoch": 27.6, - "learning_rate": 5.6005555861158485e-08, - "loss": 3.7713, - "step": 2508500 - }, - { - "epoch": 27.6, - "learning_rate": 5.599180372957808e-08, - "loss": 3.7807, - "step": 2509000 - }, - { - "epoch": 27.61, - "learning_rate": 5.597805159799769e-08, - "loss": 3.7974, - "step": 2509500 - }, - { - "epoch": 27.61, - "learning_rate": 5.59642994664173e-08, - "loss": 3.7773, - "step": 2510000 - }, - { - "epoch": 27.62, - "learning_rate": 5.5950547334836896e-08, - "loss": 3.7589, - "step": 2510500 - }, - { - "epoch": 27.63, - "learning_rate": 5.5936795203256505e-08, - "loss": 3.7781, - "step": 2511000 - }, - { - "epoch": 27.63, - "learning_rate": 5.5923043071676114e-08, - "loss": 3.7828, - "step": 2511500 - }, - { - "epoch": 27.64, - "learning_rate": 5.590929094009571e-08, - "loss": 3.7844, - "step": 2512000 - }, - { - "epoch": 27.64, - "learning_rate": 5.589553880851532e-08, - "loss": 3.7958, - "step": 2512500 - }, - { - "epoch": 27.65, - "learning_rate": 5.588178667693493e-08, - "loss": 3.7846, - "step": 2513000 - }, - { - "epoch": 27.65, - "learning_rate": 5.5868034545354525e-08, - "loss": 3.7839, - "step": 2513500 - }, - { - "epoch": 27.66, - "learning_rate": 5.5854282413774135e-08, - "loss": 3.7775, - "step": 2514000 - }, - { - "epoch": 27.66, - "learning_rate": 5.5840530282193744e-08, - "loss": 3.7874, - "step": 2514500 - }, - { - "epoch": 27.67, - "learning_rate": 5.582677815061334e-08, - "loss": 3.7782, - "step": 2515000 - }, - { - "epoch": 27.67, - "learning_rate": 5.581302601903295e-08, - "loss": 3.7515, - "step": 2515500 - }, - { - "epoch": 27.68, - "learning_rate": 5.579927388745256e-08, - "loss": 3.7642, - "step": 2516000 - }, - { - "epoch": 27.69, - "learning_rate": 5.5785521755872155e-08, - "loss": 3.7799, - "step": 2516500 - }, - { - "epoch": 27.69, - "learning_rate": 5.5771769624291764e-08, - "loss": 3.7762, - "step": 2517000 - }, - { - "epoch": 27.7, - "learning_rate": 5.575801749271136e-08, - "loss": 3.7714, - "step": 2517500 - }, - { - "epoch": 27.7, - "learning_rate": 5.574426536113097e-08, - "loss": 3.7664, - "step": 2518000 - }, - { - "epoch": 27.71, - "learning_rate": 5.573051322955058e-08, - "loss": 3.7528, - "step": 2518500 - }, - { - "epoch": 27.71, - "learning_rate": 5.5716761097970175e-08, - "loss": 3.7635, - "step": 2519000 - }, - { - "epoch": 27.72, - "learning_rate": 5.5703008966389784e-08, - "loss": 3.7855, - "step": 2519500 - }, - { - "epoch": 27.72, - "learning_rate": 5.5689256834809394e-08, - "loss": 3.7817, - "step": 2520000 - }, - { - "epoch": 27.73, - "learning_rate": 5.567550470322899e-08, - "loss": 3.7862, - "step": 2520500 - }, - { - "epoch": 27.74, - "learning_rate": 5.56617525716486e-08, - "loss": 3.7665, - "step": 2521000 - }, - { - "epoch": 27.74, - "learning_rate": 5.564800044006821e-08, - "loss": 3.7795, - "step": 2521500 - }, - { - "epoch": 27.75, - "learning_rate": 5.5634248308487805e-08, - "loss": 3.7701, - "step": 2522000 - }, - { - "epoch": 27.75, - "learning_rate": 5.5620496176907414e-08, - "loss": 3.773, - "step": 2522500 - }, - { - "epoch": 27.76, - "learning_rate": 5.5606744045327023e-08, - "loss": 3.7687, - "step": 2523000 - }, - { - "epoch": 27.76, - "learning_rate": 5.5592991913746626e-08, - "loss": 3.7872, - "step": 2523500 - }, - { - "epoch": 27.77, - "learning_rate": 5.557923978216623e-08, - "loss": 3.7766, - "step": 2524000 - }, - { - "epoch": 27.77, - "learning_rate": 5.556548765058584e-08, - "loss": 3.7713, - "step": 2524500 - }, - { - "epoch": 27.78, - "learning_rate": 5.555173551900544e-08, - "loss": 3.7771, - "step": 2525000 - }, - { - "epoch": 27.78, - "learning_rate": 5.553798338742505e-08, - "loss": 3.7745, - "step": 2525500 - }, - { - "epoch": 27.79, - "learning_rate": 5.552423125584465e-08, - "loss": 3.7837, - "step": 2526000 - }, - { - "epoch": 27.8, - "learning_rate": 5.5510479124264256e-08, - "loss": 3.765, - "step": 2526500 - }, - { - "epoch": 27.8, - "learning_rate": 5.5496726992683865e-08, - "loss": 3.7627, - "step": 2527000 - }, - { - "epoch": 27.81, - "learning_rate": 5.548297486110347e-08, - "loss": 3.7865, - "step": 2527500 - }, - { - "epoch": 27.81, - "learning_rate": 5.546922272952307e-08, - "loss": 3.7823, - "step": 2528000 - }, - { - "epoch": 27.82, - "learning_rate": 5.545547059794268e-08, - "loss": 3.7855, - "step": 2528500 - }, - { - "epoch": 27.82, - "learning_rate": 5.544171846636229e-08, - "loss": 3.7676, - "step": 2529000 - }, - { - "epoch": 27.83, - "learning_rate": 5.5427966334781885e-08, - "loss": 3.7727, - "step": 2529500 - }, - { - "epoch": 27.83, - "learning_rate": 5.5414214203201495e-08, - "loss": 3.7777, - "step": 2530000 - }, - { - "epoch": 27.84, - "learning_rate": 5.5400462071621104e-08, - "loss": 3.7815, - "step": 2530500 - }, - { - "epoch": 27.85, - "learning_rate": 5.53867099400407e-08, - "loss": 3.7724, - "step": 2531000 - }, - { - "epoch": 27.85, - "learning_rate": 5.537295780846031e-08, - "loss": 3.7604, - "step": 2531500 - }, - { - "epoch": 27.86, - "learning_rate": 5.535920567687992e-08, - "loss": 3.7824, - "step": 2532000 - }, - { - "epoch": 27.86, - "learning_rate": 5.5345453545299515e-08, - "loss": 3.7874, - "step": 2532500 - }, - { - "epoch": 27.87, - "learning_rate": 5.5331701413719124e-08, - "loss": 3.7708, - "step": 2533000 - }, - { - "epoch": 27.87, - "learning_rate": 5.5317949282138734e-08, - "loss": 3.784, - "step": 2533500 - }, - { - "epoch": 27.88, - "learning_rate": 5.530419715055833e-08, - "loss": 3.7727, - "step": 2534000 - }, - { - "epoch": 27.88, - "learning_rate": 5.529044501897794e-08, - "loss": 3.7758, - "step": 2534500 - }, - { - "epoch": 27.89, - "learning_rate": 5.527669288739755e-08, - "loss": 3.7757, - "step": 2535000 - }, - { - "epoch": 27.89, - "learning_rate": 5.5262940755817144e-08, - "loss": 3.7779, - "step": 2535500 - }, - { - "epoch": 27.9, - "learning_rate": 5.5249188624236754e-08, - "loss": 3.7977, - "step": 2536000 - }, - { - "epoch": 27.91, - "learning_rate": 5.523543649265636e-08, - "loss": 3.7672, - "step": 2536500 - }, - { - "epoch": 27.91, - "learning_rate": 5.522168436107596e-08, - "loss": 3.7609, - "step": 2537000 - }, - { - "epoch": 27.92, - "learning_rate": 5.520793222949557e-08, - "loss": 3.7765, - "step": 2537500 - }, - { - "epoch": 27.92, - "learning_rate": 5.519418009791518e-08, - "loss": 3.772, - "step": 2538000 - }, - { - "epoch": 27.93, - "learning_rate": 5.5180427966334774e-08, - "loss": 3.7711, - "step": 2538500 - }, - { - "epoch": 27.93, - "learning_rate": 5.5166675834754383e-08, - "loss": 3.773, - "step": 2539000 - }, - { - "epoch": 27.94, - "learning_rate": 5.515292370317399e-08, - "loss": 3.7819, - "step": 2539500 - }, - { - "epoch": 27.94, - "learning_rate": 5.513917157159359e-08, - "loss": 3.7896, - "step": 2540000 - }, - { - "epoch": 27.95, - "learning_rate": 5.51254194400132e-08, - "loss": 3.7858, - "step": 2540500 - }, - { - "epoch": 27.96, - "learning_rate": 5.511166730843281e-08, - "loss": 3.7686, - "step": 2541000 - }, - { - "epoch": 27.96, - "learning_rate": 5.5097915176852404e-08, - "loss": 3.7729, - "step": 2541500 - }, - { - "epoch": 27.97, - "learning_rate": 5.508416304527201e-08, - "loss": 3.748, - "step": 2542000 - }, - { - "epoch": 27.97, - "learning_rate": 5.507041091369162e-08, - "loss": 3.7825, - "step": 2542500 - }, - { - "epoch": 27.98, - "learning_rate": 5.505665878211122e-08, - "loss": 3.7678, - "step": 2543000 - }, - { - "epoch": 27.98, - "learning_rate": 5.504290665053083e-08, - "loss": 3.7622, - "step": 2543500 - }, - { - "epoch": 27.99, - "learning_rate": 5.502915451895044e-08, - "loss": 3.7713, - "step": 2544000 - }, - { - "epoch": 27.99, - "learning_rate": 5.501540238737003e-08, - "loss": 3.7515, - "step": 2544500 - }, - { - "epoch": 28.0, - "learning_rate": 5.500165025578964e-08, - "loss": 3.7844, - "step": 2545000 - }, - { - "epoch": 28.0, - "eval_loss": 3.839246988296509, - "eval_runtime": 6.1365, - "eval_samples_per_second": 253.24, - "step": 2545060 - }, - { - "epoch": 28.0, - "learning_rate": 5.498789812420925e-08, - "loss": 3.7648, - "step": 2545500 - }, - { - "epoch": 28.01, - "learning_rate": 5.497414599262885e-08, - "loss": 3.7538, - "step": 2546000 - }, - { - "epoch": 28.02, - "learning_rate": 5.496039386104846e-08, - "loss": 3.7758, - "step": 2546500 - }, - { - "epoch": 28.02, - "learning_rate": 5.4946641729468067e-08, - "loss": 3.7803, - "step": 2547000 - }, - { - "epoch": 28.03, - "learning_rate": 5.493288959788766e-08, - "loss": 3.7638, - "step": 2547500 - }, - { - "epoch": 28.03, - "learning_rate": 5.491913746630727e-08, - "loss": 3.7699, - "step": 2548000 - }, - { - "epoch": 28.04, - "learning_rate": 5.490538533472688e-08, - "loss": 3.7678, - "step": 2548500 - }, - { - "epoch": 28.04, - "learning_rate": 5.4891633203146484e-08, - "loss": 3.777, - "step": 2549000 - }, - { - "epoch": 28.05, - "learning_rate": 5.487788107156609e-08, - "loss": 3.7615, - "step": 2549500 - }, - { - "epoch": 28.05, - "learning_rate": 5.4864128939985696e-08, - "loss": 3.7693, - "step": 2550000 - }, - { - "epoch": 28.06, - "learning_rate": 5.48503768084053e-08, - "loss": 3.7768, - "step": 2550500 - }, - { - "epoch": 28.07, - "learning_rate": 5.483662467682491e-08, - "loss": 3.7737, - "step": 2551000 - }, - { - "epoch": 28.07, - "learning_rate": 5.482287254524451e-08, - "loss": 3.7877, - "step": 2551500 - }, - { - "epoch": 28.08, - "learning_rate": 5.4809120413664114e-08, - "loss": 3.7772, - "step": 2552000 - }, - { - "epoch": 28.08, - "learning_rate": 5.479536828208372e-08, - "loss": 3.7896, - "step": 2552500 - }, - { - "epoch": 28.09, - "learning_rate": 5.478161615050333e-08, - "loss": 3.7897, - "step": 2553000 - }, - { - "epoch": 28.09, - "learning_rate": 5.476786401892293e-08, - "loss": 3.781, - "step": 2553500 - }, - { - "epoch": 28.1, - "learning_rate": 5.475411188734254e-08, - "loss": 3.7779, - "step": 2554000 - }, - { - "epoch": 28.1, - "learning_rate": 5.474035975576215e-08, - "loss": 3.7693, - "step": 2554500 - }, - { - "epoch": 28.11, - "learning_rate": 5.4726607624181743e-08, - "loss": 3.7703, - "step": 2555000 - }, - { - "epoch": 28.11, - "learning_rate": 5.471285549260135e-08, - "loss": 3.7683, - "step": 2555500 - }, - { - "epoch": 28.12, - "learning_rate": 5.469910336102096e-08, - "loss": 3.7797, - "step": 2556000 - }, - { - "epoch": 28.13, - "learning_rate": 5.468535122944056e-08, - "loss": 3.7733, - "step": 2556500 - }, - { - "epoch": 28.13, - "learning_rate": 5.467159909786017e-08, - "loss": 3.793, - "step": 2557000 - }, - { - "epoch": 28.14, - "learning_rate": 5.465784696627978e-08, - "loss": 3.7693, - "step": 2557500 - }, - { - "epoch": 28.14, - "learning_rate": 5.464409483469937e-08, - "loss": 3.7774, - "step": 2558000 - }, - { - "epoch": 28.15, - "learning_rate": 5.463034270311898e-08, - "loss": 3.7578, - "step": 2558500 - }, - { - "epoch": 28.15, - "learning_rate": 5.461659057153859e-08, - "loss": 3.7819, - "step": 2559000 - }, - { - "epoch": 28.16, - "learning_rate": 5.460283843995819e-08, - "loss": 3.7834, - "step": 2559500 - }, - { - "epoch": 28.16, - "learning_rate": 5.45890863083778e-08, - "loss": 3.7725, - "step": 2560000 - }, - { - "epoch": 28.17, - "learning_rate": 5.4575334176797406e-08, - "loss": 3.7732, - "step": 2560500 - }, - { - "epoch": 28.18, - "learning_rate": 5.4561582045217e-08, - "loss": 3.7672, - "step": 2561000 - }, - { - "epoch": 28.18, - "learning_rate": 5.454782991363661e-08, - "loss": 3.7612, - "step": 2561500 - }, - { - "epoch": 28.19, - "learning_rate": 5.453407778205622e-08, - "loss": 3.7845, - "step": 2562000 - }, - { - "epoch": 28.19, - "learning_rate": 5.452032565047582e-08, - "loss": 3.7869, - "step": 2562500 - }, - { - "epoch": 28.2, - "learning_rate": 5.450657351889543e-08, - "loss": 3.7658, - "step": 2563000 - }, - { - "epoch": 28.2, - "learning_rate": 5.4492821387315036e-08, - "loss": 3.7645, - "step": 2563500 - }, - { - "epoch": 28.21, - "learning_rate": 5.447906925573463e-08, - "loss": 3.7735, - "step": 2564000 - }, - { - "epoch": 28.21, - "learning_rate": 5.446531712415424e-08, - "loss": 3.7891, - "step": 2564500 - }, - { - "epoch": 28.22, - "learning_rate": 5.445156499257385e-08, - "loss": 3.7481, - "step": 2565000 - }, - { - "epoch": 28.22, - "learning_rate": 5.443781286099345e-08, - "loss": 3.7873, - "step": 2565500 - }, - { - "epoch": 28.23, - "learning_rate": 5.4424060729413056e-08, - "loss": 3.7717, - "step": 2566000 - }, - { - "epoch": 28.24, - "learning_rate": 5.4410308597832666e-08, - "loss": 3.7897, - "step": 2566500 - }, - { - "epoch": 28.24, - "learning_rate": 5.439655646625226e-08, - "loss": 3.7744, - "step": 2567000 - }, - { - "epoch": 28.25, - "learning_rate": 5.438280433467187e-08, - "loss": 3.7865, - "step": 2567500 - }, - { - "epoch": 28.25, - "learning_rate": 5.436905220309148e-08, - "loss": 3.7623, - "step": 2568000 - }, - { - "epoch": 28.26, - "learning_rate": 5.4355300071511077e-08, - "loss": 3.7768, - "step": 2568500 - }, - { - "epoch": 28.26, - "learning_rate": 5.4341547939930686e-08, - "loss": 3.741, - "step": 2569000 - }, - { - "epoch": 28.27, - "learning_rate": 5.4327795808350295e-08, - "loss": 3.772, - "step": 2569500 - }, - { - "epoch": 28.27, - "learning_rate": 5.431404367676989e-08, - "loss": 3.7886, - "step": 2570000 - }, - { - "epoch": 28.28, - "learning_rate": 5.43002915451895e-08, - "loss": 3.7689, - "step": 2570500 - }, - { - "epoch": 28.29, - "learning_rate": 5.428653941360911e-08, - "loss": 3.751, - "step": 2571000 - }, - { - "epoch": 28.29, - "learning_rate": 5.4272787282028706e-08, - "loss": 3.7591, - "step": 2571500 - }, - { - "epoch": 28.3, - "learning_rate": 5.4259035150448315e-08, - "loss": 3.7691, - "step": 2572000 - }, - { - "epoch": 28.3, - "learning_rate": 5.4245283018867925e-08, - "loss": 3.7827, - "step": 2572500 - }, - { - "epoch": 28.31, - "learning_rate": 5.423153088728753e-08, - "loss": 3.7593, - "step": 2573000 - }, - { - "epoch": 28.31, - "learning_rate": 5.421777875570713e-08, - "loss": 3.7846, - "step": 2573500 - }, - { - "epoch": 28.32, - "learning_rate": 5.420402662412674e-08, - "loss": 3.7724, - "step": 2574000 - }, - { - "epoch": 28.32, - "learning_rate": 5.419027449254634e-08, - "loss": 3.776, - "step": 2574500 - }, - { - "epoch": 28.33, - "learning_rate": 5.4176522360965945e-08, - "loss": 3.7606, - "step": 2575000 - }, - { - "epoch": 28.33, - "learning_rate": 5.4162770229385554e-08, - "loss": 3.7724, - "step": 2575500 - }, - { - "epoch": 28.34, - "learning_rate": 5.414901809780516e-08, - "loss": 3.7714, - "step": 2576000 - }, - { - "epoch": 28.35, - "learning_rate": 5.4135265966224766e-08, - "loss": 3.7638, - "step": 2576500 - }, - { - "epoch": 28.35, - "learning_rate": 5.412151383464437e-08, - "loss": 3.7734, - "step": 2577000 - }, - { - "epoch": 28.36, - "learning_rate": 5.410776170306397e-08, - "loss": 3.7924, - "step": 2577500 - }, - { - "epoch": 28.36, - "learning_rate": 5.409400957148358e-08, - "loss": 3.7568, - "step": 2578000 - }, - { - "epoch": 28.37, - "learning_rate": 5.408025743990319e-08, - "loss": 3.7593, - "step": 2578500 - }, - { - "epoch": 28.37, - "learning_rate": 5.406650530832279e-08, - "loss": 3.7787, - "step": 2579000 - }, - { - "epoch": 28.38, - "learning_rate": 5.4052753176742396e-08, - "loss": 3.7753, - "step": 2579500 - }, - { - "epoch": 28.38, - "learning_rate": 5.4039001045162005e-08, - "loss": 3.7882, - "step": 2580000 - }, - { - "epoch": 28.39, - "learning_rate": 5.40252489135816e-08, - "loss": 3.7676, - "step": 2580500 - }, - { - "epoch": 28.4, - "learning_rate": 5.401149678200121e-08, - "loss": 3.7744, - "step": 2581000 - }, - { - "epoch": 28.4, - "learning_rate": 5.399774465042082e-08, - "loss": 3.7636, - "step": 2581500 - }, - { - "epoch": 28.41, - "learning_rate": 5.3983992518840416e-08, - "loss": 3.7886, - "step": 2582000 - }, - { - "epoch": 28.41, - "learning_rate": 5.3970240387260026e-08, - "loss": 3.7681, - "step": 2582500 - }, - { - "epoch": 28.42, - "learning_rate": 5.3956488255679635e-08, - "loss": 3.766, - "step": 2583000 - }, - { - "epoch": 28.42, - "learning_rate": 5.394273612409923e-08, - "loss": 3.7915, - "step": 2583500 - }, - { - "epoch": 28.43, - "learning_rate": 5.392898399251884e-08, - "loss": 3.7581, - "step": 2584000 - }, - { - "epoch": 28.43, - "learning_rate": 5.391523186093845e-08, - "loss": 3.7746, - "step": 2584500 - }, - { - "epoch": 28.44, - "learning_rate": 5.3901479729358046e-08, - "loss": 3.7623, - "step": 2585000 - }, - { - "epoch": 28.44, - "learning_rate": 5.3887727597777655e-08, - "loss": 3.75, - "step": 2585500 - }, - { - "epoch": 28.45, - "learning_rate": 5.3873975466197265e-08, - "loss": 3.7702, - "step": 2586000 - }, - { - "epoch": 28.46, - "learning_rate": 5.386022333461686e-08, - "loss": 3.7589, - "step": 2586500 - }, - { - "epoch": 28.46, - "learning_rate": 5.384647120303647e-08, - "loss": 3.772, - "step": 2587000 - }, - { - "epoch": 28.47, - "learning_rate": 5.383271907145608e-08, - "loss": 3.7585, - "step": 2587500 - }, - { - "epoch": 28.47, - "learning_rate": 5.3818966939875676e-08, - "loss": 3.7907, - "step": 2588000 - }, - { - "epoch": 28.48, - "learning_rate": 5.3805214808295285e-08, - "loss": 3.7732, - "step": 2588500 - }, - { - "epoch": 28.48, - "learning_rate": 5.3791462676714894e-08, - "loss": 3.7656, - "step": 2589000 - }, - { - "epoch": 28.49, - "learning_rate": 5.377771054513449e-08, - "loss": 3.7644, - "step": 2589500 - }, - { - "epoch": 28.49, - "learning_rate": 5.37639584135541e-08, - "loss": 3.77, - "step": 2590000 - }, - { - "epoch": 28.5, - "learning_rate": 5.375020628197371e-08, - "loss": 3.7773, - "step": 2590500 - }, - { - "epoch": 28.51, - "learning_rate": 5.3736454150393305e-08, - "loss": 3.786, - "step": 2591000 - }, - { - "epoch": 28.51, - "learning_rate": 5.3722702018812914e-08, - "loss": 3.7799, - "step": 2591500 - }, - { - "epoch": 28.52, - "learning_rate": 5.370894988723251e-08, - "loss": 3.7574, - "step": 2592000 - }, - { - "epoch": 28.52, - "learning_rate": 5.369519775565212e-08, - "loss": 3.7796, - "step": 2592500 - }, - { - "epoch": 28.53, - "learning_rate": 5.368144562407173e-08, - "loss": 3.7768, - "step": 2593000 - }, - { - "epoch": 28.53, - "learning_rate": 5.3667693492491325e-08, - "loss": 3.7606, - "step": 2593500 - }, - { - "epoch": 28.54, - "learning_rate": 5.3653941360910935e-08, - "loss": 3.7666, - "step": 2594000 - }, - { - "epoch": 28.54, - "learning_rate": 5.3640189229330544e-08, - "loss": 3.7677, - "step": 2594500 - }, - { - "epoch": 28.55, - "learning_rate": 5.362643709775015e-08, - "loss": 3.7744, - "step": 2595000 - }, - { - "epoch": 28.55, - "learning_rate": 5.361268496616975e-08, - "loss": 3.7769, - "step": 2595500 - }, - { - "epoch": 28.56, - "learning_rate": 5.359893283458936e-08, - "loss": 3.7697, - "step": 2596000 - }, - { - "epoch": 28.57, - "learning_rate": 5.358518070300896e-08, - "loss": 3.7767, - "step": 2596500 - }, - { - "epoch": 28.57, - "learning_rate": 5.3571428571428564e-08, - "loss": 3.7853, - "step": 2597000 - }, - { - "epoch": 28.58, - "learning_rate": 5.3557676439848174e-08, - "loss": 3.7758, - "step": 2597500 - }, - { - "epoch": 28.58, - "learning_rate": 5.3543924308267776e-08, - "loss": 3.7597, - "step": 2598000 - }, - { - "epoch": 28.59, - "learning_rate": 5.3530172176687386e-08, - "loss": 3.7688, - "step": 2598500 - }, - { - "epoch": 28.59, - "learning_rate": 5.351642004510699e-08, - "loss": 3.7669, - "step": 2599000 - }, - { - "epoch": 28.6, - "learning_rate": 5.350266791352659e-08, - "loss": 3.7656, - "step": 2599500 - }, - { - "epoch": 28.6, - "learning_rate": 5.34889157819462e-08, - "loss": 3.7675, - "step": 2600000 - }, - { - "epoch": 28.61, - "learning_rate": 5.347516365036581e-08, - "loss": 3.7869, - "step": 2600500 - }, - { - "epoch": 28.62, - "learning_rate": 5.3461411518785406e-08, - "loss": 3.779, - "step": 2601000 - }, - { - "epoch": 28.62, - "learning_rate": 5.3447659387205015e-08, - "loss": 3.7399, - "step": 2601500 - }, - { - "epoch": 28.63, - "learning_rate": 5.3433907255624625e-08, - "loss": 3.7663, - "step": 2602000 - }, - { - "epoch": 28.63, - "learning_rate": 5.342015512404422e-08, - "loss": 3.7642, - "step": 2602500 - }, - { - "epoch": 28.64, - "learning_rate": 5.340640299246383e-08, - "loss": 3.7799, - "step": 2603000 - }, - { - "epoch": 28.64, - "learning_rate": 5.339265086088344e-08, - "loss": 3.7614, - "step": 2603500 - }, - { - "epoch": 28.65, - "learning_rate": 5.3378898729303036e-08, - "loss": 3.7695, - "step": 2604000 - }, - { - "epoch": 28.65, - "learning_rate": 5.3365146597722645e-08, - "loss": 3.7504, - "step": 2604500 - }, - { - "epoch": 28.66, - "learning_rate": 5.3351394466142254e-08, - "loss": 3.7806, - "step": 2605000 - }, - { - "epoch": 28.66, - "learning_rate": 5.333764233456185e-08, - "loss": 3.7669, - "step": 2605500 - }, - { - "epoch": 28.67, - "learning_rate": 5.332389020298146e-08, - "loss": 3.7608, - "step": 2606000 - }, - { - "epoch": 28.68, - "learning_rate": 5.331013807140107e-08, - "loss": 3.7731, - "step": 2606500 - }, - { - "epoch": 28.68, - "learning_rate": 5.3296385939820665e-08, - "loss": 3.7712, - "step": 2607000 - }, - { - "epoch": 28.69, - "learning_rate": 5.3282633808240274e-08, - "loss": 3.7637, - "step": 2607500 - }, - { - "epoch": 28.69, - "learning_rate": 5.3268881676659884e-08, - "loss": 3.777, - "step": 2608000 - }, - { - "epoch": 28.7, - "learning_rate": 5.325512954507948e-08, - "loss": 3.8021, - "step": 2608500 - }, - { - "epoch": 28.7, - "learning_rate": 5.324137741349909e-08, - "loss": 3.7743, - "step": 2609000 - }, - { - "epoch": 28.71, - "learning_rate": 5.32276252819187e-08, - "loss": 3.7644, - "step": 2609500 - }, - { - "epoch": 28.71, - "learning_rate": 5.3213873150338295e-08, - "loss": 3.7773, - "step": 2610000 - }, - { - "epoch": 28.72, - "learning_rate": 5.3200121018757904e-08, - "loss": 3.7637, - "step": 2610500 - }, - { - "epoch": 28.73, - "learning_rate": 5.3186368887177513e-08, - "loss": 3.7642, - "step": 2611000 - }, - { - "epoch": 28.73, - "learning_rate": 5.317261675559711e-08, - "loss": 3.7669, - "step": 2611500 - }, - { - "epoch": 28.74, - "learning_rate": 5.315886462401672e-08, - "loss": 3.754, - "step": 2612000 - }, - { - "epoch": 28.74, - "learning_rate": 5.314511249243633e-08, - "loss": 3.7749, - "step": 2612500 - }, - { - "epoch": 28.75, - "learning_rate": 5.3131360360855924e-08, - "loss": 3.7772, - "step": 2613000 - }, - { - "epoch": 28.75, - "learning_rate": 5.3117608229275534e-08, - "loss": 3.7566, - "step": 2613500 - }, - { - "epoch": 28.76, - "learning_rate": 5.310385609769514e-08, - "loss": 3.7741, - "step": 2614000 - }, - { - "epoch": 28.76, - "learning_rate": 5.309010396611474e-08, - "loss": 3.7913, - "step": 2614500 - }, - { - "epoch": 28.77, - "learning_rate": 5.307635183453435e-08, - "loss": 3.7543, - "step": 2615000 - }, - { - "epoch": 28.77, - "learning_rate": 5.306259970295396e-08, - "loss": 3.7629, - "step": 2615500 - }, - { - "epoch": 28.78, - "learning_rate": 5.3048847571373554e-08, - "loss": 3.7702, - "step": 2616000 - }, - { - "epoch": 28.79, - "learning_rate": 5.303509543979316e-08, - "loss": 3.7668, - "step": 2616500 - }, - { - "epoch": 28.79, - "learning_rate": 5.302134330821277e-08, - "loss": 3.7776, - "step": 2617000 - }, - { - "epoch": 28.8, - "learning_rate": 5.300759117663237e-08, - "loss": 3.7817, - "step": 2617500 - }, - { - "epoch": 28.8, - "learning_rate": 5.299383904505198e-08, - "loss": 3.7509, - "step": 2618000 - }, - { - "epoch": 28.81, - "learning_rate": 5.298008691347159e-08, - "loss": 3.7949, - "step": 2618500 - }, - { - "epoch": 28.81, - "learning_rate": 5.2966334781891183e-08, - "loss": 3.7508, - "step": 2619000 - }, - { - "epoch": 28.82, - "learning_rate": 5.295258265031079e-08, - "loss": 3.7715, - "step": 2619500 - }, - { - "epoch": 28.82, - "learning_rate": 5.29388305187304e-08, - "loss": 3.7736, - "step": 2620000 - }, - { - "epoch": 28.83, - "learning_rate": 5.2925078387150005e-08, - "loss": 3.8018, - "step": 2620500 - }, - { - "epoch": 28.84, - "learning_rate": 5.291132625556961e-08, - "loss": 3.746, - "step": 2621000 - }, - { - "epoch": 28.84, - "learning_rate": 5.289757412398922e-08, - "loss": 3.7793, - "step": 2621500 - }, - { - "epoch": 28.85, - "learning_rate": 5.288382199240882e-08, - "loss": 3.7789, - "step": 2622000 - }, - { - "epoch": 28.85, - "learning_rate": 5.287006986082842e-08, - "loss": 3.7764, - "step": 2622500 - }, - { - "epoch": 28.86, - "learning_rate": 5.285631772924803e-08, - "loss": 3.7818, - "step": 2623000 - }, - { - "epoch": 28.86, - "learning_rate": 5.2842565597667634e-08, - "loss": 3.7871, - "step": 2623500 - }, - { - "epoch": 28.87, - "learning_rate": 5.2828813466087244e-08, - "loss": 3.7518, - "step": 2624000 - }, - { - "epoch": 28.87, - "learning_rate": 5.2815061334506847e-08, - "loss": 3.7617, - "step": 2624500 - }, - { - "epoch": 28.88, - "learning_rate": 5.280130920292645e-08, - "loss": 3.7624, - "step": 2625000 - }, - { - "epoch": 28.88, - "learning_rate": 5.278755707134606e-08, - "loss": 3.7674, - "step": 2625500 - }, - { - "epoch": 28.89, - "learning_rate": 5.277380493976567e-08, - "loss": 3.7542, - "step": 2626000 - }, - { - "epoch": 28.9, - "learning_rate": 5.2760052808185264e-08, - "loss": 3.7845, - "step": 2626500 - }, - { - "epoch": 28.9, - "learning_rate": 5.2746300676604873e-08, - "loss": 3.7622, - "step": 2627000 - }, - { - "epoch": 28.91, - "learning_rate": 5.273254854502448e-08, - "loss": 3.7746, - "step": 2627500 - }, - { - "epoch": 28.91, - "learning_rate": 5.271879641344408e-08, - "loss": 3.7668, - "step": 2628000 - }, - { - "epoch": 28.92, - "learning_rate": 5.270504428186369e-08, - "loss": 3.7733, - "step": 2628500 - }, - { - "epoch": 28.92, - "learning_rate": 5.26912921502833e-08, - "loss": 3.7719, - "step": 2629000 - }, - { - "epoch": 28.93, - "learning_rate": 5.2677540018702894e-08, - "loss": 3.7859, - "step": 2629500 - }, - { - "epoch": 28.93, - "learning_rate": 5.26637878871225e-08, - "loss": 3.7451, - "step": 2630000 - }, - { - "epoch": 28.94, - "learning_rate": 5.265003575554211e-08, - "loss": 3.7672, - "step": 2630500 - }, - { - "epoch": 28.95, - "learning_rate": 5.263628362396171e-08, - "loss": 3.7799, - "step": 2631000 - }, - { - "epoch": 28.95, - "learning_rate": 5.262253149238132e-08, - "loss": 3.7727, - "step": 2631500 - }, - { - "epoch": 28.96, - "learning_rate": 5.260877936080093e-08, - "loss": 3.7583, - "step": 2632000 - }, - { - "epoch": 28.96, - "learning_rate": 5.259502722922052e-08, - "loss": 3.7717, - "step": 2632500 - }, - { - "epoch": 28.97, - "learning_rate": 5.258127509764013e-08, - "loss": 3.7811, - "step": 2633000 - }, - { - "epoch": 28.97, - "learning_rate": 5.256752296605974e-08, - "loss": 3.7761, - "step": 2633500 - }, - { - "epoch": 28.98, - "learning_rate": 5.255377083447934e-08, - "loss": 3.7703, - "step": 2634000 - }, - { - "epoch": 28.98, - "learning_rate": 5.254001870289895e-08, - "loss": 3.7956, - "step": 2634500 - }, - { - "epoch": 28.99, - "learning_rate": 5.252626657131856e-08, - "loss": 3.7707, - "step": 2635000 - }, - { - "epoch": 28.99, - "learning_rate": 5.251251443973815e-08, - "loss": 3.7806, - "step": 2635500 - }, - { - "epoch": 29.0, - "eval_loss": 3.8371315002441406, - "eval_runtime": 6.1315, - "eval_samples_per_second": 253.443, - "step": 2635955 - }, - { - "epoch": 29.0, - "learning_rate": 5.249876230815776e-08, - "loss": 3.7788, - "step": 2636000 - }, - { - "epoch": 29.01, - "learning_rate": 5.248501017657737e-08, - "loss": 3.7582, - "step": 2636500 - }, - { - "epoch": 29.01, - "learning_rate": 5.247125804499697e-08, - "loss": 3.7732, - "step": 2637000 - }, - { - "epoch": 29.02, - "learning_rate": 5.245750591341658e-08, - "loss": 3.7512, - "step": 2637500 - }, - { - "epoch": 29.02, - "learning_rate": 5.2443753781836186e-08, - "loss": 3.7929, - "step": 2638000 - }, - { - "epoch": 29.03, - "learning_rate": 5.243000165025578e-08, - "loss": 3.7656, - "step": 2638500 - }, - { - "epoch": 29.03, - "learning_rate": 5.241624951867539e-08, - "loss": 3.7692, - "step": 2639000 - }, - { - "epoch": 29.04, - "learning_rate": 5.2402497387095e-08, - "loss": 3.777, - "step": 2639500 - }, - { - "epoch": 29.04, - "learning_rate": 5.23887452555146e-08, - "loss": 3.7839, - "step": 2640000 - }, - { - "epoch": 29.05, - "learning_rate": 5.2374993123934207e-08, - "loss": 3.7703, - "step": 2640500 - }, - { - "epoch": 29.06, - "learning_rate": 5.2361240992353816e-08, - "loss": 3.7759, - "step": 2641000 - }, - { - "epoch": 29.06, - "learning_rate": 5.234748886077341e-08, - "loss": 3.7743, - "step": 2641500 - }, - { - "epoch": 29.07, - "learning_rate": 5.233373672919302e-08, - "loss": 3.7739, - "step": 2642000 - }, - { - "epoch": 29.07, - "learning_rate": 5.231998459761263e-08, - "loss": 3.766, - "step": 2642500 - }, - { - "epoch": 29.08, - "learning_rate": 5.230623246603223e-08, - "loss": 3.78, - "step": 2643000 - }, - { - "epoch": 29.08, - "learning_rate": 5.2292480334451836e-08, - "loss": 3.7674, - "step": 2643500 - }, - { - "epoch": 29.09, - "learning_rate": 5.2278728202871446e-08, - "loss": 3.7697, - "step": 2644000 - }, - { - "epoch": 29.09, - "learning_rate": 5.226497607129104e-08, - "loss": 3.7716, - "step": 2644500 - }, - { - "epoch": 29.1, - "learning_rate": 5.225122393971065e-08, - "loss": 3.7889, - "step": 2645000 - }, - { - "epoch": 29.11, - "learning_rate": 5.223747180813026e-08, - "loss": 3.7806, - "step": 2645500 - }, - { - "epoch": 29.11, - "learning_rate": 5.222371967654986e-08, - "loss": 3.7795, - "step": 2646000 - }, - { - "epoch": 29.12, - "learning_rate": 5.2209967544969466e-08, - "loss": 3.7646, - "step": 2646500 - }, - { - "epoch": 29.12, - "learning_rate": 5.2196215413389075e-08, - "loss": 3.7739, - "step": 2647000 - }, - { - "epoch": 29.13, - "learning_rate": 5.218246328180868e-08, - "loss": 3.7712, - "step": 2647500 - }, - { - "epoch": 29.13, - "learning_rate": 5.216871115022829e-08, - "loss": 3.7654, - "step": 2648000 - }, - { - "epoch": 29.14, - "learning_rate": 5.215495901864789e-08, - "loss": 3.7617, - "step": 2648500 - }, - { - "epoch": 29.14, - "learning_rate": 5.214120688706749e-08, - "loss": 3.7845, - "step": 2649000 - }, - { - "epoch": 29.15, - "learning_rate": 5.21274547554871e-08, - "loss": 3.7715, - "step": 2649500 - }, - { - "epoch": 29.15, - "learning_rate": 5.2113702623906705e-08, - "loss": 3.7715, - "step": 2650000 - }, - { - "epoch": 29.16, - "learning_rate": 5.209995049232631e-08, - "loss": 3.7799, - "step": 2650500 - }, - { - "epoch": 29.17, - "learning_rate": 5.208619836074592e-08, - "loss": 3.7781, - "step": 2651000 - }, - { - "epoch": 29.17, - "learning_rate": 5.2072446229165526e-08, - "loss": 3.7449, - "step": 2651500 - }, - { - "epoch": 29.18, - "learning_rate": 5.205869409758512e-08, - "loss": 3.7581, - "step": 2652000 - }, - { - "epoch": 29.18, - "learning_rate": 5.204494196600473e-08, - "loss": 3.7688, - "step": 2652500 - }, - { - "epoch": 29.19, - "learning_rate": 5.203118983442434e-08, - "loss": 3.7602, - "step": 2653000 - }, - { - "epoch": 29.19, - "learning_rate": 5.201743770284394e-08, - "loss": 3.7786, - "step": 2653500 - }, - { - "epoch": 29.2, - "learning_rate": 5.2003685571263546e-08, - "loss": 3.7608, - "step": 2654000 - }, - { - "epoch": 29.2, - "learning_rate": 5.1989933439683156e-08, - "loss": 3.7783, - "step": 2654500 - }, - { - "epoch": 29.21, - "learning_rate": 5.197618130810275e-08, - "loss": 3.7864, - "step": 2655000 - }, - { - "epoch": 29.22, - "learning_rate": 5.196242917652236e-08, - "loss": 3.7672, - "step": 2655500 - }, - { - "epoch": 29.22, - "learning_rate": 5.194867704494197e-08, - "loss": 3.7579, - "step": 2656000 - }, - { - "epoch": 29.23, - "learning_rate": 5.1934924913361567e-08, - "loss": 3.7875, - "step": 2656500 - }, - { - "epoch": 29.23, - "learning_rate": 5.1921172781781176e-08, - "loss": 3.7724, - "step": 2657000 - }, - { - "epoch": 29.24, - "learning_rate": 5.1907420650200785e-08, - "loss": 3.7789, - "step": 2657500 - }, - { - "epoch": 29.24, - "learning_rate": 5.189366851862038e-08, - "loss": 3.7829, - "step": 2658000 - }, - { - "epoch": 29.25, - "learning_rate": 5.187991638703999e-08, - "loss": 3.7595, - "step": 2658500 - }, - { - "epoch": 29.25, - "learning_rate": 5.18661642554596e-08, - "loss": 3.7664, - "step": 2659000 - }, - { - "epoch": 29.26, - "learning_rate": 5.1852412123879196e-08, - "loss": 3.7671, - "step": 2659500 - }, - { - "epoch": 29.26, - "learning_rate": 5.1838659992298806e-08, - "loss": 3.7832, - "step": 2660000 - }, - { - "epoch": 29.27, - "learning_rate": 5.1824907860718415e-08, - "loss": 3.767, - "step": 2660500 - }, - { - "epoch": 29.28, - "learning_rate": 5.181115572913801e-08, - "loss": 3.7562, - "step": 2661000 - }, - { - "epoch": 29.28, - "learning_rate": 5.179740359755762e-08, - "loss": 3.7839, - "step": 2661500 - }, - { - "epoch": 29.29, - "learning_rate": 5.178365146597723e-08, - "loss": 3.7679, - "step": 2662000 - }, - { - "epoch": 29.29, - "learning_rate": 5.1769899334396826e-08, - "loss": 3.7655, - "step": 2662500 - }, - { - "epoch": 29.3, - "learning_rate": 5.1756147202816435e-08, - "loss": 3.7888, - "step": 2663000 - }, - { - "epoch": 29.3, - "learning_rate": 5.1742395071236044e-08, - "loss": 3.7766, - "step": 2663500 - }, - { - "epoch": 29.31, - "learning_rate": 5.172864293965564e-08, - "loss": 3.7862, - "step": 2664000 - }, - { - "epoch": 29.31, - "learning_rate": 5.171489080807525e-08, - "loss": 3.7609, - "step": 2664500 - }, - { - "epoch": 29.32, - "learning_rate": 5.170113867649486e-08, - "loss": 3.7497, - "step": 2665000 - }, - { - "epoch": 29.33, - "learning_rate": 5.1687386544914455e-08, - "loss": 3.766, - "step": 2665500 - }, - { - "epoch": 29.33, - "learning_rate": 5.1673634413334065e-08, - "loss": 3.7607, - "step": 2666000 - }, - { - "epoch": 29.34, - "learning_rate": 5.1659882281753674e-08, - "loss": 3.7743, - "step": 2666500 - }, - { - "epoch": 29.34, - "learning_rate": 5.164613015017327e-08, - "loss": 3.7687, - "step": 2667000 - }, - { - "epoch": 29.35, - "learning_rate": 5.163237801859288e-08, - "loss": 3.7682, - "step": 2667500 - }, - { - "epoch": 29.35, - "learning_rate": 5.161862588701248e-08, - "loss": 3.7585, - "step": 2668000 - }, - { - "epoch": 29.36, - "learning_rate": 5.1604873755432085e-08, - "loss": 3.7678, - "step": 2668500 - }, - { - "epoch": 29.36, - "learning_rate": 5.1591121623851694e-08, - "loss": 3.7719, - "step": 2669000 - }, - { - "epoch": 29.37, - "learning_rate": 5.15773694922713e-08, - "loss": 3.7735, - "step": 2669500 - }, - { - "epoch": 29.37, - "learning_rate": 5.15636173606909e-08, - "loss": 3.7626, - "step": 2670000 - }, - { - "epoch": 29.38, - "learning_rate": 5.154986522911051e-08, - "loss": 3.7917, - "step": 2670500 - }, - { - "epoch": 29.39, - "learning_rate": 5.153611309753011e-08, - "loss": 3.7661, - "step": 2671000 - }, - { - "epoch": 29.39, - "learning_rate": 5.152236096594972e-08, - "loss": 3.7685, - "step": 2671500 - }, - { - "epoch": 29.4, - "learning_rate": 5.1508608834369324e-08, - "loss": 3.7689, - "step": 2672000 - }, - { - "epoch": 29.4, - "learning_rate": 5.1494856702788927e-08, - "loss": 3.7658, - "step": 2672500 - }, - { - "epoch": 29.41, - "learning_rate": 5.1481104571208536e-08, - "loss": 3.7561, - "step": 2673000 - }, - { - "epoch": 29.41, - "learning_rate": 5.1467352439628145e-08, - "loss": 3.7797, - "step": 2673500 - }, - { - "epoch": 29.42, - "learning_rate": 5.145360030804774e-08, - "loss": 3.75, - "step": 2674000 - }, - { - "epoch": 29.42, - "learning_rate": 5.143984817646735e-08, - "loss": 3.784, - "step": 2674500 - }, - { - "epoch": 29.43, - "learning_rate": 5.142609604488696e-08, - "loss": 3.753, - "step": 2675000 - }, - { - "epoch": 29.44, - "learning_rate": 5.1412343913306556e-08, - "loss": 3.7834, - "step": 2675500 - }, - { - "epoch": 29.44, - "learning_rate": 5.1398591781726166e-08, - "loss": 3.7866, - "step": 2676000 - }, - { - "epoch": 29.45, - "learning_rate": 5.1384839650145775e-08, - "loss": 3.7743, - "step": 2676500 - }, - { - "epoch": 29.45, - "learning_rate": 5.137108751856537e-08, - "loss": 3.7687, - "step": 2677000 - }, - { - "epoch": 29.46, - "learning_rate": 5.135733538698498e-08, - "loss": 3.7778, - "step": 2677500 - }, - { - "epoch": 29.46, - "learning_rate": 5.134358325540459e-08, - "loss": 3.7665, - "step": 2678000 - }, - { - "epoch": 29.47, - "learning_rate": 5.1329831123824186e-08, - "loss": 3.76, - "step": 2678500 - }, - { - "epoch": 29.47, - "learning_rate": 5.1316078992243795e-08, - "loss": 3.7785, - "step": 2679000 - }, - { - "epoch": 29.48, - "learning_rate": 5.1302326860663405e-08, - "loss": 3.7741, - "step": 2679500 - }, - { - "epoch": 29.48, - "learning_rate": 5.1288574729083e-08, - "loss": 3.7606, - "step": 2680000 - }, - { - "epoch": 29.49, - "learning_rate": 5.127482259750261e-08, - "loss": 3.7899, - "step": 2680500 - }, - { - "epoch": 29.5, - "learning_rate": 5.126107046592222e-08, - "loss": 3.7911, - "step": 2681000 - }, - { - "epoch": 29.5, - "learning_rate": 5.1247318334341815e-08, - "loss": 3.7766, - "step": 2681500 - }, - { - "epoch": 29.51, - "learning_rate": 5.1233566202761425e-08, - "loss": 3.7699, - "step": 2682000 - }, - { - "epoch": 29.51, - "learning_rate": 5.1219814071181034e-08, - "loss": 3.7947, - "step": 2682500 - }, - { - "epoch": 29.52, - "learning_rate": 5.120606193960063e-08, - "loss": 3.7589, - "step": 2683000 - }, - { - "epoch": 29.52, - "learning_rate": 5.119230980802024e-08, - "loss": 3.7703, - "step": 2683500 - }, - { - "epoch": 29.53, - "learning_rate": 5.117855767643985e-08, - "loss": 3.7753, - "step": 2684000 - }, - { - "epoch": 29.53, - "learning_rate": 5.1164805544859445e-08, - "loss": 3.7753, - "step": 2684500 - }, - { - "epoch": 29.54, - "learning_rate": 5.1151053413279054e-08, - "loss": 3.7778, - "step": 2685000 - }, - { - "epoch": 29.55, - "learning_rate": 5.1137301281698664e-08, - "loss": 3.7739, - "step": 2685500 - }, - { - "epoch": 29.55, - "learning_rate": 5.112354915011826e-08, - "loss": 3.7665, - "step": 2686000 - }, - { - "epoch": 29.56, - "learning_rate": 5.110979701853787e-08, - "loss": 3.7768, - "step": 2686500 - }, - { - "epoch": 29.56, - "learning_rate": 5.109604488695748e-08, - "loss": 3.7747, - "step": 2687000 - }, - { - "epoch": 29.57, - "learning_rate": 5.1082292755377075e-08, - "loss": 3.7648, - "step": 2687500 - }, - { - "epoch": 29.57, - "learning_rate": 5.1068540623796684e-08, - "loss": 3.777, - "step": 2688000 - }, - { - "epoch": 29.58, - "learning_rate": 5.105478849221629e-08, - "loss": 3.7719, - "step": 2688500 - }, - { - "epoch": 29.58, - "learning_rate": 5.104103636063589e-08, - "loss": 3.7652, - "step": 2689000 - }, - { - "epoch": 29.59, - "learning_rate": 5.10272842290555e-08, - "loss": 3.7532, - "step": 2689500 - }, - { - "epoch": 29.59, - "learning_rate": 5.101353209747511e-08, - "loss": 3.7796, - "step": 2690000 - }, - { - "epoch": 29.6, - "learning_rate": 5.0999779965894704e-08, - "loss": 3.7788, - "step": 2690500 - }, - { - "epoch": 29.61, - "learning_rate": 5.0986027834314314e-08, - "loss": 3.7661, - "step": 2691000 - }, - { - "epoch": 29.61, - "learning_rate": 5.097227570273392e-08, - "loss": 3.779, - "step": 2691500 - }, - { - "epoch": 29.62, - "learning_rate": 5.095852357115352e-08, - "loss": 3.7811, - "step": 2692000 - }, - { - "epoch": 29.62, - "learning_rate": 5.094477143957313e-08, - "loss": 3.7689, - "step": 2692500 - }, - { - "epoch": 29.63, - "learning_rate": 5.093101930799274e-08, - "loss": 3.7608, - "step": 2693000 - }, - { - "epoch": 29.63, - "learning_rate": 5.091726717641234e-08, - "loss": 3.7392, - "step": 2693500 - }, - { - "epoch": 29.64, - "learning_rate": 5.090351504483194e-08, - "loss": 3.7649, - "step": 2694000 - }, - { - "epoch": 29.64, - "learning_rate": 5.088976291325155e-08, - "loss": 3.7671, - "step": 2694500 - }, - { - "epoch": 29.65, - "learning_rate": 5.0876010781671155e-08, - "loss": 3.7487, - "step": 2695000 - }, - { - "epoch": 29.66, - "learning_rate": 5.0862258650090765e-08, - "loss": 3.786, - "step": 2695500 - }, - { - "epoch": 29.66, - "learning_rate": 5.084850651851037e-08, - "loss": 3.765, - "step": 2696000 - }, - { - "epoch": 29.67, - "learning_rate": 5.083475438692997e-08, - "loss": 3.7641, - "step": 2696500 - }, - { - "epoch": 29.67, - "learning_rate": 5.082100225534958e-08, - "loss": 3.7796, - "step": 2697000 - }, - { - "epoch": 29.68, - "learning_rate": 5.080725012376918e-08, - "loss": 3.7616, - "step": 2697500 - }, - { - "epoch": 29.68, - "learning_rate": 5.0793497992188785e-08, - "loss": 3.7701, - "step": 2698000 - }, - { - "epoch": 29.69, - "learning_rate": 5.0779745860608394e-08, - "loss": 3.7539, - "step": 2698500 - }, - { - "epoch": 29.69, - "learning_rate": 5.0765993729028003e-08, - "loss": 3.7698, - "step": 2699000 - }, - { - "epoch": 29.7, - "learning_rate": 5.07522415974476e-08, - "loss": 3.7831, - "step": 2699500 - }, - { - "epoch": 29.7, - "learning_rate": 5.073848946586721e-08, - "loss": 3.7817, - "step": 2700000 - }, - { - "epoch": 29.71, - "learning_rate": 5.072473733428682e-08, - "loss": 3.7802, - "step": 2700500 - }, - { - "epoch": 29.72, - "learning_rate": 5.0710985202706414e-08, - "loss": 3.7495, - "step": 2701000 - }, - { - "epoch": 29.72, - "learning_rate": 5.0697233071126024e-08, - "loss": 3.7709, - "step": 2701500 - }, - { - "epoch": 29.73, - "learning_rate": 5.068348093954563e-08, - "loss": 3.7522, - "step": 2702000 - }, - { - "epoch": 29.73, - "learning_rate": 5.066972880796523e-08, - "loss": 3.7773, - "step": 2702500 - }, - { - "epoch": 29.74, - "learning_rate": 5.065597667638484e-08, - "loss": 3.7634, - "step": 2703000 - }, - { - "epoch": 29.74, - "learning_rate": 5.064222454480445e-08, - "loss": 3.7454, - "step": 2703500 - }, - { - "epoch": 29.75, - "learning_rate": 5.0628472413224044e-08, - "loss": 3.7574, - "step": 2704000 - }, - { - "epoch": 29.75, - "learning_rate": 5.0614720281643653e-08, - "loss": 3.7686, - "step": 2704500 - }, - { - "epoch": 29.76, - "learning_rate": 5.060096815006326e-08, - "loss": 3.7488, - "step": 2705000 - }, - { - "epoch": 29.77, - "learning_rate": 5.058721601848286e-08, - "loss": 3.7639, - "step": 2705500 - }, - { - "epoch": 29.77, - "learning_rate": 5.057346388690247e-08, - "loss": 3.7683, - "step": 2706000 - }, - { - "epoch": 29.78, - "learning_rate": 5.055971175532208e-08, - "loss": 3.7728, - "step": 2706500 - }, - { - "epoch": 29.78, - "learning_rate": 5.0545959623741674e-08, - "loss": 3.7648, - "step": 2707000 - }, - { - "epoch": 29.79, - "learning_rate": 5.053220749216128e-08, - "loss": 3.773, - "step": 2707500 - }, - { - "epoch": 29.79, - "learning_rate": 5.051845536058089e-08, - "loss": 3.7837, - "step": 2708000 - }, - { - "epoch": 29.8, - "learning_rate": 5.050470322900049e-08, - "loss": 3.784, - "step": 2708500 - }, - { - "epoch": 29.8, - "learning_rate": 5.04909510974201e-08, - "loss": 3.7826, - "step": 2709000 - }, - { - "epoch": 29.81, - "learning_rate": 5.047719896583971e-08, - "loss": 3.769, - "step": 2709500 - }, - { - "epoch": 29.81, - "learning_rate": 5.04634468342593e-08, - "loss": 3.7715, - "step": 2710000 - }, - { - "epoch": 29.82, - "learning_rate": 5.044969470267891e-08, - "loss": 3.7552, - "step": 2710500 - }, - { - "epoch": 29.83, - "learning_rate": 5.043594257109852e-08, - "loss": 3.7617, - "step": 2711000 - }, - { - "epoch": 29.83, - "learning_rate": 5.042219043951812e-08, - "loss": 3.7364, - "step": 2711500 - }, - { - "epoch": 29.84, - "learning_rate": 5.040843830793773e-08, - "loss": 3.7823, - "step": 2712000 - }, - { - "epoch": 29.84, - "learning_rate": 5.0394686176357337e-08, - "loss": 3.7426, - "step": 2712500 - }, - { - "epoch": 29.85, - "learning_rate": 5.038093404477693e-08, - "loss": 3.7587, - "step": 2713000 - }, - { - "epoch": 29.85, - "learning_rate": 5.036718191319654e-08, - "loss": 3.7369, - "step": 2713500 - }, - { - "epoch": 29.86, - "learning_rate": 5.035342978161615e-08, - "loss": 3.778, - "step": 2714000 - }, - { - "epoch": 29.86, - "learning_rate": 5.033967765003575e-08, - "loss": 3.7513, - "step": 2714500 - }, - { - "epoch": 29.87, - "learning_rate": 5.032592551845536e-08, - "loss": 3.7663, - "step": 2715000 - }, - { - "epoch": 29.88, - "learning_rate": 5.0312173386874966e-08, - "loss": 3.7903, - "step": 2715500 - }, - { - "epoch": 29.88, - "learning_rate": 5.029842125529456e-08, - "loss": 3.7809, - "step": 2716000 - }, - { - "epoch": 29.89, - "learning_rate": 5.028466912371417e-08, - "loss": 3.7518, - "step": 2716500 - }, - { - "epoch": 29.89, - "learning_rate": 5.027091699213378e-08, - "loss": 3.7621, - "step": 2717000 - }, - { - "epoch": 29.9, - "learning_rate": 5.0257164860553384e-08, - "loss": 3.7608, - "step": 2717500 - }, - { - "epoch": 29.9, - "learning_rate": 5.0243412728972986e-08, - "loss": 3.7797, - "step": 2718000 - }, - { - "epoch": 29.91, - "learning_rate": 5.0229660597392596e-08, - "loss": 3.7561, - "step": 2718500 - }, - { - "epoch": 29.91, - "learning_rate": 5.02159084658122e-08, - "loss": 3.7748, - "step": 2719000 - }, - { - "epoch": 29.92, - "learning_rate": 5.02021563342318e-08, - "loss": 3.7772, - "step": 2719500 - }, - { - "epoch": 29.92, - "learning_rate": 5.018840420265141e-08, - "loss": 3.7611, - "step": 2720000 - }, - { - "epoch": 29.93, - "learning_rate": 5.0174652071071013e-08, - "loss": 3.766, - "step": 2720500 - }, - { - "epoch": 29.94, - "learning_rate": 5.016089993949062e-08, - "loss": 3.7769, - "step": 2721000 - }, - { - "epoch": 29.94, - "learning_rate": 5.0147147807910225e-08, - "loss": 3.7593, - "step": 2721500 - }, - { - "epoch": 29.95, - "learning_rate": 5.013339567632983e-08, - "loss": 3.761, - "step": 2722000 - }, - { - "epoch": 29.95, - "learning_rate": 5.011964354474944e-08, - "loss": 3.7647, - "step": 2722500 - }, - { - "epoch": 29.96, - "learning_rate": 5.010589141316905e-08, - "loss": 3.7685, - "step": 2723000 - }, - { - "epoch": 29.96, - "learning_rate": 5.009213928158864e-08, - "loss": 3.764, - "step": 2723500 - }, - { - "epoch": 29.97, - "learning_rate": 5.007838715000825e-08, - "loss": 3.7785, - "step": 2724000 - }, - { - "epoch": 29.97, - "learning_rate": 5.006463501842786e-08, - "loss": 3.7616, - "step": 2724500 - }, - { - "epoch": 29.98, - "learning_rate": 5.005088288684746e-08, - "loss": 3.7695, - "step": 2725000 - }, - { - "epoch": 29.99, - "learning_rate": 5.003713075526707e-08, - "loss": 3.7728, - "step": 2725500 - }, - { - "epoch": 29.99, - "learning_rate": 5.0023378623686676e-08, - "loss": 3.7582, - "step": 2726000 - }, - { - "epoch": 30.0, - "learning_rate": 5.000962649210627e-08, - "loss": 3.7561, - "step": 2726500 - }, - { - "epoch": 30.0, - "eval_loss": 3.83630633354187, - "eval_runtime": 6.1313, - "eval_samples_per_second": 253.455, - "step": 2726850 - }, - { - "epoch": 30.0, - "learning_rate": 4.999587436052588e-08, - "loss": 3.7688, - "step": 2727000 - }, - { - "epoch": 30.01, - "learning_rate": 4.9982122228945485e-08, - "loss": 3.7779, - "step": 2727500 - }, - { - "epoch": 30.01, - "learning_rate": 4.996837009736509e-08, - "loss": 3.7676, - "step": 2728000 - }, - { - "epoch": 30.02, - "learning_rate": 4.9954617965784697e-08, - "loss": 3.7718, - "step": 2728500 - }, - { - "epoch": 30.02, - "learning_rate": 4.99408658342043e-08, - "loss": 3.7617, - "step": 2729000 - }, - { - "epoch": 30.03, - "learning_rate": 4.99271137026239e-08, - "loss": 3.7666, - "step": 2729500 - }, - { - "epoch": 30.03, - "learning_rate": 4.991336157104351e-08, - "loss": 3.7736, - "step": 2730000 - }, - { - "epoch": 30.04, - "learning_rate": 4.9899609439463114e-08, - "loss": 3.7874, - "step": 2730500 - }, - { - "epoch": 30.05, - "learning_rate": 4.988585730788272e-08, - "loss": 3.7805, - "step": 2731000 - }, - { - "epoch": 30.05, - "learning_rate": 4.9872105176302326e-08, - "loss": 3.7761, - "step": 2731500 - }, - { - "epoch": 30.06, - "learning_rate": 4.985835304472193e-08, - "loss": 3.7637, - "step": 2732000 - }, - { - "epoch": 30.06, - "learning_rate": 4.984460091314153e-08, - "loss": 3.7604, - "step": 2732500 - }, - { - "epoch": 30.07, - "learning_rate": 4.983084878156114e-08, - "loss": 3.7796, - "step": 2733000 - }, - { - "epoch": 30.07, - "learning_rate": 4.9817096649980744e-08, - "loss": 3.7736, - "step": 2733500 - }, - { - "epoch": 30.08, - "learning_rate": 4.9803344518400346e-08, - "loss": 3.7527, - "step": 2734000 - }, - { - "epoch": 30.08, - "learning_rate": 4.9789592386819956e-08, - "loss": 3.7637, - "step": 2734500 - }, - { - "epoch": 30.09, - "learning_rate": 4.977584025523956e-08, - "loss": 3.751, - "step": 2735000 - }, - { - "epoch": 30.1, - "learning_rate": 4.976208812365916e-08, - "loss": 3.7808, - "step": 2735500 - }, - { - "epoch": 30.1, - "learning_rate": 4.974833599207877e-08, - "loss": 3.7896, - "step": 2736000 - }, - { - "epoch": 30.11, - "learning_rate": 4.9734583860498373e-08, - "loss": 3.7621, - "step": 2736500 - }, - { - "epoch": 30.11, - "learning_rate": 4.9720831728917976e-08, - "loss": 3.7667, - "step": 2737000 - }, - { - "epoch": 30.12, - "learning_rate": 4.9707079597337585e-08, - "loss": 3.7652, - "step": 2737500 - }, - { - "epoch": 30.12, - "learning_rate": 4.969332746575719e-08, - "loss": 3.7661, - "step": 2738000 - }, - { - "epoch": 30.13, - "learning_rate": 4.967957533417679e-08, - "loss": 3.7616, - "step": 2738500 - }, - { - "epoch": 30.13, - "learning_rate": 4.96658232025964e-08, - "loss": 3.7662, - "step": 2739000 - }, - { - "epoch": 30.14, - "learning_rate": 4.9652071071016e-08, - "loss": 3.7703, - "step": 2739500 - }, - { - "epoch": 30.14, - "learning_rate": 4.9638318939435606e-08, - "loss": 3.7515, - "step": 2740000 - }, - { - "epoch": 30.15, - "learning_rate": 4.9624566807855215e-08, - "loss": 3.7783, - "step": 2740500 - }, - { - "epoch": 30.16, - "learning_rate": 4.961081467627482e-08, - "loss": 3.764, - "step": 2741000 - }, - { - "epoch": 30.16, - "learning_rate": 4.959706254469442e-08, - "loss": 3.7763, - "step": 2741500 - }, - { - "epoch": 30.17, - "learning_rate": 4.958331041311403e-08, - "loss": 3.7594, - "step": 2742000 - }, - { - "epoch": 30.17, - "learning_rate": 4.956955828153363e-08, - "loss": 3.7763, - "step": 2742500 - }, - { - "epoch": 30.18, - "learning_rate": 4.955580614995324e-08, - "loss": 3.7589, - "step": 2743000 - }, - { - "epoch": 30.18, - "learning_rate": 4.9542054018372845e-08, - "loss": 3.7605, - "step": 2743500 - }, - { - "epoch": 30.19, - "learning_rate": 4.9528301886792454e-08, - "loss": 3.7564, - "step": 2744000 - }, - { - "epoch": 30.19, - "learning_rate": 4.9514549755212057e-08, - "loss": 3.741, - "step": 2744500 - }, - { - "epoch": 30.2, - "learning_rate": 4.950079762363166e-08, - "loss": 3.7658, - "step": 2745000 - }, - { - "epoch": 30.21, - "learning_rate": 4.948704549205127e-08, - "loss": 3.7663, - "step": 2745500 - }, - { - "epoch": 30.21, - "learning_rate": 4.947329336047087e-08, - "loss": 3.7596, - "step": 2746000 - }, - { - "epoch": 30.22, - "learning_rate": 4.945954122889048e-08, - "loss": 3.7453, - "step": 2746500 - }, - { - "epoch": 30.22, - "learning_rate": 4.9445789097310084e-08, - "loss": 3.7729, - "step": 2747000 - }, - { - "epoch": 30.23, - "learning_rate": 4.9432036965729686e-08, - "loss": 3.764, - "step": 2747500 - }, - { - "epoch": 30.23, - "learning_rate": 4.9418284834149296e-08, - "loss": 3.7804, - "step": 2748000 - }, - { - "epoch": 30.24, - "learning_rate": 4.94045327025689e-08, - "loss": 3.7645, - "step": 2748500 - }, - { - "epoch": 30.24, - "learning_rate": 4.93907805709885e-08, - "loss": 3.7773, - "step": 2749000 - }, - { - "epoch": 30.25, - "learning_rate": 4.937702843940811e-08, - "loss": 3.7722, - "step": 2749500 - }, - { - "epoch": 30.25, - "learning_rate": 4.936327630782771e-08, - "loss": 3.7499, - "step": 2750000 - }, - { - "epoch": 30.26, - "learning_rate": 4.9349524176247316e-08, - "loss": 3.7661, - "step": 2750500 - }, - { - "epoch": 30.27, - "learning_rate": 4.9335772044666925e-08, - "loss": 3.7661, - "step": 2751000 - }, - { - "epoch": 30.27, - "learning_rate": 4.932201991308653e-08, - "loss": 3.7737, - "step": 2751500 - }, - { - "epoch": 30.28, - "learning_rate": 4.930826778150613e-08, - "loss": 3.7708, - "step": 2752000 - }, - { - "epoch": 30.28, - "learning_rate": 4.929451564992574e-08, - "loss": 3.7807, - "step": 2752500 - }, - { - "epoch": 30.29, - "learning_rate": 4.928076351834534e-08, - "loss": 3.7713, - "step": 2753000 - }, - { - "epoch": 30.29, - "learning_rate": 4.9267011386764945e-08, - "loss": 3.7693, - "step": 2753500 - }, - { - "epoch": 30.3, - "learning_rate": 4.9253259255184555e-08, - "loss": 3.7757, - "step": 2754000 - }, - { - "epoch": 30.3, - "learning_rate": 4.923950712360416e-08, - "loss": 3.7532, - "step": 2754500 - }, - { - "epoch": 30.31, - "learning_rate": 4.922575499202376e-08, - "loss": 3.7784, - "step": 2755000 - }, - { - "epoch": 30.32, - "learning_rate": 4.921200286044337e-08, - "loss": 3.7774, - "step": 2755500 - }, - { - "epoch": 30.32, - "learning_rate": 4.919825072886297e-08, - "loss": 3.7813, - "step": 2756000 - }, - { - "epoch": 30.33, - "learning_rate": 4.9184498597282575e-08, - "loss": 3.7772, - "step": 2756500 - }, - { - "epoch": 30.33, - "learning_rate": 4.9170746465702184e-08, - "loss": 3.7765, - "step": 2757000 - }, - { - "epoch": 30.34, - "learning_rate": 4.915699433412179e-08, - "loss": 3.7641, - "step": 2757500 - }, - { - "epoch": 30.34, - "learning_rate": 4.914324220254139e-08, - "loss": 3.7798, - "step": 2758000 - }, - { - "epoch": 30.35, - "learning_rate": 4.9129490070961e-08, - "loss": 3.773, - "step": 2758500 - }, - { - "epoch": 30.35, - "learning_rate": 4.91157379393806e-08, - "loss": 3.7698, - "step": 2759000 - }, - { - "epoch": 30.36, - "learning_rate": 4.9101985807800205e-08, - "loss": 3.7586, - "step": 2759500 - }, - { - "epoch": 30.36, - "learning_rate": 4.9088233676219814e-08, - "loss": 3.7472, - "step": 2760000 - }, - { - "epoch": 30.37, - "learning_rate": 4.907448154463942e-08, - "loss": 3.7723, - "step": 2760500 - }, - { - "epoch": 30.38, - "learning_rate": 4.906072941305902e-08, - "loss": 3.7598, - "step": 2761000 - }, - { - "epoch": 30.38, - "learning_rate": 4.904697728147862e-08, - "loss": 3.7702, - "step": 2761500 - }, - { - "epoch": 30.39, - "learning_rate": 4.903322514989823e-08, - "loss": 3.7761, - "step": 2762000 - }, - { - "epoch": 30.39, - "learning_rate": 4.9019473018317834e-08, - "loss": 3.79, - "step": 2762500 - }, - { - "epoch": 30.4, - "learning_rate": 4.900572088673744e-08, - "loss": 3.7617, - "step": 2763000 - }, - { - "epoch": 30.4, - "learning_rate": 4.8991968755157046e-08, - "loss": 3.7543, - "step": 2763500 - }, - { - "epoch": 30.41, - "learning_rate": 4.897821662357665e-08, - "loss": 3.7747, - "step": 2764000 - }, - { - "epoch": 30.41, - "learning_rate": 4.896446449199625e-08, - "loss": 3.7671, - "step": 2764500 - }, - { - "epoch": 30.42, - "learning_rate": 4.895071236041586e-08, - "loss": 3.7713, - "step": 2765000 - }, - { - "epoch": 30.43, - "learning_rate": 4.8936960228835464e-08, - "loss": 3.7651, - "step": 2765500 - }, - { - "epoch": 30.43, - "learning_rate": 4.8923208097255067e-08, - "loss": 3.7741, - "step": 2766000 - }, - { - "epoch": 30.44, - "learning_rate": 4.8909455965674676e-08, - "loss": 3.7969, - "step": 2766500 - }, - { - "epoch": 30.44, - "learning_rate": 4.889570383409428e-08, - "loss": 3.7808, - "step": 2767000 - }, - { - "epoch": 30.45, - "learning_rate": 4.888195170251389e-08, - "loss": 3.765, - "step": 2767500 - }, - { - "epoch": 30.45, - "learning_rate": 4.886819957093349e-08, - "loss": 3.7633, - "step": 2768000 - }, - { - "epoch": 30.46, - "learning_rate": 4.88544474393531e-08, - "loss": 3.7839, - "step": 2768500 - }, - { - "epoch": 30.46, - "learning_rate": 4.88406953077727e-08, - "loss": 3.7686, - "step": 2769000 - }, - { - "epoch": 30.47, - "learning_rate": 4.882694317619231e-08, - "loss": 3.7721, - "step": 2769500 - }, - { - "epoch": 30.47, - "learning_rate": 4.8813191044611915e-08, - "loss": 3.7667, - "step": 2770000 - }, - { - "epoch": 30.48, - "learning_rate": 4.879943891303152e-08, - "loss": 3.764, - "step": 2770500 - }, - { - "epoch": 30.49, - "learning_rate": 4.878568678145113e-08, - "loss": 3.7623, - "step": 2771000 - }, - { - "epoch": 30.49, - "learning_rate": 4.877193464987073e-08, - "loss": 3.7597, - "step": 2771500 - }, - { - "epoch": 30.5, - "learning_rate": 4.875818251829033e-08, - "loss": 3.7763, - "step": 2772000 - }, - { - "epoch": 30.5, - "learning_rate": 4.874443038670994e-08, - "loss": 3.7755, - "step": 2772500 - }, - { - "epoch": 30.51, - "learning_rate": 4.8730678255129544e-08, - "loss": 3.7711, - "step": 2773000 - }, - { - "epoch": 30.51, - "learning_rate": 4.871692612354915e-08, - "loss": 3.7698, - "step": 2773500 - }, - { - "epoch": 30.52, - "learning_rate": 4.8703173991968756e-08, - "loss": 3.7626, - "step": 2774000 - }, - { - "epoch": 30.52, - "learning_rate": 4.868942186038836e-08, - "loss": 3.783, - "step": 2774500 - }, - { - "epoch": 30.53, - "learning_rate": 4.867566972880796e-08, - "loss": 3.768, - "step": 2775000 - }, - { - "epoch": 30.54, - "learning_rate": 4.866191759722757e-08, - "loss": 3.7432, - "step": 2775500 - }, - { - "epoch": 30.54, - "learning_rate": 4.8648165465647174e-08, - "loss": 3.7664, - "step": 2776000 - }, - { - "epoch": 30.55, - "learning_rate": 4.863441333406678e-08, - "loss": 3.7585, - "step": 2776500 - }, - { - "epoch": 30.55, - "learning_rate": 4.8620661202486386e-08, - "loss": 3.7453, - "step": 2777000 - }, - { - "epoch": 30.56, - "learning_rate": 4.860690907090599e-08, - "loss": 3.7638, - "step": 2777500 - }, - { - "epoch": 30.56, - "learning_rate": 4.859315693932559e-08, - "loss": 3.774, - "step": 2778000 - }, - { - "epoch": 30.57, - "learning_rate": 4.85794048077452e-08, - "loss": 3.7559, - "step": 2778500 - }, - { - "epoch": 30.57, - "learning_rate": 4.8565652676164804e-08, - "loss": 3.7843, - "step": 2779000 - }, - { - "epoch": 30.58, - "learning_rate": 4.8551900544584406e-08, - "loss": 3.7573, - "step": 2779500 - }, - { - "epoch": 30.58, - "learning_rate": 4.8538148413004016e-08, - "loss": 3.7607, - "step": 2780000 - }, - { - "epoch": 30.59, - "learning_rate": 4.852439628142362e-08, - "loss": 3.7865, - "step": 2780500 - }, - { - "epoch": 30.6, - "learning_rate": 4.851064414984322e-08, - "loss": 3.7575, - "step": 2781000 - }, - { - "epoch": 30.6, - "learning_rate": 4.849689201826283e-08, - "loss": 3.7812, - "step": 2781500 - }, - { - "epoch": 30.61, - "learning_rate": 4.848313988668243e-08, - "loss": 3.7777, - "step": 2782000 - }, - { - "epoch": 30.61, - "learning_rate": 4.8469387755102036e-08, - "loss": 3.779, - "step": 2782500 - }, - { - "epoch": 30.62, - "learning_rate": 4.8455635623521645e-08, - "loss": 3.7601, - "step": 2783000 - }, - { - "epoch": 30.62, - "learning_rate": 4.844188349194125e-08, - "loss": 3.7718, - "step": 2783500 - }, - { - "epoch": 30.63, - "learning_rate": 4.842813136036085e-08, - "loss": 3.7692, - "step": 2784000 - }, - { - "epoch": 30.63, - "learning_rate": 4.841437922878046e-08, - "loss": 3.762, - "step": 2784500 - }, - { - "epoch": 30.64, - "learning_rate": 4.840062709720006e-08, - "loss": 3.7785, - "step": 2785000 - }, - { - "epoch": 30.65, - "learning_rate": 4.8386874965619666e-08, - "loss": 3.7583, - "step": 2785500 - }, - { - "epoch": 30.65, - "learning_rate": 4.8373122834039275e-08, - "loss": 3.7628, - "step": 2786000 - }, - { - "epoch": 30.66, - "learning_rate": 4.835937070245888e-08, - "loss": 3.7599, - "step": 2786500 - }, - { - "epoch": 30.66, - "learning_rate": 4.834561857087848e-08, - "loss": 3.7768, - "step": 2787000 - }, - { - "epoch": 30.67, - "learning_rate": 4.833186643929809e-08, - "loss": 3.7559, - "step": 2787500 - }, - { - "epoch": 30.67, - "learning_rate": 4.831811430771769e-08, - "loss": 3.7761, - "step": 2788000 - }, - { - "epoch": 30.68, - "learning_rate": 4.8304362176137295e-08, - "loss": 3.7617, - "step": 2788500 - }, - { - "epoch": 30.68, - "learning_rate": 4.8290610044556904e-08, - "loss": 3.7389, - "step": 2789000 - }, - { - "epoch": 30.69, - "learning_rate": 4.827685791297651e-08, - "loss": 3.7612, - "step": 2789500 - }, - { - "epoch": 30.69, - "learning_rate": 4.826310578139611e-08, - "loss": 3.7523, - "step": 2790000 - }, - { - "epoch": 30.7, - "learning_rate": 4.824935364981572e-08, - "loss": 3.7705, - "step": 2790500 - }, - { - "epoch": 30.71, - "learning_rate": 4.823560151823532e-08, - "loss": 3.7817, - "step": 2791000 - }, - { - "epoch": 30.71, - "learning_rate": 4.822184938665493e-08, - "loss": 3.781, - "step": 2791500 - }, - { - "epoch": 30.72, - "learning_rate": 4.8208097255074534e-08, - "loss": 3.772, - "step": 2792000 - }, - { - "epoch": 30.72, - "learning_rate": 4.819434512349414e-08, - "loss": 3.7727, - "step": 2792500 - }, - { - "epoch": 30.73, - "learning_rate": 4.8180592991913746e-08, - "loss": 3.758, - "step": 2793000 - }, - { - "epoch": 30.73, - "learning_rate": 4.816684086033335e-08, - "loss": 3.759, - "step": 2793500 - }, - { - "epoch": 30.74, - "learning_rate": 4.815308872875296e-08, - "loss": 3.7775, - "step": 2794000 - }, - { - "epoch": 30.74, - "learning_rate": 4.813933659717256e-08, - "loss": 3.7637, - "step": 2794500 - }, - { - "epoch": 30.75, - "learning_rate": 4.812558446559217e-08, - "loss": 3.7672, - "step": 2795000 - }, - { - "epoch": 30.76, - "learning_rate": 4.811183233401177e-08, - "loss": 3.7612, - "step": 2795500 - }, - { - "epoch": 30.76, - "learning_rate": 4.8098080202431376e-08, - "loss": 3.7714, - "step": 2796000 - }, - { - "epoch": 30.77, - "learning_rate": 4.8084328070850985e-08, - "loss": 3.7668, - "step": 2796500 - }, - { - "epoch": 30.77, - "learning_rate": 4.807057593927059e-08, - "loss": 3.7779, - "step": 2797000 - }, - { - "epoch": 30.78, - "learning_rate": 4.805682380769019e-08, - "loss": 3.7697, - "step": 2797500 - }, - { - "epoch": 30.78, - "learning_rate": 4.80430716761098e-08, - "loss": 3.7632, - "step": 2798000 - }, - { - "epoch": 30.79, - "learning_rate": 4.80293195445294e-08, - "loss": 3.7711, - "step": 2798500 - }, - { - "epoch": 30.79, - "learning_rate": 4.8015567412949005e-08, - "loss": 3.7685, - "step": 2799000 - }, - { - "epoch": 30.8, - "learning_rate": 4.800181528136861e-08, - "loss": 3.75, - "step": 2799500 - }, - { - "epoch": 30.8, - "learning_rate": 4.798806314978822e-08, - "loss": 3.7665, - "step": 2800000 - }, - { - "epoch": 30.81, - "learning_rate": 4.797431101820782e-08, - "loss": 3.7597, - "step": 2800500 - }, - { - "epoch": 30.82, - "learning_rate": 4.796055888662742e-08, - "loss": 3.7627, - "step": 2801000 - }, - { - "epoch": 30.82, - "learning_rate": 4.794680675504703e-08, - "loss": 3.7578, - "step": 2801500 - }, - { - "epoch": 30.83, - "learning_rate": 4.7933054623466635e-08, - "loss": 3.7702, - "step": 2802000 - }, - { - "epoch": 30.83, - "learning_rate": 4.791930249188624e-08, - "loss": 3.7588, - "step": 2802500 - }, - { - "epoch": 30.84, - "learning_rate": 4.790555036030585e-08, - "loss": 3.7708, - "step": 2803000 - }, - { - "epoch": 30.84, - "learning_rate": 4.789179822872545e-08, - "loss": 3.7717, - "step": 2803500 - }, - { - "epoch": 30.85, - "learning_rate": 4.787804609714505e-08, - "loss": 3.7545, - "step": 2804000 - }, - { - "epoch": 30.85, - "learning_rate": 4.786429396556466e-08, - "loss": 3.7612, - "step": 2804500 - }, - { - "epoch": 30.86, - "learning_rate": 4.7850541833984264e-08, - "loss": 3.7512, - "step": 2805000 - }, - { - "epoch": 30.87, - "learning_rate": 4.783678970240387e-08, - "loss": 3.7577, - "step": 2805500 - }, - { - "epoch": 30.87, - "learning_rate": 4.7823037570823477e-08, - "loss": 3.7646, - "step": 2806000 - }, - { - "epoch": 30.88, - "learning_rate": 4.780928543924308e-08, - "loss": 3.755, - "step": 2806500 - }, - { - "epoch": 30.88, - "learning_rate": 4.779553330766268e-08, - "loss": 3.769, - "step": 2807000 - }, - { - "epoch": 30.89, - "learning_rate": 4.778178117608229e-08, - "loss": 3.767, - "step": 2807500 - }, - { - "epoch": 30.89, - "learning_rate": 4.7768029044501894e-08, - "loss": 3.7876, - "step": 2808000 - }, - { - "epoch": 30.9, - "learning_rate": 4.77542769129215e-08, - "loss": 3.7541, - "step": 2808500 - }, - { - "epoch": 30.9, - "learning_rate": 4.7740524781341106e-08, - "loss": 3.7731, - "step": 2809000 - }, - { - "epoch": 30.91, - "learning_rate": 4.772677264976071e-08, - "loss": 3.7782, - "step": 2809500 - }, - { - "epoch": 30.91, - "learning_rate": 4.771302051818031e-08, - "loss": 3.7528, - "step": 2810000 - }, - { - "epoch": 30.92, - "learning_rate": 4.769926838659992e-08, - "loss": 3.7666, - "step": 2810500 - }, - { - "epoch": 30.93, - "learning_rate": 4.7685516255019524e-08, - "loss": 3.7498, - "step": 2811000 - }, - { - "epoch": 30.93, - "learning_rate": 4.7671764123439126e-08, - "loss": 3.7747, - "step": 2811500 - }, - { - "epoch": 30.94, - "learning_rate": 4.7658011991858736e-08, - "loss": 3.7664, - "step": 2812000 - }, - { - "epoch": 30.94, - "learning_rate": 4.764425986027834e-08, - "loss": 3.7761, - "step": 2812500 - }, - { - "epoch": 30.95, - "learning_rate": 4.763050772869794e-08, - "loss": 3.7742, - "step": 2813000 - }, - { - "epoch": 30.95, - "learning_rate": 4.761675559711755e-08, - "loss": 3.7603, - "step": 2813500 - }, - { - "epoch": 30.96, - "learning_rate": 4.760300346553715e-08, - "loss": 3.7617, - "step": 2814000 - }, - { - "epoch": 30.96, - "learning_rate": 4.7589251333956756e-08, - "loss": 3.7783, - "step": 2814500 - }, - { - "epoch": 30.97, - "learning_rate": 4.7575499202376365e-08, - "loss": 3.7608, - "step": 2815000 - }, - { - "epoch": 30.98, - "learning_rate": 4.756174707079597e-08, - "loss": 3.7677, - "step": 2815500 - }, - { - "epoch": 30.98, - "learning_rate": 4.754799493921558e-08, - "loss": 3.7615, - "step": 2816000 - }, - { - "epoch": 30.99, - "learning_rate": 4.753424280763518e-08, - "loss": 3.7602, - "step": 2816500 - }, - { - "epoch": 30.99, - "learning_rate": 4.752049067605479e-08, - "loss": 3.7753, - "step": 2817000 - }, - { - "epoch": 31.0, - "learning_rate": 4.750673854447439e-08, - "loss": 3.7547, - "step": 2817500 - }, - { - "epoch": 31.0, - "eval_loss": 3.8350541591644287, - "eval_runtime": 6.1317, - "eval_samples_per_second": 253.436, - "step": 2817745 - }, - { - "epoch": 31.0, - "learning_rate": 4.7492986412894e-08, - "loss": 3.7804, - "step": 2818000 - }, - { - "epoch": 31.01, - "learning_rate": 4.7479234281313604e-08, - "loss": 3.7624, - "step": 2818500 - }, - { - "epoch": 31.01, - "learning_rate": 4.746548214973321e-08, - "loss": 3.7589, - "step": 2819000 - }, - { - "epoch": 31.02, - "learning_rate": 4.7451730018152816e-08, - "loss": 3.7668, - "step": 2819500 - }, - { - "epoch": 31.02, - "learning_rate": 4.743797788657242e-08, - "loss": 3.7825, - "step": 2820000 - }, - { - "epoch": 31.03, - "learning_rate": 4.742422575499202e-08, - "loss": 3.7547, - "step": 2820500 - }, - { - "epoch": 31.04, - "learning_rate": 4.741047362341163e-08, - "loss": 3.7509, - "step": 2821000 - }, - { - "epoch": 31.04, - "learning_rate": 4.7396721491831234e-08, - "loss": 3.7609, - "step": 2821500 - }, - { - "epoch": 31.05, - "learning_rate": 4.7382969360250837e-08, - "loss": 3.7754, - "step": 2822000 - }, - { - "epoch": 31.05, - "learning_rate": 4.7369217228670446e-08, - "loss": 3.7622, - "step": 2822500 - }, - { - "epoch": 31.06, - "learning_rate": 4.735546509709005e-08, - "loss": 3.7794, - "step": 2823000 - }, - { - "epoch": 31.06, - "learning_rate": 4.734171296550965e-08, - "loss": 3.7724, - "step": 2823500 - }, - { - "epoch": 31.07, - "learning_rate": 4.732796083392926e-08, - "loss": 3.7799, - "step": 2824000 - }, - { - "epoch": 31.07, - "learning_rate": 4.7314208702348863e-08, - "loss": 3.7618, - "step": 2824500 - }, - { - "epoch": 31.08, - "learning_rate": 4.7300456570768466e-08, - "loss": 3.7584, - "step": 2825000 - }, - { - "epoch": 31.09, - "learning_rate": 4.7286704439188076e-08, - "loss": 3.7682, - "step": 2825500 - }, - { - "epoch": 31.09, - "learning_rate": 4.727295230760768e-08, - "loss": 3.7581, - "step": 2826000 - }, - { - "epoch": 31.1, - "learning_rate": 4.725920017602728e-08, - "loss": 3.7488, - "step": 2826500 - }, - { - "epoch": 31.1, - "learning_rate": 4.724544804444689e-08, - "loss": 3.7473, - "step": 2827000 - }, - { - "epoch": 31.11, - "learning_rate": 4.723169591286649e-08, - "loss": 3.7451, - "step": 2827500 - }, - { - "epoch": 31.11, - "learning_rate": 4.7217943781286096e-08, - "loss": 3.7653, - "step": 2828000 - }, - { - "epoch": 31.12, - "learning_rate": 4.7204191649705705e-08, - "loss": 3.7823, - "step": 2828500 - }, - { - "epoch": 31.12, - "learning_rate": 4.719043951812531e-08, - "loss": 3.7602, - "step": 2829000 - }, - { - "epoch": 31.13, - "learning_rate": 4.717668738654491e-08, - "loss": 3.7746, - "step": 2829500 - }, - { - "epoch": 31.13, - "learning_rate": 4.716293525496452e-08, - "loss": 3.7704, - "step": 2830000 - }, - { - "epoch": 31.14, - "learning_rate": 4.714918312338412e-08, - "loss": 3.7468, - "step": 2830500 - }, - { - "epoch": 31.15, - "learning_rate": 4.7135430991803725e-08, - "loss": 3.7529, - "step": 2831000 - }, - { - "epoch": 31.15, - "learning_rate": 4.7121678860223335e-08, - "loss": 3.7644, - "step": 2831500 - }, - { - "epoch": 31.16, - "learning_rate": 4.710792672864294e-08, - "loss": 3.7472, - "step": 2832000 - }, - { - "epoch": 31.16, - "learning_rate": 4.709417459706254e-08, - "loss": 3.762, - "step": 2832500 - }, - { - "epoch": 31.17, - "learning_rate": 4.708042246548215e-08, - "loss": 3.7639, - "step": 2833000 - }, - { - "epoch": 31.17, - "learning_rate": 4.706667033390175e-08, - "loss": 3.7704, - "step": 2833500 - }, - { - "epoch": 31.18, - "learning_rate": 4.7052918202321355e-08, - "loss": 3.781, - "step": 2834000 - }, - { - "epoch": 31.18, - "learning_rate": 4.7039166070740964e-08, - "loss": 3.754, - "step": 2834500 - }, - { - "epoch": 31.19, - "learning_rate": 4.702541393916057e-08, - "loss": 3.7404, - "step": 2835000 - }, - { - "epoch": 31.2, - "learning_rate": 4.701166180758017e-08, - "loss": 3.7774, - "step": 2835500 - }, - { - "epoch": 31.2, - "learning_rate": 4.699790967599977e-08, - "loss": 3.7722, - "step": 2836000 - }, - { - "epoch": 31.21, - "learning_rate": 4.698415754441938e-08, - "loss": 3.7581, - "step": 2836500 - }, - { - "epoch": 31.21, - "learning_rate": 4.6970405412838985e-08, - "loss": 3.7647, - "step": 2837000 - }, - { - "epoch": 31.22, - "learning_rate": 4.695665328125859e-08, - "loss": 3.7561, - "step": 2837500 - }, - { - "epoch": 31.22, - "learning_rate": 4.6942901149678197e-08, - "loss": 3.7666, - "step": 2838000 - }, - { - "epoch": 31.23, - "learning_rate": 4.69291490180978e-08, - "loss": 3.7781, - "step": 2838500 - }, - { - "epoch": 31.23, - "learning_rate": 4.691539688651741e-08, - "loss": 3.7711, - "step": 2839000 - }, - { - "epoch": 31.24, - "learning_rate": 4.690164475493701e-08, - "loss": 3.7762, - "step": 2839500 - }, - { - "epoch": 31.24, - "learning_rate": 4.688789262335662e-08, - "loss": 3.7832, - "step": 2840000 - }, - { - "epoch": 31.25, - "learning_rate": 4.6874140491776223e-08, - "loss": 3.7576, - "step": 2840500 - }, - { - "epoch": 31.26, - "learning_rate": 4.6860388360195826e-08, - "loss": 3.7571, - "step": 2841000 - }, - { - "epoch": 31.26, - "learning_rate": 4.6846636228615436e-08, - "loss": 3.7655, - "step": 2841500 - }, - { - "epoch": 31.27, - "learning_rate": 4.683288409703504e-08, - "loss": 3.7752, - "step": 2842000 - }, - { - "epoch": 31.27, - "learning_rate": 4.681913196545465e-08, - "loss": 3.7684, - "step": 2842500 - }, - { - "epoch": 31.28, - "learning_rate": 4.680537983387425e-08, - "loss": 3.7697, - "step": 2843000 - }, - { - "epoch": 31.28, - "learning_rate": 4.679162770229385e-08, - "loss": 3.7572, - "step": 2843500 - }, - { - "epoch": 31.29, - "learning_rate": 4.677787557071346e-08, - "loss": 3.7696, - "step": 2844000 - }, - { - "epoch": 31.29, - "learning_rate": 4.6764123439133065e-08, - "loss": 3.7776, - "step": 2844500 - }, - { - "epoch": 31.3, - "learning_rate": 4.675037130755267e-08, - "loss": 3.7747, - "step": 2845000 - }, - { - "epoch": 31.31, - "learning_rate": 4.673661917597228e-08, - "loss": 3.7542, - "step": 2845500 - }, - { - "epoch": 31.31, - "learning_rate": 4.672286704439188e-08, - "loss": 3.7617, - "step": 2846000 - }, - { - "epoch": 31.32, - "learning_rate": 4.670911491281148e-08, - "loss": 3.7723, - "step": 2846500 - }, - { - "epoch": 31.32, - "learning_rate": 4.669536278123109e-08, - "loss": 3.7529, - "step": 2847000 - }, - { - "epoch": 31.33, - "learning_rate": 4.6681610649650695e-08, - "loss": 3.7716, - "step": 2847500 - }, - { - "epoch": 31.33, - "learning_rate": 4.66678585180703e-08, - "loss": 3.7513, - "step": 2848000 - }, - { - "epoch": 31.34, - "learning_rate": 4.665410638648991e-08, - "loss": 3.7451, - "step": 2848500 - }, - { - "epoch": 31.34, - "learning_rate": 4.664035425490951e-08, - "loss": 3.7455, - "step": 2849000 - }, - { - "epoch": 31.35, - "learning_rate": 4.662660212332911e-08, - "loss": 3.7479, - "step": 2849500 - }, - { - "epoch": 31.35, - "learning_rate": 4.661284999174872e-08, - "loss": 3.7753, - "step": 2850000 - }, - { - "epoch": 31.36, - "learning_rate": 4.6599097860168324e-08, - "loss": 3.7624, - "step": 2850500 - }, - { - "epoch": 31.37, - "learning_rate": 4.658534572858793e-08, - "loss": 3.7497, - "step": 2851000 - }, - { - "epoch": 31.37, - "learning_rate": 4.6571593597007536e-08, - "loss": 3.7774, - "step": 2851500 - }, - { - "epoch": 31.38, - "learning_rate": 4.655784146542714e-08, - "loss": 3.7462, - "step": 2852000 - }, - { - "epoch": 31.38, - "learning_rate": 4.654408933384674e-08, - "loss": 3.756, - "step": 2852500 - }, - { - "epoch": 31.39, - "learning_rate": 4.653033720226635e-08, - "loss": 3.7646, - "step": 2853000 - }, - { - "epoch": 31.39, - "learning_rate": 4.6516585070685954e-08, - "loss": 3.7675, - "step": 2853500 - }, - { - "epoch": 31.4, - "learning_rate": 4.6502832939105557e-08, - "loss": 3.7844, - "step": 2854000 - }, - { - "epoch": 31.4, - "learning_rate": 4.6489080807525166e-08, - "loss": 3.7741, - "step": 2854500 - }, - { - "epoch": 31.41, - "learning_rate": 4.647532867594477e-08, - "loss": 3.7726, - "step": 2855000 - }, - { - "epoch": 31.42, - "learning_rate": 4.646157654436437e-08, - "loss": 3.7899, - "step": 2855500 - }, - { - "epoch": 31.42, - "learning_rate": 4.644782441278398e-08, - "loss": 3.7748, - "step": 2856000 - }, - { - "epoch": 31.43, - "learning_rate": 4.6434072281203583e-08, - "loss": 3.7596, - "step": 2856500 - }, - { - "epoch": 31.43, - "learning_rate": 4.6420320149623186e-08, - "loss": 3.7571, - "step": 2857000 - }, - { - "epoch": 31.44, - "learning_rate": 4.6406568018042796e-08, - "loss": 3.773, - "step": 2857500 - }, - { - "epoch": 31.44, - "learning_rate": 4.63928158864624e-08, - "loss": 3.7514, - "step": 2858000 - }, - { - "epoch": 31.45, - "learning_rate": 4.6379063754882e-08, - "loss": 3.7691, - "step": 2858500 - }, - { - "epoch": 31.45, - "learning_rate": 4.636531162330161e-08, - "loss": 3.7671, - "step": 2859000 - }, - { - "epoch": 31.46, - "learning_rate": 4.635155949172121e-08, - "loss": 3.7645, - "step": 2859500 - }, - { - "epoch": 31.46, - "learning_rate": 4.6337807360140816e-08, - "loss": 3.7678, - "step": 2860000 - }, - { - "epoch": 31.47, - "learning_rate": 4.6324055228560425e-08, - "loss": 3.7796, - "step": 2860500 - }, - { - "epoch": 31.48, - "learning_rate": 4.631030309698003e-08, - "loss": 3.7344, - "step": 2861000 - }, - { - "epoch": 31.48, - "learning_rate": 4.629655096539963e-08, - "loss": 3.773, - "step": 2861500 - }, - { - "epoch": 31.49, - "learning_rate": 4.628279883381924e-08, - "loss": 3.7784, - "step": 2862000 - }, - { - "epoch": 31.49, - "learning_rate": 4.626904670223884e-08, - "loss": 3.7664, - "step": 2862500 - }, - { - "epoch": 31.5, - "learning_rate": 4.6255294570658445e-08, - "loss": 3.7637, - "step": 2863000 - }, - { - "epoch": 31.5, - "learning_rate": 4.6241542439078055e-08, - "loss": 3.76, - "step": 2863500 - }, - { - "epoch": 31.51, - "learning_rate": 4.622779030749766e-08, - "loss": 3.7663, - "step": 2864000 - }, - { - "epoch": 31.51, - "learning_rate": 4.621403817591727e-08, - "loss": 3.7632, - "step": 2864500 - }, - { - "epoch": 31.52, - "learning_rate": 4.620028604433687e-08, - "loss": 3.7738, - "step": 2865000 - }, - { - "epoch": 31.53, - "learning_rate": 4.618653391275648e-08, - "loss": 3.7734, - "step": 2865500 - }, - { - "epoch": 31.53, - "learning_rate": 4.617278178117608e-08, - "loss": 3.7733, - "step": 2866000 - }, - { - "epoch": 31.54, - "learning_rate": 4.615902964959569e-08, - "loss": 3.7602, - "step": 2866500 - }, - { - "epoch": 31.54, - "learning_rate": 4.6145277518015294e-08, - "loss": 3.759, - "step": 2867000 - }, - { - "epoch": 31.55, - "learning_rate": 4.6131525386434896e-08, - "loss": 3.7606, - "step": 2867500 - }, - { - "epoch": 31.55, - "learning_rate": 4.6117773254854506e-08, - "loss": 3.7613, - "step": 2868000 - }, - { - "epoch": 31.56, - "learning_rate": 4.610402112327411e-08, - "loss": 3.7669, - "step": 2868500 - }, - { - "epoch": 31.56, - "learning_rate": 4.609026899169371e-08, - "loss": 3.7778, - "step": 2869000 - }, - { - "epoch": 31.57, - "learning_rate": 4.607651686011332e-08, - "loss": 3.7637, - "step": 2869500 - }, - { - "epoch": 31.57, - "learning_rate": 4.606276472853292e-08, - "loss": 3.7706, - "step": 2870000 - }, - { - "epoch": 31.58, - "learning_rate": 4.6049012596952526e-08, - "loss": 3.7623, - "step": 2870500 - }, - { - "epoch": 31.59, - "learning_rate": 4.6035260465372135e-08, - "loss": 3.7506, - "step": 2871000 - }, - { - "epoch": 31.59, - "learning_rate": 4.602150833379174e-08, - "loss": 3.7803, - "step": 2871500 - }, - { - "epoch": 31.6, - "learning_rate": 4.600775620221134e-08, - "loss": 3.763, - "step": 2872000 - }, - { - "epoch": 31.6, - "learning_rate": 4.599400407063095e-08, - "loss": 3.7775, - "step": 2872500 - }, - { - "epoch": 31.61, - "learning_rate": 4.598025193905055e-08, - "loss": 3.7881, - "step": 2873000 - }, - { - "epoch": 31.61, - "learning_rate": 4.5966499807470156e-08, - "loss": 3.7619, - "step": 2873500 - }, - { - "epoch": 31.62, - "learning_rate": 4.595274767588976e-08, - "loss": 3.7801, - "step": 2874000 - }, - { - "epoch": 31.62, - "learning_rate": 4.593899554430937e-08, - "loss": 3.7487, - "step": 2874500 - }, - { - "epoch": 31.63, - "learning_rate": 4.592524341272897e-08, - "loss": 3.7724, - "step": 2875000 - }, - { - "epoch": 31.64, - "learning_rate": 4.591149128114857e-08, - "loss": 3.7762, - "step": 2875500 - }, - { - "epoch": 31.64, - "learning_rate": 4.589773914956818e-08, - "loss": 3.7645, - "step": 2876000 - }, - { - "epoch": 31.65, - "learning_rate": 4.5883987017987785e-08, - "loss": 3.7904, - "step": 2876500 - }, - { - "epoch": 31.65, - "learning_rate": 4.587023488640739e-08, - "loss": 3.7704, - "step": 2877000 - }, - { - "epoch": 31.66, - "learning_rate": 4.5856482754827e-08, - "loss": 3.7736, - "step": 2877500 - }, - { - "epoch": 31.66, - "learning_rate": 4.58427306232466e-08, - "loss": 3.7675, - "step": 2878000 - }, - { - "epoch": 31.67, - "learning_rate": 4.58289784916662e-08, - "loss": 3.7622, - "step": 2878500 - }, - { - "epoch": 31.67, - "learning_rate": 4.581522636008581e-08, - "loss": 3.7754, - "step": 2879000 - }, - { - "epoch": 31.68, - "learning_rate": 4.5801474228505415e-08, - "loss": 3.7542, - "step": 2879500 - }, - { - "epoch": 31.68, - "learning_rate": 4.578772209692502e-08, - "loss": 3.7504, - "step": 2880000 - }, - { - "epoch": 31.69, - "learning_rate": 4.577396996534463e-08, - "loss": 3.7874, - "step": 2880500 - }, - { - "epoch": 31.7, - "learning_rate": 4.576021783376423e-08, - "loss": 3.7741, - "step": 2881000 - }, - { - "epoch": 31.7, - "learning_rate": 4.574646570218383e-08, - "loss": 3.7626, - "step": 2881500 - }, - { - "epoch": 31.71, - "learning_rate": 4.573271357060344e-08, - "loss": 3.7653, - "step": 2882000 - }, - { - "epoch": 31.71, - "learning_rate": 4.5718961439023044e-08, - "loss": 3.7624, - "step": 2882500 - }, - { - "epoch": 31.72, - "learning_rate": 4.570520930744265e-08, - "loss": 3.7706, - "step": 2883000 - }, - { - "epoch": 31.72, - "learning_rate": 4.5691457175862256e-08, - "loss": 3.7713, - "step": 2883500 - }, - { - "epoch": 31.73, - "learning_rate": 4.567770504428186e-08, - "loss": 3.7734, - "step": 2884000 - }, - { - "epoch": 31.73, - "learning_rate": 4.566395291270146e-08, - "loss": 3.7543, - "step": 2884500 - }, - { - "epoch": 31.74, - "learning_rate": 4.565020078112107e-08, - "loss": 3.7646, - "step": 2885000 - }, - { - "epoch": 31.75, - "learning_rate": 4.5636448649540674e-08, - "loss": 3.7717, - "step": 2885500 - }, - { - "epoch": 31.75, - "learning_rate": 4.5622696517960277e-08, - "loss": 3.7728, - "step": 2886000 - }, - { - "epoch": 31.76, - "learning_rate": 4.5608944386379886e-08, - "loss": 3.7767, - "step": 2886500 - }, - { - "epoch": 31.76, - "learning_rate": 4.559519225479949e-08, - "loss": 3.7712, - "step": 2887000 - }, - { - "epoch": 31.77, - "learning_rate": 4.55814401232191e-08, - "loss": 3.7717, - "step": 2887500 - }, - { - "epoch": 31.77, - "learning_rate": 4.55676879916387e-08, - "loss": 3.7609, - "step": 2888000 - }, - { - "epoch": 31.78, - "learning_rate": 4.5553935860058304e-08, - "loss": 3.7534, - "step": 2888500 - }, - { - "epoch": 31.78, - "learning_rate": 4.554018372847791e-08, - "loss": 3.772, - "step": 2889000 - }, - { - "epoch": 31.79, - "learning_rate": 4.5526431596897516e-08, - "loss": 3.7745, - "step": 2889500 - }, - { - "epoch": 31.79, - "learning_rate": 4.5512679465317125e-08, - "loss": 3.7666, - "step": 2890000 - }, - { - "epoch": 31.8, - "learning_rate": 4.549892733373673e-08, - "loss": 3.7742, - "step": 2890500 - }, - { - "epoch": 31.81, - "learning_rate": 4.548517520215634e-08, - "loss": 3.766, - "step": 2891000 - }, - { - "epoch": 31.81, - "learning_rate": 4.547142307057594e-08, - "loss": 3.7757, - "step": 2891500 - }, - { - "epoch": 31.82, - "learning_rate": 4.545767093899554e-08, - "loss": 3.757, - "step": 2892000 - }, - { - "epoch": 31.82, - "learning_rate": 4.544391880741515e-08, - "loss": 3.7643, - "step": 2892500 - }, - { - "epoch": 31.83, - "learning_rate": 4.5430166675834755e-08, - "loss": 3.7695, - "step": 2893000 - }, - { - "epoch": 31.83, - "learning_rate": 4.541641454425436e-08, - "loss": 3.7485, - "step": 2893500 - }, - { - "epoch": 31.84, - "learning_rate": 4.5402662412673967e-08, - "loss": 3.7661, - "step": 2894000 - }, - { - "epoch": 31.84, - "learning_rate": 4.538891028109357e-08, - "loss": 3.7542, - "step": 2894500 - }, - { - "epoch": 31.85, - "learning_rate": 4.537515814951317e-08, - "loss": 3.7698, - "step": 2895000 - }, - { - "epoch": 31.86, - "learning_rate": 4.536140601793278e-08, - "loss": 3.7569, - "step": 2895500 - }, - { - "epoch": 31.86, - "learning_rate": 4.5347653886352384e-08, - "loss": 3.7834, - "step": 2896000 - }, - { - "epoch": 31.87, - "learning_rate": 4.533390175477199e-08, - "loss": 3.7492, - "step": 2896500 - }, - { - "epoch": 31.87, - "learning_rate": 4.5320149623191596e-08, - "loss": 3.7479, - "step": 2897000 - }, - { - "epoch": 31.88, - "learning_rate": 4.53063974916112e-08, - "loss": 3.7585, - "step": 2897500 - }, - { - "epoch": 31.88, - "learning_rate": 4.52926453600308e-08, - "loss": 3.7707, - "step": 2898000 - }, - { - "epoch": 31.89, - "learning_rate": 4.527889322845041e-08, - "loss": 3.7628, - "step": 2898500 - }, - { - "epoch": 31.89, - "learning_rate": 4.5265141096870014e-08, - "loss": 3.7557, - "step": 2899000 - }, - { - "epoch": 31.9, - "learning_rate": 4.5251388965289616e-08, - "loss": 3.7756, - "step": 2899500 - }, - { - "epoch": 31.9, - "learning_rate": 4.5237636833709226e-08, - "loss": 3.7599, - "step": 2900000 - }, - { - "epoch": 31.91, - "learning_rate": 4.522388470212883e-08, - "loss": 3.7688, - "step": 2900500 - }, - { - "epoch": 31.92, - "learning_rate": 4.521013257054843e-08, - "loss": 3.7723, - "step": 2901000 - }, - { - "epoch": 31.92, - "learning_rate": 4.519638043896804e-08, - "loss": 3.7697, - "step": 2901500 - }, - { - "epoch": 31.93, - "learning_rate": 4.5182628307387643e-08, - "loss": 3.7732, - "step": 2902000 - }, - { - "epoch": 31.93, - "learning_rate": 4.5168876175807246e-08, - "loss": 3.7892, - "step": 2902500 - }, - { - "epoch": 31.94, - "learning_rate": 4.5155124044226855e-08, - "loss": 3.7608, - "step": 2903000 - }, - { - "epoch": 31.94, - "learning_rate": 4.514137191264646e-08, - "loss": 3.7634, - "step": 2903500 - }, - { - "epoch": 31.95, - "learning_rate": 4.512761978106606e-08, - "loss": 3.7704, - "step": 2904000 - }, - { - "epoch": 31.95, - "learning_rate": 4.511386764948567e-08, - "loss": 3.7704, - "step": 2904500 - }, - { - "epoch": 31.96, - "learning_rate": 4.510011551790527e-08, - "loss": 3.7595, - "step": 2905000 - }, - { - "epoch": 31.97, - "learning_rate": 4.5086363386324876e-08, - "loss": 3.7566, - "step": 2905500 - }, - { - "epoch": 31.97, - "learning_rate": 4.5072611254744485e-08, - "loss": 3.75, - "step": 2906000 - }, - { - "epoch": 31.98, - "learning_rate": 4.505885912316409e-08, - "loss": 3.7582, - "step": 2906500 - }, - { - "epoch": 31.98, - "learning_rate": 4.504510699158369e-08, - "loss": 3.7454, - "step": 2907000 - }, - { - "epoch": 31.99, - "learning_rate": 4.50313548600033e-08, - "loss": 3.7708, - "step": 2907500 - }, - { - "epoch": 31.99, - "learning_rate": 4.50176027284229e-08, - "loss": 3.7597, - "step": 2908000 - }, - { - "epoch": 32.0, - "learning_rate": 4.5003850596842505e-08, - "loss": 3.7627, - "step": 2908500 - }, - { - "epoch": 32.0, - "eval_loss": 3.833939552307129, - "eval_runtime": 6.1377, - "eval_samples_per_second": 253.189, - "step": 2908640 - }, - { - "epoch": 32.0, - "learning_rate": 4.4990098465262115e-08, - "loss": 3.7739, - "step": 2909000 - }, - { - "epoch": 32.01, - "learning_rate": 4.497634633368172e-08, - "loss": 3.771, - "step": 2909500 - }, - { - "epoch": 32.01, - "learning_rate": 4.496259420210132e-08, - "loss": 3.7809, - "step": 2910000 - }, - { - "epoch": 32.02, - "learning_rate": 4.494884207052093e-08, - "loss": 3.7688, - "step": 2910500 - }, - { - "epoch": 32.03, - "learning_rate": 4.493508993894053e-08, - "loss": 3.7808, - "step": 2911000 - }, - { - "epoch": 32.03, - "learning_rate": 4.4921337807360135e-08, - "loss": 3.7662, - "step": 2911500 - }, - { - "epoch": 32.04, - "learning_rate": 4.4907585675779744e-08, - "loss": 3.7659, - "step": 2912000 - }, - { - "epoch": 32.04, - "learning_rate": 4.489383354419935e-08, - "loss": 3.7515, - "step": 2912500 - }, - { - "epoch": 32.05, - "learning_rate": 4.4880081412618956e-08, - "loss": 3.7561, - "step": 2913000 - }, - { - "epoch": 32.05, - "learning_rate": 4.486632928103856e-08, - "loss": 3.7819, - "step": 2913500 - }, - { - "epoch": 32.06, - "learning_rate": 4.485257714945817e-08, - "loss": 3.753, - "step": 2914000 - }, - { - "epoch": 32.06, - "learning_rate": 4.483882501787777e-08, - "loss": 3.7538, - "step": 2914500 - }, - { - "epoch": 32.07, - "learning_rate": 4.4825072886297374e-08, - "loss": 3.763, - "step": 2915000 - }, - { - "epoch": 32.08, - "learning_rate": 4.481132075471698e-08, - "loss": 3.7705, - "step": 2915500 - }, - { - "epoch": 32.08, - "learning_rate": 4.4797568623136586e-08, - "loss": 3.7716, - "step": 2916000 - }, - { - "epoch": 32.09, - "learning_rate": 4.478381649155619e-08, - "loss": 3.7653, - "step": 2916500 - }, - { - "epoch": 32.09, - "learning_rate": 4.47700643599758e-08, - "loss": 3.7478, - "step": 2917000 - }, - { - "epoch": 32.1, - "learning_rate": 4.47563122283954e-08, - "loss": 3.7677, - "step": 2917500 - }, - { - "epoch": 32.1, - "learning_rate": 4.4742560096815003e-08, - "loss": 3.7625, - "step": 2918000 - }, - { - "epoch": 32.11, - "learning_rate": 4.472880796523461e-08, - "loss": 3.7589, - "step": 2918500 - }, - { - "epoch": 32.11, - "learning_rate": 4.4715055833654215e-08, - "loss": 3.7732, - "step": 2919000 - }, - { - "epoch": 32.12, - "learning_rate": 4.470130370207382e-08, - "loss": 3.7632, - "step": 2919500 - }, - { - "epoch": 32.12, - "learning_rate": 4.468755157049343e-08, - "loss": 3.7839, - "step": 2920000 - }, - { - "epoch": 32.13, - "learning_rate": 4.467379943891303e-08, - "loss": 3.7659, - "step": 2920500 - }, - { - "epoch": 32.14, - "learning_rate": 4.466004730733263e-08, - "loss": 3.7841, - "step": 2921000 - }, - { - "epoch": 32.14, - "learning_rate": 4.464629517575224e-08, - "loss": 3.7682, - "step": 2921500 - }, - { - "epoch": 32.15, - "learning_rate": 4.4632543044171845e-08, - "loss": 3.7596, - "step": 2922000 - }, - { - "epoch": 32.15, - "learning_rate": 4.461879091259145e-08, - "loss": 3.766, - "step": 2922500 - }, - { - "epoch": 32.16, - "learning_rate": 4.460503878101106e-08, - "loss": 3.7626, - "step": 2923000 - }, - { - "epoch": 32.16, - "learning_rate": 4.459128664943066e-08, - "loss": 3.7563, - "step": 2923500 - }, - { - "epoch": 32.17, - "learning_rate": 4.457753451785026e-08, - "loss": 3.7722, - "step": 2924000 - }, - { - "epoch": 32.17, - "learning_rate": 4.456378238626987e-08, - "loss": 3.7595, - "step": 2924500 - }, - { - "epoch": 32.18, - "learning_rate": 4.4550030254689475e-08, - "loss": 3.7716, - "step": 2925000 - }, - { - "epoch": 32.19, - "learning_rate": 4.453627812310908e-08, - "loss": 3.7866, - "step": 2925500 - }, - { - "epoch": 32.19, - "learning_rate": 4.4522525991528687e-08, - "loss": 3.7672, - "step": 2926000 - }, - { - "epoch": 32.2, - "learning_rate": 4.450877385994829e-08, - "loss": 3.7701, - "step": 2926500 - }, - { - "epoch": 32.2, - "learning_rate": 4.449502172836789e-08, - "loss": 3.7589, - "step": 2927000 - }, - { - "epoch": 32.21, - "learning_rate": 4.44812695967875e-08, - "loss": 3.7759, - "step": 2927500 - }, - { - "epoch": 32.21, - "learning_rate": 4.4467517465207104e-08, - "loss": 3.7759, - "step": 2928000 - }, - { - "epoch": 32.22, - "learning_rate": 4.445376533362671e-08, - "loss": 3.7632, - "step": 2928500 - }, - { - "epoch": 32.22, - "learning_rate": 4.4440013202046316e-08, - "loss": 3.7689, - "step": 2929000 - }, - { - "epoch": 32.23, - "learning_rate": 4.442626107046592e-08, - "loss": 3.768, - "step": 2929500 - }, - { - "epoch": 32.23, - "learning_rate": 4.441250893888552e-08, - "loss": 3.7713, - "step": 2930000 - }, - { - "epoch": 32.24, - "learning_rate": 4.439875680730513e-08, - "loss": 3.759, - "step": 2930500 - }, - { - "epoch": 32.25, - "learning_rate": 4.4385004675724734e-08, - "loss": 3.7702, - "step": 2931000 - }, - { - "epoch": 32.25, - "learning_rate": 4.4371252544144337e-08, - "loss": 3.7557, - "step": 2931500 - }, - { - "epoch": 32.26, - "learning_rate": 4.4357500412563946e-08, - "loss": 3.755, - "step": 2932000 - }, - { - "epoch": 32.26, - "learning_rate": 4.434374828098355e-08, - "loss": 3.7709, - "step": 2932500 - }, - { - "epoch": 32.27, - "learning_rate": 4.432999614940315e-08, - "loss": 3.769, - "step": 2933000 - }, - { - "epoch": 32.27, - "learning_rate": 4.431624401782276e-08, - "loss": 3.7544, - "step": 2933500 - }, - { - "epoch": 32.28, - "learning_rate": 4.4302491886242363e-08, - "loss": 3.7651, - "step": 2934000 - }, - { - "epoch": 32.28, - "learning_rate": 4.4288739754661966e-08, - "loss": 3.7577, - "step": 2934500 - }, - { - "epoch": 32.29, - "learning_rate": 4.4274987623081575e-08, - "loss": 3.7505, - "step": 2935000 - }, - { - "epoch": 32.3, - "learning_rate": 4.426123549150118e-08, - "loss": 3.7566, - "step": 2935500 - }, - { - "epoch": 32.3, - "learning_rate": 4.424748335992078e-08, - "loss": 3.778, - "step": 2936000 - }, - { - "epoch": 32.31, - "learning_rate": 4.423373122834039e-08, - "loss": 3.7646, - "step": 2936500 - }, - { - "epoch": 32.31, - "learning_rate": 4.421997909675999e-08, - "loss": 3.7626, - "step": 2937000 - }, - { - "epoch": 32.32, - "learning_rate": 4.42062269651796e-08, - "loss": 3.7619, - "step": 2937500 - }, - { - "epoch": 32.32, - "learning_rate": 4.4192474833599205e-08, - "loss": 3.7657, - "step": 2938000 - }, - { - "epoch": 32.33, - "learning_rate": 4.4178722702018814e-08, - "loss": 3.7646, - "step": 2938500 - }, - { - "epoch": 32.33, - "learning_rate": 4.416497057043842e-08, - "loss": 3.762, - "step": 2939000 - }, - { - "epoch": 32.34, - "learning_rate": 4.4151218438858026e-08, - "loss": 3.7669, - "step": 2939500 - }, - { - "epoch": 32.35, - "learning_rate": 4.413746630727763e-08, - "loss": 3.759, - "step": 2940000 - }, - { - "epoch": 32.35, - "learning_rate": 4.412371417569723e-08, - "loss": 3.7604, - "step": 2940500 - }, - { - "epoch": 32.36, - "learning_rate": 4.410996204411684e-08, - "loss": 3.7646, - "step": 2941000 - }, - { - "epoch": 32.36, - "learning_rate": 4.4096209912536444e-08, - "loss": 3.7715, - "step": 2941500 - }, - { - "epoch": 32.37, - "learning_rate": 4.408245778095605e-08, - "loss": 3.7595, - "step": 2942000 - }, - { - "epoch": 32.37, - "learning_rate": 4.4068705649375656e-08, - "loss": 3.7614, - "step": 2942500 - }, - { - "epoch": 32.38, - "learning_rate": 4.405495351779526e-08, - "loss": 3.7506, - "step": 2943000 - }, - { - "epoch": 32.38, - "learning_rate": 4.404120138621486e-08, - "loss": 3.7703, - "step": 2943500 - }, - { - "epoch": 32.39, - "learning_rate": 4.402744925463447e-08, - "loss": 3.7626, - "step": 2944000 - }, - { - "epoch": 32.39, - "learning_rate": 4.4013697123054074e-08, - "loss": 3.7741, - "step": 2944500 - }, - { - "epoch": 32.4, - "learning_rate": 4.3999944991473676e-08, - "loss": 3.7726, - "step": 2945000 - }, - { - "epoch": 32.41, - "learning_rate": 4.3986192859893286e-08, - "loss": 3.7592, - "step": 2945500 - }, - { - "epoch": 32.41, - "learning_rate": 4.397244072831289e-08, - "loss": 3.7762, - "step": 2946000 - }, - { - "epoch": 32.42, - "learning_rate": 4.395868859673249e-08, - "loss": 3.7656, - "step": 2946500 - }, - { - "epoch": 32.42, - "learning_rate": 4.39449364651521e-08, - "loss": 3.7611, - "step": 2947000 - }, - { - "epoch": 32.43, - "learning_rate": 4.39311843335717e-08, - "loss": 3.7454, - "step": 2947500 - }, - { - "epoch": 32.43, - "learning_rate": 4.3917432201991306e-08, - "loss": 3.7511, - "step": 2948000 - }, - { - "epoch": 32.44, - "learning_rate": 4.390368007041091e-08, - "loss": 3.7713, - "step": 2948500 - }, - { - "epoch": 32.44, - "learning_rate": 4.388992793883052e-08, - "loss": 3.7681, - "step": 2949000 - }, - { - "epoch": 32.45, - "learning_rate": 4.387617580725012e-08, - "loss": 3.7506, - "step": 2949500 - }, - { - "epoch": 32.46, - "learning_rate": 4.3862423675669723e-08, - "loss": 3.7634, - "step": 2950000 - }, - { - "epoch": 32.46, - "learning_rate": 4.384867154408933e-08, - "loss": 3.7474, - "step": 2950500 - }, - { - "epoch": 32.47, - "learning_rate": 4.3834919412508935e-08, - "loss": 3.7637, - "step": 2951000 - }, - { - "epoch": 32.47, - "learning_rate": 4.382116728092854e-08, - "loss": 3.7749, - "step": 2951500 - }, - { - "epoch": 32.48, - "learning_rate": 4.380741514934815e-08, - "loss": 3.7541, - "step": 2952000 - }, - { - "epoch": 32.48, - "learning_rate": 4.379366301776775e-08, - "loss": 3.7772, - "step": 2952500 - }, - { - "epoch": 32.49, - "learning_rate": 4.377991088618735e-08, - "loss": 3.7436, - "step": 2953000 - }, - { - "epoch": 32.49, - "learning_rate": 4.376615875460696e-08, - "loss": 3.763, - "step": 2953500 - }, - { - "epoch": 32.5, - "learning_rate": 4.3752406623026565e-08, - "loss": 3.7643, - "step": 2954000 - }, - { - "epoch": 32.5, - "learning_rate": 4.373865449144617e-08, - "loss": 3.7901, - "step": 2954500 - }, - { - "epoch": 32.51, - "learning_rate": 4.372490235986578e-08, - "loss": 3.7566, - "step": 2955000 - }, - { - "epoch": 32.52, - "learning_rate": 4.371115022828538e-08, - "loss": 3.772, - "step": 2955500 - }, - { - "epoch": 32.52, - "learning_rate": 4.369739809670498e-08, - "loss": 3.7485, - "step": 2956000 - }, - { - "epoch": 32.53, - "learning_rate": 4.368364596512459e-08, - "loss": 3.7632, - "step": 2956500 - }, - { - "epoch": 32.53, - "learning_rate": 4.3669893833544195e-08, - "loss": 3.7468, - "step": 2957000 - }, - { - "epoch": 32.54, - "learning_rate": 4.36561417019638e-08, - "loss": 3.7573, - "step": 2957500 - }, - { - "epoch": 32.54, - "learning_rate": 4.364238957038341e-08, - "loss": 3.7436, - "step": 2958000 - }, - { - "epoch": 32.55, - "learning_rate": 4.362863743880301e-08, - "loss": 3.7799, - "step": 2958500 - }, - { - "epoch": 32.55, - "learning_rate": 4.361488530722261e-08, - "loss": 3.766, - "step": 2959000 - }, - { - "epoch": 32.56, - "learning_rate": 4.360113317564222e-08, - "loss": 3.7647, - "step": 2959500 - }, - { - "epoch": 32.57, - "learning_rate": 4.3587381044061824e-08, - "loss": 3.7733, - "step": 2960000 - }, - { - "epoch": 32.57, - "learning_rate": 4.3573628912481434e-08, - "loss": 3.7449, - "step": 2960500 - }, - { - "epoch": 32.58, - "learning_rate": 4.3559876780901036e-08, - "loss": 3.783, - "step": 2961000 - }, - { - "epoch": 32.58, - "learning_rate": 4.3546124649320646e-08, - "loss": 3.7437, - "step": 2961500 - }, - { - "epoch": 32.59, - "learning_rate": 4.353237251774025e-08, - "loss": 3.7606, - "step": 2962000 - }, - { - "epoch": 32.59, - "learning_rate": 4.351862038615986e-08, - "loss": 3.7609, - "step": 2962500 - }, - { - "epoch": 32.6, - "learning_rate": 4.350486825457946e-08, - "loss": 3.7618, - "step": 2963000 - }, - { - "epoch": 32.6, - "learning_rate": 4.349111612299906e-08, - "loss": 3.765, - "step": 2963500 - }, - { - "epoch": 32.61, - "learning_rate": 4.347736399141867e-08, - "loss": 3.7543, - "step": 2964000 - }, - { - "epoch": 32.61, - "learning_rate": 4.3463611859838275e-08, - "loss": 3.774, - "step": 2964500 - }, - { - "epoch": 32.62, - "learning_rate": 4.344985972825788e-08, - "loss": 3.7458, - "step": 2965000 - }, - { - "epoch": 32.63, - "learning_rate": 4.343610759667749e-08, - "loss": 3.7467, - "step": 2965500 - }, - { - "epoch": 32.63, - "learning_rate": 4.342235546509709e-08, - "loss": 3.7703, - "step": 2966000 - }, - { - "epoch": 32.64, - "learning_rate": 4.340860333351669e-08, - "loss": 3.7566, - "step": 2966500 - }, - { - "epoch": 32.64, - "learning_rate": 4.33948512019363e-08, - "loss": 3.7616, - "step": 2967000 - }, - { - "epoch": 32.65, - "learning_rate": 4.3381099070355905e-08, - "loss": 3.7825, - "step": 2967500 - }, - { - "epoch": 32.65, - "learning_rate": 4.336734693877551e-08, - "loss": 3.7414, - "step": 2968000 - }, - { - "epoch": 32.66, - "learning_rate": 4.335359480719512e-08, - "loss": 3.7699, - "step": 2968500 - }, - { - "epoch": 32.66, - "learning_rate": 4.333984267561472e-08, - "loss": 3.751, - "step": 2969000 - }, - { - "epoch": 32.67, - "learning_rate": 4.332609054403432e-08, - "loss": 3.7714, - "step": 2969500 - }, - { - "epoch": 32.68, - "learning_rate": 4.331233841245393e-08, - "loss": 3.7849, - "step": 2970000 - }, - { - "epoch": 32.68, - "learning_rate": 4.3298586280873534e-08, - "loss": 3.7592, - "step": 2970500 - }, - { - "epoch": 32.69, - "learning_rate": 4.328483414929314e-08, - "loss": 3.7627, - "step": 2971000 - }, - { - "epoch": 32.69, - "learning_rate": 4.3271082017712747e-08, - "loss": 3.7586, - "step": 2971500 - }, - { - "epoch": 32.7, - "learning_rate": 4.325732988613235e-08, - "loss": 3.7579, - "step": 2972000 - }, - { - "epoch": 32.7, - "learning_rate": 4.324357775455195e-08, - "loss": 3.7703, - "step": 2972500 - }, - { - "epoch": 32.71, - "learning_rate": 4.322982562297156e-08, - "loss": 3.7526, - "step": 2973000 - }, - { - "epoch": 32.71, - "learning_rate": 4.3216073491391164e-08, - "loss": 3.7641, - "step": 2973500 - }, - { - "epoch": 32.72, - "learning_rate": 4.320232135981077e-08, - "loss": 3.7771, - "step": 2974000 - }, - { - "epoch": 32.72, - "learning_rate": 4.3188569228230376e-08, - "loss": 3.7728, - "step": 2974500 - }, - { - "epoch": 32.73, - "learning_rate": 4.317481709664998e-08, - "loss": 3.755, - "step": 2975000 - }, - { - "epoch": 32.74, - "learning_rate": 4.316106496506958e-08, - "loss": 3.7741, - "step": 2975500 - }, - { - "epoch": 32.74, - "learning_rate": 4.314731283348919e-08, - "loss": 3.757, - "step": 2976000 - }, - { - "epoch": 32.75, - "learning_rate": 4.3133560701908794e-08, - "loss": 3.7664, - "step": 2976500 - }, - { - "epoch": 32.75, - "learning_rate": 4.3119808570328396e-08, - "loss": 3.7677, - "step": 2977000 - }, - { - "epoch": 32.76, - "learning_rate": 4.3106056438748006e-08, - "loss": 3.7583, - "step": 2977500 - }, - { - "epoch": 32.76, - "learning_rate": 4.309230430716761e-08, - "loss": 3.7527, - "step": 2978000 - }, - { - "epoch": 32.77, - "learning_rate": 4.307855217558721e-08, - "loss": 3.7548, - "step": 2978500 - }, - { - "epoch": 32.77, - "learning_rate": 4.306480004400682e-08, - "loss": 3.7546, - "step": 2979000 - }, - { - "epoch": 32.78, - "learning_rate": 4.305104791242642e-08, - "loss": 3.7429, - "step": 2979500 - }, - { - "epoch": 32.79, - "learning_rate": 4.3037295780846026e-08, - "loss": 3.7678, - "step": 2980000 - }, - { - "epoch": 32.79, - "learning_rate": 4.3023543649265635e-08, - "loss": 3.7551, - "step": 2980500 - }, - { - "epoch": 32.8, - "learning_rate": 4.300979151768524e-08, - "loss": 3.7555, - "step": 2981000 - }, - { - "epoch": 32.8, - "learning_rate": 4.299603938610484e-08, - "loss": 3.7727, - "step": 2981500 - }, - { - "epoch": 32.81, - "learning_rate": 4.298228725452445e-08, - "loss": 3.7689, - "step": 2982000 - }, - { - "epoch": 32.81, - "learning_rate": 4.296853512294405e-08, - "loss": 3.7536, - "step": 2982500 - }, - { - "epoch": 32.82, - "learning_rate": 4.2954782991363656e-08, - "loss": 3.7687, - "step": 2983000 - }, - { - "epoch": 32.82, - "learning_rate": 4.2941030859783265e-08, - "loss": 3.7444, - "step": 2983500 - }, - { - "epoch": 32.83, - "learning_rate": 4.292727872820287e-08, - "loss": 3.7678, - "step": 2984000 - }, - { - "epoch": 32.83, - "learning_rate": 4.291352659662247e-08, - "loss": 3.7579, - "step": 2984500 - }, - { - "epoch": 32.84, - "learning_rate": 4.289977446504208e-08, - "loss": 3.7618, - "step": 2985000 - }, - { - "epoch": 32.85, - "learning_rate": 4.288602233346168e-08, - "loss": 3.7664, - "step": 2985500 - }, - { - "epoch": 32.85, - "learning_rate": 4.287227020188129e-08, - "loss": 3.7642, - "step": 2986000 - }, - { - "epoch": 32.86, - "learning_rate": 4.2858518070300894e-08, - "loss": 3.757, - "step": 2986500 - }, - { - "epoch": 32.86, - "learning_rate": 4.2844765938720504e-08, - "loss": 3.7786, - "step": 2987000 - }, - { - "epoch": 32.87, - "learning_rate": 4.2831013807140107e-08, - "loss": 3.7681, - "step": 2987500 - }, - { - "epoch": 32.87, - "learning_rate": 4.281726167555971e-08, - "loss": 3.7579, - "step": 2988000 - }, - { - "epoch": 32.88, - "learning_rate": 4.280350954397932e-08, - "loss": 3.7778, - "step": 2988500 - }, - { - "epoch": 32.88, - "learning_rate": 4.278975741239892e-08, - "loss": 3.7516, - "step": 2989000 - }, - { - "epoch": 32.89, - "learning_rate": 4.2776005280818524e-08, - "loss": 3.7847, - "step": 2989500 - }, - { - "epoch": 32.9, - "learning_rate": 4.2762253149238133e-08, - "loss": 3.7717, - "step": 2990000 - }, - { - "epoch": 32.9, - "learning_rate": 4.2748501017657736e-08, - "loss": 3.7692, - "step": 2990500 - }, - { - "epoch": 32.91, - "learning_rate": 4.273474888607734e-08, - "loss": 3.7644, - "step": 2991000 - }, - { - "epoch": 32.91, - "learning_rate": 4.272099675449695e-08, - "loss": 3.7618, - "step": 2991500 - }, - { - "epoch": 32.92, - "learning_rate": 4.270724462291655e-08, - "loss": 3.7547, - "step": 2992000 - }, - { - "epoch": 32.92, - "learning_rate": 4.2693492491336154e-08, - "loss": 3.7741, - "step": 2992500 - }, - { - "epoch": 32.93, - "learning_rate": 4.267974035975576e-08, - "loss": 3.7592, - "step": 2993000 - }, - { - "epoch": 32.93, - "learning_rate": 4.2665988228175366e-08, - "loss": 3.7573, - "step": 2993500 - }, - { - "epoch": 32.94, - "learning_rate": 4.265223609659497e-08, - "loss": 3.7622, - "step": 2994000 - }, - { - "epoch": 32.94, - "learning_rate": 4.263848396501458e-08, - "loss": 3.7624, - "step": 2994500 - }, - { - "epoch": 32.95, - "learning_rate": 4.262473183343418e-08, - "loss": 3.7706, - "step": 2995000 - }, - { - "epoch": 32.96, - "learning_rate": 4.261097970185378e-08, - "loss": 3.7734, - "step": 2995500 - }, - { - "epoch": 32.96, - "learning_rate": 4.259722757027339e-08, - "loss": 3.749, - "step": 2996000 - }, - { - "epoch": 32.97, - "learning_rate": 4.2583475438692995e-08, - "loss": 3.7742, - "step": 2996500 - }, - { - "epoch": 32.97, - "learning_rate": 4.25697233071126e-08, - "loss": 3.7586, - "step": 2997000 - }, - { - "epoch": 32.98, - "learning_rate": 4.255597117553221e-08, - "loss": 3.76, - "step": 2997500 - }, - { - "epoch": 32.98, - "learning_rate": 4.254221904395181e-08, - "loss": 3.762, - "step": 2998000 - }, - { - "epoch": 32.99, - "learning_rate": 4.252846691237141e-08, - "loss": 3.7522, - "step": 2998500 - }, - { - "epoch": 32.99, - "learning_rate": 4.251471478079102e-08, - "loss": 3.7554, - "step": 2999000 - }, - { - "epoch": 33.0, - "learning_rate": 4.2500962649210625e-08, - "loss": 3.747, - "step": 2999500 - }, - { - "epoch": 33.0, - "eval_loss": 3.8329977989196777, - "eval_runtime": 6.1462, - "eval_samples_per_second": 252.838, - "step": 2999535 - }, - { - "epoch": 33.01, - "learning_rate": 4.248721051763023e-08, - "loss": 3.7509, - "step": 3000000 - }, - { - "epoch": 33.01, - "learning_rate": 4.247345838604984e-08, - "loss": 3.772, - "step": 3000500 - }, - { - "epoch": 33.02, - "learning_rate": 4.245970625446944e-08, - "loss": 3.7605, - "step": 3001000 - }, - { - "epoch": 33.02, - "learning_rate": 4.244595412288904e-08, - "loss": 3.7658, - "step": 3001500 - }, - { - "epoch": 33.03, - "learning_rate": 4.243220199130865e-08, - "loss": 3.7546, - "step": 3002000 - }, - { - "epoch": 33.03, - "learning_rate": 4.2418449859728254e-08, - "loss": 3.7638, - "step": 3002500 - }, - { - "epoch": 33.04, - "learning_rate": 4.240469772814786e-08, - "loss": 3.7481, - "step": 3003000 - }, - { - "epoch": 33.04, - "learning_rate": 4.2390945596567467e-08, - "loss": 3.7547, - "step": 3003500 - }, - { - "epoch": 33.05, - "learning_rate": 4.237719346498707e-08, - "loss": 3.7651, - "step": 3004000 - }, - { - "epoch": 33.05, - "learning_rate": 4.236344133340667e-08, - "loss": 3.7707, - "step": 3004500 - }, - { - "epoch": 33.06, - "learning_rate": 4.234968920182628e-08, - "loss": 3.7629, - "step": 3005000 - }, - { - "epoch": 33.07, - "learning_rate": 4.2335937070245884e-08, - "loss": 3.7581, - "step": 3005500 - }, - { - "epoch": 33.07, - "learning_rate": 4.232218493866549e-08, - "loss": 3.7644, - "step": 3006000 - }, - { - "epoch": 33.08, - "learning_rate": 4.2308432807085096e-08, - "loss": 3.7705, - "step": 3006500 - }, - { - "epoch": 33.08, - "learning_rate": 4.22946806755047e-08, - "loss": 3.7539, - "step": 3007000 - }, - { - "epoch": 33.09, - "learning_rate": 4.22809285439243e-08, - "loss": 3.7572, - "step": 3007500 - }, - { - "epoch": 33.09, - "learning_rate": 4.226717641234391e-08, - "loss": 3.7639, - "step": 3008000 - }, - { - "epoch": 33.1, - "learning_rate": 4.2253424280763514e-08, - "loss": 3.7854, - "step": 3008500 - }, - { - "epoch": 33.1, - "learning_rate": 4.223967214918312e-08, - "loss": 3.7894, - "step": 3009000 - }, - { - "epoch": 33.11, - "learning_rate": 4.2225920017602726e-08, - "loss": 3.749, - "step": 3009500 - }, - { - "epoch": 33.12, - "learning_rate": 4.2212167886022335e-08, - "loss": 3.7561, - "step": 3010000 - }, - { - "epoch": 33.12, - "learning_rate": 4.219841575444194e-08, - "loss": 3.7404, - "step": 3010500 - }, - { - "epoch": 33.13, - "learning_rate": 4.218466362286154e-08, - "loss": 3.7568, - "step": 3011000 - }, - { - "epoch": 33.13, - "learning_rate": 4.217091149128115e-08, - "loss": 3.7517, - "step": 3011500 - }, - { - "epoch": 33.14, - "learning_rate": 4.215715935970075e-08, - "loss": 3.7675, - "step": 3012000 - }, - { - "epoch": 33.14, - "learning_rate": 4.214340722812036e-08, - "loss": 3.7567, - "step": 3012500 - }, - { - "epoch": 33.15, - "learning_rate": 4.2129655096539965e-08, - "loss": 3.7464, - "step": 3013000 - }, - { - "epoch": 33.15, - "learning_rate": 4.211590296495957e-08, - "loss": 3.7698, - "step": 3013500 - }, - { - "epoch": 33.16, - "learning_rate": 4.210215083337918e-08, - "loss": 3.7666, - "step": 3014000 - }, - { - "epoch": 33.16, - "learning_rate": 4.208839870179878e-08, - "loss": 3.7744, - "step": 3014500 - }, - { - "epoch": 33.17, - "learning_rate": 4.207464657021838e-08, - "loss": 3.7653, - "step": 3015000 - }, - { - "epoch": 33.18, - "learning_rate": 4.206089443863799e-08, - "loss": 3.7471, - "step": 3015500 - }, - { - "epoch": 33.18, - "learning_rate": 4.2047142307057594e-08, - "loss": 3.7603, - "step": 3016000 - }, - { - "epoch": 33.19, - "learning_rate": 4.20333901754772e-08, - "loss": 3.7521, - "step": 3016500 - }, - { - "epoch": 33.19, - "learning_rate": 4.2019638043896806e-08, - "loss": 3.7689, - "step": 3017000 - }, - { - "epoch": 33.2, - "learning_rate": 4.200588591231641e-08, - "loss": 3.7625, - "step": 3017500 - }, - { - "epoch": 33.2, - "learning_rate": 4.199213378073601e-08, - "loss": 3.7758, - "step": 3018000 - }, - { - "epoch": 33.21, - "learning_rate": 4.197838164915562e-08, - "loss": 3.7647, - "step": 3018500 - }, - { - "epoch": 33.21, - "learning_rate": 4.1964629517575224e-08, - "loss": 3.7661, - "step": 3019000 - }, - { - "epoch": 33.22, - "learning_rate": 4.1950877385994827e-08, - "loss": 3.7566, - "step": 3019500 - }, - { - "epoch": 33.23, - "learning_rate": 4.1937125254414436e-08, - "loss": 3.7669, - "step": 3020000 - }, - { - "epoch": 33.23, - "learning_rate": 4.192337312283404e-08, - "loss": 3.7672, - "step": 3020500 - }, - { - "epoch": 33.24, - "learning_rate": 4.190962099125364e-08, - "loss": 3.7771, - "step": 3021000 - }, - { - "epoch": 33.24, - "learning_rate": 4.189586885967325e-08, - "loss": 3.753, - "step": 3021500 - }, - { - "epoch": 33.25, - "learning_rate": 4.1882116728092853e-08, - "loss": 3.7635, - "step": 3022000 - }, - { - "epoch": 33.25, - "learning_rate": 4.1868364596512456e-08, - "loss": 3.7706, - "step": 3022500 - }, - { - "epoch": 33.26, - "learning_rate": 4.1854612464932066e-08, - "loss": 3.7654, - "step": 3023000 - }, - { - "epoch": 33.26, - "learning_rate": 4.184086033335167e-08, - "loss": 3.7604, - "step": 3023500 - }, - { - "epoch": 33.27, - "learning_rate": 4.182710820177127e-08, - "loss": 3.7589, - "step": 3024000 - }, - { - "epoch": 33.27, - "learning_rate": 4.1813356070190874e-08, - "loss": 3.7553, - "step": 3024500 - }, - { - "epoch": 33.28, - "learning_rate": 4.179960393861048e-08, - "loss": 3.743, - "step": 3025000 - }, - { - "epoch": 33.29, - "learning_rate": 4.1785851807030086e-08, - "loss": 3.7732, - "step": 3025500 - }, - { - "epoch": 33.29, - "learning_rate": 4.177209967544969e-08, - "loss": 3.7586, - "step": 3026000 - }, - { - "epoch": 33.3, - "learning_rate": 4.17583475438693e-08, - "loss": 3.7716, - "step": 3026500 - }, - { - "epoch": 33.3, - "learning_rate": 4.17445954122889e-08, - "loss": 3.7426, - "step": 3027000 - }, - { - "epoch": 33.31, - "learning_rate": 4.17308432807085e-08, - "loss": 3.7566, - "step": 3027500 - }, - { - "epoch": 33.31, - "learning_rate": 4.171709114912811e-08, - "loss": 3.7571, - "step": 3028000 - }, - { - "epoch": 33.32, - "learning_rate": 4.1703339017547715e-08, - "loss": 3.7617, - "step": 3028500 - }, - { - "epoch": 33.32, - "learning_rate": 4.168958688596732e-08, - "loss": 3.7525, - "step": 3029000 - }, - { - "epoch": 33.33, - "learning_rate": 4.167583475438693e-08, - "loss": 3.7476, - "step": 3029500 - }, - { - "epoch": 33.34, - "learning_rate": 4.166208262280653e-08, - "loss": 3.7594, - "step": 3030000 - }, - { - "epoch": 33.34, - "learning_rate": 4.164833049122613e-08, - "loss": 3.7607, - "step": 3030500 - }, - { - "epoch": 33.35, - "learning_rate": 4.163457835964574e-08, - "loss": 3.7721, - "step": 3031000 - }, - { - "epoch": 33.35, - "learning_rate": 4.1620826228065345e-08, - "loss": 3.7623, - "step": 3031500 - }, - { - "epoch": 33.36, - "learning_rate": 4.160707409648495e-08, - "loss": 3.777, - "step": 3032000 - }, - { - "epoch": 33.36, - "learning_rate": 4.159332196490456e-08, - "loss": 3.7642, - "step": 3032500 - }, - { - "epoch": 33.37, - "learning_rate": 4.157956983332416e-08, - "loss": 3.7705, - "step": 3033000 - }, - { - "epoch": 33.37, - "learning_rate": 4.156581770174377e-08, - "loss": 3.7486, - "step": 3033500 - }, - { - "epoch": 33.38, - "learning_rate": 4.155206557016337e-08, - "loss": 3.763, - "step": 3034000 - }, - { - "epoch": 33.38, - "learning_rate": 4.153831343858298e-08, - "loss": 3.7491, - "step": 3034500 - }, - { - "epoch": 33.39, - "learning_rate": 4.1524561307002584e-08, - "loss": 3.7708, - "step": 3035000 - }, - { - "epoch": 33.4, - "learning_rate": 4.151080917542219e-08, - "loss": 3.7563, - "step": 3035500 - }, - { - "epoch": 33.4, - "learning_rate": 4.1497057043841796e-08, - "loss": 3.7398, - "step": 3036000 - }, - { - "epoch": 33.41, - "learning_rate": 4.14833049122614e-08, - "loss": 3.7767, - "step": 3036500 - }, - { - "epoch": 33.41, - "learning_rate": 4.146955278068101e-08, - "loss": 3.7629, - "step": 3037000 - }, - { - "epoch": 33.42, - "learning_rate": 4.145580064910061e-08, - "loss": 3.7446, - "step": 3037500 - }, - { - "epoch": 33.42, - "learning_rate": 4.1442048517520213e-08, - "loss": 3.7554, - "step": 3038000 - }, - { - "epoch": 33.43, - "learning_rate": 4.142829638593982e-08, - "loss": 3.7561, - "step": 3038500 - }, - { - "epoch": 33.43, - "learning_rate": 4.1414544254359426e-08, - "loss": 3.7707, - "step": 3039000 - }, - { - "epoch": 33.44, - "learning_rate": 4.140079212277903e-08, - "loss": 3.7551, - "step": 3039500 - }, - { - "epoch": 33.45, - "learning_rate": 4.138703999119864e-08, - "loss": 3.7714, - "step": 3040000 - }, - { - "epoch": 33.45, - "learning_rate": 4.137328785961824e-08, - "loss": 3.7602, - "step": 3040500 - }, - { - "epoch": 33.46, - "learning_rate": 4.135953572803784e-08, - "loss": 3.7592, - "step": 3041000 - }, - { - "epoch": 33.46, - "learning_rate": 4.134578359645745e-08, - "loss": 3.7626, - "step": 3041500 - }, - { - "epoch": 33.47, - "learning_rate": 4.1332031464877055e-08, - "loss": 3.7747, - "step": 3042000 - }, - { - "epoch": 33.47, - "learning_rate": 4.131827933329666e-08, - "loss": 3.7721, - "step": 3042500 - }, - { - "epoch": 33.48, - "learning_rate": 4.130452720171627e-08, - "loss": 3.7797, - "step": 3043000 - }, - { - "epoch": 33.48, - "learning_rate": 4.129077507013587e-08, - "loss": 3.784, - "step": 3043500 - }, - { - "epoch": 33.49, - "learning_rate": 4.127702293855547e-08, - "loss": 3.7671, - "step": 3044000 - }, - { - "epoch": 33.49, - "learning_rate": 4.126327080697508e-08, - "loss": 3.7624, - "step": 3044500 - }, - { - "epoch": 33.5, - "learning_rate": 4.1249518675394685e-08, - "loss": 3.7744, - "step": 3045000 - }, - { - "epoch": 33.51, - "learning_rate": 4.123576654381429e-08, - "loss": 3.7607, - "step": 3045500 - }, - { - "epoch": 33.51, - "learning_rate": 4.12220144122339e-08, - "loss": 3.757, - "step": 3046000 - }, - { - "epoch": 33.52, - "learning_rate": 4.12082622806535e-08, - "loss": 3.7394, - "step": 3046500 - }, - { - "epoch": 33.52, - "learning_rate": 4.11945101490731e-08, - "loss": 3.771, - "step": 3047000 - }, - { - "epoch": 33.53, - "learning_rate": 4.118075801749271e-08, - "loss": 3.7761, - "step": 3047500 - }, - { - "epoch": 33.53, - "learning_rate": 4.1167005885912314e-08, - "loss": 3.758, - "step": 3048000 - }, - { - "epoch": 33.54, - "learning_rate": 4.115325375433192e-08, - "loss": 3.751, - "step": 3048500 - }, - { - "epoch": 33.54, - "learning_rate": 4.1139501622751526e-08, - "loss": 3.7572, - "step": 3049000 - }, - { - "epoch": 33.55, - "learning_rate": 4.112574949117113e-08, - "loss": 3.7566, - "step": 3049500 - }, - { - "epoch": 33.56, - "learning_rate": 4.111199735959073e-08, - "loss": 3.7676, - "step": 3050000 - }, - { - "epoch": 33.56, - "learning_rate": 4.109824522801034e-08, - "loss": 3.7521, - "step": 3050500 - }, - { - "epoch": 33.57, - "learning_rate": 4.1084493096429944e-08, - "loss": 3.7559, - "step": 3051000 - }, - { - "epoch": 33.57, - "learning_rate": 4.1070740964849547e-08, - "loss": 3.772, - "step": 3051500 - }, - { - "epoch": 33.58, - "learning_rate": 4.1056988833269156e-08, - "loss": 3.7746, - "step": 3052000 - }, - { - "epoch": 33.58, - "learning_rate": 4.104323670168876e-08, - "loss": 3.7613, - "step": 3052500 - }, - { - "epoch": 33.59, - "learning_rate": 4.102948457010836e-08, - "loss": 3.7554, - "step": 3053000 - }, - { - "epoch": 33.59, - "learning_rate": 4.101573243852797e-08, - "loss": 3.7688, - "step": 3053500 - }, - { - "epoch": 33.6, - "learning_rate": 4.1001980306947573e-08, - "loss": 3.7644, - "step": 3054000 - }, - { - "epoch": 33.6, - "learning_rate": 4.0988228175367176e-08, - "loss": 3.7535, - "step": 3054500 - }, - { - "epoch": 33.61, - "learning_rate": 4.0974476043786786e-08, - "loss": 3.7637, - "step": 3055000 - }, - { - "epoch": 33.62, - "learning_rate": 4.096072391220639e-08, - "loss": 3.7706, - "step": 3055500 - }, - { - "epoch": 33.62, - "learning_rate": 4.094697178062599e-08, - "loss": 3.7621, - "step": 3056000 - }, - { - "epoch": 33.63, - "learning_rate": 4.09332196490456e-08, - "loss": 3.7837, - "step": 3056500 - }, - { - "epoch": 33.63, - "learning_rate": 4.09194675174652e-08, - "loss": 3.7603, - "step": 3057000 - }, - { - "epoch": 33.64, - "learning_rate": 4.090571538588481e-08, - "loss": 3.7445, - "step": 3057500 - }, - { - "epoch": 33.64, - "learning_rate": 4.0891963254304415e-08, - "loss": 3.7676, - "step": 3058000 - }, - { - "epoch": 33.65, - "learning_rate": 4.087821112272402e-08, - "loss": 3.764, - "step": 3058500 - }, - { - "epoch": 33.65, - "learning_rate": 4.086445899114363e-08, - "loss": 3.7582, - "step": 3059000 - }, - { - "epoch": 33.66, - "learning_rate": 4.085070685956323e-08, - "loss": 3.7507, - "step": 3059500 - }, - { - "epoch": 33.67, - "learning_rate": 4.083695472798284e-08, - "loss": 3.7639, - "step": 3060000 - }, - { - "epoch": 33.67, - "learning_rate": 4.082320259640244e-08, - "loss": 3.7751, - "step": 3060500 - }, - { - "epoch": 33.68, - "learning_rate": 4.080945046482205e-08, - "loss": 3.7752, - "step": 3061000 - }, - { - "epoch": 33.68, - "learning_rate": 4.0795698333241654e-08, - "loss": 3.7751, - "step": 3061500 - }, - { - "epoch": 33.69, - "learning_rate": 4.078194620166126e-08, - "loss": 3.7732, - "step": 3062000 - }, - { - "epoch": 33.69, - "learning_rate": 4.076819407008086e-08, - "loss": 3.7648, - "step": 3062500 - }, - { - "epoch": 33.7, - "learning_rate": 4.075444193850047e-08, - "loss": 3.7605, - "step": 3063000 - }, - { - "epoch": 33.7, - "learning_rate": 4.074068980692007e-08, - "loss": 3.7618, - "step": 3063500 - }, - { - "epoch": 33.71, - "learning_rate": 4.0726937675339674e-08, - "loss": 3.7506, - "step": 3064000 - }, - { - "epoch": 33.71, - "learning_rate": 4.0713185543759284e-08, - "loss": 3.7538, - "step": 3064500 - }, - { - "epoch": 33.72, - "learning_rate": 4.0699433412178886e-08, - "loss": 3.7593, - "step": 3065000 - }, - { - "epoch": 33.73, - "learning_rate": 4.068568128059849e-08, - "loss": 3.7742, - "step": 3065500 - }, - { - "epoch": 33.73, - "learning_rate": 4.06719291490181e-08, - "loss": 3.7644, - "step": 3066000 - }, - { - "epoch": 33.74, - "learning_rate": 4.06581770174377e-08, - "loss": 3.7353, - "step": 3066500 - }, - { - "epoch": 33.74, - "learning_rate": 4.0644424885857304e-08, - "loss": 3.7488, - "step": 3067000 - }, - { - "epoch": 33.75, - "learning_rate": 4.063067275427691e-08, - "loss": 3.7609, - "step": 3067500 - }, - { - "epoch": 33.75, - "learning_rate": 4.0616920622696516e-08, - "loss": 3.7697, - "step": 3068000 - }, - { - "epoch": 33.76, - "learning_rate": 4.060316849111612e-08, - "loss": 3.7595, - "step": 3068500 - }, - { - "epoch": 33.76, - "learning_rate": 4.058941635953573e-08, - "loss": 3.7318, - "step": 3069000 - }, - { - "epoch": 33.77, - "learning_rate": 4.057566422795533e-08, - "loss": 3.7651, - "step": 3069500 - }, - { - "epoch": 33.78, - "learning_rate": 4.0561912096374934e-08, - "loss": 3.7637, - "step": 3070000 - }, - { - "epoch": 33.78, - "learning_rate": 4.054815996479454e-08, - "loss": 3.7531, - "step": 3070500 - }, - { - "epoch": 33.79, - "learning_rate": 4.0534407833214146e-08, - "loss": 3.7617, - "step": 3071000 - }, - { - "epoch": 33.79, - "learning_rate": 4.052065570163375e-08, - "loss": 3.7627, - "step": 3071500 - }, - { - "epoch": 33.8, - "learning_rate": 4.050690357005336e-08, - "loss": 3.7748, - "step": 3072000 - }, - { - "epoch": 33.8, - "learning_rate": 4.049315143847296e-08, - "loss": 3.7788, - "step": 3072500 - }, - { - "epoch": 33.81, - "learning_rate": 4.047939930689256e-08, - "loss": 3.7751, - "step": 3073000 - }, - { - "epoch": 33.81, - "learning_rate": 4.046564717531217e-08, - "loss": 3.7731, - "step": 3073500 - }, - { - "epoch": 33.82, - "learning_rate": 4.0451895043731775e-08, - "loss": 3.7528, - "step": 3074000 - }, - { - "epoch": 33.82, - "learning_rate": 4.043814291215138e-08, - "loss": 3.7635, - "step": 3074500 - }, - { - "epoch": 33.83, - "learning_rate": 4.042439078057099e-08, - "loss": 3.771, - "step": 3075000 - }, - { - "epoch": 33.84, - "learning_rate": 4.041063864899059e-08, - "loss": 3.7479, - "step": 3075500 - }, - { - "epoch": 33.84, - "learning_rate": 4.039688651741019e-08, - "loss": 3.7559, - "step": 3076000 - }, - { - "epoch": 33.85, - "learning_rate": 4.03831343858298e-08, - "loss": 3.77, - "step": 3076500 - }, - { - "epoch": 33.85, - "learning_rate": 4.0369382254249405e-08, - "loss": 3.7472, - "step": 3077000 - }, - { - "epoch": 33.86, - "learning_rate": 4.035563012266901e-08, - "loss": 3.7628, - "step": 3077500 - }, - { - "epoch": 33.86, - "learning_rate": 4.034187799108862e-08, - "loss": 3.7652, - "step": 3078000 - }, - { - "epoch": 33.87, - "learning_rate": 4.032812585950822e-08, - "loss": 3.7565, - "step": 3078500 - }, - { - "epoch": 33.87, - "learning_rate": 4.031437372792782e-08, - "loss": 3.739, - "step": 3079000 - }, - { - "epoch": 33.88, - "learning_rate": 4.030062159634743e-08, - "loss": 3.742, - "step": 3079500 - }, - { - "epoch": 33.89, - "learning_rate": 4.0286869464767034e-08, - "loss": 3.7808, - "step": 3080000 - }, - { - "epoch": 33.89, - "learning_rate": 4.027311733318664e-08, - "loss": 3.7563, - "step": 3080500 - }, - { - "epoch": 33.9, - "learning_rate": 4.0259365201606246e-08, - "loss": 3.7559, - "step": 3081000 - }, - { - "epoch": 33.9, - "learning_rate": 4.024561307002585e-08, - "loss": 3.7584, - "step": 3081500 - }, - { - "epoch": 33.91, - "learning_rate": 4.023186093844546e-08, - "loss": 3.7686, - "step": 3082000 - }, - { - "epoch": 33.91, - "learning_rate": 4.021810880686506e-08, - "loss": 3.7599, - "step": 3082500 - }, - { - "epoch": 33.92, - "learning_rate": 4.020435667528467e-08, - "loss": 3.7561, - "step": 3083000 - }, - { - "epoch": 33.92, - "learning_rate": 4.019060454370427e-08, - "loss": 3.7647, - "step": 3083500 - }, - { - "epoch": 33.93, - "learning_rate": 4.017685241212388e-08, - "loss": 3.7669, - "step": 3084000 - }, - { - "epoch": 33.93, - "learning_rate": 4.0163100280543485e-08, - "loss": 3.7577, - "step": 3084500 - }, - { - "epoch": 33.94, - "learning_rate": 4.014934814896309e-08, - "loss": 3.7742, - "step": 3085000 - }, - { - "epoch": 33.95, - "learning_rate": 4.01355960173827e-08, - "loss": 3.7626, - "step": 3085500 - }, - { - "epoch": 33.95, - "learning_rate": 4.01218438858023e-08, - "loss": 3.7714, - "step": 3086000 - }, - { - "epoch": 33.96, - "learning_rate": 4.01080917542219e-08, - "loss": 3.7616, - "step": 3086500 - }, - { - "epoch": 33.96, - "learning_rate": 4.009433962264151e-08, - "loss": 3.7667, - "step": 3087000 - }, - { - "epoch": 33.97, - "learning_rate": 4.0080587491061115e-08, - "loss": 3.7657, - "step": 3087500 - }, - { - "epoch": 33.97, - "learning_rate": 4.006683535948072e-08, - "loss": 3.7608, - "step": 3088000 - }, - { - "epoch": 33.98, - "learning_rate": 4.005308322790033e-08, - "loss": 3.7442, - "step": 3088500 - }, - { - "epoch": 33.98, - "learning_rate": 4.003933109631993e-08, - "loss": 3.7704, - "step": 3089000 - }, - { - "epoch": 33.99, - "learning_rate": 4.002557896473953e-08, - "loss": 3.7562, - "step": 3089500 - }, - { - "epoch": 34.0, - "learning_rate": 4.001182683315914e-08, - "loss": 3.783, - "step": 3090000 - }, - { - "epoch": 34.0, - "eval_loss": 3.8324673175811768, - "eval_runtime": 6.1509, - "eval_samples_per_second": 252.646, - "step": 3090430 - }, - { - "epoch": 34.0, - "learning_rate": 3.9998074701578745e-08, - "loss": 3.7363, - "step": 3090500 - }, - { - "epoch": 34.01, - "learning_rate": 3.998432256999835e-08, - "loss": 3.7691, - "step": 3091000 - }, - { - "epoch": 34.01, - "learning_rate": 3.9970570438417957e-08, - "loss": 3.7711, - "step": 3091500 - }, - { - "epoch": 34.02, - "learning_rate": 3.995681830683756e-08, - "loss": 3.7681, - "step": 3092000 - }, - { - "epoch": 34.02, - "learning_rate": 3.994306617525716e-08, - "loss": 3.7686, - "step": 3092500 - }, - { - "epoch": 34.03, - "learning_rate": 3.992931404367677e-08, - "loss": 3.7628, - "step": 3093000 - }, - { - "epoch": 34.03, - "learning_rate": 3.9915561912096374e-08, - "loss": 3.7731, - "step": 3093500 - }, - { - "epoch": 34.04, - "learning_rate": 3.990180978051598e-08, - "loss": 3.7735, - "step": 3094000 - }, - { - "epoch": 34.04, - "learning_rate": 3.9888057648935586e-08, - "loss": 3.7526, - "step": 3094500 - }, - { - "epoch": 34.05, - "learning_rate": 3.987430551735519e-08, - "loss": 3.7488, - "step": 3095000 - }, - { - "epoch": 34.06, - "learning_rate": 3.986055338577479e-08, - "loss": 3.7633, - "step": 3095500 - }, - { - "epoch": 34.06, - "learning_rate": 3.98468012541944e-08, - "loss": 3.7538, - "step": 3096000 - }, - { - "epoch": 34.07, - "learning_rate": 3.9833049122614004e-08, - "loss": 3.7591, - "step": 3096500 - }, - { - "epoch": 34.07, - "learning_rate": 3.9819296991033606e-08, - "loss": 3.7755, - "step": 3097000 - }, - { - "epoch": 34.08, - "learning_rate": 3.9805544859453216e-08, - "loss": 3.7724, - "step": 3097500 - }, - { - "epoch": 34.08, - "learning_rate": 3.979179272787282e-08, - "loss": 3.7637, - "step": 3098000 - }, - { - "epoch": 34.09, - "learning_rate": 3.977804059629242e-08, - "loss": 3.7667, - "step": 3098500 - }, - { - "epoch": 34.09, - "learning_rate": 3.9764288464712024e-08, - "loss": 3.7622, - "step": 3099000 - }, - { - "epoch": 34.1, - "learning_rate": 3.9750536333131633e-08, - "loss": 3.7746, - "step": 3099500 - }, - { - "epoch": 34.11, - "learning_rate": 3.9736784201551236e-08, - "loss": 3.7509, - "step": 3100000 - }, - { - "epoch": 34.11, - "learning_rate": 3.972303206997084e-08, - "loss": 3.7528, - "step": 3100500 - }, - { - "epoch": 34.12, - "learning_rate": 3.970927993839045e-08, - "loss": 3.7828, - "step": 3101000 - }, - { - "epoch": 34.12, - "learning_rate": 3.969552780681005e-08, - "loss": 3.7774, - "step": 3101500 - }, - { - "epoch": 34.13, - "learning_rate": 3.9681775675229654e-08, - "loss": 3.7708, - "step": 3102000 - }, - { - "epoch": 34.13, - "learning_rate": 3.966802354364926e-08, - "loss": 3.7526, - "step": 3102500 - }, - { - "epoch": 34.14, - "learning_rate": 3.9654271412068866e-08, - "loss": 3.7436, - "step": 3103000 - }, - { - "epoch": 34.14, - "learning_rate": 3.964051928048847e-08, - "loss": 3.7613, - "step": 3103500 - }, - { - "epoch": 34.15, - "learning_rate": 3.962676714890808e-08, - "loss": 3.765, - "step": 3104000 - }, - { - "epoch": 34.15, - "learning_rate": 3.961301501732768e-08, - "loss": 3.7545, - "step": 3104500 - }, - { - "epoch": 34.16, - "learning_rate": 3.959926288574729e-08, - "loss": 3.7454, - "step": 3105000 - }, - { - "epoch": 34.17, - "learning_rate": 3.958551075416689e-08, - "loss": 3.7654, - "step": 3105500 - }, - { - "epoch": 34.17, - "learning_rate": 3.95717586225865e-08, - "loss": 3.7695, - "step": 3106000 - }, - { - "epoch": 34.18, - "learning_rate": 3.9558006491006105e-08, - "loss": 3.7572, - "step": 3106500 - }, - { - "epoch": 34.18, - "learning_rate": 3.954425435942571e-08, - "loss": 3.7565, - "step": 3107000 - }, - { - "epoch": 34.19, - "learning_rate": 3.9530502227845317e-08, - "loss": 3.7727, - "step": 3107500 - }, - { - "epoch": 34.19, - "learning_rate": 3.951675009626492e-08, - "loss": 3.7678, - "step": 3108000 - }, - { - "epoch": 34.2, - "learning_rate": 3.950299796468453e-08, - "loss": 3.756, - "step": 3108500 - }, - { - "epoch": 34.2, - "learning_rate": 3.948924583310413e-08, - "loss": 3.7523, - "step": 3109000 - }, - { - "epoch": 34.21, - "learning_rate": 3.9475493701523734e-08, - "loss": 3.7604, - "step": 3109500 - }, - { - "epoch": 34.22, - "learning_rate": 3.9461741569943344e-08, - "loss": 3.762, - "step": 3110000 - }, - { - "epoch": 34.22, - "learning_rate": 3.9447989438362946e-08, - "loss": 3.7738, - "step": 3110500 - }, - { - "epoch": 34.23, - "learning_rate": 3.943423730678255e-08, - "loss": 3.7746, - "step": 3111000 - }, - { - "epoch": 34.23, - "learning_rate": 3.942048517520216e-08, - "loss": 3.769, - "step": 3111500 - }, - { - "epoch": 34.24, - "learning_rate": 3.940673304362176e-08, - "loss": 3.7635, - "step": 3112000 - }, - { - "epoch": 34.24, - "learning_rate": 3.9392980912041364e-08, - "loss": 3.7458, - "step": 3112500 - }, - { - "epoch": 34.25, - "learning_rate": 3.937922878046097e-08, - "loss": 3.7535, - "step": 3113000 - }, - { - "epoch": 34.25, - "learning_rate": 3.9365476648880576e-08, - "loss": 3.7603, - "step": 3113500 - }, - { - "epoch": 34.26, - "learning_rate": 3.935172451730018e-08, - "loss": 3.7512, - "step": 3114000 - }, - { - "epoch": 34.26, - "learning_rate": 3.933797238571979e-08, - "loss": 3.7659, - "step": 3114500 - }, - { - "epoch": 34.27, - "learning_rate": 3.932422025413939e-08, - "loss": 3.7545, - "step": 3115000 - }, - { - "epoch": 34.28, - "learning_rate": 3.9310468122558993e-08, - "loss": 3.741, - "step": 3115500 - }, - { - "epoch": 34.28, - "learning_rate": 3.92967159909786e-08, - "loss": 3.7497, - "step": 3116000 - }, - { - "epoch": 34.29, - "learning_rate": 3.9282963859398205e-08, - "loss": 3.7511, - "step": 3116500 - }, - { - "epoch": 34.29, - "learning_rate": 3.926921172781781e-08, - "loss": 3.7459, - "step": 3117000 - }, - { - "epoch": 34.3, - "learning_rate": 3.925545959623742e-08, - "loss": 3.7645, - "step": 3117500 - }, - { - "epoch": 34.3, - "learning_rate": 3.924170746465702e-08, - "loss": 3.7506, - "step": 3118000 - }, - { - "epoch": 34.31, - "learning_rate": 3.922795533307662e-08, - "loss": 3.7576, - "step": 3118500 - }, - { - "epoch": 34.31, - "learning_rate": 3.921420320149623e-08, - "loss": 3.7676, - "step": 3119000 - }, - { - "epoch": 34.32, - "learning_rate": 3.9200451069915835e-08, - "loss": 3.7631, - "step": 3119500 - }, - { - "epoch": 34.33, - "learning_rate": 3.918669893833544e-08, - "loss": 3.7492, - "step": 3120000 - }, - { - "epoch": 34.33, - "learning_rate": 3.917294680675505e-08, - "loss": 3.747, - "step": 3120500 - }, - { - "epoch": 34.34, - "learning_rate": 3.915919467517465e-08, - "loss": 3.756, - "step": 3121000 - }, - { - "epoch": 34.34, - "learning_rate": 3.914544254359425e-08, - "loss": 3.759, - "step": 3121500 - }, - { - "epoch": 34.35, - "learning_rate": 3.913169041201386e-08, - "loss": 3.761, - "step": 3122000 - }, - { - "epoch": 34.35, - "learning_rate": 3.9117938280433465e-08, - "loss": 3.749, - "step": 3122500 - }, - { - "epoch": 34.36, - "learning_rate": 3.910418614885307e-08, - "loss": 3.7814, - "step": 3123000 - }, - { - "epoch": 34.36, - "learning_rate": 3.9090434017272677e-08, - "loss": 3.7494, - "step": 3123500 - }, - { - "epoch": 34.37, - "learning_rate": 3.907668188569228e-08, - "loss": 3.773, - "step": 3124000 - }, - { - "epoch": 34.37, - "learning_rate": 3.906292975411188e-08, - "loss": 3.7532, - "step": 3124500 - }, - { - "epoch": 34.38, - "learning_rate": 3.904917762253149e-08, - "loss": 3.7647, - "step": 3125000 - }, - { - "epoch": 34.39, - "learning_rate": 3.9035425490951094e-08, - "loss": 3.7446, - "step": 3125500 - }, - { - "epoch": 34.39, - "learning_rate": 3.90216733593707e-08, - "loss": 3.76, - "step": 3126000 - }, - { - "epoch": 34.4, - "learning_rate": 3.9007921227790306e-08, - "loss": 3.7679, - "step": 3126500 - }, - { - "epoch": 34.4, - "learning_rate": 3.899416909620991e-08, - "loss": 3.784, - "step": 3127000 - }, - { - "epoch": 34.41, - "learning_rate": 3.898041696462951e-08, - "loss": 3.7548, - "step": 3127500 - }, - { - "epoch": 34.41, - "learning_rate": 3.896666483304912e-08, - "loss": 3.7381, - "step": 3128000 - }, - { - "epoch": 34.42, - "learning_rate": 3.8952912701468724e-08, - "loss": 3.7516, - "step": 3128500 - }, - { - "epoch": 34.42, - "learning_rate": 3.8939160569888327e-08, - "loss": 3.7591, - "step": 3129000 - }, - { - "epoch": 34.43, - "learning_rate": 3.8925408438307936e-08, - "loss": 3.7594, - "step": 3129500 - }, - { - "epoch": 34.44, - "learning_rate": 3.891165630672754e-08, - "loss": 3.7691, - "step": 3130000 - }, - { - "epoch": 34.44, - "learning_rate": 3.889790417514715e-08, - "loss": 3.7688, - "step": 3130500 - }, - { - "epoch": 34.45, - "learning_rate": 3.888415204356675e-08, - "loss": 3.7578, - "step": 3131000 - }, - { - "epoch": 34.45, - "learning_rate": 3.887039991198636e-08, - "loss": 3.7741, - "step": 3131500 - }, - { - "epoch": 34.46, - "learning_rate": 3.885664778040596e-08, - "loss": 3.7459, - "step": 3132000 - }, - { - "epoch": 34.46, - "learning_rate": 3.884289564882557e-08, - "loss": 3.7577, - "step": 3132500 - }, - { - "epoch": 34.47, - "learning_rate": 3.8829143517245175e-08, - "loss": 3.7659, - "step": 3133000 - }, - { - "epoch": 34.47, - "learning_rate": 3.881539138566478e-08, - "loss": 3.7616, - "step": 3133500 - }, - { - "epoch": 34.48, - "learning_rate": 3.880163925408439e-08, - "loss": 3.745, - "step": 3134000 - }, - { - "epoch": 34.48, - "learning_rate": 3.878788712250399e-08, - "loss": 3.7574, - "step": 3134500 - }, - { - "epoch": 34.49, - "learning_rate": 3.877413499092359e-08, - "loss": 3.7498, - "step": 3135000 - }, - { - "epoch": 34.5, - "learning_rate": 3.87603828593432e-08, - "loss": 3.757, - "step": 3135500 - }, - { - "epoch": 34.5, - "learning_rate": 3.8746630727762804e-08, - "loss": 3.7834, - "step": 3136000 - }, - { - "epoch": 34.51, - "learning_rate": 3.873287859618241e-08, - "loss": 3.7823, - "step": 3136500 - }, - { - "epoch": 34.51, - "learning_rate": 3.871912646460201e-08, - "loss": 3.7526, - "step": 3137000 - }, - { - "epoch": 34.52, - "learning_rate": 3.870537433302162e-08, - "loss": 3.7573, - "step": 3137500 - }, - { - "epoch": 34.52, - "learning_rate": 3.869162220144122e-08, - "loss": 3.7536, - "step": 3138000 - }, - { - "epoch": 34.53, - "learning_rate": 3.8677870069860825e-08, - "loss": 3.7845, - "step": 3138500 - }, - { - "epoch": 34.53, - "learning_rate": 3.8664117938280434e-08, - "loss": 3.7693, - "step": 3139000 - }, - { - "epoch": 34.54, - "learning_rate": 3.865036580670004e-08, - "loss": 3.7439, - "step": 3139500 - }, - { - "epoch": 34.55, - "learning_rate": 3.863661367511964e-08, - "loss": 3.7533, - "step": 3140000 - }, - { - "epoch": 34.55, - "learning_rate": 3.862286154353925e-08, - "loss": 3.7679, - "step": 3140500 - }, - { - "epoch": 34.56, - "learning_rate": 3.860910941195885e-08, - "loss": 3.7505, - "step": 3141000 - }, - { - "epoch": 34.56, - "learning_rate": 3.8595357280378454e-08, - "loss": 3.768, - "step": 3141500 - }, - { - "epoch": 34.57, - "learning_rate": 3.8581605148798064e-08, - "loss": 3.754, - "step": 3142000 - }, - { - "epoch": 34.57, - "learning_rate": 3.8567853017217666e-08, - "loss": 3.7658, - "step": 3142500 - }, - { - "epoch": 34.58, - "learning_rate": 3.855410088563727e-08, - "loss": 3.7819, - "step": 3143000 - }, - { - "epoch": 34.58, - "learning_rate": 3.854034875405688e-08, - "loss": 3.7366, - "step": 3143500 - }, - { - "epoch": 34.59, - "learning_rate": 3.852659662247648e-08, - "loss": 3.7628, - "step": 3144000 - }, - { - "epoch": 34.59, - "learning_rate": 3.8512844490896084e-08, - "loss": 3.7597, - "step": 3144500 - }, - { - "epoch": 34.6, - "learning_rate": 3.849909235931569e-08, - "loss": 3.7584, - "step": 3145000 - }, - { - "epoch": 34.61, - "learning_rate": 3.8485340227735296e-08, - "loss": 3.7631, - "step": 3145500 - }, - { - "epoch": 34.61, - "learning_rate": 3.84715880961549e-08, - "loss": 3.7451, - "step": 3146000 - }, - { - "epoch": 34.62, - "learning_rate": 3.845783596457451e-08, - "loss": 3.7569, - "step": 3146500 - }, - { - "epoch": 34.62, - "learning_rate": 3.844408383299411e-08, - "loss": 3.7817, - "step": 3147000 - }, - { - "epoch": 34.63, - "learning_rate": 3.8430331701413713e-08, - "loss": 3.7546, - "step": 3147500 - }, - { - "epoch": 34.63, - "learning_rate": 3.841657956983332e-08, - "loss": 3.7719, - "step": 3148000 - }, - { - "epoch": 34.64, - "learning_rate": 3.8402827438252925e-08, - "loss": 3.7693, - "step": 3148500 - }, - { - "epoch": 34.64, - "learning_rate": 3.838907530667253e-08, - "loss": 3.7647, - "step": 3149000 - }, - { - "epoch": 34.65, - "learning_rate": 3.837532317509214e-08, - "loss": 3.7741, - "step": 3149500 - }, - { - "epoch": 34.66, - "learning_rate": 3.836157104351174e-08, - "loss": 3.7681, - "step": 3150000 - }, - { - "epoch": 34.66, - "learning_rate": 3.834781891193134e-08, - "loss": 3.7631, - "step": 3150500 - }, - { - "epoch": 34.67, - "learning_rate": 3.833406678035095e-08, - "loss": 3.7656, - "step": 3151000 - }, - { - "epoch": 34.67, - "learning_rate": 3.8320314648770555e-08, - "loss": 3.7562, - "step": 3151500 - }, - { - "epoch": 34.68, - "learning_rate": 3.830656251719016e-08, - "loss": 3.7427, - "step": 3152000 - }, - { - "epoch": 34.68, - "learning_rate": 3.829281038560977e-08, - "loss": 3.7609, - "step": 3152500 - }, - { - "epoch": 34.69, - "learning_rate": 3.827905825402937e-08, - "loss": 3.7662, - "step": 3153000 - }, - { - "epoch": 34.69, - "learning_rate": 3.826530612244898e-08, - "loss": 3.7525, - "step": 3153500 - }, - { - "epoch": 34.7, - "learning_rate": 3.825155399086858e-08, - "loss": 3.7613, - "step": 3154000 - }, - { - "epoch": 34.7, - "learning_rate": 3.8237801859288185e-08, - "loss": 3.7575, - "step": 3154500 - }, - { - "epoch": 34.71, - "learning_rate": 3.8224049727707794e-08, - "loss": 3.7577, - "step": 3155000 - }, - { - "epoch": 34.72, - "learning_rate": 3.82102975961274e-08, - "loss": 3.7785, - "step": 3155500 - }, - { - "epoch": 34.72, - "learning_rate": 3.8196545464547006e-08, - "loss": 3.7529, - "step": 3156000 - }, - { - "epoch": 34.73, - "learning_rate": 3.818279333296661e-08, - "loss": 3.7678, - "step": 3156500 - }, - { - "epoch": 34.73, - "learning_rate": 3.816904120138622e-08, - "loss": 3.7747, - "step": 3157000 - }, - { - "epoch": 34.74, - "learning_rate": 3.815528906980582e-08, - "loss": 3.7659, - "step": 3157500 - }, - { - "epoch": 34.74, - "learning_rate": 3.8141536938225424e-08, - "loss": 3.7625, - "step": 3158000 - }, - { - "epoch": 34.75, - "learning_rate": 3.812778480664503e-08, - "loss": 3.7625, - "step": 3158500 - }, - { - "epoch": 34.75, - "learning_rate": 3.8114032675064636e-08, - "loss": 3.7487, - "step": 3159000 - }, - { - "epoch": 34.76, - "learning_rate": 3.810028054348424e-08, - "loss": 3.7496, - "step": 3159500 - }, - { - "epoch": 34.77, - "learning_rate": 3.808652841190385e-08, - "loss": 3.7513, - "step": 3160000 - }, - { - "epoch": 34.77, - "learning_rate": 3.807277628032345e-08, - "loss": 3.7657, - "step": 3160500 - }, - { - "epoch": 34.78, - "learning_rate": 3.805902414874305e-08, - "loss": 3.7621, - "step": 3161000 - }, - { - "epoch": 34.78, - "learning_rate": 3.804527201716266e-08, - "loss": 3.7579, - "step": 3161500 - }, - { - "epoch": 34.79, - "learning_rate": 3.8031519885582265e-08, - "loss": 3.749, - "step": 3162000 - }, - { - "epoch": 34.79, - "learning_rate": 3.801776775400187e-08, - "loss": 3.7435, - "step": 3162500 - }, - { - "epoch": 34.8, - "learning_rate": 3.800401562242148e-08, - "loss": 3.7611, - "step": 3163000 - }, - { - "epoch": 34.8, - "learning_rate": 3.799026349084108e-08, - "loss": 3.7477, - "step": 3163500 - }, - { - "epoch": 34.81, - "learning_rate": 3.797651135926068e-08, - "loss": 3.7631, - "step": 3164000 - }, - { - "epoch": 34.81, - "learning_rate": 3.796275922768029e-08, - "loss": 3.7659, - "step": 3164500 - }, - { - "epoch": 34.82, - "learning_rate": 3.7949007096099895e-08, - "loss": 3.7622, - "step": 3165000 - }, - { - "epoch": 34.83, - "learning_rate": 3.79352549645195e-08, - "loss": 3.7454, - "step": 3165500 - }, - { - "epoch": 34.83, - "learning_rate": 3.792150283293911e-08, - "loss": 3.7585, - "step": 3166000 - }, - { - "epoch": 34.84, - "learning_rate": 3.790775070135871e-08, - "loss": 3.7771, - "step": 3166500 - }, - { - "epoch": 34.84, - "learning_rate": 3.789399856977831e-08, - "loss": 3.746, - "step": 3167000 - }, - { - "epoch": 34.85, - "learning_rate": 3.788024643819792e-08, - "loss": 3.7558, - "step": 3167500 - }, - { - "epoch": 34.85, - "learning_rate": 3.7866494306617524e-08, - "loss": 3.7619, - "step": 3168000 - }, - { - "epoch": 34.86, - "learning_rate": 3.785274217503713e-08, - "loss": 3.7612, - "step": 3168500 - }, - { - "epoch": 34.86, - "learning_rate": 3.7838990043456737e-08, - "loss": 3.7685, - "step": 3169000 - }, - { - "epoch": 34.87, - "learning_rate": 3.782523791187634e-08, - "loss": 3.746, - "step": 3169500 - }, - { - "epoch": 34.88, - "learning_rate": 3.781148578029594e-08, - "loss": 3.7522, - "step": 3170000 - }, - { - "epoch": 34.88, - "learning_rate": 3.779773364871555e-08, - "loss": 3.7454, - "step": 3170500 - }, - { - "epoch": 34.89, - "learning_rate": 3.7783981517135154e-08, - "loss": 3.7675, - "step": 3171000 - }, - { - "epoch": 34.89, - "learning_rate": 3.777022938555476e-08, - "loss": 3.7658, - "step": 3171500 - }, - { - "epoch": 34.9, - "learning_rate": 3.7756477253974366e-08, - "loss": 3.763, - "step": 3172000 - }, - { - "epoch": 34.9, - "learning_rate": 3.774272512239397e-08, - "loss": 3.7854, - "step": 3172500 - }, - { - "epoch": 34.91, - "learning_rate": 3.772897299081357e-08, - "loss": 3.7661, - "step": 3173000 - }, - { - "epoch": 34.91, - "learning_rate": 3.771522085923318e-08, - "loss": 3.7764, - "step": 3173500 - }, - { - "epoch": 34.92, - "learning_rate": 3.7701468727652784e-08, - "loss": 3.7697, - "step": 3174000 - }, - { - "epoch": 34.92, - "learning_rate": 3.7687716596072386e-08, - "loss": 3.7649, - "step": 3174500 - }, - { - "epoch": 34.93, - "learning_rate": 3.767396446449199e-08, - "loss": 3.7716, - "step": 3175000 - }, - { - "epoch": 34.94, - "learning_rate": 3.76602123329116e-08, - "loss": 3.7511, - "step": 3175500 - }, - { - "epoch": 34.94, - "learning_rate": 3.76464602013312e-08, - "loss": 3.7532, - "step": 3176000 - }, - { - "epoch": 34.95, - "learning_rate": 3.7632708069750804e-08, - "loss": 3.7483, - "step": 3176500 - }, - { - "epoch": 34.95, - "learning_rate": 3.761895593817041e-08, - "loss": 3.7787, - "step": 3177000 - }, - { - "epoch": 34.96, - "learning_rate": 3.7605203806590016e-08, - "loss": 3.7528, - "step": 3177500 - }, - { - "epoch": 34.96, - "learning_rate": 3.7591451675009625e-08, - "loss": 3.7322, - "step": 3178000 - }, - { - "epoch": 34.97, - "learning_rate": 3.757769954342923e-08, - "loss": 3.7456, - "step": 3178500 - }, - { - "epoch": 34.97, - "learning_rate": 3.756394741184884e-08, - "loss": 3.7534, - "step": 3179000 - }, - { - "epoch": 34.98, - "learning_rate": 3.755019528026844e-08, - "loss": 3.75, - "step": 3179500 - }, - { - "epoch": 34.99, - "learning_rate": 3.753644314868805e-08, - "loss": 3.7583, - "step": 3180000 - }, - { - "epoch": 34.99, - "learning_rate": 3.752269101710765e-08, - "loss": 3.7479, - "step": 3180500 - }, - { - "epoch": 35.0, - "learning_rate": 3.7508938885527255e-08, - "loss": 3.7683, - "step": 3181000 - }, - { - "epoch": 35.0, - "eval_loss": 3.830986261367798, - "eval_runtime": 6.1438, - "eval_samples_per_second": 252.938, - "step": 3181325 - }, - { - "epoch": 35.0, - "learning_rate": 3.7495186753946864e-08, - "loss": 3.763, - "step": 3181500 - }, - { - "epoch": 35.01, - "learning_rate": 3.748143462236647e-08, - "loss": 3.7628, - "step": 3182000 - }, - { - "epoch": 35.01, - "learning_rate": 3.746768249078607e-08, - "loss": 3.7443, - "step": 3182500 - }, - { - "epoch": 35.02, - "learning_rate": 3.745393035920568e-08, - "loss": 3.7576, - "step": 3183000 - }, - { - "epoch": 35.02, - "learning_rate": 3.744017822762528e-08, - "loss": 3.7796, - "step": 3183500 - }, - { - "epoch": 35.03, - "learning_rate": 3.7426426096044884e-08, - "loss": 3.7635, - "step": 3184000 - }, - { - "epoch": 35.03, - "learning_rate": 3.7412673964464494e-08, - "loss": 3.7604, - "step": 3184500 - }, - { - "epoch": 35.04, - "learning_rate": 3.7398921832884097e-08, - "loss": 3.7572, - "step": 3185000 - }, - { - "epoch": 35.05, - "learning_rate": 3.73851697013037e-08, - "loss": 3.7542, - "step": 3185500 - }, - { - "epoch": 35.05, - "learning_rate": 3.737141756972331e-08, - "loss": 3.772, - "step": 3186000 - }, - { - "epoch": 35.06, - "learning_rate": 3.735766543814291e-08, - "loss": 3.7616, - "step": 3186500 - }, - { - "epoch": 35.06, - "learning_rate": 3.7343913306562514e-08, - "loss": 3.7499, - "step": 3187000 - }, - { - "epoch": 35.07, - "learning_rate": 3.7330161174982123e-08, - "loss": 3.7592, - "step": 3187500 - }, - { - "epoch": 35.07, - "learning_rate": 3.7316409043401726e-08, - "loss": 3.7566, - "step": 3188000 - }, - { - "epoch": 35.08, - "learning_rate": 3.730265691182133e-08, - "loss": 3.7733, - "step": 3188500 - }, - { - "epoch": 35.08, - "learning_rate": 3.728890478024094e-08, - "loss": 3.7569, - "step": 3189000 - }, - { - "epoch": 35.09, - "learning_rate": 3.727515264866054e-08, - "loss": 3.7502, - "step": 3189500 - }, - { - "epoch": 35.1, - "learning_rate": 3.7261400517080144e-08, - "loss": 3.7651, - "step": 3190000 - }, - { - "epoch": 35.1, - "learning_rate": 3.724764838549975e-08, - "loss": 3.7607, - "step": 3190500 - }, - { - "epoch": 35.11, - "learning_rate": 3.7233896253919356e-08, - "loss": 3.7418, - "step": 3191000 - }, - { - "epoch": 35.11, - "learning_rate": 3.722014412233896e-08, - "loss": 3.7648, - "step": 3191500 - }, - { - "epoch": 35.12, - "learning_rate": 3.720639199075857e-08, - "loss": 3.7653, - "step": 3192000 - }, - { - "epoch": 35.12, - "learning_rate": 3.719263985917817e-08, - "loss": 3.7658, - "step": 3192500 - }, - { - "epoch": 35.13, - "learning_rate": 3.717888772759777e-08, - "loss": 3.7577, - "step": 3193000 - }, - { - "epoch": 35.13, - "learning_rate": 3.716513559601738e-08, - "loss": 3.7496, - "step": 3193500 - }, - { - "epoch": 35.14, - "learning_rate": 3.7151383464436985e-08, - "loss": 3.7812, - "step": 3194000 - }, - { - "epoch": 35.14, - "learning_rate": 3.713763133285659e-08, - "loss": 3.7793, - "step": 3194500 - }, - { - "epoch": 35.15, - "learning_rate": 3.71238792012762e-08, - "loss": 3.7575, - "step": 3195000 - }, - { - "epoch": 35.16, - "learning_rate": 3.71101270696958e-08, - "loss": 3.7444, - "step": 3195500 - }, - { - "epoch": 35.16, - "learning_rate": 3.70963749381154e-08, - "loss": 3.7654, - "step": 3196000 - }, - { - "epoch": 35.17, - "learning_rate": 3.708262280653501e-08, - "loss": 3.7499, - "step": 3196500 - }, - { - "epoch": 35.17, - "learning_rate": 3.7068870674954615e-08, - "loss": 3.7668, - "step": 3197000 - }, - { - "epoch": 35.18, - "learning_rate": 3.705511854337422e-08, - "loss": 3.7744, - "step": 3197500 - }, - { - "epoch": 35.18, - "learning_rate": 3.704136641179383e-08, - "loss": 3.765, - "step": 3198000 - }, - { - "epoch": 35.19, - "learning_rate": 3.702761428021343e-08, - "loss": 3.7595, - "step": 3198500 - }, - { - "epoch": 35.19, - "learning_rate": 3.701386214863303e-08, - "loss": 3.7532, - "step": 3199000 - }, - { - "epoch": 35.2, - "learning_rate": 3.700011001705264e-08, - "loss": 3.7549, - "step": 3199500 - }, - { - "epoch": 35.21, - "learning_rate": 3.6986357885472244e-08, - "loss": 3.7565, - "step": 3200000 - }, - { - "epoch": 35.21, - "learning_rate": 3.697260575389185e-08, - "loss": 3.7727, - "step": 3200500 - }, - { - "epoch": 35.22, - "learning_rate": 3.6958853622311457e-08, - "loss": 3.7534, - "step": 3201000 - }, - { - "epoch": 35.22, - "learning_rate": 3.694510149073106e-08, - "loss": 3.7669, - "step": 3201500 - }, - { - "epoch": 35.23, - "learning_rate": 3.693134935915066e-08, - "loss": 3.7552, - "step": 3202000 - }, - { - "epoch": 35.23, - "learning_rate": 3.691759722757027e-08, - "loss": 3.7583, - "step": 3202500 - }, - { - "epoch": 35.24, - "learning_rate": 3.6903845095989874e-08, - "loss": 3.7589, - "step": 3203000 - }, - { - "epoch": 35.24, - "learning_rate": 3.6890092964409483e-08, - "loss": 3.7549, - "step": 3203500 - }, - { - "epoch": 35.25, - "learning_rate": 3.6876340832829086e-08, - "loss": 3.7456, - "step": 3204000 - }, - { - "epoch": 35.25, - "learning_rate": 3.6862588701248695e-08, - "loss": 3.7603, - "step": 3204500 - }, - { - "epoch": 35.26, - "learning_rate": 3.68488365696683e-08, - "loss": 3.7455, - "step": 3205000 - }, - { - "epoch": 35.27, - "learning_rate": 3.683508443808791e-08, - "loss": 3.7548, - "step": 3205500 - }, - { - "epoch": 35.27, - "learning_rate": 3.682133230650751e-08, - "loss": 3.7794, - "step": 3206000 - }, - { - "epoch": 35.28, - "learning_rate": 3.680758017492711e-08, - "loss": 3.7802, - "step": 3206500 - }, - { - "epoch": 35.28, - "learning_rate": 3.679382804334672e-08, - "loss": 3.7461, - "step": 3207000 - }, - { - "epoch": 35.29, - "learning_rate": 3.6780075911766325e-08, - "loss": 3.7675, - "step": 3207500 - }, - { - "epoch": 35.29, - "learning_rate": 3.676632378018593e-08, - "loss": 3.7518, - "step": 3208000 - }, - { - "epoch": 35.3, - "learning_rate": 3.675257164860554e-08, - "loss": 3.7436, - "step": 3208500 - }, - { - "epoch": 35.3, - "learning_rate": 3.673881951702514e-08, - "loss": 3.7601, - "step": 3209000 - }, - { - "epoch": 35.31, - "learning_rate": 3.672506738544474e-08, - "loss": 3.758, - "step": 3209500 - }, - { - "epoch": 35.32, - "learning_rate": 3.671131525386435e-08, - "loss": 3.75, - "step": 3210000 - }, - { - "epoch": 35.32, - "learning_rate": 3.6697563122283955e-08, - "loss": 3.7634, - "step": 3210500 - }, - { - "epoch": 35.33, - "learning_rate": 3.668381099070356e-08, - "loss": 3.7651, - "step": 3211000 - }, - { - "epoch": 35.33, - "learning_rate": 3.667005885912316e-08, - "loss": 3.7413, - "step": 3211500 - }, - { - "epoch": 35.34, - "learning_rate": 3.665630672754277e-08, - "loss": 3.7687, - "step": 3212000 - }, - { - "epoch": 35.34, - "learning_rate": 3.664255459596237e-08, - "loss": 3.7693, - "step": 3212500 - }, - { - "epoch": 35.35, - "learning_rate": 3.6628802464381975e-08, - "loss": 3.7416, - "step": 3213000 - }, - { - "epoch": 35.35, - "learning_rate": 3.6615050332801584e-08, - "loss": 3.7533, - "step": 3213500 - }, - { - "epoch": 35.36, - "learning_rate": 3.660129820122119e-08, - "loss": 3.7421, - "step": 3214000 - }, - { - "epoch": 35.36, - "learning_rate": 3.658754606964079e-08, - "loss": 3.7698, - "step": 3214500 - }, - { - "epoch": 35.37, - "learning_rate": 3.65737939380604e-08, - "loss": 3.7658, - "step": 3215000 - }, - { - "epoch": 35.38, - "learning_rate": 3.656004180648e-08, - "loss": 3.755, - "step": 3215500 - }, - { - "epoch": 35.38, - "learning_rate": 3.6546289674899605e-08, - "loss": 3.7737, - "step": 3216000 - }, - { - "epoch": 35.39, - "learning_rate": 3.6532537543319214e-08, - "loss": 3.7678, - "step": 3216500 - }, - { - "epoch": 35.39, - "learning_rate": 3.6518785411738817e-08, - "loss": 3.7605, - "step": 3217000 - }, - { - "epoch": 35.4, - "learning_rate": 3.650503328015842e-08, - "loss": 3.748, - "step": 3217500 - }, - { - "epoch": 35.4, - "learning_rate": 3.649128114857803e-08, - "loss": 3.7462, - "step": 3218000 - }, - { - "epoch": 35.41, - "learning_rate": 3.647752901699763e-08, - "loss": 3.7651, - "step": 3218500 - }, - { - "epoch": 35.41, - "learning_rate": 3.6463776885417234e-08, - "loss": 3.7655, - "step": 3219000 - }, - { - "epoch": 35.42, - "learning_rate": 3.6450024753836843e-08, - "loss": 3.759, - "step": 3219500 - }, - { - "epoch": 35.43, - "learning_rate": 3.6436272622256446e-08, - "loss": 3.7539, - "step": 3220000 - }, - { - "epoch": 35.43, - "learning_rate": 3.642252049067605e-08, - "loss": 3.7606, - "step": 3220500 - }, - { - "epoch": 35.44, - "learning_rate": 3.640876835909566e-08, - "loss": 3.759, - "step": 3221000 - }, - { - "epoch": 35.44, - "learning_rate": 3.639501622751526e-08, - "loss": 3.7476, - "step": 3221500 - }, - { - "epoch": 35.45, - "learning_rate": 3.6381264095934864e-08, - "loss": 3.7626, - "step": 3222000 - }, - { - "epoch": 35.45, - "learning_rate": 3.636751196435447e-08, - "loss": 3.7408, - "step": 3222500 - }, - { - "epoch": 35.46, - "learning_rate": 3.6353759832774076e-08, - "loss": 3.7681, - "step": 3223000 - }, - { - "epoch": 35.46, - "learning_rate": 3.634000770119368e-08, - "loss": 3.7667, - "step": 3223500 - }, - { - "epoch": 35.47, - "learning_rate": 3.632625556961329e-08, - "loss": 3.7483, - "step": 3224000 - }, - { - "epoch": 35.47, - "learning_rate": 3.631250343803289e-08, - "loss": 3.7658, - "step": 3224500 - }, - { - "epoch": 35.48, - "learning_rate": 3.629875130645249e-08, - "loss": 3.7547, - "step": 3225000 - }, - { - "epoch": 35.49, - "learning_rate": 3.62849991748721e-08, - "loss": 3.7614, - "step": 3225500 - }, - { - "epoch": 35.49, - "learning_rate": 3.6271247043291705e-08, - "loss": 3.7749, - "step": 3226000 - }, - { - "epoch": 35.5, - "learning_rate": 3.6257494911711315e-08, - "loss": 3.7602, - "step": 3226500 - }, - { - "epoch": 35.5, - "learning_rate": 3.624374278013092e-08, - "loss": 3.7532, - "step": 3227000 - }, - { - "epoch": 35.51, - "learning_rate": 3.622999064855053e-08, - "loss": 3.7808, - "step": 3227500 - }, - { - "epoch": 35.51, - "learning_rate": 3.621623851697013e-08, - "loss": 3.7655, - "step": 3228000 - }, - { - "epoch": 35.52, - "learning_rate": 3.620248638538974e-08, - "loss": 3.7646, - "step": 3228500 - }, - { - "epoch": 35.52, - "learning_rate": 3.618873425380934e-08, - "loss": 3.7631, - "step": 3229000 - }, - { - "epoch": 35.53, - "learning_rate": 3.6174982122228944e-08, - "loss": 3.7667, - "step": 3229500 - }, - { - "epoch": 35.54, - "learning_rate": 3.6161229990648554e-08, - "loss": 3.7501, - "step": 3230000 - }, - { - "epoch": 35.54, - "learning_rate": 3.6147477859068156e-08, - "loss": 3.7474, - "step": 3230500 - }, - { - "epoch": 35.55, - "learning_rate": 3.613372572748776e-08, - "loss": 3.7584, - "step": 3231000 - }, - { - "epoch": 35.55, - "learning_rate": 3.611997359590737e-08, - "loss": 3.7638, - "step": 3231500 - }, - { - "epoch": 35.56, - "learning_rate": 3.610622146432697e-08, - "loss": 3.7413, - "step": 3232000 - }, - { - "epoch": 35.56, - "learning_rate": 3.6092469332746574e-08, - "loss": 3.7489, - "step": 3232500 - }, - { - "epoch": 35.57, - "learning_rate": 3.607871720116618e-08, - "loss": 3.785, - "step": 3233000 - }, - { - "epoch": 35.57, - "learning_rate": 3.6064965069585786e-08, - "loss": 3.7486, - "step": 3233500 - }, - { - "epoch": 35.58, - "learning_rate": 3.605121293800539e-08, - "loss": 3.7537, - "step": 3234000 - }, - { - "epoch": 35.59, - "learning_rate": 3.6037460806425e-08, - "loss": 3.7735, - "step": 3234500 - }, - { - "epoch": 35.59, - "learning_rate": 3.60237086748446e-08, - "loss": 3.7669, - "step": 3235000 - }, - { - "epoch": 35.6, - "learning_rate": 3.6009956543264203e-08, - "loss": 3.7732, - "step": 3235500 - }, - { - "epoch": 35.6, - "learning_rate": 3.599620441168381e-08, - "loss": 3.7557, - "step": 3236000 - }, - { - "epoch": 35.61, - "learning_rate": 3.5982452280103416e-08, - "loss": 3.7497, - "step": 3236500 - }, - { - "epoch": 35.61, - "learning_rate": 3.596870014852302e-08, - "loss": 3.7714, - "step": 3237000 - }, - { - "epoch": 35.62, - "learning_rate": 3.595494801694263e-08, - "loss": 3.7532, - "step": 3237500 - }, - { - "epoch": 35.62, - "learning_rate": 3.594119588536223e-08, - "loss": 3.748, - "step": 3238000 - }, - { - "epoch": 35.63, - "learning_rate": 3.592744375378183e-08, - "loss": 3.7625, - "step": 3238500 - }, - { - "epoch": 35.63, - "learning_rate": 3.591369162220144e-08, - "loss": 3.7747, - "step": 3239000 - }, - { - "epoch": 35.64, - "learning_rate": 3.5899939490621045e-08, - "loss": 3.7654, - "step": 3239500 - }, - { - "epoch": 35.65, - "learning_rate": 3.588618735904065e-08, - "loss": 3.7394, - "step": 3240000 - }, - { - "epoch": 35.65, - "learning_rate": 3.587243522746026e-08, - "loss": 3.7778, - "step": 3240500 - }, - { - "epoch": 35.66, - "learning_rate": 3.585868309587986e-08, - "loss": 3.7784, - "step": 3241000 - }, - { - "epoch": 35.66, - "learning_rate": 3.584493096429946e-08, - "loss": 3.7612, - "step": 3241500 - }, - { - "epoch": 35.67, - "learning_rate": 3.583117883271907e-08, - "loss": 3.7558, - "step": 3242000 - }, - { - "epoch": 35.67, - "learning_rate": 3.5817426701138675e-08, - "loss": 3.7675, - "step": 3242500 - }, - { - "epoch": 35.68, - "learning_rate": 3.580367456955828e-08, - "loss": 3.7489, - "step": 3243000 - }, - { - "epoch": 35.68, - "learning_rate": 3.578992243797789e-08, - "loss": 3.7632, - "step": 3243500 - }, - { - "epoch": 35.69, - "learning_rate": 3.577617030639749e-08, - "loss": 3.7603, - "step": 3244000 - }, - { - "epoch": 35.7, - "learning_rate": 3.576241817481709e-08, - "loss": 3.7582, - "step": 3244500 - }, - { - "epoch": 35.7, - "learning_rate": 3.57486660432367e-08, - "loss": 3.7405, - "step": 3245000 - }, - { - "epoch": 35.71, - "learning_rate": 3.5734913911656304e-08, - "loss": 3.7599, - "step": 3245500 - }, - { - "epoch": 35.71, - "learning_rate": 3.572116178007591e-08, - "loss": 3.77, - "step": 3246000 - }, - { - "epoch": 35.72, - "learning_rate": 3.5707409648495516e-08, - "loss": 3.7767, - "step": 3246500 - }, - { - "epoch": 35.72, - "learning_rate": 3.569365751691512e-08, - "loss": 3.7612, - "step": 3247000 - }, - { - "epoch": 35.73, - "learning_rate": 3.567990538533472e-08, - "loss": 3.7631, - "step": 3247500 - }, - { - "epoch": 35.73, - "learning_rate": 3.566615325375433e-08, - "loss": 3.7756, - "step": 3248000 - }, - { - "epoch": 35.74, - "learning_rate": 3.5652401122173934e-08, - "loss": 3.7478, - "step": 3248500 - }, - { - "epoch": 35.74, - "learning_rate": 3.5638648990593537e-08, - "loss": 3.7675, - "step": 3249000 - }, - { - "epoch": 35.75, - "learning_rate": 3.562489685901314e-08, - "loss": 3.7627, - "step": 3249500 - }, - { - "epoch": 35.76, - "learning_rate": 3.561114472743275e-08, - "loss": 3.7517, - "step": 3250000 - }, - { - "epoch": 35.76, - "learning_rate": 3.559739259585235e-08, - "loss": 3.7617, - "step": 3250500 - }, - { - "epoch": 35.77, - "learning_rate": 3.558364046427196e-08, - "loss": 3.758, - "step": 3251000 - }, - { - "epoch": 35.77, - "learning_rate": 3.5569888332691564e-08, - "loss": 3.7893, - "step": 3251500 - }, - { - "epoch": 35.78, - "learning_rate": 3.555613620111117e-08, - "loss": 3.7586, - "step": 3252000 - }, - { - "epoch": 35.78, - "learning_rate": 3.5542384069530776e-08, - "loss": 3.7676, - "step": 3252500 - }, - { - "epoch": 35.79, - "learning_rate": 3.5528631937950385e-08, - "loss": 3.7649, - "step": 3253000 - }, - { - "epoch": 35.79, - "learning_rate": 3.551487980636999e-08, - "loss": 3.7311, - "step": 3253500 - }, - { - "epoch": 35.8, - "learning_rate": 3.550112767478959e-08, - "loss": 3.7621, - "step": 3254000 - }, - { - "epoch": 35.81, - "learning_rate": 3.54873755432092e-08, - "loss": 3.7611, - "step": 3254500 - }, - { - "epoch": 35.81, - "learning_rate": 3.54736234116288e-08, - "loss": 3.7769, - "step": 3255000 - }, - { - "epoch": 35.82, - "learning_rate": 3.5459871280048405e-08, - "loss": 3.7518, - "step": 3255500 - }, - { - "epoch": 35.82, - "learning_rate": 3.5446119148468015e-08, - "loss": 3.7648, - "step": 3256000 - }, - { - "epoch": 35.83, - "learning_rate": 3.543236701688762e-08, - "loss": 3.7633, - "step": 3256500 - }, - { - "epoch": 35.83, - "learning_rate": 3.541861488530722e-08, - "loss": 3.7729, - "step": 3257000 - }, - { - "epoch": 35.84, - "learning_rate": 3.540486275372683e-08, - "loss": 3.7648, - "step": 3257500 - }, - { - "epoch": 35.84, - "learning_rate": 3.539111062214643e-08, - "loss": 3.7593, - "step": 3258000 - }, - { - "epoch": 35.85, - "learning_rate": 3.5377358490566035e-08, - "loss": 3.7592, - "step": 3258500 - }, - { - "epoch": 35.85, - "learning_rate": 3.5363606358985644e-08, - "loss": 3.7465, - "step": 3259000 - }, - { - "epoch": 35.86, - "learning_rate": 3.534985422740525e-08, - "loss": 3.7534, - "step": 3259500 - }, - { - "epoch": 35.87, - "learning_rate": 3.533610209582485e-08, - "loss": 3.7618, - "step": 3260000 - }, - { - "epoch": 35.87, - "learning_rate": 3.532234996424446e-08, - "loss": 3.7535, - "step": 3260500 - }, - { - "epoch": 35.88, - "learning_rate": 3.530859783266406e-08, - "loss": 3.7715, - "step": 3261000 - }, - { - "epoch": 35.88, - "learning_rate": 3.5294845701083664e-08, - "loss": 3.7349, - "step": 3261500 - }, - { - "epoch": 35.89, - "learning_rate": 3.5281093569503274e-08, - "loss": 3.7472, - "step": 3262000 - }, - { - "epoch": 35.89, - "learning_rate": 3.5267341437922876e-08, - "loss": 3.7644, - "step": 3262500 - }, - { - "epoch": 35.9, - "learning_rate": 3.525358930634248e-08, - "loss": 3.7508, - "step": 3263000 - }, - { - "epoch": 35.9, - "learning_rate": 3.523983717476209e-08, - "loss": 3.7655, - "step": 3263500 - }, - { - "epoch": 35.91, - "learning_rate": 3.522608504318169e-08, - "loss": 3.7504, - "step": 3264000 - }, - { - "epoch": 35.92, - "learning_rate": 3.5212332911601294e-08, - "loss": 3.7377, - "step": 3264500 - }, - { - "epoch": 35.92, - "learning_rate": 3.51985807800209e-08, - "loss": 3.7442, - "step": 3265000 - }, - { - "epoch": 35.93, - "learning_rate": 3.5184828648440506e-08, - "loss": 3.7406, - "step": 3265500 - }, - { - "epoch": 35.93, - "learning_rate": 3.517107651686011e-08, - "loss": 3.7411, - "step": 3266000 - }, - { - "epoch": 35.94, - "learning_rate": 3.515732438527972e-08, - "loss": 3.7462, - "step": 3266500 - }, - { - "epoch": 35.94, - "learning_rate": 3.514357225369932e-08, - "loss": 3.7395, - "step": 3267000 - }, - { - "epoch": 35.95, - "learning_rate": 3.5129820122118924e-08, - "loss": 3.7508, - "step": 3267500 - }, - { - "epoch": 35.95, - "learning_rate": 3.511606799053853e-08, - "loss": 3.7407, - "step": 3268000 - }, - { - "epoch": 35.96, - "learning_rate": 3.5102315858958136e-08, - "loss": 3.7679, - "step": 3268500 - }, - { - "epoch": 35.96, - "learning_rate": 3.508856372737774e-08, - "loss": 3.7356, - "step": 3269000 - }, - { - "epoch": 35.97, - "learning_rate": 3.507481159579735e-08, - "loss": 3.7711, - "step": 3269500 - }, - { - "epoch": 35.98, - "learning_rate": 3.506105946421695e-08, - "loss": 3.7418, - "step": 3270000 - }, - { - "epoch": 35.98, - "learning_rate": 3.504730733263655e-08, - "loss": 3.7441, - "step": 3270500 - }, - { - "epoch": 35.99, - "learning_rate": 3.503355520105616e-08, - "loss": 3.7542, - "step": 3271000 - }, - { - "epoch": 35.99, - "learning_rate": 3.5019803069475765e-08, - "loss": 3.7456, - "step": 3271500 - }, - { - "epoch": 36.0, - "learning_rate": 3.500605093789537e-08, - "loss": 3.7628, - "step": 3272000 - }, - { - "epoch": 36.0, - "eval_loss": 3.830293655395508, - "eval_runtime": 6.1452, - "eval_samples_per_second": 252.88, - "step": 3272220 - }, - { - "epoch": 36.0, - "learning_rate": 3.499229880631498e-08, - "loss": 3.7572, - "step": 3272500 - }, - { - "epoch": 36.01, - "learning_rate": 3.497854667473458e-08, - "loss": 3.7541, - "step": 3273000 - }, - { - "epoch": 36.01, - "learning_rate": 3.496479454315418e-08, - "loss": 3.7611, - "step": 3273500 - }, - { - "epoch": 36.02, - "learning_rate": 3.495104241157379e-08, - "loss": 3.7494, - "step": 3274000 - }, - { - "epoch": 36.03, - "learning_rate": 3.4937290279993395e-08, - "loss": 3.7695, - "step": 3274500 - }, - { - "epoch": 36.03, - "learning_rate": 3.4923538148413004e-08, - "loss": 3.7574, - "step": 3275000 - }, - { - "epoch": 36.04, - "learning_rate": 3.490978601683261e-08, - "loss": 3.7651, - "step": 3275500 - }, - { - "epoch": 36.04, - "learning_rate": 3.4896033885252216e-08, - "loss": 3.7412, - "step": 3276000 - }, - { - "epoch": 36.05, - "learning_rate": 3.488228175367182e-08, - "loss": 3.76, - "step": 3276500 - }, - { - "epoch": 36.05, - "learning_rate": 3.486852962209142e-08, - "loss": 3.7484, - "step": 3277000 - }, - { - "epoch": 36.06, - "learning_rate": 3.485477749051103e-08, - "loss": 3.7741, - "step": 3277500 - }, - { - "epoch": 36.06, - "learning_rate": 3.4841025358930634e-08, - "loss": 3.7568, - "step": 3278000 - }, - { - "epoch": 36.07, - "learning_rate": 3.482727322735024e-08, - "loss": 3.7402, - "step": 3278500 - }, - { - "epoch": 36.07, - "learning_rate": 3.4813521095769846e-08, - "loss": 3.777, - "step": 3279000 - }, - { - "epoch": 36.08, - "learning_rate": 3.479976896418945e-08, - "loss": 3.7681, - "step": 3279500 - }, - { - "epoch": 36.09, - "learning_rate": 3.478601683260906e-08, - "loss": 3.7249, - "step": 3280000 - }, - { - "epoch": 36.09, - "learning_rate": 3.477226470102866e-08, - "loss": 3.7468, - "step": 3280500 - }, - { - "epoch": 36.1, - "learning_rate": 3.4758512569448263e-08, - "loss": 3.7586, - "step": 3281000 - }, - { - "epoch": 36.1, - "learning_rate": 3.474476043786787e-08, - "loss": 3.754, - "step": 3281500 - }, - { - "epoch": 36.11, - "learning_rate": 3.4731008306287475e-08, - "loss": 3.7811, - "step": 3282000 - }, - { - "epoch": 36.11, - "learning_rate": 3.471725617470708e-08, - "loss": 3.7615, - "step": 3282500 - }, - { - "epoch": 36.12, - "learning_rate": 3.470350404312669e-08, - "loss": 3.7866, - "step": 3283000 - }, - { - "epoch": 36.12, - "learning_rate": 3.468975191154629e-08, - "loss": 3.7567, - "step": 3283500 - }, - { - "epoch": 36.13, - "learning_rate": 3.467599977996589e-08, - "loss": 3.7655, - "step": 3284000 - }, - { - "epoch": 36.14, - "learning_rate": 3.46622476483855e-08, - "loss": 3.7372, - "step": 3284500 - }, - { - "epoch": 36.14, - "learning_rate": 3.4648495516805105e-08, - "loss": 3.749, - "step": 3285000 - }, - { - "epoch": 36.15, - "learning_rate": 3.463474338522471e-08, - "loss": 3.7512, - "step": 3285500 - }, - { - "epoch": 36.15, - "learning_rate": 3.462099125364432e-08, - "loss": 3.766, - "step": 3286000 - }, - { - "epoch": 36.16, - "learning_rate": 3.460723912206392e-08, - "loss": 3.7551, - "step": 3286500 - }, - { - "epoch": 36.16, - "learning_rate": 3.459348699048352e-08, - "loss": 3.7657, - "step": 3287000 - }, - { - "epoch": 36.17, - "learning_rate": 3.4579734858903125e-08, - "loss": 3.7636, - "step": 3287500 - }, - { - "epoch": 36.17, - "learning_rate": 3.4565982727322735e-08, - "loss": 3.7603, - "step": 3288000 - }, - { - "epoch": 36.18, - "learning_rate": 3.455223059574234e-08, - "loss": 3.7543, - "step": 3288500 - }, - { - "epoch": 36.18, - "learning_rate": 3.453847846416194e-08, - "loss": 3.756, - "step": 3289000 - }, - { - "epoch": 36.19, - "learning_rate": 3.452472633258155e-08, - "loss": 3.767, - "step": 3289500 - }, - { - "epoch": 36.2, - "learning_rate": 3.451097420100115e-08, - "loss": 3.7715, - "step": 3290000 - }, - { - "epoch": 36.2, - "learning_rate": 3.4497222069420755e-08, - "loss": 3.7453, - "step": 3290500 - }, - { - "epoch": 36.21, - "learning_rate": 3.4483469937840364e-08, - "loss": 3.7627, - "step": 3291000 - }, - { - "epoch": 36.21, - "learning_rate": 3.446971780625997e-08, - "loss": 3.7471, - "step": 3291500 - }, - { - "epoch": 36.22, - "learning_rate": 3.445596567467957e-08, - "loss": 3.7506, - "step": 3292000 - }, - { - "epoch": 36.22, - "learning_rate": 3.444221354309918e-08, - "loss": 3.7506, - "step": 3292500 - }, - { - "epoch": 36.23, - "learning_rate": 3.442846141151878e-08, - "loss": 3.7422, - "step": 3293000 - }, - { - "epoch": 36.23, - "learning_rate": 3.4414709279938384e-08, - "loss": 3.7496, - "step": 3293500 - }, - { - "epoch": 36.24, - "learning_rate": 3.4400957148357994e-08, - "loss": 3.7396, - "step": 3294000 - }, - { - "epoch": 36.25, - "learning_rate": 3.4387205016777596e-08, - "loss": 3.7716, - "step": 3294500 - }, - { - "epoch": 36.25, - "learning_rate": 3.43734528851972e-08, - "loss": 3.7551, - "step": 3295000 - }, - { - "epoch": 36.26, - "learning_rate": 3.435970075361681e-08, - "loss": 3.7586, - "step": 3295500 - }, - { - "epoch": 36.26, - "learning_rate": 3.434594862203641e-08, - "loss": 3.7584, - "step": 3296000 - }, - { - "epoch": 36.27, - "learning_rate": 3.4332196490456014e-08, - "loss": 3.7485, - "step": 3296500 - }, - { - "epoch": 36.27, - "learning_rate": 3.4318444358875623e-08, - "loss": 3.7678, - "step": 3297000 - }, - { - "epoch": 36.28, - "learning_rate": 3.4304692227295226e-08, - "loss": 3.7618, - "step": 3297500 - }, - { - "epoch": 36.28, - "learning_rate": 3.429094009571483e-08, - "loss": 3.7575, - "step": 3298000 - }, - { - "epoch": 36.29, - "learning_rate": 3.427718796413444e-08, - "loss": 3.773, - "step": 3298500 - }, - { - "epoch": 36.29, - "learning_rate": 3.426343583255404e-08, - "loss": 3.7468, - "step": 3299000 - }, - { - "epoch": 36.3, - "learning_rate": 3.424968370097365e-08, - "loss": 3.7639, - "step": 3299500 - }, - { - "epoch": 36.31, - "learning_rate": 3.423593156939325e-08, - "loss": 3.7435, - "step": 3300000 - }, - { - "epoch": 36.31, - "learning_rate": 3.422217943781286e-08, - "loss": 3.7592, - "step": 3300500 - }, - { - "epoch": 36.32, - "learning_rate": 3.4208427306232465e-08, - "loss": 3.7639, - "step": 3301000 - }, - { - "epoch": 36.32, - "learning_rate": 3.4194675174652074e-08, - "loss": 3.7449, - "step": 3301500 - }, - { - "epoch": 36.33, - "learning_rate": 3.418092304307168e-08, - "loss": 3.7646, - "step": 3302000 - }, - { - "epoch": 36.33, - "learning_rate": 3.416717091149128e-08, - "loss": 3.7578, - "step": 3302500 - }, - { - "epoch": 36.34, - "learning_rate": 3.415341877991089e-08, - "loss": 3.7541, - "step": 3303000 - }, - { - "epoch": 36.34, - "learning_rate": 3.413966664833049e-08, - "loss": 3.7837, - "step": 3303500 - }, - { - "epoch": 36.35, - "learning_rate": 3.4125914516750095e-08, - "loss": 3.7294, - "step": 3304000 - }, - { - "epoch": 36.36, - "learning_rate": 3.4112162385169704e-08, - "loss": 3.7509, - "step": 3304500 - }, - { - "epoch": 36.36, - "learning_rate": 3.4098410253589307e-08, - "loss": 3.7379, - "step": 3305000 - }, - { - "epoch": 36.37, - "learning_rate": 3.408465812200891e-08, - "loss": 3.761, - "step": 3305500 - }, - { - "epoch": 36.37, - "learning_rate": 3.407090599042852e-08, - "loss": 3.7543, - "step": 3306000 - }, - { - "epoch": 36.38, - "learning_rate": 3.405715385884812e-08, - "loss": 3.7695, - "step": 3306500 - }, - { - "epoch": 36.38, - "learning_rate": 3.4043401727267724e-08, - "loss": 3.7918, - "step": 3307000 - }, - { - "epoch": 36.39, - "learning_rate": 3.4029649595687334e-08, - "loss": 3.7618, - "step": 3307500 - }, - { - "epoch": 36.39, - "learning_rate": 3.4015897464106936e-08, - "loss": 3.7684, - "step": 3308000 - }, - { - "epoch": 36.4, - "learning_rate": 3.400214533252654e-08, - "loss": 3.7653, - "step": 3308500 - }, - { - "epoch": 36.4, - "learning_rate": 3.398839320094615e-08, - "loss": 3.759, - "step": 3309000 - }, - { - "epoch": 36.41, - "learning_rate": 3.397464106936575e-08, - "loss": 3.7535, - "step": 3309500 - }, - { - "epoch": 36.42, - "learning_rate": 3.3960888937785354e-08, - "loss": 3.7288, - "step": 3310000 - }, - { - "epoch": 36.42, - "learning_rate": 3.394713680620496e-08, - "loss": 3.7677, - "step": 3310500 - }, - { - "epoch": 36.43, - "learning_rate": 3.3933384674624566e-08, - "loss": 3.7574, - "step": 3311000 - }, - { - "epoch": 36.43, - "learning_rate": 3.391963254304417e-08, - "loss": 3.7572, - "step": 3311500 - }, - { - "epoch": 36.44, - "learning_rate": 3.390588041146378e-08, - "loss": 3.7484, - "step": 3312000 - }, - { - "epoch": 36.44, - "learning_rate": 3.389212827988338e-08, - "loss": 3.7417, - "step": 3312500 - }, - { - "epoch": 36.45, - "learning_rate": 3.3878376148302983e-08, - "loss": 3.751, - "step": 3313000 - }, - { - "epoch": 36.45, - "learning_rate": 3.386462401672259e-08, - "loss": 3.7607, - "step": 3313500 - }, - { - "epoch": 36.46, - "learning_rate": 3.3850871885142195e-08, - "loss": 3.7404, - "step": 3314000 - }, - { - "epoch": 36.47, - "learning_rate": 3.38371197535618e-08, - "loss": 3.7475, - "step": 3314500 - }, - { - "epoch": 36.47, - "learning_rate": 3.382336762198141e-08, - "loss": 3.7747, - "step": 3315000 - }, - { - "epoch": 36.48, - "learning_rate": 3.380961549040101e-08, - "loss": 3.7519, - "step": 3315500 - }, - { - "epoch": 36.48, - "learning_rate": 3.379586335882061e-08, - "loss": 3.7613, - "step": 3316000 - }, - { - "epoch": 36.49, - "learning_rate": 3.378211122724022e-08, - "loss": 3.7553, - "step": 3316500 - }, - { - "epoch": 36.49, - "learning_rate": 3.3768359095659825e-08, - "loss": 3.7538, - "step": 3317000 - }, - { - "epoch": 36.5, - "learning_rate": 3.375460696407943e-08, - "loss": 3.7644, - "step": 3317500 - }, - { - "epoch": 36.5, - "learning_rate": 3.374085483249904e-08, - "loss": 3.7701, - "step": 3318000 - }, - { - "epoch": 36.51, - "learning_rate": 3.372710270091864e-08, - "loss": 3.7645, - "step": 3318500 - }, - { - "epoch": 36.51, - "learning_rate": 3.371335056933824e-08, - "loss": 3.7694, - "step": 3319000 - }, - { - "epoch": 36.52, - "learning_rate": 3.369959843775785e-08, - "loss": 3.7549, - "step": 3319500 - }, - { - "epoch": 36.53, - "learning_rate": 3.3685846306177455e-08, - "loss": 3.7638, - "step": 3320000 - }, - { - "epoch": 36.53, - "learning_rate": 3.367209417459706e-08, - "loss": 3.74, - "step": 3320500 - }, - { - "epoch": 36.54, - "learning_rate": 3.3658342043016667e-08, - "loss": 3.7404, - "step": 3321000 - }, - { - "epoch": 36.54, - "learning_rate": 3.364458991143627e-08, - "loss": 3.7488, - "step": 3321500 - }, - { - "epoch": 36.55, - "learning_rate": 3.363083777985587e-08, - "loss": 3.7579, - "step": 3322000 - }, - { - "epoch": 36.55, - "learning_rate": 3.361708564827548e-08, - "loss": 3.7404, - "step": 3322500 - }, - { - "epoch": 36.56, - "learning_rate": 3.3603333516695084e-08, - "loss": 3.7581, - "step": 3323000 - }, - { - "epoch": 36.56, - "learning_rate": 3.3589581385114694e-08, - "loss": 3.7702, - "step": 3323500 - }, - { - "epoch": 36.57, - "learning_rate": 3.3575829253534296e-08, - "loss": 3.7504, - "step": 3324000 - }, - { - "epoch": 36.58, - "learning_rate": 3.35620771219539e-08, - "loss": 3.7651, - "step": 3324500 - }, - { - "epoch": 36.58, - "learning_rate": 3.354832499037351e-08, - "loss": 3.7689, - "step": 3325000 - }, - { - "epoch": 36.59, - "learning_rate": 3.353457285879311e-08, - "loss": 3.7499, - "step": 3325500 - }, - { - "epoch": 36.59, - "learning_rate": 3.352082072721272e-08, - "loss": 3.7553, - "step": 3326000 - }, - { - "epoch": 36.6, - "learning_rate": 3.350706859563232e-08, - "loss": 3.7384, - "step": 3326500 - }, - { - "epoch": 36.6, - "learning_rate": 3.3493316464051926e-08, - "loss": 3.7559, - "step": 3327000 - }, - { - "epoch": 36.61, - "learning_rate": 3.3479564332471535e-08, - "loss": 3.7568, - "step": 3327500 - }, - { - "epoch": 36.61, - "learning_rate": 3.346581220089114e-08, - "loss": 3.7664, - "step": 3328000 - }, - { - "epoch": 36.62, - "learning_rate": 3.345206006931074e-08, - "loss": 3.7485, - "step": 3328500 - }, - { - "epoch": 36.62, - "learning_rate": 3.343830793773035e-08, - "loss": 3.7721, - "step": 3329000 - }, - { - "epoch": 36.63, - "learning_rate": 3.342455580614995e-08, - "loss": 3.767, - "step": 3329500 - }, - { - "epoch": 36.64, - "learning_rate": 3.3410803674569555e-08, - "loss": 3.7522, - "step": 3330000 - }, - { - "epoch": 36.64, - "learning_rate": 3.3397051542989165e-08, - "loss": 3.7549, - "step": 3330500 - }, - { - "epoch": 36.65, - "learning_rate": 3.338329941140877e-08, - "loss": 3.7631, - "step": 3331000 - }, - { - "epoch": 36.65, - "learning_rate": 3.336954727982837e-08, - "loss": 3.7691, - "step": 3331500 - }, - { - "epoch": 36.66, - "learning_rate": 3.335579514824798e-08, - "loss": 3.7474, - "step": 3332000 - }, - { - "epoch": 36.66, - "learning_rate": 3.334204301666758e-08, - "loss": 3.7603, - "step": 3332500 - }, - { - "epoch": 36.67, - "learning_rate": 3.3328290885087185e-08, - "loss": 3.76, - "step": 3333000 - }, - { - "epoch": 36.67, - "learning_rate": 3.3314538753506794e-08, - "loss": 3.757, - "step": 3333500 - }, - { - "epoch": 36.68, - "learning_rate": 3.33007866219264e-08, - "loss": 3.754, - "step": 3334000 - }, - { - "epoch": 36.69, - "learning_rate": 3.3287034490346e-08, - "loss": 3.7662, - "step": 3334500 - }, - { - "epoch": 36.69, - "learning_rate": 3.327328235876561e-08, - "loss": 3.7687, - "step": 3335000 - }, - { - "epoch": 36.7, - "learning_rate": 3.325953022718521e-08, - "loss": 3.7468, - "step": 3335500 - }, - { - "epoch": 36.7, - "learning_rate": 3.3245778095604815e-08, - "loss": 3.7632, - "step": 3336000 - }, - { - "epoch": 36.71, - "learning_rate": 3.3232025964024424e-08, - "loss": 3.7748, - "step": 3336500 - }, - { - "epoch": 36.71, - "learning_rate": 3.321827383244403e-08, - "loss": 3.7645, - "step": 3337000 - }, - { - "epoch": 36.72, - "learning_rate": 3.320452170086363e-08, - "loss": 3.7481, - "step": 3337500 - }, - { - "epoch": 36.72, - "learning_rate": 3.319076956928324e-08, - "loss": 3.7526, - "step": 3338000 - }, - { - "epoch": 36.73, - "learning_rate": 3.317701743770284e-08, - "loss": 3.7492, - "step": 3338500 - }, - { - "epoch": 36.73, - "learning_rate": 3.3163265306122444e-08, - "loss": 3.7478, - "step": 3339000 - }, - { - "epoch": 36.74, - "learning_rate": 3.3149513174542054e-08, - "loss": 3.753, - "step": 3339500 - }, - { - "epoch": 36.75, - "learning_rate": 3.3135761042961656e-08, - "loss": 3.7598, - "step": 3340000 - }, - { - "epoch": 36.75, - "learning_rate": 3.312200891138126e-08, - "loss": 3.7573, - "step": 3340500 - }, - { - "epoch": 36.76, - "learning_rate": 3.310825677980087e-08, - "loss": 3.7614, - "step": 3341000 - }, - { - "epoch": 36.76, - "learning_rate": 3.309450464822047e-08, - "loss": 3.7612, - "step": 3341500 - }, - { - "epoch": 36.77, - "learning_rate": 3.3080752516640074e-08, - "loss": 3.7601, - "step": 3342000 - }, - { - "epoch": 36.77, - "learning_rate": 3.306700038505968e-08, - "loss": 3.733, - "step": 3342500 - }, - { - "epoch": 36.78, - "learning_rate": 3.3053248253479286e-08, - "loss": 3.7733, - "step": 3343000 - }, - { - "epoch": 36.78, - "learning_rate": 3.303949612189889e-08, - "loss": 3.7624, - "step": 3343500 - }, - { - "epoch": 36.79, - "learning_rate": 3.30257439903185e-08, - "loss": 3.7838, - "step": 3344000 - }, - { - "epoch": 36.8, - "learning_rate": 3.30119918587381e-08, - "loss": 3.7641, - "step": 3344500 - }, - { - "epoch": 36.8, - "learning_rate": 3.2998239727157703e-08, - "loss": 3.7617, - "step": 3345000 - }, - { - "epoch": 36.81, - "learning_rate": 3.298448759557731e-08, - "loss": 3.7623, - "step": 3345500 - }, - { - "epoch": 36.81, - "learning_rate": 3.2970735463996915e-08, - "loss": 3.7417, - "step": 3346000 - }, - { - "epoch": 36.82, - "learning_rate": 3.295698333241652e-08, - "loss": 3.7438, - "step": 3346500 - }, - { - "epoch": 36.82, - "learning_rate": 3.294323120083613e-08, - "loss": 3.7599, - "step": 3347000 - }, - { - "epoch": 36.83, - "learning_rate": 3.292947906925573e-08, - "loss": 3.764, - "step": 3347500 - }, - { - "epoch": 36.83, - "learning_rate": 3.291572693767534e-08, - "loss": 3.74, - "step": 3348000 - }, - { - "epoch": 36.84, - "learning_rate": 3.290197480609494e-08, - "loss": 3.7732, - "step": 3348500 - }, - { - "epoch": 36.84, - "learning_rate": 3.288822267451455e-08, - "loss": 3.765, - "step": 3349000 - }, - { - "epoch": 36.85, - "learning_rate": 3.2874470542934154e-08, - "loss": 3.7756, - "step": 3349500 - }, - { - "epoch": 36.86, - "learning_rate": 3.2860718411353764e-08, - "loss": 3.7496, - "step": 3350000 - }, - { - "epoch": 36.86, - "learning_rate": 3.2846966279773366e-08, - "loss": 3.7548, - "step": 3350500 - }, - { - "epoch": 36.87, - "learning_rate": 3.283321414819297e-08, - "loss": 3.7471, - "step": 3351000 - }, - { - "epoch": 36.87, - "learning_rate": 3.281946201661258e-08, - "loss": 3.769, - "step": 3351500 - }, - { - "epoch": 36.88, - "learning_rate": 3.280570988503218e-08, - "loss": 3.7452, - "step": 3352000 - }, - { - "epoch": 36.88, - "learning_rate": 3.2791957753451784e-08, - "loss": 3.755, - "step": 3352500 - }, - { - "epoch": 36.89, - "learning_rate": 3.2778205621871393e-08, - "loss": 3.7629, - "step": 3353000 - }, - { - "epoch": 36.89, - "learning_rate": 3.2764453490290996e-08, - "loss": 3.754, - "step": 3353500 - }, - { - "epoch": 36.9, - "learning_rate": 3.27507013587106e-08, - "loss": 3.7626, - "step": 3354000 - }, - { - "epoch": 36.91, - "learning_rate": 3.273694922713021e-08, - "loss": 3.7692, - "step": 3354500 - }, - { - "epoch": 36.91, - "learning_rate": 3.272319709554981e-08, - "loss": 3.7536, - "step": 3355000 - }, - { - "epoch": 36.92, - "learning_rate": 3.2709444963969414e-08, - "loss": 3.7732, - "step": 3355500 - }, - { - "epoch": 36.92, - "learning_rate": 3.269569283238902e-08, - "loss": 3.7312, - "step": 3356000 - }, - { - "epoch": 36.93, - "learning_rate": 3.2681940700808626e-08, - "loss": 3.7441, - "step": 3356500 - }, - { - "epoch": 36.93, - "learning_rate": 3.266818856922823e-08, - "loss": 3.7718, - "step": 3357000 - }, - { - "epoch": 36.94, - "learning_rate": 3.265443643764784e-08, - "loss": 3.7594, - "step": 3357500 - }, - { - "epoch": 36.94, - "learning_rate": 3.264068430606744e-08, - "loss": 3.7664, - "step": 3358000 - }, - { - "epoch": 36.95, - "learning_rate": 3.262693217448704e-08, - "loss": 3.7581, - "step": 3358500 - }, - { - "epoch": 36.95, - "learning_rate": 3.261318004290665e-08, - "loss": 3.7538, - "step": 3359000 - }, - { - "epoch": 36.96, - "learning_rate": 3.2599427911326255e-08, - "loss": 3.7542, - "step": 3359500 - }, - { - "epoch": 36.97, - "learning_rate": 3.258567577974586e-08, - "loss": 3.753, - "step": 3360000 - }, - { - "epoch": 36.97, - "learning_rate": 3.257192364816547e-08, - "loss": 3.7576, - "step": 3360500 - }, - { - "epoch": 36.98, - "learning_rate": 3.255817151658507e-08, - "loss": 3.757, - "step": 3361000 - }, - { - "epoch": 36.98, - "learning_rate": 3.254441938500467e-08, - "loss": 3.749, - "step": 3361500 - }, - { - "epoch": 36.99, - "learning_rate": 3.2530667253424276e-08, - "loss": 3.7588, - "step": 3362000 - }, - { - "epoch": 36.99, - "learning_rate": 3.2516915121843885e-08, - "loss": 3.759, - "step": 3362500 - }, - { - "epoch": 37.0, - "learning_rate": 3.250316299026349e-08, - "loss": 3.7614, - "step": 3363000 - }, - { - "epoch": 37.0, - "eval_loss": 3.8296761512756348, - "eval_runtime": 6.1458, - "eval_samples_per_second": 252.856, - "step": 3363115 - }, - { - "epoch": 37.0, - "learning_rate": 3.248941085868309e-08, - "loss": 3.7603, - "step": 3363500 - }, - { - "epoch": 37.01, - "learning_rate": 3.24756587271027e-08, - "loss": 3.7573, - "step": 3364000 - }, - { - "epoch": 37.02, - "learning_rate": 3.24619065955223e-08, - "loss": 3.7588, - "step": 3364500 - }, - { - "epoch": 37.02, - "learning_rate": 3.2448154463941905e-08, - "loss": 3.755, - "step": 3365000 - }, - { - "epoch": 37.03, - "learning_rate": 3.2434402332361514e-08, - "loss": 3.7495, - "step": 3365500 - }, - { - "epoch": 37.03, - "learning_rate": 3.242065020078112e-08, - "loss": 3.769, - "step": 3366000 - }, - { - "epoch": 37.04, - "learning_rate": 3.240689806920072e-08, - "loss": 3.7485, - "step": 3366500 - }, - { - "epoch": 37.04, - "learning_rate": 3.239314593762033e-08, - "loss": 3.7702, - "step": 3367000 - }, - { - "epoch": 37.05, - "learning_rate": 3.237939380603993e-08, - "loss": 3.763, - "step": 3367500 - }, - { - "epoch": 37.05, - "learning_rate": 3.2365641674459535e-08, - "loss": 3.743, - "step": 3368000 - }, - { - "epoch": 37.06, - "learning_rate": 3.2351889542879144e-08, - "loss": 3.752, - "step": 3368500 - }, - { - "epoch": 37.06, - "learning_rate": 3.233813741129875e-08, - "loss": 3.7524, - "step": 3369000 - }, - { - "epoch": 37.07, - "learning_rate": 3.232438527971835e-08, - "loss": 3.7663, - "step": 3369500 - }, - { - "epoch": 37.08, - "learning_rate": 3.231063314813796e-08, - "loss": 3.77, - "step": 3370000 - }, - { - "epoch": 37.08, - "learning_rate": 3.229688101655756e-08, - "loss": 3.7668, - "step": 3370500 - }, - { - "epoch": 37.09, - "learning_rate": 3.228312888497717e-08, - "loss": 3.749, - "step": 3371000 - }, - { - "epoch": 37.09, - "learning_rate": 3.2269376753396774e-08, - "loss": 3.7427, - "step": 3371500 - }, - { - "epoch": 37.1, - "learning_rate": 3.2255624621816376e-08, - "loss": 3.75, - "step": 3372000 - }, - { - "epoch": 37.1, - "learning_rate": 3.2241872490235986e-08, - "loss": 3.7652, - "step": 3372500 - }, - { - "epoch": 37.11, - "learning_rate": 3.222812035865559e-08, - "loss": 3.7411, - "step": 3373000 - }, - { - "epoch": 37.11, - "learning_rate": 3.22143682270752e-08, - "loss": 3.7549, - "step": 3373500 - }, - { - "epoch": 37.12, - "learning_rate": 3.22006160954948e-08, - "loss": 3.7574, - "step": 3374000 - }, - { - "epoch": 37.13, - "learning_rate": 3.218686396391441e-08, - "loss": 3.7368, - "step": 3374500 - }, - { - "epoch": 37.13, - "learning_rate": 3.217311183233401e-08, - "loss": 3.7368, - "step": 3375000 - }, - { - "epoch": 37.14, - "learning_rate": 3.2159359700753615e-08, - "loss": 3.7486, - "step": 3375500 - }, - { - "epoch": 37.14, - "learning_rate": 3.2145607569173225e-08, - "loss": 3.7401, - "step": 3376000 - }, - { - "epoch": 37.15, - "learning_rate": 3.213185543759283e-08, - "loss": 3.7667, - "step": 3376500 - }, - { - "epoch": 37.15, - "learning_rate": 3.211810330601243e-08, - "loss": 3.7564, - "step": 3377000 - }, - { - "epoch": 37.16, - "learning_rate": 3.210435117443204e-08, - "loss": 3.7523, - "step": 3377500 - }, - { - "epoch": 37.16, - "learning_rate": 3.209059904285164e-08, - "loss": 3.7384, - "step": 3378000 - }, - { - "epoch": 37.17, - "learning_rate": 3.2076846911271245e-08, - "loss": 3.7569, - "step": 3378500 - }, - { - "epoch": 37.17, - "learning_rate": 3.2063094779690854e-08, - "loss": 3.7702, - "step": 3379000 - }, - { - "epoch": 37.18, - "learning_rate": 3.204934264811046e-08, - "loss": 3.7692, - "step": 3379500 - }, - { - "epoch": 37.19, - "learning_rate": 3.203559051653006e-08, - "loss": 3.7377, - "step": 3380000 - }, - { - "epoch": 37.19, - "learning_rate": 3.202183838494967e-08, - "loss": 3.7749, - "step": 3380500 - }, - { - "epoch": 37.2, - "learning_rate": 3.200808625336927e-08, - "loss": 3.7511, - "step": 3381000 - }, - { - "epoch": 37.2, - "learning_rate": 3.1994334121788874e-08, - "loss": 3.754, - "step": 3381500 - }, - { - "epoch": 37.21, - "learning_rate": 3.1980581990208484e-08, - "loss": 3.7647, - "step": 3382000 - }, - { - "epoch": 37.21, - "learning_rate": 3.1966829858628087e-08, - "loss": 3.7611, - "step": 3382500 - }, - { - "epoch": 37.22, - "learning_rate": 3.195307772704769e-08, - "loss": 3.7646, - "step": 3383000 - }, - { - "epoch": 37.22, - "learning_rate": 3.19393255954673e-08, - "loss": 3.76, - "step": 3383500 - }, - { - "epoch": 37.23, - "learning_rate": 3.19255734638869e-08, - "loss": 3.7569, - "step": 3384000 - }, - { - "epoch": 37.24, - "learning_rate": 3.1911821332306504e-08, - "loss": 3.7556, - "step": 3384500 - }, - { - "epoch": 37.24, - "learning_rate": 3.1898069200726113e-08, - "loss": 3.7725, - "step": 3385000 - }, - { - "epoch": 37.25, - "learning_rate": 3.1884317069145716e-08, - "loss": 3.7548, - "step": 3385500 - }, - { - "epoch": 37.25, - "learning_rate": 3.187056493756532e-08, - "loss": 3.7667, - "step": 3386000 - }, - { - "epoch": 37.26, - "learning_rate": 3.185681280598493e-08, - "loss": 3.7431, - "step": 3386500 - }, - { - "epoch": 37.26, - "learning_rate": 3.184306067440453e-08, - "loss": 3.768, - "step": 3387000 - }, - { - "epoch": 37.27, - "learning_rate": 3.1829308542824134e-08, - "loss": 3.7647, - "step": 3387500 - }, - { - "epoch": 37.27, - "learning_rate": 3.181555641124374e-08, - "loss": 3.7518, - "step": 3388000 - }, - { - "epoch": 37.28, - "learning_rate": 3.1801804279663346e-08, - "loss": 3.7596, - "step": 3388500 - }, - { - "epoch": 37.28, - "learning_rate": 3.178805214808295e-08, - "loss": 3.7602, - "step": 3389000 - }, - { - "epoch": 37.29, - "learning_rate": 3.177430001650256e-08, - "loss": 3.786, - "step": 3389500 - }, - { - "epoch": 37.3, - "learning_rate": 3.176054788492216e-08, - "loss": 3.7581, - "step": 3390000 - }, - { - "epoch": 37.3, - "learning_rate": 3.174679575334176e-08, - "loss": 3.7483, - "step": 3390500 - }, - { - "epoch": 37.31, - "learning_rate": 3.173304362176137e-08, - "loss": 3.7434, - "step": 3391000 - }, - { - "epoch": 37.31, - "learning_rate": 3.1719291490180975e-08, - "loss": 3.7517, - "step": 3391500 - }, - { - "epoch": 37.32, - "learning_rate": 3.170553935860058e-08, - "loss": 3.7596, - "step": 3392000 - }, - { - "epoch": 37.32, - "learning_rate": 3.169178722702019e-08, - "loss": 3.7699, - "step": 3392500 - }, - { - "epoch": 37.33, - "learning_rate": 3.167803509543979e-08, - "loss": 3.7546, - "step": 3393000 - }, - { - "epoch": 37.33, - "learning_rate": 3.166428296385939e-08, - "loss": 3.7551, - "step": 3393500 - }, - { - "epoch": 37.34, - "learning_rate": 3.1650530832279e-08, - "loss": 3.7466, - "step": 3394000 - }, - { - "epoch": 37.35, - "learning_rate": 3.1636778700698605e-08, - "loss": 3.7481, - "step": 3394500 - }, - { - "epoch": 37.35, - "learning_rate": 3.162302656911821e-08, - "loss": 3.7817, - "step": 3395000 - }, - { - "epoch": 37.36, - "learning_rate": 3.160927443753782e-08, - "loss": 3.768, - "step": 3395500 - }, - { - "epoch": 37.36, - "learning_rate": 3.159552230595742e-08, - "loss": 3.7507, - "step": 3396000 - }, - { - "epoch": 37.37, - "learning_rate": 3.158177017437703e-08, - "loss": 3.7723, - "step": 3396500 - }, - { - "epoch": 37.37, - "learning_rate": 3.156801804279663e-08, - "loss": 3.7692, - "step": 3397000 - }, - { - "epoch": 37.38, - "learning_rate": 3.155426591121624e-08, - "loss": 3.7483, - "step": 3397500 - }, - { - "epoch": 37.38, - "learning_rate": 3.1540513779635844e-08, - "loss": 3.7496, - "step": 3398000 - }, - { - "epoch": 37.39, - "learning_rate": 3.152676164805545e-08, - "loss": 3.7613, - "step": 3398500 - }, - { - "epoch": 37.39, - "learning_rate": 3.1513009516475056e-08, - "loss": 3.7695, - "step": 3399000 - }, - { - "epoch": 37.4, - "learning_rate": 3.149925738489466e-08, - "loss": 3.7552, - "step": 3399500 - }, - { - "epoch": 37.41, - "learning_rate": 3.148550525331426e-08, - "loss": 3.7689, - "step": 3400000 - }, - { - "epoch": 37.41, - "learning_rate": 3.147175312173387e-08, - "loss": 3.7375, - "step": 3400500 - }, - { - "epoch": 37.42, - "learning_rate": 3.1458000990153473e-08, - "loss": 3.7754, - "step": 3401000 - }, - { - "epoch": 37.42, - "learning_rate": 3.1444248858573076e-08, - "loss": 3.7627, - "step": 3401500 - }, - { - "epoch": 37.43, - "learning_rate": 3.1430496726992686e-08, - "loss": 3.7679, - "step": 3402000 - }, - { - "epoch": 37.43, - "learning_rate": 3.141674459541229e-08, - "loss": 3.7565, - "step": 3402500 - }, - { - "epoch": 37.44, - "learning_rate": 3.140299246383189e-08, - "loss": 3.7455, - "step": 3403000 - }, - { - "epoch": 37.44, - "learning_rate": 3.13892403322515e-08, - "loss": 3.7591, - "step": 3403500 - }, - { - "epoch": 37.45, - "learning_rate": 3.13754882006711e-08, - "loss": 3.7735, - "step": 3404000 - }, - { - "epoch": 37.46, - "learning_rate": 3.1361736069090706e-08, - "loss": 3.7531, - "step": 3404500 - }, - { - "epoch": 37.46, - "learning_rate": 3.1347983937510315e-08, - "loss": 3.7574, - "step": 3405000 - }, - { - "epoch": 37.47, - "learning_rate": 3.133423180592992e-08, - "loss": 3.7464, - "step": 3405500 - }, - { - "epoch": 37.47, - "learning_rate": 3.132047967434952e-08, - "loss": 3.766, - "step": 3406000 - }, - { - "epoch": 37.48, - "learning_rate": 3.130672754276913e-08, - "loss": 3.7378, - "step": 3406500 - }, - { - "epoch": 37.48, - "learning_rate": 3.129297541118873e-08, - "loss": 3.7488, - "step": 3407000 - }, - { - "epoch": 37.49, - "learning_rate": 3.1279223279608335e-08, - "loss": 3.777, - "step": 3407500 - }, - { - "epoch": 37.49, - "learning_rate": 3.1265471148027945e-08, - "loss": 3.7627, - "step": 3408000 - }, - { - "epoch": 37.5, - "learning_rate": 3.125171901644755e-08, - "loss": 3.7484, - "step": 3408500 - }, - { - "epoch": 37.5, - "learning_rate": 3.123796688486715e-08, - "loss": 3.7706, - "step": 3409000 - }, - { - "epoch": 37.51, - "learning_rate": 3.122421475328676e-08, - "loss": 3.7523, - "step": 3409500 - }, - { - "epoch": 37.52, - "learning_rate": 3.121046262170636e-08, - "loss": 3.756, - "step": 3410000 - }, - { - "epoch": 37.52, - "learning_rate": 3.1196710490125965e-08, - "loss": 3.7488, - "step": 3410500 - }, - { - "epoch": 37.53, - "learning_rate": 3.1182958358545574e-08, - "loss": 3.7655, - "step": 3411000 - }, - { - "epoch": 37.53, - "learning_rate": 3.116920622696518e-08, - "loss": 3.7671, - "step": 3411500 - }, - { - "epoch": 37.54, - "learning_rate": 3.115545409538478e-08, - "loss": 3.7543, - "step": 3412000 - }, - { - "epoch": 37.54, - "learning_rate": 3.114170196380439e-08, - "loss": 3.754, - "step": 3412500 - }, - { - "epoch": 37.55, - "learning_rate": 3.112794983222399e-08, - "loss": 3.7609, - "step": 3413000 - }, - { - "epoch": 37.55, - "learning_rate": 3.1114197700643595e-08, - "loss": 3.7599, - "step": 3413500 - }, - { - "epoch": 37.56, - "learning_rate": 3.1100445569063204e-08, - "loss": 3.7916, - "step": 3414000 - }, - { - "epoch": 37.57, - "learning_rate": 3.1086693437482807e-08, - "loss": 3.7638, - "step": 3414500 - }, - { - "epoch": 37.57, - "learning_rate": 3.107294130590241e-08, - "loss": 3.7652, - "step": 3415000 - }, - { - "epoch": 37.58, - "learning_rate": 3.105918917432202e-08, - "loss": 3.7718, - "step": 3415500 - }, - { - "epoch": 37.58, - "learning_rate": 3.104543704274162e-08, - "loss": 3.7686, - "step": 3416000 - }, - { - "epoch": 37.59, - "learning_rate": 3.1031684911161224e-08, - "loss": 3.7525, - "step": 3416500 - }, - { - "epoch": 37.59, - "learning_rate": 3.1017932779580833e-08, - "loss": 3.7379, - "step": 3417000 - }, - { - "epoch": 37.6, - "learning_rate": 3.1004180648000436e-08, - "loss": 3.7551, - "step": 3417500 - }, - { - "epoch": 37.6, - "learning_rate": 3.099042851642004e-08, - "loss": 3.7739, - "step": 3418000 - }, - { - "epoch": 37.61, - "learning_rate": 3.097667638483965e-08, - "loss": 3.738, - "step": 3418500 - }, - { - "epoch": 37.61, - "learning_rate": 3.096292425325925e-08, - "loss": 3.7457, - "step": 3419000 - }, - { - "epoch": 37.62, - "learning_rate": 3.094917212167886e-08, - "loss": 3.7526, - "step": 3419500 - }, - { - "epoch": 37.63, - "learning_rate": 3.093541999009846e-08, - "loss": 3.7446, - "step": 3420000 - }, - { - "epoch": 37.63, - "learning_rate": 3.0921667858518066e-08, - "loss": 3.7633, - "step": 3420500 - }, - { - "epoch": 37.64, - "learning_rate": 3.0907915726937675e-08, - "loss": 3.7568, - "step": 3421000 - }, - { - "epoch": 37.64, - "learning_rate": 3.089416359535728e-08, - "loss": 3.7442, - "step": 3421500 - }, - { - "epoch": 37.65, - "learning_rate": 3.088041146377689e-08, - "loss": 3.74, - "step": 3422000 - }, - { - "epoch": 37.65, - "learning_rate": 3.086665933219649e-08, - "loss": 3.7407, - "step": 3422500 - }, - { - "epoch": 37.66, - "learning_rate": 3.08529072006161e-08, - "loss": 3.7679, - "step": 3423000 - }, - { - "epoch": 37.66, - "learning_rate": 3.08391550690357e-08, - "loss": 3.7464, - "step": 3423500 - }, - { - "epoch": 37.67, - "learning_rate": 3.0825402937455305e-08, - "loss": 3.747, - "step": 3424000 - }, - { - "epoch": 37.68, - "learning_rate": 3.0811650805874914e-08, - "loss": 3.7591, - "step": 3424500 - }, - { - "epoch": 37.68, - "learning_rate": 3.079789867429452e-08, - "loss": 3.7494, - "step": 3425000 - }, - { - "epoch": 37.69, - "learning_rate": 3.078414654271412e-08, - "loss": 3.7703, - "step": 3425500 - }, - { - "epoch": 37.69, - "learning_rate": 3.077039441113373e-08, - "loss": 3.7678, - "step": 3426000 - }, - { - "epoch": 37.7, - "learning_rate": 3.075664227955333e-08, - "loss": 3.7448, - "step": 3426500 - }, - { - "epoch": 37.7, - "learning_rate": 3.0742890147972934e-08, - "loss": 3.7676, - "step": 3427000 - }, - { - "epoch": 37.71, - "learning_rate": 3.0729138016392544e-08, - "loss": 3.7554, - "step": 3427500 - }, - { - "epoch": 37.71, - "learning_rate": 3.0715385884812146e-08, - "loss": 3.7644, - "step": 3428000 - }, - { - "epoch": 37.72, - "learning_rate": 3.070163375323175e-08, - "loss": 3.741, - "step": 3428500 - }, - { - "epoch": 37.72, - "learning_rate": 3.068788162165136e-08, - "loss": 3.7481, - "step": 3429000 - }, - { - "epoch": 37.73, - "learning_rate": 3.067412949007096e-08, - "loss": 3.7594, - "step": 3429500 - }, - { - "epoch": 37.74, - "learning_rate": 3.0660377358490564e-08, - "loss": 3.7575, - "step": 3430000 - }, - { - "epoch": 37.74, - "learning_rate": 3.064662522691017e-08, - "loss": 3.7572, - "step": 3430500 - }, - { - "epoch": 37.75, - "learning_rate": 3.0632873095329776e-08, - "loss": 3.773, - "step": 3431000 - }, - { - "epoch": 37.75, - "learning_rate": 3.061912096374938e-08, - "loss": 3.76, - "step": 3431500 - }, - { - "epoch": 37.76, - "learning_rate": 3.060536883216899e-08, - "loss": 3.7595, - "step": 3432000 - }, - { - "epoch": 37.76, - "learning_rate": 3.059161670058859e-08, - "loss": 3.7229, - "step": 3432500 - }, - { - "epoch": 37.77, - "learning_rate": 3.0577864569008193e-08, - "loss": 3.7395, - "step": 3433000 - }, - { - "epoch": 37.77, - "learning_rate": 3.05641124374278e-08, - "loss": 3.7567, - "step": 3433500 - }, - { - "epoch": 37.78, - "learning_rate": 3.0550360305847406e-08, - "loss": 3.7415, - "step": 3434000 - }, - { - "epoch": 37.79, - "learning_rate": 3.053660817426701e-08, - "loss": 3.7541, - "step": 3434500 - }, - { - "epoch": 37.79, - "learning_rate": 3.052285604268662e-08, - "loss": 3.7471, - "step": 3435000 - }, - { - "epoch": 37.8, - "learning_rate": 3.050910391110622e-08, - "loss": 3.76, - "step": 3435500 - }, - { - "epoch": 37.8, - "learning_rate": 3.049535177952582e-08, - "loss": 3.751, - "step": 3436000 - }, - { - "epoch": 37.81, - "learning_rate": 3.048159964794543e-08, - "loss": 3.7786, - "step": 3436500 - }, - { - "epoch": 37.81, - "learning_rate": 3.0467847516365035e-08, - "loss": 3.7555, - "step": 3437000 - }, - { - "epoch": 37.82, - "learning_rate": 3.045409538478464e-08, - "loss": 3.7446, - "step": 3437500 - }, - { - "epoch": 37.82, - "learning_rate": 3.044034325320424e-08, - "loss": 3.7888, - "step": 3438000 - }, - { - "epoch": 37.83, - "learning_rate": 3.042659112162385e-08, - "loss": 3.7453, - "step": 3438500 - }, - { - "epoch": 37.83, - "learning_rate": 3.041283899004345e-08, - "loss": 3.7538, - "step": 3439000 - }, - { - "epoch": 37.84, - "learning_rate": 3.0399086858463055e-08, - "loss": 3.7609, - "step": 3439500 - }, - { - "epoch": 37.85, - "learning_rate": 3.0385334726882665e-08, - "loss": 3.7641, - "step": 3440000 - }, - { - "epoch": 37.85, - "learning_rate": 3.037158259530227e-08, - "loss": 3.7627, - "step": 3440500 - }, - { - "epoch": 37.86, - "learning_rate": 3.035783046372187e-08, - "loss": 3.7684, - "step": 3441000 - }, - { - "epoch": 37.86, - "learning_rate": 3.034407833214148e-08, - "loss": 3.7567, - "step": 3441500 - }, - { - "epoch": 37.87, - "learning_rate": 3.033032620056108e-08, - "loss": 3.7295, - "step": 3442000 - }, - { - "epoch": 37.87, - "learning_rate": 3.0316574068980685e-08, - "loss": 3.7563, - "step": 3442500 - }, - { - "epoch": 37.88, - "learning_rate": 3.0302821937400294e-08, - "loss": 3.7524, - "step": 3443000 - }, - { - "epoch": 37.88, - "learning_rate": 3.02890698058199e-08, - "loss": 3.7432, - "step": 3443500 - }, - { - "epoch": 37.89, - "learning_rate": 3.0275317674239506e-08, - "loss": 3.7419, - "step": 3444000 - }, - { - "epoch": 37.9, - "learning_rate": 3.026156554265911e-08, - "loss": 3.7552, - "step": 3444500 - }, - { - "epoch": 37.9, - "learning_rate": 3.024781341107872e-08, - "loss": 3.7488, - "step": 3445000 - }, - { - "epoch": 37.91, - "learning_rate": 3.023406127949832e-08, - "loss": 3.7537, - "step": 3445500 - }, - { - "epoch": 37.91, - "learning_rate": 3.022030914791793e-08, - "loss": 3.7605, - "step": 3446000 - }, - { - "epoch": 37.92, - "learning_rate": 3.020655701633753e-08, - "loss": 3.754, - "step": 3446500 - }, - { - "epoch": 37.92, - "learning_rate": 3.0192804884757136e-08, - "loss": 3.7469, - "step": 3447000 - }, - { - "epoch": 37.93, - "learning_rate": 3.0179052753176745e-08, - "loss": 3.7449, - "step": 3447500 - }, - { - "epoch": 37.93, - "learning_rate": 3.016530062159635e-08, - "loss": 3.7657, - "step": 3448000 - }, - { - "epoch": 37.94, - "learning_rate": 3.015154849001595e-08, - "loss": 3.7562, - "step": 3448500 - }, - { - "epoch": 37.94, - "learning_rate": 3.013779635843556e-08, - "loss": 3.7506, - "step": 3449000 - }, - { - "epoch": 37.95, - "learning_rate": 3.012404422685516e-08, - "loss": 3.7683, - "step": 3449500 - }, - { - "epoch": 37.96, - "learning_rate": 3.0110292095274766e-08, - "loss": 3.7657, - "step": 3450000 - }, - { - "epoch": 37.96, - "learning_rate": 3.0096539963694375e-08, - "loss": 3.734, - "step": 3450500 - }, - { - "epoch": 37.97, - "learning_rate": 3.008278783211398e-08, - "loss": 3.7426, - "step": 3451000 - }, - { - "epoch": 37.97, - "learning_rate": 3.006903570053358e-08, - "loss": 3.772, - "step": 3451500 - }, - { - "epoch": 37.98, - "learning_rate": 3.005528356895319e-08, - "loss": 3.743, - "step": 3452000 - }, - { - "epoch": 37.98, - "learning_rate": 3.004153143737279e-08, - "loss": 3.7583, - "step": 3452500 - }, - { - "epoch": 37.99, - "learning_rate": 3.0027779305792395e-08, - "loss": 3.7463, - "step": 3453000 - }, - { - "epoch": 37.99, - "learning_rate": 3.0014027174212005e-08, - "loss": 3.7617, - "step": 3453500 - }, - { - "epoch": 38.0, - "learning_rate": 3.000027504263161e-08, - "loss": 3.7398, - "step": 3454000 - }, - { - "epoch": 38.0, - "eval_loss": 3.8288087844848633, - "eval_runtime": 6.1441, - "eval_samples_per_second": 252.924, - "step": 3454010 - }, - { - "epoch": 38.01, - "learning_rate": 2.998652291105121e-08, - "loss": 3.747, - "step": 3454500 - }, - { - "epoch": 38.01, - "learning_rate": 2.997277077947082e-08, - "loss": 3.7693, - "step": 3455000 - }, - { - "epoch": 38.02, - "learning_rate": 2.995901864789042e-08, - "loss": 3.7615, - "step": 3455500 - }, - { - "epoch": 38.02, - "learning_rate": 2.9945266516310025e-08, - "loss": 3.7497, - "step": 3456000 - }, - { - "epoch": 38.03, - "learning_rate": 2.9931514384729634e-08, - "loss": 3.7631, - "step": 3456500 - }, - { - "epoch": 38.03, - "learning_rate": 2.991776225314924e-08, - "loss": 3.7566, - "step": 3457000 - }, - { - "epoch": 38.04, - "learning_rate": 2.990401012156884e-08, - "loss": 3.7484, - "step": 3457500 - }, - { - "epoch": 38.04, - "learning_rate": 2.989025798998845e-08, - "loss": 3.7608, - "step": 3458000 - }, - { - "epoch": 38.05, - "learning_rate": 2.987650585840805e-08, - "loss": 3.7656, - "step": 3458500 - }, - { - "epoch": 38.05, - "learning_rate": 2.9862753726827654e-08, - "loss": 3.7521, - "step": 3459000 - }, - { - "epoch": 38.06, - "learning_rate": 2.9849001595247264e-08, - "loss": 3.755, - "step": 3459500 - }, - { - "epoch": 38.07, - "learning_rate": 2.9835249463666866e-08, - "loss": 3.731, - "step": 3460000 - }, - { - "epoch": 38.07, - "learning_rate": 2.982149733208647e-08, - "loss": 3.7729, - "step": 3460500 - }, - { - "epoch": 38.08, - "learning_rate": 2.980774520050608e-08, - "loss": 3.7281, - "step": 3461000 - }, - { - "epoch": 38.08, - "learning_rate": 2.979399306892568e-08, - "loss": 3.7644, - "step": 3461500 - }, - { - "epoch": 38.09, - "learning_rate": 2.9780240937345287e-08, - "loss": 3.7385, - "step": 3462000 - }, - { - "epoch": 38.09, - "learning_rate": 2.9766488805764893e-08, - "loss": 3.7639, - "step": 3462500 - }, - { - "epoch": 38.1, - "learning_rate": 2.9752736674184496e-08, - "loss": 3.7688, - "step": 3463000 - }, - { - "epoch": 38.1, - "learning_rate": 2.9738984542604102e-08, - "loss": 3.758, - "step": 3463500 - }, - { - "epoch": 38.11, - "learning_rate": 2.9725232411023708e-08, - "loss": 3.7447, - "step": 3464000 - }, - { - "epoch": 38.12, - "learning_rate": 2.9711480279443314e-08, - "loss": 3.7391, - "step": 3464500 - }, - { - "epoch": 38.12, - "learning_rate": 2.9697728147862917e-08, - "loss": 3.7558, - "step": 3465000 - }, - { - "epoch": 38.13, - "learning_rate": 2.9683976016282526e-08, - "loss": 3.7415, - "step": 3465500 - }, - { - "epoch": 38.13, - "learning_rate": 2.967022388470213e-08, - "loss": 3.742, - "step": 3466000 - }, - { - "epoch": 38.14, - "learning_rate": 2.965647175312173e-08, - "loss": 3.7526, - "step": 3466500 - }, - { - "epoch": 38.14, - "learning_rate": 2.964271962154134e-08, - "loss": 3.7474, - "step": 3467000 - }, - { - "epoch": 38.15, - "learning_rate": 2.9628967489960944e-08, - "loss": 3.7646, - "step": 3467500 - }, - { - "epoch": 38.15, - "learning_rate": 2.9615215358380546e-08, - "loss": 3.7553, - "step": 3468000 - }, - { - "epoch": 38.16, - "learning_rate": 2.9601463226800156e-08, - "loss": 3.7563, - "step": 3468500 - }, - { - "epoch": 38.16, - "learning_rate": 2.958771109521976e-08, - "loss": 3.7656, - "step": 3469000 - }, - { - "epoch": 38.17, - "learning_rate": 2.957395896363936e-08, - "loss": 3.7524, - "step": 3469500 - }, - { - "epoch": 38.18, - "learning_rate": 2.956020683205897e-08, - "loss": 3.7736, - "step": 3470000 - }, - { - "epoch": 38.18, - "learning_rate": 2.9546454700478573e-08, - "loss": 3.7517, - "step": 3470500 - }, - { - "epoch": 38.19, - "learning_rate": 2.9532702568898176e-08, - "loss": 3.7344, - "step": 3471000 - }, - { - "epoch": 38.19, - "learning_rate": 2.9518950437317785e-08, - "loss": 3.7378, - "step": 3471500 - }, - { - "epoch": 38.2, - "learning_rate": 2.9505198305737388e-08, - "loss": 3.7528, - "step": 3472000 - }, - { - "epoch": 38.2, - "learning_rate": 2.949144617415699e-08, - "loss": 3.7721, - "step": 3472500 - }, - { - "epoch": 38.21, - "learning_rate": 2.94776940425766e-08, - "loss": 3.7645, - "step": 3473000 - }, - { - "epoch": 38.21, - "learning_rate": 2.9463941910996203e-08, - "loss": 3.7421, - "step": 3473500 - }, - { - "epoch": 38.22, - "learning_rate": 2.9450189779415806e-08, - "loss": 3.7563, - "step": 3474000 - }, - { - "epoch": 38.23, - "learning_rate": 2.9436437647835412e-08, - "loss": 3.7472, - "step": 3474500 - }, - { - "epoch": 38.23, - "learning_rate": 2.9422685516255018e-08, - "loss": 3.7597, - "step": 3475000 - }, - { - "epoch": 38.24, - "learning_rate": 2.9408933384674624e-08, - "loss": 3.7593, - "step": 3475500 - }, - { - "epoch": 38.24, - "learning_rate": 2.9395181253094226e-08, - "loss": 3.7685, - "step": 3476000 - }, - { - "epoch": 38.25, - "learning_rate": 2.9381429121513836e-08, - "loss": 3.7487, - "step": 3476500 - }, - { - "epoch": 38.25, - "learning_rate": 2.936767698993344e-08, - "loss": 3.7574, - "step": 3477000 - }, - { - "epoch": 38.26, - "learning_rate": 2.935392485835304e-08, - "loss": 3.7373, - "step": 3477500 - }, - { - "epoch": 38.26, - "learning_rate": 2.934017272677265e-08, - "loss": 3.7508, - "step": 3478000 - }, - { - "epoch": 38.27, - "learning_rate": 2.9326420595192253e-08, - "loss": 3.7464, - "step": 3478500 - }, - { - "epoch": 38.27, - "learning_rate": 2.9312668463611856e-08, - "loss": 3.7557, - "step": 3479000 - }, - { - "epoch": 38.28, - "learning_rate": 2.9298916332031465e-08, - "loss": 3.7528, - "step": 3479500 - }, - { - "epoch": 38.29, - "learning_rate": 2.9285164200451068e-08, - "loss": 3.7653, - "step": 3480000 - }, - { - "epoch": 38.29, - "learning_rate": 2.927141206887067e-08, - "loss": 3.763, - "step": 3480500 - }, - { - "epoch": 38.3, - "learning_rate": 2.925765993729028e-08, - "loss": 3.7287, - "step": 3481000 - }, - { - "epoch": 38.3, - "learning_rate": 2.9243907805709883e-08, - "loss": 3.7664, - "step": 3481500 - }, - { - "epoch": 38.31, - "learning_rate": 2.9230155674129486e-08, - "loss": 3.7501, - "step": 3482000 - }, - { - "epoch": 38.31, - "learning_rate": 2.9216403542549095e-08, - "loss": 3.7539, - "step": 3482500 - }, - { - "epoch": 38.32, - "learning_rate": 2.9202651410968698e-08, - "loss": 3.7468, - "step": 3483000 - }, - { - "epoch": 38.32, - "learning_rate": 2.91888992793883e-08, - "loss": 3.7583, - "step": 3483500 - }, - { - "epoch": 38.33, - "learning_rate": 2.917514714780791e-08, - "loss": 3.7505, - "step": 3484000 - }, - { - "epoch": 38.34, - "learning_rate": 2.9161395016227513e-08, - "loss": 3.7501, - "step": 3484500 - }, - { - "epoch": 38.34, - "learning_rate": 2.9147642884647115e-08, - "loss": 3.7699, - "step": 3485000 - }, - { - "epoch": 38.35, - "learning_rate": 2.9133890753066725e-08, - "loss": 3.7592, - "step": 3485500 - }, - { - "epoch": 38.35, - "learning_rate": 2.9120138621486327e-08, - "loss": 3.7565, - "step": 3486000 - }, - { - "epoch": 38.36, - "learning_rate": 2.9106386489905933e-08, - "loss": 3.7387, - "step": 3486500 - }, - { - "epoch": 38.36, - "learning_rate": 2.909263435832554e-08, - "loss": 3.7679, - "step": 3487000 - }, - { - "epoch": 38.37, - "learning_rate": 2.9078882226745145e-08, - "loss": 3.7425, - "step": 3487500 - }, - { - "epoch": 38.37, - "learning_rate": 2.9065130095164748e-08, - "loss": 3.7636, - "step": 3488000 - }, - { - "epoch": 38.38, - "learning_rate": 2.9051377963584357e-08, - "loss": 3.7664, - "step": 3488500 - }, - { - "epoch": 38.38, - "learning_rate": 2.903762583200396e-08, - "loss": 3.7403, - "step": 3489000 - }, - { - "epoch": 38.39, - "learning_rate": 2.9023873700423563e-08, - "loss": 3.7367, - "step": 3489500 - }, - { - "epoch": 38.4, - "learning_rate": 2.9010121568843172e-08, - "loss": 3.7402, - "step": 3490000 - }, - { - "epoch": 38.4, - "learning_rate": 2.8996369437262775e-08, - "loss": 3.7482, - "step": 3490500 - }, - { - "epoch": 38.41, - "learning_rate": 2.8982617305682378e-08, - "loss": 3.7431, - "step": 3491000 - }, - { - "epoch": 38.41, - "learning_rate": 2.8968865174101987e-08, - "loss": 3.7447, - "step": 3491500 - }, - { - "epoch": 38.42, - "learning_rate": 2.895511304252159e-08, - "loss": 3.7605, - "step": 3492000 - }, - { - "epoch": 38.42, - "learning_rate": 2.8941360910941193e-08, - "loss": 3.7714, - "step": 3492500 - }, - { - "epoch": 38.43, - "learning_rate": 2.8927608779360802e-08, - "loss": 3.7476, - "step": 3493000 - }, - { - "epoch": 38.43, - "learning_rate": 2.8913856647780405e-08, - "loss": 3.7413, - "step": 3493500 - }, - { - "epoch": 38.44, - "learning_rate": 2.8900104516200007e-08, - "loss": 3.7629, - "step": 3494000 - }, - { - "epoch": 38.45, - "learning_rate": 2.8886352384619617e-08, - "loss": 3.7504, - "step": 3494500 - }, - { - "epoch": 38.45, - "learning_rate": 2.887260025303922e-08, - "loss": 3.7838, - "step": 3495000 - }, - { - "epoch": 38.46, - "learning_rate": 2.8858848121458822e-08, - "loss": 3.7529, - "step": 3495500 - }, - { - "epoch": 38.46, - "learning_rate": 2.884509598987843e-08, - "loss": 3.7282, - "step": 3496000 - }, - { - "epoch": 38.47, - "learning_rate": 2.8831343858298034e-08, - "loss": 3.7472, - "step": 3496500 - }, - { - "epoch": 38.47, - "learning_rate": 2.8817591726717637e-08, - "loss": 3.7521, - "step": 3497000 - }, - { - "epoch": 38.48, - "learning_rate": 2.8803839595137246e-08, - "loss": 3.7688, - "step": 3497500 - }, - { - "epoch": 38.48, - "learning_rate": 2.879008746355685e-08, - "loss": 3.7477, - "step": 3498000 - }, - { - "epoch": 38.49, - "learning_rate": 2.8776335331976455e-08, - "loss": 3.7729, - "step": 3498500 - }, - { - "epoch": 38.49, - "learning_rate": 2.876258320039606e-08, - "loss": 3.7531, - "step": 3499000 - }, - { - "epoch": 38.5, - "learning_rate": 2.8748831068815667e-08, - "loss": 3.7701, - "step": 3499500 - }, - { - "epoch": 38.51, - "learning_rate": 2.873507893723527e-08, - "loss": 3.7546, - "step": 3500000 - }, - { - "epoch": 38.51, - "learning_rate": 2.8721326805654876e-08, - "loss": 3.7481, - "step": 3500500 - }, - { - "epoch": 38.52, - "learning_rate": 2.8707574674074482e-08, - "loss": 3.7578, - "step": 3501000 - }, - { - "epoch": 38.52, - "learning_rate": 2.8693822542494085e-08, - "loss": 3.7348, - "step": 3501500 - }, - { - "epoch": 38.53, - "learning_rate": 2.8680070410913694e-08, - "loss": 3.7407, - "step": 3502000 - }, - { - "epoch": 38.53, - "learning_rate": 2.8666318279333297e-08, - "loss": 3.7657, - "step": 3502500 - }, - { - "epoch": 38.54, - "learning_rate": 2.86525661477529e-08, - "loss": 3.7645, - "step": 3503000 - }, - { - "epoch": 38.54, - "learning_rate": 2.863881401617251e-08, - "loss": 3.757, - "step": 3503500 - }, - { - "epoch": 38.55, - "learning_rate": 2.862506188459211e-08, - "loss": 3.748, - "step": 3504000 - }, - { - "epoch": 38.56, - "learning_rate": 2.8611309753011714e-08, - "loss": 3.7746, - "step": 3504500 - }, - { - "epoch": 38.56, - "learning_rate": 2.8597557621431324e-08, - "loss": 3.7413, - "step": 3505000 - }, - { - "epoch": 38.57, - "learning_rate": 2.8583805489850926e-08, - "loss": 3.7268, - "step": 3505500 - }, - { - "epoch": 38.57, - "learning_rate": 2.857005335827053e-08, - "loss": 3.7501, - "step": 3506000 - }, - { - "epoch": 38.58, - "learning_rate": 2.8556301226690138e-08, - "loss": 3.7468, - "step": 3506500 - }, - { - "epoch": 38.58, - "learning_rate": 2.854254909510974e-08, - "loss": 3.7567, - "step": 3507000 - }, - { - "epoch": 38.59, - "learning_rate": 2.8528796963529344e-08, - "loss": 3.7683, - "step": 3507500 - }, - { - "epoch": 38.59, - "learning_rate": 2.8515044831948953e-08, - "loss": 3.771, - "step": 3508000 - }, - { - "epoch": 38.6, - "learning_rate": 2.8501292700368556e-08, - "loss": 3.7619, - "step": 3508500 - }, - { - "epoch": 38.6, - "learning_rate": 2.848754056878816e-08, - "loss": 3.7504, - "step": 3509000 - }, - { - "epoch": 38.61, - "learning_rate": 2.8473788437207768e-08, - "loss": 3.7455, - "step": 3509500 - }, - { - "epoch": 38.62, - "learning_rate": 2.846003630562737e-08, - "loss": 3.757, - "step": 3510000 - }, - { - "epoch": 38.62, - "learning_rate": 2.8446284174046973e-08, - "loss": 3.7603, - "step": 3510500 - }, - { - "epoch": 38.63, - "learning_rate": 2.8432532042466583e-08, - "loss": 3.7521, - "step": 3511000 - }, - { - "epoch": 38.63, - "learning_rate": 2.8418779910886185e-08, - "loss": 3.783, - "step": 3511500 - }, - { - "epoch": 38.64, - "learning_rate": 2.840502777930579e-08, - "loss": 3.7568, - "step": 3512000 - }, - { - "epoch": 38.64, - "learning_rate": 2.8391275647725394e-08, - "loss": 3.7694, - "step": 3512500 - }, - { - "epoch": 38.65, - "learning_rate": 2.8377523516145004e-08, - "loss": 3.7544, - "step": 3513000 - }, - { - "epoch": 38.65, - "learning_rate": 2.8363771384564606e-08, - "loss": 3.747, - "step": 3513500 - }, - { - "epoch": 38.66, - "learning_rate": 2.835001925298421e-08, - "loss": 3.749, - "step": 3514000 - }, - { - "epoch": 38.67, - "learning_rate": 2.833626712140382e-08, - "loss": 3.7521, - "step": 3514500 - }, - { - "epoch": 38.67, - "learning_rate": 2.832251498982342e-08, - "loss": 3.7781, - "step": 3515000 - }, - { - "epoch": 38.68, - "learning_rate": 2.8308762858243024e-08, - "loss": 3.74, - "step": 3515500 - }, - { - "epoch": 38.68, - "learning_rate": 2.8295010726662633e-08, - "loss": 3.7428, - "step": 3516000 - }, - { - "epoch": 38.69, - "learning_rate": 2.8281258595082236e-08, - "loss": 3.7529, - "step": 3516500 - }, - { - "epoch": 38.69, - "learning_rate": 2.826750646350184e-08, - "loss": 3.7672, - "step": 3517000 - }, - { - "epoch": 38.7, - "learning_rate": 2.8253754331921448e-08, - "loss": 3.7565, - "step": 3517500 - }, - { - "epoch": 38.7, - "learning_rate": 2.824000220034105e-08, - "loss": 3.7635, - "step": 3518000 - }, - { - "epoch": 38.71, - "learning_rate": 2.8226250068760653e-08, - "loss": 3.7433, - "step": 3518500 - }, - { - "epoch": 38.72, - "learning_rate": 2.8212497937180263e-08, - "loss": 3.7469, - "step": 3519000 - }, - { - "epoch": 38.72, - "learning_rate": 2.8198745805599865e-08, - "loss": 3.7532, - "step": 3519500 - }, - { - "epoch": 38.73, - "learning_rate": 2.8184993674019468e-08, - "loss": 3.7597, - "step": 3520000 - }, - { - "epoch": 38.73, - "learning_rate": 2.8171241542439078e-08, - "loss": 3.7533, - "step": 3520500 - }, - { - "epoch": 38.74, - "learning_rate": 2.815748941085868e-08, - "loss": 3.7697, - "step": 3521000 - }, - { - "epoch": 38.74, - "learning_rate": 2.8143737279278283e-08, - "loss": 3.7405, - "step": 3521500 - }, - { - "epoch": 38.75, - "learning_rate": 2.8129985147697892e-08, - "loss": 3.7465, - "step": 3522000 - }, - { - "epoch": 38.75, - "learning_rate": 2.8116233016117495e-08, - "loss": 3.7584, - "step": 3522500 - }, - { - "epoch": 38.76, - "learning_rate": 2.81024808845371e-08, - "loss": 3.7655, - "step": 3523000 - }, - { - "epoch": 38.76, - "learning_rate": 2.8088728752956707e-08, - "loss": 3.7634, - "step": 3523500 - }, - { - "epoch": 38.77, - "learning_rate": 2.8074976621376313e-08, - "loss": 3.7449, - "step": 3524000 - }, - { - "epoch": 38.78, - "learning_rate": 2.8061224489795916e-08, - "loss": 3.7725, - "step": 3524500 - }, - { - "epoch": 38.78, - "learning_rate": 2.8047472358215525e-08, - "loss": 3.7667, - "step": 3525000 - }, - { - "epoch": 38.79, - "learning_rate": 2.8033720226635128e-08, - "loss": 3.7567, - "step": 3525500 - }, - { - "epoch": 38.79, - "learning_rate": 2.801996809505473e-08, - "loss": 3.7658, - "step": 3526000 - }, - { - "epoch": 38.8, - "learning_rate": 2.800621596347434e-08, - "loss": 3.7498, - "step": 3526500 - }, - { - "epoch": 38.8, - "learning_rate": 2.7992463831893943e-08, - "loss": 3.7465, - "step": 3527000 - }, - { - "epoch": 38.81, - "learning_rate": 2.7978711700313545e-08, - "loss": 3.7611, - "step": 3527500 - }, - { - "epoch": 38.81, - "learning_rate": 2.7964959568733155e-08, - "loss": 3.7632, - "step": 3528000 - }, - { - "epoch": 38.82, - "learning_rate": 2.7951207437152758e-08, - "loss": 3.7851, - "step": 3528500 - }, - { - "epoch": 38.83, - "learning_rate": 2.793745530557236e-08, - "loss": 3.7624, - "step": 3529000 - }, - { - "epoch": 38.83, - "learning_rate": 2.792370317399197e-08, - "loss": 3.7496, - "step": 3529500 - }, - { - "epoch": 38.84, - "learning_rate": 2.7909951042411572e-08, - "loss": 3.7649, - "step": 3530000 - }, - { - "epoch": 38.84, - "learning_rate": 2.7896198910831175e-08, - "loss": 3.759, - "step": 3530500 - }, - { - "epoch": 38.85, - "learning_rate": 2.7882446779250784e-08, - "loss": 3.7518, - "step": 3531000 - }, - { - "epoch": 38.85, - "learning_rate": 2.7868694647670387e-08, - "loss": 3.7602, - "step": 3531500 - }, - { - "epoch": 38.86, - "learning_rate": 2.785494251608999e-08, - "loss": 3.7501, - "step": 3532000 - }, - { - "epoch": 38.86, - "learning_rate": 2.78411903845096e-08, - "loss": 3.7557, - "step": 3532500 - }, - { - "epoch": 38.87, - "learning_rate": 2.7827438252929202e-08, - "loss": 3.7326, - "step": 3533000 - }, - { - "epoch": 38.87, - "learning_rate": 2.7813686121348805e-08, - "loss": 3.7613, - "step": 3533500 - }, - { - "epoch": 38.88, - "learning_rate": 2.7799933989768414e-08, - "loss": 3.7563, - "step": 3534000 - }, - { - "epoch": 38.89, - "learning_rate": 2.7786181858188017e-08, - "loss": 3.7545, - "step": 3534500 - }, - { - "epoch": 38.89, - "learning_rate": 2.7772429726607623e-08, - "loss": 3.7558, - "step": 3535000 - }, - { - "epoch": 38.9, - "learning_rate": 2.775867759502723e-08, - "loss": 3.7496, - "step": 3535500 - }, - { - "epoch": 38.9, - "learning_rate": 2.7744925463446835e-08, - "loss": 3.7532, - "step": 3536000 - }, - { - "epoch": 38.91, - "learning_rate": 2.7731173331866438e-08, - "loss": 3.7688, - "step": 3536500 - }, - { - "epoch": 38.91, - "learning_rate": 2.7717421200286047e-08, - "loss": 3.7702, - "step": 3537000 - }, - { - "epoch": 38.92, - "learning_rate": 2.770366906870565e-08, - "loss": 3.7472, - "step": 3537500 - }, - { - "epoch": 38.92, - "learning_rate": 2.7689916937125252e-08, - "loss": 3.7546, - "step": 3538000 - }, - { - "epoch": 38.93, - "learning_rate": 2.7676164805544862e-08, - "loss": 3.7416, - "step": 3538500 - }, - { - "epoch": 38.94, - "learning_rate": 2.7662412673964464e-08, - "loss": 3.7591, - "step": 3539000 - }, - { - "epoch": 38.94, - "learning_rate": 2.7648660542384067e-08, - "loss": 3.7804, - "step": 3539500 - }, - { - "epoch": 38.95, - "learning_rate": 2.7634908410803676e-08, - "loss": 3.7571, - "step": 3540000 - }, - { - "epoch": 38.95, - "learning_rate": 2.762115627922328e-08, - "loss": 3.7568, - "step": 3540500 - }, - { - "epoch": 38.96, - "learning_rate": 2.7607404147642882e-08, - "loss": 3.7703, - "step": 3541000 - }, - { - "epoch": 38.96, - "learning_rate": 2.759365201606249e-08, - "loss": 3.7581, - "step": 3541500 - }, - { - "epoch": 38.97, - "learning_rate": 2.7579899884482094e-08, - "loss": 3.7577, - "step": 3542000 - }, - { - "epoch": 38.97, - "learning_rate": 2.7566147752901697e-08, - "loss": 3.7753, - "step": 3542500 - }, - { - "epoch": 38.98, - "learning_rate": 2.7552395621321306e-08, - "loss": 3.7399, - "step": 3543000 - }, - { - "epoch": 38.98, - "learning_rate": 2.753864348974091e-08, - "loss": 3.7544, - "step": 3543500 - }, - { - "epoch": 38.99, - "learning_rate": 2.752489135816051e-08, - "loss": 3.7492, - "step": 3544000 - }, - { - "epoch": 39.0, - "learning_rate": 2.751113922658012e-08, - "loss": 3.7431, - "step": 3544500 - }, - { - "epoch": 39.0, - "eval_loss": 3.8281142711639404, - "eval_runtime": 6.1436, - "eval_samples_per_second": 252.947, - "step": 3544905 - }, - { - "epoch": 39.0, - "learning_rate": 2.7497387094999724e-08, - "loss": 3.7527, - "step": 3545000 - }, - { - "epoch": 39.01, - "learning_rate": 2.7483634963419326e-08, - "loss": 3.7626, - "step": 3545500 - }, - { - "epoch": 39.01, - "learning_rate": 2.7469882831838936e-08, - "loss": 3.7582, - "step": 3546000 - }, - { - "epoch": 39.02, - "learning_rate": 2.745613070025854e-08, - "loss": 3.7599, - "step": 3546500 - }, - { - "epoch": 39.02, - "learning_rate": 2.7442378568678144e-08, - "loss": 3.7629, - "step": 3547000 - }, - { - "epoch": 39.03, - "learning_rate": 2.742862643709775e-08, - "loss": 3.7516, - "step": 3547500 - }, - { - "epoch": 39.03, - "learning_rate": 2.7414874305517353e-08, - "loss": 3.7613, - "step": 3548000 - }, - { - "epoch": 39.04, - "learning_rate": 2.740112217393696e-08, - "loss": 3.7516, - "step": 3548500 - }, - { - "epoch": 39.05, - "learning_rate": 2.7387370042356565e-08, - "loss": 3.7697, - "step": 3549000 - }, - { - "epoch": 39.05, - "learning_rate": 2.737361791077617e-08, - "loss": 3.7474, - "step": 3549500 - }, - { - "epoch": 39.06, - "learning_rate": 2.7359865779195774e-08, - "loss": 3.7391, - "step": 3550000 - }, - { - "epoch": 39.06, - "learning_rate": 2.7346113647615377e-08, - "loss": 3.738, - "step": 3550500 - }, - { - "epoch": 39.07, - "learning_rate": 2.7332361516034986e-08, - "loss": 3.7482, - "step": 3551000 - }, - { - "epoch": 39.07, - "learning_rate": 2.731860938445459e-08, - "loss": 3.7533, - "step": 3551500 - }, - { - "epoch": 39.08, - "learning_rate": 2.730485725287419e-08, - "loss": 3.7452, - "step": 3552000 - }, - { - "epoch": 39.08, - "learning_rate": 2.72911051212938e-08, - "loss": 3.7709, - "step": 3552500 - }, - { - "epoch": 39.09, - "learning_rate": 2.7277352989713404e-08, - "loss": 3.7591, - "step": 3553000 - }, - { - "epoch": 39.09, - "learning_rate": 2.7263600858133006e-08, - "loss": 3.7553, - "step": 3553500 - }, - { - "epoch": 39.1, - "learning_rate": 2.7249848726552616e-08, - "loss": 3.7516, - "step": 3554000 - }, - { - "epoch": 39.11, - "learning_rate": 2.723609659497222e-08, - "loss": 3.7508, - "step": 3554500 - }, - { - "epoch": 39.11, - "learning_rate": 2.722234446339182e-08, - "loss": 3.7729, - "step": 3555000 - }, - { - "epoch": 39.12, - "learning_rate": 2.720859233181143e-08, - "loss": 3.7414, - "step": 3555500 - }, - { - "epoch": 39.12, - "learning_rate": 2.7194840200231033e-08, - "loss": 3.7659, - "step": 3556000 - }, - { - "epoch": 39.13, - "learning_rate": 2.7181088068650636e-08, - "loss": 3.7573, - "step": 3556500 - }, - { - "epoch": 39.13, - "learning_rate": 2.7167335937070245e-08, - "loss": 3.7726, - "step": 3557000 - }, - { - "epoch": 39.14, - "learning_rate": 2.7153583805489848e-08, - "loss": 3.7529, - "step": 3557500 - }, - { - "epoch": 39.14, - "learning_rate": 2.713983167390945e-08, - "loss": 3.7511, - "step": 3558000 - }, - { - "epoch": 39.15, - "learning_rate": 2.712607954232906e-08, - "loss": 3.7553, - "step": 3558500 - }, - { - "epoch": 39.16, - "learning_rate": 2.7112327410748663e-08, - "loss": 3.7635, - "step": 3559000 - }, - { - "epoch": 39.16, - "learning_rate": 2.709857527916827e-08, - "loss": 3.7514, - "step": 3559500 - }, - { - "epoch": 39.17, - "learning_rate": 2.7084823147587875e-08, - "loss": 3.7555, - "step": 3560000 - }, - { - "epoch": 39.17, - "learning_rate": 2.707107101600748e-08, - "loss": 3.7559, - "step": 3560500 - }, - { - "epoch": 39.18, - "learning_rate": 2.7057318884427084e-08, - "loss": 3.7769, - "step": 3561000 - }, - { - "epoch": 39.18, - "learning_rate": 2.7043566752846693e-08, - "loss": 3.7502, - "step": 3561500 - }, - { - "epoch": 39.19, - "learning_rate": 2.7029814621266296e-08, - "loss": 3.7651, - "step": 3562000 - }, - { - "epoch": 39.19, - "learning_rate": 2.70160624896859e-08, - "loss": 3.7491, - "step": 3562500 - }, - { - "epoch": 39.2, - "learning_rate": 2.7002310358105508e-08, - "loss": 3.7614, - "step": 3563000 - }, - { - "epoch": 39.2, - "learning_rate": 2.698855822652511e-08, - "loss": 3.7349, - "step": 3563500 - }, - { - "epoch": 39.21, - "learning_rate": 2.6974806094944713e-08, - "loss": 3.764, - "step": 3564000 - }, - { - "epoch": 39.22, - "learning_rate": 2.6961053963364323e-08, - "loss": 3.7249, - "step": 3564500 - }, - { - "epoch": 39.22, - "learning_rate": 2.6947301831783925e-08, - "loss": 3.7416, - "step": 3565000 - }, - { - "epoch": 39.23, - "learning_rate": 2.6933549700203528e-08, - "loss": 3.7664, - "step": 3565500 - }, - { - "epoch": 39.23, - "learning_rate": 2.6919797568623137e-08, - "loss": 3.7663, - "step": 3566000 - }, - { - "epoch": 39.24, - "learning_rate": 2.690604543704274e-08, - "loss": 3.7502, - "step": 3566500 - }, - { - "epoch": 39.24, - "learning_rate": 2.6892293305462343e-08, - "loss": 3.758, - "step": 3567000 - }, - { - "epoch": 39.25, - "learning_rate": 2.6878541173881952e-08, - "loss": 3.7765, - "step": 3567500 - }, - { - "epoch": 39.25, - "learning_rate": 2.6864789042301555e-08, - "loss": 3.7612, - "step": 3568000 - }, - { - "epoch": 39.26, - "learning_rate": 2.6851036910721158e-08, - "loss": 3.7405, - "step": 3568500 - }, - { - "epoch": 39.27, - "learning_rate": 2.6837284779140767e-08, - "loss": 3.7569, - "step": 3569000 - }, - { - "epoch": 39.27, - "learning_rate": 2.682353264756037e-08, - "loss": 3.7444, - "step": 3569500 - }, - { - "epoch": 39.28, - "learning_rate": 2.6809780515979972e-08, - "loss": 3.7473, - "step": 3570000 - }, - { - "epoch": 39.28, - "learning_rate": 2.6796028384399582e-08, - "loss": 3.7597, - "step": 3570500 - }, - { - "epoch": 39.29, - "learning_rate": 2.6782276252819184e-08, - "loss": 3.7522, - "step": 3571000 - }, - { - "epoch": 39.29, - "learning_rate": 2.676852412123879e-08, - "loss": 3.7539, - "step": 3571500 - }, - { - "epoch": 39.3, - "learning_rate": 2.6754771989658397e-08, - "loss": 3.7756, - "step": 3572000 - }, - { - "epoch": 39.3, - "learning_rate": 2.6741019858078003e-08, - "loss": 3.7507, - "step": 3572500 - }, - { - "epoch": 39.31, - "learning_rate": 2.6727267726497605e-08, - "loss": 3.7501, - "step": 3573000 - }, - { - "epoch": 39.31, - "learning_rate": 2.6713515594917215e-08, - "loss": 3.7598, - "step": 3573500 - }, - { - "epoch": 39.32, - "learning_rate": 2.6699763463336817e-08, - "loss": 3.735, - "step": 3574000 - }, - { - "epoch": 39.33, - "learning_rate": 2.668601133175642e-08, - "loss": 3.7333, - "step": 3574500 - }, - { - "epoch": 39.33, - "learning_rate": 2.667225920017603e-08, - "loss": 3.7587, - "step": 3575000 - }, - { - "epoch": 39.34, - "learning_rate": 2.6658507068595632e-08, - "loss": 3.7494, - "step": 3575500 - }, - { - "epoch": 39.34, - "learning_rate": 2.6644754937015235e-08, - "loss": 3.7504, - "step": 3576000 - }, - { - "epoch": 39.35, - "learning_rate": 2.6631002805434844e-08, - "loss": 3.7508, - "step": 3576500 - }, - { - "epoch": 39.35, - "learning_rate": 2.6617250673854447e-08, - "loss": 3.7532, - "step": 3577000 - }, - { - "epoch": 39.36, - "learning_rate": 2.660349854227405e-08, - "loss": 3.7461, - "step": 3577500 - }, - { - "epoch": 39.36, - "learning_rate": 2.658974641069366e-08, - "loss": 3.7665, - "step": 3578000 - }, - { - "epoch": 39.37, - "learning_rate": 2.6575994279113262e-08, - "loss": 3.7678, - "step": 3578500 - }, - { - "epoch": 39.38, - "learning_rate": 2.6562242147532864e-08, - "loss": 3.7574, - "step": 3579000 - }, - { - "epoch": 39.38, - "learning_rate": 2.6548490015952474e-08, - "loss": 3.7469, - "step": 3579500 - }, - { - "epoch": 39.39, - "learning_rate": 2.6534737884372077e-08, - "loss": 3.7684, - "step": 3580000 - }, - { - "epoch": 39.39, - "learning_rate": 2.652098575279168e-08, - "loss": 3.753, - "step": 3580500 - }, - { - "epoch": 39.4, - "learning_rate": 2.650723362121129e-08, - "loss": 3.7608, - "step": 3581000 - }, - { - "epoch": 39.4, - "learning_rate": 2.649348148963089e-08, - "loss": 3.7634, - "step": 3581500 - }, - { - "epoch": 39.41, - "learning_rate": 2.6479729358050494e-08, - "loss": 3.7401, - "step": 3582000 - }, - { - "epoch": 39.41, - "learning_rate": 2.6465977226470103e-08, - "loss": 3.7589, - "step": 3582500 - }, - { - "epoch": 39.42, - "learning_rate": 2.6452225094889706e-08, - "loss": 3.7508, - "step": 3583000 - }, - { - "epoch": 39.42, - "learning_rate": 2.6438472963309312e-08, - "loss": 3.7686, - "step": 3583500 - }, - { - "epoch": 39.43, - "learning_rate": 2.6424720831728918e-08, - "loss": 3.7509, - "step": 3584000 - }, - { - "epoch": 39.44, - "learning_rate": 2.6410968700148524e-08, - "loss": 3.7568, - "step": 3584500 - }, - { - "epoch": 39.44, - "learning_rate": 2.6397216568568127e-08, - "loss": 3.7508, - "step": 3585000 - }, - { - "epoch": 39.45, - "learning_rate": 2.6383464436987733e-08, - "loss": 3.7476, - "step": 3585500 - }, - { - "epoch": 39.45, - "learning_rate": 2.636971230540734e-08, - "loss": 3.7497, - "step": 3586000 - }, - { - "epoch": 39.46, - "learning_rate": 2.6355960173826942e-08, - "loss": 3.756, - "step": 3586500 - }, - { - "epoch": 39.46, - "learning_rate": 2.6342208042246545e-08, - "loss": 3.7475, - "step": 3587000 - }, - { - "epoch": 39.47, - "learning_rate": 2.6328455910666154e-08, - "loss": 3.7439, - "step": 3587500 - }, - { - "epoch": 39.47, - "learning_rate": 2.6314703779085757e-08, - "loss": 3.7544, - "step": 3588000 - }, - { - "epoch": 39.48, - "learning_rate": 2.630095164750536e-08, - "loss": 3.7585, - "step": 3588500 - }, - { - "epoch": 39.49, - "learning_rate": 2.628719951592497e-08, - "loss": 3.7376, - "step": 3589000 - }, - { - "epoch": 39.49, - "learning_rate": 2.627344738434457e-08, - "loss": 3.739, - "step": 3589500 - }, - { - "epoch": 39.5, - "learning_rate": 2.6259695252764174e-08, - "loss": 3.7554, - "step": 3590000 - }, - { - "epoch": 39.5, - "learning_rate": 2.6245943121183783e-08, - "loss": 3.7475, - "step": 3590500 - }, - { - "epoch": 39.51, - "learning_rate": 2.6232190989603386e-08, - "loss": 3.7488, - "step": 3591000 - }, - { - "epoch": 39.51, - "learning_rate": 2.621843885802299e-08, - "loss": 3.7381, - "step": 3591500 - }, - { - "epoch": 39.52, - "learning_rate": 2.6204686726442598e-08, - "loss": 3.7652, - "step": 3592000 - }, - { - "epoch": 39.52, - "learning_rate": 2.61909345948622e-08, - "loss": 3.7506, - "step": 3592500 - }, - { - "epoch": 39.53, - "learning_rate": 2.6177182463281804e-08, - "loss": 3.7288, - "step": 3593000 - }, - { - "epoch": 39.53, - "learning_rate": 2.6163430331701413e-08, - "loss": 3.7374, - "step": 3593500 - }, - { - "epoch": 39.54, - "learning_rate": 2.6149678200121016e-08, - "loss": 3.7555, - "step": 3594000 - }, - { - "epoch": 39.55, - "learning_rate": 2.6135926068540622e-08, - "loss": 3.7623, - "step": 3594500 - }, - { - "epoch": 39.55, - "learning_rate": 2.6122173936960228e-08, - "loss": 3.7537, - "step": 3595000 - }, - { - "epoch": 39.56, - "learning_rate": 2.610842180537983e-08, - "loss": 3.7435, - "step": 3595500 - }, - { - "epoch": 39.56, - "learning_rate": 2.6094669673799437e-08, - "loss": 3.765, - "step": 3596000 - }, - { - "epoch": 39.57, - "learning_rate": 2.6080917542219043e-08, - "loss": 3.7544, - "step": 3596500 - }, - { - "epoch": 39.57, - "learning_rate": 2.606716541063865e-08, - "loss": 3.773, - "step": 3597000 - }, - { - "epoch": 39.58, - "learning_rate": 2.605341327905825e-08, - "loss": 3.7369, - "step": 3597500 - }, - { - "epoch": 39.58, - "learning_rate": 2.603966114747786e-08, - "loss": 3.7827, - "step": 3598000 - }, - { - "epoch": 39.59, - "learning_rate": 2.6025909015897463e-08, - "loss": 3.733, - "step": 3598500 - }, - { - "epoch": 39.6, - "learning_rate": 2.6012156884317066e-08, - "loss": 3.7613, - "step": 3599000 - }, - { - "epoch": 39.6, - "learning_rate": 2.5998404752736676e-08, - "loss": 3.7647, - "step": 3599500 - }, - { - "epoch": 39.61, - "learning_rate": 2.5984652621156278e-08, - "loss": 3.7525, - "step": 3600000 - }, - { - "epoch": 39.61, - "learning_rate": 2.597090048957588e-08, - "loss": 3.7465, - "step": 3600500 - }, - { - "epoch": 39.62, - "learning_rate": 2.595714835799549e-08, - "loss": 3.7643, - "step": 3601000 - }, - { - "epoch": 39.62, - "learning_rate": 2.5943396226415093e-08, - "loss": 3.7448, - "step": 3601500 - }, - { - "epoch": 39.63, - "learning_rate": 2.5929644094834696e-08, - "loss": 3.7479, - "step": 3602000 - }, - { - "epoch": 39.63, - "learning_rate": 2.5915891963254305e-08, - "loss": 3.7394, - "step": 3602500 - }, - { - "epoch": 39.64, - "learning_rate": 2.5902139831673908e-08, - "loss": 3.7712, - "step": 3603000 - }, - { - "epoch": 39.64, - "learning_rate": 2.588838770009351e-08, - "loss": 3.7561, - "step": 3603500 - }, - { - "epoch": 39.65, - "learning_rate": 2.587463556851312e-08, - "loss": 3.7628, - "step": 3604000 - }, - { - "epoch": 39.66, - "learning_rate": 2.5860883436932723e-08, - "loss": 3.7543, - "step": 3604500 - }, - { - "epoch": 39.66, - "learning_rate": 2.5847131305352325e-08, - "loss": 3.7771, - "step": 3605000 - }, - { - "epoch": 39.67, - "learning_rate": 2.5833379173771935e-08, - "loss": 3.7358, - "step": 3605500 - }, - { - "epoch": 39.67, - "learning_rate": 2.5819627042191537e-08, - "loss": 3.7361, - "step": 3606000 - }, - { - "epoch": 39.68, - "learning_rate": 2.580587491061114e-08, - "loss": 3.7409, - "step": 3606500 - }, - { - "epoch": 39.68, - "learning_rate": 2.579212277903075e-08, - "loss": 3.7485, - "step": 3607000 - }, - { - "epoch": 39.69, - "learning_rate": 2.5778370647450352e-08, - "loss": 3.7512, - "step": 3607500 - }, - { - "epoch": 39.69, - "learning_rate": 2.5764618515869958e-08, - "loss": 3.7575, - "step": 3608000 - }, - { - "epoch": 39.7, - "learning_rate": 2.5750866384289564e-08, - "loss": 3.7575, - "step": 3608500 - }, - { - "epoch": 39.71, - "learning_rate": 2.573711425270917e-08, - "loss": 3.7486, - "step": 3609000 - }, - { - "epoch": 39.71, - "learning_rate": 2.5723362121128773e-08, - "loss": 3.7514, - "step": 3609500 - }, - { - "epoch": 39.72, - "learning_rate": 2.5709609989548382e-08, - "loss": 3.7499, - "step": 3610000 - }, - { - "epoch": 39.72, - "learning_rate": 2.5695857857967985e-08, - "loss": 3.7663, - "step": 3610500 - }, - { - "epoch": 39.73, - "learning_rate": 2.5682105726387588e-08, - "loss": 3.7386, - "step": 3611000 - }, - { - "epoch": 39.73, - "learning_rate": 2.5668353594807197e-08, - "loss": 3.7534, - "step": 3611500 - }, - { - "epoch": 39.74, - "learning_rate": 2.56546014632268e-08, - "loss": 3.7624, - "step": 3612000 - }, - { - "epoch": 39.74, - "learning_rate": 2.5640849331646403e-08, - "loss": 3.753, - "step": 3612500 - }, - { - "epoch": 39.75, - "learning_rate": 2.5627097200066012e-08, - "loss": 3.7432, - "step": 3613000 - }, - { - "epoch": 39.75, - "learning_rate": 2.5613345068485615e-08, - "loss": 3.7619, - "step": 3613500 - }, - { - "epoch": 39.76, - "learning_rate": 2.5599592936905217e-08, - "loss": 3.7607, - "step": 3614000 - }, - { - "epoch": 39.77, - "learning_rate": 2.5585840805324827e-08, - "loss": 3.7643, - "step": 3614500 - }, - { - "epoch": 39.77, - "learning_rate": 2.557208867374443e-08, - "loss": 3.7582, - "step": 3615000 - }, - { - "epoch": 39.78, - "learning_rate": 2.5558336542164032e-08, - "loss": 3.7385, - "step": 3615500 - }, - { - "epoch": 39.78, - "learning_rate": 2.554458441058364e-08, - "loss": 3.774, - "step": 3616000 - }, - { - "epoch": 39.79, - "learning_rate": 2.5530832279003244e-08, - "loss": 3.7446, - "step": 3616500 - }, - { - "epoch": 39.79, - "learning_rate": 2.5517080147422847e-08, - "loss": 3.7795, - "step": 3617000 - }, - { - "epoch": 39.8, - "learning_rate": 2.5503328015842456e-08, - "loss": 3.7636, - "step": 3617500 - }, - { - "epoch": 39.8, - "learning_rate": 2.548957588426206e-08, - "loss": 3.7638, - "step": 3618000 - }, - { - "epoch": 39.81, - "learning_rate": 2.5475823752681662e-08, - "loss": 3.7551, - "step": 3618500 - }, - { - "epoch": 39.82, - "learning_rate": 2.546207162110127e-08, - "loss": 3.747, - "step": 3619000 - }, - { - "epoch": 39.82, - "learning_rate": 2.5448319489520874e-08, - "loss": 3.756, - "step": 3619500 - }, - { - "epoch": 39.83, - "learning_rate": 2.543456735794048e-08, - "loss": 3.7599, - "step": 3620000 - }, - { - "epoch": 39.83, - "learning_rate": 2.5420815226360086e-08, - "loss": 3.7624, - "step": 3620500 - }, - { - "epoch": 39.84, - "learning_rate": 2.5407063094779692e-08, - "loss": 3.7513, - "step": 3621000 - }, - { - "epoch": 39.84, - "learning_rate": 2.5393310963199295e-08, - "loss": 3.7631, - "step": 3621500 - }, - { - "epoch": 39.85, - "learning_rate": 2.5379558831618904e-08, - "loss": 3.7609, - "step": 3622000 - }, - { - "epoch": 39.85, - "learning_rate": 2.5365806700038507e-08, - "loss": 3.7623, - "step": 3622500 - }, - { - "epoch": 39.86, - "learning_rate": 2.535205456845811e-08, - "loss": 3.7492, - "step": 3623000 - }, - { - "epoch": 39.86, - "learning_rate": 2.533830243687772e-08, - "loss": 3.7356, - "step": 3623500 - }, - { - "epoch": 39.87, - "learning_rate": 2.532455030529732e-08, - "loss": 3.7639, - "step": 3624000 - }, - { - "epoch": 39.88, - "learning_rate": 2.5310798173716924e-08, - "loss": 3.7451, - "step": 3624500 - }, - { - "epoch": 39.88, - "learning_rate": 2.5297046042136527e-08, - "loss": 3.7455, - "step": 3625000 - }, - { - "epoch": 39.89, - "learning_rate": 2.5283293910556136e-08, - "loss": 3.7552, - "step": 3625500 - }, - { - "epoch": 39.89, - "learning_rate": 2.526954177897574e-08, - "loss": 3.742, - "step": 3626000 - }, - { - "epoch": 39.9, - "learning_rate": 2.5255789647395342e-08, - "loss": 3.7565, - "step": 3626500 - }, - { - "epoch": 39.9, - "learning_rate": 2.524203751581495e-08, - "loss": 3.7544, - "step": 3627000 - }, - { - "epoch": 39.91, - "learning_rate": 2.5228285384234554e-08, - "loss": 3.7759, - "step": 3627500 - }, - { - "epoch": 39.91, - "learning_rate": 2.5214533252654157e-08, - "loss": 3.7453, - "step": 3628000 - }, - { - "epoch": 39.92, - "learning_rate": 2.5200781121073766e-08, - "loss": 3.7566, - "step": 3628500 - }, - { - "epoch": 39.93, - "learning_rate": 2.518702898949337e-08, - "loss": 3.7467, - "step": 3629000 - }, - { - "epoch": 39.93, - "learning_rate": 2.517327685791297e-08, - "loss": 3.7466, - "step": 3629500 - }, - { - "epoch": 39.94, - "learning_rate": 2.515952472633258e-08, - "loss": 3.7533, - "step": 3630000 - }, - { - "epoch": 39.94, - "learning_rate": 2.5145772594752184e-08, - "loss": 3.7639, - "step": 3630500 - }, - { - "epoch": 39.95, - "learning_rate": 2.513202046317179e-08, - "loss": 3.7613, - "step": 3631000 - }, - { - "epoch": 39.95, - "learning_rate": 2.5118268331591396e-08, - "loss": 3.7544, - "step": 3631500 - }, - { - "epoch": 39.96, - "learning_rate": 2.5104516200011e-08, - "loss": 3.7549, - "step": 3632000 - }, - { - "epoch": 39.96, - "learning_rate": 2.5090764068430604e-08, - "loss": 3.7352, - "step": 3632500 - }, - { - "epoch": 39.97, - "learning_rate": 2.507701193685021e-08, - "loss": 3.746, - "step": 3633000 - }, - { - "epoch": 39.97, - "learning_rate": 2.5063259805269816e-08, - "loss": 3.7455, - "step": 3633500 - }, - { - "epoch": 39.98, - "learning_rate": 2.504950767368942e-08, - "loss": 3.7472, - "step": 3634000 - }, - { - "epoch": 39.99, - "learning_rate": 2.503575554210903e-08, - "loss": 3.764, - "step": 3634500 - }, - { - "epoch": 39.99, - "learning_rate": 2.502200341052863e-08, - "loss": 3.7572, - "step": 3635000 - }, - { - "epoch": 40.0, - "learning_rate": 2.5008251278948234e-08, - "loss": 3.7413, - "step": 3635500 - }, - { - "epoch": 40.0, - "eval_loss": 3.8280036449432373, - "eval_runtime": 6.1422, - "eval_samples_per_second": 253.003, - "step": 3635800 - }, - { - "epoch": 40.0, - "learning_rate": 2.499449914736784e-08, - "loss": 3.7442, - "step": 3636000 - }, - { - "epoch": 40.01, - "learning_rate": 2.4980747015787446e-08, - "loss": 3.7811, - "step": 3636500 - }, - { - "epoch": 40.01, - "learning_rate": 2.4966994884207052e-08, - "loss": 3.7505, - "step": 3637000 - }, - { - "epoch": 40.02, - "learning_rate": 2.4953242752626655e-08, - "loss": 3.7792, - "step": 3637500 - }, - { - "epoch": 40.02, - "learning_rate": 2.493949062104626e-08, - "loss": 3.758, - "step": 3638000 - }, - { - "epoch": 40.03, - "learning_rate": 2.4925738489465867e-08, - "loss": 3.7368, - "step": 3638500 - }, - { - "epoch": 40.04, - "learning_rate": 2.491198635788547e-08, - "loss": 3.7549, - "step": 3639000 - }, - { - "epoch": 40.04, - "learning_rate": 2.4898234226305076e-08, - "loss": 3.7389, - "step": 3639500 - }, - { - "epoch": 40.05, - "learning_rate": 2.488448209472468e-08, - "loss": 3.7411, - "step": 3640000 - }, - { - "epoch": 40.05, - "learning_rate": 2.4870729963144284e-08, - "loss": 3.7476, - "step": 3640500 - }, - { - "epoch": 40.06, - "learning_rate": 2.485697783156389e-08, - "loss": 3.7505, - "step": 3641000 - }, - { - "epoch": 40.06, - "learning_rate": 2.4843225699983496e-08, - "loss": 3.751, - "step": 3641500 - }, - { - "epoch": 40.07, - "learning_rate": 2.48294735684031e-08, - "loss": 3.7524, - "step": 3642000 - }, - { - "epoch": 40.07, - "learning_rate": 2.4815721436822705e-08, - "loss": 3.7497, - "step": 3642500 - }, - { - "epoch": 40.08, - "learning_rate": 2.480196930524231e-08, - "loss": 3.7489, - "step": 3643000 - }, - { - "epoch": 40.08, - "learning_rate": 2.4788217173661914e-08, - "loss": 3.7505, - "step": 3643500 - }, - { - "epoch": 40.09, - "learning_rate": 2.477446504208152e-08, - "loss": 3.751, - "step": 3644000 - }, - { - "epoch": 40.1, - "learning_rate": 2.4760712910501126e-08, - "loss": 3.7529, - "step": 3644500 - }, - { - "epoch": 40.1, - "learning_rate": 2.4746960778920732e-08, - "loss": 3.7683, - "step": 3645000 - }, - { - "epoch": 40.11, - "learning_rate": 2.4733208647340338e-08, - "loss": 3.7568, - "step": 3645500 - }, - { - "epoch": 40.11, - "learning_rate": 2.4719456515759944e-08, - "loss": 3.7498, - "step": 3646000 - }, - { - "epoch": 40.12, - "learning_rate": 2.4705704384179547e-08, - "loss": 3.7549, - "step": 3646500 - }, - { - "epoch": 40.12, - "learning_rate": 2.4691952252599153e-08, - "loss": 3.7663, - "step": 3647000 - }, - { - "epoch": 40.13, - "learning_rate": 2.467820012101876e-08, - "loss": 3.7708, - "step": 3647500 - }, - { - "epoch": 40.13, - "learning_rate": 2.466444798943836e-08, - "loss": 3.7673, - "step": 3648000 - }, - { - "epoch": 40.14, - "learning_rate": 2.4650695857857968e-08, - "loss": 3.7371, - "step": 3648500 - }, - { - "epoch": 40.15, - "learning_rate": 2.4636943726277574e-08, - "loss": 3.7526, - "step": 3649000 - }, - { - "epoch": 40.15, - "learning_rate": 2.4623191594697176e-08, - "loss": 3.7514, - "step": 3649500 - }, - { - "epoch": 40.16, - "learning_rate": 2.4609439463116782e-08, - "loss": 3.7619, - "step": 3650000 - }, - { - "epoch": 40.16, - "learning_rate": 2.459568733153639e-08, - "loss": 3.7652, - "step": 3650500 - }, - { - "epoch": 40.17, - "learning_rate": 2.458193519995599e-08, - "loss": 3.77, - "step": 3651000 - }, - { - "epoch": 40.17, - "learning_rate": 2.4568183068375597e-08, - "loss": 3.7597, - "step": 3651500 - }, - { - "epoch": 40.18, - "learning_rate": 2.4554430936795203e-08, - "loss": 3.7499, - "step": 3652000 - }, - { - "epoch": 40.18, - "learning_rate": 2.4540678805214806e-08, - "loss": 3.7295, - "step": 3652500 - }, - { - "epoch": 40.19, - "learning_rate": 2.4526926673634412e-08, - "loss": 3.7581, - "step": 3653000 - }, - { - "epoch": 40.19, - "learning_rate": 2.4513174542054015e-08, - "loss": 3.7627, - "step": 3653500 - }, - { - "epoch": 40.2, - "learning_rate": 2.449942241047362e-08, - "loss": 3.762, - "step": 3654000 - }, - { - "epoch": 40.21, - "learning_rate": 2.4485670278893227e-08, - "loss": 3.756, - "step": 3654500 - }, - { - "epoch": 40.21, - "learning_rate": 2.447191814731283e-08, - "loss": 3.7854, - "step": 3655000 - }, - { - "epoch": 40.22, - "learning_rate": 2.4458166015732436e-08, - "loss": 3.7549, - "step": 3655500 - }, - { - "epoch": 40.22, - "learning_rate": 2.444441388415204e-08, - "loss": 3.7408, - "step": 3656000 - }, - { - "epoch": 40.23, - "learning_rate": 2.4430661752571648e-08, - "loss": 3.7669, - "step": 3656500 - }, - { - "epoch": 40.23, - "learning_rate": 2.4416909620991254e-08, - "loss": 3.7574, - "step": 3657000 - }, - { - "epoch": 40.24, - "learning_rate": 2.440315748941086e-08, - "loss": 3.7767, - "step": 3657500 - }, - { - "epoch": 40.24, - "learning_rate": 2.4389405357830462e-08, - "loss": 3.7453, - "step": 3658000 - }, - { - "epoch": 40.25, - "learning_rate": 2.437565322625007e-08, - "loss": 3.731, - "step": 3658500 - }, - { - "epoch": 40.26, - "learning_rate": 2.4361901094669675e-08, - "loss": 3.7741, - "step": 3659000 - }, - { - "epoch": 40.26, - "learning_rate": 2.4348148963089277e-08, - "loss": 3.768, - "step": 3659500 - }, - { - "epoch": 40.27, - "learning_rate": 2.4334396831508883e-08, - "loss": 3.7396, - "step": 3660000 - }, - { - "epoch": 40.27, - "learning_rate": 2.432064469992849e-08, - "loss": 3.7497, - "step": 3660500 - }, - { - "epoch": 40.28, - "learning_rate": 2.4306892568348092e-08, - "loss": 3.7385, - "step": 3661000 - }, - { - "epoch": 40.28, - "learning_rate": 2.4293140436767698e-08, - "loss": 3.7418, - "step": 3661500 - }, - { - "epoch": 40.29, - "learning_rate": 2.4279388305187304e-08, - "loss": 3.7454, - "step": 3662000 - }, - { - "epoch": 40.29, - "learning_rate": 2.4265636173606907e-08, - "loss": 3.7345, - "step": 3662500 - }, - { - "epoch": 40.3, - "learning_rate": 2.4251884042026513e-08, - "loss": 3.7431, - "step": 3663000 - }, - { - "epoch": 40.3, - "learning_rate": 2.423813191044612e-08, - "loss": 3.7608, - "step": 3663500 - }, - { - "epoch": 40.31, - "learning_rate": 2.4224379778865722e-08, - "loss": 3.7438, - "step": 3664000 - }, - { - "epoch": 40.32, - "learning_rate": 2.4210627647285328e-08, - "loss": 3.7593, - "step": 3664500 - }, - { - "epoch": 40.32, - "learning_rate": 2.4196875515704934e-08, - "loss": 3.762, - "step": 3665000 - }, - { - "epoch": 40.33, - "learning_rate": 2.4183123384124536e-08, - "loss": 3.7565, - "step": 3665500 - }, - { - "epoch": 40.33, - "learning_rate": 2.4169371252544142e-08, - "loss": 3.7698, - "step": 3666000 - }, - { - "epoch": 40.34, - "learning_rate": 2.415561912096375e-08, - "loss": 3.7306, - "step": 3666500 - }, - { - "epoch": 40.34, - "learning_rate": 2.414186698938335e-08, - "loss": 3.7387, - "step": 3667000 - }, - { - "epoch": 40.35, - "learning_rate": 2.4128114857802957e-08, - "loss": 3.7603, - "step": 3667500 - }, - { - "epoch": 40.35, - "learning_rate": 2.4114362726222563e-08, - "loss": 3.7615, - "step": 3668000 - }, - { - "epoch": 40.36, - "learning_rate": 2.410061059464217e-08, - "loss": 3.7574, - "step": 3668500 - }, - { - "epoch": 40.37, - "learning_rate": 2.4086858463061775e-08, - "loss": 3.7618, - "step": 3669000 - }, - { - "epoch": 40.37, - "learning_rate": 2.407310633148138e-08, - "loss": 3.7468, - "step": 3669500 - }, - { - "epoch": 40.38, - "learning_rate": 2.4059354199900984e-08, - "loss": 3.7649, - "step": 3670000 - }, - { - "epoch": 40.38, - "learning_rate": 2.404560206832059e-08, - "loss": 3.7842, - "step": 3670500 - }, - { - "epoch": 40.39, - "learning_rate": 2.4031849936740196e-08, - "loss": 3.7433, - "step": 3671000 - }, - { - "epoch": 40.39, - "learning_rate": 2.40180978051598e-08, - "loss": 3.7461, - "step": 3671500 - }, - { - "epoch": 40.4, - "learning_rate": 2.4004345673579405e-08, - "loss": 3.748, - "step": 3672000 - }, - { - "epoch": 40.4, - "learning_rate": 2.3990593541999008e-08, - "loss": 3.7319, - "step": 3672500 - }, - { - "epoch": 40.41, - "learning_rate": 2.3976841410418614e-08, - "loss": 3.7704, - "step": 3673000 - }, - { - "epoch": 40.41, - "learning_rate": 2.396308927883822e-08, - "loss": 3.7613, - "step": 3673500 - }, - { - "epoch": 40.42, - "learning_rate": 2.3949337147257823e-08, - "loss": 3.7466, - "step": 3674000 - }, - { - "epoch": 40.43, - "learning_rate": 2.393558501567743e-08, - "loss": 3.7571, - "step": 3674500 - }, - { - "epoch": 40.43, - "learning_rate": 2.3921832884097035e-08, - "loss": 3.7626, - "step": 3675000 - }, - { - "epoch": 40.44, - "learning_rate": 2.3908080752516637e-08, - "loss": 3.7379, - "step": 3675500 - }, - { - "epoch": 40.44, - "learning_rate": 2.3894328620936243e-08, - "loss": 3.7528, - "step": 3676000 - }, - { - "epoch": 40.45, - "learning_rate": 2.388057648935585e-08, - "loss": 3.7584, - "step": 3676500 - }, - { - "epoch": 40.45, - "learning_rate": 2.3866824357775452e-08, - "loss": 3.7486, - "step": 3677000 - }, - { - "epoch": 40.46, - "learning_rate": 2.3853072226195058e-08, - "loss": 3.7545, - "step": 3677500 - }, - { - "epoch": 40.46, - "learning_rate": 2.3839320094614664e-08, - "loss": 3.7479, - "step": 3678000 - }, - { - "epoch": 40.47, - "learning_rate": 2.3825567963034267e-08, - "loss": 3.7772, - "step": 3678500 - }, - { - "epoch": 40.48, - "learning_rate": 2.3811815831453873e-08, - "loss": 3.7687, - "step": 3679000 - }, - { - "epoch": 40.48, - "learning_rate": 2.379806369987348e-08, - "loss": 3.7486, - "step": 3679500 - }, - { - "epoch": 40.49, - "learning_rate": 2.3784311568293085e-08, - "loss": 3.745, - "step": 3680000 - }, - { - "epoch": 40.49, - "learning_rate": 2.3770559436712688e-08, - "loss": 3.7553, - "step": 3680500 - }, - { - "epoch": 40.5, - "learning_rate": 2.3756807305132294e-08, - "loss": 3.7608, - "step": 3681000 - }, - { - "epoch": 40.5, - "learning_rate": 2.37430551735519e-08, - "loss": 3.7382, - "step": 3681500 - }, - { - "epoch": 40.51, - "learning_rate": 2.3729303041971506e-08, - "loss": 3.748, - "step": 3682000 - }, - { - "epoch": 40.51, - "learning_rate": 2.3715550910391112e-08, - "loss": 3.7452, - "step": 3682500 - }, - { - "epoch": 40.52, - "learning_rate": 2.3701798778810715e-08, - "loss": 3.7484, - "step": 3683000 - }, - { - "epoch": 40.52, - "learning_rate": 2.368804664723032e-08, - "loss": 3.7618, - "step": 3683500 - }, - { - "epoch": 40.53, - "learning_rate": 2.3674294515649927e-08, - "loss": 3.7637, - "step": 3684000 - }, - { - "epoch": 40.54, - "learning_rate": 2.366054238406953e-08, - "loss": 3.7464, - "step": 3684500 - }, - { - "epoch": 40.54, - "learning_rate": 2.3646790252489135e-08, - "loss": 3.751, - "step": 3685000 - }, - { - "epoch": 40.55, - "learning_rate": 2.363303812090874e-08, - "loss": 3.7315, - "step": 3685500 - }, - { - "epoch": 40.55, - "learning_rate": 2.3619285989328344e-08, - "loss": 3.7446, - "step": 3686000 - }, - { - "epoch": 40.56, - "learning_rate": 2.360553385774795e-08, - "loss": 3.7501, - "step": 3686500 - }, - { - "epoch": 40.56, - "learning_rate": 2.3591781726167556e-08, - "loss": 3.7567, - "step": 3687000 - }, - { - "epoch": 40.57, - "learning_rate": 2.357802959458716e-08, - "loss": 3.7624, - "step": 3687500 - }, - { - "epoch": 40.57, - "learning_rate": 2.3564277463006765e-08, - "loss": 3.7453, - "step": 3688000 - }, - { - "epoch": 40.58, - "learning_rate": 2.355052533142637e-08, - "loss": 3.7343, - "step": 3688500 - }, - { - "epoch": 40.59, - "learning_rate": 2.3536773199845974e-08, - "loss": 3.7285, - "step": 3689000 - }, - { - "epoch": 40.59, - "learning_rate": 2.352302106826558e-08, - "loss": 3.7435, - "step": 3689500 - }, - { - "epoch": 40.6, - "learning_rate": 2.3509268936685186e-08, - "loss": 3.7478, - "step": 3690000 - }, - { - "epoch": 40.6, - "learning_rate": 2.349551680510479e-08, - "loss": 3.7543, - "step": 3690500 - }, - { - "epoch": 40.61, - "learning_rate": 2.3481764673524395e-08, - "loss": 3.7568, - "step": 3691000 - }, - { - "epoch": 40.61, - "learning_rate": 2.3468012541943997e-08, - "loss": 3.7517, - "step": 3691500 - }, - { - "epoch": 40.62, - "learning_rate": 2.3454260410363603e-08, - "loss": 3.7589, - "step": 3692000 - }, - { - "epoch": 40.62, - "learning_rate": 2.344050827878321e-08, - "loss": 3.751, - "step": 3692500 - }, - { - "epoch": 40.63, - "learning_rate": 2.3426756147202815e-08, - "loss": 3.7353, - "step": 3693000 - }, - { - "epoch": 40.63, - "learning_rate": 2.341300401562242e-08, - "loss": 3.7469, - "step": 3693500 - }, - { - "epoch": 40.64, - "learning_rate": 2.3399251884042027e-08, - "loss": 3.7485, - "step": 3694000 - }, - { - "epoch": 40.65, - "learning_rate": 2.338549975246163e-08, - "loss": 3.7652, - "step": 3694500 - }, - { - "epoch": 40.65, - "learning_rate": 2.3371747620881236e-08, - "loss": 3.747, - "step": 3695000 - }, - { - "epoch": 40.66, - "learning_rate": 2.3357995489300842e-08, - "loss": 3.7773, - "step": 3695500 - }, - { - "epoch": 40.66, - "learning_rate": 2.3344243357720445e-08, - "loss": 3.7505, - "step": 3696000 - }, - { - "epoch": 40.67, - "learning_rate": 2.333049122614005e-08, - "loss": 3.7555, - "step": 3696500 - }, - { - "epoch": 40.67, - "learning_rate": 2.3316739094559657e-08, - "loss": 3.7529, - "step": 3697000 - }, - { - "epoch": 40.68, - "learning_rate": 2.330298696297926e-08, - "loss": 3.7597, - "step": 3697500 - }, - { - "epoch": 40.68, - "learning_rate": 2.3289234831398866e-08, - "loss": 3.7524, - "step": 3698000 - }, - { - "epoch": 40.69, - "learning_rate": 2.3275482699818472e-08, - "loss": 3.7679, - "step": 3698500 - }, - { - "epoch": 40.7, - "learning_rate": 2.3261730568238075e-08, - "loss": 3.7462, - "step": 3699000 - }, - { - "epoch": 40.7, - "learning_rate": 2.324797843665768e-08, - "loss": 3.7436, - "step": 3699500 - }, - { - "epoch": 40.71, - "learning_rate": 2.3234226305077287e-08, - "loss": 3.7497, - "step": 3700000 - }, - { - "epoch": 40.71, - "learning_rate": 2.322047417349689e-08, - "loss": 3.7518, - "step": 3700500 - }, - { - "epoch": 40.72, - "learning_rate": 2.3206722041916495e-08, - "loss": 3.7489, - "step": 3701000 - }, - { - "epoch": 40.72, - "learning_rate": 2.31929699103361e-08, - "loss": 3.7569, - "step": 3701500 - }, - { - "epoch": 40.73, - "learning_rate": 2.3179217778755704e-08, - "loss": 3.7521, - "step": 3702000 - }, - { - "epoch": 40.73, - "learning_rate": 2.316546564717531e-08, - "loss": 3.7539, - "step": 3702500 - }, - { - "epoch": 40.74, - "learning_rate": 2.3151713515594916e-08, - "loss": 3.7507, - "step": 3703000 - }, - { - "epoch": 40.74, - "learning_rate": 2.313796138401452e-08, - "loss": 3.7574, - "step": 3703500 - }, - { - "epoch": 40.75, - "learning_rate": 2.3124209252434125e-08, - "loss": 3.7616, - "step": 3704000 - }, - { - "epoch": 40.76, - "learning_rate": 2.311045712085373e-08, - "loss": 3.7539, - "step": 3704500 - }, - { - "epoch": 40.76, - "learning_rate": 2.3096704989273337e-08, - "loss": 3.7629, - "step": 3705000 - }, - { - "epoch": 40.77, - "learning_rate": 2.3082952857692943e-08, - "loss": 3.7405, - "step": 3705500 - }, - { - "epoch": 40.77, - "learning_rate": 2.306920072611255e-08, - "loss": 3.7474, - "step": 3706000 - }, - { - "epoch": 40.78, - "learning_rate": 2.3055448594532152e-08, - "loss": 3.7701, - "step": 3706500 - }, - { - "epoch": 40.78, - "learning_rate": 2.3041696462951758e-08, - "loss": 3.744, - "step": 3707000 - }, - { - "epoch": 40.79, - "learning_rate": 2.3027944331371364e-08, - "loss": 3.746, - "step": 3707500 - }, - { - "epoch": 40.79, - "learning_rate": 2.3014192199790967e-08, - "loss": 3.7631, - "step": 3708000 - }, - { - "epoch": 40.8, - "learning_rate": 2.3000440068210573e-08, - "loss": 3.7351, - "step": 3708500 - }, - { - "epoch": 40.81, - "learning_rate": 2.298668793663018e-08, - "loss": 3.7712, - "step": 3709000 - }, - { - "epoch": 40.81, - "learning_rate": 2.297293580504978e-08, - "loss": 3.7618, - "step": 3709500 - }, - { - "epoch": 40.82, - "learning_rate": 2.2959183673469388e-08, - "loss": 3.7501, - "step": 3710000 - }, - { - "epoch": 40.82, - "learning_rate": 2.294543154188899e-08, - "loss": 3.7593, - "step": 3710500 - }, - { - "epoch": 40.83, - "learning_rate": 2.2931679410308596e-08, - "loss": 3.7435, - "step": 3711000 - }, - { - "epoch": 40.83, - "learning_rate": 2.2917927278728202e-08, - "loss": 3.7384, - "step": 3711500 - }, - { - "epoch": 40.84, - "learning_rate": 2.2904175147147805e-08, - "loss": 3.7597, - "step": 3712000 - }, - { - "epoch": 40.84, - "learning_rate": 2.289042301556741e-08, - "loss": 3.7686, - "step": 3712500 - }, - { - "epoch": 40.85, - "learning_rate": 2.2876670883987017e-08, - "loss": 3.768, - "step": 3713000 - }, - { - "epoch": 40.85, - "learning_rate": 2.286291875240662e-08, - "loss": 3.77, - "step": 3713500 - }, - { - "epoch": 40.86, - "learning_rate": 2.2849166620826226e-08, - "loss": 3.7626, - "step": 3714000 - }, - { - "epoch": 40.87, - "learning_rate": 2.2835414489245832e-08, - "loss": 3.7454, - "step": 3714500 - }, - { - "epoch": 40.87, - "learning_rate": 2.2821662357665435e-08, - "loss": 3.7454, - "step": 3715000 - }, - { - "epoch": 40.88, - "learning_rate": 2.280791022608504e-08, - "loss": 3.7522, - "step": 3715500 - }, - { - "epoch": 40.88, - "learning_rate": 2.2794158094504647e-08, - "loss": 3.7403, - "step": 3716000 - }, - { - "epoch": 40.89, - "learning_rate": 2.2780405962924253e-08, - "loss": 3.7291, - "step": 3716500 - }, - { - "epoch": 40.89, - "learning_rate": 2.276665383134386e-08, - "loss": 3.762, - "step": 3717000 - }, - { - "epoch": 40.9, - "learning_rate": 2.2752901699763465e-08, - "loss": 3.7236, - "step": 3717500 - }, - { - "epoch": 40.9, - "learning_rate": 2.2739149568183068e-08, - "loss": 3.7745, - "step": 3718000 - }, - { - "epoch": 40.91, - "learning_rate": 2.2725397436602674e-08, - "loss": 3.7601, - "step": 3718500 - }, - { - "epoch": 40.92, - "learning_rate": 2.271164530502228e-08, - "loss": 3.7492, - "step": 3719000 - }, - { - "epoch": 40.92, - "learning_rate": 2.2697893173441882e-08, - "loss": 3.7721, - "step": 3719500 - }, - { - "epoch": 40.93, - "learning_rate": 2.268414104186149e-08, - "loss": 3.7546, - "step": 3720000 - }, - { - "epoch": 40.93, - "learning_rate": 2.2670388910281094e-08, - "loss": 3.7526, - "step": 3720500 - }, - { - "epoch": 40.94, - "learning_rate": 2.2656636778700697e-08, - "loss": 3.7511, - "step": 3721000 - }, - { - "epoch": 40.94, - "learning_rate": 2.2642884647120303e-08, - "loss": 3.7598, - "step": 3721500 - }, - { - "epoch": 40.95, - "learning_rate": 2.262913251553991e-08, - "loss": 3.7696, - "step": 3722000 - }, - { - "epoch": 40.95, - "learning_rate": 2.2615380383959512e-08, - "loss": 3.7263, - "step": 3722500 - }, - { - "epoch": 40.96, - "learning_rate": 2.2601628252379118e-08, - "loss": 3.738, - "step": 3723000 - }, - { - "epoch": 40.96, - "learning_rate": 2.2587876120798724e-08, - "loss": 3.7484, - "step": 3723500 - }, - { - "epoch": 40.97, - "learning_rate": 2.2574123989218327e-08, - "loss": 3.7621, - "step": 3724000 - }, - { - "epoch": 40.98, - "learning_rate": 2.2560371857637933e-08, - "loss": 3.7515, - "step": 3724500 - }, - { - "epoch": 40.98, - "learning_rate": 2.254661972605754e-08, - "loss": 3.7382, - "step": 3725000 - }, - { - "epoch": 40.99, - "learning_rate": 2.253286759447714e-08, - "loss": 3.7509, - "step": 3725500 - }, - { - "epoch": 40.99, - "learning_rate": 2.2519115462896748e-08, - "loss": 3.7562, - "step": 3726000 - }, - { - "epoch": 41.0, - "learning_rate": 2.2505363331316354e-08, - "loss": 3.7366, - "step": 3726500 - }, - { - "epoch": 41.0, - "eval_loss": 3.8274526596069336, - "eval_runtime": 6.1402, - "eval_samples_per_second": 253.087, - "step": 3726695 - }, - { - "epoch": 41.0, - "learning_rate": 2.2491611199735956e-08, - "loss": 3.7448, - "step": 3727000 - }, - { - "epoch": 41.01, - "learning_rate": 2.2477859068155562e-08, - "loss": 3.755, - "step": 3727500 - }, - { - "epoch": 41.01, - "learning_rate": 2.246410693657517e-08, - "loss": 3.7514, - "step": 3728000 - }, - { - "epoch": 41.02, - "learning_rate": 2.245035480499477e-08, - "loss": 3.7666, - "step": 3728500 - }, - { - "epoch": 41.03, - "learning_rate": 2.2436602673414377e-08, - "loss": 3.7526, - "step": 3729000 - }, - { - "epoch": 41.03, - "learning_rate": 2.2422850541833983e-08, - "loss": 3.7238, - "step": 3729500 - }, - { - "epoch": 41.04, - "learning_rate": 2.240909841025359e-08, - "loss": 3.7593, - "step": 3730000 - }, - { - "epoch": 41.04, - "learning_rate": 2.2395346278673195e-08, - "loss": 3.773, - "step": 3730500 - }, - { - "epoch": 41.05, - "learning_rate": 2.2381594147092798e-08, - "loss": 3.754, - "step": 3731000 - }, - { - "epoch": 41.05, - "learning_rate": 2.2367842015512404e-08, - "loss": 3.7496, - "step": 3731500 - }, - { - "epoch": 41.06, - "learning_rate": 2.235408988393201e-08, - "loss": 3.7484, - "step": 3732000 - }, - { - "epoch": 41.06, - "learning_rate": 2.2340337752351613e-08, - "loss": 3.7336, - "step": 3732500 - }, - { - "epoch": 41.07, - "learning_rate": 2.232658562077122e-08, - "loss": 3.7499, - "step": 3733000 - }, - { - "epoch": 41.07, - "learning_rate": 2.2312833489190825e-08, - "loss": 3.7529, - "step": 3733500 - }, - { - "epoch": 41.08, - "learning_rate": 2.2299081357610428e-08, - "loss": 3.7528, - "step": 3734000 - }, - { - "epoch": 41.09, - "learning_rate": 2.2285329226030034e-08, - "loss": 3.7357, - "step": 3734500 - }, - { - "epoch": 41.09, - "learning_rate": 2.227157709444964e-08, - "loss": 3.7483, - "step": 3735000 - }, - { - "epoch": 41.1, - "learning_rate": 2.2257824962869242e-08, - "loss": 3.7598, - "step": 3735500 - }, - { - "epoch": 41.1, - "learning_rate": 2.224407283128885e-08, - "loss": 3.7456, - "step": 3736000 - }, - { - "epoch": 41.11, - "learning_rate": 2.2230320699708454e-08, - "loss": 3.7542, - "step": 3736500 - }, - { - "epoch": 41.11, - "learning_rate": 2.2216568568128057e-08, - "loss": 3.7699, - "step": 3737000 - }, - { - "epoch": 41.12, - "learning_rate": 2.2202816436547663e-08, - "loss": 3.7522, - "step": 3737500 - }, - { - "epoch": 41.12, - "learning_rate": 2.218906430496727e-08, - "loss": 3.7586, - "step": 3738000 - }, - { - "epoch": 41.13, - "learning_rate": 2.2175312173386872e-08, - "loss": 3.7271, - "step": 3738500 - }, - { - "epoch": 41.14, - "learning_rate": 2.2161560041806478e-08, - "loss": 3.7561, - "step": 3739000 - }, - { - "epoch": 41.14, - "learning_rate": 2.2147807910226084e-08, - "loss": 3.7639, - "step": 3739500 - }, - { - "epoch": 41.15, - "learning_rate": 2.2134055778645687e-08, - "loss": 3.7653, - "step": 3740000 - }, - { - "epoch": 41.15, - "learning_rate": 2.2120303647065293e-08, - "loss": 3.7379, - "step": 3740500 - }, - { - "epoch": 41.16, - "learning_rate": 2.21065515154849e-08, - "loss": 3.7567, - "step": 3741000 - }, - { - "epoch": 41.16, - "learning_rate": 2.2092799383904505e-08, - "loss": 3.7376, - "step": 3741500 - }, - { - "epoch": 41.17, - "learning_rate": 2.207904725232411e-08, - "loss": 3.7474, - "step": 3742000 - }, - { - "epoch": 41.17, - "learning_rate": 2.2065295120743717e-08, - "loss": 3.745, - "step": 3742500 - }, - { - "epoch": 41.18, - "learning_rate": 2.205154298916332e-08, - "loss": 3.75, - "step": 3743000 - }, - { - "epoch": 41.18, - "learning_rate": 2.2037790857582926e-08, - "loss": 3.7449, - "step": 3743500 - }, - { - "epoch": 41.19, - "learning_rate": 2.2024038726002532e-08, - "loss": 3.7348, - "step": 3744000 - }, - { - "epoch": 41.2, - "learning_rate": 2.2010286594422134e-08, - "loss": 3.7571, - "step": 3744500 - }, - { - "epoch": 41.2, - "learning_rate": 2.199653446284174e-08, - "loss": 3.7488, - "step": 3745000 - }, - { - "epoch": 41.21, - "learning_rate": 2.1982782331261347e-08, - "loss": 3.7689, - "step": 3745500 - }, - { - "epoch": 41.21, - "learning_rate": 2.196903019968095e-08, - "loss": 3.7565, - "step": 3746000 - }, - { - "epoch": 41.22, - "learning_rate": 2.1955278068100555e-08, - "loss": 3.7637, - "step": 3746500 - }, - { - "epoch": 41.22, - "learning_rate": 2.1941525936520158e-08, - "loss": 3.7408, - "step": 3747000 - }, - { - "epoch": 41.23, - "learning_rate": 2.1927773804939764e-08, - "loss": 3.7432, - "step": 3747500 - }, - { - "epoch": 41.23, - "learning_rate": 2.191402167335937e-08, - "loss": 3.7498, - "step": 3748000 - }, - { - "epoch": 41.24, - "learning_rate": 2.1900269541778973e-08, - "loss": 3.7614, - "step": 3748500 - }, - { - "epoch": 41.25, - "learning_rate": 2.188651741019858e-08, - "loss": 3.7449, - "step": 3749000 - }, - { - "epoch": 41.25, - "learning_rate": 2.1872765278618185e-08, - "loss": 3.7419, - "step": 3749500 - }, - { - "epoch": 41.26, - "learning_rate": 2.1859013147037788e-08, - "loss": 3.7489, - "step": 3750000 - }, - { - "epoch": 41.26, - "learning_rate": 2.1845261015457394e-08, - "loss": 3.7471, - "step": 3750500 - }, - { - "epoch": 41.27, - "learning_rate": 2.1831508883877e-08, - "loss": 3.7709, - "step": 3751000 - }, - { - "epoch": 41.27, - "learning_rate": 2.1817756752296602e-08, - "loss": 3.7717, - "step": 3751500 - }, - { - "epoch": 41.28, - "learning_rate": 2.180400462071621e-08, - "loss": 3.7559, - "step": 3752000 - }, - { - "epoch": 41.28, - "learning_rate": 2.1790252489135814e-08, - "loss": 3.7555, - "step": 3752500 - }, - { - "epoch": 41.29, - "learning_rate": 2.177650035755542e-08, - "loss": 3.767, - "step": 3753000 - }, - { - "epoch": 41.29, - "learning_rate": 2.1762748225975027e-08, - "loss": 3.7548, - "step": 3753500 - }, - { - "epoch": 41.3, - "learning_rate": 2.1748996094394633e-08, - "loss": 3.7506, - "step": 3754000 - }, - { - "epoch": 41.31, - "learning_rate": 2.1735243962814235e-08, - "loss": 3.7483, - "step": 3754500 - }, - { - "epoch": 41.31, - "learning_rate": 2.172149183123384e-08, - "loss": 3.7509, - "step": 3755000 - }, - { - "epoch": 41.32, - "learning_rate": 2.1707739699653447e-08, - "loss": 3.7527, - "step": 3755500 - }, - { - "epoch": 41.32, - "learning_rate": 2.169398756807305e-08, - "loss": 3.7408, - "step": 3756000 - }, - { - "epoch": 41.33, - "learning_rate": 2.1680235436492656e-08, - "loss": 3.7695, - "step": 3756500 - }, - { - "epoch": 41.33, - "learning_rate": 2.1666483304912262e-08, - "loss": 3.752, - "step": 3757000 - }, - { - "epoch": 41.34, - "learning_rate": 2.1652731173331865e-08, - "loss": 3.7689, - "step": 3757500 - }, - { - "epoch": 41.34, - "learning_rate": 2.163897904175147e-08, - "loss": 3.7489, - "step": 3758000 - }, - { - "epoch": 41.35, - "learning_rate": 2.1625226910171077e-08, - "loss": 3.7518, - "step": 3758500 - }, - { - "epoch": 41.36, - "learning_rate": 2.161147477859068e-08, - "loss": 3.7576, - "step": 3759000 - }, - { - "epoch": 41.36, - "learning_rate": 2.1597722647010286e-08, - "loss": 3.759, - "step": 3759500 - }, - { - "epoch": 41.37, - "learning_rate": 2.1583970515429892e-08, - "loss": 3.7495, - "step": 3760000 - }, - { - "epoch": 41.37, - "learning_rate": 2.1570218383849494e-08, - "loss": 3.7522, - "step": 3760500 - }, - { - "epoch": 41.38, - "learning_rate": 2.15564662522691e-08, - "loss": 3.7453, - "step": 3761000 - }, - { - "epoch": 41.38, - "learning_rate": 2.1542714120688707e-08, - "loss": 3.7581, - "step": 3761500 - }, - { - "epoch": 41.39, - "learning_rate": 2.152896198910831e-08, - "loss": 3.733, - "step": 3762000 - }, - { - "epoch": 41.39, - "learning_rate": 2.1515209857527915e-08, - "loss": 3.7393, - "step": 3762500 - }, - { - "epoch": 41.4, - "learning_rate": 2.150145772594752e-08, - "loss": 3.7254, - "step": 3763000 - }, - { - "epoch": 41.4, - "learning_rate": 2.1487705594367124e-08, - "loss": 3.7494, - "step": 3763500 - }, - { - "epoch": 41.41, - "learning_rate": 2.147395346278673e-08, - "loss": 3.752, - "step": 3764000 - }, - { - "epoch": 41.42, - "learning_rate": 2.1460201331206336e-08, - "loss": 3.761, - "step": 3764500 - }, - { - "epoch": 41.42, - "learning_rate": 2.1446449199625942e-08, - "loss": 3.7394, - "step": 3765000 - }, - { - "epoch": 41.43, - "learning_rate": 2.1432697068045548e-08, - "loss": 3.7618, - "step": 3765500 - }, - { - "epoch": 41.43, - "learning_rate": 2.141894493646515e-08, - "loss": 3.735, - "step": 3766000 - }, - { - "epoch": 41.44, - "learning_rate": 2.1405192804884757e-08, - "loss": 3.7423, - "step": 3766500 - }, - { - "epoch": 41.44, - "learning_rate": 2.1391440673304363e-08, - "loss": 3.7767, - "step": 3767000 - }, - { - "epoch": 41.45, - "learning_rate": 2.1377688541723966e-08, - "loss": 3.7685, - "step": 3767500 - }, - { - "epoch": 41.45, - "learning_rate": 2.1363936410143572e-08, - "loss": 3.7453, - "step": 3768000 - }, - { - "epoch": 41.46, - "learning_rate": 2.1350184278563178e-08, - "loss": 3.7634, - "step": 3768500 - }, - { - "epoch": 41.47, - "learning_rate": 2.133643214698278e-08, - "loss": 3.7128, - "step": 3769000 - }, - { - "epoch": 41.47, - "learning_rate": 2.1322680015402387e-08, - "loss": 3.7746, - "step": 3769500 - }, - { - "epoch": 41.48, - "learning_rate": 2.1308927883821993e-08, - "loss": 3.7423, - "step": 3770000 - }, - { - "epoch": 41.48, - "learning_rate": 2.1295175752241595e-08, - "loss": 3.7456, - "step": 3770500 - }, - { - "epoch": 41.49, - "learning_rate": 2.12814236206612e-08, - "loss": 3.757, - "step": 3771000 - }, - { - "epoch": 41.49, - "learning_rate": 2.1267671489080807e-08, - "loss": 3.7594, - "step": 3771500 - }, - { - "epoch": 41.5, - "learning_rate": 2.125391935750041e-08, - "loss": 3.7421, - "step": 3772000 - }, - { - "epoch": 41.5, - "learning_rate": 2.1240167225920016e-08, - "loss": 3.759, - "step": 3772500 - }, - { - "epoch": 41.51, - "learning_rate": 2.1226415094339622e-08, - "loss": 3.7402, - "step": 3773000 - }, - { - "epoch": 41.51, - "learning_rate": 2.1212662962759225e-08, - "loss": 3.7571, - "step": 3773500 - }, - { - "epoch": 41.52, - "learning_rate": 2.119891083117883e-08, - "loss": 3.7696, - "step": 3774000 - }, - { - "epoch": 41.53, - "learning_rate": 2.1185158699598437e-08, - "loss": 3.745, - "step": 3774500 - }, - { - "epoch": 41.53, - "learning_rate": 2.117140656801804e-08, - "loss": 3.7412, - "step": 3775000 - }, - { - "epoch": 41.54, - "learning_rate": 2.1157654436437646e-08, - "loss": 3.7447, - "step": 3775500 - }, - { - "epoch": 41.54, - "learning_rate": 2.1143902304857252e-08, - "loss": 3.7666, - "step": 3776000 - }, - { - "epoch": 41.55, - "learning_rate": 2.1130150173276854e-08, - "loss": 3.75, - "step": 3776500 - }, - { - "epoch": 41.55, - "learning_rate": 2.111639804169646e-08, - "loss": 3.7505, - "step": 3777000 - }, - { - "epoch": 41.56, - "learning_rate": 2.1102645910116067e-08, - "loss": 3.7305, - "step": 3777500 - }, - { - "epoch": 41.56, - "learning_rate": 2.1088893778535673e-08, - "loss": 3.7659, - "step": 3778000 - }, - { - "epoch": 41.57, - "learning_rate": 2.107514164695528e-08, - "loss": 3.751, - "step": 3778500 - }, - { - "epoch": 41.58, - "learning_rate": 2.1061389515374885e-08, - "loss": 3.7478, - "step": 3779000 - }, - { - "epoch": 41.58, - "learning_rate": 2.1047637383794487e-08, - "loss": 3.7563, - "step": 3779500 - }, - { - "epoch": 41.59, - "learning_rate": 2.1033885252214093e-08, - "loss": 3.7604, - "step": 3780000 - }, - { - "epoch": 41.59, - "learning_rate": 2.10201331206337e-08, - "loss": 3.7799, - "step": 3780500 - }, - { - "epoch": 41.6, - "learning_rate": 2.1006380989053302e-08, - "loss": 3.7469, - "step": 3781000 - }, - { - "epoch": 41.6, - "learning_rate": 2.0992628857472908e-08, - "loss": 3.7506, - "step": 3781500 - }, - { - "epoch": 41.61, - "learning_rate": 2.0978876725892514e-08, - "loss": 3.7445, - "step": 3782000 - }, - { - "epoch": 41.61, - "learning_rate": 2.0965124594312117e-08, - "loss": 3.7519, - "step": 3782500 - }, - { - "epoch": 41.62, - "learning_rate": 2.0951372462731723e-08, - "loss": 3.761, - "step": 3783000 - }, - { - "epoch": 41.62, - "learning_rate": 2.093762033115133e-08, - "loss": 3.7359, - "step": 3783500 - }, - { - "epoch": 41.63, - "learning_rate": 2.0923868199570932e-08, - "loss": 3.7705, - "step": 3784000 - }, - { - "epoch": 41.64, - "learning_rate": 2.0910116067990538e-08, - "loss": 3.7724, - "step": 3784500 - }, - { - "epoch": 41.64, - "learning_rate": 2.089636393641014e-08, - "loss": 3.7709, - "step": 3785000 - }, - { - "epoch": 41.65, - "learning_rate": 2.0882611804829747e-08, - "loss": 3.7446, - "step": 3785500 - }, - { - "epoch": 41.65, - "learning_rate": 2.0868859673249353e-08, - "loss": 3.7496, - "step": 3786000 - }, - { - "epoch": 41.66, - "learning_rate": 2.0855107541668955e-08, - "loss": 3.7615, - "step": 3786500 - }, - { - "epoch": 41.66, - "learning_rate": 2.084135541008856e-08, - "loss": 3.7493, - "step": 3787000 - }, - { - "epoch": 41.67, - "learning_rate": 2.0827603278508167e-08, - "loss": 3.7472, - "step": 3787500 - }, - { - "epoch": 41.67, - "learning_rate": 2.081385114692777e-08, - "loss": 3.7575, - "step": 3788000 - }, - { - "epoch": 41.68, - "learning_rate": 2.0800099015347376e-08, - "loss": 3.7393, - "step": 3788500 - }, - { - "epoch": 41.69, - "learning_rate": 2.0786346883766982e-08, - "loss": 3.7592, - "step": 3789000 - }, - { - "epoch": 41.69, - "learning_rate": 2.0772594752186588e-08, - "loss": 3.7522, - "step": 3789500 - }, - { - "epoch": 41.7, - "learning_rate": 2.0758842620606194e-08, - "loss": 3.7446, - "step": 3790000 - }, - { - "epoch": 41.7, - "learning_rate": 2.07450904890258e-08, - "loss": 3.756, - "step": 3790500 - }, - { - "epoch": 41.71, - "learning_rate": 2.0731338357445403e-08, - "loss": 3.7355, - "step": 3791000 - }, - { - "epoch": 41.71, - "learning_rate": 2.071758622586501e-08, - "loss": 3.7511, - "step": 3791500 - }, - { - "epoch": 41.72, - "learning_rate": 2.0703834094284615e-08, - "loss": 3.7528, - "step": 3792000 - }, - { - "epoch": 41.72, - "learning_rate": 2.0690081962704218e-08, - "loss": 3.7364, - "step": 3792500 - }, - { - "epoch": 41.73, - "learning_rate": 2.0676329831123824e-08, - "loss": 3.763, - "step": 3793000 - }, - { - "epoch": 41.73, - "learning_rate": 2.066257769954343e-08, - "loss": 3.7456, - "step": 3793500 - }, - { - "epoch": 41.74, - "learning_rate": 2.0648825567963033e-08, - "loss": 3.7592, - "step": 3794000 - }, - { - "epoch": 41.75, - "learning_rate": 2.063507343638264e-08, - "loss": 3.7419, - "step": 3794500 - }, - { - "epoch": 41.75, - "learning_rate": 2.0621321304802245e-08, - "loss": 3.7618, - "step": 3795000 - }, - { - "epoch": 41.76, - "learning_rate": 2.0607569173221847e-08, - "loss": 3.7476, - "step": 3795500 - }, - { - "epoch": 41.76, - "learning_rate": 2.0593817041641453e-08, - "loss": 3.7528, - "step": 3796000 - }, - { - "epoch": 41.77, - "learning_rate": 2.058006491006106e-08, - "loss": 3.7727, - "step": 3796500 - }, - { - "epoch": 41.77, - "learning_rate": 2.0566312778480662e-08, - "loss": 3.7414, - "step": 3797000 - }, - { - "epoch": 41.78, - "learning_rate": 2.0552560646900268e-08, - "loss": 3.7542, - "step": 3797500 - }, - { - "epoch": 41.78, - "learning_rate": 2.0538808515319874e-08, - "loss": 3.7541, - "step": 3798000 - }, - { - "epoch": 41.79, - "learning_rate": 2.0525056383739477e-08, - "loss": 3.7533, - "step": 3798500 - }, - { - "epoch": 41.8, - "learning_rate": 2.0511304252159083e-08, - "loss": 3.7418, - "step": 3799000 - }, - { - "epoch": 41.8, - "learning_rate": 2.049755212057869e-08, - "loss": 3.7685, - "step": 3799500 - }, - { - "epoch": 41.81, - "learning_rate": 2.0483799988998292e-08, - "loss": 3.7469, - "step": 3800000 - }, - { - "epoch": 41.81, - "learning_rate": 2.0470047857417898e-08, - "loss": 3.7529, - "step": 3800500 - }, - { - "epoch": 41.82, - "learning_rate": 2.0456295725837504e-08, - "loss": 3.7584, - "step": 3801000 - }, - { - "epoch": 41.82, - "learning_rate": 2.044254359425711e-08, - "loss": 3.7538, - "step": 3801500 - }, - { - "epoch": 41.83, - "learning_rate": 2.0428791462676716e-08, - "loss": 3.7639, - "step": 3802000 - }, - { - "epoch": 41.83, - "learning_rate": 2.0415039331096322e-08, - "loss": 3.7583, - "step": 3802500 - }, - { - "epoch": 41.84, - "learning_rate": 2.0401287199515925e-08, - "loss": 3.7581, - "step": 3803000 - }, - { - "epoch": 41.84, - "learning_rate": 2.038753506793553e-08, - "loss": 3.7666, - "step": 3803500 - }, - { - "epoch": 41.85, - "learning_rate": 2.0373782936355133e-08, - "loss": 3.7365, - "step": 3804000 - }, - { - "epoch": 41.86, - "learning_rate": 2.036003080477474e-08, - "loss": 3.756, - "step": 3804500 - }, - { - "epoch": 41.86, - "learning_rate": 2.0346278673194346e-08, - "loss": 3.7603, - "step": 3805000 - }, - { - "epoch": 41.87, - "learning_rate": 2.0332526541613948e-08, - "loss": 3.7555, - "step": 3805500 - }, - { - "epoch": 41.87, - "learning_rate": 2.0318774410033554e-08, - "loss": 3.7519, - "step": 3806000 - }, - { - "epoch": 41.88, - "learning_rate": 2.030502227845316e-08, - "loss": 3.7308, - "step": 3806500 - }, - { - "epoch": 41.88, - "learning_rate": 2.0291270146872763e-08, - "loss": 3.7556, - "step": 3807000 - }, - { - "epoch": 41.89, - "learning_rate": 2.027751801529237e-08, - "loss": 3.7614, - "step": 3807500 - }, - { - "epoch": 41.89, - "learning_rate": 2.0263765883711975e-08, - "loss": 3.7378, - "step": 3808000 - }, - { - "epoch": 41.9, - "learning_rate": 2.0250013752131578e-08, - "loss": 3.7663, - "step": 3808500 - }, - { - "epoch": 41.91, - "learning_rate": 2.0236261620551184e-08, - "loss": 3.7499, - "step": 3809000 - }, - { - "epoch": 41.91, - "learning_rate": 2.022250948897079e-08, - "loss": 3.7587, - "step": 3809500 - }, - { - "epoch": 41.92, - "learning_rate": 2.0208757357390393e-08, - "loss": 3.757, - "step": 3810000 - }, - { - "epoch": 41.92, - "learning_rate": 2.019500522581e-08, - "loss": 3.7581, - "step": 3810500 - }, - { - "epoch": 41.93, - "learning_rate": 2.0181253094229605e-08, - "loss": 3.7555, - "step": 3811000 - }, - { - "epoch": 41.93, - "learning_rate": 2.0167500962649207e-08, - "loss": 3.7295, - "step": 3811500 - }, - { - "epoch": 41.94, - "learning_rate": 2.0153748831068813e-08, - "loss": 3.7546, - "step": 3812000 - }, - { - "epoch": 41.94, - "learning_rate": 2.013999669948842e-08, - "loss": 3.7637, - "step": 3812500 - }, - { - "epoch": 41.95, - "learning_rate": 2.0126244567908026e-08, - "loss": 3.756, - "step": 3813000 - }, - { - "epoch": 41.96, - "learning_rate": 2.0112492436327628e-08, - "loss": 3.7671, - "step": 3813500 - }, - { - "epoch": 41.96, - "learning_rate": 2.0098740304747234e-08, - "loss": 3.7389, - "step": 3814000 - }, - { - "epoch": 41.97, - "learning_rate": 2.008498817316684e-08, - "loss": 3.7422, - "step": 3814500 - }, - { - "epoch": 41.97, - "learning_rate": 2.0071236041586446e-08, - "loss": 3.7757, - "step": 3815000 - }, - { - "epoch": 41.98, - "learning_rate": 2.0057483910006052e-08, - "loss": 3.7522, - "step": 3815500 - }, - { - "epoch": 41.98, - "learning_rate": 2.0043731778425655e-08, - "loss": 3.7477, - "step": 3816000 - }, - { - "epoch": 41.99, - "learning_rate": 2.002997964684526e-08, - "loss": 3.753, - "step": 3816500 - }, - { - "epoch": 41.99, - "learning_rate": 2.0016227515264867e-08, - "loss": 3.7464, - "step": 3817000 - }, - { - "epoch": 42.0, - "learning_rate": 2.000247538368447e-08, - "loss": 3.7457, - "step": 3817500 - }, - { - "epoch": 42.0, - "eval_loss": 3.826972723007202, - "eval_runtime": 6.1413, - "eval_samples_per_second": 253.042, - "step": 3817590 - }, - { - "epoch": 42.0, - "learning_rate": 1.9988723252104076e-08, - "loss": 3.7521, - "step": 3818000 - }, - { - "epoch": 42.01, - "learning_rate": 1.9974971120523682e-08, - "loss": 3.7688, - "step": 3818500 - }, - { - "epoch": 42.02, - "learning_rate": 1.9961218988943285e-08, - "loss": 3.7495, - "step": 3819000 - }, - { - "epoch": 42.02, - "learning_rate": 1.994746685736289e-08, - "loss": 3.7746, - "step": 3819500 - }, - { - "epoch": 42.03, - "learning_rate": 1.9933714725782497e-08, - "loss": 3.7643, - "step": 3820000 - }, - { - "epoch": 42.03, - "learning_rate": 1.99199625942021e-08, - "loss": 3.769, - "step": 3820500 - }, - { - "epoch": 42.04, - "learning_rate": 1.9906210462621706e-08, - "loss": 3.7365, - "step": 3821000 - }, - { - "epoch": 42.04, - "learning_rate": 1.989245833104131e-08, - "loss": 3.7571, - "step": 3821500 - }, - { - "epoch": 42.05, - "learning_rate": 1.9878706199460914e-08, - "loss": 3.7632, - "step": 3822000 - }, - { - "epoch": 42.05, - "learning_rate": 1.986495406788052e-08, - "loss": 3.7467, - "step": 3822500 - }, - { - "epoch": 42.06, - "learning_rate": 1.9851201936300123e-08, - "loss": 3.7787, - "step": 3823000 - }, - { - "epoch": 42.07, - "learning_rate": 1.983744980471973e-08, - "loss": 3.7472, - "step": 3823500 - }, - { - "epoch": 42.07, - "learning_rate": 1.9823697673139335e-08, - "loss": 3.7496, - "step": 3824000 - }, - { - "epoch": 42.08, - "learning_rate": 1.9809945541558938e-08, - "loss": 3.785, - "step": 3824500 - }, - { - "epoch": 42.08, - "learning_rate": 1.9796193409978544e-08, - "loss": 3.7285, - "step": 3825000 - }, - { - "epoch": 42.09, - "learning_rate": 1.978244127839815e-08, - "loss": 3.7687, - "step": 3825500 - }, - { - "epoch": 42.09, - "learning_rate": 1.9768689146817756e-08, - "loss": 3.7518, - "step": 3826000 - }, - { - "epoch": 42.1, - "learning_rate": 1.9754937015237362e-08, - "loss": 3.7477, - "step": 3826500 - }, - { - "epoch": 42.1, - "learning_rate": 1.9741184883656968e-08, - "loss": 3.7549, - "step": 3827000 - }, - { - "epoch": 42.11, - "learning_rate": 1.972743275207657e-08, - "loss": 3.7455, - "step": 3827500 - }, - { - "epoch": 42.11, - "learning_rate": 1.9713680620496177e-08, - "loss": 3.7529, - "step": 3828000 - }, - { - "epoch": 42.12, - "learning_rate": 1.9699928488915783e-08, - "loss": 3.7349, - "step": 3828500 - }, - { - "epoch": 42.13, - "learning_rate": 1.9686176357335386e-08, - "loss": 3.754, - "step": 3829000 - }, - { - "epoch": 42.13, - "learning_rate": 1.967242422575499e-08, - "loss": 3.7459, - "step": 3829500 - }, - { - "epoch": 42.14, - "learning_rate": 1.9658672094174598e-08, - "loss": 3.7437, - "step": 3830000 - }, - { - "epoch": 42.14, - "learning_rate": 1.96449199625942e-08, - "loss": 3.7373, - "step": 3830500 - }, - { - "epoch": 42.15, - "learning_rate": 1.9631167831013806e-08, - "loss": 3.7288, - "step": 3831000 - }, - { - "epoch": 42.15, - "learning_rate": 1.9617415699433412e-08, - "loss": 3.7549, - "step": 3831500 - }, - { - "epoch": 42.16, - "learning_rate": 1.9603663567853015e-08, - "loss": 3.7618, - "step": 3832000 - }, - { - "epoch": 42.16, - "learning_rate": 1.958991143627262e-08, - "loss": 3.7543, - "step": 3832500 - }, - { - "epoch": 42.17, - "learning_rate": 1.9576159304692227e-08, - "loss": 3.7412, - "step": 3833000 - }, - { - "epoch": 42.18, - "learning_rate": 1.956240717311183e-08, - "loss": 3.7465, - "step": 3833500 - }, - { - "epoch": 42.18, - "learning_rate": 1.9548655041531436e-08, - "loss": 3.7546, - "step": 3834000 - }, - { - "epoch": 42.19, - "learning_rate": 1.9534902909951042e-08, - "loss": 3.7321, - "step": 3834500 - }, - { - "epoch": 42.19, - "learning_rate": 1.9521150778370645e-08, - "loss": 3.7521, - "step": 3835000 - }, - { - "epoch": 42.2, - "learning_rate": 1.950739864679025e-08, - "loss": 3.7353, - "step": 3835500 - }, - { - "epoch": 42.2, - "learning_rate": 1.9493646515209857e-08, - "loss": 3.7449, - "step": 3836000 - }, - { - "epoch": 42.21, - "learning_rate": 1.947989438362946e-08, - "loss": 3.7547, - "step": 3836500 - }, - { - "epoch": 42.21, - "learning_rate": 1.9466142252049066e-08, - "loss": 3.7788, - "step": 3837000 - }, - { - "epoch": 42.22, - "learning_rate": 1.945239012046867e-08, - "loss": 3.7498, - "step": 3837500 - }, - { - "epoch": 42.22, - "learning_rate": 1.9438637988888278e-08, - "loss": 3.7606, - "step": 3838000 - }, - { - "epoch": 42.23, - "learning_rate": 1.9424885857307884e-08, - "loss": 3.7276, - "step": 3838500 - }, - { - "epoch": 42.24, - "learning_rate": 1.941113372572749e-08, - "loss": 3.76, - "step": 3839000 - }, - { - "epoch": 42.24, - "learning_rate": 1.9397381594147092e-08, - "loss": 3.741, - "step": 3839500 - }, - { - "epoch": 42.25, - "learning_rate": 1.93836294625667e-08, - "loss": 3.7512, - "step": 3840000 - }, - { - "epoch": 42.25, - "learning_rate": 1.9369877330986305e-08, - "loss": 3.7561, - "step": 3840500 - }, - { - "epoch": 42.26, - "learning_rate": 1.9356125199405907e-08, - "loss": 3.7355, - "step": 3841000 - }, - { - "epoch": 42.26, - "learning_rate": 1.9342373067825513e-08, - "loss": 3.771, - "step": 3841500 - }, - { - "epoch": 42.27, - "learning_rate": 1.9328620936245116e-08, - "loss": 3.7644, - "step": 3842000 - }, - { - "epoch": 42.27, - "learning_rate": 1.9314868804664722e-08, - "loss": 3.736, - "step": 3842500 - }, - { - "epoch": 42.28, - "learning_rate": 1.9301116673084328e-08, - "loss": 3.7491, - "step": 3843000 - }, - { - "epoch": 42.29, - "learning_rate": 1.928736454150393e-08, - "loss": 3.7657, - "step": 3843500 - }, - { - "epoch": 42.29, - "learning_rate": 1.9273612409923537e-08, - "loss": 3.7498, - "step": 3844000 - }, - { - "epoch": 42.3, - "learning_rate": 1.9259860278343143e-08, - "loss": 3.7614, - "step": 3844500 - }, - { - "epoch": 42.3, - "learning_rate": 1.9246108146762746e-08, - "loss": 3.7458, - "step": 3845000 - }, - { - "epoch": 42.31, - "learning_rate": 1.923235601518235e-08, - "loss": 3.7592, - "step": 3845500 - }, - { - "epoch": 42.31, - "learning_rate": 1.9218603883601958e-08, - "loss": 3.7364, - "step": 3846000 - }, - { - "epoch": 42.32, - "learning_rate": 1.920485175202156e-08, - "loss": 3.7457, - "step": 3846500 - }, - { - "epoch": 42.32, - "learning_rate": 1.9191099620441166e-08, - "loss": 3.7437, - "step": 3847000 - }, - { - "epoch": 42.33, - "learning_rate": 1.9177347488860772e-08, - "loss": 3.7565, - "step": 3847500 - }, - { - "epoch": 42.33, - "learning_rate": 1.9163595357280375e-08, - "loss": 3.7336, - "step": 3848000 - }, - { - "epoch": 42.34, - "learning_rate": 1.914984322569998e-08, - "loss": 3.7506, - "step": 3848500 - }, - { - "epoch": 42.35, - "learning_rate": 1.9136091094119587e-08, - "loss": 3.7573, - "step": 3849000 - }, - { - "epoch": 42.35, - "learning_rate": 1.9122338962539193e-08, - "loss": 3.7469, - "step": 3849500 - }, - { - "epoch": 42.36, - "learning_rate": 1.91085868309588e-08, - "loss": 3.7616, - "step": 3850000 - }, - { - "epoch": 42.36, - "learning_rate": 1.9094834699378405e-08, - "loss": 3.7588, - "step": 3850500 - }, - { - "epoch": 42.37, - "learning_rate": 1.9081082567798008e-08, - "loss": 3.7503, - "step": 3851000 - }, - { - "epoch": 42.37, - "learning_rate": 1.9067330436217614e-08, - "loss": 3.7383, - "step": 3851500 - }, - { - "epoch": 42.38, - "learning_rate": 1.905357830463722e-08, - "loss": 3.7413, - "step": 3852000 - }, - { - "epoch": 42.38, - "learning_rate": 1.9039826173056823e-08, - "loss": 3.7434, - "step": 3852500 - }, - { - "epoch": 42.39, - "learning_rate": 1.902607404147643e-08, - "loss": 3.7522, - "step": 3853000 - }, - { - "epoch": 42.4, - "learning_rate": 1.9012321909896035e-08, - "loss": 3.7583, - "step": 3853500 - }, - { - "epoch": 42.4, - "learning_rate": 1.8998569778315638e-08, - "loss": 3.7465, - "step": 3854000 - }, - { - "epoch": 42.41, - "learning_rate": 1.8984817646735244e-08, - "loss": 3.746, - "step": 3854500 - }, - { - "epoch": 42.41, - "learning_rate": 1.897106551515485e-08, - "loss": 3.7559, - "step": 3855000 - }, - { - "epoch": 42.42, - "learning_rate": 1.8957313383574452e-08, - "loss": 3.7499, - "step": 3855500 - }, - { - "epoch": 42.42, - "learning_rate": 1.894356125199406e-08, - "loss": 3.762, - "step": 3856000 - }, - { - "epoch": 42.43, - "learning_rate": 1.8929809120413665e-08, - "loss": 3.7389, - "step": 3856500 - }, - { - "epoch": 42.43, - "learning_rate": 1.8916056988833267e-08, - "loss": 3.7631, - "step": 3857000 - }, - { - "epoch": 42.44, - "learning_rate": 1.8902304857252873e-08, - "loss": 3.7355, - "step": 3857500 - }, - { - "epoch": 42.44, - "learning_rate": 1.888855272567248e-08, - "loss": 3.7424, - "step": 3858000 - }, - { - "epoch": 42.45, - "learning_rate": 1.8874800594092082e-08, - "loss": 3.7557, - "step": 3858500 - }, - { - "epoch": 42.46, - "learning_rate": 1.8861048462511688e-08, - "loss": 3.7594, - "step": 3859000 - }, - { - "epoch": 42.46, - "learning_rate": 1.884729633093129e-08, - "loss": 3.7544, - "step": 3859500 - }, - { - "epoch": 42.47, - "learning_rate": 1.8833544199350897e-08, - "loss": 3.7495, - "step": 3860000 - }, - { - "epoch": 42.47, - "learning_rate": 1.8819792067770503e-08, - "loss": 3.7673, - "step": 3860500 - }, - { - "epoch": 42.48, - "learning_rate": 1.880603993619011e-08, - "loss": 3.7409, - "step": 3861000 - }, - { - "epoch": 42.48, - "learning_rate": 1.8792287804609712e-08, - "loss": 3.7525, - "step": 3861500 - }, - { - "epoch": 42.49, - "learning_rate": 1.8778535673029318e-08, - "loss": 3.737, - "step": 3862000 - }, - { - "epoch": 42.49, - "learning_rate": 1.8764783541448924e-08, - "loss": 3.7432, - "step": 3862500 - }, - { - "epoch": 42.5, - "learning_rate": 1.875103140986853e-08, - "loss": 3.7518, - "step": 3863000 - }, - { - "epoch": 42.51, - "learning_rate": 1.8737279278288136e-08, - "loss": 3.7368, - "step": 3863500 - }, - { - "epoch": 42.51, - "learning_rate": 1.872352714670774e-08, - "loss": 3.7556, - "step": 3864000 - }, - { - "epoch": 42.52, - "learning_rate": 1.8709775015127345e-08, - "loss": 3.7591, - "step": 3864500 - }, - { - "epoch": 42.52, - "learning_rate": 1.869602288354695e-08, - "loss": 3.7552, - "step": 3865000 - }, - { - "epoch": 42.53, - "learning_rate": 1.8682270751966553e-08, - "loss": 3.7646, - "step": 3865500 - }, - { - "epoch": 42.53, - "learning_rate": 1.866851862038616e-08, - "loss": 3.7467, - "step": 3866000 - }, - { - "epoch": 42.54, - "learning_rate": 1.8654766488805765e-08, - "loss": 3.7644, - "step": 3866500 - }, - { - "epoch": 42.54, - "learning_rate": 1.8641014357225368e-08, - "loss": 3.7586, - "step": 3867000 - }, - { - "epoch": 42.55, - "learning_rate": 1.8627262225644974e-08, - "loss": 3.7498, - "step": 3867500 - }, - { - "epoch": 42.55, - "learning_rate": 1.861351009406458e-08, - "loss": 3.7552, - "step": 3868000 - }, - { - "epoch": 42.56, - "learning_rate": 1.8599757962484183e-08, - "loss": 3.7534, - "step": 3868500 - }, - { - "epoch": 42.57, - "learning_rate": 1.858600583090379e-08, - "loss": 3.7433, - "step": 3869000 - }, - { - "epoch": 42.57, - "learning_rate": 1.8572253699323395e-08, - "loss": 3.7508, - "step": 3869500 - }, - { - "epoch": 42.58, - "learning_rate": 1.8558501567742998e-08, - "loss": 3.7425, - "step": 3870000 - }, - { - "epoch": 42.58, - "learning_rate": 1.8544749436162604e-08, - "loss": 3.7523, - "step": 3870500 - }, - { - "epoch": 42.59, - "learning_rate": 1.853099730458221e-08, - "loss": 3.7589, - "step": 3871000 - }, - { - "epoch": 42.59, - "learning_rate": 1.8517245173001813e-08, - "loss": 3.7587, - "step": 3871500 - }, - { - "epoch": 42.6, - "learning_rate": 1.850349304142142e-08, - "loss": 3.756, - "step": 3872000 - }, - { - "epoch": 42.6, - "learning_rate": 1.8489740909841025e-08, - "loss": 3.7512, - "step": 3872500 - }, - { - "epoch": 42.61, - "learning_rate": 1.8475988778260627e-08, - "loss": 3.7579, - "step": 3873000 - }, - { - "epoch": 42.62, - "learning_rate": 1.8462236646680233e-08, - "loss": 3.7517, - "step": 3873500 - }, - { - "epoch": 42.62, - "learning_rate": 1.844848451509984e-08, - "loss": 3.7676, - "step": 3874000 - }, - { - "epoch": 42.63, - "learning_rate": 1.8434732383519445e-08, - "loss": 3.7597, - "step": 3874500 - }, - { - "epoch": 42.63, - "learning_rate": 1.842098025193905e-08, - "loss": 3.7431, - "step": 3875000 - }, - { - "epoch": 42.64, - "learning_rate": 1.8407228120358657e-08, - "loss": 3.7578, - "step": 3875500 - }, - { - "epoch": 42.64, - "learning_rate": 1.839347598877826e-08, - "loss": 3.7335, - "step": 3876000 - }, - { - "epoch": 42.65, - "learning_rate": 1.8379723857197866e-08, - "loss": 3.7423, - "step": 3876500 - }, - { - "epoch": 42.65, - "learning_rate": 1.8365971725617472e-08, - "loss": 3.7464, - "step": 3877000 - }, - { - "epoch": 42.66, - "learning_rate": 1.8352219594037075e-08, - "loss": 3.7518, - "step": 3877500 - }, - { - "epoch": 42.66, - "learning_rate": 1.833846746245668e-08, - "loss": 3.7478, - "step": 3878000 - }, - { - "epoch": 42.67, - "learning_rate": 1.8324715330876284e-08, - "loss": 3.7574, - "step": 3878500 - }, - { - "epoch": 42.68, - "learning_rate": 1.831096319929589e-08, - "loss": 3.7431, - "step": 3879000 - }, - { - "epoch": 42.68, - "learning_rate": 1.8297211067715496e-08, - "loss": 3.7416, - "step": 3879500 - }, - { - "epoch": 42.69, - "learning_rate": 1.82834589361351e-08, - "loss": 3.7526, - "step": 3880000 - }, - { - "epoch": 42.69, - "learning_rate": 1.8269706804554705e-08, - "loss": 3.7593, - "step": 3880500 - }, - { - "epoch": 42.7, - "learning_rate": 1.825595467297431e-08, - "loss": 3.7447, - "step": 3881000 - }, - { - "epoch": 42.7, - "learning_rate": 1.8242202541393913e-08, - "loss": 3.7542, - "step": 3881500 - }, - { - "epoch": 42.71, - "learning_rate": 1.822845040981352e-08, - "loss": 3.7555, - "step": 3882000 - }, - { - "epoch": 42.71, - "learning_rate": 1.8214698278233125e-08, - "loss": 3.7429, - "step": 3882500 - }, - { - "epoch": 42.72, - "learning_rate": 1.8200946146652728e-08, - "loss": 3.757, - "step": 3883000 - }, - { - "epoch": 42.73, - "learning_rate": 1.8187194015072334e-08, - "loss": 3.7564, - "step": 3883500 - }, - { - "epoch": 42.73, - "learning_rate": 1.817344188349194e-08, - "loss": 3.7726, - "step": 3884000 - }, - { - "epoch": 42.74, - "learning_rate": 1.8159689751911543e-08, - "loss": 3.7474, - "step": 3884500 - }, - { - "epoch": 42.74, - "learning_rate": 1.814593762033115e-08, - "loss": 3.7452, - "step": 3885000 - }, - { - "epoch": 42.75, - "learning_rate": 1.8132185488750755e-08, - "loss": 3.7672, - "step": 3885500 - }, - { - "epoch": 42.75, - "learning_rate": 1.811843335717036e-08, - "loss": 3.7476, - "step": 3886000 - }, - { - "epoch": 42.76, - "learning_rate": 1.8104681225589967e-08, - "loss": 3.7544, - "step": 3886500 - }, - { - "epoch": 42.76, - "learning_rate": 1.8090929094009573e-08, - "loss": 3.7442, - "step": 3887000 - }, - { - "epoch": 42.77, - "learning_rate": 1.8077176962429176e-08, - "loss": 3.7461, - "step": 3887500 - }, - { - "epoch": 42.77, - "learning_rate": 1.8063424830848782e-08, - "loss": 3.7622, - "step": 3888000 - }, - { - "epoch": 42.78, - "learning_rate": 1.8049672699268388e-08, - "loss": 3.7524, - "step": 3888500 - }, - { - "epoch": 42.79, - "learning_rate": 1.803592056768799e-08, - "loss": 3.7667, - "step": 3889000 - }, - { - "epoch": 42.79, - "learning_rate": 1.8022168436107597e-08, - "loss": 3.7439, - "step": 3889500 - }, - { - "epoch": 42.8, - "learning_rate": 1.8008416304527203e-08, - "loss": 3.759, - "step": 3890000 - }, - { - "epoch": 42.8, - "learning_rate": 1.7994664172946805e-08, - "loss": 3.7305, - "step": 3890500 - }, - { - "epoch": 42.81, - "learning_rate": 1.798091204136641e-08, - "loss": 3.7383, - "step": 3891000 - }, - { - "epoch": 42.81, - "learning_rate": 1.7967159909786018e-08, - "loss": 3.7449, - "step": 3891500 - }, - { - "epoch": 42.82, - "learning_rate": 1.795340777820562e-08, - "loss": 3.7588, - "step": 3892000 - }, - { - "epoch": 42.82, - "learning_rate": 1.7939655646625226e-08, - "loss": 3.7305, - "step": 3892500 - }, - { - "epoch": 42.83, - "learning_rate": 1.7925903515044832e-08, - "loss": 3.7582, - "step": 3893000 - }, - { - "epoch": 42.84, - "learning_rate": 1.7912151383464435e-08, - "loss": 3.7535, - "step": 3893500 - }, - { - "epoch": 42.84, - "learning_rate": 1.789839925188404e-08, - "loss": 3.7383, - "step": 3894000 - }, - { - "epoch": 42.85, - "learning_rate": 1.7884647120303647e-08, - "loss": 3.7578, - "step": 3894500 - }, - { - "epoch": 42.85, - "learning_rate": 1.787089498872325e-08, - "loss": 3.7597, - "step": 3895000 - }, - { - "epoch": 42.86, - "learning_rate": 1.7857142857142856e-08, - "loss": 3.7586, - "step": 3895500 - }, - { - "epoch": 42.86, - "learning_rate": 1.7843390725562462e-08, - "loss": 3.7546, - "step": 3896000 - }, - { - "epoch": 42.87, - "learning_rate": 1.7829638593982065e-08, - "loss": 3.7642, - "step": 3896500 - }, - { - "epoch": 42.87, - "learning_rate": 1.781588646240167e-08, - "loss": 3.7711, - "step": 3897000 - }, - { - "epoch": 42.88, - "learning_rate": 1.7802134330821277e-08, - "loss": 3.7659, - "step": 3897500 - }, - { - "epoch": 42.88, - "learning_rate": 1.7788382199240883e-08, - "loss": 3.7463, - "step": 3898000 - }, - { - "epoch": 42.89, - "learning_rate": 1.777463006766049e-08, - "loss": 3.7531, - "step": 3898500 - }, - { - "epoch": 42.9, - "learning_rate": 1.776087793608009e-08, - "loss": 3.7459, - "step": 3899000 - }, - { - "epoch": 42.9, - "learning_rate": 1.7747125804499698e-08, - "loss": 3.7472, - "step": 3899500 - }, - { - "epoch": 42.91, - "learning_rate": 1.7733373672919304e-08, - "loss": 3.754, - "step": 3900000 - }, - { - "epoch": 42.91, - "learning_rate": 1.7719621541338906e-08, - "loss": 3.7539, - "step": 3900500 - }, - { - "epoch": 42.92, - "learning_rate": 1.7705869409758512e-08, - "loss": 3.7487, - "step": 3901000 - }, - { - "epoch": 42.92, - "learning_rate": 1.769211727817812e-08, - "loss": 3.7393, - "step": 3901500 - }, - { - "epoch": 42.93, - "learning_rate": 1.767836514659772e-08, - "loss": 3.7347, - "step": 3902000 - }, - { - "epoch": 42.93, - "learning_rate": 1.7664613015017327e-08, - "loss": 3.7589, - "step": 3902500 - }, - { - "epoch": 42.94, - "learning_rate": 1.7650860883436933e-08, - "loss": 3.7485, - "step": 3903000 - }, - { - "epoch": 42.95, - "learning_rate": 1.7637108751856536e-08, - "loss": 3.7417, - "step": 3903500 - }, - { - "epoch": 42.95, - "learning_rate": 1.7623356620276142e-08, - "loss": 3.7644, - "step": 3904000 - }, - { - "epoch": 42.96, - "learning_rate": 1.7609604488695748e-08, - "loss": 3.7489, - "step": 3904500 - }, - { - "epoch": 42.96, - "learning_rate": 1.759585235711535e-08, - "loss": 3.7368, - "step": 3905000 - }, - { - "epoch": 42.97, - "learning_rate": 1.7582100225534957e-08, - "loss": 3.7603, - "step": 3905500 - }, - { - "epoch": 42.97, - "learning_rate": 1.7568348093954563e-08, - "loss": 3.774, - "step": 3906000 - }, - { - "epoch": 42.98, - "learning_rate": 1.7554595962374165e-08, - "loss": 3.7453, - "step": 3906500 - }, - { - "epoch": 42.98, - "learning_rate": 1.754084383079377e-08, - "loss": 3.7497, - "step": 3907000 - }, - { - "epoch": 42.99, - "learning_rate": 1.7527091699213378e-08, - "loss": 3.7558, - "step": 3907500 - }, - { - "epoch": 42.99, - "learning_rate": 1.751333956763298e-08, - "loss": 3.7521, - "step": 3908000 - }, - { - "epoch": 43.0, - "eval_loss": 3.8265368938446045, - "eval_runtime": 6.1446, - "eval_samples_per_second": 252.907, - "step": 3908485 - }, - { - "epoch": 43.0, - "learning_rate": 1.7499587436052586e-08, - "loss": 3.7414, - "step": 3908500 - }, - { - "epoch": 43.01, - "learning_rate": 1.7485835304472192e-08, - "loss": 3.7451, - "step": 3909000 - }, - { - "epoch": 43.01, - "learning_rate": 1.7472083172891795e-08, - "loss": 3.7656, - "step": 3909500 - }, - { - "epoch": 43.02, - "learning_rate": 1.74583310413114e-08, - "loss": 3.7551, - "step": 3910000 - }, - { - "epoch": 43.02, - "learning_rate": 1.7444578909731007e-08, - "loss": 3.7473, - "step": 3910500 - }, - { - "epoch": 43.03, - "learning_rate": 1.7430826778150613e-08, - "loss": 3.7501, - "step": 3911000 - }, - { - "epoch": 43.03, - "learning_rate": 1.741707464657022e-08, - "loss": 3.7466, - "step": 3911500 - }, - { - "epoch": 43.04, - "learning_rate": 1.7403322514989825e-08, - "loss": 3.7332, - "step": 3912000 - }, - { - "epoch": 43.04, - "learning_rate": 1.7389570383409428e-08, - "loss": 3.7329, - "step": 3912500 - }, - { - "epoch": 43.05, - "learning_rate": 1.7375818251829034e-08, - "loss": 3.7494, - "step": 3913000 - }, - { - "epoch": 43.06, - "learning_rate": 1.736206612024864e-08, - "loss": 3.7487, - "step": 3913500 - }, - { - "epoch": 43.06, - "learning_rate": 1.7348313988668243e-08, - "loss": 3.7518, - "step": 3914000 - }, - { - "epoch": 43.07, - "learning_rate": 1.733456185708785e-08, - "loss": 3.7354, - "step": 3914500 - }, - { - "epoch": 43.07, - "learning_rate": 1.7320809725507455e-08, - "loss": 3.7605, - "step": 3915000 - }, - { - "epoch": 43.08, - "learning_rate": 1.7307057593927058e-08, - "loss": 3.7419, - "step": 3915500 - }, - { - "epoch": 43.08, - "learning_rate": 1.7293305462346664e-08, - "loss": 3.7551, - "step": 3916000 - }, - { - "epoch": 43.09, - "learning_rate": 1.7279553330766266e-08, - "loss": 3.7628, - "step": 3916500 - }, - { - "epoch": 43.09, - "learning_rate": 1.7265801199185872e-08, - "loss": 3.7572, - "step": 3917000 - }, - { - "epoch": 43.1, - "learning_rate": 1.725204906760548e-08, - "loss": 3.7606, - "step": 3917500 - }, - { - "epoch": 43.1, - "learning_rate": 1.723829693602508e-08, - "loss": 3.7328, - "step": 3918000 - }, - { - "epoch": 43.11, - "learning_rate": 1.7224544804444687e-08, - "loss": 3.7582, - "step": 3918500 - }, - { - "epoch": 43.12, - "learning_rate": 1.7210792672864293e-08, - "loss": 3.7275, - "step": 3919000 - }, - { - "epoch": 43.12, - "learning_rate": 1.7197040541283896e-08, - "loss": 3.7586, - "step": 3919500 - }, - { - "epoch": 43.13, - "learning_rate": 1.7183288409703502e-08, - "loss": 3.7784, - "step": 3920000 - }, - { - "epoch": 43.13, - "learning_rate": 1.7169536278123108e-08, - "loss": 3.7543, - "step": 3920500 - }, - { - "epoch": 43.14, - "learning_rate": 1.715578414654271e-08, - "loss": 3.7445, - "step": 3921000 - }, - { - "epoch": 43.14, - "learning_rate": 1.7142032014962317e-08, - "loss": 3.752, - "step": 3921500 - }, - { - "epoch": 43.15, - "learning_rate": 1.7128279883381923e-08, - "loss": 3.7542, - "step": 3922000 - }, - { - "epoch": 43.15, - "learning_rate": 1.711452775180153e-08, - "loss": 3.7523, - "step": 3922500 - }, - { - "epoch": 43.16, - "learning_rate": 1.7100775620221135e-08, - "loss": 3.7364, - "step": 3923000 - }, - { - "epoch": 43.17, - "learning_rate": 1.708702348864074e-08, - "loss": 3.7724, - "step": 3923500 - }, - { - "epoch": 43.17, - "learning_rate": 1.7073271357060344e-08, - "loss": 3.757, - "step": 3924000 - }, - { - "epoch": 43.18, - "learning_rate": 1.705951922547995e-08, - "loss": 3.7511, - "step": 3924500 - }, - { - "epoch": 43.18, - "learning_rate": 1.7045767093899556e-08, - "loss": 3.7596, - "step": 3925000 - }, - { - "epoch": 43.19, - "learning_rate": 1.703201496231916e-08, - "loss": 3.7677, - "step": 3925500 - }, - { - "epoch": 43.19, - "learning_rate": 1.7018262830738764e-08, - "loss": 3.7455, - "step": 3926000 - }, - { - "epoch": 43.2, - "learning_rate": 1.700451069915837e-08, - "loss": 3.7329, - "step": 3926500 - }, - { - "epoch": 43.2, - "learning_rate": 1.6990758567577973e-08, - "loss": 3.7666, - "step": 3927000 - }, - { - "epoch": 43.21, - "learning_rate": 1.697700643599758e-08, - "loss": 3.7627, - "step": 3927500 - }, - { - "epoch": 43.21, - "learning_rate": 1.6963254304417185e-08, - "loss": 3.7295, - "step": 3928000 - }, - { - "epoch": 43.22, - "learning_rate": 1.6949502172836788e-08, - "loss": 3.7377, - "step": 3928500 - }, - { - "epoch": 43.23, - "learning_rate": 1.6935750041256394e-08, - "loss": 3.7628, - "step": 3929000 - }, - { - "epoch": 43.23, - "learning_rate": 1.6921997909676e-08, - "loss": 3.7564, - "step": 3929500 - }, - { - "epoch": 43.24, - "learning_rate": 1.6908245778095603e-08, - "loss": 3.759, - "step": 3930000 - }, - { - "epoch": 43.24, - "learning_rate": 1.689449364651521e-08, - "loss": 3.7647, - "step": 3930500 - }, - { - "epoch": 43.25, - "learning_rate": 1.6880741514934815e-08, - "loss": 3.747, - "step": 3931000 - }, - { - "epoch": 43.25, - "learning_rate": 1.6866989383354418e-08, - "loss": 3.7366, - "step": 3931500 - }, - { - "epoch": 43.26, - "learning_rate": 1.6853237251774024e-08, - "loss": 3.7561, - "step": 3932000 - }, - { - "epoch": 43.26, - "learning_rate": 1.683948512019363e-08, - "loss": 3.7585, - "step": 3932500 - }, - { - "epoch": 43.27, - "learning_rate": 1.6825732988613232e-08, - "loss": 3.7406, - "step": 3933000 - }, - { - "epoch": 43.28, - "learning_rate": 1.681198085703284e-08, - "loss": 3.7513, - "step": 3933500 - }, - { - "epoch": 43.28, - "learning_rate": 1.6798228725452444e-08, - "loss": 3.7603, - "step": 3934000 - }, - { - "epoch": 43.29, - "learning_rate": 1.678447659387205e-08, - "loss": 3.7519, - "step": 3934500 - }, - { - "epoch": 43.29, - "learning_rate": 1.6770724462291657e-08, - "loss": 3.7539, - "step": 3935000 - }, - { - "epoch": 43.3, - "learning_rate": 1.675697233071126e-08, - "loss": 3.7532, - "step": 3935500 - }, - { - "epoch": 43.3, - "learning_rate": 1.6743220199130865e-08, - "loss": 3.744, - "step": 3936000 - }, - { - "epoch": 43.31, - "learning_rate": 1.672946806755047e-08, - "loss": 3.7453, - "step": 3936500 - }, - { - "epoch": 43.31, - "learning_rate": 1.6715715935970074e-08, - "loss": 3.7478, - "step": 3937000 - }, - { - "epoch": 43.32, - "learning_rate": 1.670196380438968e-08, - "loss": 3.7449, - "step": 3937500 - }, - { - "epoch": 43.32, - "learning_rate": 1.6688211672809286e-08, - "loss": 3.7743, - "step": 3938000 - }, - { - "epoch": 43.33, - "learning_rate": 1.667445954122889e-08, - "loss": 3.7489, - "step": 3938500 - }, - { - "epoch": 43.34, - "learning_rate": 1.6660707409648495e-08, - "loss": 3.7545, - "step": 3939000 - }, - { - "epoch": 43.34, - "learning_rate": 1.66469552780681e-08, - "loss": 3.7516, - "step": 3939500 - }, - { - "epoch": 43.35, - "learning_rate": 1.6633203146487704e-08, - "loss": 3.7603, - "step": 3940000 - }, - { - "epoch": 43.35, - "learning_rate": 1.661945101490731e-08, - "loss": 3.7183, - "step": 3940500 - }, - { - "epoch": 43.36, - "learning_rate": 1.6605698883326916e-08, - "loss": 3.7403, - "step": 3941000 - }, - { - "epoch": 43.36, - "learning_rate": 1.659194675174652e-08, - "loss": 3.7494, - "step": 3941500 - }, - { - "epoch": 43.37, - "learning_rate": 1.6578194620166124e-08, - "loss": 3.7616, - "step": 3942000 - }, - { - "epoch": 43.37, - "learning_rate": 1.656444248858573e-08, - "loss": 3.7481, - "step": 3942500 - }, - { - "epoch": 43.38, - "learning_rate": 1.6550690357005333e-08, - "loss": 3.7688, - "step": 3943000 - }, - { - "epoch": 43.39, - "learning_rate": 1.653693822542494e-08, - "loss": 3.7439, - "step": 3943500 - }, - { - "epoch": 43.39, - "learning_rate": 1.6523186093844545e-08, - "loss": 3.763, - "step": 3944000 - }, - { - "epoch": 43.4, - "learning_rate": 1.6509433962264148e-08, - "loss": 3.7527, - "step": 3944500 - }, - { - "epoch": 43.4, - "learning_rate": 1.6495681830683754e-08, - "loss": 3.7271, - "step": 3945000 - }, - { - "epoch": 43.41, - "learning_rate": 1.648192969910336e-08, - "loss": 3.7486, - "step": 3945500 - }, - { - "epoch": 43.41, - "learning_rate": 1.6468177567522966e-08, - "loss": 3.743, - "step": 3946000 - }, - { - "epoch": 43.42, - "learning_rate": 1.645442543594257e-08, - "loss": 3.7578, - "step": 3946500 - }, - { - "epoch": 43.42, - "learning_rate": 1.6440673304362175e-08, - "loss": 3.7658, - "step": 3947000 - }, - { - "epoch": 43.43, - "learning_rate": 1.642692117278178e-08, - "loss": 3.7486, - "step": 3947500 - }, - { - "epoch": 43.43, - "learning_rate": 1.6413169041201387e-08, - "loss": 3.7691, - "step": 3948000 - }, - { - "epoch": 43.44, - "learning_rate": 1.6399416909620993e-08, - "loss": 3.7441, - "step": 3948500 - }, - { - "epoch": 43.45, - "learning_rate": 1.6385664778040596e-08, - "loss": 3.7427, - "step": 3949000 - }, - { - "epoch": 43.45, - "learning_rate": 1.6371912646460202e-08, - "loss": 3.7479, - "step": 3949500 - }, - { - "epoch": 43.46, - "learning_rate": 1.6358160514879808e-08, - "loss": 3.7587, - "step": 3950000 - }, - { - "epoch": 43.46, - "learning_rate": 1.634440838329941e-08, - "loss": 3.7497, - "step": 3950500 - }, - { - "epoch": 43.47, - "learning_rate": 1.6330656251719017e-08, - "loss": 3.7542, - "step": 3951000 - }, - { - "epoch": 43.47, - "learning_rate": 1.6316904120138623e-08, - "loss": 3.7543, - "step": 3951500 - }, - { - "epoch": 43.48, - "learning_rate": 1.6303151988558225e-08, - "loss": 3.7663, - "step": 3952000 - }, - { - "epoch": 43.48, - "learning_rate": 1.628939985697783e-08, - "loss": 3.746, - "step": 3952500 - }, - { - "epoch": 43.49, - "learning_rate": 1.6275647725397437e-08, - "loss": 3.7442, - "step": 3953000 - }, - { - "epoch": 43.5, - "learning_rate": 1.626189559381704e-08, - "loss": 3.7619, - "step": 3953500 - }, - { - "epoch": 43.5, - "learning_rate": 1.6248143462236646e-08, - "loss": 3.7618, - "step": 3954000 - }, - { - "epoch": 43.51, - "learning_rate": 1.623439133065625e-08, - "loss": 3.7458, - "step": 3954500 - }, - { - "epoch": 43.51, - "learning_rate": 1.6220639199075855e-08, - "loss": 3.754, - "step": 3955000 - }, - { - "epoch": 43.52, - "learning_rate": 1.620688706749546e-08, - "loss": 3.7589, - "step": 3955500 - }, - { - "epoch": 43.52, - "learning_rate": 1.6193134935915064e-08, - "loss": 3.7628, - "step": 3956000 - }, - { - "epoch": 43.53, - "learning_rate": 1.617938280433467e-08, - "loss": 3.7402, - "step": 3956500 - }, - { - "epoch": 43.53, - "learning_rate": 1.6165630672754276e-08, - "loss": 3.758, - "step": 3957000 - }, - { - "epoch": 43.54, - "learning_rate": 1.615187854117388e-08, - "loss": 3.7622, - "step": 3957500 - }, - { - "epoch": 43.54, - "learning_rate": 1.6138126409593484e-08, - "loss": 3.7414, - "step": 3958000 - }, - { - "epoch": 43.55, - "learning_rate": 1.612437427801309e-08, - "loss": 3.7597, - "step": 3958500 - }, - { - "epoch": 43.56, - "learning_rate": 1.6110622146432697e-08, - "loss": 3.748, - "step": 3959000 - }, - { - "epoch": 43.56, - "learning_rate": 1.6096870014852303e-08, - "loss": 3.7345, - "step": 3959500 - }, - { - "epoch": 43.57, - "learning_rate": 1.608311788327191e-08, - "loss": 3.7342, - "step": 3960000 - }, - { - "epoch": 43.57, - "learning_rate": 1.606936575169151e-08, - "loss": 3.7366, - "step": 3960500 - }, - { - "epoch": 43.58, - "learning_rate": 1.6055613620111117e-08, - "loss": 3.7442, - "step": 3961000 - }, - { - "epoch": 43.58, - "learning_rate": 1.6041861488530723e-08, - "loss": 3.7564, - "step": 3961500 - }, - { - "epoch": 43.59, - "learning_rate": 1.6028109356950326e-08, - "loss": 3.7583, - "step": 3962000 - }, - { - "epoch": 43.59, - "learning_rate": 1.6014357225369932e-08, - "loss": 3.7644, - "step": 3962500 - }, - { - "epoch": 43.6, - "learning_rate": 1.6000605093789538e-08, - "loss": 3.7518, - "step": 3963000 - }, - { - "epoch": 43.61, - "learning_rate": 1.598685296220914e-08, - "loss": 3.7247, - "step": 3963500 - }, - { - "epoch": 43.61, - "learning_rate": 1.5973100830628747e-08, - "loss": 3.7486, - "step": 3964000 - }, - { - "epoch": 43.62, - "learning_rate": 1.5959348699048353e-08, - "loss": 3.7722, - "step": 3964500 - }, - { - "epoch": 43.62, - "learning_rate": 1.5945596567467956e-08, - "loss": 3.7524, - "step": 3965000 - }, - { - "epoch": 43.63, - "learning_rate": 1.5931844435887562e-08, - "loss": 3.7537, - "step": 3965500 - }, - { - "epoch": 43.63, - "learning_rate": 1.5918092304307168e-08, - "loss": 3.7557, - "step": 3966000 - }, - { - "epoch": 43.64, - "learning_rate": 1.590434017272677e-08, - "loss": 3.739, - "step": 3966500 - }, - { - "epoch": 43.64, - "learning_rate": 1.5890588041146377e-08, - "loss": 3.7516, - "step": 3967000 - }, - { - "epoch": 43.65, - "learning_rate": 1.5876835909565983e-08, - "loss": 3.7658, - "step": 3967500 - }, - { - "epoch": 43.65, - "learning_rate": 1.5863083777985585e-08, - "loss": 3.7461, - "step": 3968000 - }, - { - "epoch": 43.66, - "learning_rate": 1.584933164640519e-08, - "loss": 3.7646, - "step": 3968500 - }, - { - "epoch": 43.67, - "learning_rate": 1.5835579514824797e-08, - "loss": 3.7654, - "step": 3969000 - }, - { - "epoch": 43.67, - "learning_rate": 1.58218273832444e-08, - "loss": 3.742, - "step": 3969500 - }, - { - "epoch": 43.68, - "learning_rate": 1.5808075251664006e-08, - "loss": 3.7434, - "step": 3970000 - }, - { - "epoch": 43.68, - "learning_rate": 1.5794323120083612e-08, - "loss": 3.7326, - "step": 3970500 - }, - { - "epoch": 43.69, - "learning_rate": 1.5780570988503218e-08, - "loss": 3.7485, - "step": 3971000 - }, - { - "epoch": 43.69, - "learning_rate": 1.5766818856922824e-08, - "loss": 3.7528, - "step": 3971500 - }, - { - "epoch": 43.7, - "learning_rate": 1.5753066725342427e-08, - "loss": 3.7413, - "step": 3972000 - }, - { - "epoch": 43.7, - "learning_rate": 1.5739314593762033e-08, - "loss": 3.7466, - "step": 3972500 - }, - { - "epoch": 43.71, - "learning_rate": 1.572556246218164e-08, - "loss": 3.7327, - "step": 3973000 - }, - { - "epoch": 43.72, - "learning_rate": 1.5711810330601242e-08, - "loss": 3.7435, - "step": 3973500 - }, - { - "epoch": 43.72, - "learning_rate": 1.5698058199020848e-08, - "loss": 3.7557, - "step": 3974000 - }, - { - "epoch": 43.73, - "learning_rate": 1.5684306067440454e-08, - "loss": 3.7623, - "step": 3974500 - }, - { - "epoch": 43.73, - "learning_rate": 1.5670553935860057e-08, - "loss": 3.7651, - "step": 3975000 - }, - { - "epoch": 43.74, - "learning_rate": 1.5656801804279663e-08, - "loss": 3.7621, - "step": 3975500 - }, - { - "epoch": 43.74, - "learning_rate": 1.564304967269927e-08, - "loss": 3.752, - "step": 3976000 - }, - { - "epoch": 43.75, - "learning_rate": 1.562929754111887e-08, - "loss": 3.7638, - "step": 3976500 - }, - { - "epoch": 43.75, - "learning_rate": 1.5615545409538477e-08, - "loss": 3.7707, - "step": 3977000 - }, - { - "epoch": 43.76, - "learning_rate": 1.5601793277958083e-08, - "loss": 3.7641, - "step": 3977500 - }, - { - "epoch": 43.76, - "learning_rate": 1.5588041146377686e-08, - "loss": 3.7709, - "step": 3978000 - }, - { - "epoch": 43.77, - "learning_rate": 1.5574289014797292e-08, - "loss": 3.7609, - "step": 3978500 - }, - { - "epoch": 43.78, - "learning_rate": 1.5560536883216898e-08, - "loss": 3.7335, - "step": 3979000 - }, - { - "epoch": 43.78, - "learning_rate": 1.55467847516365e-08, - "loss": 3.7616, - "step": 3979500 - }, - { - "epoch": 43.79, - "learning_rate": 1.5533032620056107e-08, - "loss": 3.7657, - "step": 3980000 - }, - { - "epoch": 43.79, - "learning_rate": 1.5519280488475713e-08, - "loss": 3.7445, - "step": 3980500 - }, - { - "epoch": 43.8, - "learning_rate": 1.5505528356895316e-08, - "loss": 3.7625, - "step": 3981000 - }, - { - "epoch": 43.8, - "learning_rate": 1.5491776225314922e-08, - "loss": 3.748, - "step": 3981500 - }, - { - "epoch": 43.81, - "learning_rate": 1.5478024093734528e-08, - "loss": 3.738, - "step": 3982000 - }, - { - "epoch": 43.81, - "learning_rate": 1.5464271962154134e-08, - "loss": 3.7519, - "step": 3982500 - }, - { - "epoch": 43.82, - "learning_rate": 1.545051983057374e-08, - "loss": 3.7531, - "step": 3983000 - }, - { - "epoch": 43.83, - "learning_rate": 1.5436767698993346e-08, - "loss": 3.7287, - "step": 3983500 - }, - { - "epoch": 43.83, - "learning_rate": 1.542301556741295e-08, - "loss": 3.7541, - "step": 3984000 - }, - { - "epoch": 43.84, - "learning_rate": 1.5409263435832555e-08, - "loss": 3.7561, - "step": 3984500 - }, - { - "epoch": 43.84, - "learning_rate": 1.539551130425216e-08, - "loss": 3.7396, - "step": 3985000 - }, - { - "epoch": 43.85, - "learning_rate": 1.5381759172671763e-08, - "loss": 3.7395, - "step": 3985500 - }, - { - "epoch": 43.85, - "learning_rate": 1.536800704109137e-08, - "loss": 3.755, - "step": 3986000 - }, - { - "epoch": 43.86, - "learning_rate": 1.5354254909510976e-08, - "loss": 3.7354, - "step": 3986500 - }, - { - "epoch": 43.86, - "learning_rate": 1.5340502777930578e-08, - "loss": 3.7432, - "step": 3987000 - }, - { - "epoch": 43.87, - "learning_rate": 1.5326750646350184e-08, - "loss": 3.7292, - "step": 3987500 - }, - { - "epoch": 43.87, - "learning_rate": 1.531299851476979e-08, - "loss": 3.7362, - "step": 3988000 - }, - { - "epoch": 43.88, - "learning_rate": 1.5299246383189393e-08, - "loss": 3.7508, - "step": 3988500 - }, - { - "epoch": 43.89, - "learning_rate": 1.5285494251609e-08, - "loss": 3.7473, - "step": 3989000 - }, - { - "epoch": 43.89, - "learning_rate": 1.5271742120028605e-08, - "loss": 3.7439, - "step": 3989500 - }, - { - "epoch": 43.9, - "learning_rate": 1.5257989988448208e-08, - "loss": 3.7426, - "step": 3990000 - }, - { - "epoch": 43.9, - "learning_rate": 1.5244237856867814e-08, - "loss": 3.7471, - "step": 3990500 - }, - { - "epoch": 43.91, - "learning_rate": 1.5230485725287417e-08, - "loss": 3.7552, - "step": 3991000 - }, - { - "epoch": 43.91, - "learning_rate": 1.5216733593707023e-08, - "loss": 3.761, - "step": 3991500 - }, - { - "epoch": 43.92, - "learning_rate": 1.520298146212663e-08, - "loss": 3.7363, - "step": 3992000 - }, - { - "epoch": 43.92, - "learning_rate": 1.518922933054623e-08, - "loss": 3.7388, - "step": 3992500 - }, - { - "epoch": 43.93, - "learning_rate": 1.5175477198965837e-08, - "loss": 3.7538, - "step": 3993000 - }, - { - "epoch": 43.94, - "learning_rate": 1.5161725067385443e-08, - "loss": 3.7506, - "step": 3993500 - }, - { - "epoch": 43.94, - "learning_rate": 1.514797293580505e-08, - "loss": 3.7394, - "step": 3994000 - }, - { - "epoch": 43.95, - "learning_rate": 1.5134220804224652e-08, - "loss": 3.7439, - "step": 3994500 - }, - { - "epoch": 43.95, - "learning_rate": 1.5120468672644258e-08, - "loss": 3.76, - "step": 3995000 - }, - { - "epoch": 43.96, - "learning_rate": 1.5106716541063864e-08, - "loss": 3.7396, - "step": 3995500 - }, - { - "epoch": 43.96, - "learning_rate": 1.509296440948347e-08, - "loss": 3.7401, - "step": 3996000 - }, - { - "epoch": 43.97, - "learning_rate": 1.5079212277903076e-08, - "loss": 3.761, - "step": 3996500 - }, - { - "epoch": 43.97, - "learning_rate": 1.506546014632268e-08, - "loss": 3.7527, - "step": 3997000 - }, - { - "epoch": 43.98, - "learning_rate": 1.5051708014742285e-08, - "loss": 3.7523, - "step": 3997500 - }, - { - "epoch": 43.98, - "learning_rate": 1.503795588316189e-08, - "loss": 3.7409, - "step": 3998000 - }, - { - "epoch": 43.99, - "learning_rate": 1.5024203751581494e-08, - "loss": 3.7826, - "step": 3998500 - }, - { - "epoch": 44.0, - "learning_rate": 1.50104516200011e-08, - "loss": 3.7284, - "step": 3999000 - }, - { - "epoch": 44.0, - "eval_loss": 3.8262314796447754, - "eval_runtime": 6.1442, - "eval_samples_per_second": 252.92, - "step": 3999380 - }, - { - "epoch": 44.0, - "learning_rate": 1.4996699488420706e-08, - "loss": 3.7537, - "step": 3999500 - }, - { - "epoch": 44.01, - "learning_rate": 1.498294735684031e-08, - "loss": 3.7579, - "step": 4000000 - }, - { - "epoch": 44.01, - "learning_rate": 1.4969195225259915e-08, - "loss": 3.7557, - "step": 4000500 - }, - { - "epoch": 44.02, - "learning_rate": 1.495544309367952e-08, - "loss": 3.764, - "step": 4001000 - }, - { - "epoch": 44.02, - "learning_rate": 1.4941690962099123e-08, - "loss": 3.7386, - "step": 4001500 - }, - { - "epoch": 44.03, - "learning_rate": 1.492793883051873e-08, - "loss": 3.7663, - "step": 4002000 - }, - { - "epoch": 44.03, - "learning_rate": 1.4914186698938336e-08, - "loss": 3.7542, - "step": 4002500 - }, - { - "epoch": 44.04, - "learning_rate": 1.4900434567357938e-08, - "loss": 3.7655, - "step": 4003000 - }, - { - "epoch": 44.05, - "learning_rate": 1.4886682435777544e-08, - "loss": 3.7417, - "step": 4003500 - }, - { - "epoch": 44.05, - "learning_rate": 1.487293030419715e-08, - "loss": 3.7389, - "step": 4004000 - }, - { - "epoch": 44.06, - "learning_rate": 1.4859178172616755e-08, - "loss": 3.748, - "step": 4004500 - }, - { - "epoch": 44.06, - "learning_rate": 1.484542604103636e-08, - "loss": 3.7346, - "step": 4005000 - }, - { - "epoch": 44.07, - "learning_rate": 1.4831673909455967e-08, - "loss": 3.761, - "step": 4005500 - }, - { - "epoch": 44.07, - "learning_rate": 1.481792177787557e-08, - "loss": 3.7599, - "step": 4006000 - }, - { - "epoch": 44.08, - "learning_rate": 1.4804169646295176e-08, - "loss": 3.7409, - "step": 4006500 - }, - { - "epoch": 44.08, - "learning_rate": 1.4790417514714782e-08, - "loss": 3.7606, - "step": 4007000 - }, - { - "epoch": 44.09, - "learning_rate": 1.4776665383134384e-08, - "loss": 3.7527, - "step": 4007500 - }, - { - "epoch": 44.09, - "learning_rate": 1.476291325155399e-08, - "loss": 3.7391, - "step": 4008000 - }, - { - "epoch": 44.1, - "learning_rate": 1.4749161119973596e-08, - "loss": 3.7454, - "step": 4008500 - }, - { - "epoch": 44.11, - "learning_rate": 1.4735408988393199e-08, - "loss": 3.7531, - "step": 4009000 - }, - { - "epoch": 44.11, - "learning_rate": 1.4721656856812805e-08, - "loss": 3.7613, - "step": 4009500 - }, - { - "epoch": 44.12, - "learning_rate": 1.470790472523241e-08, - "loss": 3.7616, - "step": 4010000 - }, - { - "epoch": 44.12, - "learning_rate": 1.4694152593652016e-08, - "loss": 3.7303, - "step": 4010500 - }, - { - "epoch": 44.13, - "learning_rate": 1.4680400462071622e-08, - "loss": 3.7826, - "step": 4011000 - }, - { - "epoch": 44.13, - "learning_rate": 1.4666648330491224e-08, - "loss": 3.7579, - "step": 4011500 - }, - { - "epoch": 44.14, - "learning_rate": 1.465289619891083e-08, - "loss": 3.7583, - "step": 4012000 - }, - { - "epoch": 44.14, - "learning_rate": 1.4639144067330436e-08, - "loss": 3.7686, - "step": 4012500 - }, - { - "epoch": 44.15, - "learning_rate": 1.4625391935750039e-08, - "loss": 3.7417, - "step": 4013000 - }, - { - "epoch": 44.16, - "learning_rate": 1.4611639804169645e-08, - "loss": 3.755, - "step": 4013500 - }, - { - "epoch": 44.16, - "learning_rate": 1.4597887672589251e-08, - "loss": 3.7374, - "step": 4014000 - }, - { - "epoch": 44.17, - "learning_rate": 1.4584135541008854e-08, - "loss": 3.7483, - "step": 4014500 - }, - { - "epoch": 44.17, - "learning_rate": 1.457038340942846e-08, - "loss": 3.7526, - "step": 4015000 - }, - { - "epoch": 44.18, - "learning_rate": 1.4556631277848066e-08, - "loss": 3.7413, - "step": 4015500 - }, - { - "epoch": 44.18, - "learning_rate": 1.454287914626767e-08, - "loss": 3.7603, - "step": 4016000 - }, - { - "epoch": 44.19, - "learning_rate": 1.4529127014687276e-08, - "loss": 3.7574, - "step": 4016500 - }, - { - "epoch": 44.19, - "learning_rate": 1.4515374883106882e-08, - "loss": 3.7513, - "step": 4017000 - }, - { - "epoch": 44.2, - "learning_rate": 1.4501622751526485e-08, - "loss": 3.7525, - "step": 4017500 - }, - { - "epoch": 44.2, - "learning_rate": 1.4487870619946091e-08, - "loss": 3.7578, - "step": 4018000 - }, - { - "epoch": 44.21, - "learning_rate": 1.4474118488365697e-08, - "loss": 3.7422, - "step": 4018500 - }, - { - "epoch": 44.22, - "learning_rate": 1.44603663567853e-08, - "loss": 3.7666, - "step": 4019000 - }, - { - "epoch": 44.22, - "learning_rate": 1.4446614225204906e-08, - "loss": 3.7305, - "step": 4019500 - }, - { - "epoch": 44.23, - "learning_rate": 1.4432862093624512e-08, - "loss": 3.7384, - "step": 4020000 - }, - { - "epoch": 44.23, - "learning_rate": 1.4419109962044115e-08, - "loss": 3.759, - "step": 4020500 - }, - { - "epoch": 44.24, - "learning_rate": 1.440535783046372e-08, - "loss": 3.7613, - "step": 4021000 - }, - { - "epoch": 44.24, - "learning_rate": 1.4391605698883327e-08, - "loss": 3.7539, - "step": 4021500 - }, - { - "epoch": 44.25, - "learning_rate": 1.4377853567302931e-08, - "loss": 3.741, - "step": 4022000 - }, - { - "epoch": 44.25, - "learning_rate": 1.4364101435722537e-08, - "loss": 3.7586, - "step": 4022500 - }, - { - "epoch": 44.26, - "learning_rate": 1.4350349304142143e-08, - "loss": 3.7493, - "step": 4023000 - }, - { - "epoch": 44.27, - "learning_rate": 1.4336597172561746e-08, - "loss": 3.7632, - "step": 4023500 - }, - { - "epoch": 44.27, - "learning_rate": 1.4322845040981352e-08, - "loss": 3.739, - "step": 4024000 - }, - { - "epoch": 44.28, - "learning_rate": 1.4309092909400958e-08, - "loss": 3.7355, - "step": 4024500 - }, - { - "epoch": 44.28, - "learning_rate": 1.429534077782056e-08, - "loss": 3.7567, - "step": 4025000 - }, - { - "epoch": 44.29, - "learning_rate": 1.4281588646240167e-08, - "loss": 3.7484, - "step": 4025500 - }, - { - "epoch": 44.29, - "learning_rate": 1.4267836514659773e-08, - "loss": 3.753, - "step": 4026000 - }, - { - "epoch": 44.3, - "learning_rate": 1.4254084383079376e-08, - "loss": 3.73, - "step": 4026500 - }, - { - "epoch": 44.3, - "learning_rate": 1.4240332251498982e-08, - "loss": 3.7349, - "step": 4027000 - }, - { - "epoch": 44.31, - "learning_rate": 1.4226580119918588e-08, - "loss": 3.7177, - "step": 4027500 - }, - { - "epoch": 44.31, - "learning_rate": 1.4212827988338192e-08, - "loss": 3.7518, - "step": 4028000 - }, - { - "epoch": 44.32, - "learning_rate": 1.4199075856757798e-08, - "loss": 3.7563, - "step": 4028500 - }, - { - "epoch": 44.33, - "learning_rate": 1.41853237251774e-08, - "loss": 3.7317, - "step": 4029000 - }, - { - "epoch": 44.33, - "learning_rate": 1.4171571593597007e-08, - "loss": 3.7533, - "step": 4029500 - }, - { - "epoch": 44.34, - "learning_rate": 1.4157819462016613e-08, - "loss": 3.7477, - "step": 4030000 - }, - { - "epoch": 44.34, - "learning_rate": 1.4144067330436216e-08, - "loss": 3.7545, - "step": 4030500 - }, - { - "epoch": 44.35, - "learning_rate": 1.4130315198855822e-08, - "loss": 3.7682, - "step": 4031000 - }, - { - "epoch": 44.35, - "learning_rate": 1.4116563067275428e-08, - "loss": 3.7334, - "step": 4031500 - }, - { - "epoch": 44.36, - "learning_rate": 1.410281093569503e-08, - "loss": 3.7458, - "step": 4032000 - }, - { - "epoch": 44.36, - "learning_rate": 1.4089058804114636e-08, - "loss": 3.7319, - "step": 4032500 - }, - { - "epoch": 44.37, - "learning_rate": 1.4075306672534242e-08, - "loss": 3.7357, - "step": 4033000 - }, - { - "epoch": 44.38, - "learning_rate": 1.4061554540953847e-08, - "loss": 3.7435, - "step": 4033500 - }, - { - "epoch": 44.38, - "learning_rate": 1.4047802409373453e-08, - "loss": 3.7371, - "step": 4034000 - }, - { - "epoch": 44.39, - "learning_rate": 1.4034050277793057e-08, - "loss": 3.7445, - "step": 4034500 - }, - { - "epoch": 44.39, - "learning_rate": 1.4020298146212662e-08, - "loss": 3.7634, - "step": 4035000 - }, - { - "epoch": 44.4, - "learning_rate": 1.4006546014632268e-08, - "loss": 3.7366, - "step": 4035500 - }, - { - "epoch": 44.4, - "learning_rate": 1.3992793883051874e-08, - "loss": 3.7398, - "step": 4036000 - }, - { - "epoch": 44.41, - "learning_rate": 1.3979041751471476e-08, - "loss": 3.7416, - "step": 4036500 - }, - { - "epoch": 44.41, - "learning_rate": 1.3965289619891082e-08, - "loss": 3.7362, - "step": 4037000 - }, - { - "epoch": 44.42, - "learning_rate": 1.3951537488310689e-08, - "loss": 3.74, - "step": 4037500 - }, - { - "epoch": 44.42, - "learning_rate": 1.3937785356730291e-08, - "loss": 3.7598, - "step": 4038000 - }, - { - "epoch": 44.43, - "learning_rate": 1.3924033225149897e-08, - "loss": 3.7672, - "step": 4038500 - }, - { - "epoch": 44.44, - "learning_rate": 1.3910281093569503e-08, - "loss": 3.7646, - "step": 4039000 - }, - { - "epoch": 44.44, - "learning_rate": 1.3896528961989106e-08, - "loss": 3.7472, - "step": 4039500 - }, - { - "epoch": 44.45, - "learning_rate": 1.3882776830408712e-08, - "loss": 3.7386, - "step": 4040000 - }, - { - "epoch": 44.45, - "learning_rate": 1.3869024698828318e-08, - "loss": 3.7505, - "step": 4040500 - }, - { - "epoch": 44.46, - "learning_rate": 1.3855272567247922e-08, - "loss": 3.7538, - "step": 4041000 - }, - { - "epoch": 44.46, - "learning_rate": 1.3841520435667529e-08, - "loss": 3.7566, - "step": 4041500 - }, - { - "epoch": 44.47, - "learning_rate": 1.3827768304087135e-08, - "loss": 3.7437, - "step": 4042000 - }, - { - "epoch": 44.47, - "learning_rate": 1.3814016172506737e-08, - "loss": 3.7429, - "step": 4042500 - }, - { - "epoch": 44.48, - "learning_rate": 1.3800264040926343e-08, - "loss": 3.7456, - "step": 4043000 - }, - { - "epoch": 44.49, - "learning_rate": 1.378651190934595e-08, - "loss": 3.7457, - "step": 4043500 - }, - { - "epoch": 44.49, - "learning_rate": 1.3772759777765552e-08, - "loss": 3.7341, - "step": 4044000 - }, - { - "epoch": 44.5, - "learning_rate": 1.3759007646185158e-08, - "loss": 3.7386, - "step": 4044500 - }, - { - "epoch": 44.5, - "learning_rate": 1.3745255514604764e-08, - "loss": 3.7658, - "step": 4045000 - }, - { - "epoch": 44.51, - "learning_rate": 1.3731503383024367e-08, - "loss": 3.7536, - "step": 4045500 - }, - { - "epoch": 44.51, - "learning_rate": 1.3717751251443973e-08, - "loss": 3.7437, - "step": 4046000 - }, - { - "epoch": 44.52, - "learning_rate": 1.3703999119863579e-08, - "loss": 3.7592, - "step": 4046500 - }, - { - "epoch": 44.52, - "learning_rate": 1.3690246988283183e-08, - "loss": 3.7724, - "step": 4047000 - }, - { - "epoch": 44.53, - "learning_rate": 1.367649485670279e-08, - "loss": 3.7576, - "step": 4047500 - }, - { - "epoch": 44.53, - "learning_rate": 1.3662742725122392e-08, - "loss": 3.7467, - "step": 4048000 - }, - { - "epoch": 44.54, - "learning_rate": 1.3648990593541998e-08, - "loss": 3.757, - "step": 4048500 - }, - { - "epoch": 44.55, - "learning_rate": 1.3635238461961604e-08, - "loss": 3.7465, - "step": 4049000 - }, - { - "epoch": 44.55, - "learning_rate": 1.3621486330381207e-08, - "loss": 3.7563, - "step": 4049500 - }, - { - "epoch": 44.56, - "learning_rate": 1.3607734198800813e-08, - "loss": 3.7482, - "step": 4050000 - }, - { - "epoch": 44.56, - "learning_rate": 1.3593982067220419e-08, - "loss": 3.7639, - "step": 4050500 - }, - { - "epoch": 44.57, - "learning_rate": 1.3580229935640022e-08, - "loss": 3.7585, - "step": 4051000 - }, - { - "epoch": 44.57, - "learning_rate": 1.3566477804059628e-08, - "loss": 3.7556, - "step": 4051500 - }, - { - "epoch": 44.58, - "learning_rate": 1.3552725672479234e-08, - "loss": 3.7473, - "step": 4052000 - }, - { - "epoch": 44.58, - "learning_rate": 1.3538973540898838e-08, - "loss": 3.7473, - "step": 4052500 - }, - { - "epoch": 44.59, - "learning_rate": 1.3525221409318444e-08, - "loss": 3.77, - "step": 4053000 - }, - { - "epoch": 44.6, - "learning_rate": 1.351146927773805e-08, - "loss": 3.772, - "step": 4053500 - }, - { - "epoch": 44.6, - "learning_rate": 1.3497717146157653e-08, - "loss": 3.7494, - "step": 4054000 - }, - { - "epoch": 44.61, - "learning_rate": 1.3483965014577259e-08, - "loss": 3.7489, - "step": 4054500 - }, - { - "epoch": 44.61, - "learning_rate": 1.3470212882996865e-08, - "loss": 3.747, - "step": 4055000 - }, - { - "epoch": 44.62, - "learning_rate": 1.3456460751416468e-08, - "loss": 3.7518, - "step": 4055500 - }, - { - "epoch": 44.62, - "learning_rate": 1.3442708619836074e-08, - "loss": 3.734, - "step": 4056000 - }, - { - "epoch": 44.63, - "learning_rate": 1.342895648825568e-08, - "loss": 3.7331, - "step": 4056500 - }, - { - "epoch": 44.63, - "learning_rate": 1.3415204356675283e-08, - "loss": 3.7727, - "step": 4057000 - }, - { - "epoch": 44.64, - "learning_rate": 1.3401452225094889e-08, - "loss": 3.7437, - "step": 4057500 - }, - { - "epoch": 44.64, - "learning_rate": 1.3387700093514495e-08, - "loss": 3.7533, - "step": 4058000 - }, - { - "epoch": 44.65, - "learning_rate": 1.3373947961934099e-08, - "loss": 3.763, - "step": 4058500 - }, - { - "epoch": 44.66, - "learning_rate": 1.3360195830353705e-08, - "loss": 3.7385, - "step": 4059000 - }, - { - "epoch": 44.66, - "learning_rate": 1.3346443698773311e-08, - "loss": 3.7564, - "step": 4059500 - }, - { - "epoch": 44.67, - "learning_rate": 1.3332691567192914e-08, - "loss": 3.7568, - "step": 4060000 - }, - { - "epoch": 44.67, - "learning_rate": 1.331893943561252e-08, - "loss": 3.7612, - "step": 4060500 - }, - { - "epoch": 44.68, - "learning_rate": 1.3305187304032126e-08, - "loss": 3.7619, - "step": 4061000 - }, - { - "epoch": 44.68, - "learning_rate": 1.3291435172451729e-08, - "loss": 3.7384, - "step": 4061500 - }, - { - "epoch": 44.69, - "learning_rate": 1.3277683040871335e-08, - "loss": 3.7491, - "step": 4062000 - }, - { - "epoch": 44.69, - "learning_rate": 1.326393090929094e-08, - "loss": 3.7497, - "step": 4062500 - }, - { - "epoch": 44.7, - "learning_rate": 1.3250178777710543e-08, - "loss": 3.7484, - "step": 4063000 - }, - { - "epoch": 44.71, - "learning_rate": 1.323642664613015e-08, - "loss": 3.756, - "step": 4063500 - }, - { - "epoch": 44.71, - "learning_rate": 1.3222674514549755e-08, - "loss": 3.7705, - "step": 4064000 - }, - { - "epoch": 44.72, - "learning_rate": 1.320892238296936e-08, - "loss": 3.7411, - "step": 4064500 - }, - { - "epoch": 44.72, - "learning_rate": 1.3195170251388966e-08, - "loss": 3.7412, - "step": 4065000 - }, - { - "epoch": 44.73, - "learning_rate": 1.3181418119808572e-08, - "loss": 3.7668, - "step": 4065500 - }, - { - "epoch": 44.73, - "learning_rate": 1.3167665988228175e-08, - "loss": 3.7734, - "step": 4066000 - }, - { - "epoch": 44.74, - "learning_rate": 1.315391385664778e-08, - "loss": 3.7539, - "step": 4066500 - }, - { - "epoch": 44.74, - "learning_rate": 1.3140161725067383e-08, - "loss": 3.7549, - "step": 4067000 - }, - { - "epoch": 44.75, - "learning_rate": 1.312640959348699e-08, - "loss": 3.7589, - "step": 4067500 - }, - { - "epoch": 44.75, - "learning_rate": 1.3112657461906595e-08, - "loss": 3.738, - "step": 4068000 - }, - { - "epoch": 44.76, - "learning_rate": 1.3098905330326198e-08, - "loss": 3.7565, - "step": 4068500 - }, - { - "epoch": 44.77, - "learning_rate": 1.3085153198745804e-08, - "loss": 3.7483, - "step": 4069000 - }, - { - "epoch": 44.77, - "learning_rate": 1.307140106716541e-08, - "loss": 3.7539, - "step": 4069500 - }, - { - "epoch": 44.78, - "learning_rate": 1.3057648935585015e-08, - "loss": 3.7416, - "step": 4070000 - }, - { - "epoch": 44.78, - "learning_rate": 1.304389680400462e-08, - "loss": 3.7318, - "step": 4070500 - }, - { - "epoch": 44.79, - "learning_rate": 1.3030144672424227e-08, - "loss": 3.7419, - "step": 4071000 - }, - { - "epoch": 44.79, - "learning_rate": 1.301639254084383e-08, - "loss": 3.7648, - "step": 4071500 - }, - { - "epoch": 44.8, - "learning_rate": 1.3002640409263435e-08, - "loss": 3.7393, - "step": 4072000 - }, - { - "epoch": 44.8, - "learning_rate": 1.2988888277683041e-08, - "loss": 3.747, - "step": 4072500 - }, - { - "epoch": 44.81, - "learning_rate": 1.2975136146102644e-08, - "loss": 3.7453, - "step": 4073000 - }, - { - "epoch": 44.82, - "learning_rate": 1.296138401452225e-08, - "loss": 3.7538, - "step": 4073500 - }, - { - "epoch": 44.82, - "learning_rate": 1.2947631882941856e-08, - "loss": 3.7526, - "step": 4074000 - }, - { - "epoch": 44.83, - "learning_rate": 1.2933879751361459e-08, - "loss": 3.7563, - "step": 4074500 - }, - { - "epoch": 44.83, - "learning_rate": 1.2920127619781065e-08, - "loss": 3.769, - "step": 4075000 - }, - { - "epoch": 44.84, - "learning_rate": 1.2906375488200671e-08, - "loss": 3.764, - "step": 4075500 - }, - { - "epoch": 44.84, - "learning_rate": 1.2892623356620275e-08, - "loss": 3.7602, - "step": 4076000 - }, - { - "epoch": 44.85, - "learning_rate": 1.2878871225039881e-08, - "loss": 3.756, - "step": 4076500 - }, - { - "epoch": 44.85, - "learning_rate": 1.2865119093459486e-08, - "loss": 3.7634, - "step": 4077000 - }, - { - "epoch": 44.86, - "learning_rate": 1.285136696187909e-08, - "loss": 3.7428, - "step": 4077500 - }, - { - "epoch": 44.86, - "learning_rate": 1.2837614830298696e-08, - "loss": 3.7492, - "step": 4078000 - }, - { - "epoch": 44.87, - "learning_rate": 1.2823862698718302e-08, - "loss": 3.7425, - "step": 4078500 - }, - { - "epoch": 44.88, - "learning_rate": 1.2810110567137905e-08, - "loss": 3.7259, - "step": 4079000 - }, - { - "epoch": 44.88, - "learning_rate": 1.2796358435557511e-08, - "loss": 3.7515, - "step": 4079500 - }, - { - "epoch": 44.89, - "learning_rate": 1.2782606303977117e-08, - "loss": 3.7533, - "step": 4080000 - }, - { - "epoch": 44.89, - "learning_rate": 1.276885417239672e-08, - "loss": 3.7605, - "step": 4080500 - }, - { - "epoch": 44.9, - "learning_rate": 1.2755102040816326e-08, - "loss": 3.735, - "step": 4081000 - }, - { - "epoch": 44.9, - "learning_rate": 1.2741349909235932e-08, - "loss": 3.7485, - "step": 4081500 - }, - { - "epoch": 44.91, - "learning_rate": 1.2727597777655535e-08, - "loss": 3.768, - "step": 4082000 - }, - { - "epoch": 44.91, - "learning_rate": 1.271384564607514e-08, - "loss": 3.7322, - "step": 4082500 - }, - { - "epoch": 44.92, - "learning_rate": 1.2700093514494747e-08, - "loss": 3.7676, - "step": 4083000 - }, - { - "epoch": 44.93, - "learning_rate": 1.2686341382914351e-08, - "loss": 3.7629, - "step": 4083500 - }, - { - "epoch": 44.93, - "learning_rate": 1.2672589251333957e-08, - "loss": 3.7592, - "step": 4084000 - }, - { - "epoch": 44.94, - "learning_rate": 1.2658837119753563e-08, - "loss": 3.7399, - "step": 4084500 - }, - { - "epoch": 44.94, - "learning_rate": 1.2645084988173166e-08, - "loss": 3.7557, - "step": 4085000 - }, - { - "epoch": 44.95, - "learning_rate": 1.2631332856592772e-08, - "loss": 3.7324, - "step": 4085500 - }, - { - "epoch": 44.95, - "learning_rate": 1.2617580725012375e-08, - "loss": 3.7374, - "step": 4086000 - }, - { - "epoch": 44.96, - "learning_rate": 1.260382859343198e-08, - "loss": 3.7437, - "step": 4086500 - }, - { - "epoch": 44.96, - "learning_rate": 1.2590076461851587e-08, - "loss": 3.7497, - "step": 4087000 - }, - { - "epoch": 44.97, - "learning_rate": 1.257632433027119e-08, - "loss": 3.7312, - "step": 4087500 - }, - { - "epoch": 44.97, - "learning_rate": 1.2562572198690795e-08, - "loss": 3.7563, - "step": 4088000 - }, - { - "epoch": 44.98, - "learning_rate": 1.2548820067110401e-08, - "loss": 3.739, - "step": 4088500 - }, - { - "epoch": 44.99, - "learning_rate": 1.2535067935530006e-08, - "loss": 3.7433, - "step": 4089000 - }, - { - "epoch": 44.99, - "learning_rate": 1.2521315803949612e-08, - "loss": 3.7447, - "step": 4089500 - }, - { - "epoch": 45.0, - "learning_rate": 1.2507563672369218e-08, - "loss": 3.7595, - "step": 4090000 - }, - { - "epoch": 45.0, - "eval_loss": 3.8258707523345947, - "eval_runtime": 6.1512, - "eval_samples_per_second": 252.635, - "step": 4090275 - }, - { - "epoch": 45.0, - "learning_rate": 1.2493811540788822e-08, - "loss": 3.7496, - "step": 4090500 - }, - { - "epoch": 45.01, - "learning_rate": 1.2480059409208427e-08, - "loss": 3.7437, - "step": 4091000 - }, - { - "epoch": 45.01, - "learning_rate": 1.2466307277628031e-08, - "loss": 3.764, - "step": 4091500 - }, - { - "epoch": 45.02, - "learning_rate": 1.2452555146047637e-08, - "loss": 3.7545, - "step": 4092000 - }, - { - "epoch": 45.02, - "learning_rate": 1.2438803014467241e-08, - "loss": 3.7465, - "step": 4092500 - }, - { - "epoch": 45.03, - "learning_rate": 1.2425050882886846e-08, - "loss": 3.7462, - "step": 4093000 - }, - { - "epoch": 45.04, - "learning_rate": 1.2411298751306452e-08, - "loss": 3.7541, - "step": 4093500 - }, - { - "epoch": 45.04, - "learning_rate": 1.2397546619726056e-08, - "loss": 3.7485, - "step": 4094000 - }, - { - "epoch": 45.05, - "learning_rate": 1.2383794488145662e-08, - "loss": 3.7406, - "step": 4094500 - }, - { - "epoch": 45.05, - "learning_rate": 1.2370042356565268e-08, - "loss": 3.7352, - "step": 4095000 - }, - { - "epoch": 45.06, - "learning_rate": 1.2356290224984873e-08, - "loss": 3.7515, - "step": 4095500 - }, - { - "epoch": 45.06, - "learning_rate": 1.2342538093404477e-08, - "loss": 3.7522, - "step": 4096000 - }, - { - "epoch": 45.07, - "learning_rate": 1.2328785961824083e-08, - "loss": 3.7386, - "step": 4096500 - }, - { - "epoch": 45.07, - "learning_rate": 1.2315033830243688e-08, - "loss": 3.7199, - "step": 4097000 - }, - { - "epoch": 45.08, - "learning_rate": 1.2301281698663292e-08, - "loss": 3.7305, - "step": 4097500 - }, - { - "epoch": 45.08, - "learning_rate": 1.2287529567082898e-08, - "loss": 3.7574, - "step": 4098000 - }, - { - "epoch": 45.09, - "learning_rate": 1.2273777435502502e-08, - "loss": 3.7462, - "step": 4098500 - }, - { - "epoch": 45.1, - "learning_rate": 1.2260025303922107e-08, - "loss": 3.7577, - "step": 4099000 - }, - { - "epoch": 45.1, - "learning_rate": 1.2246273172341711e-08, - "loss": 3.7398, - "step": 4099500 - }, - { - "epoch": 45.11, - "learning_rate": 1.2232521040761317e-08, - "loss": 3.7538, - "step": 4100000 - }, - { - "epoch": 45.11, - "learning_rate": 1.2218768909180923e-08, - "loss": 3.7667, - "step": 4100500 - }, - { - "epoch": 45.12, - "learning_rate": 1.2205016777600528e-08, - "loss": 3.7487, - "step": 4101000 - }, - { - "epoch": 45.12, - "learning_rate": 1.2191264646020134e-08, - "loss": 3.7554, - "step": 4101500 - }, - { - "epoch": 45.13, - "learning_rate": 1.2177512514439738e-08, - "loss": 3.7424, - "step": 4102000 - }, - { - "epoch": 45.13, - "learning_rate": 1.2163760382859342e-08, - "loss": 3.7491, - "step": 4102500 - }, - { - "epoch": 45.14, - "learning_rate": 1.2150008251278948e-08, - "loss": 3.7392, - "step": 4103000 - }, - { - "epoch": 45.15, - "learning_rate": 1.2136256119698553e-08, - "loss": 3.7595, - "step": 4103500 - }, - { - "epoch": 45.15, - "learning_rate": 1.2122503988118157e-08, - "loss": 3.7527, - "step": 4104000 - }, - { - "epoch": 45.16, - "learning_rate": 1.2108751856537763e-08, - "loss": 3.7458, - "step": 4104500 - }, - { - "epoch": 45.16, - "learning_rate": 1.2094999724957368e-08, - "loss": 3.7662, - "step": 4105000 - }, - { - "epoch": 45.17, - "learning_rate": 1.2081247593376972e-08, - "loss": 3.7586, - "step": 4105500 - }, - { - "epoch": 45.17, - "learning_rate": 1.2067495461796578e-08, - "loss": 3.7464, - "step": 4106000 - }, - { - "epoch": 45.18, - "learning_rate": 1.2053743330216182e-08, - "loss": 3.7525, - "step": 4106500 - }, - { - "epoch": 45.18, - "learning_rate": 1.2039991198635788e-08, - "loss": 3.7604, - "step": 4107000 - }, - { - "epoch": 45.19, - "learning_rate": 1.2026239067055394e-08, - "loss": 3.7511, - "step": 4107500 - }, - { - "epoch": 45.2, - "learning_rate": 1.2012486935474999e-08, - "loss": 3.7492, - "step": 4108000 - }, - { - "epoch": 45.2, - "learning_rate": 1.1998734803894603e-08, - "loss": 3.7479, - "step": 4108500 - }, - { - "epoch": 45.21, - "learning_rate": 1.1984982672314208e-08, - "loss": 3.7428, - "step": 4109000 - }, - { - "epoch": 45.21, - "learning_rate": 1.1971230540733814e-08, - "loss": 3.75, - "step": 4109500 - }, - { - "epoch": 45.22, - "learning_rate": 1.1957478409153418e-08, - "loss": 3.7434, - "step": 4110000 - }, - { - "epoch": 45.22, - "learning_rate": 1.1943726277573022e-08, - "loss": 3.7396, - "step": 4110500 - }, - { - "epoch": 45.23, - "learning_rate": 1.1929974145992628e-08, - "loss": 3.7595, - "step": 4111000 - }, - { - "epoch": 45.23, - "learning_rate": 1.1916222014412233e-08, - "loss": 3.74, - "step": 4111500 - }, - { - "epoch": 45.24, - "learning_rate": 1.1902469882831837e-08, - "loss": 3.7367, - "step": 4112000 - }, - { - "epoch": 45.24, - "learning_rate": 1.1888717751251443e-08, - "loss": 3.7594, - "step": 4112500 - }, - { - "epoch": 45.25, - "learning_rate": 1.187496561967105e-08, - "loss": 3.7456, - "step": 4113000 - }, - { - "epoch": 45.26, - "learning_rate": 1.1861213488090654e-08, - "loss": 3.7516, - "step": 4113500 - }, - { - "epoch": 45.26, - "learning_rate": 1.184746135651026e-08, - "loss": 3.7299, - "step": 4114000 - }, - { - "epoch": 45.27, - "learning_rate": 1.1833709224929864e-08, - "loss": 3.7423, - "step": 4114500 - }, - { - "epoch": 45.27, - "learning_rate": 1.1819957093349468e-08, - "loss": 3.7479, - "step": 4115000 - }, - { - "epoch": 45.28, - "learning_rate": 1.1806204961769074e-08, - "loss": 3.7647, - "step": 4115500 - }, - { - "epoch": 45.28, - "learning_rate": 1.1792452830188679e-08, - "loss": 3.748, - "step": 4116000 - }, - { - "epoch": 45.29, - "learning_rate": 1.1778700698608283e-08, - "loss": 3.762, - "step": 4116500 - }, - { - "epoch": 45.29, - "learning_rate": 1.176494856702789e-08, - "loss": 3.745, - "step": 4117000 - }, - { - "epoch": 45.3, - "learning_rate": 1.1751196435447494e-08, - "loss": 3.7647, - "step": 4117500 - }, - { - "epoch": 45.31, - "learning_rate": 1.1737444303867098e-08, - "loss": 3.7378, - "step": 4118000 - }, - { - "epoch": 45.31, - "learning_rate": 1.1723692172286704e-08, - "loss": 3.7709, - "step": 4118500 - }, - { - "epoch": 45.32, - "learning_rate": 1.170994004070631e-08, - "loss": 3.7667, - "step": 4119000 - }, - { - "epoch": 45.32, - "learning_rate": 1.1696187909125914e-08, - "loss": 3.7512, - "step": 4119500 - }, - { - "epoch": 45.33, - "learning_rate": 1.1682435777545519e-08, - "loss": 3.7629, - "step": 4120000 - }, - { - "epoch": 45.33, - "learning_rate": 1.1668683645965125e-08, - "loss": 3.7562, - "step": 4120500 - }, - { - "epoch": 45.34, - "learning_rate": 1.165493151438473e-08, - "loss": 3.754, - "step": 4121000 - }, - { - "epoch": 45.34, - "learning_rate": 1.1641179382804334e-08, - "loss": 3.745, - "step": 4121500 - }, - { - "epoch": 45.35, - "learning_rate": 1.162742725122394e-08, - "loss": 3.7402, - "step": 4122000 - }, - { - "epoch": 45.35, - "learning_rate": 1.1613675119643544e-08, - "loss": 3.7519, - "step": 4122500 - }, - { - "epoch": 45.36, - "learning_rate": 1.1599922988063148e-08, - "loss": 3.7237, - "step": 4123000 - }, - { - "epoch": 45.37, - "learning_rate": 1.1586170856482754e-08, - "loss": 3.7442, - "step": 4123500 - }, - { - "epoch": 45.37, - "learning_rate": 1.1572418724902359e-08, - "loss": 3.7638, - "step": 4124000 - }, - { - "epoch": 45.38, - "learning_rate": 1.1558666593321965e-08, - "loss": 3.7561, - "step": 4124500 - }, - { - "epoch": 45.38, - "learning_rate": 1.154491446174157e-08, - "loss": 3.7455, - "step": 4125000 - }, - { - "epoch": 45.39, - "learning_rate": 1.1531162330161175e-08, - "loss": 3.7481, - "step": 4125500 - }, - { - "epoch": 45.39, - "learning_rate": 1.151741019858078e-08, - "loss": 3.7645, - "step": 4126000 - }, - { - "epoch": 45.4, - "learning_rate": 1.1503658067000386e-08, - "loss": 3.7539, - "step": 4126500 - }, - { - "epoch": 45.4, - "learning_rate": 1.148990593541999e-08, - "loss": 3.7369, - "step": 4127000 - }, - { - "epoch": 45.41, - "learning_rate": 1.1476153803839594e-08, - "loss": 3.7523, - "step": 4127500 - }, - { - "epoch": 45.42, - "learning_rate": 1.1462401672259199e-08, - "loss": 3.7414, - "step": 4128000 - }, - { - "epoch": 45.42, - "learning_rate": 1.1448649540678805e-08, - "loss": 3.7732, - "step": 4128500 - }, - { - "epoch": 45.43, - "learning_rate": 1.143489740909841e-08, - "loss": 3.7408, - "step": 4129000 - }, - { - "epoch": 45.43, - "learning_rate": 1.1421145277518014e-08, - "loss": 3.7439, - "step": 4129500 - }, - { - "epoch": 45.44, - "learning_rate": 1.140739314593762e-08, - "loss": 3.7622, - "step": 4130000 - }, - { - "epoch": 45.44, - "learning_rate": 1.1393641014357224e-08, - "loss": 3.7326, - "step": 4130500 - }, - { - "epoch": 45.45, - "learning_rate": 1.137988888277683e-08, - "loss": 3.7452, - "step": 4131000 - }, - { - "epoch": 45.45, - "learning_rate": 1.1366136751196436e-08, - "loss": 3.7491, - "step": 4131500 - }, - { - "epoch": 45.46, - "learning_rate": 1.135238461961604e-08, - "loss": 3.742, - "step": 4132000 - }, - { - "epoch": 45.46, - "learning_rate": 1.1338632488035645e-08, - "loss": 3.7452, - "step": 4132500 - }, - { - "epoch": 45.47, - "learning_rate": 1.1324880356455251e-08, - "loss": 3.7576, - "step": 4133000 - }, - { - "epoch": 45.48, - "learning_rate": 1.1311128224874855e-08, - "loss": 3.7506, - "step": 4133500 - }, - { - "epoch": 45.48, - "learning_rate": 1.129737609329446e-08, - "loss": 3.7465, - "step": 4134000 - }, - { - "epoch": 45.49, - "learning_rate": 1.1283623961714066e-08, - "loss": 3.7637, - "step": 4134500 - }, - { - "epoch": 45.49, - "learning_rate": 1.126987183013367e-08, - "loss": 3.7578, - "step": 4135000 - }, - { - "epoch": 45.5, - "learning_rate": 1.1256119698553274e-08, - "loss": 3.7442, - "step": 4135500 - }, - { - "epoch": 45.5, - "learning_rate": 1.124236756697288e-08, - "loss": 3.7477, - "step": 4136000 - }, - { - "epoch": 45.51, - "learning_rate": 1.1228615435392485e-08, - "loss": 3.7518, - "step": 4136500 - }, - { - "epoch": 45.51, - "learning_rate": 1.1214863303812091e-08, - "loss": 3.7661, - "step": 4137000 - }, - { - "epoch": 45.52, - "learning_rate": 1.1201111172231695e-08, - "loss": 3.7731, - "step": 4137500 - }, - { - "epoch": 45.53, - "learning_rate": 1.1187359040651301e-08, - "loss": 3.7595, - "step": 4138000 - }, - { - "epoch": 45.53, - "learning_rate": 1.1173606909070906e-08, - "loss": 3.7497, - "step": 4138500 - }, - { - "epoch": 45.54, - "learning_rate": 1.115985477749051e-08, - "loss": 3.7492, - "step": 4139000 - }, - { - "epoch": 45.54, - "learning_rate": 1.1146102645910116e-08, - "loss": 3.743, - "step": 4139500 - }, - { - "epoch": 45.55, - "learning_rate": 1.113235051432972e-08, - "loss": 3.7207, - "step": 4140000 - }, - { - "epoch": 45.55, - "learning_rate": 1.1118598382749325e-08, - "loss": 3.7361, - "step": 4140500 - }, - { - "epoch": 45.56, - "learning_rate": 1.1104846251168931e-08, - "loss": 3.7357, - "step": 4141000 - }, - { - "epoch": 45.56, - "learning_rate": 1.1091094119588535e-08, - "loss": 3.7283, - "step": 4141500 - }, - { - "epoch": 45.57, - "learning_rate": 1.107734198800814e-08, - "loss": 3.7411, - "step": 4142000 - }, - { - "epoch": 45.57, - "learning_rate": 1.1063589856427746e-08, - "loss": 3.7527, - "step": 4142500 - }, - { - "epoch": 45.58, - "learning_rate": 1.1049837724847352e-08, - "loss": 3.742, - "step": 4143000 - }, - { - "epoch": 45.59, - "learning_rate": 1.1036085593266956e-08, - "loss": 3.7526, - "step": 4143500 - }, - { - "epoch": 45.59, - "learning_rate": 1.1022333461686562e-08, - "loss": 3.7642, - "step": 4144000 - }, - { - "epoch": 45.6, - "learning_rate": 1.1008581330106167e-08, - "loss": 3.7469, - "step": 4144500 - }, - { - "epoch": 45.6, - "learning_rate": 1.0994829198525771e-08, - "loss": 3.7305, - "step": 4145000 - }, - { - "epoch": 45.61, - "learning_rate": 1.0981077066945377e-08, - "loss": 3.752, - "step": 4145500 - }, - { - "epoch": 45.61, - "learning_rate": 1.0967324935364981e-08, - "loss": 3.7516, - "step": 4146000 - }, - { - "epoch": 45.62, - "learning_rate": 1.0953572803784586e-08, - "loss": 3.7748, - "step": 4146500 - }, - { - "epoch": 45.62, - "learning_rate": 1.093982067220419e-08, - "loss": 3.7414, - "step": 4147000 - }, - { - "epoch": 45.63, - "learning_rate": 1.0926068540623796e-08, - "loss": 3.7317, - "step": 4147500 - }, - { - "epoch": 45.64, - "learning_rate": 1.09123164090434e-08, - "loss": 3.7484, - "step": 4148000 - }, - { - "epoch": 45.64, - "learning_rate": 1.0898564277463005e-08, - "loss": 3.7328, - "step": 4148500 - }, - { - "epoch": 45.65, - "learning_rate": 1.0884812145882611e-08, - "loss": 3.7496, - "step": 4149000 - }, - { - "epoch": 45.65, - "learning_rate": 1.0871060014302217e-08, - "loss": 3.7617, - "step": 4149500 - }, - { - "epoch": 45.66, - "learning_rate": 1.0857307882721821e-08, - "loss": 3.7435, - "step": 4150000 - }, - { - "epoch": 45.66, - "learning_rate": 1.0843555751141427e-08, - "loss": 3.7438, - "step": 4150500 - }, - { - "epoch": 45.67, - "learning_rate": 1.0829803619561032e-08, - "loss": 3.7632, - "step": 4151000 - }, - { - "epoch": 45.67, - "learning_rate": 1.0816051487980636e-08, - "loss": 3.7296, - "step": 4151500 - }, - { - "epoch": 45.68, - "learning_rate": 1.0802299356400242e-08, - "loss": 3.7491, - "step": 4152000 - }, - { - "epoch": 45.68, - "learning_rate": 1.0788547224819847e-08, - "loss": 3.7551, - "step": 4152500 - }, - { - "epoch": 45.69, - "learning_rate": 1.0774795093239451e-08, - "loss": 3.7589, - "step": 4153000 - }, - { - "epoch": 45.7, - "learning_rate": 1.0761042961659057e-08, - "loss": 3.7543, - "step": 4153500 - }, - { - "epoch": 45.7, - "learning_rate": 1.0747290830078661e-08, - "loss": 3.7544, - "step": 4154000 - }, - { - "epoch": 45.71, - "learning_rate": 1.0733538698498266e-08, - "loss": 3.7683, - "step": 4154500 - }, - { - "epoch": 45.71, - "learning_rate": 1.0719786566917872e-08, - "loss": 3.769, - "step": 4155000 - }, - { - "epoch": 45.72, - "learning_rate": 1.0706034435337478e-08, - "loss": 3.7353, - "step": 4155500 - }, - { - "epoch": 45.72, - "learning_rate": 1.0692282303757082e-08, - "loss": 3.7388, - "step": 4156000 - }, - { - "epoch": 45.73, - "learning_rate": 1.0678530172176687e-08, - "loss": 3.7373, - "step": 4156500 - }, - { - "epoch": 45.73, - "learning_rate": 1.0664778040596293e-08, - "loss": 3.744, - "step": 4157000 - }, - { - "epoch": 45.74, - "learning_rate": 1.0651025909015897e-08, - "loss": 3.7501, - "step": 4157500 - }, - { - "epoch": 45.75, - "learning_rate": 1.0637273777435501e-08, - "loss": 3.7413, - "step": 4158000 - }, - { - "epoch": 45.75, - "learning_rate": 1.0623521645855107e-08, - "loss": 3.7434, - "step": 4158500 - }, - { - "epoch": 45.76, - "learning_rate": 1.0609769514274712e-08, - "loss": 3.7508, - "step": 4159000 - }, - { - "epoch": 45.76, - "learning_rate": 1.0596017382694316e-08, - "loss": 3.7676, - "step": 4159500 - }, - { - "epoch": 45.77, - "learning_rate": 1.0582265251113922e-08, - "loss": 3.7535, - "step": 4160000 - }, - { - "epoch": 45.77, - "learning_rate": 1.0568513119533527e-08, - "loss": 3.7582, - "step": 4160500 - }, - { - "epoch": 45.78, - "learning_rate": 1.0554760987953133e-08, - "loss": 3.7277, - "step": 4161000 - }, - { - "epoch": 45.78, - "learning_rate": 1.0541008856372739e-08, - "loss": 3.7672, - "step": 4161500 - }, - { - "epoch": 45.79, - "learning_rate": 1.0527256724792343e-08, - "loss": 3.7419, - "step": 4162000 - }, - { - "epoch": 45.79, - "learning_rate": 1.0513504593211947e-08, - "loss": 3.7788, - "step": 4162500 - }, - { - "epoch": 45.8, - "learning_rate": 1.0499752461631553e-08, - "loss": 3.7495, - "step": 4163000 - }, - { - "epoch": 45.81, - "learning_rate": 1.0486000330051158e-08, - "loss": 3.7481, - "step": 4163500 - }, - { - "epoch": 45.81, - "learning_rate": 1.0472248198470762e-08, - "loss": 3.7394, - "step": 4164000 - }, - { - "epoch": 45.82, - "learning_rate": 1.0458496066890367e-08, - "loss": 3.7333, - "step": 4164500 - }, - { - "epoch": 45.82, - "learning_rate": 1.0444743935309973e-08, - "loss": 3.7548, - "step": 4165000 - }, - { - "epoch": 45.83, - "learning_rate": 1.0430991803729577e-08, - "loss": 3.7429, - "step": 4165500 - }, - { - "epoch": 45.83, - "learning_rate": 1.0417239672149181e-08, - "loss": 3.7766, - "step": 4166000 - }, - { - "epoch": 45.84, - "learning_rate": 1.0403487540568787e-08, - "loss": 3.7629, - "step": 4166500 - }, - { - "epoch": 45.84, - "learning_rate": 1.0389735408988393e-08, - "loss": 3.7372, - "step": 4167000 - }, - { - "epoch": 45.85, - "learning_rate": 1.0375983277407998e-08, - "loss": 3.7685, - "step": 4167500 - }, - { - "epoch": 45.86, - "learning_rate": 1.0362231145827604e-08, - "loss": 3.7408, - "step": 4168000 - }, - { - "epoch": 45.86, - "learning_rate": 1.0348479014247208e-08, - "loss": 3.756, - "step": 4168500 - }, - { - "epoch": 45.87, - "learning_rate": 1.0334726882666813e-08, - "loss": 3.758, - "step": 4169000 - }, - { - "epoch": 45.87, - "learning_rate": 1.0320974751086419e-08, - "loss": 3.7398, - "step": 4169500 - }, - { - "epoch": 45.88, - "learning_rate": 1.0307222619506023e-08, - "loss": 3.7536, - "step": 4170000 - }, - { - "epoch": 45.88, - "learning_rate": 1.0293470487925627e-08, - "loss": 3.7517, - "step": 4170500 - }, - { - "epoch": 45.89, - "learning_rate": 1.0279718356345233e-08, - "loss": 3.7419, - "step": 4171000 - }, - { - "epoch": 45.89, - "learning_rate": 1.0265966224764838e-08, - "loss": 3.7461, - "step": 4171500 - }, - { - "epoch": 45.9, - "learning_rate": 1.0252214093184442e-08, - "loss": 3.7427, - "step": 4172000 - }, - { - "epoch": 45.9, - "learning_rate": 1.0238461961604048e-08, - "loss": 3.7483, - "step": 4172500 - }, - { - "epoch": 45.91, - "learning_rate": 1.0224709830023653e-08, - "loss": 3.768, - "step": 4173000 - }, - { - "epoch": 45.92, - "learning_rate": 1.0210957698443259e-08, - "loss": 3.7678, - "step": 4173500 - }, - { - "epoch": 45.92, - "learning_rate": 1.0197205566862863e-08, - "loss": 3.768, - "step": 4174000 - }, - { - "epoch": 45.93, - "learning_rate": 1.0183453435282469e-08, - "loss": 3.7499, - "step": 4174500 - }, - { - "epoch": 45.93, - "learning_rate": 1.0169701303702073e-08, - "loss": 3.7557, - "step": 4175000 - }, - { - "epoch": 45.94, - "learning_rate": 1.0155949172121678e-08, - "loss": 3.7549, - "step": 4175500 - }, - { - "epoch": 45.94, - "learning_rate": 1.0142197040541284e-08, - "loss": 3.7557, - "step": 4176000 - }, - { - "epoch": 45.95, - "learning_rate": 1.0128444908960888e-08, - "loss": 3.7415, - "step": 4176500 - }, - { - "epoch": 45.95, - "learning_rate": 1.0114692777380493e-08, - "loss": 3.7557, - "step": 4177000 - }, - { - "epoch": 45.96, - "learning_rate": 1.0100940645800099e-08, - "loss": 3.7376, - "step": 4177500 - }, - { - "epoch": 45.97, - "learning_rate": 1.0087188514219703e-08, - "loss": 3.7696, - "step": 4178000 - }, - { - "epoch": 45.97, - "learning_rate": 1.0073436382639307e-08, - "loss": 3.7413, - "step": 4178500 - }, - { - "epoch": 45.98, - "learning_rate": 1.0059684251058913e-08, - "loss": 3.7494, - "step": 4179000 - }, - { - "epoch": 45.98, - "learning_rate": 1.004593211947852e-08, - "loss": 3.7677, - "step": 4179500 - }, - { - "epoch": 45.99, - "learning_rate": 1.0032179987898124e-08, - "loss": 3.7453, - "step": 4180000 - }, - { - "epoch": 45.99, - "learning_rate": 1.001842785631773e-08, - "loss": 3.7573, - "step": 4180500 - }, - { - "epoch": 46.0, - "learning_rate": 1.0004675724737334e-08, - "loss": 3.7583, - "step": 4181000 - }, - { - "epoch": 46.0, - "eval_loss": 3.825601577758789, - "eval_runtime": 6.1432, - "eval_samples_per_second": 252.961, - "step": 4181170 - }, - { - "epoch": 46.0, - "learning_rate": 9.990923593156939e-09, - "loss": 3.7583, - "step": 4181500 - }, - { - "epoch": 46.01, - "learning_rate": 9.977171461576545e-09, - "loss": 3.739, - "step": 4182000 - }, - { - "epoch": 46.01, - "learning_rate": 9.963419329996149e-09, - "loss": 3.7312, - "step": 4182500 - }, - { - "epoch": 46.02, - "learning_rate": 9.949667198415753e-09, - "loss": 3.7493, - "step": 4183000 - }, - { - "epoch": 46.03, - "learning_rate": 9.935915066835358e-09, - "loss": 3.7652, - "step": 4183500 - }, - { - "epoch": 46.03, - "learning_rate": 9.922162935254964e-09, - "loss": 3.7633, - "step": 4184000 - }, - { - "epoch": 46.04, - "learning_rate": 9.908410803674568e-09, - "loss": 3.7392, - "step": 4184500 - }, - { - "epoch": 46.04, - "learning_rate": 9.894658672094174e-09, - "loss": 3.7682, - "step": 4185000 - }, - { - "epoch": 46.05, - "learning_rate": 9.88090654051378e-09, - "loss": 3.7548, - "step": 4185500 - }, - { - "epoch": 46.05, - "learning_rate": 9.867154408933385e-09, - "loss": 3.7413, - "step": 4186000 - }, - { - "epoch": 46.06, - "learning_rate": 9.853402277352989e-09, - "loss": 3.75, - "step": 4186500 - }, - { - "epoch": 46.06, - "learning_rate": 9.839650145772595e-09, - "loss": 3.7331, - "step": 4187000 - }, - { - "epoch": 46.07, - "learning_rate": 9.8258980141922e-09, - "loss": 3.742, - "step": 4187500 - }, - { - "epoch": 46.08, - "learning_rate": 9.812145882611804e-09, - "loss": 3.7521, - "step": 4188000 - }, - { - "epoch": 46.08, - "learning_rate": 9.79839375103141e-09, - "loss": 3.7447, - "step": 4188500 - }, - { - "epoch": 46.09, - "learning_rate": 9.784641619451014e-09, - "loss": 3.7471, - "step": 4189000 - }, - { - "epoch": 46.09, - "learning_rate": 9.770889487870619e-09, - "loss": 3.7498, - "step": 4189500 - }, - { - "epoch": 46.1, - "learning_rate": 9.757137356290225e-09, - "loss": 3.7647, - "step": 4190000 - }, - { - "epoch": 46.1, - "learning_rate": 9.743385224709829e-09, - "loss": 3.7574, - "step": 4190500 - }, - { - "epoch": 46.11, - "learning_rate": 9.729633093129435e-09, - "loss": 3.7517, - "step": 4191000 - }, - { - "epoch": 46.11, - "learning_rate": 9.71588096154904e-09, - "loss": 3.7473, - "step": 4191500 - }, - { - "epoch": 46.12, - "learning_rate": 9.702128829968646e-09, - "loss": 3.7429, - "step": 4192000 - }, - { - "epoch": 46.12, - "learning_rate": 9.68837669838825e-09, - "loss": 3.7611, - "step": 4192500 - }, - { - "epoch": 46.13, - "learning_rate": 9.674624566807854e-09, - "loss": 3.7541, - "step": 4193000 - }, - { - "epoch": 46.14, - "learning_rate": 9.66087243522746e-09, - "loss": 3.7592, - "step": 4193500 - }, - { - "epoch": 46.14, - "learning_rate": 9.647120303647065e-09, - "loss": 3.739, - "step": 4194000 - }, - { - "epoch": 46.15, - "learning_rate": 9.633368172066669e-09, - "loss": 3.7411, - "step": 4194500 - }, - { - "epoch": 46.15, - "learning_rate": 9.619616040486275e-09, - "loss": 3.7699, - "step": 4195000 - }, - { - "epoch": 46.16, - "learning_rate": 9.60586390890588e-09, - "loss": 3.76, - "step": 4195500 - }, - { - "epoch": 46.16, - "learning_rate": 9.592111777325484e-09, - "loss": 3.7401, - "step": 4196000 - }, - { - "epoch": 46.17, - "learning_rate": 9.57835964574509e-09, - "loss": 3.7612, - "step": 4196500 - }, - { - "epoch": 46.17, - "learning_rate": 9.564607514164694e-09, - "loss": 3.7595, - "step": 4197000 - }, - { - "epoch": 46.18, - "learning_rate": 9.5508553825843e-09, - "loss": 3.733, - "step": 4197500 - }, - { - "epoch": 46.19, - "learning_rate": 9.537103251003906e-09, - "loss": 3.7472, - "step": 4198000 - }, - { - "epoch": 46.19, - "learning_rate": 9.52335111942351e-09, - "loss": 3.7534, - "step": 4198500 - }, - { - "epoch": 46.2, - "learning_rate": 9.509598987843115e-09, - "loss": 3.7588, - "step": 4199000 - }, - { - "epoch": 46.2, - "learning_rate": 9.495846856262721e-09, - "loss": 3.7485, - "step": 4199500 - }, - { - "epoch": 46.21, - "learning_rate": 9.482094724682326e-09, - "loss": 3.7636, - "step": 4200000 - }, - { - "epoch": 46.21, - "learning_rate": 9.46834259310193e-09, - "loss": 3.7609, - "step": 4200500 - }, - { - "epoch": 46.22, - "learning_rate": 9.454590461521536e-09, - "loss": 3.7327, - "step": 4201000 - }, - { - "epoch": 46.22, - "learning_rate": 9.44083832994114e-09, - "loss": 3.7614, - "step": 4201500 - }, - { - "epoch": 46.23, - "learning_rate": 9.427086198360745e-09, - "loss": 3.7491, - "step": 4202000 - }, - { - "epoch": 46.23, - "learning_rate": 9.413334066780349e-09, - "loss": 3.7485, - "step": 4202500 - }, - { - "epoch": 46.24, - "learning_rate": 9.399581935199955e-09, - "loss": 3.7531, - "step": 4203000 - }, - { - "epoch": 46.25, - "learning_rate": 9.385829803619561e-09, - "loss": 3.7483, - "step": 4203500 - }, - { - "epoch": 46.25, - "learning_rate": 9.372077672039166e-09, - "loss": 3.7455, - "step": 4204000 - }, - { - "epoch": 46.26, - "learning_rate": 9.358325540458772e-09, - "loss": 3.7611, - "step": 4204500 - }, - { - "epoch": 46.26, - "learning_rate": 9.344573408878376e-09, - "loss": 3.7396, - "step": 4205000 - }, - { - "epoch": 46.27, - "learning_rate": 9.33082127729798e-09, - "loss": 3.7714, - "step": 4205500 - }, - { - "epoch": 46.27, - "learning_rate": 9.317069145717586e-09, - "loss": 3.747, - "step": 4206000 - }, - { - "epoch": 46.28, - "learning_rate": 9.30331701413719e-09, - "loss": 3.7326, - "step": 4206500 - }, - { - "epoch": 46.28, - "learning_rate": 9.289564882556795e-09, - "loss": 3.7457, - "step": 4207000 - }, - { - "epoch": 46.29, - "learning_rate": 9.275812750976401e-09, - "loss": 3.7599, - "step": 4207500 - }, - { - "epoch": 46.3, - "learning_rate": 9.262060619396006e-09, - "loss": 3.7569, - "step": 4208000 - }, - { - "epoch": 46.3, - "learning_rate": 9.24830848781561e-09, - "loss": 3.7482, - "step": 4208500 - }, - { - "epoch": 46.31, - "learning_rate": 9.234556356235216e-09, - "loss": 3.7334, - "step": 4209000 - }, - { - "epoch": 46.31, - "learning_rate": 9.220804224654822e-09, - "loss": 3.7508, - "step": 4209500 - }, - { - "epoch": 46.32, - "learning_rate": 9.207052093074426e-09, - "loss": 3.7505, - "step": 4210000 - }, - { - "epoch": 46.32, - "learning_rate": 9.193299961494032e-09, - "loss": 3.7411, - "step": 4210500 - }, - { - "epoch": 46.33, - "learning_rate": 9.179547829913637e-09, - "loss": 3.756, - "step": 4211000 - }, - { - "epoch": 46.33, - "learning_rate": 9.165795698333241e-09, - "loss": 3.7494, - "step": 4211500 - }, - { - "epoch": 46.34, - "learning_rate": 9.152043566752846e-09, - "loss": 3.7256, - "step": 4212000 - }, - { - "epoch": 46.34, - "learning_rate": 9.138291435172452e-09, - "loss": 3.7388, - "step": 4212500 - }, - { - "epoch": 46.35, - "learning_rate": 9.124539303592056e-09, - "loss": 3.7531, - "step": 4213000 - }, - { - "epoch": 46.36, - "learning_rate": 9.11078717201166e-09, - "loss": 3.7181, - "step": 4213500 - }, - { - "epoch": 46.36, - "learning_rate": 9.097035040431266e-09, - "loss": 3.7524, - "step": 4214000 - }, - { - "epoch": 46.37, - "learning_rate": 9.08328290885087e-09, - "loss": 3.7454, - "step": 4214500 - }, - { - "epoch": 46.37, - "learning_rate": 9.069530777270475e-09, - "loss": 3.7549, - "step": 4215000 - }, - { - "epoch": 46.38, - "learning_rate": 9.055778645690081e-09, - "loss": 3.7719, - "step": 4215500 - }, - { - "epoch": 46.38, - "learning_rate": 9.042026514109687e-09, - "loss": 3.762, - "step": 4216000 - }, - { - "epoch": 46.39, - "learning_rate": 9.028274382529292e-09, - "loss": 3.7523, - "step": 4216500 - }, - { - "epoch": 46.39, - "learning_rate": 9.014522250948898e-09, - "loss": 3.7605, - "step": 4217000 - }, - { - "epoch": 46.4, - "learning_rate": 9.000770119368502e-09, - "loss": 3.7542, - "step": 4217500 - }, - { - "epoch": 46.41, - "learning_rate": 8.987017987788106e-09, - "loss": 3.7569, - "step": 4218000 - }, - { - "epoch": 46.41, - "learning_rate": 8.973265856207712e-09, - "loss": 3.746, - "step": 4218500 - }, - { - "epoch": 46.42, - "learning_rate": 8.959513724627317e-09, - "loss": 3.7509, - "step": 4219000 - }, - { - "epoch": 46.42, - "learning_rate": 8.945761593046921e-09, - "loss": 3.759, - "step": 4219500 - }, - { - "epoch": 46.43, - "learning_rate": 8.932009461466527e-09, - "loss": 3.7529, - "step": 4220000 - }, - { - "epoch": 46.43, - "learning_rate": 8.918257329886132e-09, - "loss": 3.7752, - "step": 4220500 - }, - { - "epoch": 46.44, - "learning_rate": 8.904505198305736e-09, - "loss": 3.771, - "step": 4221000 - }, - { - "epoch": 46.44, - "learning_rate": 8.890753066725342e-09, - "loss": 3.7535, - "step": 4221500 - }, - { - "epoch": 46.45, - "learning_rate": 8.877000935144948e-09, - "loss": 3.7512, - "step": 4222000 - }, - { - "epoch": 46.45, - "learning_rate": 8.863248803564552e-09, - "loss": 3.7353, - "step": 4222500 - }, - { - "epoch": 46.46, - "learning_rate": 8.849496671984157e-09, - "loss": 3.7402, - "step": 4223000 - }, - { - "epoch": 46.47, - "learning_rate": 8.835744540403763e-09, - "loss": 3.7613, - "step": 4223500 - }, - { - "epoch": 46.47, - "learning_rate": 8.821992408823367e-09, - "loss": 3.7522, - "step": 4224000 - }, - { - "epoch": 46.48, - "learning_rate": 8.808240277242972e-09, - "loss": 3.7339, - "step": 4224500 - }, - { - "epoch": 46.48, - "learning_rate": 8.794488145662578e-09, - "loss": 3.7224, - "step": 4225000 - }, - { - "epoch": 46.49, - "learning_rate": 8.780736014082182e-09, - "loss": 3.7657, - "step": 4225500 - }, - { - "epoch": 46.49, - "learning_rate": 8.766983882501786e-09, - "loss": 3.7689, - "step": 4226000 - }, - { - "epoch": 46.5, - "learning_rate": 8.753231750921392e-09, - "loss": 3.7515, - "step": 4226500 - }, - { - "epoch": 46.5, - "learning_rate": 8.739479619340997e-09, - "loss": 3.7606, - "step": 4227000 - }, - { - "epoch": 46.51, - "learning_rate": 8.725727487760603e-09, - "loss": 3.7585, - "step": 4227500 - }, - { - "epoch": 46.52, - "learning_rate": 8.711975356180209e-09, - "loss": 3.7496, - "step": 4228000 - }, - { - "epoch": 46.52, - "learning_rate": 8.698223224599813e-09, - "loss": 3.7538, - "step": 4228500 - }, - { - "epoch": 46.53, - "learning_rate": 8.684471093019418e-09, - "loss": 3.7432, - "step": 4229000 - }, - { - "epoch": 46.53, - "learning_rate": 8.670718961439024e-09, - "loss": 3.7559, - "step": 4229500 - }, - { - "epoch": 46.54, - "learning_rate": 8.656966829858628e-09, - "loss": 3.7457, - "step": 4230000 - }, - { - "epoch": 46.54, - "learning_rate": 8.643214698278232e-09, - "loss": 3.7507, - "step": 4230500 - }, - { - "epoch": 46.55, - "learning_rate": 8.629462566697837e-09, - "loss": 3.7326, - "step": 4231000 - }, - { - "epoch": 46.55, - "learning_rate": 8.615710435117443e-09, - "loss": 3.7548, - "step": 4231500 - }, - { - "epoch": 46.56, - "learning_rate": 8.601958303537047e-09, - "loss": 3.7467, - "step": 4232000 - }, - { - "epoch": 46.56, - "learning_rate": 8.588206171956652e-09, - "loss": 3.7529, - "step": 4232500 - }, - { - "epoch": 46.57, - "learning_rate": 8.574454040376258e-09, - "loss": 3.7327, - "step": 4233000 - }, - { - "epoch": 46.58, - "learning_rate": 8.560701908795864e-09, - "loss": 3.75, - "step": 4233500 - }, - { - "epoch": 46.58, - "learning_rate": 8.546949777215468e-09, - "loss": 3.7522, - "step": 4234000 - }, - { - "epoch": 46.59, - "learning_rate": 8.533197645635074e-09, - "loss": 3.7521, - "step": 4234500 - }, - { - "epoch": 46.59, - "learning_rate": 8.519445514054679e-09, - "loss": 3.7557, - "step": 4235000 - }, - { - "epoch": 46.6, - "learning_rate": 8.505693382474283e-09, - "loss": 3.7453, - "step": 4235500 - }, - { - "epoch": 46.6, - "learning_rate": 8.491941250893889e-09, - "loss": 3.7609, - "step": 4236000 - }, - { - "epoch": 46.61, - "learning_rate": 8.478189119313493e-09, - "loss": 3.7459, - "step": 4236500 - }, - { - "epoch": 46.61, - "learning_rate": 8.464436987733098e-09, - "loss": 3.7573, - "step": 4237000 - }, - { - "epoch": 46.62, - "learning_rate": 8.450684856152704e-09, - "loss": 3.7448, - "step": 4237500 - }, - { - "epoch": 46.63, - "learning_rate": 8.436932724572308e-09, - "loss": 3.7427, - "step": 4238000 - }, - { - "epoch": 46.63, - "learning_rate": 8.423180592991912e-09, - "loss": 3.7449, - "step": 4238500 - }, - { - "epoch": 46.64, - "learning_rate": 8.409428461411519e-09, - "loss": 3.7645, - "step": 4239000 - }, - { - "epoch": 46.64, - "learning_rate": 8.395676329831123e-09, - "loss": 3.7327, - "step": 4239500 - }, - { - "epoch": 46.65, - "learning_rate": 8.381924198250729e-09, - "loss": 3.7288, - "step": 4240000 - }, - { - "epoch": 46.65, - "learning_rate": 8.368172066670333e-09, - "loss": 3.7416, - "step": 4240500 - }, - { - "epoch": 46.66, - "learning_rate": 8.35441993508994e-09, - "loss": 3.7467, - "step": 4241000 - }, - { - "epoch": 46.66, - "learning_rate": 8.340667803509544e-09, - "loss": 3.7332, - "step": 4241500 - }, - { - "epoch": 46.67, - "learning_rate": 8.326915671929148e-09, - "loss": 3.7471, - "step": 4242000 - }, - { - "epoch": 46.67, - "learning_rate": 8.313163540348754e-09, - "loss": 3.7525, - "step": 4242500 - }, - { - "epoch": 46.68, - "learning_rate": 8.299411408768359e-09, - "loss": 3.7472, - "step": 4243000 - }, - { - "epoch": 46.69, - "learning_rate": 8.285659277187963e-09, - "loss": 3.7564, - "step": 4243500 - }, - { - "epoch": 46.69, - "learning_rate": 8.271907145607569e-09, - "loss": 3.7347, - "step": 4244000 - }, - { - "epoch": 46.7, - "learning_rate": 8.258155014027173e-09, - "loss": 3.7725, - "step": 4244500 - }, - { - "epoch": 46.7, - "learning_rate": 8.244402882446778e-09, - "loss": 3.7367, - "step": 4245000 - }, - { - "epoch": 46.71, - "learning_rate": 8.230650750866384e-09, - "loss": 3.7711, - "step": 4245500 - }, - { - "epoch": 46.71, - "learning_rate": 8.21689861928599e-09, - "loss": 3.7377, - "step": 4246000 - }, - { - "epoch": 46.72, - "learning_rate": 8.203146487705594e-09, - "loss": 3.7571, - "step": 4246500 - }, - { - "epoch": 46.72, - "learning_rate": 8.1893943561252e-09, - "loss": 3.7292, - "step": 4247000 - }, - { - "epoch": 46.73, - "learning_rate": 8.175642224544805e-09, - "loss": 3.7541, - "step": 4247500 - }, - { - "epoch": 46.74, - "learning_rate": 8.161890092964409e-09, - "loss": 3.7395, - "step": 4248000 - }, - { - "epoch": 46.74, - "learning_rate": 8.148137961384015e-09, - "loss": 3.7485, - "step": 4248500 - }, - { - "epoch": 46.75, - "learning_rate": 8.13438582980362e-09, - "loss": 3.756, - "step": 4249000 - }, - { - "epoch": 46.75, - "learning_rate": 8.120633698223224e-09, - "loss": 3.7621, - "step": 4249500 - }, - { - "epoch": 46.76, - "learning_rate": 8.106881566642828e-09, - "loss": 3.7479, - "step": 4250000 - }, - { - "epoch": 46.76, - "learning_rate": 8.093129435062434e-09, - "loss": 3.7602, - "step": 4250500 - }, - { - "epoch": 46.77, - "learning_rate": 8.079377303482039e-09, - "loss": 3.7634, - "step": 4251000 - }, - { - "epoch": 46.77, - "learning_rate": 8.065625171901645e-09, - "loss": 3.7443, - "step": 4251500 - }, - { - "epoch": 46.78, - "learning_rate": 8.05187304032125e-09, - "loss": 3.7559, - "step": 4252000 - }, - { - "epoch": 46.78, - "learning_rate": 8.038120908740855e-09, - "loss": 3.7171, - "step": 4252500 - }, - { - "epoch": 46.79, - "learning_rate": 8.02436877716046e-09, - "loss": 3.75, - "step": 4253000 - }, - { - "epoch": 46.8, - "learning_rate": 8.010616645580065e-09, - "loss": 3.7479, - "step": 4253500 - }, - { - "epoch": 46.8, - "learning_rate": 7.99686451399967e-09, - "loss": 3.7599, - "step": 4254000 - }, - { - "epoch": 46.81, - "learning_rate": 7.983112382419274e-09, - "loss": 3.7456, - "step": 4254500 - }, - { - "epoch": 46.81, - "learning_rate": 7.96936025083888e-09, - "loss": 3.7565, - "step": 4255000 - }, - { - "epoch": 46.82, - "learning_rate": 7.955608119258485e-09, - "loss": 3.7389, - "step": 4255500 - }, - { - "epoch": 46.82, - "learning_rate": 7.941855987678089e-09, - "loss": 3.7445, - "step": 4256000 - }, - { - "epoch": 46.83, - "learning_rate": 7.928103856097695e-09, - "loss": 3.7421, - "step": 4256500 - }, - { - "epoch": 46.83, - "learning_rate": 7.9143517245173e-09, - "loss": 3.7476, - "step": 4257000 - }, - { - "epoch": 46.84, - "learning_rate": 7.900599592936905e-09, - "loss": 3.7461, - "step": 4257500 - }, - { - "epoch": 46.85, - "learning_rate": 7.88684746135651e-09, - "loss": 3.7429, - "step": 4258000 - }, - { - "epoch": 46.85, - "learning_rate": 7.873095329776116e-09, - "loss": 3.7518, - "step": 4258500 - }, - { - "epoch": 46.86, - "learning_rate": 7.85934319819572e-09, - "loss": 3.755, - "step": 4259000 - }, - { - "epoch": 46.86, - "learning_rate": 7.845591066615325e-09, - "loss": 3.7429, - "step": 4259500 - }, - { - "epoch": 46.87, - "learning_rate": 7.83183893503493e-09, - "loss": 3.7543, - "step": 4260000 - }, - { - "epoch": 46.87, - "learning_rate": 7.818086803454535e-09, - "loss": 3.7563, - "step": 4260500 - }, - { - "epoch": 46.88, - "learning_rate": 7.80433467187414e-09, - "loss": 3.7608, - "step": 4261000 - }, - { - "epoch": 46.88, - "learning_rate": 7.790582540293745e-09, - "loss": 3.7422, - "step": 4261500 - }, - { - "epoch": 46.89, - "learning_rate": 7.77683040871335e-09, - "loss": 3.7625, - "step": 4262000 - }, - { - "epoch": 46.89, - "learning_rate": 7.763078277132954e-09, - "loss": 3.7396, - "step": 4262500 - }, - { - "epoch": 46.9, - "learning_rate": 7.74932614555256e-09, - "loss": 3.767, - "step": 4263000 - }, - { - "epoch": 46.91, - "learning_rate": 7.735574013972165e-09, - "loss": 3.7305, - "step": 4263500 - }, - { - "epoch": 46.91, - "learning_rate": 7.72182188239177e-09, - "loss": 3.7202, - "step": 4264000 - }, - { - "epoch": 46.92, - "learning_rate": 7.708069750811377e-09, - "loss": 3.7485, - "step": 4264500 - }, - { - "epoch": 46.92, - "learning_rate": 7.694317619230981e-09, - "loss": 3.7391, - "step": 4265000 - }, - { - "epoch": 46.93, - "learning_rate": 7.680565487650585e-09, - "loss": 3.747, - "step": 4265500 - }, - { - "epoch": 46.93, - "learning_rate": 7.666813356070191e-09, - "loss": 3.7578, - "step": 4266000 - }, - { - "epoch": 46.94, - "learning_rate": 7.653061224489796e-09, - "loss": 3.7516, - "step": 4266500 - }, - { - "epoch": 46.94, - "learning_rate": 7.6393090929094e-09, - "loss": 3.738, - "step": 4267000 - }, - { - "epoch": 46.95, - "learning_rate": 7.625556961329006e-09, - "loss": 3.7409, - "step": 4267500 - }, - { - "epoch": 46.96, - "learning_rate": 7.61180482974861e-09, - "loss": 3.7632, - "step": 4268000 - }, - { - "epoch": 46.96, - "learning_rate": 7.598052698168215e-09, - "loss": 3.7623, - "step": 4268500 - }, - { - "epoch": 46.97, - "learning_rate": 7.58430056658782e-09, - "loss": 3.736, - "step": 4269000 - }, - { - "epoch": 46.97, - "learning_rate": 7.570548435007425e-09, - "loss": 3.7423, - "step": 4269500 - }, - { - "epoch": 46.98, - "learning_rate": 7.556796303427031e-09, - "loss": 3.7567, - "step": 4270000 - }, - { - "epoch": 46.98, - "learning_rate": 7.543044171846636e-09, - "loss": 3.7573, - "step": 4270500 - }, - { - "epoch": 46.99, - "learning_rate": 7.529292040266242e-09, - "loss": 3.7576, - "step": 4271000 - }, - { - "epoch": 46.99, - "learning_rate": 7.515539908685846e-09, - "loss": 3.7436, - "step": 4271500 - }, - { - "epoch": 47.0, - "learning_rate": 7.50178777710545e-09, - "loss": 3.7506, - "step": 4272000 - }, - { - "epoch": 47.0, - "eval_loss": 3.82542085647583, - "eval_runtime": 6.1493, - "eval_samples_per_second": 252.713, - "step": 4272065 - }, - { - "epoch": 47.0, - "learning_rate": 7.488035645525057e-09, - "loss": 3.7477, - "step": 4272500 - }, - { - "epoch": 47.01, - "learning_rate": 7.474283513944661e-09, - "loss": 3.7517, - "step": 4273000 - }, - { - "epoch": 47.02, - "learning_rate": 7.460531382364265e-09, - "loss": 3.7637, - "step": 4273500 - }, - { - "epoch": 47.02, - "learning_rate": 7.4467792507838715e-09, - "loss": 3.7417, - "step": 4274000 - }, - { - "epoch": 47.03, - "learning_rate": 7.433027119203477e-09, - "loss": 3.7648, - "step": 4274500 - }, - { - "epoch": 47.03, - "learning_rate": 7.419274987623081e-09, - "loss": 3.7351, - "step": 4275000 - }, - { - "epoch": 47.04, - "learning_rate": 7.405522856042687e-09, - "loss": 3.7618, - "step": 4275500 - }, - { - "epoch": 47.04, - "learning_rate": 7.3917707244622915e-09, - "loss": 3.7424, - "step": 4276000 - }, - { - "epoch": 47.05, - "learning_rate": 7.378018592881896e-09, - "loss": 3.7594, - "step": 4276500 - }, - { - "epoch": 47.05, - "learning_rate": 7.364266461301502e-09, - "loss": 3.75, - "step": 4277000 - }, - { - "epoch": 47.06, - "learning_rate": 7.350514329721107e-09, - "loss": 3.7568, - "step": 4277500 - }, - { - "epoch": 47.07, - "learning_rate": 7.3367621981407115e-09, - "loss": 3.7514, - "step": 4278000 - }, - { - "epoch": 47.07, - "learning_rate": 7.323010066560316e-09, - "loss": 3.7499, - "step": 4278500 - }, - { - "epoch": 47.08, - "learning_rate": 7.309257934979922e-09, - "loss": 3.7594, - "step": 4279000 - }, - { - "epoch": 47.08, - "learning_rate": 7.295505803399526e-09, - "loss": 3.7484, - "step": 4279500 - }, - { - "epoch": 47.09, - "learning_rate": 7.2817536718191315e-09, - "loss": 3.7633, - "step": 4280000 - }, - { - "epoch": 47.09, - "learning_rate": 7.268001540238737e-09, - "loss": 3.7397, - "step": 4280500 - }, - { - "epoch": 47.1, - "learning_rate": 7.254249408658342e-09, - "loss": 3.7449, - "step": 4281000 - }, - { - "epoch": 47.1, - "learning_rate": 7.240497277077946e-09, - "loss": 3.7485, - "step": 4281500 - }, - { - "epoch": 47.11, - "learning_rate": 7.226745145497552e-09, - "loss": 3.7519, - "step": 4282000 - }, - { - "epoch": 47.11, - "learning_rate": 7.212993013917157e-09, - "loss": 3.7414, - "step": 4282500 - }, - { - "epoch": 47.12, - "learning_rate": 7.199240882336761e-09, - "loss": 3.7679, - "step": 4283000 - }, - { - "epoch": 47.13, - "learning_rate": 7.185488750756367e-09, - "loss": 3.7401, - "step": 4283500 - }, - { - "epoch": 47.13, - "learning_rate": 7.171736619175972e-09, - "loss": 3.7319, - "step": 4284000 - }, - { - "epoch": 47.14, - "learning_rate": 7.157984487595577e-09, - "loss": 3.7643, - "step": 4284500 - }, - { - "epoch": 47.14, - "learning_rate": 7.144232356015183e-09, - "loss": 3.7513, - "step": 4285000 - }, - { - "epoch": 47.15, - "learning_rate": 7.130480224434787e-09, - "loss": 3.7522, - "step": 4285500 - }, - { - "epoch": 47.15, - "learning_rate": 7.1167280928543915e-09, - "loss": 3.7463, - "step": 4286000 - }, - { - "epoch": 47.16, - "learning_rate": 7.1029759612739975e-09, - "loss": 3.7363, - "step": 4286500 - }, - { - "epoch": 47.16, - "learning_rate": 7.089223829693603e-09, - "loss": 3.7428, - "step": 4287000 - }, - { - "epoch": 47.17, - "learning_rate": 7.075471698113207e-09, - "loss": 3.76, - "step": 4287500 - }, - { - "epoch": 47.18, - "learning_rate": 7.0617195665328115e-09, - "loss": 3.7648, - "step": 4288000 - }, - { - "epoch": 47.18, - "learning_rate": 7.0479674349524175e-09, - "loss": 3.7425, - "step": 4288500 - }, - { - "epoch": 47.19, - "learning_rate": 7.034215303372022e-09, - "loss": 3.7701, - "step": 4289000 - }, - { - "epoch": 47.19, - "learning_rate": 7.020463171791627e-09, - "loss": 3.7554, - "step": 4289500 - }, - { - "epoch": 47.2, - "learning_rate": 7.006711040211233e-09, - "loss": 3.7403, - "step": 4290000 - }, - { - "epoch": 47.2, - "learning_rate": 6.9929589086308375e-09, - "loss": 3.7454, - "step": 4290500 - }, - { - "epoch": 47.21, - "learning_rate": 6.979206777050442e-09, - "loss": 3.7688, - "step": 4291000 - }, - { - "epoch": 47.21, - "learning_rate": 6.965454645470048e-09, - "loss": 3.7511, - "step": 4291500 - }, - { - "epoch": 47.22, - "learning_rate": 6.951702513889652e-09, - "loss": 3.7574, - "step": 4292000 - }, - { - "epoch": 47.22, - "learning_rate": 6.9379503823092575e-09, - "loss": 3.7542, - "step": 4292500 - }, - { - "epoch": 47.23, - "learning_rate": 6.9241982507288636e-09, - "loss": 3.7353, - "step": 4293000 - }, - { - "epoch": 47.24, - "learning_rate": 6.910446119148468e-09, - "loss": 3.7622, - "step": 4293500 - }, - { - "epoch": 47.24, - "learning_rate": 6.896693987568072e-09, - "loss": 3.7537, - "step": 4294000 - }, - { - "epoch": 47.25, - "learning_rate": 6.882941855987678e-09, - "loss": 3.751, - "step": 4294500 - }, - { - "epoch": 47.25, - "learning_rate": 6.869189724407283e-09, - "loss": 3.7428, - "step": 4295000 - }, - { - "epoch": 47.26, - "learning_rate": 6.855437592826888e-09, - "loss": 3.7447, - "step": 4295500 - }, - { - "epoch": 47.26, - "learning_rate": 6.841685461246492e-09, - "loss": 3.7409, - "step": 4296000 - }, - { - "epoch": 47.27, - "learning_rate": 6.827933329666098e-09, - "loss": 3.7461, - "step": 4296500 - }, - { - "epoch": 47.27, - "learning_rate": 6.814181198085703e-09, - "loss": 3.7604, - "step": 4297000 - }, - { - "epoch": 47.28, - "learning_rate": 6.800429066505307e-09, - "loss": 3.7721, - "step": 4297500 - }, - { - "epoch": 47.29, - "learning_rate": 6.786676934924913e-09, - "loss": 3.7664, - "step": 4298000 - }, - { - "epoch": 47.29, - "learning_rate": 6.772924803344518e-09, - "loss": 3.7565, - "step": 4298500 - }, - { - "epoch": 47.3, - "learning_rate": 6.759172671764123e-09, - "loss": 3.7489, - "step": 4299000 - }, - { - "epoch": 47.3, - "learning_rate": 6.745420540183729e-09, - "loss": 3.7602, - "step": 4299500 - }, - { - "epoch": 47.31, - "learning_rate": 6.731668408603333e-09, - "loss": 3.751, - "step": 4300000 - }, - { - "epoch": 47.31, - "learning_rate": 6.7179162770229376e-09, - "loss": 3.7731, - "step": 4300500 - }, - { - "epoch": 47.32, - "learning_rate": 6.704164145442544e-09, - "loss": 3.7556, - "step": 4301000 - }, - { - "epoch": 47.32, - "learning_rate": 6.690412013862148e-09, - "loss": 3.7607, - "step": 4301500 - }, - { - "epoch": 47.33, - "learning_rate": 6.676659882281753e-09, - "loss": 3.7475, - "step": 4302000 - }, - { - "epoch": 47.33, - "learning_rate": 6.662907750701359e-09, - "loss": 3.7663, - "step": 4302500 - }, - { - "epoch": 47.34, - "learning_rate": 6.649155619120964e-09, - "loss": 3.7458, - "step": 4303000 - }, - { - "epoch": 47.35, - "learning_rate": 6.635403487540568e-09, - "loss": 3.7558, - "step": 4303500 - }, - { - "epoch": 47.35, - "learning_rate": 6.621651355960174e-09, - "loss": 3.7328, - "step": 4304000 - }, - { - "epoch": 47.36, - "learning_rate": 6.607899224379778e-09, - "loss": 3.728, - "step": 4304500 - }, - { - "epoch": 47.36, - "learning_rate": 6.594147092799384e-09, - "loss": 3.7344, - "step": 4305000 - }, - { - "epoch": 47.37, - "learning_rate": 6.580394961218988e-09, - "loss": 3.742, - "step": 4305500 - }, - { - "epoch": 47.37, - "learning_rate": 6.566642829638594e-09, - "loss": 3.7406, - "step": 4306000 - }, - { - "epoch": 47.38, - "learning_rate": 6.552890698058198e-09, - "loss": 3.7428, - "step": 4306500 - }, - { - "epoch": 47.38, - "learning_rate": 6.539138566477803e-09, - "loss": 3.7373, - "step": 4307000 - }, - { - "epoch": 47.39, - "learning_rate": 6.525386434897409e-09, - "loss": 3.7524, - "step": 4307500 - }, - { - "epoch": 47.4, - "learning_rate": 6.511634303317014e-09, - "loss": 3.7355, - "step": 4308000 - }, - { - "epoch": 47.4, - "learning_rate": 6.497882171736618e-09, - "loss": 3.7486, - "step": 4308500 - }, - { - "epoch": 47.41, - "learning_rate": 6.4841300401562244e-09, - "loss": 3.7519, - "step": 4309000 - }, - { - "epoch": 47.41, - "learning_rate": 6.470377908575829e-09, - "loss": 3.7372, - "step": 4309500 - }, - { - "epoch": 47.42, - "learning_rate": 6.456625776995433e-09, - "loss": 3.7493, - "step": 4310000 - }, - { - "epoch": 47.42, - "learning_rate": 6.442873645415039e-09, - "loss": 3.7614, - "step": 4310500 - }, - { - "epoch": 47.43, - "learning_rate": 6.4291215138346444e-09, - "loss": 3.7632, - "step": 4311000 - }, - { - "epoch": 47.43, - "learning_rate": 6.415369382254249e-09, - "loss": 3.736, - "step": 4311500 - }, - { - "epoch": 47.44, - "learning_rate": 6.401617250673855e-09, - "loss": 3.7527, - "step": 4312000 - }, - { - "epoch": 47.44, - "learning_rate": 6.387865119093459e-09, - "loss": 3.765, - "step": 4312500 - }, - { - "epoch": 47.45, - "learning_rate": 6.374112987513064e-09, - "loss": 3.7258, - "step": 4313000 - }, - { - "epoch": 47.46, - "learning_rate": 6.36036085593267e-09, - "loss": 3.7956, - "step": 4313500 - }, - { - "epoch": 47.46, - "learning_rate": 6.346608724352275e-09, - "loss": 3.7462, - "step": 4314000 - }, - { - "epoch": 47.47, - "learning_rate": 6.332856592771879e-09, - "loss": 3.7437, - "step": 4314500 - }, - { - "epoch": 47.47, - "learning_rate": 6.319104461191484e-09, - "loss": 3.7519, - "step": 4315000 - }, - { - "epoch": 47.48, - "learning_rate": 6.30535232961109e-09, - "loss": 3.7607, - "step": 4315500 - }, - { - "epoch": 47.48, - "learning_rate": 6.291600198030694e-09, - "loss": 3.7666, - "step": 4316000 - }, - { - "epoch": 47.49, - "learning_rate": 6.277848066450299e-09, - "loss": 3.7237, - "step": 4316500 - }, - { - "epoch": 47.49, - "learning_rate": 6.264095934869905e-09, - "loss": 3.7331, - "step": 4317000 - }, - { - "epoch": 47.5, - "learning_rate": 6.25034380328951e-09, - "loss": 3.7588, - "step": 4317500 - }, - { - "epoch": 47.51, - "learning_rate": 6.236591671709115e-09, - "loss": 3.744, - "step": 4318000 - }, - { - "epoch": 47.51, - "learning_rate": 6.222839540128719e-09, - "loss": 3.7605, - "step": 4318500 - }, - { - "epoch": 47.52, - "learning_rate": 6.2090874085483244e-09, - "loss": 3.7405, - "step": 4319000 - }, - { - "epoch": 47.52, - "learning_rate": 6.19533527696793e-09, - "loss": 3.7559, - "step": 4319500 - }, - { - "epoch": 47.53, - "learning_rate": 6.181583145387535e-09, - "loss": 3.7391, - "step": 4320000 - }, - { - "epoch": 47.53, - "learning_rate": 6.16783101380714e-09, - "loss": 3.7581, - "step": 4320500 - }, - { - "epoch": 47.54, - "learning_rate": 6.154078882226745e-09, - "loss": 3.7441, - "step": 4321000 - }, - { - "epoch": 47.54, - "learning_rate": 6.14032675064635e-09, - "loss": 3.74, - "step": 4321500 - }, - { - "epoch": 47.55, - "learning_rate": 6.126574619065955e-09, - "loss": 3.7476, - "step": 4322000 - }, - { - "epoch": 47.55, - "learning_rate": 6.11282248748556e-09, - "loss": 3.7406, - "step": 4322500 - }, - { - "epoch": 47.56, - "learning_rate": 6.099070355905165e-09, - "loss": 3.7379, - "step": 4323000 - }, - { - "epoch": 47.57, - "learning_rate": 6.0853182243247705e-09, - "loss": 3.7345, - "step": 4323500 - }, - { - "epoch": 47.57, - "learning_rate": 6.071566092744375e-09, - "loss": 3.743, - "step": 4324000 - }, - { - "epoch": 47.58, - "learning_rate": 6.05781396116398e-09, - "loss": 3.7558, - "step": 4324500 - }, - { - "epoch": 47.58, - "learning_rate": 6.044061829583585e-09, - "loss": 3.7497, - "step": 4325000 - }, - { - "epoch": 47.59, - "learning_rate": 6.03030969800319e-09, - "loss": 3.7557, - "step": 4325500 - }, - { - "epoch": 47.59, - "learning_rate": 6.016557566422796e-09, - "loss": 3.752, - "step": 4326000 - }, - { - "epoch": 47.6, - "learning_rate": 6.0028054348424e-09, - "loss": 3.7354, - "step": 4326500 - }, - { - "epoch": 47.6, - "learning_rate": 5.989053303262005e-09, - "loss": 3.7471, - "step": 4327000 - }, - { - "epoch": 47.61, - "learning_rate": 5.9753011716816105e-09, - "loss": 3.7491, - "step": 4327500 - }, - { - "epoch": 47.62, - "learning_rate": 5.961549040101215e-09, - "loss": 3.745, - "step": 4328000 - }, - { - "epoch": 47.62, - "learning_rate": 5.94779690852082e-09, - "loss": 3.7313, - "step": 4328500 - }, - { - "epoch": 47.63, - "learning_rate": 5.934044776940426e-09, - "loss": 3.7358, - "step": 4329000 - }, - { - "epoch": 47.63, - "learning_rate": 5.9202926453600305e-09, - "loss": 3.7578, - "step": 4329500 - }, - { - "epoch": 47.64, - "learning_rate": 5.906540513779636e-09, - "loss": 3.7633, - "step": 4330000 - }, - { - "epoch": 47.64, - "learning_rate": 5.892788382199241e-09, - "loss": 3.7476, - "step": 4330500 - }, - { - "epoch": 47.65, - "learning_rate": 5.879036250618845e-09, - "loss": 3.7415, - "step": 4331000 - }, - { - "epoch": 47.65, - "learning_rate": 5.8652841190384505e-09, - "loss": 3.738, - "step": 4331500 - }, - { - "epoch": 47.66, - "learning_rate": 5.851531987458056e-09, - "loss": 3.7562, - "step": 4332000 - }, - { - "epoch": 47.66, - "learning_rate": 5.837779855877661e-09, - "loss": 3.748, - "step": 4332500 - }, - { - "epoch": 47.67, - "learning_rate": 5.824027724297266e-09, - "loss": 3.7452, - "step": 4333000 - }, - { - "epoch": 47.68, - "learning_rate": 5.8102755927168705e-09, - "loss": 3.7453, - "step": 4333500 - }, - { - "epoch": 47.68, - "learning_rate": 5.796523461136476e-09, - "loss": 3.7358, - "step": 4334000 - }, - { - "epoch": 47.69, - "learning_rate": 5.782771329556081e-09, - "loss": 3.7481, - "step": 4334500 - }, - { - "epoch": 47.69, - "learning_rate": 5.769019197975686e-09, - "loss": 3.7374, - "step": 4335000 - }, - { - "epoch": 47.7, - "learning_rate": 5.755267066395291e-09, - "loss": 3.7508, - "step": 4335500 - }, - { - "epoch": 47.7, - "learning_rate": 5.741514934814896e-09, - "loss": 3.7381, - "step": 4336000 - }, - { - "epoch": 47.71, - "learning_rate": 5.727762803234501e-09, - "loss": 3.7378, - "step": 4336500 - }, - { - "epoch": 47.71, - "learning_rate": 5.714010671654106e-09, - "loss": 3.7532, - "step": 4337000 - }, - { - "epoch": 47.72, - "learning_rate": 5.7002585400737105e-09, - "loss": 3.7556, - "step": 4337500 - }, - { - "epoch": 47.73, - "learning_rate": 5.6865064084933165e-09, - "loss": 3.7525, - "step": 4338000 - }, - { - "epoch": 47.73, - "learning_rate": 5.672754276912922e-09, - "loss": 3.7514, - "step": 4338500 - }, - { - "epoch": 47.74, - "learning_rate": 5.659002145332526e-09, - "loss": 3.7528, - "step": 4339000 - }, - { - "epoch": 47.74, - "learning_rate": 5.645250013752131e-09, - "loss": 3.7392, - "step": 4339500 - }, - { - "epoch": 47.75, - "learning_rate": 5.6314978821717365e-09, - "loss": 3.7407, - "step": 4340000 - }, - { - "epoch": 47.75, - "learning_rate": 5.617745750591341e-09, - "loss": 3.7397, - "step": 4340500 - }, - { - "epoch": 47.76, - "learning_rate": 5.603993619010947e-09, - "loss": 3.7502, - "step": 4341000 - }, - { - "epoch": 47.76, - "learning_rate": 5.590241487430551e-09, - "loss": 3.7521, - "step": 4341500 - }, - { - "epoch": 47.77, - "learning_rate": 5.5764893558501566e-09, - "loss": 3.7812, - "step": 4342000 - }, - { - "epoch": 47.77, - "learning_rate": 5.562737224269762e-09, - "loss": 3.7593, - "step": 4342500 - }, - { - "epoch": 47.78, - "learning_rate": 5.548985092689366e-09, - "loss": 3.7577, - "step": 4343000 - }, - { - "epoch": 47.79, - "learning_rate": 5.535232961108971e-09, - "loss": 3.736, - "step": 4343500 - }, - { - "epoch": 47.79, - "learning_rate": 5.5214808295285766e-09, - "loss": 3.751, - "step": 4344000 - }, - { - "epoch": 47.8, - "learning_rate": 5.507728697948182e-09, - "loss": 3.7477, - "step": 4344500 - }, - { - "epoch": 47.8, - "learning_rate": 5.493976566367787e-09, - "loss": 3.7708, - "step": 4345000 - }, - { - "epoch": 47.81, - "learning_rate": 5.480224434787391e-09, - "loss": 3.7372, - "step": 4345500 - }, - { - "epoch": 47.81, - "learning_rate": 5.4664723032069966e-09, - "loss": 3.744, - "step": 4346000 - }, - { - "epoch": 47.82, - "learning_rate": 5.452720171626602e-09, - "loss": 3.7472, - "step": 4346500 - }, - { - "epoch": 47.82, - "learning_rate": 5.438968040046207e-09, - "loss": 3.7483, - "step": 4347000 - }, - { - "epoch": 47.83, - "learning_rate": 5.425215908465812e-09, - "loss": 3.752, - "step": 4347500 - }, - { - "epoch": 47.84, - "learning_rate": 5.411463776885417e-09, - "loss": 3.7489, - "step": 4348000 - }, - { - "epoch": 47.84, - "learning_rate": 5.397711645305022e-09, - "loss": 3.7664, - "step": 4348500 - }, - { - "epoch": 47.85, - "learning_rate": 5.383959513724627e-09, - "loss": 3.757, - "step": 4349000 - }, - { - "epoch": 47.85, - "learning_rate": 5.370207382144232e-09, - "loss": 3.7552, - "step": 4349500 - }, - { - "epoch": 47.86, - "learning_rate": 5.356455250563837e-09, - "loss": 3.7389, - "step": 4350000 - }, - { - "epoch": 47.86, - "learning_rate": 5.342703118983443e-09, - "loss": 3.7323, - "step": 4350500 - }, - { - "epoch": 47.87, - "learning_rate": 5.328950987403047e-09, - "loss": 3.76, - "step": 4351000 - }, - { - "epoch": 47.87, - "learning_rate": 5.315198855822652e-09, - "loss": 3.7661, - "step": 4351500 - }, - { - "epoch": 47.88, - "learning_rate": 5.301446724242257e-09, - "loss": 3.7539, - "step": 4352000 - }, - { - "epoch": 47.88, - "learning_rate": 5.287694592661862e-09, - "loss": 3.7369, - "step": 4352500 - }, - { - "epoch": 47.89, - "learning_rate": 5.273942461081468e-09, - "loss": 3.7596, - "step": 4353000 - }, - { - "epoch": 47.9, - "learning_rate": 5.260190329501073e-09, - "loss": 3.7501, - "step": 4353500 - }, - { - "epoch": 47.9, - "learning_rate": 5.246438197920677e-09, - "loss": 3.731, - "step": 4354000 - }, - { - "epoch": 47.91, - "learning_rate": 5.232686066340283e-09, - "loss": 3.7344, - "step": 4354500 - }, - { - "epoch": 47.91, - "learning_rate": 5.218933934759887e-09, - "loss": 3.7615, - "step": 4355000 - }, - { - "epoch": 47.92, - "learning_rate": 5.205181803179492e-09, - "loss": 3.7467, - "step": 4355500 - }, - { - "epoch": 47.92, - "learning_rate": 5.191429671599097e-09, - "loss": 3.7315, - "step": 4356000 - }, - { - "epoch": 47.93, - "learning_rate": 5.177677540018703e-09, - "loss": 3.7494, - "step": 4356500 - }, - { - "epoch": 47.93, - "learning_rate": 5.163925408438308e-09, - "loss": 3.7478, - "step": 4357000 - }, - { - "epoch": 47.94, - "learning_rate": 5.150173276857913e-09, - "loss": 3.7507, - "step": 4357500 - }, - { - "epoch": 47.95, - "learning_rate": 5.136421145277517e-09, - "loss": 3.7425, - "step": 4358000 - }, - { - "epoch": 47.95, - "learning_rate": 5.122669013697123e-09, - "loss": 3.7673, - "step": 4358500 - }, - { - "epoch": 47.96, - "learning_rate": 5.108916882116728e-09, - "loss": 3.7266, - "step": 4359000 - }, - { - "epoch": 47.96, - "learning_rate": 5.095164750536333e-09, - "loss": 3.7449, - "step": 4359500 - }, - { - "epoch": 47.97, - "learning_rate": 5.081412618955938e-09, - "loss": 3.7528, - "step": 4360000 - }, - { - "epoch": 47.97, - "learning_rate": 5.067660487375543e-09, - "loss": 3.7714, - "step": 4360500 - }, - { - "epoch": 47.98, - "learning_rate": 5.053908355795148e-09, - "loss": 3.75, - "step": 4361000 - }, - { - "epoch": 47.98, - "learning_rate": 5.040156224214753e-09, - "loss": 3.7531, - "step": 4361500 - }, - { - "epoch": 47.99, - "learning_rate": 5.026404092634358e-09, - "loss": 3.7609, - "step": 4362000 - }, - { - "epoch": 47.99, - "learning_rate": 5.0126519610539634e-09, - "loss": 3.7544, - "step": 4362500 - }, - { - "epoch": 48.0, - "eval_loss": 3.8254196643829346, - "eval_runtime": 6.1443, - "eval_samples_per_second": 252.916, - "step": 4362960 - }, - { - "epoch": 48.0, - "learning_rate": 4.998899829473569e-09, - "loss": 3.7166, - "step": 4363000 - }, - { - "epoch": 48.01, - "learning_rate": 4.985147697893173e-09, - "loss": 3.7402, - "step": 4363500 - }, - { - "epoch": 48.01, - "learning_rate": 4.971395566312778e-09, - "loss": 3.7379, - "step": 4364000 - }, - { - "epoch": 48.02, - "learning_rate": 4.957643434732383e-09, - "loss": 3.7461, - "step": 4364500 - }, - { - "epoch": 48.02, - "learning_rate": 4.943891303151989e-09, - "loss": 3.7438, - "step": 4365000 - }, - { - "epoch": 48.03, - "learning_rate": 4.930139171571594e-09, - "loss": 3.7506, - "step": 4365500 - }, - { - "epoch": 48.03, - "learning_rate": 4.916387039991198e-09, - "loss": 3.7528, - "step": 4366000 - }, - { - "epoch": 48.04, - "learning_rate": 4.9026349084108035e-09, - "loss": 3.7547, - "step": 4366500 - }, - { - "epoch": 48.04, - "learning_rate": 4.888882776830409e-09, - "loss": 3.7486, - "step": 4367000 - }, - { - "epoch": 48.05, - "learning_rate": 4.875130645250013e-09, - "loss": 3.7267, - "step": 4367500 - }, - { - "epoch": 48.06, - "learning_rate": 4.861378513669618e-09, - "loss": 3.7328, - "step": 4368000 - }, - { - "epoch": 48.06, - "learning_rate": 4.847626382089224e-09, - "loss": 3.7647, - "step": 4368500 - }, - { - "epoch": 48.07, - "learning_rate": 4.833874250508829e-09, - "loss": 3.7322, - "step": 4369000 - }, - { - "epoch": 48.07, - "learning_rate": 4.820122118928434e-09, - "loss": 3.7548, - "step": 4369500 - }, - { - "epoch": 48.08, - "learning_rate": 4.806369987348038e-09, - "loss": 3.7432, - "step": 4370000 - }, - { - "epoch": 48.08, - "learning_rate": 4.7926178557676435e-09, - "loss": 3.7728, - "step": 4370500 - }, - { - "epoch": 48.09, - "learning_rate": 4.778865724187249e-09, - "loss": 3.7384, - "step": 4371000 - }, - { - "epoch": 48.09, - "learning_rate": 4.765113592606854e-09, - "loss": 3.7466, - "step": 4371500 - }, - { - "epoch": 48.1, - "learning_rate": 4.751361461026459e-09, - "loss": 3.739, - "step": 4372000 - }, - { - "epoch": 48.1, - "learning_rate": 4.737609329446064e-09, - "loss": 3.7598, - "step": 4372500 - }, - { - "epoch": 48.11, - "learning_rate": 4.723857197865669e-09, - "loss": 3.7602, - "step": 4373000 - }, - { - "epoch": 48.12, - "learning_rate": 4.710105066285274e-09, - "loss": 3.7342, - "step": 4373500 - }, - { - "epoch": 48.12, - "learning_rate": 4.696352934704879e-09, - "loss": 3.7764, - "step": 4374000 - }, - { - "epoch": 48.13, - "learning_rate": 4.682600803124484e-09, - "loss": 3.7475, - "step": 4374500 - }, - { - "epoch": 48.13, - "learning_rate": 4.6688486715440895e-09, - "loss": 3.7529, - "step": 4375000 - }, - { - "epoch": 48.14, - "learning_rate": 4.655096539963694e-09, - "loss": 3.7386, - "step": 4375500 - }, - { - "epoch": 48.14, - "learning_rate": 4.641344408383299e-09, - "loss": 3.7499, - "step": 4376000 - }, - { - "epoch": 48.15, - "learning_rate": 4.627592276802904e-09, - "loss": 3.7413, - "step": 4376500 - }, - { - "epoch": 48.15, - "learning_rate": 4.6138401452225095e-09, - "loss": 3.7414, - "step": 4377000 - }, - { - "epoch": 48.16, - "learning_rate": 4.600088013642115e-09, - "loss": 3.7251, - "step": 4377500 - }, - { - "epoch": 48.17, - "learning_rate": 4.58633588206172e-09, - "loss": 3.7455, - "step": 4378000 - }, - { - "epoch": 48.17, - "learning_rate": 4.572583750481324e-09, - "loss": 3.7652, - "step": 4378500 - }, - { - "epoch": 48.18, - "learning_rate": 4.5588316189009295e-09, - "loss": 3.7359, - "step": 4379000 - }, - { - "epoch": 48.18, - "learning_rate": 4.545079487320534e-09, - "loss": 3.7437, - "step": 4379500 - }, - { - "epoch": 48.19, - "learning_rate": 4.531327355740139e-09, - "loss": 3.7568, - "step": 4380000 - }, - { - "epoch": 48.19, - "learning_rate": 4.517575224159745e-09, - "loss": 3.7435, - "step": 4380500 - }, - { - "epoch": 48.2, - "learning_rate": 4.5038230925793495e-09, - "loss": 3.7381, - "step": 4381000 - }, - { - "epoch": 48.2, - "learning_rate": 4.490070960998955e-09, - "loss": 3.7525, - "step": 4381500 - }, - { - "epoch": 48.21, - "learning_rate": 4.47631882941856e-09, - "loss": 3.7538, - "step": 4382000 - }, - { - "epoch": 48.21, - "learning_rate": 4.462566697838164e-09, - "loss": 3.7301, - "step": 4382500 - }, - { - "epoch": 48.22, - "learning_rate": 4.4488145662577695e-09, - "loss": 3.749, - "step": 4383000 - }, - { - "epoch": 48.23, - "learning_rate": 4.435062434677375e-09, - "loss": 3.7547, - "step": 4383500 - }, - { - "epoch": 48.23, - "learning_rate": 4.42131030309698e-09, - "loss": 3.7409, - "step": 4384000 - }, - { - "epoch": 48.24, - "learning_rate": 4.407558171516585e-09, - "loss": 3.7486, - "step": 4384500 - }, - { - "epoch": 48.24, - "learning_rate": 4.3938060399361895e-09, - "loss": 3.7376, - "step": 4385000 - }, - { - "epoch": 48.25, - "learning_rate": 4.380053908355795e-09, - "loss": 3.7496, - "step": 4385500 - }, - { - "epoch": 48.25, - "learning_rate": 4.3663017767754e-09, - "loss": 3.733, - "step": 4386000 - }, - { - "epoch": 48.26, - "learning_rate": 4.352549645195005e-09, - "loss": 3.721, - "step": 4386500 - }, - { - "epoch": 48.26, - "learning_rate": 4.33879751361461e-09, - "loss": 3.7447, - "step": 4387000 - }, - { - "epoch": 48.27, - "learning_rate": 4.325045382034215e-09, - "loss": 3.7583, - "step": 4387500 - }, - { - "epoch": 48.28, - "learning_rate": 4.31129325045382e-09, - "loss": 3.7467, - "step": 4388000 - }, - { - "epoch": 48.28, - "learning_rate": 4.297541118873425e-09, - "loss": 3.7578, - "step": 4388500 - }, - { - "epoch": 48.29, - "learning_rate": 4.28378898729303e-09, - "loss": 3.7593, - "step": 4389000 - }, - { - "epoch": 48.29, - "learning_rate": 4.2700368557126356e-09, - "loss": 3.7424, - "step": 4389500 - }, - { - "epoch": 48.3, - "learning_rate": 4.256284724132241e-09, - "loss": 3.7456, - "step": 4390000 - }, - { - "epoch": 48.3, - "learning_rate": 4.242532592551845e-09, - "loss": 3.757, - "step": 4390500 - }, - { - "epoch": 48.31, - "learning_rate": 4.22878046097145e-09, - "loss": 3.7493, - "step": 4391000 - }, - { - "epoch": 48.31, - "learning_rate": 4.2150283293910556e-09, - "loss": 3.7349, - "step": 4391500 - }, - { - "epoch": 48.32, - "learning_rate": 4.20127619781066e-09, - "loss": 3.7551, - "step": 4392000 - }, - { - "epoch": 48.32, - "learning_rate": 4.187524066230266e-09, - "loss": 3.7541, - "step": 4392500 - }, - { - "epoch": 48.33, - "learning_rate": 4.17377193464987e-09, - "loss": 3.7533, - "step": 4393000 - }, - { - "epoch": 48.34, - "learning_rate": 4.1600198030694756e-09, - "loss": 3.7539, - "step": 4393500 - }, - { - "epoch": 48.34, - "learning_rate": 4.146267671489081e-09, - "loss": 3.7386, - "step": 4394000 - }, - { - "epoch": 48.35, - "learning_rate": 4.132515539908685e-09, - "loss": 3.7399, - "step": 4394500 - }, - { - "epoch": 48.35, - "learning_rate": 4.11876340832829e-09, - "loss": 3.7529, - "step": 4395000 - }, - { - "epoch": 48.36, - "learning_rate": 4.105011276747896e-09, - "loss": 3.7637, - "step": 4395500 - }, - { - "epoch": 48.36, - "learning_rate": 4.091259145167501e-09, - "loss": 3.7651, - "step": 4396000 - }, - { - "epoch": 48.37, - "learning_rate": 4.077507013587106e-09, - "loss": 3.7618, - "step": 4396500 - }, - { - "epoch": 48.37, - "learning_rate": 4.06375488200671e-09, - "loss": 3.7608, - "step": 4397000 - }, - { - "epoch": 48.38, - "learning_rate": 4.0500027504263156e-09, - "loss": 3.7565, - "step": 4397500 - }, - { - "epoch": 48.39, - "learning_rate": 4.036250618845921e-09, - "loss": 3.7553, - "step": 4398000 - }, - { - "epoch": 48.39, - "learning_rate": 4.022498487265526e-09, - "loss": 3.7378, - "step": 4398500 - }, - { - "epoch": 48.4, - "learning_rate": 4.008746355685131e-09, - "loss": 3.7637, - "step": 4399000 - }, - { - "epoch": 48.4, - "learning_rate": 3.994994224104736e-09, - "loss": 3.7549, - "step": 4399500 - }, - { - "epoch": 48.41, - "learning_rate": 3.981242092524341e-09, - "loss": 3.7407, - "step": 4400000 - }, - { - "epoch": 48.41, - "learning_rate": 3.967489960943946e-09, - "loss": 3.7699, - "step": 4400500 - }, - { - "epoch": 48.42, - "learning_rate": 3.953737829363551e-09, - "loss": 3.7527, - "step": 4401000 - }, - { - "epoch": 48.42, - "learning_rate": 3.939985697783156e-09, - "loss": 3.7621, - "step": 4401500 - }, - { - "epoch": 48.43, - "learning_rate": 3.926233566202762e-09, - "loss": 3.7389, - "step": 4402000 - }, - { - "epoch": 48.44, - "learning_rate": 3.912481434622366e-09, - "loss": 3.7628, - "step": 4402500 - }, - { - "epoch": 48.44, - "learning_rate": 3.898729303041971e-09, - "loss": 3.7428, - "step": 4403000 - }, - { - "epoch": 48.45, - "learning_rate": 3.884977171461576e-09, - "loss": 3.7589, - "step": 4403500 - }, - { - "epoch": 48.45, - "learning_rate": 3.871225039881181e-09, - "loss": 3.749, - "step": 4404000 - }, - { - "epoch": 48.46, - "learning_rate": 3.857472908300787e-09, - "loss": 3.7487, - "step": 4404500 - }, - { - "epoch": 48.46, - "learning_rate": 3.843720776720392e-09, - "loss": 3.738, - "step": 4405000 - }, - { - "epoch": 48.47, - "learning_rate": 3.829968645139996e-09, - "loss": 3.7464, - "step": 4405500 - }, - { - "epoch": 48.47, - "learning_rate": 3.816216513559602e-09, - "loss": 3.7512, - "step": 4406000 - }, - { - "epoch": 48.48, - "learning_rate": 3.802464381979206e-09, - "loss": 3.759, - "step": 4406500 - }, - { - "epoch": 48.48, - "learning_rate": 3.788712250398811e-09, - "loss": 3.7587, - "step": 4407000 - }, - { - "epoch": 48.49, - "learning_rate": 3.774960118818417e-09, - "loss": 3.7469, - "step": 4407500 - }, - { - "epoch": 48.5, - "learning_rate": 3.761207987238022e-09, - "loss": 3.7476, - "step": 4408000 - }, - { - "epoch": 48.5, - "learning_rate": 3.747455855657627e-09, - "loss": 3.7413, - "step": 4408500 - }, - { - "epoch": 48.51, - "learning_rate": 3.733703724077232e-09, - "loss": 3.7479, - "step": 4409000 - }, - { - "epoch": 48.51, - "learning_rate": 3.719951592496837e-09, - "loss": 3.7538, - "step": 4409500 - }, - { - "epoch": 48.52, - "learning_rate": 3.706199460916442e-09, - "loss": 3.7385, - "step": 4410000 - }, - { - "epoch": 48.52, - "learning_rate": 3.6924473293360472e-09, - "loss": 3.7471, - "step": 4410500 - }, - { - "epoch": 48.53, - "learning_rate": 3.678695197755652e-09, - "loss": 3.7521, - "step": 4411000 - }, - { - "epoch": 48.53, - "learning_rate": 3.6649430661752572e-09, - "loss": 3.7512, - "step": 4411500 - }, - { - "epoch": 48.54, - "learning_rate": 3.6511909345948616e-09, - "loss": 3.7526, - "step": 4412000 - }, - { - "epoch": 48.55, - "learning_rate": 3.637438803014467e-09, - "loss": 3.7538, - "step": 4412500 - }, - { - "epoch": 48.55, - "learning_rate": 3.6236866714340725e-09, - "loss": 3.7581, - "step": 4413000 - }, - { - "epoch": 48.56, - "learning_rate": 3.609934539853677e-09, - "loss": 3.7472, - "step": 4413500 - }, - { - "epoch": 48.56, - "learning_rate": 3.596182408273282e-09, - "loss": 3.7459, - "step": 4414000 - }, - { - "epoch": 48.57, - "learning_rate": 3.5824302766928877e-09, - "loss": 3.7428, - "step": 4414500 - }, - { - "epoch": 48.57, - "learning_rate": 3.568678145112492e-09, - "loss": 3.7592, - "step": 4415000 - }, - { - "epoch": 48.58, - "learning_rate": 3.5549260135320973e-09, - "loss": 3.7543, - "step": 4415500 - }, - { - "epoch": 48.58, - "learning_rate": 3.541173881951702e-09, - "loss": 3.7522, - "step": 4416000 - }, - { - "epoch": 48.59, - "learning_rate": 3.5274217503713073e-09, - "loss": 3.735, - "step": 4416500 - }, - { - "epoch": 48.59, - "learning_rate": 3.5136696187909125e-09, - "loss": 3.7583, - "step": 4417000 - }, - { - "epoch": 48.6, - "learning_rate": 3.4999174872105173e-09, - "loss": 3.753, - "step": 4417500 - }, - { - "epoch": 48.61, - "learning_rate": 3.4861653556301225e-09, - "loss": 3.7427, - "step": 4418000 - }, - { - "epoch": 48.61, - "learning_rate": 3.4724132240497277e-09, - "loss": 3.7341, - "step": 4418500 - }, - { - "epoch": 48.62, - "learning_rate": 3.4586610924693325e-09, - "loss": 3.7573, - "step": 4419000 - }, - { - "epoch": 48.62, - "learning_rate": 3.4449089608889377e-09, - "loss": 3.7539, - "step": 4419500 - }, - { - "epoch": 48.63, - "learning_rate": 3.431156829308543e-09, - "loss": 3.7537, - "step": 4420000 - }, - { - "epoch": 48.63, - "learning_rate": 3.4174046977281477e-09, - "loss": 3.7311, - "step": 4420500 - }, - { - "epoch": 48.64, - "learning_rate": 3.403652566147753e-09, - "loss": 3.7608, - "step": 4421000 - }, - { - "epoch": 48.64, - "learning_rate": 3.3899004345673577e-09, - "loss": 3.7485, - "step": 4421500 - }, - { - "epoch": 48.65, - "learning_rate": 3.376148302986963e-09, - "loss": 3.7436, - "step": 4422000 - }, - { - "epoch": 48.66, - "learning_rate": 3.362396171406568e-09, - "loss": 3.7622, - "step": 4422500 - }, - { - "epoch": 48.66, - "learning_rate": 3.348644039826173e-09, - "loss": 3.757, - "step": 4423000 - }, - { - "epoch": 48.67, - "learning_rate": 3.334891908245778e-09, - "loss": 3.7577, - "step": 4423500 - }, - { - "epoch": 48.67, - "learning_rate": 3.3211397766653833e-09, - "loss": 3.7613, - "step": 4424000 - }, - { - "epoch": 48.68, - "learning_rate": 3.3073876450849877e-09, - "loss": 3.759, - "step": 4424500 - }, - { - "epoch": 48.68, - "learning_rate": 3.2936355135045933e-09, - "loss": 3.7421, - "step": 4425000 - }, - { - "epoch": 48.69, - "learning_rate": 3.2798833819241977e-09, - "loss": 3.7544, - "step": 4425500 - }, - { - "epoch": 48.69, - "learning_rate": 3.266131250343803e-09, - "loss": 3.7591, - "step": 4426000 - }, - { - "epoch": 48.7, - "learning_rate": 3.2523791187634085e-09, - "loss": 3.7473, - "step": 4426500 - }, - { - "epoch": 48.7, - "learning_rate": 3.238626987183013e-09, - "loss": 3.7574, - "step": 4427000 - }, - { - "epoch": 48.71, - "learning_rate": 3.224874855602618e-09, - "loss": 3.7379, - "step": 4427500 - }, - { - "epoch": 48.72, - "learning_rate": 3.2111227240222237e-09, - "loss": 3.7372, - "step": 4428000 - }, - { - "epoch": 48.72, - "learning_rate": 3.197370592441828e-09, - "loss": 3.7591, - "step": 4428500 - }, - { - "epoch": 48.73, - "learning_rate": 3.1836184608614333e-09, - "loss": 3.7525, - "step": 4429000 - }, - { - "epoch": 48.73, - "learning_rate": 3.169866329281039e-09, - "loss": 3.7368, - "step": 4429500 - }, - { - "epoch": 48.74, - "learning_rate": 3.1561141977006433e-09, - "loss": 3.7535, - "step": 4430000 - }, - { - "epoch": 48.74, - "learning_rate": 3.1423620661202485e-09, - "loss": 3.7208, - "step": 4430500 - }, - { - "epoch": 48.75, - "learning_rate": 3.1286099345398533e-09, - "loss": 3.7483, - "step": 4431000 - }, - { - "epoch": 48.75, - "learning_rate": 3.1148578029594585e-09, - "loss": 3.7433, - "step": 4431500 - }, - { - "epoch": 48.76, - "learning_rate": 3.1011056713790633e-09, - "loss": 3.7545, - "step": 4432000 - }, - { - "epoch": 48.77, - "learning_rate": 3.087353539798669e-09, - "loss": 3.7469, - "step": 4432500 - }, - { - "epoch": 48.77, - "learning_rate": 3.0736014082182737e-09, - "loss": 3.7419, - "step": 4433000 - }, - { - "epoch": 48.78, - "learning_rate": 3.0598492766378785e-09, - "loss": 3.7348, - "step": 4433500 - }, - { - "epoch": 48.78, - "learning_rate": 3.0460971450574837e-09, - "loss": 3.7509, - "step": 4434000 - }, - { - "epoch": 48.79, - "learning_rate": 3.032345013477089e-09, - "loss": 3.7584, - "step": 4434500 - }, - { - "epoch": 48.79, - "learning_rate": 3.0185928818966937e-09, - "loss": 3.7479, - "step": 4435000 - }, - { - "epoch": 48.8, - "learning_rate": 3.004840750316299e-09, - "loss": 3.766, - "step": 4435500 - }, - { - "epoch": 48.8, - "learning_rate": 2.9910886187359037e-09, - "loss": 3.7488, - "step": 4436000 - }, - { - "epoch": 48.81, - "learning_rate": 2.977336487155509e-09, - "loss": 3.7323, - "step": 4436500 - }, - { - "epoch": 48.81, - "learning_rate": 2.963584355575114e-09, - "loss": 3.7388, - "step": 4437000 - }, - { - "epoch": 48.82, - "learning_rate": 2.949832223994719e-09, - "loss": 3.7433, - "step": 4437500 - }, - { - "epoch": 48.83, - "learning_rate": 2.9360800924143237e-09, - "loss": 3.7477, - "step": 4438000 - }, - { - "epoch": 48.83, - "learning_rate": 2.9223279608339294e-09, - "loss": 3.7518, - "step": 4438500 - }, - { - "epoch": 48.84, - "learning_rate": 2.908575829253534e-09, - "loss": 3.7563, - "step": 4439000 - }, - { - "epoch": 48.84, - "learning_rate": 2.894823697673139e-09, - "loss": 3.7631, - "step": 4439500 - }, - { - "epoch": 48.85, - "learning_rate": 2.8810715660927446e-09, - "loss": 3.7484, - "step": 4440000 - }, - { - "epoch": 48.85, - "learning_rate": 2.8673194345123494e-09, - "loss": 3.7616, - "step": 4440500 - }, - { - "epoch": 48.86, - "learning_rate": 2.853567302931954e-09, - "loss": 3.7521, - "step": 4441000 - }, - { - "epoch": 48.86, - "learning_rate": 2.8398151713515594e-09, - "loss": 3.7335, - "step": 4441500 - }, - { - "epoch": 48.87, - "learning_rate": 2.8260630397711646e-09, - "loss": 3.7624, - "step": 4442000 - }, - { - "epoch": 48.88, - "learning_rate": 2.8123109081907694e-09, - "loss": 3.7353, - "step": 4442500 - }, - { - "epoch": 48.88, - "learning_rate": 2.7985587766103746e-09, - "loss": 3.7646, - "step": 4443000 - }, - { - "epoch": 48.89, - "learning_rate": 2.7848066450299794e-09, - "loss": 3.7634, - "step": 4443500 - }, - { - "epoch": 48.89, - "learning_rate": 2.7710545134495846e-09, - "loss": 3.739, - "step": 4444000 - }, - { - "epoch": 48.9, - "learning_rate": 2.7573023818691898e-09, - "loss": 3.7429, - "step": 4444500 - }, - { - "epoch": 48.9, - "learning_rate": 2.7435502502887946e-09, - "loss": 3.7424, - "step": 4445000 - }, - { - "epoch": 48.91, - "learning_rate": 2.7297981187083994e-09, - "loss": 3.742, - "step": 4445500 - }, - { - "epoch": 48.91, - "learning_rate": 2.716045987128005e-09, - "loss": 3.7559, - "step": 4446000 - }, - { - "epoch": 48.92, - "learning_rate": 2.7022938555476098e-09, - "loss": 3.7424, - "step": 4446500 - }, - { - "epoch": 48.92, - "learning_rate": 2.6885417239672146e-09, - "loss": 3.7497, - "step": 4447000 - }, - { - "epoch": 48.93, - "learning_rate": 2.6747895923868198e-09, - "loss": 3.7641, - "step": 4447500 - }, - { - "epoch": 48.94, - "learning_rate": 2.661037460806425e-09, - "loss": 3.7675, - "step": 4448000 - }, - { - "epoch": 48.94, - "learning_rate": 2.6472853292260298e-09, - "loss": 3.7631, - "step": 4448500 - }, - { - "epoch": 48.95, - "learning_rate": 2.633533197645635e-09, - "loss": 3.7345, - "step": 4449000 - }, - { - "epoch": 48.95, - "learning_rate": 2.6197810660652402e-09, - "loss": 3.7401, - "step": 4449500 - }, - { - "epoch": 48.96, - "learning_rate": 2.606028934484845e-09, - "loss": 3.7485, - "step": 4450000 - }, - { - "epoch": 48.96, - "learning_rate": 2.5922768029044502e-09, - "loss": 3.762, - "step": 4450500 - }, - { - "epoch": 48.97, - "learning_rate": 2.578524671324055e-09, - "loss": 3.7534, - "step": 4451000 - }, - { - "epoch": 48.97, - "learning_rate": 2.5647725397436602e-09, - "loss": 3.7362, - "step": 4451500 - }, - { - "epoch": 48.98, - "learning_rate": 2.5510204081632654e-09, - "loss": 3.7473, - "step": 4452000 - }, - { - "epoch": 48.99, - "learning_rate": 2.5372682765828702e-09, - "loss": 3.7627, - "step": 4452500 - }, - { - "epoch": 48.99, - "learning_rate": 2.523516145002475e-09, - "loss": 3.7469, - "step": 4453000 - }, - { - "epoch": 49.0, - "learning_rate": 2.5097640134220806e-09, - "loss": 3.7466, - "step": 4453500 - }, - { - "epoch": 49.0, - "eval_loss": 3.8253118991851807, - "eval_runtime": 6.1426, - "eval_samples_per_second": 252.987, - "step": 4453855 - }, - { - "epoch": 49.0, - "learning_rate": 2.4960118818416854e-09, - "loss": 3.75, - "step": 4454000 - }, - { - "epoch": 49.01, - "learning_rate": 2.4822597502612902e-09, - "loss": 3.7537, - "step": 4454500 - }, - { - "epoch": 49.01, - "learning_rate": 2.4685076186808954e-09, - "loss": 3.752, - "step": 4455000 - }, - { - "epoch": 49.02, - "learning_rate": 2.4547554871005006e-09, - "loss": 3.7553, - "step": 4455500 - }, - { - "epoch": 49.02, - "learning_rate": 2.4410033555201054e-09, - "loss": 3.771, - "step": 4456000 - }, - { - "epoch": 49.03, - "learning_rate": 2.4272512239397106e-09, - "loss": 3.7516, - "step": 4456500 - }, - { - "epoch": 49.03, - "learning_rate": 2.4134990923593154e-09, - "loss": 3.7428, - "step": 4457000 - }, - { - "epoch": 49.04, - "learning_rate": 2.3997469607789206e-09, - "loss": 3.7016, - "step": 4457500 - }, - { - "epoch": 49.05, - "learning_rate": 2.385994829198526e-09, - "loss": 3.743, - "step": 4458000 - }, - { - "epoch": 49.05, - "learning_rate": 2.3722426976181306e-09, - "loss": 3.7622, - "step": 4458500 - }, - { - "epoch": 49.06, - "learning_rate": 2.358490566037736e-09, - "loss": 3.7507, - "step": 4459000 - }, - { - "epoch": 49.06, - "learning_rate": 2.344738434457341e-09, - "loss": 3.7626, - "step": 4459500 - }, - { - "epoch": 49.07, - "learning_rate": 2.330986302876946e-09, - "loss": 3.753, - "step": 4460000 - }, - { - "epoch": 49.07, - "learning_rate": 2.3172341712965506e-09, - "loss": 3.7507, - "step": 4460500 - }, - { - "epoch": 49.08, - "learning_rate": 2.303482039716156e-09, - "loss": 3.7484, - "step": 4461000 - }, - { - "epoch": 49.08, - "learning_rate": 2.289729908135761e-09, - "loss": 3.7479, - "step": 4461500 - }, - { - "epoch": 49.09, - "learning_rate": 2.275977776555366e-09, - "loss": 3.743, - "step": 4462000 - }, - { - "epoch": 49.1, - "learning_rate": 2.262225644974971e-09, - "loss": 3.7613, - "step": 4462500 - }, - { - "epoch": 49.1, - "learning_rate": 2.2484735133945763e-09, - "loss": 3.7441, - "step": 4463000 - }, - { - "epoch": 49.11, - "learning_rate": 2.234721381814181e-09, - "loss": 3.7591, - "step": 4463500 - }, - { - "epoch": 49.11, - "learning_rate": 2.2209692502337863e-09, - "loss": 3.7476, - "step": 4464000 - }, - { - "epoch": 49.12, - "learning_rate": 2.207217118653391e-09, - "loss": 3.7556, - "step": 4464500 - }, - { - "epoch": 49.12, - "learning_rate": 2.1934649870729963e-09, - "loss": 3.7658, - "step": 4465000 - }, - { - "epoch": 49.13, - "learning_rate": 2.1797128554926015e-09, - "loss": 3.7483, - "step": 4465500 - }, - { - "epoch": 49.13, - "learning_rate": 2.1659607239122063e-09, - "loss": 3.7599, - "step": 4466000 - }, - { - "epoch": 49.14, - "learning_rate": 2.152208592331811e-09, - "loss": 3.7437, - "step": 4466500 - }, - { - "epoch": 49.14, - "learning_rate": 2.1384564607514163e-09, - "loss": 3.7256, - "step": 4467000 - }, - { - "epoch": 49.15, - "learning_rate": 2.1247043291710215e-09, - "loss": 3.745, - "step": 4467500 - }, - { - "epoch": 49.16, - "learning_rate": 2.1109521975906263e-09, - "loss": 3.751, - "step": 4468000 - }, - { - "epoch": 49.16, - "learning_rate": 2.0972000660102315e-09, - "loss": 3.7411, - "step": 4468500 - }, - { - "epoch": 49.17, - "learning_rate": 2.0834479344298367e-09, - "loss": 3.755, - "step": 4469000 - }, - { - "epoch": 49.17, - "learning_rate": 2.0696958028494415e-09, - "loss": 3.7544, - "step": 4469500 - }, - { - "epoch": 49.18, - "learning_rate": 2.0559436712690467e-09, - "loss": 3.7327, - "step": 4470000 - }, - { - "epoch": 49.18, - "learning_rate": 2.042191539688652e-09, - "loss": 3.7433, - "step": 4470500 - }, - { - "epoch": 49.19, - "learning_rate": 2.0284394081082567e-09, - "loss": 3.7432, - "step": 4471000 - }, - { - "epoch": 49.19, - "learning_rate": 2.014687276527862e-09, - "loss": 3.7357, - "step": 4471500 - }, - { - "epoch": 49.2, - "learning_rate": 2.0009351449474667e-09, - "loss": 3.7292, - "step": 4472000 - }, - { - "epoch": 49.21, - "learning_rate": 1.987183013367072e-09, - "loss": 3.7488, - "step": 4472500 - }, - { - "epoch": 49.21, - "learning_rate": 1.9734308817866767e-09, - "loss": 3.7682, - "step": 4473000 - }, - { - "epoch": 49.22, - "learning_rate": 1.959678750206282e-09, - "loss": 3.7617, - "step": 4473500 - }, - { - "epoch": 49.22, - "learning_rate": 1.9459266186258867e-09, - "loss": 3.7488, - "step": 4474000 - }, - { - "epoch": 49.23, - "learning_rate": 1.932174487045492e-09, - "loss": 3.7569, - "step": 4474500 - }, - { - "epoch": 49.23, - "learning_rate": 1.918422355465097e-09, - "loss": 3.7412, - "step": 4475000 - }, - { - "epoch": 49.24, - "learning_rate": 1.904670223884702e-09, - "loss": 3.7514, - "step": 4475500 - }, - { - "epoch": 49.24, - "learning_rate": 1.890918092304307e-09, - "loss": 3.7522, - "step": 4476000 - }, - { - "epoch": 49.25, - "learning_rate": 1.8771659607239123e-09, - "loss": 3.7524, - "step": 4476500 - }, - { - "epoch": 49.25, - "learning_rate": 1.863413829143517e-09, - "loss": 3.7455, - "step": 4477000 - }, - { - "epoch": 49.26, - "learning_rate": 1.8496616975631221e-09, - "loss": 3.7386, - "step": 4477500 - }, - { - "epoch": 49.27, - "learning_rate": 1.8359095659827271e-09, - "loss": 3.7489, - "step": 4478000 - }, - { - "epoch": 49.27, - "learning_rate": 1.8221574344023323e-09, - "loss": 3.7652, - "step": 4478500 - }, - { - "epoch": 49.28, - "learning_rate": 1.8084053028219373e-09, - "loss": 3.7492, - "step": 4479000 - }, - { - "epoch": 49.28, - "learning_rate": 1.7946531712415423e-09, - "loss": 3.7521, - "step": 4479500 - }, - { - "epoch": 49.29, - "learning_rate": 1.7809010396611475e-09, - "loss": 3.7324, - "step": 4480000 - }, - { - "epoch": 49.29, - "learning_rate": 1.7671489080807525e-09, - "loss": 3.75, - "step": 4480500 - }, - { - "epoch": 49.3, - "learning_rate": 1.7533967765003575e-09, - "loss": 3.7295, - "step": 4481000 - }, - { - "epoch": 49.3, - "learning_rate": 1.7396446449199623e-09, - "loss": 3.753, - "step": 4481500 - }, - { - "epoch": 49.31, - "learning_rate": 1.7258925133395677e-09, - "loss": 3.737, - "step": 4482000 - }, - { - "epoch": 49.32, - "learning_rate": 1.7121403817591727e-09, - "loss": 3.758, - "step": 4482500 - }, - { - "epoch": 49.32, - "learning_rate": 1.6983882501787775e-09, - "loss": 3.7548, - "step": 4483000 - }, - { - "epoch": 49.33, - "learning_rate": 1.6846361185983825e-09, - "loss": 3.7609, - "step": 4483500 - }, - { - "epoch": 49.33, - "learning_rate": 1.6708839870179877e-09, - "loss": 3.7303, - "step": 4484000 - }, - { - "epoch": 49.34, - "learning_rate": 1.6571318554375927e-09, - "loss": 3.7614, - "step": 4484500 - }, - { - "epoch": 49.34, - "learning_rate": 1.6433797238571977e-09, - "loss": 3.7552, - "step": 4485000 - }, - { - "epoch": 49.35, - "learning_rate": 1.6296275922768027e-09, - "loss": 3.7667, - "step": 4485500 - }, - { - "epoch": 49.35, - "learning_rate": 1.615875460696408e-09, - "loss": 3.7536, - "step": 4486000 - }, - { - "epoch": 49.36, - "learning_rate": 1.602123329116013e-09, - "loss": 3.774, - "step": 4486500 - }, - { - "epoch": 49.36, - "learning_rate": 1.588371197535618e-09, - "loss": 3.7311, - "step": 4487000 - }, - { - "epoch": 49.37, - "learning_rate": 1.5746190659552228e-09, - "loss": 3.7543, - "step": 4487500 - }, - { - "epoch": 49.38, - "learning_rate": 1.560866934374828e-09, - "loss": 3.7313, - "step": 4488000 - }, - { - "epoch": 49.38, - "learning_rate": 1.5471148027944332e-09, - "loss": 3.7429, - "step": 4488500 - }, - { - "epoch": 49.39, - "learning_rate": 1.533362671214038e-09, - "loss": 3.7407, - "step": 4489000 - }, - { - "epoch": 49.39, - "learning_rate": 1.5196105396336432e-09, - "loss": 3.7319, - "step": 4489500 - }, - { - "epoch": 49.4, - "learning_rate": 1.5058584080532482e-09, - "loss": 3.7465, - "step": 4490000 - }, - { - "epoch": 49.4, - "learning_rate": 1.4921062764728532e-09, - "loss": 3.7547, - "step": 4490500 - }, - { - "epoch": 49.41, - "learning_rate": 1.4783541448924584e-09, - "loss": 3.7521, - "step": 4491000 - }, - { - "epoch": 49.41, - "learning_rate": 1.4646020133120634e-09, - "loss": 3.7482, - "step": 4491500 - }, - { - "epoch": 49.42, - "learning_rate": 1.4508498817316684e-09, - "loss": 3.7532, - "step": 4492000 - }, - { - "epoch": 49.43, - "learning_rate": 1.4370977501512734e-09, - "loss": 3.7541, - "step": 4492500 - }, - { - "epoch": 49.43, - "learning_rate": 1.4233456185708784e-09, - "loss": 3.7561, - "step": 4493000 - }, - { - "epoch": 49.44, - "learning_rate": 1.4095934869904834e-09, - "loss": 3.7535, - "step": 4493500 - }, - { - "epoch": 49.44, - "learning_rate": 1.3958413554100886e-09, - "loss": 3.7457, - "step": 4494000 - }, - { - "epoch": 49.45, - "learning_rate": 1.3820892238296936e-09, - "loss": 3.7271, - "step": 4494500 - }, - { - "epoch": 49.45, - "learning_rate": 1.3683370922492986e-09, - "loss": 3.7551, - "step": 4495000 - }, - { - "epoch": 49.46, - "learning_rate": 1.3545849606689036e-09, - "loss": 3.769, - "step": 4495500 - }, - { - "epoch": 49.46, - "learning_rate": 1.3408328290885086e-09, - "loss": 3.7481, - "step": 4496000 - }, - { - "epoch": 49.47, - "learning_rate": 1.3270806975081136e-09, - "loss": 3.748, - "step": 4496500 - }, - { - "epoch": 49.47, - "learning_rate": 1.3133285659277188e-09, - "loss": 3.7513, - "step": 4497000 - }, - { - "epoch": 49.48, - "learning_rate": 1.2995764343473238e-09, - "loss": 3.7704, - "step": 4497500 - }, - { - "epoch": 49.49, - "learning_rate": 1.2858243027669288e-09, - "loss": 3.7598, - "step": 4498000 - }, - { - "epoch": 49.49, - "learning_rate": 1.2720721711865338e-09, - "loss": 3.7334, - "step": 4498500 - }, - { - "epoch": 49.5, - "learning_rate": 1.2583200396061388e-09, - "loss": 3.7528, - "step": 4499000 - }, - { - "epoch": 49.5, - "learning_rate": 1.244567908025744e-09, - "loss": 3.7469, - "step": 4499500 - }, - { - "epoch": 49.51, - "learning_rate": 1.230815776445349e-09, - "loss": 3.7613, - "step": 4500000 - }, - { - "epoch": 49.51, - "learning_rate": 1.217063644864954e-09, - "loss": 3.7406, - "step": 4500500 - }, - { - "epoch": 49.52, - "learning_rate": 1.203311513284559e-09, - "loss": 3.7452, - "step": 4501000 - }, - { - "epoch": 49.52, - "learning_rate": 1.1895593817041642e-09, - "loss": 3.7581, - "step": 4501500 - }, - { - "epoch": 49.53, - "learning_rate": 1.175807250123769e-09, - "loss": 3.7644, - "step": 4502000 - }, - { - "epoch": 49.54, - "learning_rate": 1.1620551185433742e-09, - "loss": 3.7417, - "step": 4502500 - }, - { - "epoch": 49.54, - "learning_rate": 1.1483029869629792e-09, - "loss": 3.7424, - "step": 4503000 - }, - { - "epoch": 49.55, - "learning_rate": 1.1345508553825842e-09, - "loss": 3.7492, - "step": 4503500 - }, - { - "epoch": 49.55, - "learning_rate": 1.1207987238021892e-09, - "loss": 3.7413, - "step": 4504000 - }, - { - "epoch": 49.56, - "learning_rate": 1.1070465922217944e-09, - "loss": 3.7517, - "step": 4504500 - }, - { - "epoch": 49.56, - "learning_rate": 1.0932944606413992e-09, - "loss": 3.7427, - "step": 4505000 - }, - { - "epoch": 49.57, - "learning_rate": 1.0795423290610044e-09, - "loss": 3.761, - "step": 4505500 - }, - { - "epoch": 49.57, - "learning_rate": 1.0657901974806094e-09, - "loss": 3.7443, - "step": 4506000 - }, - { - "epoch": 49.58, - "learning_rate": 1.0520380659002144e-09, - "loss": 3.752, - "step": 4506500 - }, - { - "epoch": 49.58, - "learning_rate": 1.0382859343198194e-09, - "loss": 3.734, - "step": 4507000 - }, - { - "epoch": 49.59, - "learning_rate": 1.0245338027394246e-09, - "loss": 3.746, - "step": 4507500 - }, - { - "epoch": 49.6, - "learning_rate": 1.0107816711590294e-09, - "loss": 3.7364, - "step": 4508000 - }, - { - "epoch": 49.6, - "learning_rate": 9.970295395786346e-10, - "loss": 3.7374, - "step": 4508500 - }, - { - "epoch": 49.61, - "learning_rate": 9.832774079982396e-10, - "loss": 3.7571, - "step": 4509000 - }, - { - "epoch": 49.61, - "learning_rate": 9.695252764178446e-10, - "loss": 3.7506, - "step": 4509500 - }, - { - "epoch": 49.62, - "learning_rate": 9.557731448374499e-10, - "loss": 3.7481, - "step": 4510000 - }, - { - "epoch": 49.62, - "learning_rate": 9.420210132570549e-10, - "loss": 3.7567, - "step": 4510500 - }, - { - "epoch": 49.63, - "learning_rate": 9.2826888167666e-10, - "loss": 3.7706, - "step": 4511000 - }, - { - "epoch": 49.63, - "learning_rate": 9.145167500962649e-10, - "loss": 3.7562, - "step": 4511500 - }, - { - "epoch": 49.64, - "learning_rate": 9.0076461851587e-10, - "loss": 3.7378, - "step": 4512000 - }, - { - "epoch": 49.65, - "learning_rate": 8.87012486935475e-10, - "loss": 3.7392, - "step": 4512500 - }, - { - "epoch": 49.65, - "learning_rate": 8.732603553550801e-10, - "loss": 3.7412, - "step": 4513000 - }, - { - "epoch": 49.66, - "learning_rate": 8.59508223774685e-10, - "loss": 3.7571, - "step": 4513500 - }, - { - "epoch": 49.66, - "learning_rate": 8.457560921942902e-10, - "loss": 3.75, - "step": 4514000 - }, - { - "epoch": 49.67, - "learning_rate": 8.320039606138951e-10, - "loss": 3.7675, - "step": 4514500 - }, - { - "epoch": 49.67, - "learning_rate": 8.182518290335002e-10, - "loss": 3.7498, - "step": 4515000 - }, - { - "epoch": 49.68, - "learning_rate": 8.044996974531052e-10, - "loss": 3.7628, - "step": 4515500 - }, - { - "epoch": 49.68, - "learning_rate": 7.907475658727103e-10, - "loss": 3.771, - "step": 4516000 - }, - { - "epoch": 49.69, - "learning_rate": 7.769954342923153e-10, - "loss": 3.7463, - "step": 4516500 - }, - { - "epoch": 49.69, - "learning_rate": 7.632433027119203e-10, - "loss": 3.7496, - "step": 4517000 - }, - { - "epoch": 49.7, - "learning_rate": 7.494911711315254e-10, - "loss": 3.754, - "step": 4517500 - }, - { - "epoch": 49.71, - "learning_rate": 7.357390395511304e-10, - "loss": 3.7322, - "step": 4518000 - }, - { - "epoch": 49.71, - "learning_rate": 7.219869079707354e-10, - "loss": 3.7296, - "step": 4518500 - }, - { - "epoch": 49.72, - "learning_rate": 7.082347763903405e-10, - "loss": 3.7719, - "step": 4519000 - }, - { - "epoch": 49.72, - "learning_rate": 6.944826448099455e-10, - "loss": 3.7419, - "step": 4519500 - }, - { - "epoch": 49.73, - "learning_rate": 6.807305132295505e-10, - "loss": 3.7552, - "step": 4520000 - }, - { - "epoch": 49.73, - "learning_rate": 6.669783816491556e-10, - "loss": 3.765, - "step": 4520500 - }, - { - "epoch": 49.74, - "learning_rate": 6.532262500687606e-10, - "loss": 3.7384, - "step": 4521000 - }, - { - "epoch": 49.74, - "learning_rate": 6.394741184883656e-10, - "loss": 3.7514, - "step": 4521500 - }, - { - "epoch": 49.75, - "learning_rate": 6.257219869079707e-10, - "loss": 3.7354, - "step": 4522000 - }, - { - "epoch": 49.76, - "learning_rate": 6.119698553275757e-10, - "loss": 3.7474, - "step": 4522500 - }, - { - "epoch": 49.76, - "learning_rate": 5.982177237471807e-10, - "loss": 3.7511, - "step": 4523000 - }, - { - "epoch": 49.77, - "learning_rate": 5.844655921667858e-10, - "loss": 3.739, - "step": 4523500 - }, - { - "epoch": 49.77, - "learning_rate": 5.707134605863908e-10, - "loss": 3.7345, - "step": 4524000 - }, - { - "epoch": 49.78, - "learning_rate": 5.569613290059959e-10, - "loss": 3.7544, - "step": 4524500 - }, - { - "epoch": 49.78, - "learning_rate": 5.43209197425601e-10, - "loss": 3.7472, - "step": 4525000 - }, - { - "epoch": 49.79, - "learning_rate": 5.29457065845206e-10, - "loss": 3.7622, - "step": 4525500 - }, - { - "epoch": 49.79, - "learning_rate": 5.15704934264811e-10, - "loss": 3.7751, - "step": 4526000 - }, - { - "epoch": 49.8, - "learning_rate": 5.019528026844161e-10, - "loss": 3.745, - "step": 4526500 - }, - { - "epoch": 49.8, - "learning_rate": 4.882006711040211e-10, - "loss": 3.7265, - "step": 4527000 - }, - { - "epoch": 49.81, - "learning_rate": 4.744485395236261e-10, - "loss": 3.739, - "step": 4527500 - }, - { - "epoch": 49.82, - "learning_rate": 4.606964079432312e-10, - "loss": 3.7298, - "step": 4528000 - }, - { - "epoch": 49.82, - "learning_rate": 4.4694427636283623e-10, - "loss": 3.7627, - "step": 4528500 - }, - { - "epoch": 49.83, - "learning_rate": 4.331921447824413e-10, - "loss": 3.78, - "step": 4529000 - }, - { - "epoch": 49.83, - "learning_rate": 4.194400132020463e-10, - "loss": 3.7387, - "step": 4529500 - }, - { - "epoch": 49.84, - "learning_rate": 4.0568788162165134e-10, - "loss": 3.7501, - "step": 4530000 - }, - { - "epoch": 49.84, - "learning_rate": 3.919357500412564e-10, - "loss": 3.7371, - "step": 4530500 - }, - { - "epoch": 49.85, - "learning_rate": 3.781836184608614e-10, - "loss": 3.7358, - "step": 4531000 - }, - { - "epoch": 49.85, - "learning_rate": 3.6443148688046644e-10, - "loss": 3.7356, - "step": 4531500 - }, - { - "epoch": 49.86, - "learning_rate": 3.506793553000715e-10, - "loss": 3.7474, - "step": 4532000 - }, - { - "epoch": 49.87, - "learning_rate": 3.3692722371967655e-10, - "loss": 3.7462, - "step": 4532500 - }, - { - "epoch": 49.87, - "learning_rate": 3.231750921392816e-10, - "loss": 3.745, - "step": 4533000 - }, - { - "epoch": 49.88, - "learning_rate": 3.094229605588866e-10, - "loss": 3.764, - "step": 4533500 - }, - { - "epoch": 49.88, - "learning_rate": 2.9567082897849165e-10, - "loss": 3.7514, - "step": 4534000 - }, - { - "epoch": 49.89, - "learning_rate": 2.819186973980967e-10, - "loss": 3.7585, - "step": 4534500 - }, - { - "epoch": 49.89, - "learning_rate": 2.681665658177017e-10, - "loss": 3.739, - "step": 4535000 - }, - { - "epoch": 49.9, - "learning_rate": 2.5441443423730676e-10, - "loss": 3.7291, - "step": 4535500 - }, - { - "epoch": 49.9, - "learning_rate": 2.406623026569118e-10, - "loss": 3.7672, - "step": 4536000 - }, - { - "epoch": 49.91, - "learning_rate": 2.2691017107651684e-10, - "loss": 3.7537, - "step": 4536500 - }, - { - "epoch": 49.91, - "learning_rate": 2.1315803949612187e-10, - "loss": 3.756, - "step": 4537000 - }, - { - "epoch": 49.92, - "learning_rate": 1.9940590791572695e-10, - "loss": 3.7495, - "step": 4537500 - }, - { - "epoch": 49.93, - "learning_rate": 1.8565377633533197e-10, - "loss": 3.7447, - "step": 4538000 - }, - { - "epoch": 49.93, - "learning_rate": 1.7190164475493702e-10, - "loss": 3.738, - "step": 4538500 - }, - { - "epoch": 49.94, - "learning_rate": 1.5814951317454205e-10, - "loss": 3.7381, - "step": 4539000 - }, - { - "epoch": 49.94, - "learning_rate": 1.4439738159414708e-10, - "loss": 3.7596, - "step": 4539500 - }, - { - "epoch": 49.95, - "learning_rate": 1.3064525001375213e-10, - "loss": 3.754, - "step": 4540000 - }, - { - "epoch": 49.95, - "learning_rate": 1.1689311843335716e-10, - "loss": 3.7449, - "step": 4540500 - }, - { - "epoch": 49.96, - "learning_rate": 1.0314098685296221e-10, - "loss": 3.7402, - "step": 4541000 - }, - { - "epoch": 49.96, - "learning_rate": 8.938885527256724e-11, - "loss": 3.7431, - "step": 4541500 - }, - { - "epoch": 49.97, - "learning_rate": 7.563672369217229e-11, - "loss": 3.7442, - "step": 4542000 - }, - { - "epoch": 49.98, - "learning_rate": 6.188459211177732e-11, - "loss": 3.7337, - "step": 4542500 - }, - { - "epoch": 49.98, - "learning_rate": 4.813246053138236e-11, - "loss": 3.7584, - "step": 4543000 - }, - { - "epoch": 49.99, - "learning_rate": 3.43803289509874e-11, - "loss": 3.7638, - "step": 4543500 - }, - { - "epoch": 49.99, - "learning_rate": 2.062819737059244e-11, - "loss": 3.7403, - "step": 4544000 - }, - { - "epoch": 50.0, - "learning_rate": 6.876065790197479e-12, - "loss": 3.7504, - "step": 4544500 - }, - { - "epoch": 50.0, - "eval_loss": 3.825317859649658, - "eval_runtime": 6.146, - "eval_samples_per_second": 252.849, - "step": 4544750 - } - ], - "max_steps": 4544750, - "num_train_epochs": 50, - "total_flos": 2.176739633488896e+17, - "trial_name": null, - "trial_params": null -}