diff --git "a/imagined/model/trainer_state.json" "b/imagined/model/trainer_state.json" deleted file mode 100644--- "a/imagined/model/trainer_state.json" +++ /dev/null @@ -1,54390 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 50.0, - "global_step": 4502300, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01, - "learning_rate": 5.552717499944473e-11, - "loss": 4.7837, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 1.1105434999888946e-10, - "loss": 4.7449, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 1.665815249983342e-10, - "loss": 4.762, - "step": 1500 - }, - { - "epoch": 0.02, - "learning_rate": 2.221086999977789e-10, - "loss": 4.7893, - "step": 2000 - }, - { - "epoch": 0.03, - "learning_rate": 2.7763587499722363e-10, - "loss": 4.7779, - "step": 2500 - }, - { - "epoch": 0.03, - "learning_rate": 3.331630499966684e-10, - "loss": 4.7454, - "step": 3000 - }, - { - "epoch": 0.04, - "learning_rate": 3.886902249961131e-10, - "loss": 4.7598, - "step": 3500 - }, - { - "epoch": 0.04, - "learning_rate": 4.442173999955578e-10, - "loss": 4.7754, - "step": 4000 - }, - { - "epoch": 0.05, - "learning_rate": 4.997445749950025e-10, - "loss": 4.7607, - "step": 4500 - }, - { - "epoch": 0.06, - "learning_rate": 5.552717499944473e-10, - "loss": 4.7713, - "step": 5000 - }, - { - "epoch": 0.06, - "learning_rate": 6.10798924993892e-10, - "loss": 4.7322, - "step": 5500 - }, - { - "epoch": 0.07, - "learning_rate": 6.663260999933368e-10, - "loss": 4.7939, - "step": 6000 - }, - { - "epoch": 0.07, - "learning_rate": 7.218532749927815e-10, - "loss": 4.7548, - "step": 6500 - }, - { - "epoch": 0.08, - "learning_rate": 7.773804499922262e-10, - "loss": 4.772, - "step": 7000 - }, - { - "epoch": 0.08, - "learning_rate": 8.32907624991671e-10, - "loss": 4.7405, - "step": 7500 - }, - { - "epoch": 0.09, - "learning_rate": 8.884347999911156e-10, - "loss": 4.7392, - "step": 8000 - }, - { - "epoch": 0.09, - "learning_rate": 9.439619749905603e-10, - "loss": 4.7745, - "step": 8500 - }, - { - "epoch": 0.1, - "learning_rate": 9.99489149990005e-10, - "loss": 4.7632, - "step": 9000 - }, - { - "epoch": 0.11, - "learning_rate": 1.0550163249894497e-09, - "loss": 4.7306, - "step": 9500 - }, - { - "epoch": 0.11, - "learning_rate": 1.1105434999888945e-09, - "loss": 4.7724, - "step": 10000 - }, - { - "epoch": 0.12, - "learning_rate": 1.1660706749883393e-09, - "loss": 4.7545, - "step": 10500 - }, - { - "epoch": 0.12, - "learning_rate": 1.221597849987784e-09, - "loss": 4.7522, - "step": 11000 - }, - { - "epoch": 0.13, - "learning_rate": 1.2771250249872287e-09, - "loss": 4.744, - "step": 11500 - }, - { - "epoch": 0.13, - "learning_rate": 1.3326521999866735e-09, - "loss": 4.7259, - "step": 12000 - }, - { - "epoch": 0.14, - "learning_rate": 1.3881793749861181e-09, - "loss": 4.7489, - "step": 12500 - }, - { - "epoch": 0.14, - "learning_rate": 1.443706549985563e-09, - "loss": 4.7674, - "step": 13000 - }, - { - "epoch": 0.15, - "learning_rate": 1.4992337249850075e-09, - "loss": 4.7464, - "step": 13500 - }, - { - "epoch": 0.16, - "learning_rate": 1.5547608999844523e-09, - "loss": 4.7427, - "step": 14000 - }, - { - "epoch": 0.16, - "learning_rate": 1.6102880749838971e-09, - "loss": 4.7386, - "step": 14500 - }, - { - "epoch": 0.17, - "learning_rate": 1.665815249983342e-09, - "loss": 4.7435, - "step": 15000 - }, - { - "epoch": 0.17, - "learning_rate": 1.7213424249827865e-09, - "loss": 4.7479, - "step": 15500 - }, - { - "epoch": 0.18, - "learning_rate": 1.7768695999822313e-09, - "loss": 4.7619, - "step": 16000 - }, - { - "epoch": 0.18, - "learning_rate": 1.832396774981676e-09, - "loss": 4.7526, - "step": 16500 - }, - { - "epoch": 0.19, - "learning_rate": 1.8879239499811207e-09, - "loss": 4.7487, - "step": 17000 - }, - { - "epoch": 0.19, - "learning_rate": 1.9434511249805653e-09, - "loss": 4.7383, - "step": 17500 - }, - { - "epoch": 0.2, - "learning_rate": 1.99897829998001e-09, - "loss": 4.7341, - "step": 18000 - }, - { - "epoch": 0.21, - "learning_rate": 2.054505474979455e-09, - "loss": 4.7364, - "step": 18500 - }, - { - "epoch": 0.21, - "learning_rate": 2.1100326499788995e-09, - "loss": 4.7539, - "step": 19000 - }, - { - "epoch": 0.22, - "learning_rate": 2.1655598249783445e-09, - "loss": 4.7276, - "step": 19500 - }, - { - "epoch": 0.22, - "learning_rate": 2.221086999977789e-09, - "loss": 4.7091, - "step": 20000 - }, - { - "epoch": 0.23, - "learning_rate": 2.2766141749772337e-09, - "loss": 4.7189, - "step": 20500 - }, - { - "epoch": 0.23, - "learning_rate": 2.3321413499766787e-09, - "loss": 4.727, - "step": 21000 - }, - { - "epoch": 0.24, - "learning_rate": 2.3876685249761233e-09, - "loss": 4.7139, - "step": 21500 - }, - { - "epoch": 0.24, - "learning_rate": 2.443195699975568e-09, - "loss": 4.7044, - "step": 22000 - }, - { - "epoch": 0.25, - "learning_rate": 2.4987228749750124e-09, - "loss": 4.7405, - "step": 22500 - }, - { - "epoch": 0.26, - "learning_rate": 2.5542500499744574e-09, - "loss": 4.7065, - "step": 23000 - }, - { - "epoch": 0.26, - "learning_rate": 2.609777224973902e-09, - "loss": 4.7193, - "step": 23500 - }, - { - "epoch": 0.27, - "learning_rate": 2.665304399973347e-09, - "loss": 4.6842, - "step": 24000 - }, - { - "epoch": 0.27, - "learning_rate": 2.7208315749727912e-09, - "loss": 4.7172, - "step": 24500 - }, - { - "epoch": 0.28, - "learning_rate": 2.7763587499722362e-09, - "loss": 4.6902, - "step": 25000 - }, - { - "epoch": 0.28, - "learning_rate": 2.831885924971681e-09, - "loss": 4.6976, - "step": 25500 - }, - { - "epoch": 0.29, - "learning_rate": 2.887413099971126e-09, - "loss": 4.6845, - "step": 26000 - }, - { - "epoch": 0.29, - "learning_rate": 2.9429402749705704e-09, - "loss": 4.6812, - "step": 26500 - }, - { - "epoch": 0.3, - "learning_rate": 2.998467449970015e-09, - "loss": 4.6999, - "step": 27000 - }, - { - "epoch": 0.31, - "learning_rate": 3.05399462496946e-09, - "loss": 4.7025, - "step": 27500 - }, - { - "epoch": 0.31, - "learning_rate": 3.1095217999689046e-09, - "loss": 4.6931, - "step": 28000 - }, - { - "epoch": 0.32, - "learning_rate": 3.165048974968349e-09, - "loss": 4.6684, - "step": 28500 - }, - { - "epoch": 0.32, - "learning_rate": 3.2205761499677942e-09, - "loss": 4.6757, - "step": 29000 - }, - { - "epoch": 0.33, - "learning_rate": 3.276103324967239e-09, - "loss": 4.652, - "step": 29500 - }, - { - "epoch": 0.33, - "learning_rate": 3.331630499966684e-09, - "loss": 4.6497, - "step": 30000 - }, - { - "epoch": 0.34, - "learning_rate": 3.3871576749661284e-09, - "loss": 4.665, - "step": 30500 - }, - { - "epoch": 0.34, - "learning_rate": 3.442684849965573e-09, - "loss": 4.6444, - "step": 31000 - }, - { - "epoch": 0.35, - "learning_rate": 3.498212024965018e-09, - "loss": 4.6672, - "step": 31500 - }, - { - "epoch": 0.36, - "learning_rate": 3.5537391999644626e-09, - "loss": 4.6469, - "step": 32000 - }, - { - "epoch": 0.36, - "learning_rate": 3.6092663749639068e-09, - "loss": 4.6385, - "step": 32500 - }, - { - "epoch": 0.37, - "learning_rate": 3.664793549963352e-09, - "loss": 4.6352, - "step": 33000 - }, - { - "epoch": 0.37, - "learning_rate": 3.7203207249627964e-09, - "loss": 4.6436, - "step": 33500 - }, - { - "epoch": 0.38, - "learning_rate": 3.775847899962241e-09, - "loss": 4.6293, - "step": 34000 - }, - { - "epoch": 0.38, - "learning_rate": 3.831375074961686e-09, - "loss": 4.6468, - "step": 34500 - }, - { - "epoch": 0.39, - "learning_rate": 3.8869022499611306e-09, - "loss": 4.6267, - "step": 35000 - }, - { - "epoch": 0.39, - "learning_rate": 3.9424294249605756e-09, - "loss": 4.6408, - "step": 35500 - }, - { - "epoch": 0.4, - "learning_rate": 3.99795659996002e-09, - "loss": 4.629, - "step": 36000 - }, - { - "epoch": 0.41, - "learning_rate": 4.0534837749594656e-09, - "loss": 4.6255, - "step": 36500 - }, - { - "epoch": 0.41, - "learning_rate": 4.10901094995891e-09, - "loss": 4.6302, - "step": 37000 - }, - { - "epoch": 0.42, - "learning_rate": 4.164538124958354e-09, - "loss": 4.5945, - "step": 37500 - }, - { - "epoch": 0.42, - "learning_rate": 4.220065299957799e-09, - "loss": 4.6059, - "step": 38000 - }, - { - "epoch": 0.43, - "learning_rate": 4.275592474957244e-09, - "loss": 4.5908, - "step": 38500 - }, - { - "epoch": 0.43, - "learning_rate": 4.331119649956689e-09, - "loss": 4.5667, - "step": 39000 - }, - { - "epoch": 0.44, - "learning_rate": 4.386646824956133e-09, - "loss": 4.5727, - "step": 39500 - }, - { - "epoch": 0.44, - "learning_rate": 4.442173999955578e-09, - "loss": 4.5981, - "step": 40000 - }, - { - "epoch": 0.45, - "learning_rate": 4.497701174955023e-09, - "loss": 4.5854, - "step": 40500 - }, - { - "epoch": 0.46, - "learning_rate": 4.553228349954467e-09, - "loss": 4.583, - "step": 41000 - }, - { - "epoch": 0.46, - "learning_rate": 4.608755524953912e-09, - "loss": 4.5917, - "step": 41500 - }, - { - "epoch": 0.47, - "learning_rate": 4.664282699953357e-09, - "loss": 4.5816, - "step": 42000 - }, - { - "epoch": 0.47, - "learning_rate": 4.7198098749528015e-09, - "loss": 4.5827, - "step": 42500 - }, - { - "epoch": 0.48, - "learning_rate": 4.7753370499522465e-09, - "loss": 4.5674, - "step": 43000 - }, - { - "epoch": 0.48, - "learning_rate": 4.830864224951691e-09, - "loss": 4.5866, - "step": 43500 - }, - { - "epoch": 0.49, - "learning_rate": 4.886391399951136e-09, - "loss": 4.5659, - "step": 44000 - }, - { - "epoch": 0.49, - "learning_rate": 4.941918574950581e-09, - "loss": 4.553, - "step": 44500 - }, - { - "epoch": 0.5, - "learning_rate": 4.997445749950025e-09, - "loss": 4.5593, - "step": 45000 - }, - { - "epoch": 0.51, - "learning_rate": 5.05297292494947e-09, - "loss": 4.5629, - "step": 45500 - }, - { - "epoch": 0.51, - "learning_rate": 5.108500099948915e-09, - "loss": 4.5519, - "step": 46000 - }, - { - "epoch": 0.52, - "learning_rate": 5.164027274948359e-09, - "loss": 4.5401, - "step": 46500 - }, - { - "epoch": 0.52, - "learning_rate": 5.219554449947804e-09, - "loss": 4.5342, - "step": 47000 - }, - { - "epoch": 0.53, - "learning_rate": 5.275081624947249e-09, - "loss": 4.5381, - "step": 47500 - }, - { - "epoch": 0.53, - "learning_rate": 5.330608799946694e-09, - "loss": 4.5256, - "step": 48000 - }, - { - "epoch": 0.54, - "learning_rate": 5.386135974946138e-09, - "loss": 4.5267, - "step": 48500 - }, - { - "epoch": 0.54, - "learning_rate": 5.4416631499455825e-09, - "loss": 4.543, - "step": 49000 - }, - { - "epoch": 0.55, - "learning_rate": 5.497190324945028e-09, - "loss": 4.5319, - "step": 49500 - }, - { - "epoch": 0.56, - "learning_rate": 5.5527174999444725e-09, - "loss": 4.5299, - "step": 50000 - }, - { - "epoch": 0.56, - "learning_rate": 5.6082446749439175e-09, - "loss": 4.5117, - "step": 50500 - }, - { - "epoch": 0.57, - "learning_rate": 5.663771849943362e-09, - "loss": 4.5291, - "step": 51000 - }, - { - "epoch": 0.57, - "learning_rate": 5.719299024942807e-09, - "loss": 4.5038, - "step": 51500 - }, - { - "epoch": 0.58, - "learning_rate": 5.774826199942252e-09, - "loss": 4.5173, - "step": 52000 - }, - { - "epoch": 0.58, - "learning_rate": 5.830353374941696e-09, - "loss": 4.5036, - "step": 52500 - }, - { - "epoch": 0.59, - "learning_rate": 5.885880549941141e-09, - "loss": 4.5334, - "step": 53000 - }, - { - "epoch": 0.59, - "learning_rate": 5.941407724940586e-09, - "loss": 4.4966, - "step": 53500 - }, - { - "epoch": 0.6, - "learning_rate": 5.99693489994003e-09, - "loss": 4.4683, - "step": 54000 - }, - { - "epoch": 0.61, - "learning_rate": 6.052462074939475e-09, - "loss": 4.5084, - "step": 54500 - }, - { - "epoch": 0.61, - "learning_rate": 6.10798924993892e-09, - "loss": 4.4961, - "step": 55000 - }, - { - "epoch": 0.62, - "learning_rate": 6.163516424938364e-09, - "loss": 4.511, - "step": 55500 - }, - { - "epoch": 0.62, - "learning_rate": 6.219043599937809e-09, - "loss": 4.4621, - "step": 56000 - }, - { - "epoch": 0.63, - "learning_rate": 6.274570774937254e-09, - "loss": 4.488, - "step": 56500 - }, - { - "epoch": 0.63, - "learning_rate": 6.330097949936698e-09, - "loss": 4.4761, - "step": 57000 - }, - { - "epoch": 0.64, - "learning_rate": 6.3856251249361434e-09, - "loss": 4.487, - "step": 57500 - }, - { - "epoch": 0.64, - "learning_rate": 6.4411522999355884e-09, - "loss": 4.4958, - "step": 58000 - }, - { - "epoch": 0.65, - "learning_rate": 6.496679474935033e-09, - "loss": 4.4825, - "step": 58500 - }, - { - "epoch": 0.66, - "learning_rate": 6.552206649934478e-09, - "loss": 4.4856, - "step": 59000 - }, - { - "epoch": 0.66, - "learning_rate": 6.607733824933923e-09, - "loss": 4.4724, - "step": 59500 - }, - { - "epoch": 0.67, - "learning_rate": 6.663260999933368e-09, - "loss": 4.4476, - "step": 60000 - }, - { - "epoch": 0.67, - "learning_rate": 6.718788174932812e-09, - "loss": 4.4582, - "step": 60500 - }, - { - "epoch": 0.68, - "learning_rate": 6.774315349932257e-09, - "loss": 4.4804, - "step": 61000 - }, - { - "epoch": 0.68, - "learning_rate": 6.829842524931702e-09, - "loss": 4.4752, - "step": 61500 - }, - { - "epoch": 0.69, - "learning_rate": 6.885369699931146e-09, - "loss": 4.4499, - "step": 62000 - }, - { - "epoch": 0.69, - "learning_rate": 6.940896874930591e-09, - "loss": 4.4592, - "step": 62500 - }, - { - "epoch": 0.7, - "learning_rate": 6.996424049930036e-09, - "loss": 4.4546, - "step": 63000 - }, - { - "epoch": 0.71, - "learning_rate": 7.051951224929479e-09, - "loss": 4.4364, - "step": 63500 - }, - { - "epoch": 0.71, - "learning_rate": 7.107478399928925e-09, - "loss": 4.4465, - "step": 64000 - }, - { - "epoch": 0.72, - "learning_rate": 7.16300557492837e-09, - "loss": 4.4404, - "step": 64500 - }, - { - "epoch": 0.72, - "learning_rate": 7.2185327499278135e-09, - "loss": 4.4519, - "step": 65000 - }, - { - "epoch": 0.73, - "learning_rate": 7.2740599249272585e-09, - "loss": 4.4402, - "step": 65500 - }, - { - "epoch": 0.73, - "learning_rate": 7.329587099926704e-09, - "loss": 4.4382, - "step": 66000 - }, - { - "epoch": 0.74, - "learning_rate": 7.385114274926149e-09, - "loss": 4.4386, - "step": 66500 - }, - { - "epoch": 0.74, - "learning_rate": 7.440641449925593e-09, - "loss": 4.4537, - "step": 67000 - }, - { - "epoch": 0.75, - "learning_rate": 7.496168624925039e-09, - "loss": 4.4171, - "step": 67500 - }, - { - "epoch": 0.76, - "learning_rate": 7.551695799924483e-09, - "loss": 4.425, - "step": 68000 - }, - { - "epoch": 0.76, - "learning_rate": 7.607222974923927e-09, - "loss": 4.4289, - "step": 68500 - }, - { - "epoch": 0.77, - "learning_rate": 7.662750149923373e-09, - "loss": 4.4365, - "step": 69000 - }, - { - "epoch": 0.77, - "learning_rate": 7.718277324922817e-09, - "loss": 4.4359, - "step": 69500 - }, - { - "epoch": 0.78, - "learning_rate": 7.773804499922261e-09, - "loss": 4.4229, - "step": 70000 - }, - { - "epoch": 0.78, - "learning_rate": 7.829331674921707e-09, - "loss": 4.4428, - "step": 70500 - }, - { - "epoch": 0.79, - "learning_rate": 7.884858849921151e-09, - "loss": 4.4071, - "step": 71000 - }, - { - "epoch": 0.79, - "learning_rate": 7.940386024920597e-09, - "loss": 4.419, - "step": 71500 - }, - { - "epoch": 0.8, - "learning_rate": 7.99591319992004e-09, - "loss": 4.4022, - "step": 72000 - }, - { - "epoch": 0.81, - "learning_rate": 8.051440374919485e-09, - "loss": 4.4226, - "step": 72500 - }, - { - "epoch": 0.81, - "learning_rate": 8.106967549918931e-09, - "loss": 4.4109, - "step": 73000 - }, - { - "epoch": 0.82, - "learning_rate": 8.162494724918374e-09, - "loss": 4.413, - "step": 73500 - }, - { - "epoch": 0.82, - "learning_rate": 8.21802189991782e-09, - "loss": 4.4194, - "step": 74000 - }, - { - "epoch": 0.83, - "learning_rate": 8.273549074917265e-09, - "loss": 4.3956, - "step": 74500 - }, - { - "epoch": 0.83, - "learning_rate": 8.329076249916708e-09, - "loss": 4.4074, - "step": 75000 - }, - { - "epoch": 0.84, - "learning_rate": 8.384603424916154e-09, - "loss": 4.4119, - "step": 75500 - }, - { - "epoch": 0.84, - "learning_rate": 8.440130599915598e-09, - "loss": 4.3935, - "step": 76000 - }, - { - "epoch": 0.85, - "learning_rate": 8.495657774915042e-09, - "loss": 4.4161, - "step": 76500 - }, - { - "epoch": 0.86, - "learning_rate": 8.551184949914488e-09, - "loss": 4.3867, - "step": 77000 - }, - { - "epoch": 0.86, - "learning_rate": 8.606712124913932e-09, - "loss": 4.425, - "step": 77500 - }, - { - "epoch": 0.87, - "learning_rate": 8.662239299913378e-09, - "loss": 4.3904, - "step": 78000 - }, - { - "epoch": 0.87, - "learning_rate": 8.717766474912822e-09, - "loss": 4.3925, - "step": 78500 - }, - { - "epoch": 0.88, - "learning_rate": 8.773293649912266e-09, - "loss": 4.3798, - "step": 79000 - }, - { - "epoch": 0.88, - "learning_rate": 8.828820824911712e-09, - "loss": 4.3841, - "step": 79500 - }, - { - "epoch": 0.89, - "learning_rate": 8.884347999911156e-09, - "loss": 4.3653, - "step": 80000 - }, - { - "epoch": 0.89, - "learning_rate": 8.9398751749106e-09, - "loss": 4.3873, - "step": 80500 - }, - { - "epoch": 0.9, - "learning_rate": 8.995402349910046e-09, - "loss": 4.3724, - "step": 81000 - }, - { - "epoch": 0.91, - "learning_rate": 9.05092952490949e-09, - "loss": 4.3679, - "step": 81500 - }, - { - "epoch": 0.91, - "learning_rate": 9.106456699908935e-09, - "loss": 4.3533, - "step": 82000 - }, - { - "epoch": 0.92, - "learning_rate": 9.16198387490838e-09, - "loss": 4.3635, - "step": 82500 - }, - { - "epoch": 0.92, - "learning_rate": 9.217511049907825e-09, - "loss": 4.361, - "step": 83000 - }, - { - "epoch": 0.93, - "learning_rate": 9.273038224907269e-09, - "loss": 4.3852, - "step": 83500 - }, - { - "epoch": 0.93, - "learning_rate": 9.328565399906715e-09, - "loss": 4.386, - "step": 84000 - }, - { - "epoch": 0.94, - "learning_rate": 9.384092574906159e-09, - "loss": 4.3636, - "step": 84500 - }, - { - "epoch": 0.94, - "learning_rate": 9.439619749905603e-09, - "loss": 4.3734, - "step": 85000 - }, - { - "epoch": 0.95, - "learning_rate": 9.495146924905049e-09, - "loss": 4.3527, - "step": 85500 - }, - { - "epoch": 0.96, - "learning_rate": 9.550674099904493e-09, - "loss": 4.3573, - "step": 86000 - }, - { - "epoch": 0.96, - "learning_rate": 9.606201274903937e-09, - "loss": 4.3753, - "step": 86500 - }, - { - "epoch": 0.97, - "learning_rate": 9.661728449903381e-09, - "loss": 4.3663, - "step": 87000 - }, - { - "epoch": 0.97, - "learning_rate": 9.717255624902827e-09, - "loss": 4.3617, - "step": 87500 - }, - { - "epoch": 0.98, - "learning_rate": 9.772782799902271e-09, - "loss": 4.3662, - "step": 88000 - }, - { - "epoch": 0.98, - "learning_rate": 9.828309974901716e-09, - "loss": 4.3652, - "step": 88500 - }, - { - "epoch": 0.99, - "learning_rate": 9.883837149901161e-09, - "loss": 4.3617, - "step": 89000 - }, - { - "epoch": 0.99, - "learning_rate": 9.939364324900607e-09, - "loss": 4.351, - "step": 89500 - }, - { - "epoch": 1.0, - "learning_rate": 9.99489149990005e-09, - "loss": 4.3559, - "step": 90000 - }, - { - "epoch": 1.0, - "eval_loss": 4.290574073791504, - "eval_runtime": 6.3081, - "eval_samples_per_second": 246.35, - "step": 90046 - }, - { - "epoch": 1.01, - "learning_rate": 1.0050418674899496e-08, - "loss": 4.357, - "step": 90500 - }, - { - "epoch": 1.01, - "learning_rate": 1.010594584989894e-08, - "loss": 4.324, - "step": 91000 - }, - { - "epoch": 1.02, - "learning_rate": 1.0161473024898384e-08, - "loss": 4.34, - "step": 91500 - }, - { - "epoch": 1.02, - "learning_rate": 1.021700019989783e-08, - "loss": 4.3296, - "step": 92000 - }, - { - "epoch": 1.03, - "learning_rate": 1.0272527374897274e-08, - "loss": 4.3347, - "step": 92500 - }, - { - "epoch": 1.03, - "learning_rate": 1.0328054549896718e-08, - "loss": 4.328, - "step": 93000 - }, - { - "epoch": 1.04, - "learning_rate": 1.0383581724896164e-08, - "loss": 4.337, - "step": 93500 - }, - { - "epoch": 1.04, - "learning_rate": 1.0439108899895608e-08, - "loss": 4.3266, - "step": 94000 - }, - { - "epoch": 1.05, - "learning_rate": 1.0494636074895054e-08, - "loss": 4.3219, - "step": 94500 - }, - { - "epoch": 1.06, - "learning_rate": 1.0550163249894498e-08, - "loss": 4.3348, - "step": 95000 - }, - { - "epoch": 1.06, - "learning_rate": 1.0605690424893942e-08, - "loss": 4.3462, - "step": 95500 - }, - { - "epoch": 1.07, - "learning_rate": 1.0661217599893388e-08, - "loss": 4.3301, - "step": 96000 - }, - { - "epoch": 1.07, - "learning_rate": 1.0716744774892832e-08, - "loss": 4.3296, - "step": 96500 - }, - { - "epoch": 1.08, - "learning_rate": 1.0772271949892277e-08, - "loss": 4.3241, - "step": 97000 - }, - { - "epoch": 1.08, - "learning_rate": 1.0827799124891722e-08, - "loss": 4.3226, - "step": 97500 - }, - { - "epoch": 1.09, - "learning_rate": 1.0883326299891165e-08, - "loss": 4.3247, - "step": 98000 - }, - { - "epoch": 1.09, - "learning_rate": 1.093885347489061e-08, - "loss": 4.3166, - "step": 98500 - }, - { - "epoch": 1.1, - "learning_rate": 1.0994380649890057e-08, - "loss": 4.3325, - "step": 99000 - }, - { - "epoch": 1.1, - "learning_rate": 1.1049907824889499e-08, - "loss": 4.3388, - "step": 99500 - }, - { - "epoch": 1.11, - "learning_rate": 1.1105434999888945e-08, - "loss": 4.3162, - "step": 100000 - }, - { - "epoch": 1.12, - "learning_rate": 1.116096217488839e-08, - "loss": 4.3019, - "step": 100500 - }, - { - "epoch": 1.12, - "learning_rate": 1.1216489349887835e-08, - "loss": 4.3087, - "step": 101000 - }, - { - "epoch": 1.13, - "learning_rate": 1.1272016524887279e-08, - "loss": 4.3023, - "step": 101500 - }, - { - "epoch": 1.13, - "learning_rate": 1.1327543699886723e-08, - "loss": 4.3206, - "step": 102000 - }, - { - "epoch": 1.14, - "learning_rate": 1.1383070874886169e-08, - "loss": 4.2908, - "step": 102500 - }, - { - "epoch": 1.14, - "learning_rate": 1.1438598049885613e-08, - "loss": 4.3311, - "step": 103000 - }, - { - "epoch": 1.15, - "learning_rate": 1.1494125224885057e-08, - "loss": 4.3057, - "step": 103500 - }, - { - "epoch": 1.15, - "learning_rate": 1.1549652399884503e-08, - "loss": 4.305, - "step": 104000 - }, - { - "epoch": 1.16, - "learning_rate": 1.1605179574883947e-08, - "loss": 4.2982, - "step": 104500 - }, - { - "epoch": 1.17, - "learning_rate": 1.1660706749883392e-08, - "loss": 4.2979, - "step": 105000 - }, - { - "epoch": 1.17, - "learning_rate": 1.1716233924882838e-08, - "loss": 4.2965, - "step": 105500 - }, - { - "epoch": 1.18, - "learning_rate": 1.1771761099882282e-08, - "loss": 4.3031, - "step": 106000 - }, - { - "epoch": 1.18, - "learning_rate": 1.1827288274881726e-08, - "loss": 4.2963, - "step": 106500 - }, - { - "epoch": 1.19, - "learning_rate": 1.1882815449881172e-08, - "loss": 4.2962, - "step": 107000 - }, - { - "epoch": 1.19, - "learning_rate": 1.1938342624880616e-08, - "loss": 4.3185, - "step": 107500 - }, - { - "epoch": 1.2, - "learning_rate": 1.199386979988006e-08, - "loss": 4.281, - "step": 108000 - }, - { - "epoch": 1.2, - "learning_rate": 1.2049396974879506e-08, - "loss": 4.2945, - "step": 108500 - }, - { - "epoch": 1.21, - "learning_rate": 1.210492414987895e-08, - "loss": 4.2983, - "step": 109000 - }, - { - "epoch": 1.22, - "learning_rate": 1.2160451324878394e-08, - "loss": 4.3044, - "step": 109500 - }, - { - "epoch": 1.22, - "learning_rate": 1.221597849987784e-08, - "loss": 4.2755, - "step": 110000 - }, - { - "epoch": 1.23, - "learning_rate": 1.2271505674877284e-08, - "loss": 4.2836, - "step": 110500 - }, - { - "epoch": 1.23, - "learning_rate": 1.2327032849876728e-08, - "loss": 4.2827, - "step": 111000 - }, - { - "epoch": 1.24, - "learning_rate": 1.2382560024876174e-08, - "loss": 4.2843, - "step": 111500 - }, - { - "epoch": 1.24, - "learning_rate": 1.2438087199875618e-08, - "loss": 4.2762, - "step": 112000 - }, - { - "epoch": 1.25, - "learning_rate": 1.2493614374875064e-08, - "loss": 4.2847, - "step": 112500 - }, - { - "epoch": 1.25, - "learning_rate": 1.2549141549874508e-08, - "loss": 4.2741, - "step": 113000 - }, - { - "epoch": 1.26, - "learning_rate": 1.2604668724873951e-08, - "loss": 4.2848, - "step": 113500 - }, - { - "epoch": 1.27, - "learning_rate": 1.2660195899873397e-08, - "loss": 4.257, - "step": 114000 - }, - { - "epoch": 1.27, - "learning_rate": 1.2715723074872841e-08, - "loss": 4.2695, - "step": 114500 - }, - { - "epoch": 1.28, - "learning_rate": 1.2771250249872287e-08, - "loss": 4.2666, - "step": 115000 - }, - { - "epoch": 1.28, - "learning_rate": 1.2826777424871733e-08, - "loss": 4.2889, - "step": 115500 - }, - { - "epoch": 1.29, - "learning_rate": 1.2882304599871177e-08, - "loss": 4.2677, - "step": 116000 - }, - { - "epoch": 1.29, - "learning_rate": 1.2937831774870623e-08, - "loss": 4.2612, - "step": 116500 - }, - { - "epoch": 1.3, - "learning_rate": 1.2993358949870065e-08, - "loss": 4.2597, - "step": 117000 - }, - { - "epoch": 1.3, - "learning_rate": 1.304888612486951e-08, - "loss": 4.2624, - "step": 117500 - }, - { - "epoch": 1.31, - "learning_rate": 1.3104413299868955e-08, - "loss": 4.2755, - "step": 118000 - }, - { - "epoch": 1.32, - "learning_rate": 1.31599404748684e-08, - "loss": 4.2701, - "step": 118500 - }, - { - "epoch": 1.32, - "learning_rate": 1.3215467649867845e-08, - "loss": 4.2737, - "step": 119000 - }, - { - "epoch": 1.33, - "learning_rate": 1.3270994824867291e-08, - "loss": 4.2509, - "step": 119500 - }, - { - "epoch": 1.33, - "learning_rate": 1.3326521999866735e-08, - "loss": 4.2591, - "step": 120000 - }, - { - "epoch": 1.34, - "learning_rate": 1.3382049174866178e-08, - "loss": 4.2735, - "step": 120500 - }, - { - "epoch": 1.34, - "learning_rate": 1.3437576349865624e-08, - "loss": 4.2559, - "step": 121000 - }, - { - "epoch": 1.35, - "learning_rate": 1.3493103524865068e-08, - "loss": 4.2355, - "step": 121500 - }, - { - "epoch": 1.35, - "learning_rate": 1.3548630699864514e-08, - "loss": 4.2611, - "step": 122000 - }, - { - "epoch": 1.36, - "learning_rate": 1.3604157874863958e-08, - "loss": 4.2562, - "step": 122500 - }, - { - "epoch": 1.37, - "learning_rate": 1.3659685049863404e-08, - "loss": 4.2578, - "step": 123000 - }, - { - "epoch": 1.37, - "learning_rate": 1.3715212224862846e-08, - "loss": 4.2806, - "step": 123500 - }, - { - "epoch": 1.38, - "learning_rate": 1.3770739399862292e-08, - "loss": 4.2339, - "step": 124000 - }, - { - "epoch": 1.38, - "learning_rate": 1.3826266574861736e-08, - "loss": 4.2419, - "step": 124500 - }, - { - "epoch": 1.39, - "learning_rate": 1.3881793749861182e-08, - "loss": 4.2429, - "step": 125000 - }, - { - "epoch": 1.39, - "learning_rate": 1.3937320924860626e-08, - "loss": 4.2233, - "step": 125500 - }, - { - "epoch": 1.4, - "learning_rate": 1.3992848099860072e-08, - "loss": 4.2532, - "step": 126000 - }, - { - "epoch": 1.4, - "learning_rate": 1.4048375274859516e-08, - "loss": 4.2413, - "step": 126500 - }, - { - "epoch": 1.41, - "learning_rate": 1.4103902449858959e-08, - "loss": 4.2578, - "step": 127000 - }, - { - "epoch": 1.42, - "learning_rate": 1.4159429624858405e-08, - "loss": 4.2444, - "step": 127500 - }, - { - "epoch": 1.42, - "learning_rate": 1.421495679985785e-08, - "loss": 4.2482, - "step": 128000 - }, - { - "epoch": 1.43, - "learning_rate": 1.4270483974857295e-08, - "loss": 4.2213, - "step": 128500 - }, - { - "epoch": 1.43, - "learning_rate": 1.432601114985674e-08, - "loss": 4.2461, - "step": 129000 - }, - { - "epoch": 1.44, - "learning_rate": 1.4381538324856185e-08, - "loss": 4.2304, - "step": 129500 - }, - { - "epoch": 1.44, - "learning_rate": 1.4437065499855627e-08, - "loss": 4.2312, - "step": 130000 - }, - { - "epoch": 1.45, - "learning_rate": 1.4492592674855073e-08, - "loss": 4.2371, - "step": 130500 - }, - { - "epoch": 1.45, - "learning_rate": 1.4548119849854517e-08, - "loss": 4.225, - "step": 131000 - }, - { - "epoch": 1.46, - "learning_rate": 1.4603647024853963e-08, - "loss": 4.2405, - "step": 131500 - }, - { - "epoch": 1.47, - "learning_rate": 1.4659174199853409e-08, - "loss": 4.2358, - "step": 132000 - }, - { - "epoch": 1.47, - "learning_rate": 1.4714701374852853e-08, - "loss": 4.2158, - "step": 132500 - }, - { - "epoch": 1.48, - "learning_rate": 1.4770228549852299e-08, - "loss": 4.2217, - "step": 133000 - }, - { - "epoch": 1.48, - "learning_rate": 1.4825755724851741e-08, - "loss": 4.2346, - "step": 133500 - }, - { - "epoch": 1.49, - "learning_rate": 1.4881282899851185e-08, - "loss": 4.2364, - "step": 134000 - }, - { - "epoch": 1.49, - "learning_rate": 1.493681007485063e-08, - "loss": 4.2242, - "step": 134500 - }, - { - "epoch": 1.5, - "learning_rate": 1.4992337249850077e-08, - "loss": 4.2359, - "step": 135000 - }, - { - "epoch": 1.5, - "learning_rate": 1.504786442484952e-08, - "loss": 4.2528, - "step": 135500 - }, - { - "epoch": 1.51, - "learning_rate": 1.5103391599848965e-08, - "loss": 4.2199, - "step": 136000 - }, - { - "epoch": 1.52, - "learning_rate": 1.515891877484841e-08, - "loss": 4.2272, - "step": 136500 - }, - { - "epoch": 1.52, - "learning_rate": 1.5214445949847854e-08, - "loss": 4.217, - "step": 137000 - }, - { - "epoch": 1.53, - "learning_rate": 1.5269973124847298e-08, - "loss": 4.2113, - "step": 137500 - }, - { - "epoch": 1.53, - "learning_rate": 1.5325500299846746e-08, - "loss": 4.2396, - "step": 138000 - }, - { - "epoch": 1.54, - "learning_rate": 1.538102747484619e-08, - "loss": 4.2435, - "step": 138500 - }, - { - "epoch": 1.54, - "learning_rate": 1.5436554649845634e-08, - "loss": 4.2247, - "step": 139000 - }, - { - "epoch": 1.55, - "learning_rate": 1.5492081824845078e-08, - "loss": 4.2265, - "step": 139500 - }, - { - "epoch": 1.55, - "learning_rate": 1.5547608999844522e-08, - "loss": 4.2093, - "step": 140000 - }, - { - "epoch": 1.56, - "learning_rate": 1.5603136174843966e-08, - "loss": 4.2299, - "step": 140500 - }, - { - "epoch": 1.57, - "learning_rate": 1.5658663349843414e-08, - "loss": 4.2198, - "step": 141000 - }, - { - "epoch": 1.57, - "learning_rate": 1.5714190524842858e-08, - "loss": 4.1949, - "step": 141500 - }, - { - "epoch": 1.58, - "learning_rate": 1.5769717699842302e-08, - "loss": 4.2192, - "step": 142000 - }, - { - "epoch": 1.58, - "learning_rate": 1.5825244874841746e-08, - "loss": 4.2192, - "step": 142500 - }, - { - "epoch": 1.59, - "learning_rate": 1.5880772049841194e-08, - "loss": 4.2136, - "step": 143000 - }, - { - "epoch": 1.59, - "learning_rate": 1.5936299224840635e-08, - "loss": 4.2102, - "step": 143500 - }, - { - "epoch": 1.6, - "learning_rate": 1.599182639984008e-08, - "loss": 4.2302, - "step": 144000 - }, - { - "epoch": 1.6, - "learning_rate": 1.6047353574839526e-08, - "loss": 4.2161, - "step": 144500 - }, - { - "epoch": 1.61, - "learning_rate": 1.610288074983897e-08, - "loss": 4.2064, - "step": 145000 - }, - { - "epoch": 1.62, - "learning_rate": 1.6158407924838415e-08, - "loss": 4.217, - "step": 145500 - }, - { - "epoch": 1.62, - "learning_rate": 1.6213935099837862e-08, - "loss": 4.2203, - "step": 146000 - }, - { - "epoch": 1.63, - "learning_rate": 1.6269462274837303e-08, - "loss": 4.1943, - "step": 146500 - }, - { - "epoch": 1.63, - "learning_rate": 1.6324989449836747e-08, - "loss": 4.2022, - "step": 147000 - }, - { - "epoch": 1.64, - "learning_rate": 1.6380516624836195e-08, - "loss": 4.2151, - "step": 147500 - }, - { - "epoch": 1.64, - "learning_rate": 1.643604379983564e-08, - "loss": 4.2117, - "step": 148000 - }, - { - "epoch": 1.65, - "learning_rate": 1.6491570974835083e-08, - "loss": 4.2076, - "step": 148500 - }, - { - "epoch": 1.65, - "learning_rate": 1.654709814983453e-08, - "loss": 4.1816, - "step": 149000 - }, - { - "epoch": 1.66, - "learning_rate": 1.6602625324833975e-08, - "loss": 4.2007, - "step": 149500 - }, - { - "epoch": 1.67, - "learning_rate": 1.6658152499833416e-08, - "loss": 4.1998, - "step": 150000 - }, - { - "epoch": 1.67, - "learning_rate": 1.6713679674832863e-08, - "loss": 4.1838, - "step": 150500 - }, - { - "epoch": 1.68, - "learning_rate": 1.6769206849832307e-08, - "loss": 4.2061, - "step": 151000 - }, - { - "epoch": 1.68, - "learning_rate": 1.682473402483175e-08, - "loss": 4.1925, - "step": 151500 - }, - { - "epoch": 1.69, - "learning_rate": 1.6880261199831196e-08, - "loss": 4.2042, - "step": 152000 - }, - { - "epoch": 1.69, - "learning_rate": 1.6935788374830643e-08, - "loss": 4.1919, - "step": 152500 - }, - { - "epoch": 1.7, - "learning_rate": 1.6991315549830084e-08, - "loss": 4.1963, - "step": 153000 - }, - { - "epoch": 1.7, - "learning_rate": 1.7046842724829528e-08, - "loss": 4.2059, - "step": 153500 - }, - { - "epoch": 1.71, - "learning_rate": 1.7102369899828976e-08, - "loss": 4.1924, - "step": 154000 - }, - { - "epoch": 1.72, - "learning_rate": 1.715789707482842e-08, - "loss": 4.1929, - "step": 154500 - }, - { - "epoch": 1.72, - "learning_rate": 1.7213424249827864e-08, - "loss": 4.1895, - "step": 155000 - }, - { - "epoch": 1.73, - "learning_rate": 1.726895142482731e-08, - "loss": 4.1929, - "step": 155500 - }, - { - "epoch": 1.73, - "learning_rate": 1.7324478599826756e-08, - "loss": 4.195, - "step": 156000 - }, - { - "epoch": 1.74, - "learning_rate": 1.7380005774826197e-08, - "loss": 4.1959, - "step": 156500 - }, - { - "epoch": 1.74, - "learning_rate": 1.7435532949825644e-08, - "loss": 4.1842, - "step": 157000 - }, - { - "epoch": 1.75, - "learning_rate": 1.7491060124825088e-08, - "loss": 4.1645, - "step": 157500 - }, - { - "epoch": 1.75, - "learning_rate": 1.7546587299824533e-08, - "loss": 4.1997, - "step": 158000 - }, - { - "epoch": 1.76, - "learning_rate": 1.760211447482398e-08, - "loss": 4.1824, - "step": 158500 - }, - { - "epoch": 1.77, - "learning_rate": 1.7657641649823424e-08, - "loss": 4.2048, - "step": 159000 - }, - { - "epoch": 1.77, - "learning_rate": 1.771316882482287e-08, - "loss": 4.1896, - "step": 159500 - }, - { - "epoch": 1.78, - "learning_rate": 1.7768695999822313e-08, - "loss": 4.168, - "step": 160000 - }, - { - "epoch": 1.78, - "learning_rate": 1.7824223174821757e-08, - "loss": 4.1701, - "step": 160500 - }, - { - "epoch": 1.79, - "learning_rate": 1.78797503498212e-08, - "loss": 4.1858, - "step": 161000 - }, - { - "epoch": 1.79, - "learning_rate": 1.7935277524820645e-08, - "loss": 4.162, - "step": 161500 - }, - { - "epoch": 1.8, - "learning_rate": 1.7990804699820093e-08, - "loss": 4.1677, - "step": 162000 - }, - { - "epoch": 1.8, - "learning_rate": 1.8046331874819537e-08, - "loss": 4.1765, - "step": 162500 - }, - { - "epoch": 1.81, - "learning_rate": 1.810185904981898e-08, - "loss": 4.1811, - "step": 163000 - }, - { - "epoch": 1.82, - "learning_rate": 1.8157386224818425e-08, - "loss": 4.184, - "step": 163500 - }, - { - "epoch": 1.82, - "learning_rate": 1.821291339981787e-08, - "loss": 4.1738, - "step": 164000 - }, - { - "epoch": 1.83, - "learning_rate": 1.8268440574817313e-08, - "loss": 4.1816, - "step": 164500 - }, - { - "epoch": 1.83, - "learning_rate": 1.832396774981676e-08, - "loss": 4.1815, - "step": 165000 - }, - { - "epoch": 1.84, - "learning_rate": 1.8379494924816205e-08, - "loss": 4.1761, - "step": 165500 - }, - { - "epoch": 1.84, - "learning_rate": 1.843502209981565e-08, - "loss": 4.1894, - "step": 166000 - }, - { - "epoch": 1.85, - "learning_rate": 1.8490549274815093e-08, - "loss": 4.1747, - "step": 166500 - }, - { - "epoch": 1.85, - "learning_rate": 1.8546076449814538e-08, - "loss": 4.1831, - "step": 167000 - }, - { - "epoch": 1.86, - "learning_rate": 1.8601603624813982e-08, - "loss": 4.1821, - "step": 167500 - }, - { - "epoch": 1.87, - "learning_rate": 1.865713079981343e-08, - "loss": 4.1538, - "step": 168000 - }, - { - "epoch": 1.87, - "learning_rate": 1.8712657974812874e-08, - "loss": 4.171, - "step": 168500 - }, - { - "epoch": 1.88, - "learning_rate": 1.8768185149812318e-08, - "loss": 4.1832, - "step": 169000 - }, - { - "epoch": 1.88, - "learning_rate": 1.8823712324811762e-08, - "loss": 4.1354, - "step": 169500 - }, - { - "epoch": 1.89, - "learning_rate": 1.8879239499811206e-08, - "loss": 4.1606, - "step": 170000 - }, - { - "epoch": 1.89, - "learning_rate": 1.893476667481065e-08, - "loss": 4.1688, - "step": 170500 - }, - { - "epoch": 1.9, - "learning_rate": 1.8990293849810098e-08, - "loss": 4.192, - "step": 171000 - }, - { - "epoch": 1.9, - "learning_rate": 1.9045821024809542e-08, - "loss": 4.1674, - "step": 171500 - }, - { - "epoch": 1.91, - "learning_rate": 1.9101348199808986e-08, - "loss": 4.175, - "step": 172000 - }, - { - "epoch": 1.92, - "learning_rate": 1.915687537480843e-08, - "loss": 4.1618, - "step": 172500 - }, - { - "epoch": 1.92, - "learning_rate": 1.9212402549807874e-08, - "loss": 4.1692, - "step": 173000 - }, - { - "epoch": 1.93, - "learning_rate": 1.926792972480732e-08, - "loss": 4.1469, - "step": 173500 - }, - { - "epoch": 1.93, - "learning_rate": 1.9323456899806763e-08, - "loss": 4.1824, - "step": 174000 - }, - { - "epoch": 1.94, - "learning_rate": 1.937898407480621e-08, - "loss": 4.138, - "step": 174500 - }, - { - "epoch": 1.94, - "learning_rate": 1.9434511249805654e-08, - "loss": 4.1668, - "step": 175000 - }, - { - "epoch": 1.95, - "learning_rate": 1.94900384248051e-08, - "loss": 4.1715, - "step": 175500 - }, - { - "epoch": 1.95, - "learning_rate": 1.9545565599804543e-08, - "loss": 4.1606, - "step": 176000 - }, - { - "epoch": 1.96, - "learning_rate": 1.9601092774803987e-08, - "loss": 4.1726, - "step": 176500 - }, - { - "epoch": 1.97, - "learning_rate": 1.965661994980343e-08, - "loss": 4.1387, - "step": 177000 - }, - { - "epoch": 1.97, - "learning_rate": 1.971214712480288e-08, - "loss": 4.1411, - "step": 177500 - }, - { - "epoch": 1.98, - "learning_rate": 1.9767674299802323e-08, - "loss": 4.1571, - "step": 178000 - }, - { - "epoch": 1.98, - "learning_rate": 1.9823201474801767e-08, - "loss": 4.1629, - "step": 178500 - }, - { - "epoch": 1.99, - "learning_rate": 1.9878728649801214e-08, - "loss": 4.1518, - "step": 179000 - }, - { - "epoch": 1.99, - "learning_rate": 1.9934255824800655e-08, - "loss": 4.175, - "step": 179500 - }, - { - "epoch": 2.0, - "learning_rate": 1.99897829998001e-08, - "loss": 4.1433, - "step": 180000 - }, - { - "epoch": 2.0, - "eval_loss": 4.123116493225098, - "eval_runtime": 6.3261, - "eval_samples_per_second": 245.648, - "step": 180092 - }, - { - "epoch": 2.0, - "learning_rate": 2.0045310174799547e-08, - "loss": 4.156, - "step": 180500 - }, - { - "epoch": 2.01, - "learning_rate": 2.010083734979899e-08, - "loss": 4.1567, - "step": 181000 - }, - { - "epoch": 2.02, - "learning_rate": 2.0156364524798435e-08, - "loss": 4.1371, - "step": 181500 - }, - { - "epoch": 2.02, - "learning_rate": 2.021189169979788e-08, - "loss": 4.1543, - "step": 182000 - }, - { - "epoch": 2.03, - "learning_rate": 2.0267418874797327e-08, - "loss": 4.1629, - "step": 182500 - }, - { - "epoch": 2.03, - "learning_rate": 2.0322946049796768e-08, - "loss": 4.1472, - "step": 183000 - }, - { - "epoch": 2.04, - "learning_rate": 2.0378473224796215e-08, - "loss": 4.149, - "step": 183500 - }, - { - "epoch": 2.04, - "learning_rate": 2.043400039979566e-08, - "loss": 4.1544, - "step": 184000 - }, - { - "epoch": 2.05, - "learning_rate": 2.0489527574795104e-08, - "loss": 4.1534, - "step": 184500 - }, - { - "epoch": 2.05, - "learning_rate": 2.0545054749794548e-08, - "loss": 4.1411, - "step": 185000 - }, - { - "epoch": 2.06, - "learning_rate": 2.0600581924793995e-08, - "loss": 4.1362, - "step": 185500 - }, - { - "epoch": 2.07, - "learning_rate": 2.0656109099793436e-08, - "loss": 4.1591, - "step": 186000 - }, - { - "epoch": 2.07, - "learning_rate": 2.071163627479288e-08, - "loss": 4.1516, - "step": 186500 - }, - { - "epoch": 2.08, - "learning_rate": 2.0767163449792328e-08, - "loss": 4.1445, - "step": 187000 - }, - { - "epoch": 2.08, - "learning_rate": 2.0822690624791772e-08, - "loss": 4.1516, - "step": 187500 - }, - { - "epoch": 2.09, - "learning_rate": 2.0878217799791216e-08, - "loss": 4.1523, - "step": 188000 - }, - { - "epoch": 2.09, - "learning_rate": 2.0933744974790664e-08, - "loss": 4.1315, - "step": 188500 - }, - { - "epoch": 2.1, - "learning_rate": 2.0989272149790108e-08, - "loss": 4.1512, - "step": 189000 - }, - { - "epoch": 2.1, - "learning_rate": 2.104479932478955e-08, - "loss": 4.1461, - "step": 189500 - }, - { - "epoch": 2.11, - "learning_rate": 2.1100326499788996e-08, - "loss": 4.1427, - "step": 190000 - }, - { - "epoch": 2.12, - "learning_rate": 2.115585367478844e-08, - "loss": 4.1519, - "step": 190500 - }, - { - "epoch": 2.12, - "learning_rate": 2.1211380849787885e-08, - "loss": 4.1469, - "step": 191000 - }, - { - "epoch": 2.13, - "learning_rate": 2.1266908024787332e-08, - "loss": 4.1438, - "step": 191500 - }, - { - "epoch": 2.13, - "learning_rate": 2.1322435199786776e-08, - "loss": 4.1581, - "step": 192000 - }, - { - "epoch": 2.14, - "learning_rate": 2.1377962374786217e-08, - "loss": 4.1525, - "step": 192500 - }, - { - "epoch": 2.14, - "learning_rate": 2.1433489549785665e-08, - "loss": 4.1381, - "step": 193000 - }, - { - "epoch": 2.15, - "learning_rate": 2.148901672478511e-08, - "loss": 4.1397, - "step": 193500 - }, - { - "epoch": 2.15, - "learning_rate": 2.1544543899784553e-08, - "loss": 4.1365, - "step": 194000 - }, - { - "epoch": 2.16, - "learning_rate": 2.1600071074783997e-08, - "loss": 4.1297, - "step": 194500 - }, - { - "epoch": 2.17, - "learning_rate": 2.1655598249783445e-08, - "loss": 4.1296, - "step": 195000 - }, - { - "epoch": 2.17, - "learning_rate": 2.171112542478289e-08, - "loss": 4.1545, - "step": 195500 - }, - { - "epoch": 2.18, - "learning_rate": 2.176665259978233e-08, - "loss": 4.1479, - "step": 196000 - }, - { - "epoch": 2.18, - "learning_rate": 2.1822179774781777e-08, - "loss": 4.1311, - "step": 196500 - }, - { - "epoch": 2.19, - "learning_rate": 2.187770694978122e-08, - "loss": 4.1384, - "step": 197000 - }, - { - "epoch": 2.19, - "learning_rate": 2.1933234124780666e-08, - "loss": 4.1299, - "step": 197500 - }, - { - "epoch": 2.2, - "learning_rate": 2.1988761299780113e-08, - "loss": 4.1386, - "step": 198000 - }, - { - "epoch": 2.2, - "learning_rate": 2.2044288474779557e-08, - "loss": 4.1577, - "step": 198500 - }, - { - "epoch": 2.21, - "learning_rate": 2.2099815649778998e-08, - "loss": 4.1175, - "step": 199000 - }, - { - "epoch": 2.22, - "learning_rate": 2.2155342824778446e-08, - "loss": 4.1354, - "step": 199500 - }, - { - "epoch": 2.22, - "learning_rate": 2.221086999977789e-08, - "loss": 4.1239, - "step": 200000 - }, - { - "epoch": 2.23, - "learning_rate": 2.2266397174777334e-08, - "loss": 4.1558, - "step": 200500 - }, - { - "epoch": 2.23, - "learning_rate": 2.232192434977678e-08, - "loss": 4.1277, - "step": 201000 - }, - { - "epoch": 2.24, - "learning_rate": 2.2377451524776226e-08, - "loss": 4.1317, - "step": 201500 - }, - { - "epoch": 2.24, - "learning_rate": 2.243297869977567e-08, - "loss": 4.1267, - "step": 202000 - }, - { - "epoch": 2.25, - "learning_rate": 2.2488505874775114e-08, - "loss": 4.1315, - "step": 202500 - }, - { - "epoch": 2.25, - "learning_rate": 2.2544033049774558e-08, - "loss": 4.1399, - "step": 203000 - }, - { - "epoch": 2.26, - "learning_rate": 2.2599560224774002e-08, - "loss": 4.1285, - "step": 203500 - }, - { - "epoch": 2.27, - "learning_rate": 2.2655087399773447e-08, - "loss": 4.1298, - "step": 204000 - }, - { - "epoch": 2.27, - "learning_rate": 2.2710614574772894e-08, - "loss": 4.1137, - "step": 204500 - }, - { - "epoch": 2.28, - "learning_rate": 2.2766141749772338e-08, - "loss": 4.1352, - "step": 205000 - }, - { - "epoch": 2.28, - "learning_rate": 2.2821668924771782e-08, - "loss": 4.1176, - "step": 205500 - }, - { - "epoch": 2.29, - "learning_rate": 2.2877196099771227e-08, - "loss": 4.1378, - "step": 206000 - }, - { - "epoch": 2.29, - "learning_rate": 2.293272327477067e-08, - "loss": 4.1138, - "step": 206500 - }, - { - "epoch": 2.3, - "learning_rate": 2.2988250449770115e-08, - "loss": 4.1411, - "step": 207000 - }, - { - "epoch": 2.3, - "learning_rate": 2.3043777624769562e-08, - "loss": 4.1264, - "step": 207500 - }, - { - "epoch": 2.31, - "learning_rate": 2.3099304799769007e-08, - "loss": 4.1252, - "step": 208000 - }, - { - "epoch": 2.32, - "learning_rate": 2.315483197476845e-08, - "loss": 4.1338, - "step": 208500 - }, - { - "epoch": 2.32, - "learning_rate": 2.3210359149767895e-08, - "loss": 4.1296, - "step": 209000 - }, - { - "epoch": 2.33, - "learning_rate": 2.326588632476734e-08, - "loss": 4.1215, - "step": 209500 - }, - { - "epoch": 2.33, - "learning_rate": 2.3321413499766783e-08, - "loss": 4.1278, - "step": 210000 - }, - { - "epoch": 2.34, - "learning_rate": 2.337694067476623e-08, - "loss": 4.136, - "step": 210500 - }, - { - "epoch": 2.34, - "learning_rate": 2.3432467849765675e-08, - "loss": 4.0958, - "step": 211000 - }, - { - "epoch": 2.35, - "learning_rate": 2.348799502476512e-08, - "loss": 4.1178, - "step": 211500 - }, - { - "epoch": 2.35, - "learning_rate": 2.3543522199764563e-08, - "loss": 4.1008, - "step": 212000 - }, - { - "epoch": 2.36, - "learning_rate": 2.3599049374764008e-08, - "loss": 4.095, - "step": 212500 - }, - { - "epoch": 2.37, - "learning_rate": 2.3654576549763452e-08, - "loss": 4.1155, - "step": 213000 - }, - { - "epoch": 2.37, - "learning_rate": 2.37101037247629e-08, - "loss": 4.1213, - "step": 213500 - }, - { - "epoch": 2.38, - "learning_rate": 2.3765630899762343e-08, - "loss": 4.096, - "step": 214000 - }, - { - "epoch": 2.38, - "learning_rate": 2.3821158074761788e-08, - "loss": 4.1187, - "step": 214500 - }, - { - "epoch": 2.39, - "learning_rate": 2.3876685249761232e-08, - "loss": 4.1168, - "step": 215000 - }, - { - "epoch": 2.39, - "learning_rate": 2.3932212424760676e-08, - "loss": 4.1148, - "step": 215500 - }, - { - "epoch": 2.4, - "learning_rate": 2.398773959976012e-08, - "loss": 4.1237, - "step": 216000 - }, - { - "epoch": 2.4, - "learning_rate": 2.4043266774759564e-08, - "loss": 4.111, - "step": 216500 - }, - { - "epoch": 2.41, - "learning_rate": 2.4098793949759012e-08, - "loss": 4.1143, - "step": 217000 - }, - { - "epoch": 2.42, - "learning_rate": 2.4154321124758456e-08, - "loss": 4.1118, - "step": 217500 - }, - { - "epoch": 2.42, - "learning_rate": 2.42098482997579e-08, - "loss": 4.1146, - "step": 218000 - }, - { - "epoch": 2.43, - "learning_rate": 2.4265375474757348e-08, - "loss": 4.1138, - "step": 218500 - }, - { - "epoch": 2.43, - "learning_rate": 2.432090264975679e-08, - "loss": 4.0978, - "step": 219000 - }, - { - "epoch": 2.44, - "learning_rate": 2.4376429824756233e-08, - "loss": 4.1065, - "step": 219500 - }, - { - "epoch": 2.44, - "learning_rate": 2.443195699975568e-08, - "loss": 4.1024, - "step": 220000 - }, - { - "epoch": 2.45, - "learning_rate": 2.4487484174755124e-08, - "loss": 4.1114, - "step": 220500 - }, - { - "epoch": 2.45, - "learning_rate": 2.454301134975457e-08, - "loss": 4.1118, - "step": 221000 - }, - { - "epoch": 2.46, - "learning_rate": 2.4598538524754016e-08, - "loss": 4.1131, - "step": 221500 - }, - { - "epoch": 2.47, - "learning_rate": 2.4654065699753457e-08, - "loss": 4.0993, - "step": 222000 - }, - { - "epoch": 2.47, - "learning_rate": 2.47095928747529e-08, - "loss": 4.1242, - "step": 222500 - }, - { - "epoch": 2.48, - "learning_rate": 2.476512004975235e-08, - "loss": 4.1001, - "step": 223000 - }, - { - "epoch": 2.48, - "learning_rate": 2.4820647224751793e-08, - "loss": 4.1123, - "step": 223500 - }, - { - "epoch": 2.49, - "learning_rate": 2.4876174399751237e-08, - "loss": 4.096, - "step": 224000 - }, - { - "epoch": 2.49, - "learning_rate": 2.493170157475068e-08, - "loss": 4.0955, - "step": 224500 - }, - { - "epoch": 2.5, - "learning_rate": 2.498722874975013e-08, - "loss": 4.1065, - "step": 225000 - }, - { - "epoch": 2.5, - "learning_rate": 2.504275592474957e-08, - "loss": 4.0978, - "step": 225500 - }, - { - "epoch": 2.51, - "learning_rate": 2.5098283099749017e-08, - "loss": 4.117, - "step": 226000 - }, - { - "epoch": 2.52, - "learning_rate": 2.515381027474846e-08, - "loss": 4.1116, - "step": 226500 - }, - { - "epoch": 2.52, - "learning_rate": 2.5209337449747902e-08, - "loss": 4.1183, - "step": 227000 - }, - { - "epoch": 2.53, - "learning_rate": 2.526486462474735e-08, - "loss": 4.092, - "step": 227500 - }, - { - "epoch": 2.53, - "learning_rate": 2.5320391799746794e-08, - "loss": 4.0974, - "step": 228000 - }, - { - "epoch": 2.54, - "learning_rate": 2.537591897474624e-08, - "loss": 4.1012, - "step": 228500 - }, - { - "epoch": 2.54, - "learning_rate": 2.5431446149745682e-08, - "loss": 4.1221, - "step": 229000 - }, - { - "epoch": 2.55, - "learning_rate": 2.5486973324745133e-08, - "loss": 4.1168, - "step": 229500 - }, - { - "epoch": 2.55, - "learning_rate": 2.5542500499744574e-08, - "loss": 4.1236, - "step": 230000 - }, - { - "epoch": 2.56, - "learning_rate": 2.5598027674744015e-08, - "loss": 4.107, - "step": 230500 - }, - { - "epoch": 2.57, - "learning_rate": 2.5653554849743465e-08, - "loss": 4.0949, - "step": 231000 - }, - { - "epoch": 2.57, - "learning_rate": 2.5709082024742906e-08, - "loss": 4.1096, - "step": 231500 - }, - { - "epoch": 2.58, - "learning_rate": 2.5764609199742354e-08, - "loss": 4.1036, - "step": 232000 - }, - { - "epoch": 2.58, - "learning_rate": 2.5820136374741798e-08, - "loss": 4.0994, - "step": 232500 - }, - { - "epoch": 2.59, - "learning_rate": 2.5875663549741245e-08, - "loss": 4.1042, - "step": 233000 - }, - { - "epoch": 2.59, - "learning_rate": 2.5931190724740686e-08, - "loss": 4.0824, - "step": 233500 - }, - { - "epoch": 2.6, - "learning_rate": 2.598671789974013e-08, - "loss": 4.1048, - "step": 234000 - }, - { - "epoch": 2.6, - "learning_rate": 2.6042245074739578e-08, - "loss": 4.0772, - "step": 234500 - }, - { - "epoch": 2.61, - "learning_rate": 2.609777224973902e-08, - "loss": 4.0978, - "step": 235000 - }, - { - "epoch": 2.62, - "learning_rate": 2.6153299424738466e-08, - "loss": 4.0774, - "step": 235500 - }, - { - "epoch": 2.62, - "learning_rate": 2.620882659973791e-08, - "loss": 4.1111, - "step": 236000 - }, - { - "epoch": 2.63, - "learning_rate": 2.6264353774737358e-08, - "loss": 4.0863, - "step": 236500 - }, - { - "epoch": 2.63, - "learning_rate": 2.63198809497368e-08, - "loss": 4.1038, - "step": 237000 - }, - { - "epoch": 2.64, - "learning_rate": 2.6375408124736243e-08, - "loss": 4.0634, - "step": 237500 - }, - { - "epoch": 2.64, - "learning_rate": 2.643093529973569e-08, - "loss": 4.0941, - "step": 238000 - }, - { - "epoch": 2.65, - "learning_rate": 2.648646247473513e-08, - "loss": 4.1001, - "step": 238500 - }, - { - "epoch": 2.65, - "learning_rate": 2.6541989649734582e-08, - "loss": 4.1073, - "step": 239000 - }, - { - "epoch": 2.66, - "learning_rate": 2.6597516824734023e-08, - "loss": 4.0594, - "step": 239500 - }, - { - "epoch": 2.67, - "learning_rate": 2.665304399973347e-08, - "loss": 4.084, - "step": 240000 - }, - { - "epoch": 2.67, - "learning_rate": 2.6708571174732915e-08, - "loss": 4.0941, - "step": 240500 - }, - { - "epoch": 2.68, - "learning_rate": 2.6764098349732356e-08, - "loss": 4.1004, - "step": 241000 - }, - { - "epoch": 2.68, - "learning_rate": 2.6819625524731803e-08, - "loss": 4.0788, - "step": 241500 - }, - { - "epoch": 2.69, - "learning_rate": 2.6875152699731247e-08, - "loss": 4.1075, - "step": 242000 - }, - { - "epoch": 2.69, - "learning_rate": 2.6930679874730695e-08, - "loss": 4.1066, - "step": 242500 - }, - { - "epoch": 2.7, - "learning_rate": 2.6986207049730136e-08, - "loss": 4.096, - "step": 243000 - }, - { - "epoch": 2.7, - "learning_rate": 2.704173422472958e-08, - "loss": 4.1113, - "step": 243500 - }, - { - "epoch": 2.71, - "learning_rate": 2.7097261399729027e-08, - "loss": 4.1107, - "step": 244000 - }, - { - "epoch": 2.72, - "learning_rate": 2.7152788574728468e-08, - "loss": 4.0997, - "step": 244500 - }, - { - "epoch": 2.72, - "learning_rate": 2.7208315749727916e-08, - "loss": 4.0944, - "step": 245000 - }, - { - "epoch": 2.73, - "learning_rate": 2.726384292472736e-08, - "loss": 4.0742, - "step": 245500 - }, - { - "epoch": 2.73, - "learning_rate": 2.7319370099726807e-08, - "loss": 4.0917, - "step": 246000 - }, - { - "epoch": 2.74, - "learning_rate": 2.7374897274726248e-08, - "loss": 4.0972, - "step": 246500 - }, - { - "epoch": 2.74, - "learning_rate": 2.7430424449725692e-08, - "loss": 4.0874, - "step": 247000 - }, - { - "epoch": 2.75, - "learning_rate": 2.748595162472514e-08, - "loss": 4.0942, - "step": 247500 - }, - { - "epoch": 2.75, - "learning_rate": 2.7541478799724584e-08, - "loss": 4.0869, - "step": 248000 - }, - { - "epoch": 2.76, - "learning_rate": 2.759700597472403e-08, - "loss": 4.079, - "step": 248500 - }, - { - "epoch": 2.77, - "learning_rate": 2.7652533149723472e-08, - "loss": 4.0971, - "step": 249000 - }, - { - "epoch": 2.77, - "learning_rate": 2.770806032472292e-08, - "loss": 4.0913, - "step": 249500 - }, - { - "epoch": 2.78, - "learning_rate": 2.7763587499722364e-08, - "loss": 4.085, - "step": 250000 - }, - { - "epoch": 2.78, - "learning_rate": 2.7819114674721805e-08, - "loss": 4.0759, - "step": 250500 - }, - { - "epoch": 2.79, - "learning_rate": 2.7874641849721252e-08, - "loss": 4.0712, - "step": 251000 - }, - { - "epoch": 2.79, - "learning_rate": 2.7930169024720697e-08, - "loss": 4.1092, - "step": 251500 - }, - { - "epoch": 2.8, - "learning_rate": 2.7985696199720144e-08, - "loss": 4.0749, - "step": 252000 - }, - { - "epoch": 2.8, - "learning_rate": 2.8041223374719585e-08, - "loss": 4.0832, - "step": 252500 - }, - { - "epoch": 2.81, - "learning_rate": 2.8096750549719032e-08, - "loss": 4.0867, - "step": 253000 - }, - { - "epoch": 2.82, - "learning_rate": 2.8152277724718477e-08, - "loss": 4.0784, - "step": 253500 - }, - { - "epoch": 2.82, - "learning_rate": 2.8207804899717917e-08, - "loss": 4.0646, - "step": 254000 - }, - { - "epoch": 2.83, - "learning_rate": 2.8263332074717365e-08, - "loss": 4.0779, - "step": 254500 - }, - { - "epoch": 2.83, - "learning_rate": 2.831885924971681e-08, - "loss": 4.0666, - "step": 255000 - }, - { - "epoch": 2.84, - "learning_rate": 2.8374386424716257e-08, - "loss": 4.0903, - "step": 255500 - }, - { - "epoch": 2.84, - "learning_rate": 2.84299135997157e-08, - "loss": 4.0937, - "step": 256000 - }, - { - "epoch": 2.85, - "learning_rate": 2.8485440774715148e-08, - "loss": 4.0922, - "step": 256500 - }, - { - "epoch": 2.85, - "learning_rate": 2.854096794971459e-08, - "loss": 4.0747, - "step": 257000 - }, - { - "epoch": 2.86, - "learning_rate": 2.8596495124714033e-08, - "loss": 4.0704, - "step": 257500 - }, - { - "epoch": 2.87, - "learning_rate": 2.865202229971348e-08, - "loss": 4.0716, - "step": 258000 - }, - { - "epoch": 2.87, - "learning_rate": 2.870754947471292e-08, - "loss": 4.0747, - "step": 258500 - }, - { - "epoch": 2.88, - "learning_rate": 2.876307664971237e-08, - "loss": 4.0942, - "step": 259000 - }, - { - "epoch": 2.88, - "learning_rate": 2.8818603824711813e-08, - "loss": 4.1021, - "step": 259500 - }, - { - "epoch": 2.89, - "learning_rate": 2.8874130999711254e-08, - "loss": 4.0918, - "step": 260000 - }, - { - "epoch": 2.89, - "learning_rate": 2.89296581747107e-08, - "loss": 4.0813, - "step": 260500 - }, - { - "epoch": 2.9, - "learning_rate": 2.8985185349710146e-08, - "loss": 4.0744, - "step": 261000 - }, - { - "epoch": 2.9, - "learning_rate": 2.9040712524709593e-08, - "loss": 4.0882, - "step": 261500 - }, - { - "epoch": 2.91, - "learning_rate": 2.9096239699709034e-08, - "loss": 4.0887, - "step": 262000 - }, - { - "epoch": 2.92, - "learning_rate": 2.9151766874708482e-08, - "loss": 4.0747, - "step": 262500 - }, - { - "epoch": 2.92, - "learning_rate": 2.9207294049707926e-08, - "loss": 4.0707, - "step": 263000 - }, - { - "epoch": 2.93, - "learning_rate": 2.9262821224707367e-08, - "loss": 4.0642, - "step": 263500 - }, - { - "epoch": 2.93, - "learning_rate": 2.9318348399706818e-08, - "loss": 4.0646, - "step": 264000 - }, - { - "epoch": 2.94, - "learning_rate": 2.937387557470626e-08, - "loss": 4.075, - "step": 264500 - }, - { - "epoch": 2.94, - "learning_rate": 2.9429402749705706e-08, - "loss": 4.0733, - "step": 265000 - }, - { - "epoch": 2.95, - "learning_rate": 2.948492992470515e-08, - "loss": 4.0734, - "step": 265500 - }, - { - "epoch": 2.95, - "learning_rate": 2.9540457099704598e-08, - "loss": 4.0871, - "step": 266000 - }, - { - "epoch": 2.96, - "learning_rate": 2.959598427470404e-08, - "loss": 4.0676, - "step": 266500 - }, - { - "epoch": 2.97, - "learning_rate": 2.9651511449703483e-08, - "loss": 4.0773, - "step": 267000 - }, - { - "epoch": 2.97, - "learning_rate": 2.970703862470293e-08, - "loss": 4.0733, - "step": 267500 - }, - { - "epoch": 2.98, - "learning_rate": 2.976256579970237e-08, - "loss": 4.0874, - "step": 268000 - }, - { - "epoch": 2.98, - "learning_rate": 2.981809297470182e-08, - "loss": 4.0765, - "step": 268500 - }, - { - "epoch": 2.99, - "learning_rate": 2.987362014970126e-08, - "loss": 4.0605, - "step": 269000 - }, - { - "epoch": 2.99, - "learning_rate": 2.992914732470071e-08, - "loss": 4.0506, - "step": 269500 - }, - { - "epoch": 3.0, - "learning_rate": 2.9984674499700154e-08, - "loss": 4.0806, - "step": 270000 - }, - { - "epoch": 3.0, - "eval_loss": 4.056514263153076, - "eval_runtime": 6.3026, - "eval_samples_per_second": 246.567, - "step": 270138 - }, - { - "epoch": 3.0, - "learning_rate": 3.004020167469959e-08, - "loss": 4.0646, - "step": 270500 - }, - { - "epoch": 3.01, - "learning_rate": 3.009572884969904e-08, - "loss": 4.0577, - "step": 271000 - }, - { - "epoch": 3.02, - "learning_rate": 3.015125602469849e-08, - "loss": 4.0721, - "step": 271500 - }, - { - "epoch": 3.02, - "learning_rate": 3.020678319969793e-08, - "loss": 4.0837, - "step": 272000 - }, - { - "epoch": 3.03, - "learning_rate": 3.0262310374697375e-08, - "loss": 4.0935, - "step": 272500 - }, - { - "epoch": 3.03, - "learning_rate": 3.031783754969682e-08, - "loss": 4.0564, - "step": 273000 - }, - { - "epoch": 3.04, - "learning_rate": 3.0373364724696264e-08, - "loss": 4.0866, - "step": 273500 - }, - { - "epoch": 3.04, - "learning_rate": 3.042889189969571e-08, - "loss": 4.0721, - "step": 274000 - }, - { - "epoch": 3.05, - "learning_rate": 3.048441907469516e-08, - "loss": 4.0722, - "step": 274500 - }, - { - "epoch": 3.05, - "learning_rate": 3.0539946249694596e-08, - "loss": 4.0812, - "step": 275000 - }, - { - "epoch": 3.06, - "learning_rate": 3.059547342469405e-08, - "loss": 4.0439, - "step": 275500 - }, - { - "epoch": 3.07, - "learning_rate": 3.065100059969349e-08, - "loss": 4.0733, - "step": 276000 - }, - { - "epoch": 3.07, - "learning_rate": 3.070652777469293e-08, - "loss": 4.0709, - "step": 276500 - }, - { - "epoch": 3.08, - "learning_rate": 3.076205494969238e-08, - "loss": 4.0633, - "step": 277000 - }, - { - "epoch": 3.08, - "learning_rate": 3.0817582124691824e-08, - "loss": 4.0608, - "step": 277500 - }, - { - "epoch": 3.09, - "learning_rate": 3.087310929969127e-08, - "loss": 4.0561, - "step": 278000 - }, - { - "epoch": 3.09, - "learning_rate": 3.092863647469071e-08, - "loss": 4.0637, - "step": 278500 - }, - { - "epoch": 3.1, - "learning_rate": 3.0984163649690156e-08, - "loss": 4.0655, - "step": 279000 - }, - { - "epoch": 3.1, - "learning_rate": 3.10396908246896e-08, - "loss": 4.0861, - "step": 279500 - }, - { - "epoch": 3.11, - "learning_rate": 3.1095217999689044e-08, - "loss": 4.0609, - "step": 280000 - }, - { - "epoch": 3.12, - "learning_rate": 3.115074517468849e-08, - "loss": 4.065, - "step": 280500 - }, - { - "epoch": 3.12, - "learning_rate": 3.120627234968793e-08, - "loss": 4.0679, - "step": 281000 - }, - { - "epoch": 3.13, - "learning_rate": 3.1261799524687384e-08, - "loss": 4.05, - "step": 281500 - }, - { - "epoch": 3.13, - "learning_rate": 3.131732669968683e-08, - "loss": 4.0283, - "step": 282000 - }, - { - "epoch": 3.14, - "learning_rate": 3.137285387468627e-08, - "loss": 4.0595, - "step": 282500 - }, - { - "epoch": 3.14, - "learning_rate": 3.1428381049685716e-08, - "loss": 4.0778, - "step": 283000 - }, - { - "epoch": 3.15, - "learning_rate": 3.148390822468516e-08, - "loss": 4.0679, - "step": 283500 - }, - { - "epoch": 3.15, - "learning_rate": 3.1539435399684605e-08, - "loss": 4.0653, - "step": 284000 - }, - { - "epoch": 3.16, - "learning_rate": 3.159496257468405e-08, - "loss": 4.0538, - "step": 284500 - }, - { - "epoch": 3.17, - "learning_rate": 3.165048974968349e-08, - "loss": 4.0479, - "step": 285000 - }, - { - "epoch": 3.17, - "learning_rate": 3.170601692468294e-08, - "loss": 4.0469, - "step": 285500 - }, - { - "epoch": 3.18, - "learning_rate": 3.176154409968239e-08, - "loss": 4.0756, - "step": 286000 - }, - { - "epoch": 3.18, - "learning_rate": 3.1817071274681825e-08, - "loss": 4.0609, - "step": 286500 - }, - { - "epoch": 3.19, - "learning_rate": 3.187259844968127e-08, - "loss": 4.0247, - "step": 287000 - }, - { - "epoch": 3.19, - "learning_rate": 3.192812562468072e-08, - "loss": 4.074, - "step": 287500 - }, - { - "epoch": 3.2, - "learning_rate": 3.198365279968016e-08, - "loss": 4.0597, - "step": 288000 - }, - { - "epoch": 3.2, - "learning_rate": 3.203917997467961e-08, - "loss": 4.0569, - "step": 288500 - }, - { - "epoch": 3.21, - "learning_rate": 3.209470714967905e-08, - "loss": 4.0393, - "step": 289000 - }, - { - "epoch": 3.22, - "learning_rate": 3.215023432467849e-08, - "loss": 4.0618, - "step": 289500 - }, - { - "epoch": 3.22, - "learning_rate": 3.220576149967794e-08, - "loss": 4.0426, - "step": 290000 - }, - { - "epoch": 3.23, - "learning_rate": 3.2261288674677385e-08, - "loss": 4.0593, - "step": 290500 - }, - { - "epoch": 3.23, - "learning_rate": 3.231681584967683e-08, - "loss": 4.0546, - "step": 291000 - }, - { - "epoch": 3.24, - "learning_rate": 3.2372343024676274e-08, - "loss": 4.0527, - "step": 291500 - }, - { - "epoch": 3.24, - "learning_rate": 3.2427870199675725e-08, - "loss": 4.0523, - "step": 292000 - }, - { - "epoch": 3.25, - "learning_rate": 3.248339737467516e-08, - "loss": 4.0676, - "step": 292500 - }, - { - "epoch": 3.25, - "learning_rate": 3.2538924549674606e-08, - "loss": 4.0683, - "step": 293000 - }, - { - "epoch": 3.26, - "learning_rate": 3.259445172467406e-08, - "loss": 4.0577, - "step": 293500 - }, - { - "epoch": 3.26, - "learning_rate": 3.2649978899673495e-08, - "loss": 4.0606, - "step": 294000 - }, - { - "epoch": 3.27, - "learning_rate": 3.2705506074672945e-08, - "loss": 4.0633, - "step": 294500 - }, - { - "epoch": 3.28, - "learning_rate": 3.276103324967239e-08, - "loss": 4.0385, - "step": 295000 - }, - { - "epoch": 3.28, - "learning_rate": 3.2816560424671834e-08, - "loss": 4.036, - "step": 295500 - }, - { - "epoch": 3.29, - "learning_rate": 3.287208759967128e-08, - "loss": 4.0401, - "step": 296000 - }, - { - "epoch": 3.29, - "learning_rate": 3.292761477467072e-08, - "loss": 4.0433, - "step": 296500 - }, - { - "epoch": 3.3, - "learning_rate": 3.2983141949670166e-08, - "loss": 4.0394, - "step": 297000 - }, - { - "epoch": 3.3, - "learning_rate": 3.303866912466961e-08, - "loss": 4.0434, - "step": 297500 - }, - { - "epoch": 3.31, - "learning_rate": 3.309419629966906e-08, - "loss": 4.0629, - "step": 298000 - }, - { - "epoch": 3.31, - "learning_rate": 3.31497234746685e-08, - "loss": 4.0306, - "step": 298500 - }, - { - "epoch": 3.32, - "learning_rate": 3.320525064966795e-08, - "loss": 4.0449, - "step": 299000 - }, - { - "epoch": 3.33, - "learning_rate": 3.3260777824667394e-08, - "loss": 4.0438, - "step": 299500 - }, - { - "epoch": 3.33, - "learning_rate": 3.331630499966683e-08, - "loss": 4.0442, - "step": 300000 - }, - { - "epoch": 3.34, - "learning_rate": 3.337183217466628e-08, - "loss": 4.0438, - "step": 300500 - }, - { - "epoch": 3.34, - "learning_rate": 3.3427359349665726e-08, - "loss": 4.0358, - "step": 301000 - }, - { - "epoch": 3.35, - "learning_rate": 3.348288652466517e-08, - "loss": 4.0654, - "step": 301500 - }, - { - "epoch": 3.35, - "learning_rate": 3.3538413699664615e-08, - "loss": 4.0596, - "step": 302000 - }, - { - "epoch": 3.36, - "learning_rate": 3.359394087466406e-08, - "loss": 4.0342, - "step": 302500 - }, - { - "epoch": 3.36, - "learning_rate": 3.36494680496635e-08, - "loss": 4.0457, - "step": 303000 - }, - { - "epoch": 3.37, - "learning_rate": 3.370499522466295e-08, - "loss": 4.0387, - "step": 303500 - }, - { - "epoch": 3.38, - "learning_rate": 3.376052239966239e-08, - "loss": 4.0336, - "step": 304000 - }, - { - "epoch": 3.38, - "learning_rate": 3.3816049574661836e-08, - "loss": 4.0226, - "step": 304500 - }, - { - "epoch": 3.39, - "learning_rate": 3.3871576749661286e-08, - "loss": 4.0373, - "step": 305000 - }, - { - "epoch": 3.39, - "learning_rate": 3.3927103924660724e-08, - "loss": 4.0409, - "step": 305500 - }, - { - "epoch": 3.4, - "learning_rate": 3.398263109966017e-08, - "loss": 4.0429, - "step": 306000 - }, - { - "epoch": 3.4, - "learning_rate": 3.403815827465962e-08, - "loss": 4.0652, - "step": 306500 - }, - { - "epoch": 3.41, - "learning_rate": 3.4093685449659057e-08, - "loss": 4.0422, - "step": 307000 - }, - { - "epoch": 3.41, - "learning_rate": 3.414921262465851e-08, - "loss": 4.0615, - "step": 307500 - }, - { - "epoch": 3.42, - "learning_rate": 3.420473979965795e-08, - "loss": 4.0351, - "step": 308000 - }, - { - "epoch": 3.43, - "learning_rate": 3.4260266974657396e-08, - "loss": 4.0478, - "step": 308500 - }, - { - "epoch": 3.43, - "learning_rate": 3.431579414965684e-08, - "loss": 4.0489, - "step": 309000 - }, - { - "epoch": 3.44, - "learning_rate": 3.4371321324656284e-08, - "loss": 4.0405, - "step": 309500 - }, - { - "epoch": 3.44, - "learning_rate": 3.442684849965573e-08, - "loss": 4.0276, - "step": 310000 - }, - { - "epoch": 3.45, - "learning_rate": 3.448237567465517e-08, - "loss": 4.0416, - "step": 310500 - }, - { - "epoch": 3.45, - "learning_rate": 3.453790284965462e-08, - "loss": 4.0589, - "step": 311000 - }, - { - "epoch": 3.46, - "learning_rate": 3.459343002465406e-08, - "loss": 4.0261, - "step": 311500 - }, - { - "epoch": 3.46, - "learning_rate": 3.464895719965351e-08, - "loss": 4.0363, - "step": 312000 - }, - { - "epoch": 3.47, - "learning_rate": 3.4704484374652956e-08, - "loss": 4.0582, - "step": 312500 - }, - { - "epoch": 3.48, - "learning_rate": 3.4760011549652393e-08, - "loss": 4.0281, - "step": 313000 - }, - { - "epoch": 3.48, - "learning_rate": 3.4815538724651844e-08, - "loss": 4.0409, - "step": 313500 - }, - { - "epoch": 3.49, - "learning_rate": 3.487106589965129e-08, - "loss": 4.0402, - "step": 314000 - }, - { - "epoch": 3.49, - "learning_rate": 3.492659307465073e-08, - "loss": 4.0204, - "step": 314500 - }, - { - "epoch": 3.5, - "learning_rate": 3.4982120249650177e-08, - "loss": 4.0187, - "step": 315000 - }, - { - "epoch": 3.5, - "learning_rate": 3.503764742464963e-08, - "loss": 4.0387, - "step": 315500 - }, - { - "epoch": 3.51, - "learning_rate": 3.5093174599649065e-08, - "loss": 4.028, - "step": 316000 - }, - { - "epoch": 3.51, - "learning_rate": 3.514870177464851e-08, - "loss": 4.0498, - "step": 316500 - }, - { - "epoch": 3.52, - "learning_rate": 3.520422894964796e-08, - "loss": 4.035, - "step": 317000 - }, - { - "epoch": 3.53, - "learning_rate": 3.52597561246474e-08, - "loss": 4.0359, - "step": 317500 - }, - { - "epoch": 3.53, - "learning_rate": 3.531528329964685e-08, - "loss": 4.0442, - "step": 318000 - }, - { - "epoch": 3.54, - "learning_rate": 3.537081047464629e-08, - "loss": 4.032, - "step": 318500 - }, - { - "epoch": 3.54, - "learning_rate": 3.542633764964574e-08, - "loss": 4.0291, - "step": 319000 - }, - { - "epoch": 3.55, - "learning_rate": 3.548186482464518e-08, - "loss": 4.0364, - "step": 319500 - }, - { - "epoch": 3.55, - "learning_rate": 3.5537391999644625e-08, - "loss": 4.0294, - "step": 320000 - }, - { - "epoch": 3.56, - "learning_rate": 3.559291917464407e-08, - "loss": 4.0276, - "step": 320500 - }, - { - "epoch": 3.56, - "learning_rate": 3.5648446349643513e-08, - "loss": 4.0239, - "step": 321000 - }, - { - "epoch": 3.57, - "learning_rate": 3.570397352464296e-08, - "loss": 4.0317, - "step": 321500 - }, - { - "epoch": 3.58, - "learning_rate": 3.57595006996424e-08, - "loss": 4.0278, - "step": 322000 - }, - { - "epoch": 3.58, - "learning_rate": 3.5815027874641846e-08, - "loss": 4.0462, - "step": 322500 - }, - { - "epoch": 3.59, - "learning_rate": 3.587055504964129e-08, - "loss": 4.0241, - "step": 323000 - }, - { - "epoch": 3.59, - "learning_rate": 3.5926082224640734e-08, - "loss": 4.0157, - "step": 323500 - }, - { - "epoch": 3.6, - "learning_rate": 3.5981609399640185e-08, - "loss": 4.0438, - "step": 324000 - }, - { - "epoch": 3.6, - "learning_rate": 3.603713657463963e-08, - "loss": 4.0424, - "step": 324500 - }, - { - "epoch": 3.61, - "learning_rate": 3.6092663749639073e-08, - "loss": 4.0295, - "step": 325000 - }, - { - "epoch": 3.61, - "learning_rate": 3.614819092463852e-08, - "loss": 4.0269, - "step": 325500 - }, - { - "epoch": 3.62, - "learning_rate": 3.620371809963796e-08, - "loss": 4.0431, - "step": 326000 - }, - { - "epoch": 3.63, - "learning_rate": 3.6259245274637406e-08, - "loss": 4.0322, - "step": 326500 - }, - { - "epoch": 3.63, - "learning_rate": 3.631477244963685e-08, - "loss": 4.0323, - "step": 327000 - }, - { - "epoch": 3.64, - "learning_rate": 3.6370299624636294e-08, - "loss": 4.0452, - "step": 327500 - }, - { - "epoch": 3.64, - "learning_rate": 3.642582679963574e-08, - "loss": 4.0394, - "step": 328000 - }, - { - "epoch": 3.65, - "learning_rate": 3.648135397463519e-08, - "loss": 4.0444, - "step": 328500 - }, - { - "epoch": 3.65, - "learning_rate": 3.653688114963463e-08, - "loss": 4.0201, - "step": 329000 - }, - { - "epoch": 3.66, - "learning_rate": 3.659240832463407e-08, - "loss": 4.0265, - "step": 329500 - }, - { - "epoch": 3.66, - "learning_rate": 3.664793549963352e-08, - "loss": 4.0365, - "step": 330000 - }, - { - "epoch": 3.67, - "learning_rate": 3.670346267463296e-08, - "loss": 4.0136, - "step": 330500 - }, - { - "epoch": 3.68, - "learning_rate": 3.675898984963241e-08, - "loss": 4.0301, - "step": 331000 - }, - { - "epoch": 3.68, - "learning_rate": 3.6814517024631854e-08, - "loss": 4.0269, - "step": 331500 - }, - { - "epoch": 3.69, - "learning_rate": 3.68700441996313e-08, - "loss": 4.0471, - "step": 332000 - }, - { - "epoch": 3.69, - "learning_rate": 3.692557137463074e-08, - "loss": 4.0178, - "step": 332500 - }, - { - "epoch": 3.7, - "learning_rate": 3.698109854963019e-08, - "loss": 4.0266, - "step": 333000 - }, - { - "epoch": 3.7, - "learning_rate": 3.703662572462963e-08, - "loss": 4.0309, - "step": 333500 - }, - { - "epoch": 3.71, - "learning_rate": 3.7092152899629075e-08, - "loss": 4.0293, - "step": 334000 - }, - { - "epoch": 3.71, - "learning_rate": 3.7147680074628526e-08, - "loss": 4.0329, - "step": 334500 - }, - { - "epoch": 3.72, - "learning_rate": 3.7203207249627964e-08, - "loss": 4.0213, - "step": 335000 - }, - { - "epoch": 3.73, - "learning_rate": 3.725873442462741e-08, - "loss": 4.0302, - "step": 335500 - }, - { - "epoch": 3.73, - "learning_rate": 3.731426159962686e-08, - "loss": 4.0358, - "step": 336000 - }, - { - "epoch": 3.74, - "learning_rate": 3.7369788774626296e-08, - "loss": 4.008, - "step": 336500 - }, - { - "epoch": 3.74, - "learning_rate": 3.742531594962575e-08, - "loss": 4.0094, - "step": 337000 - }, - { - "epoch": 3.75, - "learning_rate": 3.748084312462519e-08, - "loss": 4.0164, - "step": 337500 - }, - { - "epoch": 3.75, - "learning_rate": 3.7536370299624635e-08, - "loss": 4.0193, - "step": 338000 - }, - { - "epoch": 3.76, - "learning_rate": 3.759189747462408e-08, - "loss": 4.0287, - "step": 338500 - }, - { - "epoch": 3.76, - "learning_rate": 3.7647424649623524e-08, - "loss": 4.023, - "step": 339000 - }, - { - "epoch": 3.77, - "learning_rate": 3.770295182462297e-08, - "loss": 4.0225, - "step": 339500 - }, - { - "epoch": 3.78, - "learning_rate": 3.775847899962241e-08, - "loss": 4.0399, - "step": 340000 - }, - { - "epoch": 3.78, - "learning_rate": 3.781400617462186e-08, - "loss": 4.0306, - "step": 340500 - }, - { - "epoch": 3.79, - "learning_rate": 3.78695333496213e-08, - "loss": 4.022, - "step": 341000 - }, - { - "epoch": 3.79, - "learning_rate": 3.792506052462075e-08, - "loss": 4.0275, - "step": 341500 - }, - { - "epoch": 3.8, - "learning_rate": 3.7980587699620195e-08, - "loss": 4.0376, - "step": 342000 - }, - { - "epoch": 3.8, - "learning_rate": 3.803611487461963e-08, - "loss": 4.0126, - "step": 342500 - }, - { - "epoch": 3.81, - "learning_rate": 3.8091642049619084e-08, - "loss": 4.0314, - "step": 343000 - }, - { - "epoch": 3.81, - "learning_rate": 3.814716922461853e-08, - "loss": 4.0159, - "step": 343500 - }, - { - "epoch": 3.82, - "learning_rate": 3.820269639961797e-08, - "loss": 4.0281, - "step": 344000 - }, - { - "epoch": 3.83, - "learning_rate": 3.8258223574617416e-08, - "loss": 4.0251, - "step": 344500 - }, - { - "epoch": 3.83, - "learning_rate": 3.831375074961686e-08, - "loss": 4.0242, - "step": 345000 - }, - { - "epoch": 3.84, - "learning_rate": 3.8369277924616305e-08, - "loss": 4.0365, - "step": 345500 - }, - { - "epoch": 3.84, - "learning_rate": 3.842480509961575e-08, - "loss": 4.0134, - "step": 346000 - }, - { - "epoch": 3.85, - "learning_rate": 3.848033227461519e-08, - "loss": 4.0163, - "step": 346500 - }, - { - "epoch": 3.85, - "learning_rate": 3.853585944961464e-08, - "loss": 4.0371, - "step": 347000 - }, - { - "epoch": 3.86, - "learning_rate": 3.859138662461409e-08, - "loss": 4.0047, - "step": 347500 - }, - { - "epoch": 3.86, - "learning_rate": 3.8646913799613526e-08, - "loss": 4.016, - "step": 348000 - }, - { - "epoch": 3.87, - "learning_rate": 3.8702440974612976e-08, - "loss": 4.0389, - "step": 348500 - }, - { - "epoch": 3.88, - "learning_rate": 3.875796814961242e-08, - "loss": 4.0015, - "step": 349000 - }, - { - "epoch": 3.88, - "learning_rate": 3.881349532461186e-08, - "loss": 4.0132, - "step": 349500 - }, - { - "epoch": 3.89, - "learning_rate": 3.886902249961131e-08, - "loss": 4.0028, - "step": 350000 - }, - { - "epoch": 3.89, - "learning_rate": 3.892454967461075e-08, - "loss": 4.0079, - "step": 350500 - }, - { - "epoch": 3.9, - "learning_rate": 3.89800768496102e-08, - "loss": 4.0428, - "step": 351000 - }, - { - "epoch": 3.9, - "learning_rate": 3.903560402460964e-08, - "loss": 4.0088, - "step": 351500 - }, - { - "epoch": 3.91, - "learning_rate": 3.9091131199609086e-08, - "loss": 4.0158, - "step": 352000 - }, - { - "epoch": 3.91, - "learning_rate": 3.914665837460853e-08, - "loss": 4.0181, - "step": 352500 - }, - { - "epoch": 3.92, - "learning_rate": 3.9202185549607974e-08, - "loss": 4.0193, - "step": 353000 - }, - { - "epoch": 3.93, - "learning_rate": 3.9257712724607425e-08, - "loss": 4.0056, - "step": 353500 - }, - { - "epoch": 3.93, - "learning_rate": 3.931323989960686e-08, - "loss": 4.0166, - "step": 354000 - }, - { - "epoch": 3.94, - "learning_rate": 3.936876707460631e-08, - "loss": 4.0256, - "step": 354500 - }, - { - "epoch": 3.94, - "learning_rate": 3.942429424960576e-08, - "loss": 4.014, - "step": 355000 - }, - { - "epoch": 3.95, - "learning_rate": 3.9479821424605195e-08, - "loss": 4.0526, - "step": 355500 - }, - { - "epoch": 3.95, - "learning_rate": 3.9535348599604646e-08, - "loss": 4.0243, - "step": 356000 - }, - { - "epoch": 3.96, - "learning_rate": 3.959087577460409e-08, - "loss": 4.0224, - "step": 356500 - }, - { - "epoch": 3.96, - "learning_rate": 3.9646402949603534e-08, - "loss": 4.0319, - "step": 357000 - }, - { - "epoch": 3.97, - "learning_rate": 3.970193012460298e-08, - "loss": 4.0028, - "step": 357500 - }, - { - "epoch": 3.98, - "learning_rate": 3.975745729960243e-08, - "loss": 3.9964, - "step": 358000 - }, - { - "epoch": 3.98, - "learning_rate": 3.9812984474601867e-08, - "loss": 4.0005, - "step": 358500 - }, - { - "epoch": 3.99, - "learning_rate": 3.986851164960131e-08, - "loss": 4.0167, - "step": 359000 - }, - { - "epoch": 3.99, - "learning_rate": 3.992403882460076e-08, - "loss": 4.0178, - "step": 359500 - }, - { - "epoch": 4.0, - "learning_rate": 3.99795659996002e-08, - "loss": 4.0169, - "step": 360000 - }, - { - "epoch": 4.0, - "eval_loss": 4.012733459472656, - "eval_runtime": 6.3084, - "eval_samples_per_second": 246.337, - "step": 360184 - }, - { - "epoch": 4.0, - "learning_rate": 4.003509317459965e-08, - "loss": 4.0111, - "step": 360500 - }, - { - "epoch": 4.01, - "learning_rate": 4.0090620349599094e-08, - "loss": 4.0056, - "step": 361000 - }, - { - "epoch": 4.01, - "learning_rate": 4.014614752459854e-08, - "loss": 4.0092, - "step": 361500 - }, - { - "epoch": 4.02, - "learning_rate": 4.020167469959798e-08, - "loss": 4.0285, - "step": 362000 - }, - { - "epoch": 4.03, - "learning_rate": 4.0257201874597427e-08, - "loss": 4.0084, - "step": 362500 - }, - { - "epoch": 4.03, - "learning_rate": 4.031272904959687e-08, - "loss": 4.0263, - "step": 363000 - }, - { - "epoch": 4.04, - "learning_rate": 4.0368256224596315e-08, - "loss": 4.0244, - "step": 363500 - }, - { - "epoch": 4.04, - "learning_rate": 4.042378339959576e-08, - "loss": 3.9977, - "step": 364000 - }, - { - "epoch": 4.05, - "learning_rate": 4.04793105745952e-08, - "loss": 3.9967, - "step": 364500 - }, - { - "epoch": 4.05, - "learning_rate": 4.0534837749594654e-08, - "loss": 4.0248, - "step": 365000 - }, - { - "epoch": 4.06, - "learning_rate": 4.059036492459409e-08, - "loss": 4.0071, - "step": 365500 - }, - { - "epoch": 4.06, - "learning_rate": 4.0645892099593536e-08, - "loss": 4.016, - "step": 366000 - }, - { - "epoch": 4.07, - "learning_rate": 4.0701419274592987e-08, - "loss": 3.9927, - "step": 366500 - }, - { - "epoch": 4.08, - "learning_rate": 4.075694644959243e-08, - "loss": 4.0108, - "step": 367000 - }, - { - "epoch": 4.08, - "learning_rate": 4.0812473624591875e-08, - "loss": 4.0183, - "step": 367500 - }, - { - "epoch": 4.09, - "learning_rate": 4.086800079959132e-08, - "loss": 4.0027, - "step": 368000 - }, - { - "epoch": 4.09, - "learning_rate": 4.0923527974590763e-08, - "loss": 4.0056, - "step": 368500 - }, - { - "epoch": 4.1, - "learning_rate": 4.097905514959021e-08, - "loss": 4.0082, - "step": 369000 - }, - { - "epoch": 4.1, - "learning_rate": 4.103458232458965e-08, - "loss": 4.0038, - "step": 369500 - }, - { - "epoch": 4.11, - "learning_rate": 4.1090109499589096e-08, - "loss": 3.9956, - "step": 370000 - }, - { - "epoch": 4.11, - "learning_rate": 4.114563667458854e-08, - "loss": 4.0033, - "step": 370500 - }, - { - "epoch": 4.12, - "learning_rate": 4.120116384958799e-08, - "loss": 3.9961, - "step": 371000 - }, - { - "epoch": 4.13, - "learning_rate": 4.125669102458743e-08, - "loss": 4.0267, - "step": 371500 - }, - { - "epoch": 4.13, - "learning_rate": 4.131221819958687e-08, - "loss": 3.9941, - "step": 372000 - }, - { - "epoch": 4.14, - "learning_rate": 4.1367745374586323e-08, - "loss": 4.0191, - "step": 372500 - }, - { - "epoch": 4.14, - "learning_rate": 4.142327254958576e-08, - "loss": 4.0141, - "step": 373000 - }, - { - "epoch": 4.15, - "learning_rate": 4.147879972458521e-08, - "loss": 4.0063, - "step": 373500 - }, - { - "epoch": 4.15, - "learning_rate": 4.1534326899584656e-08, - "loss": 4.0067, - "step": 374000 - }, - { - "epoch": 4.16, - "learning_rate": 4.15898540745841e-08, - "loss": 4.0085, - "step": 374500 - }, - { - "epoch": 4.16, - "learning_rate": 4.1645381249583544e-08, - "loss": 4.0114, - "step": 375000 - }, - { - "epoch": 4.17, - "learning_rate": 4.170090842458299e-08, - "loss": 3.9937, - "step": 375500 - }, - { - "epoch": 4.18, - "learning_rate": 4.175643559958243e-08, - "loss": 4.001, - "step": 376000 - }, - { - "epoch": 4.18, - "learning_rate": 4.181196277458188e-08, - "loss": 4.0154, - "step": 376500 - }, - { - "epoch": 4.19, - "learning_rate": 4.186748994958133e-08, - "loss": 3.9987, - "step": 377000 - }, - { - "epoch": 4.19, - "learning_rate": 4.1923017124580765e-08, - "loss": 4.0165, - "step": 377500 - }, - { - "epoch": 4.2, - "learning_rate": 4.1978544299580216e-08, - "loss": 3.9932, - "step": 378000 - }, - { - "epoch": 4.2, - "learning_rate": 4.203407147457966e-08, - "loss": 4.0054, - "step": 378500 - }, - { - "epoch": 4.21, - "learning_rate": 4.20895986495791e-08, - "loss": 4.0084, - "step": 379000 - }, - { - "epoch": 4.21, - "learning_rate": 4.214512582457855e-08, - "loss": 4.0094, - "step": 379500 - }, - { - "epoch": 4.22, - "learning_rate": 4.220065299957799e-08, - "loss": 3.9905, - "step": 380000 - }, - { - "epoch": 4.23, - "learning_rate": 4.225618017457744e-08, - "loss": 4.0, - "step": 380500 - }, - { - "epoch": 4.23, - "learning_rate": 4.231170734957688e-08, - "loss": 4.0141, - "step": 381000 - }, - { - "epoch": 4.24, - "learning_rate": 4.2367234524576325e-08, - "loss": 4.0034, - "step": 381500 - }, - { - "epoch": 4.24, - "learning_rate": 4.242276169957577e-08, - "loss": 4.0126, - "step": 382000 - }, - { - "epoch": 4.25, - "learning_rate": 4.2478288874575214e-08, - "loss": 3.9771, - "step": 382500 - }, - { - "epoch": 4.25, - "learning_rate": 4.2533816049574664e-08, - "loss": 4.0017, - "step": 383000 - }, - { - "epoch": 4.26, - "learning_rate": 4.25893432245741e-08, - "loss": 4.0028, - "step": 383500 - }, - { - "epoch": 4.26, - "learning_rate": 4.264487039957355e-08, - "loss": 3.9836, - "step": 384000 - }, - { - "epoch": 4.27, - "learning_rate": 4.2700397574573e-08, - "loss": 3.987, - "step": 384500 - }, - { - "epoch": 4.28, - "learning_rate": 4.2755924749572434e-08, - "loss": 4.0078, - "step": 385000 - }, - { - "epoch": 4.28, - "learning_rate": 4.2811451924571885e-08, - "loss": 3.9905, - "step": 385500 - }, - { - "epoch": 4.29, - "learning_rate": 4.286697909957133e-08, - "loss": 3.9939, - "step": 386000 - }, - { - "epoch": 4.29, - "learning_rate": 4.2922506274570774e-08, - "loss": 4.0043, - "step": 386500 - }, - { - "epoch": 4.3, - "learning_rate": 4.297803344957022e-08, - "loss": 3.9899, - "step": 387000 - }, - { - "epoch": 4.3, - "learning_rate": 4.303356062456966e-08, - "loss": 3.9869, - "step": 387500 - }, - { - "epoch": 4.31, - "learning_rate": 4.3089087799569106e-08, - "loss": 3.9914, - "step": 388000 - }, - { - "epoch": 4.31, - "learning_rate": 4.314461497456855e-08, - "loss": 4.0073, - "step": 388500 - }, - { - "epoch": 4.32, - "learning_rate": 4.3200142149567995e-08, - "loss": 4.0109, - "step": 389000 - }, - { - "epoch": 4.33, - "learning_rate": 4.325566932456744e-08, - "loss": 3.9705, - "step": 389500 - }, - { - "epoch": 4.33, - "learning_rate": 4.331119649956689e-08, - "loss": 4.0004, - "step": 390000 - }, - { - "epoch": 4.34, - "learning_rate": 4.336672367456633e-08, - "loss": 4.0117, - "step": 390500 - }, - { - "epoch": 4.34, - "learning_rate": 4.342225084956578e-08, - "loss": 3.9868, - "step": 391000 - }, - { - "epoch": 4.35, - "learning_rate": 4.347777802456522e-08, - "loss": 3.9959, - "step": 391500 - }, - { - "epoch": 4.35, - "learning_rate": 4.353330519956466e-08, - "loss": 4.006, - "step": 392000 - }, - { - "epoch": 4.36, - "learning_rate": 4.358883237456411e-08, - "loss": 3.9973, - "step": 392500 - }, - { - "epoch": 4.36, - "learning_rate": 4.3644359549563555e-08, - "loss": 3.9842, - "step": 393000 - }, - { - "epoch": 4.37, - "learning_rate": 4.3699886724563e-08, - "loss": 3.993, - "step": 393500 - }, - { - "epoch": 4.38, - "learning_rate": 4.375541389956244e-08, - "loss": 3.9979, - "step": 394000 - }, - { - "epoch": 4.38, - "learning_rate": 4.3810941074561894e-08, - "loss": 3.9982, - "step": 394500 - }, - { - "epoch": 4.39, - "learning_rate": 4.386646824956133e-08, - "loss": 4.0057, - "step": 395000 - }, - { - "epoch": 4.39, - "learning_rate": 4.3921995424560775e-08, - "loss": 3.9749, - "step": 395500 - }, - { - "epoch": 4.4, - "learning_rate": 4.3977522599560226e-08, - "loss": 4.0134, - "step": 396000 - }, - { - "epoch": 4.4, - "learning_rate": 4.4033049774559664e-08, - "loss": 3.9945, - "step": 396500 - }, - { - "epoch": 4.41, - "learning_rate": 4.4088576949559115e-08, - "loss": 3.9779, - "step": 397000 - }, - { - "epoch": 4.41, - "learning_rate": 4.414410412455856e-08, - "loss": 3.9846, - "step": 397500 - }, - { - "epoch": 4.42, - "learning_rate": 4.4199631299557996e-08, - "loss": 4.002, - "step": 398000 - }, - { - "epoch": 4.43, - "learning_rate": 4.425515847455745e-08, - "loss": 3.9968, - "step": 398500 - }, - { - "epoch": 4.43, - "learning_rate": 4.431068564955689e-08, - "loss": 4.0025, - "step": 399000 - }, - { - "epoch": 4.44, - "learning_rate": 4.4366212824556336e-08, - "loss": 3.9933, - "step": 399500 - }, - { - "epoch": 4.44, - "learning_rate": 4.442173999955578e-08, - "loss": 3.9982, - "step": 400000 - }, - { - "epoch": 4.45, - "learning_rate": 4.447726717455523e-08, - "loss": 3.9919, - "step": 400500 - }, - { - "epoch": 4.45, - "learning_rate": 4.453279434955467e-08, - "loss": 3.9767, - "step": 401000 - }, - { - "epoch": 4.46, - "learning_rate": 4.458832152455411e-08, - "loss": 4.0057, - "step": 401500 - }, - { - "epoch": 4.46, - "learning_rate": 4.464384869955356e-08, - "loss": 4.0115, - "step": 402000 - }, - { - "epoch": 4.47, - "learning_rate": 4.4699375874553e-08, - "loss": 3.9792, - "step": 402500 - }, - { - "epoch": 4.48, - "learning_rate": 4.475490304955245e-08, - "loss": 3.9911, - "step": 403000 - }, - { - "epoch": 4.48, - "learning_rate": 4.4810430224551896e-08, - "loss": 3.9975, - "step": 403500 - }, - { - "epoch": 4.49, - "learning_rate": 4.486595739955134e-08, - "loss": 3.9752, - "step": 404000 - }, - { - "epoch": 4.49, - "learning_rate": 4.4921484574550784e-08, - "loss": 3.9821, - "step": 404500 - }, - { - "epoch": 4.5, - "learning_rate": 4.497701174955023e-08, - "loss": 3.986, - "step": 405000 - }, - { - "epoch": 4.5, - "learning_rate": 4.503253892454967e-08, - "loss": 4.0016, - "step": 405500 - }, - { - "epoch": 4.51, - "learning_rate": 4.5088066099549116e-08, - "loss": 3.9924, - "step": 406000 - }, - { - "epoch": 4.51, - "learning_rate": 4.514359327454856e-08, - "loss": 3.9965, - "step": 406500 - }, - { - "epoch": 4.52, - "learning_rate": 4.5199120449548005e-08, - "loss": 3.9853, - "step": 407000 - }, - { - "epoch": 4.53, - "learning_rate": 4.5254647624547456e-08, - "loss": 3.9895, - "step": 407500 - }, - { - "epoch": 4.53, - "learning_rate": 4.531017479954689e-08, - "loss": 3.9945, - "step": 408000 - }, - { - "epoch": 4.54, - "learning_rate": 4.536570197454634e-08, - "loss": 3.9956, - "step": 408500 - }, - { - "epoch": 4.54, - "learning_rate": 4.542122914954579e-08, - "loss": 3.9892, - "step": 409000 - }, - { - "epoch": 4.55, - "learning_rate": 4.547675632454523e-08, - "loss": 3.9985, - "step": 409500 - }, - { - "epoch": 4.55, - "learning_rate": 4.5532283499544677e-08, - "loss": 4.0087, - "step": 410000 - }, - { - "epoch": 4.56, - "learning_rate": 4.558781067454412e-08, - "loss": 3.9899, - "step": 410500 - }, - { - "epoch": 4.56, - "learning_rate": 4.5643337849543565e-08, - "loss": 4.0053, - "step": 411000 - }, - { - "epoch": 4.57, - "learning_rate": 4.569886502454301e-08, - "loss": 3.973, - "step": 411500 - }, - { - "epoch": 4.58, - "learning_rate": 4.575439219954245e-08, - "loss": 3.9917, - "step": 412000 - }, - { - "epoch": 4.58, - "learning_rate": 4.58099193745419e-08, - "loss": 3.9872, - "step": 412500 - }, - { - "epoch": 4.59, - "learning_rate": 4.586544654954134e-08, - "loss": 3.9766, - "step": 413000 - }, - { - "epoch": 4.59, - "learning_rate": 4.592097372454079e-08, - "loss": 3.9831, - "step": 413500 - }, - { - "epoch": 4.6, - "learning_rate": 4.597650089954023e-08, - "loss": 3.9769, - "step": 414000 - }, - { - "epoch": 4.6, - "learning_rate": 4.6032028074539674e-08, - "loss": 3.987, - "step": 414500 - }, - { - "epoch": 4.61, - "learning_rate": 4.6087555249539125e-08, - "loss": 3.9891, - "step": 415000 - }, - { - "epoch": 4.61, - "learning_rate": 4.614308242453856e-08, - "loss": 3.9911, - "step": 415500 - }, - { - "epoch": 4.62, - "learning_rate": 4.619860959953801e-08, - "loss": 3.9969, - "step": 416000 - }, - { - "epoch": 4.63, - "learning_rate": 4.625413677453746e-08, - "loss": 4.0026, - "step": 416500 - }, - { - "epoch": 4.63, - "learning_rate": 4.63096639495369e-08, - "loss": 3.9894, - "step": 417000 - }, - { - "epoch": 4.64, - "learning_rate": 4.6365191124536346e-08, - "loss": 3.9728, - "step": 417500 - }, - { - "epoch": 4.64, - "learning_rate": 4.642071829953579e-08, - "loss": 3.9784, - "step": 418000 - }, - { - "epoch": 4.65, - "learning_rate": 4.6476245474535234e-08, - "loss": 3.9917, - "step": 418500 - }, - { - "epoch": 4.65, - "learning_rate": 4.653177264953468e-08, - "loss": 3.9886, - "step": 419000 - }, - { - "epoch": 4.66, - "learning_rate": 4.658729982453413e-08, - "loss": 3.9813, - "step": 419500 - }, - { - "epoch": 4.66, - "learning_rate": 4.664282699953357e-08, - "loss": 3.9863, - "step": 420000 - }, - { - "epoch": 4.67, - "learning_rate": 4.669835417453302e-08, - "loss": 4.0092, - "step": 420500 - }, - { - "epoch": 4.68, - "learning_rate": 4.675388134953246e-08, - "loss": 3.9895, - "step": 421000 - }, - { - "epoch": 4.68, - "learning_rate": 4.68094085245319e-08, - "loss": 3.9985, - "step": 421500 - }, - { - "epoch": 4.69, - "learning_rate": 4.686493569953135e-08, - "loss": 3.9822, - "step": 422000 - }, - { - "epoch": 4.69, - "learning_rate": 4.6920462874530794e-08, - "loss": 3.9833, - "step": 422500 - }, - { - "epoch": 4.7, - "learning_rate": 4.697599004953024e-08, - "loss": 4.0023, - "step": 423000 - }, - { - "epoch": 4.7, - "learning_rate": 4.703151722452968e-08, - "loss": 3.9726, - "step": 423500 - }, - { - "epoch": 4.71, - "learning_rate": 4.708704439952913e-08, - "loss": 3.956, - "step": 424000 - }, - { - "epoch": 4.71, - "learning_rate": 4.714257157452857e-08, - "loss": 3.9939, - "step": 424500 - }, - { - "epoch": 4.72, - "learning_rate": 4.7198098749528015e-08, - "loss": 3.988, - "step": 425000 - }, - { - "epoch": 4.73, - "learning_rate": 4.7253625924527466e-08, - "loss": 3.9827, - "step": 425500 - }, - { - "epoch": 4.73, - "learning_rate": 4.7309153099526903e-08, - "loss": 3.9837, - "step": 426000 - }, - { - "epoch": 4.74, - "learning_rate": 4.7364680274526354e-08, - "loss": 3.9843, - "step": 426500 - }, - { - "epoch": 4.74, - "learning_rate": 4.74202074495258e-08, - "loss": 3.9812, - "step": 427000 - }, - { - "epoch": 4.75, - "learning_rate": 4.747573462452524e-08, - "loss": 3.9722, - "step": 427500 - }, - { - "epoch": 4.75, - "learning_rate": 4.753126179952469e-08, - "loss": 3.9839, - "step": 428000 - }, - { - "epoch": 4.76, - "learning_rate": 4.758678897452413e-08, - "loss": 3.9772, - "step": 428500 - }, - { - "epoch": 4.76, - "learning_rate": 4.7642316149523575e-08, - "loss": 4.0006, - "step": 429000 - }, - { - "epoch": 4.77, - "learning_rate": 4.769784332452302e-08, - "loss": 3.9973, - "step": 429500 - }, - { - "epoch": 4.78, - "learning_rate": 4.7753370499522464e-08, - "loss": 3.9879, - "step": 430000 - }, - { - "epoch": 4.78, - "learning_rate": 4.780889767452191e-08, - "loss": 3.9749, - "step": 430500 - }, - { - "epoch": 4.79, - "learning_rate": 4.786442484952135e-08, - "loss": 3.9939, - "step": 431000 - }, - { - "epoch": 4.79, - "learning_rate": 4.7919952024520796e-08, - "loss": 3.9714, - "step": 431500 - }, - { - "epoch": 4.8, - "learning_rate": 4.797547919952024e-08, - "loss": 3.9883, - "step": 432000 - }, - { - "epoch": 4.8, - "learning_rate": 4.803100637451969e-08, - "loss": 3.9879, - "step": 432500 - }, - { - "epoch": 4.81, - "learning_rate": 4.808653354951913e-08, - "loss": 3.9729, - "step": 433000 - }, - { - "epoch": 4.81, - "learning_rate": 4.814206072451858e-08, - "loss": 3.9623, - "step": 433500 - }, - { - "epoch": 4.82, - "learning_rate": 4.8197587899518024e-08, - "loss": 3.9784, - "step": 434000 - }, - { - "epoch": 4.83, - "learning_rate": 4.825311507451746e-08, - "loss": 3.9872, - "step": 434500 - }, - { - "epoch": 4.83, - "learning_rate": 4.830864224951691e-08, - "loss": 3.984, - "step": 435000 - }, - { - "epoch": 4.84, - "learning_rate": 4.8364169424516356e-08, - "loss": 3.9745, - "step": 435500 - }, - { - "epoch": 4.84, - "learning_rate": 4.84196965995158e-08, - "loss": 3.9719, - "step": 436000 - }, - { - "epoch": 4.85, - "learning_rate": 4.8475223774515244e-08, - "loss": 3.9711, - "step": 436500 - }, - { - "epoch": 4.85, - "learning_rate": 4.8530750949514695e-08, - "loss": 3.9703, - "step": 437000 - }, - { - "epoch": 4.86, - "learning_rate": 4.858627812451413e-08, - "loss": 3.9586, - "step": 437500 - }, - { - "epoch": 4.86, - "learning_rate": 4.864180529951358e-08, - "loss": 3.9699, - "step": 438000 - }, - { - "epoch": 4.87, - "learning_rate": 4.869733247451303e-08, - "loss": 3.9747, - "step": 438500 - }, - { - "epoch": 4.88, - "learning_rate": 4.8752859649512465e-08, - "loss": 3.9673, - "step": 439000 - }, - { - "epoch": 4.88, - "learning_rate": 4.8808386824511916e-08, - "loss": 3.9681, - "step": 439500 - }, - { - "epoch": 4.89, - "learning_rate": 4.886391399951136e-08, - "loss": 3.96, - "step": 440000 - }, - { - "epoch": 4.89, - "learning_rate": 4.8919441174510804e-08, - "loss": 3.9928, - "step": 440500 - }, - { - "epoch": 4.9, - "learning_rate": 4.897496834951025e-08, - "loss": 3.9976, - "step": 441000 - }, - { - "epoch": 4.9, - "learning_rate": 4.903049552450969e-08, - "loss": 3.9696, - "step": 441500 - }, - { - "epoch": 4.91, - "learning_rate": 4.908602269950914e-08, - "loss": 3.9719, - "step": 442000 - }, - { - "epoch": 4.91, - "learning_rate": 4.914154987450858e-08, - "loss": 3.9623, - "step": 442500 - }, - { - "epoch": 4.92, - "learning_rate": 4.919707704950803e-08, - "loss": 3.9835, - "step": 443000 - }, - { - "epoch": 4.93, - "learning_rate": 4.925260422450747e-08, - "loss": 3.9696, - "step": 443500 - }, - { - "epoch": 4.93, - "learning_rate": 4.9308131399506914e-08, - "loss": 3.9761, - "step": 444000 - }, - { - "epoch": 4.94, - "learning_rate": 4.9363658574506365e-08, - "loss": 3.9787, - "step": 444500 - }, - { - "epoch": 4.94, - "learning_rate": 4.94191857495058e-08, - "loss": 3.9933, - "step": 445000 - }, - { - "epoch": 4.95, - "learning_rate": 4.947471292450525e-08, - "loss": 3.9899, - "step": 445500 - }, - { - "epoch": 4.95, - "learning_rate": 4.95302400995047e-08, - "loss": 3.9763, - "step": 446000 - }, - { - "epoch": 4.96, - "learning_rate": 4.958576727450414e-08, - "loss": 3.9708, - "step": 446500 - }, - { - "epoch": 4.96, - "learning_rate": 4.9641294449503585e-08, - "loss": 3.9889, - "step": 447000 - }, - { - "epoch": 4.97, - "learning_rate": 4.969682162450303e-08, - "loss": 3.9769, - "step": 447500 - }, - { - "epoch": 4.98, - "learning_rate": 4.9752348799502474e-08, - "loss": 3.9738, - "step": 448000 - }, - { - "epoch": 4.98, - "learning_rate": 4.980787597450192e-08, - "loss": 3.9591, - "step": 448500 - }, - { - "epoch": 4.99, - "learning_rate": 4.986340314950136e-08, - "loss": 3.9796, - "step": 449000 - }, - { - "epoch": 4.99, - "learning_rate": 4.9918930324500806e-08, - "loss": 3.9772, - "step": 449500 - }, - { - "epoch": 5.0, - "learning_rate": 4.997445749950026e-08, - "loss": 3.9604, - "step": 450000 - }, - { - "epoch": 5.0, - "eval_loss": 3.979917526245117, - "eval_runtime": 6.3032, - "eval_samples_per_second": 246.54, - "step": 450230 - }, - { - "epoch": 5.0, - "learning_rate": 5.00299846744997e-08, - "loss": 3.9679, - "step": 450500 - }, - { - "epoch": 5.01, - "learning_rate": 5.008551184949914e-08, - "loss": 3.9582, - "step": 451000 - }, - { - "epoch": 5.01, - "learning_rate": 5.014103902449859e-08, - "loss": 3.9734, - "step": 451500 - }, - { - "epoch": 5.02, - "learning_rate": 5.0196566199498034e-08, - "loss": 3.9825, - "step": 452000 - }, - { - "epoch": 5.03, - "learning_rate": 5.025209337449747e-08, - "loss": 3.9878, - "step": 452500 - }, - { - "epoch": 5.03, - "learning_rate": 5.030762054949692e-08, - "loss": 3.969, - "step": 453000 - }, - { - "epoch": 5.04, - "learning_rate": 5.0363147724496366e-08, - "loss": 3.9616, - "step": 453500 - }, - { - "epoch": 5.04, - "learning_rate": 5.0418674899495804e-08, - "loss": 3.9926, - "step": 454000 - }, - { - "epoch": 5.05, - "learning_rate": 5.0474202074495255e-08, - "loss": 3.9751, - "step": 454500 - }, - { - "epoch": 5.05, - "learning_rate": 5.05297292494947e-08, - "loss": 3.9791, - "step": 455000 - }, - { - "epoch": 5.06, - "learning_rate": 5.058525642449415e-08, - "loss": 3.9649, - "step": 455500 - }, - { - "epoch": 5.06, - "learning_rate": 5.064078359949359e-08, - "loss": 3.9741, - "step": 456000 - }, - { - "epoch": 5.07, - "learning_rate": 5.069631077449303e-08, - "loss": 3.9608, - "step": 456500 - }, - { - "epoch": 5.08, - "learning_rate": 5.075183794949248e-08, - "loss": 3.9766, - "step": 457000 - }, - { - "epoch": 5.08, - "learning_rate": 5.080736512449192e-08, - "loss": 3.9694, - "step": 457500 - }, - { - "epoch": 5.09, - "learning_rate": 5.0862892299491364e-08, - "loss": 3.9779, - "step": 458000 - }, - { - "epoch": 5.09, - "learning_rate": 5.0918419474490815e-08, - "loss": 3.9594, - "step": 458500 - }, - { - "epoch": 5.1, - "learning_rate": 5.0973946649490266e-08, - "loss": 3.9589, - "step": 459000 - }, - { - "epoch": 5.1, - "learning_rate": 5.1029473824489697e-08, - "loss": 3.9799, - "step": 459500 - }, - { - "epoch": 5.11, - "learning_rate": 5.108500099948915e-08, - "loss": 3.9652, - "step": 460000 - }, - { - "epoch": 5.11, - "learning_rate": 5.11405281744886e-08, - "loss": 3.9576, - "step": 460500 - }, - { - "epoch": 5.12, - "learning_rate": 5.119605534948803e-08, - "loss": 3.9753, - "step": 461000 - }, - { - "epoch": 5.13, - "learning_rate": 5.125158252448748e-08, - "loss": 3.9681, - "step": 461500 - }, - { - "epoch": 5.13, - "learning_rate": 5.130710969948693e-08, - "loss": 3.9559, - "step": 462000 - }, - { - "epoch": 5.14, - "learning_rate": 5.1362636874486375e-08, - "loss": 3.9708, - "step": 462500 - }, - { - "epoch": 5.14, - "learning_rate": 5.141816404948581e-08, - "loss": 3.953, - "step": 463000 - }, - { - "epoch": 5.15, - "learning_rate": 5.147369122448526e-08, - "loss": 3.9638, - "step": 463500 - }, - { - "epoch": 5.15, - "learning_rate": 5.152921839948471e-08, - "loss": 3.9588, - "step": 464000 - }, - { - "epoch": 5.16, - "learning_rate": 5.1584745574484145e-08, - "loss": 3.9676, - "step": 464500 - }, - { - "epoch": 5.16, - "learning_rate": 5.1640272749483596e-08, - "loss": 3.9801, - "step": 465000 - }, - { - "epoch": 5.17, - "learning_rate": 5.169579992448304e-08, - "loss": 3.9834, - "step": 465500 - }, - { - "epoch": 5.18, - "learning_rate": 5.175132709948249e-08, - "loss": 3.9709, - "step": 466000 - }, - { - "epoch": 5.18, - "learning_rate": 5.180685427448193e-08, - "loss": 3.9634, - "step": 466500 - }, - { - "epoch": 5.19, - "learning_rate": 5.186238144948137e-08, - "loss": 3.9807, - "step": 467000 - }, - { - "epoch": 5.19, - "learning_rate": 5.191790862448082e-08, - "loss": 3.9807, - "step": 467500 - }, - { - "epoch": 5.2, - "learning_rate": 5.197343579948026e-08, - "loss": 3.9588, - "step": 468000 - }, - { - "epoch": 5.2, - "learning_rate": 5.2028962974479705e-08, - "loss": 3.9785, - "step": 468500 - }, - { - "epoch": 5.21, - "learning_rate": 5.2084490149479156e-08, - "loss": 3.9755, - "step": 469000 - }, - { - "epoch": 5.21, - "learning_rate": 5.21400173244786e-08, - "loss": 3.9691, - "step": 469500 - }, - { - "epoch": 5.22, - "learning_rate": 5.219554449947804e-08, - "loss": 3.9581, - "step": 470000 - }, - { - "epoch": 5.23, - "learning_rate": 5.225107167447749e-08, - "loss": 3.9749, - "step": 470500 - }, - { - "epoch": 5.23, - "learning_rate": 5.230659884947693e-08, - "loss": 3.9744, - "step": 471000 - }, - { - "epoch": 5.24, - "learning_rate": 5.236212602447637e-08, - "loss": 3.9533, - "step": 471500 - }, - { - "epoch": 5.24, - "learning_rate": 5.241765319947582e-08, - "loss": 3.9659, - "step": 472000 - }, - { - "epoch": 5.25, - "learning_rate": 5.2473180374475265e-08, - "loss": 3.9525, - "step": 472500 - }, - { - "epoch": 5.25, - "learning_rate": 5.2528707549474716e-08, - "loss": 3.9506, - "step": 473000 - }, - { - "epoch": 5.26, - "learning_rate": 5.2584234724474153e-08, - "loss": 3.9397, - "step": 473500 - }, - { - "epoch": 5.26, - "learning_rate": 5.26397618994736e-08, - "loss": 3.9579, - "step": 474000 - }, - { - "epoch": 5.27, - "learning_rate": 5.269528907447305e-08, - "loss": 3.9607, - "step": 474500 - }, - { - "epoch": 5.28, - "learning_rate": 5.2750816249472486e-08, - "loss": 3.9663, - "step": 475000 - }, - { - "epoch": 5.28, - "learning_rate": 5.280634342447193e-08, - "loss": 3.9651, - "step": 475500 - }, - { - "epoch": 5.29, - "learning_rate": 5.286187059947138e-08, - "loss": 3.953, - "step": 476000 - }, - { - "epoch": 5.29, - "learning_rate": 5.291739777447083e-08, - "loss": 3.9471, - "step": 476500 - }, - { - "epoch": 5.3, - "learning_rate": 5.297292494947026e-08, - "loss": 3.9738, - "step": 477000 - }, - { - "epoch": 5.3, - "learning_rate": 5.3028452124469713e-08, - "loss": 3.9717, - "step": 477500 - }, - { - "epoch": 5.31, - "learning_rate": 5.3083979299469164e-08, - "loss": 3.9548, - "step": 478000 - }, - { - "epoch": 5.31, - "learning_rate": 5.3139506474468595e-08, - "loss": 3.9373, - "step": 478500 - }, - { - "epoch": 5.32, - "learning_rate": 5.3195033649468046e-08, - "loss": 3.9567, - "step": 479000 - }, - { - "epoch": 5.33, - "learning_rate": 5.32505608244675e-08, - "loss": 3.9676, - "step": 479500 - }, - { - "epoch": 5.33, - "learning_rate": 5.330608799946694e-08, - "loss": 3.9717, - "step": 480000 - }, - { - "epoch": 5.34, - "learning_rate": 5.336161517446638e-08, - "loss": 3.957, - "step": 480500 - }, - { - "epoch": 5.34, - "learning_rate": 5.341714234946583e-08, - "loss": 3.9736, - "step": 481000 - }, - { - "epoch": 5.35, - "learning_rate": 5.3472669524465273e-08, - "loss": 3.9484, - "step": 481500 - }, - { - "epoch": 5.35, - "learning_rate": 5.352819669946471e-08, - "loss": 3.9439, - "step": 482000 - }, - { - "epoch": 5.36, - "learning_rate": 5.358372387446416e-08, - "loss": 3.9685, - "step": 482500 - }, - { - "epoch": 5.36, - "learning_rate": 5.3639251049463606e-08, - "loss": 3.9692, - "step": 483000 - }, - { - "epoch": 5.37, - "learning_rate": 5.3694778224463044e-08, - "loss": 3.9468, - "step": 483500 - }, - { - "epoch": 5.38, - "learning_rate": 5.3750305399462494e-08, - "loss": 3.947, - "step": 484000 - }, - { - "epoch": 5.38, - "learning_rate": 5.380583257446194e-08, - "loss": 3.9577, - "step": 484500 - }, - { - "epoch": 5.39, - "learning_rate": 5.386135974946139e-08, - "loss": 3.9634, - "step": 485000 - }, - { - "epoch": 5.39, - "learning_rate": 5.391688692446083e-08, - "loss": 3.9416, - "step": 485500 - }, - { - "epoch": 5.4, - "learning_rate": 5.397241409946027e-08, - "loss": 3.962, - "step": 486000 - }, - { - "epoch": 5.4, - "learning_rate": 5.402794127445972e-08, - "loss": 3.9677, - "step": 486500 - }, - { - "epoch": 5.41, - "learning_rate": 5.408346844945916e-08, - "loss": 3.9538, - "step": 487000 - }, - { - "epoch": 5.41, - "learning_rate": 5.4138995624458604e-08, - "loss": 3.9689, - "step": 487500 - }, - { - "epoch": 5.42, - "learning_rate": 5.4194522799458054e-08, - "loss": 3.9649, - "step": 488000 - }, - { - "epoch": 5.43, - "learning_rate": 5.42500499744575e-08, - "loss": 3.956, - "step": 488500 - }, - { - "epoch": 5.43, - "learning_rate": 5.4305577149456936e-08, - "loss": 3.964, - "step": 489000 - }, - { - "epoch": 5.44, - "learning_rate": 5.436110432445639e-08, - "loss": 3.9513, - "step": 489500 - }, - { - "epoch": 5.44, - "learning_rate": 5.441663149945583e-08, - "loss": 3.9641, - "step": 490000 - }, - { - "epoch": 5.45, - "learning_rate": 5.447215867445527e-08, - "loss": 3.9624, - "step": 490500 - }, - { - "epoch": 5.45, - "learning_rate": 5.452768584945472e-08, - "loss": 3.9448, - "step": 491000 - }, - { - "epoch": 5.46, - "learning_rate": 5.4583213024454164e-08, - "loss": 3.9784, - "step": 491500 - }, - { - "epoch": 5.46, - "learning_rate": 5.4638740199453614e-08, - "loss": 3.9588, - "step": 492000 - }, - { - "epoch": 5.47, - "learning_rate": 5.469426737445305e-08, - "loss": 3.9761, - "step": 492500 - }, - { - "epoch": 5.47, - "learning_rate": 5.4749794549452496e-08, - "loss": 3.9593, - "step": 493000 - }, - { - "epoch": 5.48, - "learning_rate": 5.480532172445195e-08, - "loss": 3.9539, - "step": 493500 - }, - { - "epoch": 5.49, - "learning_rate": 5.4860848899451385e-08, - "loss": 3.9435, - "step": 494000 - }, - { - "epoch": 5.49, - "learning_rate": 5.491637607445083e-08, - "loss": 3.9597, - "step": 494500 - }, - { - "epoch": 5.5, - "learning_rate": 5.497190324945028e-08, - "loss": 3.962, - "step": 495000 - }, - { - "epoch": 5.5, - "learning_rate": 5.502743042444973e-08, - "loss": 3.9746, - "step": 495500 - }, - { - "epoch": 5.51, - "learning_rate": 5.508295759944917e-08, - "loss": 3.9431, - "step": 496000 - }, - { - "epoch": 5.51, - "learning_rate": 5.513848477444861e-08, - "loss": 3.948, - "step": 496500 - }, - { - "epoch": 5.52, - "learning_rate": 5.519401194944806e-08, - "loss": 3.9546, - "step": 497000 - }, - { - "epoch": 5.52, - "learning_rate": 5.52495391244475e-08, - "loss": 3.9528, - "step": 497500 - }, - { - "epoch": 5.53, - "learning_rate": 5.5305066299446945e-08, - "loss": 3.9582, - "step": 498000 - }, - { - "epoch": 5.54, - "learning_rate": 5.5360593474446395e-08, - "loss": 3.9466, - "step": 498500 - }, - { - "epoch": 5.54, - "learning_rate": 5.541612064944584e-08, - "loss": 3.9822, - "step": 499000 - }, - { - "epoch": 5.55, - "learning_rate": 5.547164782444528e-08, - "loss": 3.9714, - "step": 499500 - }, - { - "epoch": 5.55, - "learning_rate": 5.552717499944473e-08, - "loss": 3.9581, - "step": 500000 - }, - { - "epoch": 5.56, - "learning_rate": 5.558270217444417e-08, - "loss": 3.9612, - "step": 500500 - }, - { - "epoch": 5.56, - "learning_rate": 5.563822934944361e-08, - "loss": 3.9731, - "step": 501000 - }, - { - "epoch": 5.57, - "learning_rate": 5.569375652444306e-08, - "loss": 3.9718, - "step": 501500 - }, - { - "epoch": 5.57, - "learning_rate": 5.5749283699442505e-08, - "loss": 3.9534, - "step": 502000 - }, - { - "epoch": 5.58, - "learning_rate": 5.5804810874441955e-08, - "loss": 3.9642, - "step": 502500 - }, - { - "epoch": 5.59, - "learning_rate": 5.586033804944139e-08, - "loss": 3.9509, - "step": 503000 - }, - { - "epoch": 5.59, - "learning_rate": 5.591586522444084e-08, - "loss": 3.9404, - "step": 503500 - }, - { - "epoch": 5.6, - "learning_rate": 5.597139239944029e-08, - "loss": 3.941, - "step": 504000 - }, - { - "epoch": 5.6, - "learning_rate": 5.6026919574439726e-08, - "loss": 3.9572, - "step": 504500 - }, - { - "epoch": 5.61, - "learning_rate": 5.608244674943917e-08, - "loss": 3.9336, - "step": 505000 - }, - { - "epoch": 5.61, - "learning_rate": 5.613797392443862e-08, - "loss": 3.9493, - "step": 505500 - }, - { - "epoch": 5.62, - "learning_rate": 5.6193501099438065e-08, - "loss": 3.9511, - "step": 506000 - }, - { - "epoch": 5.62, - "learning_rate": 5.62490282744375e-08, - "loss": 3.937, - "step": 506500 - }, - { - "epoch": 5.63, - "learning_rate": 5.630455544943695e-08, - "loss": 3.9533, - "step": 507000 - }, - { - "epoch": 5.64, - "learning_rate": 5.63600826244364e-08, - "loss": 3.9673, - "step": 507500 - }, - { - "epoch": 5.64, - "learning_rate": 5.6415609799435835e-08, - "loss": 3.9642, - "step": 508000 - }, - { - "epoch": 5.65, - "learning_rate": 5.6471136974435286e-08, - "loss": 3.9343, - "step": 508500 - }, - { - "epoch": 5.65, - "learning_rate": 5.652666414943473e-08, - "loss": 3.9659, - "step": 509000 - }, - { - "epoch": 5.66, - "learning_rate": 5.658219132443418e-08, - "loss": 3.9633, - "step": 509500 - }, - { - "epoch": 5.66, - "learning_rate": 5.663771849943362e-08, - "loss": 3.957, - "step": 510000 - }, - { - "epoch": 5.67, - "learning_rate": 5.669324567443306e-08, - "loss": 3.9357, - "step": 510500 - }, - { - "epoch": 5.67, - "learning_rate": 5.674877284943251e-08, - "loss": 3.9515, - "step": 511000 - }, - { - "epoch": 5.68, - "learning_rate": 5.680430002443195e-08, - "loss": 3.9693, - "step": 511500 - }, - { - "epoch": 5.69, - "learning_rate": 5.68598271994314e-08, - "loss": 3.9498, - "step": 512000 - }, - { - "epoch": 5.69, - "learning_rate": 5.6915354374430846e-08, - "loss": 3.9483, - "step": 512500 - }, - { - "epoch": 5.7, - "learning_rate": 5.6970881549430296e-08, - "loss": 3.96, - "step": 513000 - }, - { - "epoch": 5.7, - "learning_rate": 5.7026408724429734e-08, - "loss": 3.9513, - "step": 513500 - }, - { - "epoch": 5.71, - "learning_rate": 5.708193589942918e-08, - "loss": 3.94, - "step": 514000 - }, - { - "epoch": 5.71, - "learning_rate": 5.713746307442863e-08, - "loss": 3.9433, - "step": 514500 - }, - { - "epoch": 5.72, - "learning_rate": 5.7192990249428067e-08, - "loss": 3.938, - "step": 515000 - }, - { - "epoch": 5.72, - "learning_rate": 5.724851742442751e-08, - "loss": 3.944, - "step": 515500 - }, - { - "epoch": 5.73, - "learning_rate": 5.730404459942696e-08, - "loss": 3.9501, - "step": 516000 - }, - { - "epoch": 5.74, - "learning_rate": 5.73595717744264e-08, - "loss": 3.954, - "step": 516500 - }, - { - "epoch": 5.74, - "learning_rate": 5.741509894942584e-08, - "loss": 3.9366, - "step": 517000 - }, - { - "epoch": 5.75, - "learning_rate": 5.7470626124425294e-08, - "loss": 3.9513, - "step": 517500 - }, - { - "epoch": 5.75, - "learning_rate": 5.752615329942474e-08, - "loss": 3.9388, - "step": 518000 - }, - { - "epoch": 5.76, - "learning_rate": 5.7581680474424176e-08, - "loss": 3.9471, - "step": 518500 - }, - { - "epoch": 5.76, - "learning_rate": 5.7637207649423627e-08, - "loss": 3.9411, - "step": 519000 - }, - { - "epoch": 5.77, - "learning_rate": 5.769273482442307e-08, - "loss": 3.9785, - "step": 519500 - }, - { - "epoch": 5.77, - "learning_rate": 5.774826199942251e-08, - "loss": 3.9397, - "step": 520000 - }, - { - "epoch": 5.78, - "learning_rate": 5.780378917442196e-08, - "loss": 3.9626, - "step": 520500 - }, - { - "epoch": 5.79, - "learning_rate": 5.78593163494214e-08, - "loss": 3.9436, - "step": 521000 - }, - { - "epoch": 5.79, - "learning_rate": 5.7914843524420854e-08, - "loss": 3.9672, - "step": 521500 - }, - { - "epoch": 5.8, - "learning_rate": 5.797037069942029e-08, - "loss": 3.949, - "step": 522000 - }, - { - "epoch": 5.8, - "learning_rate": 5.8025897874419736e-08, - "loss": 3.9333, - "step": 522500 - }, - { - "epoch": 5.81, - "learning_rate": 5.8081425049419187e-08, - "loss": 3.963, - "step": 523000 - }, - { - "epoch": 5.81, - "learning_rate": 5.8136952224418624e-08, - "loss": 3.9595, - "step": 523500 - }, - { - "epoch": 5.82, - "learning_rate": 5.819247939941807e-08, - "loss": 3.9429, - "step": 524000 - }, - { - "epoch": 5.82, - "learning_rate": 5.824800657441752e-08, - "loss": 3.9546, - "step": 524500 - }, - { - "epoch": 5.83, - "learning_rate": 5.8303533749416963e-08, - "loss": 3.935, - "step": 525000 - }, - { - "epoch": 5.84, - "learning_rate": 5.83590609244164e-08, - "loss": 3.9467, - "step": 525500 - }, - { - "epoch": 5.84, - "learning_rate": 5.841458809941585e-08, - "loss": 3.9507, - "step": 526000 - }, - { - "epoch": 5.85, - "learning_rate": 5.84701152744153e-08, - "loss": 3.9562, - "step": 526500 - }, - { - "epoch": 5.85, - "learning_rate": 5.8525642449414733e-08, - "loss": 3.9703, - "step": 527000 - }, - { - "epoch": 5.86, - "learning_rate": 5.8581169624414184e-08, - "loss": 3.9301, - "step": 527500 - }, - { - "epoch": 5.86, - "learning_rate": 5.8636696799413635e-08, - "loss": 3.9348, - "step": 528000 - }, - { - "epoch": 5.87, - "learning_rate": 5.869222397441308e-08, - "loss": 3.9399, - "step": 528500 - }, - { - "epoch": 5.87, - "learning_rate": 5.874775114941252e-08, - "loss": 3.967, - "step": 529000 - }, - { - "epoch": 5.88, - "learning_rate": 5.880327832441197e-08, - "loss": 3.9575, - "step": 529500 - }, - { - "epoch": 5.89, - "learning_rate": 5.885880549941141e-08, - "loss": 3.9382, - "step": 530000 - }, - { - "epoch": 5.89, - "learning_rate": 5.891433267441085e-08, - "loss": 3.9322, - "step": 530500 - }, - { - "epoch": 5.9, - "learning_rate": 5.89698598494103e-08, - "loss": 3.9357, - "step": 531000 - }, - { - "epoch": 5.9, - "learning_rate": 5.9025387024409744e-08, - "loss": 3.9619, - "step": 531500 - }, - { - "epoch": 5.91, - "learning_rate": 5.9080914199409195e-08, - "loss": 3.9453, - "step": 532000 - }, - { - "epoch": 5.91, - "learning_rate": 5.913644137440863e-08, - "loss": 3.9507, - "step": 532500 - }, - { - "epoch": 5.92, - "learning_rate": 5.919196854940808e-08, - "loss": 3.9565, - "step": 533000 - }, - { - "epoch": 5.92, - "learning_rate": 5.924749572440753e-08, - "loss": 3.9495, - "step": 533500 - }, - { - "epoch": 5.93, - "learning_rate": 5.9303022899406965e-08, - "loss": 3.9427, - "step": 534000 - }, - { - "epoch": 5.94, - "learning_rate": 5.935855007440641e-08, - "loss": 3.9472, - "step": 534500 - }, - { - "epoch": 5.94, - "learning_rate": 5.941407724940586e-08, - "loss": 3.9472, - "step": 535000 - }, - { - "epoch": 5.95, - "learning_rate": 5.9469604424405304e-08, - "loss": 3.9411, - "step": 535500 - }, - { - "epoch": 5.95, - "learning_rate": 5.952513159940474e-08, - "loss": 3.9418, - "step": 536000 - }, - { - "epoch": 5.96, - "learning_rate": 5.958065877440419e-08, - "loss": 3.9293, - "step": 536500 - }, - { - "epoch": 5.96, - "learning_rate": 5.963618594940364e-08, - "loss": 3.9538, - "step": 537000 - }, - { - "epoch": 5.97, - "learning_rate": 5.969171312440307e-08, - "loss": 3.9491, - "step": 537500 - }, - { - "epoch": 5.97, - "learning_rate": 5.974724029940252e-08, - "loss": 3.9373, - "step": 538000 - }, - { - "epoch": 5.98, - "learning_rate": 5.980276747440198e-08, - "loss": 3.9537, - "step": 538500 - }, - { - "epoch": 5.99, - "learning_rate": 5.985829464940142e-08, - "loss": 3.9417, - "step": 539000 - }, - { - "epoch": 5.99, - "learning_rate": 5.991382182440085e-08, - "loss": 3.9653, - "step": 539500 - }, - { - "epoch": 6.0, - "learning_rate": 5.996934899940031e-08, - "loss": 3.9557, - "step": 540000 - }, - { - "epoch": 6.0, - "eval_loss": 3.9568018913269043, - "eval_runtime": 6.3151, - "eval_samples_per_second": 246.077, - "step": 540276 - }, - { - "epoch": 6.0, - "learning_rate": 6.002487617439975e-08, - "loss": 3.9361, - "step": 540500 - }, - { - "epoch": 6.01, - "learning_rate": 6.008040334939918e-08, - "loss": 3.945, - "step": 541000 - }, - { - "epoch": 6.01, - "learning_rate": 6.013593052439864e-08, - "loss": 3.9389, - "step": 541500 - }, - { - "epoch": 6.02, - "learning_rate": 6.019145769939809e-08, - "loss": 3.9583, - "step": 542000 - }, - { - "epoch": 6.02, - "learning_rate": 6.024698487439753e-08, - "loss": 3.9412, - "step": 542500 - }, - { - "epoch": 6.03, - "learning_rate": 6.030251204939697e-08, - "loss": 3.9477, - "step": 543000 - }, - { - "epoch": 6.04, - "learning_rate": 6.035803922439642e-08, - "loss": 3.9518, - "step": 543500 - }, - { - "epoch": 6.04, - "learning_rate": 6.041356639939586e-08, - "loss": 3.9443, - "step": 544000 - }, - { - "epoch": 6.05, - "learning_rate": 6.04690935743953e-08, - "loss": 3.9274, - "step": 544500 - }, - { - "epoch": 6.05, - "learning_rate": 6.052462074939475e-08, - "loss": 3.9232, - "step": 545000 - }, - { - "epoch": 6.06, - "learning_rate": 6.05801479243942e-08, - "loss": 3.9389, - "step": 545500 - }, - { - "epoch": 6.06, - "learning_rate": 6.063567509939364e-08, - "loss": 3.9506, - "step": 546000 - }, - { - "epoch": 6.07, - "learning_rate": 6.069120227439308e-08, - "loss": 3.9553, - "step": 546500 - }, - { - "epoch": 6.07, - "learning_rate": 6.074672944939253e-08, - "loss": 3.9612, - "step": 547000 - }, - { - "epoch": 6.08, - "learning_rate": 6.080225662439198e-08, - "loss": 3.94, - "step": 547500 - }, - { - "epoch": 6.09, - "learning_rate": 6.085778379939142e-08, - "loss": 3.953, - "step": 548000 - }, - { - "epoch": 6.09, - "learning_rate": 6.091331097439086e-08, - "loss": 3.9462, - "step": 548500 - }, - { - "epoch": 6.1, - "learning_rate": 6.096883814939032e-08, - "loss": 3.9316, - "step": 549000 - }, - { - "epoch": 6.1, - "learning_rate": 6.102436532438975e-08, - "loss": 3.9408, - "step": 549500 - }, - { - "epoch": 6.11, - "learning_rate": 6.107989249938919e-08, - "loss": 3.9347, - "step": 550000 - }, - { - "epoch": 6.11, - "learning_rate": 6.113541967438865e-08, - "loss": 3.9506, - "step": 550500 - }, - { - "epoch": 6.12, - "learning_rate": 6.11909468493881e-08, - "loss": 3.9241, - "step": 551000 - }, - { - "epoch": 6.12, - "learning_rate": 6.124647402438752e-08, - "loss": 3.9242, - "step": 551500 - }, - { - "epoch": 6.13, - "learning_rate": 6.130200119938698e-08, - "loss": 3.941, - "step": 552000 - }, - { - "epoch": 6.14, - "learning_rate": 6.135752837438643e-08, - "loss": 3.9496, - "step": 552500 - }, - { - "epoch": 6.14, - "learning_rate": 6.141305554938586e-08, - "loss": 3.9441, - "step": 553000 - }, - { - "epoch": 6.15, - "learning_rate": 6.146858272438531e-08, - "loss": 3.9308, - "step": 553500 - }, - { - "epoch": 6.15, - "learning_rate": 6.152410989938476e-08, - "loss": 3.9358, - "step": 554000 - }, - { - "epoch": 6.16, - "learning_rate": 6.15796370743842e-08, - "loss": 3.9401, - "step": 554500 - }, - { - "epoch": 6.16, - "learning_rate": 6.163516424938365e-08, - "loss": 3.9411, - "step": 555000 - }, - { - "epoch": 6.17, - "learning_rate": 6.169069142438309e-08, - "loss": 3.9419, - "step": 555500 - }, - { - "epoch": 6.17, - "learning_rate": 6.174621859938254e-08, - "loss": 3.9487, - "step": 556000 - }, - { - "epoch": 6.18, - "learning_rate": 6.180174577438198e-08, - "loss": 3.952, - "step": 556500 - }, - { - "epoch": 6.19, - "learning_rate": 6.185727294938142e-08, - "loss": 3.9228, - "step": 557000 - }, - { - "epoch": 6.19, - "learning_rate": 6.191280012438087e-08, - "loss": 3.9396, - "step": 557500 - }, - { - "epoch": 6.2, - "learning_rate": 6.196832729938031e-08, - "loss": 3.9284, - "step": 558000 - }, - { - "epoch": 6.2, - "learning_rate": 6.202385447437976e-08, - "loss": 3.9472, - "step": 558500 - }, - { - "epoch": 6.21, - "learning_rate": 6.20793816493792e-08, - "loss": 3.9273, - "step": 559000 - }, - { - "epoch": 6.21, - "learning_rate": 6.213490882437864e-08, - "loss": 3.9503, - "step": 559500 - }, - { - "epoch": 6.22, - "learning_rate": 6.219043599937809e-08, - "loss": 3.9372, - "step": 560000 - }, - { - "epoch": 6.22, - "learning_rate": 6.224596317437753e-08, - "loss": 3.9395, - "step": 560500 - }, - { - "epoch": 6.23, - "learning_rate": 6.230149034937698e-08, - "loss": 3.9382, - "step": 561000 - }, - { - "epoch": 6.24, - "learning_rate": 6.235701752437643e-08, - "loss": 3.9413, - "step": 561500 - }, - { - "epoch": 6.24, - "learning_rate": 6.241254469937587e-08, - "loss": 3.9435, - "step": 562000 - }, - { - "epoch": 6.25, - "learning_rate": 6.246807187437532e-08, - "loss": 3.9339, - "step": 562500 - }, - { - "epoch": 6.25, - "learning_rate": 6.252359904937477e-08, - "loss": 3.9205, - "step": 563000 - }, - { - "epoch": 6.26, - "learning_rate": 6.25791262243742e-08, - "loss": 3.9363, - "step": 563500 - }, - { - "epoch": 6.26, - "learning_rate": 6.263465339937366e-08, - "loss": 3.9355, - "step": 564000 - }, - { - "epoch": 6.27, - "learning_rate": 6.26901805743731e-08, - "loss": 3.9259, - "step": 564500 - }, - { - "epoch": 6.27, - "learning_rate": 6.274570774937254e-08, - "loss": 3.9428, - "step": 565000 - }, - { - "epoch": 6.28, - "learning_rate": 6.280123492437199e-08, - "loss": 3.9435, - "step": 565500 - }, - { - "epoch": 6.29, - "learning_rate": 6.285676209937143e-08, - "loss": 3.9427, - "step": 566000 - }, - { - "epoch": 6.29, - "learning_rate": 6.291228927437088e-08, - "loss": 3.9305, - "step": 566500 - }, - { - "epoch": 6.3, - "learning_rate": 6.296781644937032e-08, - "loss": 3.9313, - "step": 567000 - }, - { - "epoch": 6.3, - "learning_rate": 6.302334362436976e-08, - "loss": 3.9337, - "step": 567500 - }, - { - "epoch": 6.31, - "learning_rate": 6.307887079936921e-08, - "loss": 3.926, - "step": 568000 - }, - { - "epoch": 6.31, - "learning_rate": 6.313439797436865e-08, - "loss": 3.9373, - "step": 568500 - }, - { - "epoch": 6.32, - "learning_rate": 6.31899251493681e-08, - "loss": 3.9312, - "step": 569000 - }, - { - "epoch": 6.32, - "learning_rate": 6.324545232436754e-08, - "loss": 3.9607, - "step": 569500 - }, - { - "epoch": 6.33, - "learning_rate": 6.330097949936699e-08, - "loss": 3.9486, - "step": 570000 - }, - { - "epoch": 6.34, - "learning_rate": 6.335650667436643e-08, - "loss": 3.9454, - "step": 570500 - }, - { - "epoch": 6.34, - "learning_rate": 6.341203384936587e-08, - "loss": 3.9454, - "step": 571000 - }, - { - "epoch": 6.35, - "learning_rate": 6.346756102436532e-08, - "loss": 3.9355, - "step": 571500 - }, - { - "epoch": 6.35, - "learning_rate": 6.352308819936478e-08, - "loss": 3.9447, - "step": 572000 - }, - { - "epoch": 6.36, - "learning_rate": 6.357861537436421e-08, - "loss": 3.946, - "step": 572500 - }, - { - "epoch": 6.36, - "learning_rate": 6.363414254936365e-08, - "loss": 3.9447, - "step": 573000 - }, - { - "epoch": 6.37, - "learning_rate": 6.368966972436311e-08, - "loss": 3.9245, - "step": 573500 - }, - { - "epoch": 6.37, - "learning_rate": 6.374519689936254e-08, - "loss": 3.9139, - "step": 574000 - }, - { - "epoch": 6.38, - "learning_rate": 6.380072407436198e-08, - "loss": 3.9303, - "step": 574500 - }, - { - "epoch": 6.39, - "learning_rate": 6.385625124936144e-08, - "loss": 3.9308, - "step": 575000 - }, - { - "epoch": 6.39, - "learning_rate": 6.391177842436088e-08, - "loss": 3.9342, - "step": 575500 - }, - { - "epoch": 6.4, - "learning_rate": 6.396730559936032e-08, - "loss": 3.9615, - "step": 576000 - }, - { - "epoch": 6.4, - "learning_rate": 6.402283277435977e-08, - "loss": 3.9385, - "step": 576500 - }, - { - "epoch": 6.41, - "learning_rate": 6.407835994935922e-08, - "loss": 3.9267, - "step": 577000 - }, - { - "epoch": 6.41, - "learning_rate": 6.413388712435865e-08, - "loss": 3.9361, - "step": 577500 - }, - { - "epoch": 6.42, - "learning_rate": 6.41894142993581e-08, - "loss": 3.9193, - "step": 578000 - }, - { - "epoch": 6.42, - "learning_rate": 6.424494147435755e-08, - "loss": 3.9314, - "step": 578500 - }, - { - "epoch": 6.43, - "learning_rate": 6.430046864935698e-08, - "loss": 3.9402, - "step": 579000 - }, - { - "epoch": 6.44, - "learning_rate": 6.435599582435644e-08, - "loss": 3.9199, - "step": 579500 - }, - { - "epoch": 6.44, - "learning_rate": 6.441152299935588e-08, - "loss": 3.9192, - "step": 580000 - }, - { - "epoch": 6.45, - "learning_rate": 6.446705017435533e-08, - "loss": 3.9321, - "step": 580500 - }, - { - "epoch": 6.45, - "learning_rate": 6.452257734935477e-08, - "loss": 3.937, - "step": 581000 - }, - { - "epoch": 6.46, - "learning_rate": 6.457810452435422e-08, - "loss": 3.9252, - "step": 581500 - }, - { - "epoch": 6.46, - "learning_rate": 6.463363169935366e-08, - "loss": 3.9259, - "step": 582000 - }, - { - "epoch": 6.47, - "learning_rate": 6.46891588743531e-08, - "loss": 3.9177, - "step": 582500 - }, - { - "epoch": 6.47, - "learning_rate": 6.474468604935255e-08, - "loss": 3.9188, - "step": 583000 - }, - { - "epoch": 6.48, - "learning_rate": 6.480021322435199e-08, - "loss": 3.9307, - "step": 583500 - }, - { - "epoch": 6.49, - "learning_rate": 6.485574039935145e-08, - "loss": 3.9509, - "step": 584000 - }, - { - "epoch": 6.49, - "learning_rate": 6.491126757435088e-08, - "loss": 3.9356, - "step": 584500 - }, - { - "epoch": 6.5, - "learning_rate": 6.496679474935032e-08, - "loss": 3.9305, - "step": 585000 - }, - { - "epoch": 6.5, - "learning_rate": 6.502232192434978e-08, - "loss": 3.9323, - "step": 585500 - }, - { - "epoch": 6.51, - "learning_rate": 6.507784909934921e-08, - "loss": 3.9217, - "step": 586000 - }, - { - "epoch": 6.51, - "learning_rate": 6.513337627434866e-08, - "loss": 3.9264, - "step": 586500 - }, - { - "epoch": 6.52, - "learning_rate": 6.518890344934811e-08, - "loss": 3.9398, - "step": 587000 - }, - { - "epoch": 6.52, - "learning_rate": 6.524443062434756e-08, - "loss": 3.9205, - "step": 587500 - }, - { - "epoch": 6.53, - "learning_rate": 6.529995779934699e-08, - "loss": 3.937, - "step": 588000 - }, - { - "epoch": 6.54, - "learning_rate": 6.535548497434645e-08, - "loss": 3.9312, - "step": 588500 - }, - { - "epoch": 6.54, - "learning_rate": 6.541101214934589e-08, - "loss": 3.9102, - "step": 589000 - }, - { - "epoch": 6.55, - "learning_rate": 6.546653932434532e-08, - "loss": 3.9319, - "step": 589500 - }, - { - "epoch": 6.55, - "learning_rate": 6.552206649934478e-08, - "loss": 3.9355, - "step": 590000 - }, - { - "epoch": 6.56, - "learning_rate": 6.557759367434422e-08, - "loss": 3.9361, - "step": 590500 - }, - { - "epoch": 6.56, - "learning_rate": 6.563312084934367e-08, - "loss": 3.9254, - "step": 591000 - }, - { - "epoch": 6.57, - "learning_rate": 6.568864802434311e-08, - "loss": 3.9286, - "step": 591500 - }, - { - "epoch": 6.57, - "learning_rate": 6.574417519934256e-08, - "loss": 3.9225, - "step": 592000 - }, - { - "epoch": 6.58, - "learning_rate": 6.5799702374342e-08, - "loss": 3.9242, - "step": 592500 - }, - { - "epoch": 6.59, - "learning_rate": 6.585522954934144e-08, - "loss": 3.9423, - "step": 593000 - }, - { - "epoch": 6.59, - "learning_rate": 6.591075672434089e-08, - "loss": 3.9053, - "step": 593500 - }, - { - "epoch": 6.6, - "learning_rate": 6.596628389934033e-08, - "loss": 3.9128, - "step": 594000 - }, - { - "epoch": 6.6, - "learning_rate": 6.602181107433979e-08, - "loss": 3.9311, - "step": 594500 - }, - { - "epoch": 6.61, - "learning_rate": 6.607733824933922e-08, - "loss": 3.9216, - "step": 595000 - }, - { - "epoch": 6.61, - "learning_rate": 6.613286542433867e-08, - "loss": 3.9226, - "step": 595500 - }, - { - "epoch": 6.62, - "learning_rate": 6.618839259933812e-08, - "loss": 3.929, - "step": 596000 - }, - { - "epoch": 6.62, - "learning_rate": 6.624391977433755e-08, - "loss": 3.9284, - "step": 596500 - }, - { - "epoch": 6.63, - "learning_rate": 6.6299446949337e-08, - "loss": 3.9281, - "step": 597000 - }, - { - "epoch": 6.64, - "learning_rate": 6.635497412433646e-08, - "loss": 3.9203, - "step": 597500 - }, - { - "epoch": 6.64, - "learning_rate": 6.64105012993359e-08, - "loss": 3.9338, - "step": 598000 - }, - { - "epoch": 6.65, - "learning_rate": 6.646602847433533e-08, - "loss": 3.9226, - "step": 598500 - }, - { - "epoch": 6.65, - "learning_rate": 6.652155564933479e-08, - "loss": 3.9085, - "step": 599000 - }, - { - "epoch": 6.66, - "learning_rate": 6.657708282433423e-08, - "loss": 3.9467, - "step": 599500 - }, - { - "epoch": 6.66, - "learning_rate": 6.663260999933366e-08, - "loss": 3.9172, - "step": 600000 - }, - { - "epoch": 6.67, - "learning_rate": 6.668813717433312e-08, - "loss": 3.932, - "step": 600500 - }, - { - "epoch": 6.67, - "learning_rate": 6.674366434933256e-08, - "loss": 3.9004, - "step": 601000 - }, - { - "epoch": 6.68, - "learning_rate": 6.679919152433201e-08, - "loss": 3.916, - "step": 601500 - }, - { - "epoch": 6.69, - "learning_rate": 6.685471869933145e-08, - "loss": 3.9365, - "step": 602000 - }, - { - "epoch": 6.69, - "learning_rate": 6.69102458743309e-08, - "loss": 3.9196, - "step": 602500 - }, - { - "epoch": 6.7, - "learning_rate": 6.696577304933034e-08, - "loss": 3.9324, - "step": 603000 - }, - { - "epoch": 6.7, - "learning_rate": 6.702130022432979e-08, - "loss": 3.9126, - "step": 603500 - }, - { - "epoch": 6.71, - "learning_rate": 6.707682739932923e-08, - "loss": 3.9314, - "step": 604000 - }, - { - "epoch": 6.71, - "learning_rate": 6.713235457432867e-08, - "loss": 3.9297, - "step": 604500 - }, - { - "epoch": 6.72, - "learning_rate": 6.718788174932812e-08, - "loss": 3.9211, - "step": 605000 - }, - { - "epoch": 6.72, - "learning_rate": 6.724340892432756e-08, - "loss": 3.9295, - "step": 605500 - }, - { - "epoch": 6.73, - "learning_rate": 6.7298936099327e-08, - "loss": 3.9194, - "step": 606000 - }, - { - "epoch": 6.74, - "learning_rate": 6.735446327432645e-08, - "loss": 3.9063, - "step": 606500 - }, - { - "epoch": 6.74, - "learning_rate": 6.74099904493259e-08, - "loss": 3.9325, - "step": 607000 - }, - { - "epoch": 6.75, - "learning_rate": 6.746551762432534e-08, - "loss": 3.9129, - "step": 607500 - }, - { - "epoch": 6.75, - "learning_rate": 6.752104479932478e-08, - "loss": 3.922, - "step": 608000 - }, - { - "epoch": 6.76, - "learning_rate": 6.757657197432423e-08, - "loss": 3.937, - "step": 608500 - }, - { - "epoch": 6.76, - "learning_rate": 6.763209914932367e-08, - "loss": 3.9429, - "step": 609000 - }, - { - "epoch": 6.77, - "learning_rate": 6.768762632432312e-08, - "loss": 3.9045, - "step": 609500 - }, - { - "epoch": 6.77, - "learning_rate": 6.774315349932257e-08, - "loss": 3.9464, - "step": 610000 - }, - { - "epoch": 6.78, - "learning_rate": 6.7798680674322e-08, - "loss": 3.9216, - "step": 610500 - }, - { - "epoch": 6.79, - "learning_rate": 6.785420784932145e-08, - "loss": 3.9371, - "step": 611000 - }, - { - "epoch": 6.79, - "learning_rate": 6.79097350243209e-08, - "loss": 3.9265, - "step": 611500 - }, - { - "epoch": 6.8, - "learning_rate": 6.796526219932034e-08, - "loss": 3.938, - "step": 612000 - }, - { - "epoch": 6.8, - "learning_rate": 6.802078937431978e-08, - "loss": 3.9481, - "step": 612500 - }, - { - "epoch": 6.81, - "learning_rate": 6.807631654931924e-08, - "loss": 3.9402, - "step": 613000 - }, - { - "epoch": 6.81, - "learning_rate": 6.813184372431868e-08, - "loss": 3.9236, - "step": 613500 - }, - { - "epoch": 6.82, - "learning_rate": 6.818737089931811e-08, - "loss": 3.9269, - "step": 614000 - }, - { - "epoch": 6.82, - "learning_rate": 6.824289807431757e-08, - "loss": 3.9182, - "step": 614500 - }, - { - "epoch": 6.83, - "learning_rate": 6.829842524931701e-08, - "loss": 3.9493, - "step": 615000 - }, - { - "epoch": 6.84, - "learning_rate": 6.835395242431645e-08, - "loss": 3.9467, - "step": 615500 - }, - { - "epoch": 6.84, - "learning_rate": 6.84094795993159e-08, - "loss": 3.9133, - "step": 616000 - }, - { - "epoch": 6.85, - "learning_rate": 6.846500677431535e-08, - "loss": 3.9211, - "step": 616500 - }, - { - "epoch": 6.85, - "learning_rate": 6.852053394931479e-08, - "loss": 3.933, - "step": 617000 - }, - { - "epoch": 6.86, - "learning_rate": 6.857606112431424e-08, - "loss": 3.9194, - "step": 617500 - }, - { - "epoch": 6.86, - "learning_rate": 6.863158829931368e-08, - "loss": 3.9159, - "step": 618000 - }, - { - "epoch": 6.87, - "learning_rate": 6.868711547431312e-08, - "loss": 3.9272, - "step": 618500 - }, - { - "epoch": 6.87, - "learning_rate": 6.874264264931257e-08, - "loss": 3.9273, - "step": 619000 - }, - { - "epoch": 6.88, - "learning_rate": 6.879816982431201e-08, - "loss": 3.9304, - "step": 619500 - }, - { - "epoch": 6.89, - "learning_rate": 6.885369699931146e-08, - "loss": 3.9261, - "step": 620000 - }, - { - "epoch": 6.89, - "learning_rate": 6.890922417431091e-08, - "loss": 3.9158, - "step": 620500 - }, - { - "epoch": 6.9, - "learning_rate": 6.896475134931034e-08, - "loss": 3.929, - "step": 621000 - }, - { - "epoch": 6.9, - "learning_rate": 6.902027852430979e-08, - "loss": 3.916, - "step": 621500 - }, - { - "epoch": 6.91, - "learning_rate": 6.907580569930925e-08, - "loss": 3.9179, - "step": 622000 - }, - { - "epoch": 6.91, - "learning_rate": 6.913133287430868e-08, - "loss": 3.9178, - "step": 622500 - }, - { - "epoch": 6.92, - "learning_rate": 6.918686004930812e-08, - "loss": 3.9373, - "step": 623000 - }, - { - "epoch": 6.92, - "learning_rate": 6.924238722430758e-08, - "loss": 3.929, - "step": 623500 - }, - { - "epoch": 6.93, - "learning_rate": 6.929791439930702e-08, - "loss": 3.9087, - "step": 624000 - }, - { - "epoch": 6.94, - "learning_rate": 6.935344157430645e-08, - "loss": 3.9404, - "step": 624500 - }, - { - "epoch": 6.94, - "learning_rate": 6.940896874930591e-08, - "loss": 3.9256, - "step": 625000 - }, - { - "epoch": 6.95, - "learning_rate": 6.946449592430536e-08, - "loss": 3.9075, - "step": 625500 - }, - { - "epoch": 6.95, - "learning_rate": 6.952002309930479e-08, - "loss": 3.9193, - "step": 626000 - }, - { - "epoch": 6.96, - "learning_rate": 6.957555027430424e-08, - "loss": 3.9285, - "step": 626500 - }, - { - "epoch": 6.96, - "learning_rate": 6.963107744930369e-08, - "loss": 3.9006, - "step": 627000 - }, - { - "epoch": 6.97, - "learning_rate": 6.968660462430313e-08, - "loss": 3.9346, - "step": 627500 - }, - { - "epoch": 6.97, - "learning_rate": 6.974213179930258e-08, - "loss": 3.9115, - "step": 628000 - }, - { - "epoch": 6.98, - "learning_rate": 6.979765897430202e-08, - "loss": 3.9158, - "step": 628500 - }, - { - "epoch": 6.99, - "learning_rate": 6.985318614930146e-08, - "loss": 3.8951, - "step": 629000 - }, - { - "epoch": 6.99, - "learning_rate": 6.990871332430091e-08, - "loss": 3.9125, - "step": 629500 - }, - { - "epoch": 7.0, - "learning_rate": 6.996424049930035e-08, - "loss": 3.9119, - "step": 630000 - }, - { - "epoch": 7.0, - "eval_loss": 3.940045118331909, - "eval_runtime": 6.3095, - "eval_samples_per_second": 246.294, - "step": 630322 - }, - { - "epoch": 7.0, - "learning_rate": 7.00197676742998e-08, - "loss": 3.9333, - "step": 630500 - }, - { - "epoch": 7.01, - "learning_rate": 7.007529484929925e-08, - "loss": 3.9177, - "step": 631000 - }, - { - "epoch": 7.01, - "learning_rate": 7.013082202429869e-08, - "loss": 3.915, - "step": 631500 - }, - { - "epoch": 7.02, - "learning_rate": 7.018634919929813e-08, - "loss": 3.9332, - "step": 632000 - }, - { - "epoch": 7.02, - "learning_rate": 7.024187637429759e-08, - "loss": 3.9191, - "step": 632500 - }, - { - "epoch": 7.03, - "learning_rate": 7.029740354929702e-08, - "loss": 3.919, - "step": 633000 - }, - { - "epoch": 7.04, - "learning_rate": 7.035293072429646e-08, - "loss": 3.9161, - "step": 633500 - }, - { - "epoch": 7.04, - "learning_rate": 7.040845789929592e-08, - "loss": 3.9204, - "step": 634000 - }, - { - "epoch": 7.05, - "learning_rate": 7.046398507429536e-08, - "loss": 3.9058, - "step": 634500 - }, - { - "epoch": 7.05, - "learning_rate": 7.05195122492948e-08, - "loss": 3.9288, - "step": 635000 - }, - { - "epoch": 7.06, - "learning_rate": 7.057503942429425e-08, - "loss": 3.9139, - "step": 635500 - }, - { - "epoch": 7.06, - "learning_rate": 7.06305665992937e-08, - "loss": 3.9273, - "step": 636000 - }, - { - "epoch": 7.07, - "learning_rate": 7.068609377429313e-08, - "loss": 3.9258, - "step": 636500 - }, - { - "epoch": 7.07, - "learning_rate": 7.074162094929259e-08, - "loss": 3.9207, - "step": 637000 - }, - { - "epoch": 7.08, - "learning_rate": 7.079714812429203e-08, - "loss": 3.9064, - "step": 637500 - }, - { - "epoch": 7.09, - "learning_rate": 7.085267529929147e-08, - "loss": 3.9279, - "step": 638000 - }, - { - "epoch": 7.09, - "learning_rate": 7.090820247429092e-08, - "loss": 3.9297, - "step": 638500 - }, - { - "epoch": 7.1, - "learning_rate": 7.096372964929036e-08, - "loss": 3.9044, - "step": 639000 - }, - { - "epoch": 7.1, - "learning_rate": 7.10192568242898e-08, - "loss": 3.9123, - "step": 639500 - }, - { - "epoch": 7.11, - "learning_rate": 7.107478399928925e-08, - "loss": 3.8989, - "step": 640000 - }, - { - "epoch": 7.11, - "learning_rate": 7.11303111742887e-08, - "loss": 3.94, - "step": 640500 - }, - { - "epoch": 7.12, - "learning_rate": 7.118583834928814e-08, - "loss": 3.9098, - "step": 641000 - }, - { - "epoch": 7.12, - "learning_rate": 7.124136552428758e-08, - "loss": 3.8942, - "step": 641500 - }, - { - "epoch": 7.13, - "learning_rate": 7.129689269928703e-08, - "loss": 3.9097, - "step": 642000 - }, - { - "epoch": 7.14, - "learning_rate": 7.135241987428647e-08, - "loss": 3.9137, - "step": 642500 - }, - { - "epoch": 7.14, - "learning_rate": 7.140794704928592e-08, - "loss": 3.91, - "step": 643000 - }, - { - "epoch": 7.15, - "learning_rate": 7.146347422428536e-08, - "loss": 3.9204, - "step": 643500 - }, - { - "epoch": 7.15, - "learning_rate": 7.15190013992848e-08, - "loss": 3.9341, - "step": 644000 - }, - { - "epoch": 7.16, - "learning_rate": 7.157452857428425e-08, - "loss": 3.9228, - "step": 644500 - }, - { - "epoch": 7.16, - "learning_rate": 7.163005574928369e-08, - "loss": 3.9228, - "step": 645000 - }, - { - "epoch": 7.17, - "learning_rate": 7.168558292428314e-08, - "loss": 3.9125, - "step": 645500 - }, - { - "epoch": 7.17, - "learning_rate": 7.174111009928258e-08, - "loss": 3.9016, - "step": 646000 - }, - { - "epoch": 7.18, - "learning_rate": 7.179663727428204e-08, - "loss": 3.9306, - "step": 646500 - }, - { - "epoch": 7.19, - "learning_rate": 7.185216444928147e-08, - "loss": 3.9173, - "step": 647000 - }, - { - "epoch": 7.19, - "learning_rate": 7.190769162428093e-08, - "loss": 3.907, - "step": 647500 - }, - { - "epoch": 7.2, - "learning_rate": 7.196321879928037e-08, - "loss": 3.9289, - "step": 648000 - }, - { - "epoch": 7.2, - "learning_rate": 7.20187459742798e-08, - "loss": 3.9323, - "step": 648500 - }, - { - "epoch": 7.21, - "learning_rate": 7.207427314927926e-08, - "loss": 3.9233, - "step": 649000 - }, - { - "epoch": 7.21, - "learning_rate": 7.21298003242787e-08, - "loss": 3.9141, - "step": 649500 - }, - { - "epoch": 7.22, - "learning_rate": 7.218532749927815e-08, - "loss": 3.9246, - "step": 650000 - }, - { - "epoch": 7.22, - "learning_rate": 7.224085467427759e-08, - "loss": 3.9174, - "step": 650500 - }, - { - "epoch": 7.23, - "learning_rate": 7.229638184927704e-08, - "loss": 3.9108, - "step": 651000 - }, - { - "epoch": 7.24, - "learning_rate": 7.235190902427648e-08, - "loss": 3.9237, - "step": 651500 - }, - { - "epoch": 7.24, - "learning_rate": 7.240743619927592e-08, - "loss": 3.9005, - "step": 652000 - }, - { - "epoch": 7.25, - "learning_rate": 7.246296337427537e-08, - "loss": 3.9105, - "step": 652500 - }, - { - "epoch": 7.25, - "learning_rate": 7.251849054927481e-08, - "loss": 3.9028, - "step": 653000 - }, - { - "epoch": 7.26, - "learning_rate": 7.257401772427426e-08, - "loss": 3.9251, - "step": 653500 - }, - { - "epoch": 7.26, - "learning_rate": 7.26295448992737e-08, - "loss": 3.9139, - "step": 654000 - }, - { - "epoch": 7.27, - "learning_rate": 7.268507207427314e-08, - "loss": 3.9035, - "step": 654500 - }, - { - "epoch": 7.27, - "learning_rate": 7.274059924927259e-08, - "loss": 3.9356, - "step": 655000 - }, - { - "epoch": 7.28, - "learning_rate": 7.279612642427203e-08, - "loss": 3.8926, - "step": 655500 - }, - { - "epoch": 7.29, - "learning_rate": 7.285165359927148e-08, - "loss": 3.9119, - "step": 656000 - }, - { - "epoch": 7.29, - "learning_rate": 7.290718077427092e-08, - "loss": 3.9163, - "step": 656500 - }, - { - "epoch": 7.3, - "learning_rate": 7.296270794927038e-08, - "loss": 3.887, - "step": 657000 - }, - { - "epoch": 7.3, - "learning_rate": 7.301823512426981e-08, - "loss": 3.9306, - "step": 657500 - }, - { - "epoch": 7.31, - "learning_rate": 7.307376229926925e-08, - "loss": 3.9175, - "step": 658000 - }, - { - "epoch": 7.31, - "learning_rate": 7.312928947426871e-08, - "loss": 3.9034, - "step": 658500 - }, - { - "epoch": 7.32, - "learning_rate": 7.318481664926814e-08, - "loss": 3.912, - "step": 659000 - }, - { - "epoch": 7.32, - "learning_rate": 7.324034382426759e-08, - "loss": 3.9079, - "step": 659500 - }, - { - "epoch": 7.33, - "learning_rate": 7.329587099926704e-08, - "loss": 3.9262, - "step": 660000 - }, - { - "epoch": 7.34, - "learning_rate": 7.335139817426649e-08, - "loss": 3.9054, - "step": 660500 - }, - { - "epoch": 7.34, - "learning_rate": 7.340692534926592e-08, - "loss": 3.9286, - "step": 661000 - }, - { - "epoch": 7.35, - "learning_rate": 7.346245252426538e-08, - "loss": 3.9119, - "step": 661500 - }, - { - "epoch": 7.35, - "learning_rate": 7.351797969926482e-08, - "loss": 3.9145, - "step": 662000 - }, - { - "epoch": 7.36, - "learning_rate": 7.357350687426425e-08, - "loss": 3.8954, - "step": 662500 - }, - { - "epoch": 7.36, - "learning_rate": 7.362903404926371e-08, - "loss": 3.9087, - "step": 663000 - }, - { - "epoch": 7.37, - "learning_rate": 7.368456122426315e-08, - "loss": 3.9164, - "step": 663500 - }, - { - "epoch": 7.37, - "learning_rate": 7.37400883992626e-08, - "loss": 3.9016, - "step": 664000 - }, - { - "epoch": 7.38, - "learning_rate": 7.379561557426204e-08, - "loss": 3.9077, - "step": 664500 - }, - { - "epoch": 7.39, - "learning_rate": 7.385114274926149e-08, - "loss": 3.907, - "step": 665000 - }, - { - "epoch": 7.39, - "learning_rate": 7.390666992426093e-08, - "loss": 3.9029, - "step": 665500 - }, - { - "epoch": 7.4, - "learning_rate": 7.396219709926037e-08, - "loss": 3.9104, - "step": 666000 - }, - { - "epoch": 7.4, - "learning_rate": 7.401772427425982e-08, - "loss": 3.9427, - "step": 666500 - }, - { - "epoch": 7.41, - "learning_rate": 7.407325144925926e-08, - "loss": 3.9092, - "step": 667000 - }, - { - "epoch": 7.41, - "learning_rate": 7.412877862425872e-08, - "loss": 3.9066, - "step": 667500 - }, - { - "epoch": 7.42, - "learning_rate": 7.418430579925815e-08, - "loss": 3.9082, - "step": 668000 - }, - { - "epoch": 7.42, - "learning_rate": 7.42398329742576e-08, - "loss": 3.8989, - "step": 668500 - }, - { - "epoch": 7.43, - "learning_rate": 7.429536014925705e-08, - "loss": 3.9068, - "step": 669000 - }, - { - "epoch": 7.44, - "learning_rate": 7.435088732425648e-08, - "loss": 3.919, - "step": 669500 - }, - { - "epoch": 7.44, - "learning_rate": 7.440641449925593e-08, - "loss": 3.898, - "step": 670000 - }, - { - "epoch": 7.45, - "learning_rate": 7.446194167425538e-08, - "loss": 3.9319, - "step": 670500 - }, - { - "epoch": 7.45, - "learning_rate": 7.451746884925482e-08, - "loss": 3.9045, - "step": 671000 - }, - { - "epoch": 7.46, - "learning_rate": 7.457299602425426e-08, - "loss": 3.94, - "step": 671500 - }, - { - "epoch": 7.46, - "learning_rate": 7.462852319925372e-08, - "loss": 3.9074, - "step": 672000 - }, - { - "epoch": 7.47, - "learning_rate": 7.468405037425316e-08, - "loss": 3.9192, - "step": 672500 - }, - { - "epoch": 7.47, - "learning_rate": 7.473957754925259e-08, - "loss": 3.9254, - "step": 673000 - }, - { - "epoch": 7.48, - "learning_rate": 7.479510472425205e-08, - "loss": 3.8787, - "step": 673500 - }, - { - "epoch": 7.49, - "learning_rate": 7.48506318992515e-08, - "loss": 3.9068, - "step": 674000 - }, - { - "epoch": 7.49, - "learning_rate": 7.490615907425092e-08, - "loss": 3.9256, - "step": 674500 - }, - { - "epoch": 7.5, - "learning_rate": 7.496168624925038e-08, - "loss": 3.9091, - "step": 675000 - }, - { - "epoch": 7.5, - "learning_rate": 7.501721342424983e-08, - "loss": 3.9024, - "step": 675500 - }, - { - "epoch": 7.51, - "learning_rate": 7.507274059924927e-08, - "loss": 3.8895, - "step": 676000 - }, - { - "epoch": 7.51, - "learning_rate": 7.512826777424871e-08, - "loss": 3.8965, - "step": 676500 - }, - { - "epoch": 7.52, - "learning_rate": 7.518379494924816e-08, - "loss": 3.9147, - "step": 677000 - }, - { - "epoch": 7.52, - "learning_rate": 7.52393221242476e-08, - "loss": 3.9156, - "step": 677500 - }, - { - "epoch": 7.53, - "learning_rate": 7.529484929924705e-08, - "loss": 3.8979, - "step": 678000 - }, - { - "epoch": 7.54, - "learning_rate": 7.535037647424649e-08, - "loss": 3.9147, - "step": 678500 - }, - { - "epoch": 7.54, - "learning_rate": 7.540590364924594e-08, - "loss": 3.9017, - "step": 679000 - }, - { - "epoch": 7.55, - "learning_rate": 7.546143082424539e-08, - "loss": 3.9015, - "step": 679500 - }, - { - "epoch": 7.55, - "learning_rate": 7.551695799924482e-08, - "loss": 3.9117, - "step": 680000 - }, - { - "epoch": 7.56, - "learning_rate": 7.557248517424427e-08, - "loss": 3.9013, - "step": 680500 - }, - { - "epoch": 7.56, - "learning_rate": 7.562801234924373e-08, - "loss": 3.9035, - "step": 681000 - }, - { - "epoch": 7.57, - "learning_rate": 7.568353952424316e-08, - "loss": 3.9101, - "step": 681500 - }, - { - "epoch": 7.57, - "learning_rate": 7.57390666992426e-08, - "loss": 3.9151, - "step": 682000 - }, - { - "epoch": 7.58, - "learning_rate": 7.579459387424206e-08, - "loss": 3.9254, - "step": 682500 - }, - { - "epoch": 7.59, - "learning_rate": 7.58501210492415e-08, - "loss": 3.9193, - "step": 683000 - }, - { - "epoch": 7.59, - "learning_rate": 7.590564822424093e-08, - "loss": 3.8989, - "step": 683500 - }, - { - "epoch": 7.6, - "learning_rate": 7.596117539924039e-08, - "loss": 3.9069, - "step": 684000 - }, - { - "epoch": 7.6, - "learning_rate": 7.601670257423984e-08, - "loss": 3.8954, - "step": 684500 - }, - { - "epoch": 7.61, - "learning_rate": 7.607222974923927e-08, - "loss": 3.9021, - "step": 685000 - }, - { - "epoch": 7.61, - "learning_rate": 7.612775692423872e-08, - "loss": 3.9001, - "step": 685500 - }, - { - "epoch": 7.62, - "learning_rate": 7.618328409923817e-08, - "loss": 3.9204, - "step": 686000 - }, - { - "epoch": 7.62, - "learning_rate": 7.623881127423761e-08, - "loss": 3.9168, - "step": 686500 - }, - { - "epoch": 7.63, - "learning_rate": 7.629433844923706e-08, - "loss": 3.9207, - "step": 687000 - }, - { - "epoch": 7.63, - "learning_rate": 7.63498656242365e-08, - "loss": 3.9028, - "step": 687500 - }, - { - "epoch": 7.64, - "learning_rate": 7.640539279923594e-08, - "loss": 3.9302, - "step": 688000 - }, - { - "epoch": 7.65, - "learning_rate": 7.646091997423539e-08, - "loss": 3.93, - "step": 688500 - }, - { - "epoch": 7.65, - "learning_rate": 7.651644714923483e-08, - "loss": 3.9112, - "step": 689000 - }, - { - "epoch": 7.66, - "learning_rate": 7.657197432423428e-08, - "loss": 3.8938, - "step": 689500 - }, - { - "epoch": 7.66, - "learning_rate": 7.662750149923372e-08, - "loss": 3.9033, - "step": 690000 - }, - { - "epoch": 7.67, - "learning_rate": 7.668302867423317e-08, - "loss": 3.9132, - "step": 690500 - }, - { - "epoch": 7.67, - "learning_rate": 7.673855584923261e-08, - "loss": 3.9075, - "step": 691000 - }, - { - "epoch": 7.68, - "learning_rate": 7.679408302423205e-08, - "loss": 3.913, - "step": 691500 - }, - { - "epoch": 7.68, - "learning_rate": 7.68496101992315e-08, - "loss": 3.9108, - "step": 692000 - }, - { - "epoch": 7.69, - "learning_rate": 7.690513737423094e-08, - "loss": 3.8918, - "step": 692500 - }, - { - "epoch": 7.7, - "learning_rate": 7.696066454923039e-08, - "loss": 3.9039, - "step": 693000 - }, - { - "epoch": 7.7, - "learning_rate": 7.701619172422984e-08, - "loss": 3.9054, - "step": 693500 - }, - { - "epoch": 7.71, - "learning_rate": 7.707171889922927e-08, - "loss": 3.9161, - "step": 694000 - }, - { - "epoch": 7.71, - "learning_rate": 7.712724607422872e-08, - "loss": 3.9053, - "step": 694500 - }, - { - "epoch": 7.72, - "learning_rate": 7.718277324922818e-08, - "loss": 3.9258, - "step": 695000 - }, - { - "epoch": 7.72, - "learning_rate": 7.723830042422761e-08, - "loss": 3.8918, - "step": 695500 - }, - { - "epoch": 7.73, - "learning_rate": 7.729382759922705e-08, - "loss": 3.9156, - "step": 696000 - }, - { - "epoch": 7.73, - "learning_rate": 7.734935477422651e-08, - "loss": 3.9374, - "step": 696500 - }, - { - "epoch": 7.74, - "learning_rate": 7.740488194922595e-08, - "loss": 3.8993, - "step": 697000 - }, - { - "epoch": 7.75, - "learning_rate": 7.746040912422538e-08, - "loss": 3.9103, - "step": 697500 - }, - { - "epoch": 7.75, - "learning_rate": 7.751593629922484e-08, - "loss": 3.8828, - "step": 698000 - }, - { - "epoch": 7.76, - "learning_rate": 7.757146347422429e-08, - "loss": 3.9187, - "step": 698500 - }, - { - "epoch": 7.76, - "learning_rate": 7.762699064922372e-08, - "loss": 3.9223, - "step": 699000 - }, - { - "epoch": 7.77, - "learning_rate": 7.768251782422317e-08, - "loss": 3.9157, - "step": 699500 - }, - { - "epoch": 7.77, - "learning_rate": 7.773804499922262e-08, - "loss": 3.8826, - "step": 700000 - }, - { - "epoch": 7.78, - "learning_rate": 7.779357217422206e-08, - "loss": 3.9057, - "step": 700500 - }, - { - "epoch": 7.78, - "learning_rate": 7.78490993492215e-08, - "loss": 3.9149, - "step": 701000 - }, - { - "epoch": 7.79, - "learning_rate": 7.790462652422095e-08, - "loss": 3.8994, - "step": 701500 - }, - { - "epoch": 7.8, - "learning_rate": 7.79601536992204e-08, - "loss": 3.9032, - "step": 702000 - }, - { - "epoch": 7.8, - "learning_rate": 7.801568087421984e-08, - "loss": 3.8996, - "step": 702500 - }, - { - "epoch": 7.81, - "learning_rate": 7.807120804921928e-08, - "loss": 3.9012, - "step": 703000 - }, - { - "epoch": 7.81, - "learning_rate": 7.812673522421873e-08, - "loss": 3.8985, - "step": 703500 - }, - { - "epoch": 7.82, - "learning_rate": 7.818226239921817e-08, - "loss": 3.8717, - "step": 704000 - }, - { - "epoch": 7.82, - "learning_rate": 7.823778957421762e-08, - "loss": 3.8897, - "step": 704500 - }, - { - "epoch": 7.83, - "learning_rate": 7.829331674921706e-08, - "loss": 3.9134, - "step": 705000 - }, - { - "epoch": 7.83, - "learning_rate": 7.834884392421652e-08, - "loss": 3.9101, - "step": 705500 - }, - { - "epoch": 7.84, - "learning_rate": 7.840437109921595e-08, - "loss": 3.9264, - "step": 706000 - }, - { - "epoch": 7.85, - "learning_rate": 7.845989827421539e-08, - "loss": 3.919, - "step": 706500 - }, - { - "epoch": 7.85, - "learning_rate": 7.851542544921485e-08, - "loss": 3.9073, - "step": 707000 - }, - { - "epoch": 7.86, - "learning_rate": 7.857095262421428e-08, - "loss": 3.9105, - "step": 707500 - }, - { - "epoch": 7.86, - "learning_rate": 7.862647979921372e-08, - "loss": 3.9152, - "step": 708000 - }, - { - "epoch": 7.87, - "learning_rate": 7.868200697421318e-08, - "loss": 3.8899, - "step": 708500 - }, - { - "epoch": 7.87, - "learning_rate": 7.873753414921263e-08, - "loss": 3.8969, - "step": 709000 - }, - { - "epoch": 7.88, - "learning_rate": 7.879306132421206e-08, - "loss": 3.8993, - "step": 709500 - }, - { - "epoch": 7.88, - "learning_rate": 7.884858849921151e-08, - "loss": 3.8914, - "step": 710000 - }, - { - "epoch": 7.89, - "learning_rate": 7.890411567421096e-08, - "loss": 3.9001, - "step": 710500 - }, - { - "epoch": 7.9, - "learning_rate": 7.895964284921039e-08, - "loss": 3.9019, - "step": 711000 - }, - { - "epoch": 7.9, - "learning_rate": 7.901517002420985e-08, - "loss": 3.9078, - "step": 711500 - }, - { - "epoch": 7.91, - "learning_rate": 7.907069719920929e-08, - "loss": 3.9036, - "step": 712000 - }, - { - "epoch": 7.91, - "learning_rate": 7.912622437420874e-08, - "loss": 3.9092, - "step": 712500 - }, - { - "epoch": 7.92, - "learning_rate": 7.918175154920818e-08, - "loss": 3.8802, - "step": 713000 - }, - { - "epoch": 7.92, - "learning_rate": 7.923727872420762e-08, - "loss": 3.9023, - "step": 713500 - }, - { - "epoch": 7.93, - "learning_rate": 7.929280589920707e-08, - "loss": 3.9171, - "step": 714000 - }, - { - "epoch": 7.93, - "learning_rate": 7.934833307420651e-08, - "loss": 3.8838, - "step": 714500 - }, - { - "epoch": 7.94, - "learning_rate": 7.940386024920596e-08, - "loss": 3.8974, - "step": 715000 - }, - { - "epoch": 7.95, - "learning_rate": 7.94593874242054e-08, - "loss": 3.9014, - "step": 715500 - }, - { - "epoch": 7.95, - "learning_rate": 7.951491459920486e-08, - "loss": 3.8962, - "step": 716000 - }, - { - "epoch": 7.96, - "learning_rate": 7.957044177420429e-08, - "loss": 3.9001, - "step": 716500 - }, - { - "epoch": 7.96, - "learning_rate": 7.962596894920373e-08, - "loss": 3.9074, - "step": 717000 - }, - { - "epoch": 7.97, - "learning_rate": 7.968149612420319e-08, - "loss": 3.8985, - "step": 717500 - }, - { - "epoch": 7.97, - "learning_rate": 7.973702329920262e-08, - "loss": 3.9001, - "step": 718000 - }, - { - "epoch": 7.98, - "learning_rate": 7.979255047420207e-08, - "loss": 3.9109, - "step": 718500 - }, - { - "epoch": 7.98, - "learning_rate": 7.984807764920152e-08, - "loss": 3.8939, - "step": 719000 - }, - { - "epoch": 7.99, - "learning_rate": 7.990360482420097e-08, - "loss": 3.9072, - "step": 719500 - }, - { - "epoch": 8.0, - "learning_rate": 7.99591319992004e-08, - "loss": 3.9048, - "step": 720000 - }, - { - "epoch": 8.0, - "eval_loss": 3.9263479709625244, - "eval_runtime": 6.3049, - "eval_samples_per_second": 246.475, - "step": 720368 - }, - { - "epoch": 8.0, - "learning_rate": 8.001465917419986e-08, - "loss": 3.9229, - "step": 720500 - }, - { - "epoch": 8.01, - "learning_rate": 8.00701863491993e-08, - "loss": 3.8859, - "step": 721000 - }, - { - "epoch": 8.01, - "learning_rate": 8.012571352419873e-08, - "loss": 3.9058, - "step": 721500 - }, - { - "epoch": 8.02, - "learning_rate": 8.018124069919819e-08, - "loss": 3.9237, - "step": 722000 - }, - { - "epoch": 8.02, - "learning_rate": 8.023676787419763e-08, - "loss": 3.9156, - "step": 722500 - }, - { - "epoch": 8.03, - "learning_rate": 8.029229504919708e-08, - "loss": 3.8977, - "step": 723000 - }, - { - "epoch": 8.03, - "learning_rate": 8.034782222419652e-08, - "loss": 3.894, - "step": 723500 - }, - { - "epoch": 8.04, - "learning_rate": 8.040334939919596e-08, - "loss": 3.8925, - "step": 724000 - }, - { - "epoch": 8.05, - "learning_rate": 8.045887657419541e-08, - "loss": 3.8966, - "step": 724500 - }, - { - "epoch": 8.05, - "learning_rate": 8.051440374919485e-08, - "loss": 3.9057, - "step": 725000 - }, - { - "epoch": 8.06, - "learning_rate": 8.05699309241943e-08, - "loss": 3.9155, - "step": 725500 - }, - { - "epoch": 8.06, - "learning_rate": 8.062545809919374e-08, - "loss": 3.8936, - "step": 726000 - }, - { - "epoch": 8.07, - "learning_rate": 8.068098527419319e-08, - "loss": 3.8901, - "step": 726500 - }, - { - "epoch": 8.07, - "learning_rate": 8.073651244919263e-08, - "loss": 3.9014, - "step": 727000 - }, - { - "epoch": 8.08, - "learning_rate": 8.079203962419207e-08, - "loss": 3.8952, - "step": 727500 - }, - { - "epoch": 8.08, - "learning_rate": 8.084756679919152e-08, - "loss": 3.8791, - "step": 728000 - }, - { - "epoch": 8.09, - "learning_rate": 8.090309397419096e-08, - "loss": 3.8913, - "step": 728500 - }, - { - "epoch": 8.1, - "learning_rate": 8.09586211491904e-08, - "loss": 3.9013, - "step": 729000 - }, - { - "epoch": 8.1, - "learning_rate": 8.101414832418985e-08, - "loss": 3.9127, - "step": 729500 - }, - { - "epoch": 8.11, - "learning_rate": 8.106967549918931e-08, - "loss": 3.8911, - "step": 730000 - }, - { - "epoch": 8.11, - "learning_rate": 8.112520267418874e-08, - "loss": 3.9138, - "step": 730500 - }, - { - "epoch": 8.12, - "learning_rate": 8.118072984918818e-08, - "loss": 3.8962, - "step": 731000 - }, - { - "epoch": 8.12, - "learning_rate": 8.123625702418764e-08, - "loss": 3.8867, - "step": 731500 - }, - { - "epoch": 8.13, - "learning_rate": 8.129178419918707e-08, - "loss": 3.8883, - "step": 732000 - }, - { - "epoch": 8.13, - "learning_rate": 8.134731137418652e-08, - "loss": 3.8978, - "step": 732500 - }, - { - "epoch": 8.14, - "learning_rate": 8.140283854918597e-08, - "loss": 3.8904, - "step": 733000 - }, - { - "epoch": 8.15, - "learning_rate": 8.14583657241854e-08, - "loss": 3.9044, - "step": 733500 - }, - { - "epoch": 8.15, - "learning_rate": 8.151389289918486e-08, - "loss": 3.8837, - "step": 734000 - }, - { - "epoch": 8.16, - "learning_rate": 8.15694200741843e-08, - "loss": 3.8745, - "step": 734500 - }, - { - "epoch": 8.16, - "learning_rate": 8.162494724918375e-08, - "loss": 3.8998, - "step": 735000 - }, - { - "epoch": 8.17, - "learning_rate": 8.16804744241832e-08, - "loss": 3.886, - "step": 735500 - }, - { - "epoch": 8.17, - "learning_rate": 8.173600159918264e-08, - "loss": 3.8898, - "step": 736000 - }, - { - "epoch": 8.18, - "learning_rate": 8.179152877418208e-08, - "loss": 3.8781, - "step": 736500 - }, - { - "epoch": 8.18, - "learning_rate": 8.184705594918153e-08, - "loss": 3.8862, - "step": 737000 - }, - { - "epoch": 8.19, - "learning_rate": 8.190258312418097e-08, - "loss": 3.9022, - "step": 737500 - }, - { - "epoch": 8.2, - "learning_rate": 8.195811029918042e-08, - "loss": 3.8999, - "step": 738000 - }, - { - "epoch": 8.2, - "learning_rate": 8.201363747417986e-08, - "loss": 3.9063, - "step": 738500 - }, - { - "epoch": 8.21, - "learning_rate": 8.20691646491793e-08, - "loss": 3.8978, - "step": 739000 - }, - { - "epoch": 8.21, - "learning_rate": 8.212469182417875e-08, - "loss": 3.8926, - "step": 739500 - }, - { - "epoch": 8.22, - "learning_rate": 8.218021899917819e-08, - "loss": 3.9185, - "step": 740000 - }, - { - "epoch": 8.22, - "learning_rate": 8.223574617417764e-08, - "loss": 3.8823, - "step": 740500 - }, - { - "epoch": 8.23, - "learning_rate": 8.229127334917708e-08, - "loss": 3.9003, - "step": 741000 - }, - { - "epoch": 8.23, - "learning_rate": 8.234680052417652e-08, - "loss": 3.8836, - "step": 741500 - }, - { - "epoch": 8.24, - "learning_rate": 8.240232769917598e-08, - "loss": 3.9047, - "step": 742000 - }, - { - "epoch": 8.25, - "learning_rate": 8.245785487417541e-08, - "loss": 3.9053, - "step": 742500 - }, - { - "epoch": 8.25, - "learning_rate": 8.251338204917486e-08, - "loss": 3.8932, - "step": 743000 - }, - { - "epoch": 8.26, - "learning_rate": 8.256890922417431e-08, - "loss": 3.8972, - "step": 743500 - }, - { - "epoch": 8.26, - "learning_rate": 8.262443639917375e-08, - "loss": 3.8906, - "step": 744000 - }, - { - "epoch": 8.27, - "learning_rate": 8.267996357417319e-08, - "loss": 3.9004, - "step": 744500 - }, - { - "epoch": 8.27, - "learning_rate": 8.273549074917265e-08, - "loss": 3.9118, - "step": 745000 - }, - { - "epoch": 8.28, - "learning_rate": 8.279101792417209e-08, - "loss": 3.8961, - "step": 745500 - }, - { - "epoch": 8.28, - "learning_rate": 8.284654509917152e-08, - "loss": 3.8916, - "step": 746000 - }, - { - "epoch": 8.29, - "learning_rate": 8.290207227417098e-08, - "loss": 3.9015, - "step": 746500 - }, - { - "epoch": 8.3, - "learning_rate": 8.295759944917042e-08, - "loss": 3.902, - "step": 747000 - }, - { - "epoch": 8.3, - "learning_rate": 8.301312662416985e-08, - "loss": 3.9076, - "step": 747500 - }, - { - "epoch": 8.31, - "learning_rate": 8.306865379916931e-08, - "loss": 3.8931, - "step": 748000 - }, - { - "epoch": 8.31, - "learning_rate": 8.312418097416876e-08, - "loss": 3.9017, - "step": 748500 - }, - { - "epoch": 8.32, - "learning_rate": 8.31797081491682e-08, - "loss": 3.9095, - "step": 749000 - }, - { - "epoch": 8.32, - "learning_rate": 8.323523532416764e-08, - "loss": 3.8899, - "step": 749500 - }, - { - "epoch": 8.33, - "learning_rate": 8.329076249916709e-08, - "loss": 3.8843, - "step": 750000 - }, - { - "epoch": 8.33, - "learning_rate": 8.334628967416653e-08, - "loss": 3.89, - "step": 750500 - }, - { - "epoch": 8.34, - "learning_rate": 8.340181684916598e-08, - "loss": 3.8959, - "step": 751000 - }, - { - "epoch": 8.35, - "learning_rate": 8.345734402416542e-08, - "loss": 3.9121, - "step": 751500 - }, - { - "epoch": 8.35, - "learning_rate": 8.351287119916487e-08, - "loss": 3.8851, - "step": 752000 - }, - { - "epoch": 8.36, - "learning_rate": 8.356839837416432e-08, - "loss": 3.8879, - "step": 752500 - }, - { - "epoch": 8.36, - "learning_rate": 8.362392554916375e-08, - "loss": 3.884, - "step": 753000 - }, - { - "epoch": 8.37, - "learning_rate": 8.36794527241632e-08, - "loss": 3.8702, - "step": 753500 - }, - { - "epoch": 8.37, - "learning_rate": 8.373497989916266e-08, - "loss": 3.8989, - "step": 754000 - }, - { - "epoch": 8.38, - "learning_rate": 8.379050707416209e-08, - "loss": 3.9003, - "step": 754500 - }, - { - "epoch": 8.38, - "learning_rate": 8.384603424916153e-08, - "loss": 3.8784, - "step": 755000 - }, - { - "epoch": 8.39, - "learning_rate": 8.390156142416099e-08, - "loss": 3.8971, - "step": 755500 - }, - { - "epoch": 8.4, - "learning_rate": 8.395708859916043e-08, - "loss": 3.8979, - "step": 756000 - }, - { - "epoch": 8.4, - "learning_rate": 8.401261577415986e-08, - "loss": 3.8878, - "step": 756500 - }, - { - "epoch": 8.41, - "learning_rate": 8.406814294915932e-08, - "loss": 3.8994, - "step": 757000 - }, - { - "epoch": 8.41, - "learning_rate": 8.412367012415876e-08, - "loss": 3.8919, - "step": 757500 - }, - { - "epoch": 8.42, - "learning_rate": 8.41791972991582e-08, - "loss": 3.8979, - "step": 758000 - }, - { - "epoch": 8.42, - "learning_rate": 8.423472447415765e-08, - "loss": 3.8847, - "step": 758500 - }, - { - "epoch": 8.43, - "learning_rate": 8.42902516491571e-08, - "loss": 3.8884, - "step": 759000 - }, - { - "epoch": 8.43, - "learning_rate": 8.434577882415654e-08, - "loss": 3.8885, - "step": 759500 - }, - { - "epoch": 8.44, - "learning_rate": 8.440130599915599e-08, - "loss": 3.8975, - "step": 760000 - }, - { - "epoch": 8.45, - "learning_rate": 8.445683317415543e-08, - "loss": 3.8973, - "step": 760500 - }, - { - "epoch": 8.45, - "learning_rate": 8.451236034915487e-08, - "loss": 3.9104, - "step": 761000 - }, - { - "epoch": 8.46, - "learning_rate": 8.456788752415432e-08, - "loss": 3.908, - "step": 761500 - }, - { - "epoch": 8.46, - "learning_rate": 8.462341469915376e-08, - "loss": 3.8881, - "step": 762000 - }, - { - "epoch": 8.47, - "learning_rate": 8.46789418741532e-08, - "loss": 3.9037, - "step": 762500 - }, - { - "epoch": 8.47, - "learning_rate": 8.473446904915265e-08, - "loss": 3.8861, - "step": 763000 - }, - { - "epoch": 8.48, - "learning_rate": 8.47899962241521e-08, - "loss": 3.8965, - "step": 763500 - }, - { - "epoch": 8.48, - "learning_rate": 8.484552339915154e-08, - "loss": 3.8883, - "step": 764000 - }, - { - "epoch": 8.49, - "learning_rate": 8.4901050574151e-08, - "loss": 3.8852, - "step": 764500 - }, - { - "epoch": 8.5, - "learning_rate": 8.495657774915043e-08, - "loss": 3.9045, - "step": 765000 - }, - { - "epoch": 8.5, - "learning_rate": 8.501210492414987e-08, - "loss": 3.895, - "step": 765500 - }, - { - "epoch": 8.51, - "learning_rate": 8.506763209914933e-08, - "loss": 3.8931, - "step": 766000 - }, - { - "epoch": 8.51, - "learning_rate": 8.512315927414876e-08, - "loss": 3.8788, - "step": 766500 - }, - { - "epoch": 8.52, - "learning_rate": 8.51786864491482e-08, - "loss": 3.8993, - "step": 767000 - }, - { - "epoch": 8.52, - "learning_rate": 8.523421362414766e-08, - "loss": 3.9073, - "step": 767500 - }, - { - "epoch": 8.53, - "learning_rate": 8.52897407991471e-08, - "loss": 3.8985, - "step": 768000 - }, - { - "epoch": 8.53, - "learning_rate": 8.534526797414654e-08, - "loss": 3.8953, - "step": 768500 - }, - { - "epoch": 8.54, - "learning_rate": 8.5400795149146e-08, - "loss": 3.9153, - "step": 769000 - }, - { - "epoch": 8.55, - "learning_rate": 8.545632232414544e-08, - "loss": 3.8869, - "step": 769500 - }, - { - "epoch": 8.55, - "learning_rate": 8.551184949914487e-08, - "loss": 3.9031, - "step": 770000 - }, - { - "epoch": 8.56, - "learning_rate": 8.556737667414433e-08, - "loss": 3.8703, - "step": 770500 - }, - { - "epoch": 8.56, - "learning_rate": 8.562290384914377e-08, - "loss": 3.9019, - "step": 771000 - }, - { - "epoch": 8.57, - "learning_rate": 8.567843102414321e-08, - "loss": 3.9049, - "step": 771500 - }, - { - "epoch": 8.57, - "learning_rate": 8.573395819914266e-08, - "loss": 3.8816, - "step": 772000 - }, - { - "epoch": 8.58, - "learning_rate": 8.57894853741421e-08, - "loss": 3.8858, - "step": 772500 - }, - { - "epoch": 8.58, - "learning_rate": 8.584501254914155e-08, - "loss": 3.9054, - "step": 773000 - }, - { - "epoch": 8.59, - "learning_rate": 8.590053972414099e-08, - "loss": 3.8777, - "step": 773500 - }, - { - "epoch": 8.6, - "learning_rate": 8.595606689914044e-08, - "loss": 3.8941, - "step": 774000 - }, - { - "epoch": 8.6, - "learning_rate": 8.601159407413988e-08, - "loss": 3.883, - "step": 774500 - }, - { - "epoch": 8.61, - "learning_rate": 8.606712124913932e-08, - "loss": 3.8956, - "step": 775000 - }, - { - "epoch": 8.61, - "learning_rate": 8.612264842413877e-08, - "loss": 3.8736, - "step": 775500 - }, - { - "epoch": 8.62, - "learning_rate": 8.617817559913821e-08, - "loss": 3.8941, - "step": 776000 - }, - { - "epoch": 8.62, - "learning_rate": 8.623370277413766e-08, - "loss": 3.8871, - "step": 776500 - }, - { - "epoch": 8.63, - "learning_rate": 8.62892299491371e-08, - "loss": 3.8699, - "step": 777000 - }, - { - "epoch": 8.63, - "learning_rate": 8.634475712413654e-08, - "loss": 3.8748, - "step": 777500 - }, - { - "epoch": 8.64, - "learning_rate": 8.640028429913599e-08, - "loss": 3.8798, - "step": 778000 - }, - { - "epoch": 8.65, - "learning_rate": 8.645581147413545e-08, - "loss": 3.9068, - "step": 778500 - }, - { - "epoch": 8.65, - "learning_rate": 8.651133864913488e-08, - "loss": 3.8883, - "step": 779000 - }, - { - "epoch": 8.66, - "learning_rate": 8.656686582413432e-08, - "loss": 3.8784, - "step": 779500 - }, - { - "epoch": 8.66, - "learning_rate": 8.662239299913378e-08, - "loss": 3.8872, - "step": 780000 - }, - { - "epoch": 8.67, - "learning_rate": 8.667792017413321e-08, - "loss": 3.8873, - "step": 780500 - }, - { - "epoch": 8.67, - "learning_rate": 8.673344734913265e-08, - "loss": 3.8939, - "step": 781000 - }, - { - "epoch": 8.68, - "learning_rate": 8.678897452413211e-08, - "loss": 3.9098, - "step": 781500 - }, - { - "epoch": 8.68, - "learning_rate": 8.684450169913156e-08, - "loss": 3.8935, - "step": 782000 - }, - { - "epoch": 8.69, - "learning_rate": 8.690002887413099e-08, - "loss": 3.8902, - "step": 782500 - }, - { - "epoch": 8.7, - "learning_rate": 8.695555604913044e-08, - "loss": 3.8944, - "step": 783000 - }, - { - "epoch": 8.7, - "learning_rate": 8.701108322412989e-08, - "loss": 3.8918, - "step": 783500 - }, - { - "epoch": 8.71, - "learning_rate": 8.706661039912932e-08, - "loss": 3.8821, - "step": 784000 - }, - { - "epoch": 8.71, - "learning_rate": 8.712213757412878e-08, - "loss": 3.877, - "step": 784500 - }, - { - "epoch": 8.72, - "learning_rate": 8.717766474912822e-08, - "loss": 3.8996, - "step": 785000 - }, - { - "epoch": 8.72, - "learning_rate": 8.723319192412766e-08, - "loss": 3.9013, - "step": 785500 - }, - { - "epoch": 8.73, - "learning_rate": 8.728871909912711e-08, - "loss": 3.9013, - "step": 786000 - }, - { - "epoch": 8.73, - "learning_rate": 8.734424627412655e-08, - "loss": 3.907, - "step": 786500 - }, - { - "epoch": 8.74, - "learning_rate": 8.7399773449126e-08, - "loss": 3.8645, - "step": 787000 - }, - { - "epoch": 8.75, - "learning_rate": 8.745530062412544e-08, - "loss": 3.88, - "step": 787500 - }, - { - "epoch": 8.75, - "learning_rate": 8.751082779912489e-08, - "loss": 3.8873, - "step": 788000 - }, - { - "epoch": 8.76, - "learning_rate": 8.756635497412433e-08, - "loss": 3.884, - "step": 788500 - }, - { - "epoch": 8.76, - "learning_rate": 8.762188214912379e-08, - "loss": 3.8772, - "step": 789000 - }, - { - "epoch": 8.77, - "learning_rate": 8.767740932412322e-08, - "loss": 3.9034, - "step": 789500 - }, - { - "epoch": 8.77, - "learning_rate": 8.773293649912266e-08, - "loss": 3.869, - "step": 790000 - }, - { - "epoch": 8.78, - "learning_rate": 8.778846367412212e-08, - "loss": 3.8748, - "step": 790500 - }, - { - "epoch": 8.78, - "learning_rate": 8.784399084912155e-08, - "loss": 3.9012, - "step": 791000 - }, - { - "epoch": 8.79, - "learning_rate": 8.7899518024121e-08, - "loss": 3.894, - "step": 791500 - }, - { - "epoch": 8.8, - "learning_rate": 8.795504519912045e-08, - "loss": 3.8737, - "step": 792000 - }, - { - "epoch": 8.8, - "learning_rate": 8.80105723741199e-08, - "loss": 3.8753, - "step": 792500 - }, - { - "epoch": 8.81, - "learning_rate": 8.806609954911933e-08, - "loss": 3.8971, - "step": 793000 - }, - { - "epoch": 8.81, - "learning_rate": 8.812162672411879e-08, - "loss": 3.9061, - "step": 793500 - }, - { - "epoch": 8.82, - "learning_rate": 8.817715389911823e-08, - "loss": 3.8857, - "step": 794000 - }, - { - "epoch": 8.82, - "learning_rate": 8.823268107411766e-08, - "loss": 3.9059, - "step": 794500 - }, - { - "epoch": 8.83, - "learning_rate": 8.828820824911712e-08, - "loss": 3.889, - "step": 795000 - }, - { - "epoch": 8.83, - "learning_rate": 8.834373542411656e-08, - "loss": 3.8867, - "step": 795500 - }, - { - "epoch": 8.84, - "learning_rate": 8.839926259911599e-08, - "loss": 3.8931, - "step": 796000 - }, - { - "epoch": 8.85, - "learning_rate": 8.845478977411545e-08, - "loss": 3.8838, - "step": 796500 - }, - { - "epoch": 8.85, - "learning_rate": 8.85103169491149e-08, - "loss": 3.8986, - "step": 797000 - }, - { - "epoch": 8.86, - "learning_rate": 8.856584412411434e-08, - "loss": 3.9002, - "step": 797500 - }, - { - "epoch": 8.86, - "learning_rate": 8.862137129911378e-08, - "loss": 3.8997, - "step": 798000 - }, - { - "epoch": 8.87, - "learning_rate": 8.867689847411323e-08, - "loss": 3.8823, - "step": 798500 - }, - { - "epoch": 8.87, - "learning_rate": 8.873242564911267e-08, - "loss": 3.8719, - "step": 799000 - }, - { - "epoch": 8.88, - "learning_rate": 8.878795282411212e-08, - "loss": 3.8995, - "step": 799500 - }, - { - "epoch": 8.88, - "learning_rate": 8.884347999911156e-08, - "loss": 3.8661, - "step": 800000 - }, - { - "epoch": 8.89, - "learning_rate": 8.8899007174111e-08, - "loss": 3.8937, - "step": 800500 - }, - { - "epoch": 8.9, - "learning_rate": 8.895453434911046e-08, - "loss": 3.8786, - "step": 801000 - }, - { - "epoch": 8.9, - "learning_rate": 8.901006152410989e-08, - "loss": 3.9042, - "step": 801500 - }, - { - "epoch": 8.91, - "learning_rate": 8.906558869910934e-08, - "loss": 3.8988, - "step": 802000 - }, - { - "epoch": 8.91, - "learning_rate": 8.91211158741088e-08, - "loss": 3.8838, - "step": 802500 - }, - { - "epoch": 8.92, - "learning_rate": 8.917664304910822e-08, - "loss": 3.8926, - "step": 803000 - }, - { - "epoch": 8.92, - "learning_rate": 8.923217022410767e-08, - "loss": 3.8851, - "step": 803500 - }, - { - "epoch": 8.93, - "learning_rate": 8.928769739910713e-08, - "loss": 3.8916, - "step": 804000 - }, - { - "epoch": 8.93, - "learning_rate": 8.934322457410657e-08, - "loss": 3.879, - "step": 804500 - }, - { - "epoch": 8.94, - "learning_rate": 8.9398751749106e-08, - "loss": 3.8766, - "step": 805000 - }, - { - "epoch": 8.95, - "learning_rate": 8.945427892410546e-08, - "loss": 3.8952, - "step": 805500 - }, - { - "epoch": 8.95, - "learning_rate": 8.95098060991049e-08, - "loss": 3.8778, - "step": 806000 - }, - { - "epoch": 8.96, - "learning_rate": 8.956533327410433e-08, - "loss": 3.8736, - "step": 806500 - }, - { - "epoch": 8.96, - "learning_rate": 8.962086044910379e-08, - "loss": 3.9052, - "step": 807000 - }, - { - "epoch": 8.97, - "learning_rate": 8.967638762410324e-08, - "loss": 3.8695, - "step": 807500 - }, - { - "epoch": 8.97, - "learning_rate": 8.973191479910268e-08, - "loss": 3.8882, - "step": 808000 - }, - { - "epoch": 8.98, - "learning_rate": 8.978744197410212e-08, - "loss": 3.8876, - "step": 808500 - }, - { - "epoch": 8.98, - "learning_rate": 8.984296914910157e-08, - "loss": 3.8702, - "step": 809000 - }, - { - "epoch": 8.99, - "learning_rate": 8.989849632410101e-08, - "loss": 3.8824, - "step": 809500 - }, - { - "epoch": 9.0, - "learning_rate": 8.995402349910046e-08, - "loss": 3.8871, - "step": 810000 - }, - { - "epoch": 9.0, - "eval_loss": 3.914140462875366, - "eval_runtime": 6.3069, - "eval_samples_per_second": 246.396, - "step": 810414 - }, - { - "epoch": 9.0, - "learning_rate": 9.00095506740999e-08, - "loss": 3.8657, - "step": 810500 - }, - { - "epoch": 9.01, - "learning_rate": 9.006507784909934e-08, - "loss": 3.8844, - "step": 811000 - }, - { - "epoch": 9.01, - "learning_rate": 9.012060502409879e-08, - "loss": 3.8874, - "step": 811500 - }, - { - "epoch": 9.02, - "learning_rate": 9.017613219909823e-08, - "loss": 3.8844, - "step": 812000 - }, - { - "epoch": 9.02, - "learning_rate": 9.023165937409768e-08, - "loss": 3.8817, - "step": 812500 - }, - { - "epoch": 9.03, - "learning_rate": 9.028718654909712e-08, - "loss": 3.8831, - "step": 813000 - }, - { - "epoch": 9.03, - "learning_rate": 9.034271372409657e-08, - "loss": 3.8799, - "step": 813500 - }, - { - "epoch": 9.04, - "learning_rate": 9.039824089909601e-08, - "loss": 3.8895, - "step": 814000 - }, - { - "epoch": 9.05, - "learning_rate": 9.045376807409545e-08, - "loss": 3.8962, - "step": 814500 - }, - { - "epoch": 9.05, - "learning_rate": 9.050929524909491e-08, - "loss": 3.8889, - "step": 815000 - }, - { - "epoch": 9.06, - "learning_rate": 9.056482242409434e-08, - "loss": 3.8899, - "step": 815500 - }, - { - "epoch": 9.06, - "learning_rate": 9.062034959909379e-08, - "loss": 3.8798, - "step": 816000 - }, - { - "epoch": 9.07, - "learning_rate": 9.067587677409324e-08, - "loss": 3.8775, - "step": 816500 - }, - { - "epoch": 9.07, - "learning_rate": 9.073140394909267e-08, - "loss": 3.886, - "step": 817000 - }, - { - "epoch": 9.08, - "learning_rate": 9.078693112409212e-08, - "loss": 3.8718, - "step": 817500 - }, - { - "epoch": 9.08, - "learning_rate": 9.084245829909158e-08, - "loss": 3.8816, - "step": 818000 - }, - { - "epoch": 9.09, - "learning_rate": 9.089798547409102e-08, - "loss": 3.8599, - "step": 818500 - }, - { - "epoch": 9.1, - "learning_rate": 9.095351264909046e-08, - "loss": 3.8926, - "step": 819000 - }, - { - "epoch": 9.1, - "learning_rate": 9.100903982408991e-08, - "loss": 3.8887, - "step": 819500 - }, - { - "epoch": 9.11, - "learning_rate": 9.106456699908935e-08, - "loss": 3.8721, - "step": 820000 - }, - { - "epoch": 9.11, - "learning_rate": 9.11200941740888e-08, - "loss": 3.8773, - "step": 820500 - }, - { - "epoch": 9.12, - "learning_rate": 9.117562134908824e-08, - "loss": 3.8793, - "step": 821000 - }, - { - "epoch": 9.12, - "learning_rate": 9.123114852408769e-08, - "loss": 3.8708, - "step": 821500 - }, - { - "epoch": 9.13, - "learning_rate": 9.128667569908713e-08, - "loss": 3.8767, - "step": 822000 - }, - { - "epoch": 9.13, - "learning_rate": 9.134220287408657e-08, - "loss": 3.8931, - "step": 822500 - }, - { - "epoch": 9.14, - "learning_rate": 9.139773004908602e-08, - "loss": 3.8804, - "step": 823000 - }, - { - "epoch": 9.15, - "learning_rate": 9.145325722408546e-08, - "loss": 3.8907, - "step": 823500 - }, - { - "epoch": 9.15, - "learning_rate": 9.15087843990849e-08, - "loss": 3.8805, - "step": 824000 - }, - { - "epoch": 9.16, - "learning_rate": 9.156431157408435e-08, - "loss": 3.8702, - "step": 824500 - }, - { - "epoch": 9.16, - "learning_rate": 9.16198387490838e-08, - "loss": 3.8796, - "step": 825000 - }, - { - "epoch": 9.17, - "learning_rate": 9.167536592408324e-08, - "loss": 3.8664, - "step": 825500 - }, - { - "epoch": 9.17, - "learning_rate": 9.173089309908268e-08, - "loss": 3.8737, - "step": 826000 - }, - { - "epoch": 9.18, - "learning_rate": 9.178642027408213e-08, - "loss": 3.8827, - "step": 826500 - }, - { - "epoch": 9.18, - "learning_rate": 9.184194744908158e-08, - "loss": 3.8768, - "step": 827000 - }, - { - "epoch": 9.19, - "learning_rate": 9.189747462408102e-08, - "loss": 3.879, - "step": 827500 - }, - { - "epoch": 9.2, - "learning_rate": 9.195300179908046e-08, - "loss": 3.8686, - "step": 828000 - }, - { - "epoch": 9.2, - "learning_rate": 9.200852897407992e-08, - "loss": 3.8816, - "step": 828500 - }, - { - "epoch": 9.21, - "learning_rate": 9.206405614907935e-08, - "loss": 3.8592, - "step": 829000 - }, - { - "epoch": 9.21, - "learning_rate": 9.211958332407879e-08, - "loss": 3.8852, - "step": 829500 - }, - { - "epoch": 9.22, - "learning_rate": 9.217511049907825e-08, - "loss": 3.8721, - "step": 830000 - }, - { - "epoch": 9.22, - "learning_rate": 9.22306376740777e-08, - "loss": 3.8841, - "step": 830500 - }, - { - "epoch": 9.23, - "learning_rate": 9.228616484907712e-08, - "loss": 3.8828, - "step": 831000 - }, - { - "epoch": 9.23, - "learning_rate": 9.234169202407658e-08, - "loss": 3.8903, - "step": 831500 - }, - { - "epoch": 9.24, - "learning_rate": 9.239721919907603e-08, - "loss": 3.8985, - "step": 832000 - }, - { - "epoch": 9.25, - "learning_rate": 9.245274637407546e-08, - "loss": 3.8794, - "step": 832500 - }, - { - "epoch": 9.25, - "learning_rate": 9.250827354907491e-08, - "loss": 3.8764, - "step": 833000 - }, - { - "epoch": 9.26, - "learning_rate": 9.256380072407436e-08, - "loss": 3.8839, - "step": 833500 - }, - { - "epoch": 9.26, - "learning_rate": 9.26193278990738e-08, - "loss": 3.8986, - "step": 834000 - }, - { - "epoch": 9.27, - "learning_rate": 9.267485507407325e-08, - "loss": 3.8807, - "step": 834500 - }, - { - "epoch": 9.27, - "learning_rate": 9.273038224907269e-08, - "loss": 3.8898, - "step": 835000 - }, - { - "epoch": 9.28, - "learning_rate": 9.278590942407214e-08, - "loss": 3.8828, - "step": 835500 - }, - { - "epoch": 9.28, - "learning_rate": 9.284143659907158e-08, - "loss": 3.8808, - "step": 836000 - }, - { - "epoch": 9.29, - "learning_rate": 9.289696377407102e-08, - "loss": 3.8742, - "step": 836500 - }, - { - "epoch": 9.3, - "learning_rate": 9.295249094907047e-08, - "loss": 3.8849, - "step": 837000 - }, - { - "epoch": 9.3, - "learning_rate": 9.300801812406993e-08, - "loss": 3.8946, - "step": 837500 - }, - { - "epoch": 9.31, - "learning_rate": 9.306354529906936e-08, - "loss": 3.8681, - "step": 838000 - }, - { - "epoch": 9.31, - "learning_rate": 9.31190724740688e-08, - "loss": 3.8925, - "step": 838500 - }, - { - "epoch": 9.32, - "learning_rate": 9.317459964906826e-08, - "loss": 3.877, - "step": 839000 - }, - { - "epoch": 9.32, - "learning_rate": 9.323012682406769e-08, - "loss": 3.8872, - "step": 839500 - }, - { - "epoch": 9.33, - "learning_rate": 9.328565399906713e-08, - "loss": 3.883, - "step": 840000 - }, - { - "epoch": 9.33, - "learning_rate": 9.334118117406659e-08, - "loss": 3.8679, - "step": 840500 - }, - { - "epoch": 9.34, - "learning_rate": 9.339670834906603e-08, - "loss": 3.8822, - "step": 841000 - }, - { - "epoch": 9.35, - "learning_rate": 9.345223552406547e-08, - "loss": 3.8719, - "step": 841500 - }, - { - "epoch": 9.35, - "learning_rate": 9.350776269906492e-08, - "loss": 3.8754, - "step": 842000 - }, - { - "epoch": 9.36, - "learning_rate": 9.356328987406437e-08, - "loss": 3.8793, - "step": 842500 - }, - { - "epoch": 9.36, - "learning_rate": 9.36188170490638e-08, - "loss": 3.8817, - "step": 843000 - }, - { - "epoch": 9.37, - "learning_rate": 9.367434422406326e-08, - "loss": 3.878, - "step": 843500 - }, - { - "epoch": 9.37, - "learning_rate": 9.37298713990627e-08, - "loss": 3.8828, - "step": 844000 - }, - { - "epoch": 9.38, - "learning_rate": 9.378539857406214e-08, - "loss": 3.866, - "step": 844500 - }, - { - "epoch": 9.38, - "learning_rate": 9.384092574906159e-08, - "loss": 3.864, - "step": 845000 - }, - { - "epoch": 9.39, - "learning_rate": 9.389645292406103e-08, - "loss": 3.8654, - "step": 845500 - }, - { - "epoch": 9.4, - "learning_rate": 9.395198009906048e-08, - "loss": 3.8571, - "step": 846000 - }, - { - "epoch": 9.4, - "learning_rate": 9.400750727405992e-08, - "loss": 3.8963, - "step": 846500 - }, - { - "epoch": 9.41, - "learning_rate": 9.406303444905937e-08, - "loss": 3.8774, - "step": 847000 - }, - { - "epoch": 9.41, - "learning_rate": 9.411856162405881e-08, - "loss": 3.8724, - "step": 847500 - }, - { - "epoch": 9.42, - "learning_rate": 9.417408879905825e-08, - "loss": 3.8785, - "step": 848000 - }, - { - "epoch": 9.42, - "learning_rate": 9.42296159740577e-08, - "loss": 3.8871, - "step": 848500 - }, - { - "epoch": 9.43, - "learning_rate": 9.428514314905714e-08, - "loss": 3.8695, - "step": 849000 - }, - { - "epoch": 9.43, - "learning_rate": 9.43406703240566e-08, - "loss": 3.8861, - "step": 849500 - }, - { - "epoch": 9.44, - "learning_rate": 9.439619749905603e-08, - "loss": 3.8775, - "step": 850000 - }, - { - "epoch": 9.45, - "learning_rate": 9.445172467405547e-08, - "loss": 3.8893, - "step": 850500 - }, - { - "epoch": 9.45, - "learning_rate": 9.450725184905493e-08, - "loss": 3.8753, - "step": 851000 - }, - { - "epoch": 9.46, - "learning_rate": 9.456277902405438e-08, - "loss": 3.882, - "step": 851500 - }, - { - "epoch": 9.46, - "learning_rate": 9.461830619905381e-08, - "loss": 3.8722, - "step": 852000 - }, - { - "epoch": 9.47, - "learning_rate": 9.467383337405326e-08, - "loss": 3.8835, - "step": 852500 - }, - { - "epoch": 9.47, - "learning_rate": 9.472936054905271e-08, - "loss": 3.8712, - "step": 853000 - }, - { - "epoch": 9.48, - "learning_rate": 9.478488772405214e-08, - "loss": 3.8814, - "step": 853500 - }, - { - "epoch": 9.48, - "learning_rate": 9.48404148990516e-08, - "loss": 3.882, - "step": 854000 - }, - { - "epoch": 9.49, - "learning_rate": 9.489594207405104e-08, - "loss": 3.8849, - "step": 854500 - }, - { - "epoch": 9.5, - "learning_rate": 9.495146924905049e-08, - "loss": 3.8773, - "step": 855000 - }, - { - "epoch": 9.5, - "learning_rate": 9.500699642404993e-08, - "loss": 3.8978, - "step": 855500 - }, - { - "epoch": 9.51, - "learning_rate": 9.506252359904937e-08, - "loss": 3.8764, - "step": 856000 - }, - { - "epoch": 9.51, - "learning_rate": 9.511805077404882e-08, - "loss": 3.8656, - "step": 856500 - }, - { - "epoch": 9.52, - "learning_rate": 9.517357794904826e-08, - "loss": 3.8782, - "step": 857000 - }, - { - "epoch": 9.52, - "learning_rate": 9.52291051240477e-08, - "loss": 3.8779, - "step": 857500 - }, - { - "epoch": 9.53, - "learning_rate": 9.528463229904715e-08, - "loss": 3.8603, - "step": 858000 - }, - { - "epoch": 9.53, - "learning_rate": 9.53401594740466e-08, - "loss": 3.8769, - "step": 858500 - }, - { - "epoch": 9.54, - "learning_rate": 9.539568664904604e-08, - "loss": 3.8872, - "step": 859000 - }, - { - "epoch": 9.55, - "learning_rate": 9.545121382404548e-08, - "loss": 3.8783, - "step": 859500 - }, - { - "epoch": 9.55, - "learning_rate": 9.550674099904493e-08, - "loss": 3.8729, - "step": 860000 - }, - { - "epoch": 9.56, - "learning_rate": 9.556226817404437e-08, - "loss": 3.8633, - "step": 860500 - }, - { - "epoch": 9.56, - "learning_rate": 9.561779534904382e-08, - "loss": 3.8976, - "step": 861000 - }, - { - "epoch": 9.57, - "learning_rate": 9.567332252404326e-08, - "loss": 3.8628, - "step": 861500 - }, - { - "epoch": 9.57, - "learning_rate": 9.57288496990427e-08, - "loss": 3.8705, - "step": 862000 - }, - { - "epoch": 9.58, - "learning_rate": 9.578437687404215e-08, - "loss": 3.8724, - "step": 862500 - }, - { - "epoch": 9.58, - "learning_rate": 9.583990404904159e-08, - "loss": 3.8578, - "step": 863000 - }, - { - "epoch": 9.59, - "learning_rate": 9.589543122404105e-08, - "loss": 3.8741, - "step": 863500 - }, - { - "epoch": 9.6, - "learning_rate": 9.595095839904048e-08, - "loss": 3.874, - "step": 864000 - }, - { - "epoch": 9.6, - "learning_rate": 9.600648557403992e-08, - "loss": 3.8756, - "step": 864500 - }, - { - "epoch": 9.61, - "learning_rate": 9.606201274903938e-08, - "loss": 3.8754, - "step": 865000 - }, - { - "epoch": 9.61, - "learning_rate": 9.611753992403881e-08, - "loss": 3.8983, - "step": 865500 - }, - { - "epoch": 9.62, - "learning_rate": 9.617306709903826e-08, - "loss": 3.8807, - "step": 866000 - }, - { - "epoch": 9.62, - "learning_rate": 9.622859427403771e-08, - "loss": 3.874, - "step": 866500 - }, - { - "epoch": 9.63, - "learning_rate": 9.628412144903716e-08, - "loss": 3.8741, - "step": 867000 - }, - { - "epoch": 9.63, - "learning_rate": 9.633964862403659e-08, - "loss": 3.8721, - "step": 867500 - }, - { - "epoch": 9.64, - "learning_rate": 9.639517579903605e-08, - "loss": 3.8652, - "step": 868000 - }, - { - "epoch": 9.65, - "learning_rate": 9.645070297403549e-08, - "loss": 3.8871, - "step": 868500 - }, - { - "epoch": 9.65, - "learning_rate": 9.650623014903492e-08, - "loss": 3.8646, - "step": 869000 - }, - { - "epoch": 9.66, - "learning_rate": 9.656175732403438e-08, - "loss": 3.8755, - "step": 869500 - }, - { - "epoch": 9.66, - "learning_rate": 9.661728449903382e-08, - "loss": 3.9076, - "step": 870000 - }, - { - "epoch": 9.67, - "learning_rate": 9.667281167403327e-08, - "loss": 3.8841, - "step": 870500 - }, - { - "epoch": 9.67, - "learning_rate": 9.672833884903271e-08, - "loss": 3.8814, - "step": 871000 - }, - { - "epoch": 9.68, - "learning_rate": 9.678386602403216e-08, - "loss": 3.8679, - "step": 871500 - }, - { - "epoch": 9.68, - "learning_rate": 9.68393931990316e-08, - "loss": 3.8467, - "step": 872000 - }, - { - "epoch": 9.69, - "learning_rate": 9.689492037403104e-08, - "loss": 3.8771, - "step": 872500 - }, - { - "epoch": 9.7, - "learning_rate": 9.695044754903049e-08, - "loss": 3.8775, - "step": 873000 - }, - { - "epoch": 9.7, - "learning_rate": 9.700597472402993e-08, - "loss": 3.876, - "step": 873500 - }, - { - "epoch": 9.71, - "learning_rate": 9.706150189902939e-08, - "loss": 3.8744, - "step": 874000 - }, - { - "epoch": 9.71, - "learning_rate": 9.711702907402882e-08, - "loss": 3.8832, - "step": 874500 - }, - { - "epoch": 9.72, - "learning_rate": 9.717255624902827e-08, - "loss": 3.8682, - "step": 875000 - }, - { - "epoch": 9.72, - "learning_rate": 9.722808342402772e-08, - "loss": 3.8732, - "step": 875500 - }, - { - "epoch": 9.73, - "learning_rate": 9.728361059902715e-08, - "loss": 3.8604, - "step": 876000 - }, - { - "epoch": 9.73, - "learning_rate": 9.73391377740266e-08, - "loss": 3.8781, - "step": 876500 - }, - { - "epoch": 9.74, - "learning_rate": 9.739466494902606e-08, - "loss": 3.8699, - "step": 877000 - }, - { - "epoch": 9.75, - "learning_rate": 9.74501921240255e-08, - "loss": 3.8748, - "step": 877500 - }, - { - "epoch": 9.75, - "learning_rate": 9.750571929902493e-08, - "loss": 3.8769, - "step": 878000 - }, - { - "epoch": 9.76, - "learning_rate": 9.756124647402439e-08, - "loss": 3.8775, - "step": 878500 - }, - { - "epoch": 9.76, - "learning_rate": 9.761677364902383e-08, - "loss": 3.8547, - "step": 879000 - }, - { - "epoch": 9.77, - "learning_rate": 9.767230082402326e-08, - "loss": 3.8644, - "step": 879500 - }, - { - "epoch": 9.77, - "learning_rate": 9.772782799902272e-08, - "loss": 3.8892, - "step": 880000 - }, - { - "epoch": 9.78, - "learning_rate": 9.778335517402216e-08, - "loss": 3.8671, - "step": 880500 - }, - { - "epoch": 9.78, - "learning_rate": 9.783888234902161e-08, - "loss": 3.8643, - "step": 881000 - }, - { - "epoch": 9.79, - "learning_rate": 9.789440952402105e-08, - "loss": 3.8543, - "step": 881500 - }, - { - "epoch": 9.79, - "learning_rate": 9.79499366990205e-08, - "loss": 3.8538, - "step": 882000 - }, - { - "epoch": 9.8, - "learning_rate": 9.800546387401994e-08, - "loss": 3.8693, - "step": 882500 - }, - { - "epoch": 9.81, - "learning_rate": 9.806099104901939e-08, - "loss": 3.8696, - "step": 883000 - }, - { - "epoch": 9.81, - "learning_rate": 9.811651822401883e-08, - "loss": 3.8674, - "step": 883500 - }, - { - "epoch": 9.82, - "learning_rate": 9.817204539901827e-08, - "loss": 3.8574, - "step": 884000 - }, - { - "epoch": 9.82, - "learning_rate": 9.822757257401773e-08, - "loss": 3.8854, - "step": 884500 - }, - { - "epoch": 9.83, - "learning_rate": 9.828309974901716e-08, - "loss": 3.8651, - "step": 885000 - }, - { - "epoch": 9.83, - "learning_rate": 9.83386269240166e-08, - "loss": 3.8884, - "step": 885500 - }, - { - "epoch": 9.84, - "learning_rate": 9.839415409901606e-08, - "loss": 3.8805, - "step": 886000 - }, - { - "epoch": 9.84, - "learning_rate": 9.84496812740155e-08, - "loss": 3.8592, - "step": 886500 - }, - { - "epoch": 9.85, - "learning_rate": 9.850520844901494e-08, - "loss": 3.8688, - "step": 887000 - }, - { - "epoch": 9.86, - "learning_rate": 9.85607356240144e-08, - "loss": 3.8754, - "step": 887500 - }, - { - "epoch": 9.86, - "learning_rate": 9.861626279901383e-08, - "loss": 3.8672, - "step": 888000 - }, - { - "epoch": 9.87, - "learning_rate": 9.867178997401327e-08, - "loss": 3.8811, - "step": 888500 - }, - { - "epoch": 9.87, - "learning_rate": 9.872731714901273e-08, - "loss": 3.8959, - "step": 889000 - }, - { - "epoch": 9.88, - "learning_rate": 9.878284432401217e-08, - "loss": 3.8589, - "step": 889500 - }, - { - "epoch": 9.88, - "learning_rate": 9.88383714990116e-08, - "loss": 3.8649, - "step": 890000 - }, - { - "epoch": 9.89, - "learning_rate": 9.889389867401106e-08, - "loss": 3.8791, - "step": 890500 - }, - { - "epoch": 9.89, - "learning_rate": 9.89494258490105e-08, - "loss": 3.8653, - "step": 891000 - }, - { - "epoch": 9.9, - "learning_rate": 9.900495302400994e-08, - "loss": 3.8719, - "step": 891500 - }, - { - "epoch": 9.91, - "learning_rate": 9.90604801990094e-08, - "loss": 3.869, - "step": 892000 - }, - { - "epoch": 9.91, - "learning_rate": 9.911600737400884e-08, - "loss": 3.8764, - "step": 892500 - }, - { - "epoch": 9.92, - "learning_rate": 9.917153454900828e-08, - "loss": 3.856, - "step": 893000 - }, - { - "epoch": 9.92, - "learning_rate": 9.922706172400773e-08, - "loss": 3.8704, - "step": 893500 - }, - { - "epoch": 9.93, - "learning_rate": 9.928258889900717e-08, - "loss": 3.8762, - "step": 894000 - }, - { - "epoch": 9.93, - "learning_rate": 9.933811607400662e-08, - "loss": 3.8826, - "step": 894500 - }, - { - "epoch": 9.94, - "learning_rate": 9.939364324900606e-08, - "loss": 3.872, - "step": 895000 - }, - { - "epoch": 9.94, - "learning_rate": 9.94491704240055e-08, - "loss": 3.8834, - "step": 895500 - }, - { - "epoch": 9.95, - "learning_rate": 9.950469759900495e-08, - "loss": 3.8752, - "step": 896000 - }, - { - "epoch": 9.96, - "learning_rate": 9.956022477400439e-08, - "loss": 3.8826, - "step": 896500 - }, - { - "epoch": 9.96, - "learning_rate": 9.961575194900384e-08, - "loss": 3.8484, - "step": 897000 - }, - { - "epoch": 9.97, - "learning_rate": 9.967127912400328e-08, - "loss": 3.8693, - "step": 897500 - }, - { - "epoch": 9.97, - "learning_rate": 9.972680629900272e-08, - "loss": 3.8736, - "step": 898000 - }, - { - "epoch": 9.98, - "learning_rate": 9.978233347400217e-08, - "loss": 3.8756, - "step": 898500 - }, - { - "epoch": 9.98, - "learning_rate": 9.983786064900161e-08, - "loss": 3.8857, - "step": 899000 - }, - { - "epoch": 9.99, - "learning_rate": 9.989338782400106e-08, - "loss": 3.8839, - "step": 899500 - }, - { - "epoch": 9.99, - "learning_rate": 9.994891499900051e-08, - "loss": 3.8706, - "step": 900000 - }, - { - "epoch": 10.0, - "eval_loss": 3.904014825820923, - "eval_runtime": 6.3112, - "eval_samples_per_second": 246.227, - "step": 900460 - }, - { - "epoch": 10.0, - "learning_rate": 9.99988894565e-08, - "loss": 3.8669, - "step": 900500 - }, - { - "epoch": 10.01, - "learning_rate": 9.998500766275015e-08, - "loss": 3.859, - "step": 901000 - }, - { - "epoch": 10.01, - "learning_rate": 9.997112586900029e-08, - "loss": 3.8591, - "step": 901500 - }, - { - "epoch": 10.02, - "learning_rate": 9.995724407525042e-08, - "loss": 3.8883, - "step": 902000 - }, - { - "epoch": 10.02, - "learning_rate": 9.994336228150056e-08, - "loss": 3.8605, - "step": 902500 - }, - { - "epoch": 10.03, - "learning_rate": 9.99294804877507e-08, - "loss": 3.8563, - "step": 903000 - }, - { - "epoch": 10.03, - "learning_rate": 9.991559869400083e-08, - "loss": 3.8597, - "step": 903500 - }, - { - "epoch": 10.04, - "learning_rate": 9.990171690025097e-08, - "loss": 3.8487, - "step": 904000 - }, - { - "epoch": 10.04, - "learning_rate": 9.988783510650112e-08, - "loss": 3.8893, - "step": 904500 - }, - { - "epoch": 10.05, - "learning_rate": 9.987395331275126e-08, - "loss": 3.8711, - "step": 905000 - }, - { - "epoch": 10.06, - "learning_rate": 9.98600715190014e-08, - "loss": 3.8548, - "step": 905500 - }, - { - "epoch": 10.06, - "learning_rate": 9.984618972525153e-08, - "loss": 3.8593, - "step": 906000 - }, - { - "epoch": 10.07, - "learning_rate": 9.983230793150167e-08, - "loss": 3.8685, - "step": 906500 - }, - { - "epoch": 10.07, - "learning_rate": 9.981842613775181e-08, - "loss": 3.8532, - "step": 907000 - }, - { - "epoch": 10.08, - "learning_rate": 9.980454434400196e-08, - "loss": 3.8611, - "step": 907500 - }, - { - "epoch": 10.08, - "learning_rate": 9.979066255025209e-08, - "loss": 3.8682, - "step": 908000 - }, - { - "epoch": 10.09, - "learning_rate": 9.977678075650223e-08, - "loss": 3.8595, - "step": 908500 - }, - { - "epoch": 10.09, - "learning_rate": 9.976289896275236e-08, - "loss": 3.8471, - "step": 909000 - }, - { - "epoch": 10.1, - "learning_rate": 9.97490171690025e-08, - "loss": 3.8813, - "step": 909500 - }, - { - "epoch": 10.11, - "learning_rate": 9.973513537525264e-08, - "loss": 3.8517, - "step": 910000 - }, - { - "epoch": 10.11, - "learning_rate": 9.972125358150278e-08, - "loss": 3.857, - "step": 910500 - }, - { - "epoch": 10.12, - "learning_rate": 9.970737178775293e-08, - "loss": 3.889, - "step": 911000 - }, - { - "epoch": 10.12, - "learning_rate": 9.969348999400307e-08, - "loss": 3.8742, - "step": 911500 - }, - { - "epoch": 10.13, - "learning_rate": 9.96796082002532e-08, - "loss": 3.8673, - "step": 912000 - }, - { - "epoch": 10.13, - "learning_rate": 9.966572640650334e-08, - "loss": 3.8758, - "step": 912500 - }, - { - "epoch": 10.14, - "learning_rate": 9.965184461275348e-08, - "loss": 3.8907, - "step": 913000 - }, - { - "epoch": 10.14, - "learning_rate": 9.963796281900362e-08, - "loss": 3.8586, - "step": 913500 - }, - { - "epoch": 10.15, - "learning_rate": 9.962408102525375e-08, - "loss": 3.8576, - "step": 914000 - }, - { - "epoch": 10.16, - "learning_rate": 9.961019923150388e-08, - "loss": 3.8707, - "step": 914500 - }, - { - "epoch": 10.16, - "learning_rate": 9.959631743775402e-08, - "loss": 3.8731, - "step": 915000 - }, - { - "epoch": 10.17, - "learning_rate": 9.958243564400417e-08, - "loss": 3.8753, - "step": 915500 - }, - { - "epoch": 10.17, - "learning_rate": 9.956855385025431e-08, - "loss": 3.8705, - "step": 916000 - }, - { - "epoch": 10.18, - "learning_rate": 9.955467205650445e-08, - "loss": 3.8709, - "step": 916500 - }, - { - "epoch": 10.18, - "learning_rate": 9.95407902627546e-08, - "loss": 3.8832, - "step": 917000 - }, - { - "epoch": 10.19, - "learning_rate": 9.952690846900472e-08, - "loss": 3.8674, - "step": 917500 - }, - { - "epoch": 10.19, - "learning_rate": 9.951302667525486e-08, - "loss": 3.8332, - "step": 918000 - }, - { - "epoch": 10.2, - "learning_rate": 9.949914488150501e-08, - "loss": 3.8863, - "step": 918500 - }, - { - "epoch": 10.21, - "learning_rate": 9.948526308775515e-08, - "loss": 3.8694, - "step": 919000 - }, - { - "epoch": 10.21, - "learning_rate": 9.947138129400529e-08, - "loss": 3.8543, - "step": 919500 - }, - { - "epoch": 10.22, - "learning_rate": 9.945749950025542e-08, - "loss": 3.8511, - "step": 920000 - }, - { - "epoch": 10.22, - "learning_rate": 9.944361770650555e-08, - "loss": 3.8791, - "step": 920500 - }, - { - "epoch": 10.23, - "learning_rate": 9.942973591275569e-08, - "loss": 3.8505, - "step": 921000 - }, - { - "epoch": 10.23, - "learning_rate": 9.941585411900583e-08, - "loss": 3.8666, - "step": 921500 - }, - { - "epoch": 10.24, - "learning_rate": 9.940197232525598e-08, - "loss": 3.8796, - "step": 922000 - }, - { - "epoch": 10.24, - "learning_rate": 9.938809053150612e-08, - "loss": 3.8867, - "step": 922500 - }, - { - "epoch": 10.25, - "learning_rate": 9.937420873775625e-08, - "loss": 3.8659, - "step": 923000 - }, - { - "epoch": 10.26, - "learning_rate": 9.936032694400639e-08, - "loss": 3.8811, - "step": 923500 - }, - { - "epoch": 10.26, - "learning_rate": 9.934644515025653e-08, - "loss": 3.849, - "step": 924000 - }, - { - "epoch": 10.27, - "learning_rate": 9.933256335650667e-08, - "loss": 3.8673, - "step": 924500 - }, - { - "epoch": 10.27, - "learning_rate": 9.931868156275682e-08, - "loss": 3.8738, - "step": 925000 - }, - { - "epoch": 10.28, - "learning_rate": 9.930479976900696e-08, - "loss": 3.8682, - "step": 925500 - }, - { - "epoch": 10.28, - "learning_rate": 9.929091797525709e-08, - "loss": 3.8537, - "step": 926000 - }, - { - "epoch": 10.29, - "learning_rate": 9.927703618150722e-08, - "loss": 3.865, - "step": 926500 - }, - { - "epoch": 10.29, - "learning_rate": 9.926315438775736e-08, - "loss": 3.8733, - "step": 927000 - }, - { - "epoch": 10.3, - "learning_rate": 9.92492725940075e-08, - "loss": 3.8726, - "step": 927500 - }, - { - "epoch": 10.31, - "learning_rate": 9.923539080025764e-08, - "loss": 3.8549, - "step": 928000 - }, - { - "epoch": 10.31, - "learning_rate": 9.922150900650777e-08, - "loss": 3.8419, - "step": 928500 - }, - { - "epoch": 10.32, - "learning_rate": 9.920762721275792e-08, - "loss": 3.8696, - "step": 929000 - }, - { - "epoch": 10.32, - "learning_rate": 9.919374541900806e-08, - "loss": 3.8543, - "step": 929500 - }, - { - "epoch": 10.33, - "learning_rate": 9.91798636252582e-08, - "loss": 3.8731, - "step": 930000 - }, - { - "epoch": 10.33, - "learning_rate": 9.916598183150834e-08, - "loss": 3.8611, - "step": 930500 - }, - { - "epoch": 10.34, - "learning_rate": 9.915210003775848e-08, - "loss": 3.8893, - "step": 931000 - }, - { - "epoch": 10.34, - "learning_rate": 9.913821824400861e-08, - "loss": 3.8672, - "step": 931500 - }, - { - "epoch": 10.35, - "learning_rate": 9.912433645025876e-08, - "loss": 3.8572, - "step": 932000 - }, - { - "epoch": 10.36, - "learning_rate": 9.911045465650888e-08, - "loss": 3.8588, - "step": 932500 - }, - { - "epoch": 10.36, - "learning_rate": 9.909657286275903e-08, - "loss": 3.8452, - "step": 933000 - }, - { - "epoch": 10.37, - "learning_rate": 9.908269106900917e-08, - "loss": 3.8802, - "step": 933500 - }, - { - "epoch": 10.37, - "learning_rate": 9.90688092752593e-08, - "loss": 3.8591, - "step": 934000 - }, - { - "epoch": 10.38, - "learning_rate": 9.905492748150944e-08, - "loss": 3.8646, - "step": 934500 - }, - { - "epoch": 10.38, - "learning_rate": 9.904104568775958e-08, - "loss": 3.8622, - "step": 935000 - }, - { - "epoch": 10.39, - "learning_rate": 9.902716389400972e-08, - "loss": 3.8741, - "step": 935500 - }, - { - "epoch": 10.39, - "learning_rate": 9.901328210025987e-08, - "loss": 3.8691, - "step": 936000 - }, - { - "epoch": 10.4, - "learning_rate": 9.899940030651001e-08, - "loss": 3.8632, - "step": 936500 - }, - { - "epoch": 10.41, - "learning_rate": 9.898551851276014e-08, - "loss": 3.8659, - "step": 937000 - }, - { - "epoch": 10.41, - "learning_rate": 9.897163671901028e-08, - "loss": 3.8764, - "step": 937500 - }, - { - "epoch": 10.42, - "learning_rate": 9.895775492526042e-08, - "loss": 3.8729, - "step": 938000 - }, - { - "epoch": 10.42, - "learning_rate": 9.894387313151055e-08, - "loss": 3.8833, - "step": 938500 - }, - { - "epoch": 10.43, - "learning_rate": 9.89299913377607e-08, - "loss": 3.8762, - "step": 939000 - }, - { - "epoch": 10.43, - "learning_rate": 9.891610954401084e-08, - "loss": 3.8472, - "step": 939500 - }, - { - "epoch": 10.44, - "learning_rate": 9.890222775026097e-08, - "loss": 3.8667, - "step": 940000 - }, - { - "epoch": 10.44, - "learning_rate": 9.888834595651111e-08, - "loss": 3.8745, - "step": 940500 - }, - { - "epoch": 10.45, - "learning_rate": 9.887446416276125e-08, - "loss": 3.8837, - "step": 941000 - }, - { - "epoch": 10.46, - "learning_rate": 9.886058236901139e-08, - "loss": 3.8511, - "step": 941500 - }, - { - "epoch": 10.46, - "learning_rate": 9.884670057526153e-08, - "loss": 3.8796, - "step": 942000 - }, - { - "epoch": 10.47, - "learning_rate": 9.883281878151166e-08, - "loss": 3.8682, - "step": 942500 - }, - { - "epoch": 10.47, - "learning_rate": 9.88189369877618e-08, - "loss": 3.8515, - "step": 943000 - }, - { - "epoch": 10.48, - "learning_rate": 9.880505519401195e-08, - "loss": 3.8572, - "step": 943500 - }, - { - "epoch": 10.48, - "learning_rate": 9.879117340026209e-08, - "loss": 3.8599, - "step": 944000 - }, - { - "epoch": 10.49, - "learning_rate": 9.877729160651222e-08, - "loss": 3.8675, - "step": 944500 - }, - { - "epoch": 10.49, - "learning_rate": 9.876340981276236e-08, - "loss": 3.8707, - "step": 945000 - }, - { - "epoch": 10.5, - "learning_rate": 9.874952801901249e-08, - "loss": 3.8825, - "step": 945500 - }, - { - "epoch": 10.51, - "learning_rate": 9.873564622526263e-08, - "loss": 3.8788, - "step": 946000 - }, - { - "epoch": 10.51, - "learning_rate": 9.872176443151278e-08, - "loss": 3.8524, - "step": 946500 - }, - { - "epoch": 10.52, - "learning_rate": 9.870788263776292e-08, - "loss": 3.8666, - "step": 947000 - }, - { - "epoch": 10.52, - "learning_rate": 9.869400084401306e-08, - "loss": 3.8758, - "step": 947500 - }, - { - "epoch": 10.53, - "learning_rate": 9.86801190502632e-08, - "loss": 3.8605, - "step": 948000 - }, - { - "epoch": 10.53, - "learning_rate": 9.866623725651333e-08, - "loss": 3.8542, - "step": 948500 - }, - { - "epoch": 10.54, - "learning_rate": 9.865235546276347e-08, - "loss": 3.8557, - "step": 949000 - }, - { - "epoch": 10.54, - "learning_rate": 9.863847366901362e-08, - "loss": 3.8531, - "step": 949500 - }, - { - "epoch": 10.55, - "learning_rate": 9.862459187526376e-08, - "loss": 3.8527, - "step": 950000 - }, - { - "epoch": 10.56, - "learning_rate": 9.861071008151389e-08, - "loss": 3.8588, - "step": 950500 - }, - { - "epoch": 10.56, - "learning_rate": 9.859682828776402e-08, - "loss": 3.8679, - "step": 951000 - }, - { - "epoch": 10.57, - "learning_rate": 9.858294649401416e-08, - "loss": 3.8518, - "step": 951500 - }, - { - "epoch": 10.57, - "learning_rate": 9.85690647002643e-08, - "loss": 3.8525, - "step": 952000 - }, - { - "epoch": 10.58, - "learning_rate": 9.855518290651444e-08, - "loss": 3.8524, - "step": 952500 - }, - { - "epoch": 10.58, - "learning_rate": 9.854130111276459e-08, - "loss": 3.8605, - "step": 953000 - }, - { - "epoch": 10.59, - "learning_rate": 9.852741931901473e-08, - "loss": 3.8719, - "step": 953500 - }, - { - "epoch": 10.59, - "learning_rate": 9.851353752526486e-08, - "loss": 3.8659, - "step": 954000 - }, - { - "epoch": 10.6, - "learning_rate": 9.8499655731515e-08, - "loss": 3.8484, - "step": 954500 - }, - { - "epoch": 10.61, - "learning_rate": 9.848577393776514e-08, - "loss": 3.871, - "step": 955000 - }, - { - "epoch": 10.61, - "learning_rate": 9.847189214401528e-08, - "loss": 3.8532, - "step": 955500 - }, - { - "epoch": 10.62, - "learning_rate": 9.845801035026543e-08, - "loss": 3.8531, - "step": 956000 - }, - { - "epoch": 10.62, - "learning_rate": 9.844412855651555e-08, - "loss": 3.8591, - "step": 956500 - }, - { - "epoch": 10.63, - "learning_rate": 9.843024676276568e-08, - "loss": 3.8737, - "step": 957000 - }, - { - "epoch": 10.63, - "learning_rate": 9.841636496901583e-08, - "loss": 3.8497, - "step": 957500 - }, - { - "epoch": 10.64, - "learning_rate": 9.840248317526597e-08, - "loss": 3.8671, - "step": 958000 - }, - { - "epoch": 10.64, - "learning_rate": 9.838860138151611e-08, - "loss": 3.8644, - "step": 958500 - }, - { - "epoch": 10.65, - "learning_rate": 9.837471958776625e-08, - "loss": 3.8626, - "step": 959000 - }, - { - "epoch": 10.66, - "learning_rate": 9.836083779401638e-08, - "loss": 3.8502, - "step": 959500 - }, - { - "epoch": 10.66, - "learning_rate": 9.834695600026652e-08, - "loss": 3.8785, - "step": 960000 - }, - { - "epoch": 10.67, - "learning_rate": 9.833307420651667e-08, - "loss": 3.8665, - "step": 960500 - }, - { - "epoch": 10.67, - "learning_rate": 9.831919241276681e-08, - "loss": 3.853, - "step": 961000 - }, - { - "epoch": 10.68, - "learning_rate": 9.830531061901695e-08, - "loss": 3.8561, - "step": 961500 - }, - { - "epoch": 10.68, - "learning_rate": 9.829142882526709e-08, - "loss": 3.8521, - "step": 962000 - }, - { - "epoch": 10.69, - "learning_rate": 9.827754703151722e-08, - "loss": 3.8828, - "step": 962500 - }, - { - "epoch": 10.69, - "learning_rate": 9.826366523776735e-08, - "loss": 3.8706, - "step": 963000 - }, - { - "epoch": 10.7, - "learning_rate": 9.824978344401749e-08, - "loss": 3.8622, - "step": 963500 - }, - { - "epoch": 10.71, - "learning_rate": 9.823590165026764e-08, - "loss": 3.8459, - "step": 964000 - }, - { - "epoch": 10.71, - "learning_rate": 9.822201985651778e-08, - "loss": 3.8465, - "step": 964500 - }, - { - "epoch": 10.72, - "learning_rate": 9.820813806276791e-08, - "loss": 3.8832, - "step": 965000 - }, - { - "epoch": 10.72, - "learning_rate": 9.819425626901805e-08, - "loss": 3.8757, - "step": 965500 - }, - { - "epoch": 10.73, - "learning_rate": 9.818037447526819e-08, - "loss": 3.8731, - "step": 966000 - }, - { - "epoch": 10.73, - "learning_rate": 9.816649268151833e-08, - "loss": 3.8566, - "step": 966500 - }, - { - "epoch": 10.74, - "learning_rate": 9.815261088776848e-08, - "loss": 3.8529, - "step": 967000 - }, - { - "epoch": 10.74, - "learning_rate": 9.813872909401862e-08, - "loss": 3.8567, - "step": 967500 - }, - { - "epoch": 10.75, - "learning_rate": 9.812484730026875e-08, - "loss": 3.8475, - "step": 968000 - }, - { - "epoch": 10.76, - "learning_rate": 9.811096550651889e-08, - "loss": 3.8522, - "step": 968500 - }, - { - "epoch": 10.76, - "learning_rate": 9.809708371276902e-08, - "loss": 3.8692, - "step": 969000 - }, - { - "epoch": 10.77, - "learning_rate": 9.808320191901916e-08, - "loss": 3.8667, - "step": 969500 - }, - { - "epoch": 10.77, - "learning_rate": 9.80693201252693e-08, - "loss": 3.869, - "step": 970000 - }, - { - "epoch": 10.78, - "learning_rate": 9.805543833151945e-08, - "loss": 3.8616, - "step": 970500 - }, - { - "epoch": 10.78, - "learning_rate": 9.804155653776957e-08, - "loss": 3.8601, - "step": 971000 - }, - { - "epoch": 10.79, - "learning_rate": 9.802767474401972e-08, - "loss": 3.867, - "step": 971500 - }, - { - "epoch": 10.79, - "learning_rate": 9.801379295026986e-08, - "loss": 3.8644, - "step": 972000 - }, - { - "epoch": 10.8, - "learning_rate": 9.799991115652e-08, - "loss": 3.8434, - "step": 972500 - }, - { - "epoch": 10.81, - "learning_rate": 9.798602936277014e-08, - "loss": 3.8574, - "step": 973000 - }, - { - "epoch": 10.81, - "learning_rate": 9.797214756902027e-08, - "loss": 3.8648, - "step": 973500 - }, - { - "epoch": 10.82, - "learning_rate": 9.795826577527041e-08, - "loss": 3.846, - "step": 974000 - }, - { - "epoch": 10.82, - "learning_rate": 9.794438398152056e-08, - "loss": 3.8593, - "step": 974500 - }, - { - "epoch": 10.83, - "learning_rate": 9.793050218777069e-08, - "loss": 3.8662, - "step": 975000 - }, - { - "epoch": 10.83, - "learning_rate": 9.791662039402083e-08, - "loss": 3.8412, - "step": 975500 - }, - { - "epoch": 10.84, - "learning_rate": 9.790273860027097e-08, - "loss": 3.8641, - "step": 976000 - }, - { - "epoch": 10.84, - "learning_rate": 9.78888568065211e-08, - "loss": 3.8525, - "step": 976500 - }, - { - "epoch": 10.85, - "learning_rate": 9.787497501277124e-08, - "loss": 3.8524, - "step": 977000 - }, - { - "epoch": 10.86, - "learning_rate": 9.786109321902138e-08, - "loss": 3.8351, - "step": 977500 - }, - { - "epoch": 10.86, - "learning_rate": 9.784721142527153e-08, - "loss": 3.8825, - "step": 978000 - }, - { - "epoch": 10.87, - "learning_rate": 9.783332963152167e-08, - "loss": 3.8685, - "step": 978500 - }, - { - "epoch": 10.87, - "learning_rate": 9.78194478377718e-08, - "loss": 3.8586, - "step": 979000 - }, - { - "epoch": 10.88, - "learning_rate": 9.780556604402194e-08, - "loss": 3.8522, - "step": 979500 - }, - { - "epoch": 10.88, - "learning_rate": 9.779168425027208e-08, - "loss": 3.8554, - "step": 980000 - }, - { - "epoch": 10.89, - "learning_rate": 9.777780245652222e-08, - "loss": 3.8516, - "step": 980500 - }, - { - "epoch": 10.89, - "learning_rate": 9.776392066277235e-08, - "loss": 3.8519, - "step": 981000 - }, - { - "epoch": 10.9, - "learning_rate": 9.77500388690225e-08, - "loss": 3.8716, - "step": 981500 - }, - { - "epoch": 10.91, - "learning_rate": 9.773615707527262e-08, - "loss": 3.8573, - "step": 982000 - }, - { - "epoch": 10.91, - "learning_rate": 9.772227528152277e-08, - "loss": 3.8609, - "step": 982500 - }, - { - "epoch": 10.92, - "learning_rate": 9.770839348777291e-08, - "loss": 3.8734, - "step": 983000 - }, - { - "epoch": 10.92, - "learning_rate": 9.769451169402305e-08, - "loss": 3.8622, - "step": 983500 - }, - { - "epoch": 10.93, - "learning_rate": 9.76806299002732e-08, - "loss": 3.8566, - "step": 984000 - }, - { - "epoch": 10.93, - "learning_rate": 9.766674810652334e-08, - "loss": 3.8702, - "step": 984500 - }, - { - "epoch": 10.94, - "learning_rate": 9.765286631277347e-08, - "loss": 3.86, - "step": 985000 - }, - { - "epoch": 10.94, - "learning_rate": 9.763898451902361e-08, - "loss": 3.8647, - "step": 985500 - }, - { - "epoch": 10.95, - "learning_rate": 9.762510272527375e-08, - "loss": 3.8523, - "step": 986000 - }, - { - "epoch": 10.96, - "learning_rate": 9.761122093152389e-08, - "loss": 3.8478, - "step": 986500 - }, - { - "epoch": 10.96, - "learning_rate": 9.759733913777402e-08, - "loss": 3.8731, - "step": 987000 - }, - { - "epoch": 10.97, - "learning_rate": 9.758345734402415e-08, - "loss": 3.8746, - "step": 987500 - }, - { - "epoch": 10.97, - "learning_rate": 9.756957555027429e-08, - "loss": 3.8547, - "step": 988000 - }, - { - "epoch": 10.98, - "learning_rate": 9.755569375652443e-08, - "loss": 3.8569, - "step": 988500 - }, - { - "epoch": 10.98, - "learning_rate": 9.754181196277458e-08, - "loss": 3.8595, - "step": 989000 - }, - { - "epoch": 10.99, - "learning_rate": 9.752793016902472e-08, - "loss": 3.8365, - "step": 989500 - }, - { - "epoch": 10.99, - "learning_rate": 9.751404837527486e-08, - "loss": 3.8444, - "step": 990000 - }, - { - "epoch": 11.0, - "learning_rate": 9.750016658152499e-08, - "loss": 3.8648, - "step": 990500 - }, - { - "epoch": 11.0, - "eval_loss": 3.8952994346618652, - "eval_runtime": 6.3144, - "eval_samples_per_second": 246.105, - "step": 990506 - }, - { - "epoch": 11.01, - "learning_rate": 9.748628478777513e-08, - "loss": 3.8663, - "step": 991000 - }, - { - "epoch": 11.01, - "learning_rate": 9.747240299402527e-08, - "loss": 3.8649, - "step": 991500 - }, - { - "epoch": 11.02, - "learning_rate": 9.745852120027542e-08, - "loss": 3.8657, - "step": 992000 - }, - { - "epoch": 11.02, - "learning_rate": 9.744463940652556e-08, - "loss": 3.853, - "step": 992500 - }, - { - "epoch": 11.03, - "learning_rate": 9.743075761277569e-08, - "loss": 3.8628, - "step": 993000 - }, - { - "epoch": 11.03, - "learning_rate": 9.741687581902582e-08, - "loss": 3.8512, - "step": 993500 - }, - { - "epoch": 11.04, - "learning_rate": 9.740299402527596e-08, - "loss": 3.8547, - "step": 994000 - }, - { - "epoch": 11.04, - "learning_rate": 9.73891122315261e-08, - "loss": 3.8546, - "step": 994500 - }, - { - "epoch": 11.05, - "learning_rate": 9.737523043777624e-08, - "loss": 3.867, - "step": 995000 - }, - { - "epoch": 11.06, - "learning_rate": 9.736134864402639e-08, - "loss": 3.8618, - "step": 995500 - }, - { - "epoch": 11.06, - "learning_rate": 9.734746685027652e-08, - "loss": 3.8536, - "step": 996000 - }, - { - "epoch": 11.07, - "learning_rate": 9.733358505652666e-08, - "loss": 3.8588, - "step": 996500 - }, - { - "epoch": 11.07, - "learning_rate": 9.73197032627768e-08, - "loss": 3.8499, - "step": 997000 - }, - { - "epoch": 11.08, - "learning_rate": 9.730582146902694e-08, - "loss": 3.8623, - "step": 997500 - }, - { - "epoch": 11.08, - "learning_rate": 9.729193967527708e-08, - "loss": 3.854, - "step": 998000 - }, - { - "epoch": 11.09, - "learning_rate": 9.727805788152723e-08, - "loss": 3.842, - "step": 998500 - }, - { - "epoch": 11.09, - "learning_rate": 9.726417608777736e-08, - "loss": 3.872, - "step": 999000 - }, - { - "epoch": 11.1, - "learning_rate": 9.725029429402748e-08, - "loss": 3.8529, - "step": 999500 - }, - { - "epoch": 11.11, - "learning_rate": 9.723641250027763e-08, - "loss": 3.8463, - "step": 1000000 - }, - { - "epoch": 11.11, - "learning_rate": 9.722253070652777e-08, - "loss": 3.8362, - "step": 1000500 - }, - { - "epoch": 11.12, - "learning_rate": 9.720864891277791e-08, - "loss": 3.8544, - "step": 1001000 - }, - { - "epoch": 11.12, - "learning_rate": 9.719476711902804e-08, - "loss": 3.8483, - "step": 1001500 - }, - { - "epoch": 11.13, - "learning_rate": 9.718088532527818e-08, - "loss": 3.8695, - "step": 1002000 - }, - { - "epoch": 11.13, - "learning_rate": 9.716700353152833e-08, - "loss": 3.8459, - "step": 1002500 - }, - { - "epoch": 11.14, - "learning_rate": 9.715312173777847e-08, - "loss": 3.8633, - "step": 1003000 - }, - { - "epoch": 11.14, - "learning_rate": 9.713923994402861e-08, - "loss": 3.8359, - "step": 1003500 - }, - { - "epoch": 11.15, - "learning_rate": 9.712535815027875e-08, - "loss": 3.844, - "step": 1004000 - }, - { - "epoch": 11.16, - "learning_rate": 9.711147635652888e-08, - "loss": 3.8747, - "step": 1004500 - }, - { - "epoch": 11.16, - "learning_rate": 9.709759456277902e-08, - "loss": 3.8569, - "step": 1005000 - }, - { - "epoch": 11.17, - "learning_rate": 9.708371276902915e-08, - "loss": 3.8704, - "step": 1005500 - }, - { - "epoch": 11.17, - "learning_rate": 9.70698309752793e-08, - "loss": 3.847, - "step": 1006000 - }, - { - "epoch": 11.18, - "learning_rate": 9.705594918152944e-08, - "loss": 3.8777, - "step": 1006500 - }, - { - "epoch": 11.18, - "learning_rate": 9.704206738777958e-08, - "loss": 3.8748, - "step": 1007000 - }, - { - "epoch": 11.19, - "learning_rate": 9.702818559402971e-08, - "loss": 3.8703, - "step": 1007500 - }, - { - "epoch": 11.19, - "learning_rate": 9.701430380027985e-08, - "loss": 3.863, - "step": 1008000 - }, - { - "epoch": 11.2, - "learning_rate": 9.700042200652999e-08, - "loss": 3.8448, - "step": 1008500 - }, - { - "epoch": 11.21, - "learning_rate": 9.698654021278013e-08, - "loss": 3.8467, - "step": 1009000 - }, - { - "epoch": 11.21, - "learning_rate": 9.697265841903028e-08, - "loss": 3.8423, - "step": 1009500 - }, - { - "epoch": 11.22, - "learning_rate": 9.69587766252804e-08, - "loss": 3.8522, - "step": 1010000 - }, - { - "epoch": 11.22, - "learning_rate": 9.694489483153055e-08, - "loss": 3.8648, - "step": 1010500 - }, - { - "epoch": 11.23, - "learning_rate": 9.693101303778069e-08, - "loss": 3.8497, - "step": 1011000 - }, - { - "epoch": 11.23, - "learning_rate": 9.691713124403082e-08, - "loss": 3.8511, - "step": 1011500 - }, - { - "epoch": 11.24, - "learning_rate": 9.690324945028096e-08, - "loss": 3.8487, - "step": 1012000 - }, - { - "epoch": 11.24, - "learning_rate": 9.68893676565311e-08, - "loss": 3.8518, - "step": 1012500 - }, - { - "epoch": 11.25, - "learning_rate": 9.687548586278123e-08, - "loss": 3.8421, - "step": 1013000 - }, - { - "epoch": 11.26, - "learning_rate": 9.686160406903138e-08, - "loss": 3.8607, - "step": 1013500 - }, - { - "epoch": 11.26, - "learning_rate": 9.684772227528152e-08, - "loss": 3.8521, - "step": 1014000 - }, - { - "epoch": 11.27, - "learning_rate": 9.683384048153166e-08, - "loss": 3.8516, - "step": 1014500 - }, - { - "epoch": 11.27, - "learning_rate": 9.68199586877818e-08, - "loss": 3.8581, - "step": 1015000 - }, - { - "epoch": 11.28, - "learning_rate": 9.680607689403193e-08, - "loss": 3.8553, - "step": 1015500 - }, - { - "epoch": 11.28, - "learning_rate": 9.679219510028207e-08, - "loss": 3.8452, - "step": 1016000 - }, - { - "epoch": 11.29, - "learning_rate": 9.677831330653222e-08, - "loss": 3.8642, - "step": 1016500 - }, - { - "epoch": 11.29, - "learning_rate": 9.676443151278236e-08, - "loss": 3.8361, - "step": 1017000 - }, - { - "epoch": 11.3, - "learning_rate": 9.675054971903249e-08, - "loss": 3.8413, - "step": 1017500 - }, - { - "epoch": 11.31, - "learning_rate": 9.673666792528263e-08, - "loss": 3.8569, - "step": 1018000 - }, - { - "epoch": 11.31, - "learning_rate": 9.672278613153276e-08, - "loss": 3.8511, - "step": 1018500 - }, - { - "epoch": 11.32, - "learning_rate": 9.67089043377829e-08, - "loss": 3.837, - "step": 1019000 - }, - { - "epoch": 11.32, - "learning_rate": 9.669502254403304e-08, - "loss": 3.8386, - "step": 1019500 - }, - { - "epoch": 11.33, - "learning_rate": 9.668114075028319e-08, - "loss": 3.837, - "step": 1020000 - }, - { - "epoch": 11.33, - "learning_rate": 9.666725895653333e-08, - "loss": 3.8515, - "step": 1020500 - }, - { - "epoch": 11.34, - "learning_rate": 9.665337716278347e-08, - "loss": 3.8484, - "step": 1021000 - }, - { - "epoch": 11.34, - "learning_rate": 9.66394953690336e-08, - "loss": 3.85, - "step": 1021500 - }, - { - "epoch": 11.35, - "learning_rate": 9.662561357528374e-08, - "loss": 3.8509, - "step": 1022000 - }, - { - "epoch": 11.36, - "learning_rate": 9.661173178153388e-08, - "loss": 3.8674, - "step": 1022500 - }, - { - "epoch": 11.36, - "learning_rate": 9.659784998778403e-08, - "loss": 3.8413, - "step": 1023000 - }, - { - "epoch": 11.37, - "learning_rate": 9.658396819403415e-08, - "loss": 3.8277, - "step": 1023500 - }, - { - "epoch": 11.37, - "learning_rate": 9.657008640028428e-08, - "loss": 3.8627, - "step": 1024000 - }, - { - "epoch": 11.38, - "learning_rate": 9.655620460653443e-08, - "loss": 3.8431, - "step": 1024500 - }, - { - "epoch": 11.38, - "learning_rate": 9.654232281278457e-08, - "loss": 3.8492, - "step": 1025000 - }, - { - "epoch": 11.39, - "learning_rate": 9.652844101903471e-08, - "loss": 3.8423, - "step": 1025500 - }, - { - "epoch": 11.39, - "learning_rate": 9.651455922528485e-08, - "loss": 3.8626, - "step": 1026000 - }, - { - "epoch": 11.4, - "learning_rate": 9.6500677431535e-08, - "loss": 3.8437, - "step": 1026500 - }, - { - "epoch": 11.41, - "learning_rate": 9.648679563778512e-08, - "loss": 3.864, - "step": 1027000 - }, - { - "epoch": 11.41, - "learning_rate": 9.647291384403527e-08, - "loss": 3.8621, - "step": 1027500 - }, - { - "epoch": 11.42, - "learning_rate": 9.645903205028541e-08, - "loss": 3.8585, - "step": 1028000 - }, - { - "epoch": 11.42, - "learning_rate": 9.644515025653555e-08, - "loss": 3.848, - "step": 1028500 - }, - { - "epoch": 11.43, - "learning_rate": 9.643126846278569e-08, - "loss": 3.8561, - "step": 1029000 - }, - { - "epoch": 11.43, - "learning_rate": 9.641738666903582e-08, - "loss": 3.8447, - "step": 1029500 - }, - { - "epoch": 11.44, - "learning_rate": 9.640350487528595e-08, - "loss": 3.8637, - "step": 1030000 - }, - { - "epoch": 11.44, - "learning_rate": 9.63896230815361e-08, - "loss": 3.845, - "step": 1030500 - }, - { - "epoch": 11.45, - "learning_rate": 9.637574128778624e-08, - "loss": 3.8456, - "step": 1031000 - }, - { - "epoch": 11.46, - "learning_rate": 9.636185949403638e-08, - "loss": 3.8689, - "step": 1031500 - }, - { - "epoch": 11.46, - "learning_rate": 9.634797770028652e-08, - "loss": 3.8708, - "step": 1032000 - }, - { - "epoch": 11.47, - "learning_rate": 9.633409590653665e-08, - "loss": 3.8386, - "step": 1032500 - }, - { - "epoch": 11.47, - "learning_rate": 9.632021411278679e-08, - "loss": 3.8458, - "step": 1033000 - }, - { - "epoch": 11.48, - "learning_rate": 9.630633231903693e-08, - "loss": 3.8464, - "step": 1033500 - }, - { - "epoch": 11.48, - "learning_rate": 9.629245052528708e-08, - "loss": 3.8691, - "step": 1034000 - }, - { - "epoch": 11.49, - "learning_rate": 9.627856873153722e-08, - "loss": 3.8595, - "step": 1034500 - }, - { - "epoch": 11.49, - "learning_rate": 9.626468693778735e-08, - "loss": 3.8135, - "step": 1035000 - }, - { - "epoch": 11.5, - "learning_rate": 9.625080514403749e-08, - "loss": 3.8604, - "step": 1035500 - }, - { - "epoch": 11.51, - "learning_rate": 9.623692335028762e-08, - "loss": 3.8485, - "step": 1036000 - }, - { - "epoch": 11.51, - "learning_rate": 9.622304155653776e-08, - "loss": 3.8416, - "step": 1036500 - }, - { - "epoch": 11.52, - "learning_rate": 9.62091597627879e-08, - "loss": 3.8547, - "step": 1037000 - }, - { - "epoch": 11.52, - "learning_rate": 9.619527796903805e-08, - "loss": 3.8578, - "step": 1037500 - }, - { - "epoch": 11.53, - "learning_rate": 9.618139617528817e-08, - "loss": 3.8577, - "step": 1038000 - }, - { - "epoch": 11.53, - "learning_rate": 9.616751438153832e-08, - "loss": 3.8423, - "step": 1038500 - }, - { - "epoch": 11.54, - "learning_rate": 9.615363258778846e-08, - "loss": 3.8397, - "step": 1039000 - }, - { - "epoch": 11.54, - "learning_rate": 9.61397507940386e-08, - "loss": 3.8531, - "step": 1039500 - }, - { - "epoch": 11.55, - "learning_rate": 9.612586900028874e-08, - "loss": 3.8626, - "step": 1040000 - }, - { - "epoch": 11.56, - "learning_rate": 9.611198720653889e-08, - "loss": 3.8656, - "step": 1040500 - }, - { - "epoch": 11.56, - "learning_rate": 9.609810541278901e-08, - "loss": 3.8743, - "step": 1041000 - }, - { - "epoch": 11.57, - "learning_rate": 9.608422361903916e-08, - "loss": 3.8585, - "step": 1041500 - }, - { - "epoch": 11.57, - "learning_rate": 9.607034182528929e-08, - "loss": 3.8282, - "step": 1042000 - }, - { - "epoch": 11.58, - "learning_rate": 9.605646003153943e-08, - "loss": 3.8469, - "step": 1042500 - }, - { - "epoch": 11.58, - "learning_rate": 9.604257823778957e-08, - "loss": 3.8483, - "step": 1043000 - }, - { - "epoch": 11.59, - "learning_rate": 9.602869644403971e-08, - "loss": 3.85, - "step": 1043500 - }, - { - "epoch": 11.59, - "learning_rate": 9.601481465028984e-08, - "loss": 3.854, - "step": 1044000 - }, - { - "epoch": 11.6, - "learning_rate": 9.600093285653998e-08, - "loss": 3.8255, - "step": 1044500 - }, - { - "epoch": 11.61, - "learning_rate": 9.598705106279013e-08, - "loss": 3.8294, - "step": 1045000 - }, - { - "epoch": 11.61, - "learning_rate": 9.597316926904027e-08, - "loss": 3.8557, - "step": 1045500 - }, - { - "epoch": 11.62, - "learning_rate": 9.595928747529041e-08, - "loss": 3.8637, - "step": 1046000 - }, - { - "epoch": 11.62, - "learning_rate": 9.594540568154054e-08, - "loss": 3.84, - "step": 1046500 - }, - { - "epoch": 11.63, - "learning_rate": 9.593152388779068e-08, - "loss": 3.8525, - "step": 1047000 - }, - { - "epoch": 11.63, - "learning_rate": 9.591764209404082e-08, - "loss": 3.8453, - "step": 1047500 - }, - { - "epoch": 11.64, - "learning_rate": 9.590376030029095e-08, - "loss": 3.8652, - "step": 1048000 - }, - { - "epoch": 11.64, - "learning_rate": 9.58898785065411e-08, - "loss": 3.8566, - "step": 1048500 - }, - { - "epoch": 11.65, - "learning_rate": 9.587599671279124e-08, - "loss": 3.8502, - "step": 1049000 - }, - { - "epoch": 11.66, - "learning_rate": 9.586211491904137e-08, - "loss": 3.852, - "step": 1049500 - }, - { - "epoch": 11.66, - "learning_rate": 9.584823312529151e-08, - "loss": 3.853, - "step": 1050000 - }, - { - "epoch": 11.67, - "learning_rate": 9.583435133154165e-08, - "loss": 3.8677, - "step": 1050500 - }, - { - "epoch": 11.67, - "learning_rate": 9.58204695377918e-08, - "loss": 3.847, - "step": 1051000 - }, - { - "epoch": 11.68, - "learning_rate": 9.580658774404194e-08, - "loss": 3.8677, - "step": 1051500 - }, - { - "epoch": 11.68, - "learning_rate": 9.579270595029207e-08, - "loss": 3.8543, - "step": 1052000 - }, - { - "epoch": 11.69, - "learning_rate": 9.577882415654221e-08, - "loss": 3.8649, - "step": 1052500 - }, - { - "epoch": 11.69, - "learning_rate": 9.576494236279235e-08, - "loss": 3.8709, - "step": 1053000 - }, - { - "epoch": 11.7, - "learning_rate": 9.575106056904249e-08, - "loss": 3.853, - "step": 1053500 - }, - { - "epoch": 11.71, - "learning_rate": 9.573717877529262e-08, - "loss": 3.8393, - "step": 1054000 - }, - { - "epoch": 11.71, - "learning_rate": 9.572329698154276e-08, - "loss": 3.8714, - "step": 1054500 - }, - { - "epoch": 11.72, - "learning_rate": 9.570941518779289e-08, - "loss": 3.8414, - "step": 1055000 - }, - { - "epoch": 11.72, - "learning_rate": 9.569553339404303e-08, - "loss": 3.8584, - "step": 1055500 - }, - { - "epoch": 11.73, - "learning_rate": 9.568165160029318e-08, - "loss": 3.8517, - "step": 1056000 - }, - { - "epoch": 11.73, - "learning_rate": 9.566776980654332e-08, - "loss": 3.835, - "step": 1056500 - }, - { - "epoch": 11.74, - "learning_rate": 9.565388801279346e-08, - "loss": 3.8427, - "step": 1057000 - }, - { - "epoch": 11.74, - "learning_rate": 9.56400062190436e-08, - "loss": 3.8585, - "step": 1057500 - }, - { - "epoch": 11.75, - "learning_rate": 9.562612442529373e-08, - "loss": 3.8566, - "step": 1058000 - }, - { - "epoch": 11.76, - "learning_rate": 9.561224263154387e-08, - "loss": 3.8435, - "step": 1058500 - }, - { - "epoch": 11.76, - "learning_rate": 9.559836083779402e-08, - "loss": 3.8718, - "step": 1059000 - }, - { - "epoch": 11.77, - "learning_rate": 9.558447904404416e-08, - "loss": 3.8519, - "step": 1059500 - }, - { - "epoch": 11.77, - "learning_rate": 9.557059725029429e-08, - "loss": 3.8283, - "step": 1060000 - }, - { - "epoch": 11.78, - "learning_rate": 9.555671545654442e-08, - "loss": 3.8451, - "step": 1060500 - }, - { - "epoch": 11.78, - "learning_rate": 9.554283366279456e-08, - "loss": 3.8543, - "step": 1061000 - }, - { - "epoch": 11.79, - "learning_rate": 9.55289518690447e-08, - "loss": 3.8585, - "step": 1061500 - }, - { - "epoch": 11.79, - "learning_rate": 9.551507007529484e-08, - "loss": 3.8563, - "step": 1062000 - }, - { - "epoch": 11.8, - "learning_rate": 9.550118828154499e-08, - "loss": 3.852, - "step": 1062500 - }, - { - "epoch": 11.81, - "learning_rate": 9.548730648779513e-08, - "loss": 3.854, - "step": 1063000 - }, - { - "epoch": 11.81, - "learning_rate": 9.547342469404526e-08, - "loss": 3.8477, - "step": 1063500 - }, - { - "epoch": 11.82, - "learning_rate": 9.54595429002954e-08, - "loss": 3.8522, - "step": 1064000 - }, - { - "epoch": 11.82, - "learning_rate": 9.544566110654554e-08, - "loss": 3.8577, - "step": 1064500 - }, - { - "epoch": 11.83, - "learning_rate": 9.543177931279568e-08, - "loss": 3.861, - "step": 1065000 - }, - { - "epoch": 11.83, - "learning_rate": 9.541789751904581e-08, - "loss": 3.8263, - "step": 1065500 - }, - { - "epoch": 11.84, - "learning_rate": 9.540401572529596e-08, - "loss": 3.8297, - "step": 1066000 - }, - { - "epoch": 11.84, - "learning_rate": 9.539013393154608e-08, - "loss": 3.8488, - "step": 1066500 - }, - { - "epoch": 11.85, - "learning_rate": 9.537625213779623e-08, - "loss": 3.8334, - "step": 1067000 - }, - { - "epoch": 11.86, - "learning_rate": 9.536237034404637e-08, - "loss": 3.8351, - "step": 1067500 - }, - { - "epoch": 11.86, - "learning_rate": 9.534848855029651e-08, - "loss": 3.8691, - "step": 1068000 - }, - { - "epoch": 11.87, - "learning_rate": 9.533460675654665e-08, - "loss": 3.8423, - "step": 1068500 - }, - { - "epoch": 11.87, - "learning_rate": 9.532072496279678e-08, - "loss": 3.8619, - "step": 1069000 - }, - { - "epoch": 11.88, - "learning_rate": 9.530684316904693e-08, - "loss": 3.8632, - "step": 1069500 - }, - { - "epoch": 11.88, - "learning_rate": 9.529296137529707e-08, - "loss": 3.8607, - "step": 1070000 - }, - { - "epoch": 11.89, - "learning_rate": 9.527907958154721e-08, - "loss": 3.8384, - "step": 1070500 - }, - { - "epoch": 11.89, - "learning_rate": 9.526519778779735e-08, - "loss": 3.8599, - "step": 1071000 - }, - { - "epoch": 11.9, - "learning_rate": 9.525131599404748e-08, - "loss": 3.838, - "step": 1071500 - }, - { - "epoch": 11.91, - "learning_rate": 9.523743420029762e-08, - "loss": 3.8405, - "step": 1072000 - }, - { - "epoch": 11.91, - "learning_rate": 9.522355240654775e-08, - "loss": 3.8304, - "step": 1072500 - }, - { - "epoch": 11.92, - "learning_rate": 9.52096706127979e-08, - "loss": 3.8313, - "step": 1073000 - }, - { - "epoch": 11.92, - "learning_rate": 9.519578881904804e-08, - "loss": 3.8397, - "step": 1073500 - }, - { - "epoch": 11.93, - "learning_rate": 9.518190702529818e-08, - "loss": 3.8488, - "step": 1074000 - }, - { - "epoch": 11.93, - "learning_rate": 9.516802523154831e-08, - "loss": 3.8463, - "step": 1074500 - }, - { - "epoch": 11.94, - "learning_rate": 9.515414343779845e-08, - "loss": 3.8501, - "step": 1075000 - }, - { - "epoch": 11.94, - "learning_rate": 9.514026164404859e-08, - "loss": 3.8566, - "step": 1075500 - }, - { - "epoch": 11.95, - "learning_rate": 9.512637985029874e-08, - "loss": 3.8396, - "step": 1076000 - }, - { - "epoch": 11.96, - "learning_rate": 9.511249805654888e-08, - "loss": 3.8417, - "step": 1076500 - }, - { - "epoch": 11.96, - "learning_rate": 9.509861626279902e-08, - "loss": 3.8362, - "step": 1077000 - }, - { - "epoch": 11.97, - "learning_rate": 9.508473446904915e-08, - "loss": 3.8552, - "step": 1077500 - }, - { - "epoch": 11.97, - "learning_rate": 9.507085267529929e-08, - "loss": 3.8504, - "step": 1078000 - }, - { - "epoch": 11.98, - "learning_rate": 9.505697088154942e-08, - "loss": 3.8471, - "step": 1078500 - }, - { - "epoch": 11.98, - "learning_rate": 9.504308908779956e-08, - "loss": 3.8452, - "step": 1079000 - }, - { - "epoch": 11.99, - "learning_rate": 9.50292072940497e-08, - "loss": 3.8309, - "step": 1079500 - }, - { - "epoch": 11.99, - "learning_rate": 9.501532550029985e-08, - "loss": 3.8516, - "step": 1080000 - }, - { - "epoch": 12.0, - "learning_rate": 9.500144370654998e-08, - "loss": 3.83, - "step": 1080500 - }, - { - "epoch": 12.0, - "eval_loss": 3.888516426086426, - "eval_runtime": 6.3054, - "eval_samples_per_second": 246.455, - "step": 1080552 - }, - { - "epoch": 12.0, - "learning_rate": 9.498756191280012e-08, - "loss": 3.8563, - "step": 1081000 - }, - { - "epoch": 12.01, - "learning_rate": 9.497368011905026e-08, - "loss": 3.8492, - "step": 1081500 - }, - { - "epoch": 12.02, - "learning_rate": 9.49597983253004e-08, - "loss": 3.851, - "step": 1082000 - }, - { - "epoch": 12.02, - "learning_rate": 9.494591653155054e-08, - "loss": 3.8411, - "step": 1082500 - }, - { - "epoch": 12.03, - "learning_rate": 9.493203473780067e-08, - "loss": 3.8741, - "step": 1083000 - }, - { - "epoch": 12.03, - "learning_rate": 9.491815294405082e-08, - "loss": 3.8397, - "step": 1083500 - }, - { - "epoch": 12.04, - "learning_rate": 9.490427115030096e-08, - "loss": 3.8422, - "step": 1084000 - }, - { - "epoch": 12.04, - "learning_rate": 9.489038935655109e-08, - "loss": 3.8456, - "step": 1084500 - }, - { - "epoch": 12.05, - "learning_rate": 9.487650756280123e-08, - "loss": 3.8559, - "step": 1085000 - }, - { - "epoch": 12.05, - "learning_rate": 9.486262576905137e-08, - "loss": 3.8318, - "step": 1085500 - }, - { - "epoch": 12.06, - "learning_rate": 9.48487439753015e-08, - "loss": 3.8484, - "step": 1086000 - }, - { - "epoch": 12.07, - "learning_rate": 9.483486218155164e-08, - "loss": 3.8378, - "step": 1086500 - }, - { - "epoch": 12.07, - "learning_rate": 9.482098038780179e-08, - "loss": 3.8473, - "step": 1087000 - }, - { - "epoch": 12.08, - "learning_rate": 9.480709859405193e-08, - "loss": 3.8481, - "step": 1087500 - }, - { - "epoch": 12.08, - "learning_rate": 9.479321680030207e-08, - "loss": 3.8382, - "step": 1088000 - }, - { - "epoch": 12.09, - "learning_rate": 9.47793350065522e-08, - "loss": 3.8414, - "step": 1088500 - }, - { - "epoch": 12.09, - "learning_rate": 9.476545321280234e-08, - "loss": 3.861, - "step": 1089000 - }, - { - "epoch": 12.1, - "learning_rate": 9.475157141905248e-08, - "loss": 3.8713, - "step": 1089500 - }, - { - "epoch": 12.1, - "learning_rate": 9.473768962530263e-08, - "loss": 3.8539, - "step": 1090000 - }, - { - "epoch": 12.11, - "learning_rate": 9.472380783155275e-08, - "loss": 3.8533, - "step": 1090500 - }, - { - "epoch": 12.12, - "learning_rate": 9.47099260378029e-08, - "loss": 3.8408, - "step": 1091000 - }, - { - "epoch": 12.12, - "learning_rate": 9.469604424405303e-08, - "loss": 3.8527, - "step": 1091500 - }, - { - "epoch": 12.13, - "learning_rate": 9.468216245030317e-08, - "loss": 3.8252, - "step": 1092000 - }, - { - "epoch": 12.13, - "learning_rate": 9.466828065655331e-08, - "loss": 3.8371, - "step": 1092500 - }, - { - "epoch": 12.14, - "learning_rate": 9.465439886280345e-08, - "loss": 3.8442, - "step": 1093000 - }, - { - "epoch": 12.14, - "learning_rate": 9.46405170690536e-08, - "loss": 3.8413, - "step": 1093500 - }, - { - "epoch": 12.15, - "learning_rate": 9.462663527530374e-08, - "loss": 3.8494, - "step": 1094000 - }, - { - "epoch": 12.15, - "learning_rate": 9.461275348155387e-08, - "loss": 3.8702, - "step": 1094500 - }, - { - "epoch": 12.16, - "learning_rate": 9.459887168780401e-08, - "loss": 3.8266, - "step": 1095000 - }, - { - "epoch": 12.17, - "learning_rate": 9.458498989405415e-08, - "loss": 3.8341, - "step": 1095500 - }, - { - "epoch": 12.17, - "learning_rate": 9.457110810030428e-08, - "loss": 3.8437, - "step": 1096000 - }, - { - "epoch": 12.18, - "learning_rate": 9.455722630655442e-08, - "loss": 3.8279, - "step": 1096500 - }, - { - "epoch": 12.18, - "learning_rate": 9.454334451280455e-08, - "loss": 3.8534, - "step": 1097000 - }, - { - "epoch": 12.19, - "learning_rate": 9.45294627190547e-08, - "loss": 3.8467, - "step": 1097500 - }, - { - "epoch": 12.19, - "learning_rate": 9.451558092530484e-08, - "loss": 3.8438, - "step": 1098000 - }, - { - "epoch": 12.2, - "learning_rate": 9.450169913155498e-08, - "loss": 3.818, - "step": 1098500 - }, - { - "epoch": 12.2, - "learning_rate": 9.448781733780512e-08, - "loss": 3.8491, - "step": 1099000 - }, - { - "epoch": 12.21, - "learning_rate": 9.447393554405526e-08, - "loss": 3.8233, - "step": 1099500 - }, - { - "epoch": 12.22, - "learning_rate": 9.446005375030539e-08, - "loss": 3.8397, - "step": 1100000 - }, - { - "epoch": 12.22, - "learning_rate": 9.444617195655553e-08, - "loss": 3.8621, - "step": 1100500 - }, - { - "epoch": 12.23, - "learning_rate": 9.443229016280568e-08, - "loss": 3.852, - "step": 1101000 - }, - { - "epoch": 12.23, - "learning_rate": 9.441840836905582e-08, - "loss": 3.8483, - "step": 1101500 - }, - { - "epoch": 12.24, - "learning_rate": 9.440452657530595e-08, - "loss": 3.8358, - "step": 1102000 - }, - { - "epoch": 12.24, - "learning_rate": 9.439064478155609e-08, - "loss": 3.8365, - "step": 1102500 - }, - { - "epoch": 12.25, - "learning_rate": 9.437676298780622e-08, - "loss": 3.8265, - "step": 1103000 - }, - { - "epoch": 12.25, - "learning_rate": 9.436288119405636e-08, - "loss": 3.8515, - "step": 1103500 - }, - { - "epoch": 12.26, - "learning_rate": 9.43489994003065e-08, - "loss": 3.8318, - "step": 1104000 - }, - { - "epoch": 12.27, - "learning_rate": 9.433511760655665e-08, - "loss": 3.8362, - "step": 1104500 - }, - { - "epoch": 12.27, - "learning_rate": 9.432123581280679e-08, - "loss": 3.8564, - "step": 1105000 - }, - { - "epoch": 12.28, - "learning_rate": 9.430735401905692e-08, - "loss": 3.8415, - "step": 1105500 - }, - { - "epoch": 12.28, - "learning_rate": 9.429347222530706e-08, - "loss": 3.8442, - "step": 1106000 - }, - { - "epoch": 12.29, - "learning_rate": 9.42795904315572e-08, - "loss": 3.8349, - "step": 1106500 - }, - { - "epoch": 12.29, - "learning_rate": 9.426570863780734e-08, - "loss": 3.8474, - "step": 1107000 - }, - { - "epoch": 12.3, - "learning_rate": 9.425182684405749e-08, - "loss": 3.8379, - "step": 1107500 - }, - { - "epoch": 12.3, - "learning_rate": 9.423794505030761e-08, - "loss": 3.8386, - "step": 1108000 - }, - { - "epoch": 12.31, - "learning_rate": 9.422406325655776e-08, - "loss": 3.8351, - "step": 1108500 - }, - { - "epoch": 12.32, - "learning_rate": 9.421018146280789e-08, - "loss": 3.8518, - "step": 1109000 - }, - { - "epoch": 12.32, - "learning_rate": 9.419629966905803e-08, - "loss": 3.8268, - "step": 1109500 - }, - { - "epoch": 12.33, - "learning_rate": 9.418241787530817e-08, - "loss": 3.8354, - "step": 1110000 - }, - { - "epoch": 12.33, - "learning_rate": 9.416853608155831e-08, - "loss": 3.8409, - "step": 1110500 - }, - { - "epoch": 12.34, - "learning_rate": 9.415465428780844e-08, - "loss": 3.8453, - "step": 1111000 - }, - { - "epoch": 12.34, - "learning_rate": 9.414077249405858e-08, - "loss": 3.8582, - "step": 1111500 - }, - { - "epoch": 12.35, - "learning_rate": 9.412689070030873e-08, - "loss": 3.8493, - "step": 1112000 - }, - { - "epoch": 12.35, - "learning_rate": 9.411300890655887e-08, - "loss": 3.8446, - "step": 1112500 - }, - { - "epoch": 12.36, - "learning_rate": 9.409912711280901e-08, - "loss": 3.8368, - "step": 1113000 - }, - { - "epoch": 12.37, - "learning_rate": 9.408524531905915e-08, - "loss": 3.8305, - "step": 1113500 - }, - { - "epoch": 12.37, - "learning_rate": 9.407136352530928e-08, - "loss": 3.8499, - "step": 1114000 - }, - { - "epoch": 12.38, - "learning_rate": 9.405748173155942e-08, - "loss": 3.86, - "step": 1114500 - }, - { - "epoch": 12.38, - "learning_rate": 9.404359993780955e-08, - "loss": 3.8443, - "step": 1115000 - }, - { - "epoch": 12.39, - "learning_rate": 9.40297181440597e-08, - "loss": 3.8679, - "step": 1115500 - }, - { - "epoch": 12.39, - "learning_rate": 9.401583635030984e-08, - "loss": 3.8378, - "step": 1116000 - }, - { - "epoch": 12.4, - "learning_rate": 9.400195455655998e-08, - "loss": 3.8292, - "step": 1116500 - }, - { - "epoch": 12.4, - "learning_rate": 9.398807276281011e-08, - "loss": 3.8584, - "step": 1117000 - }, - { - "epoch": 12.41, - "learning_rate": 9.397419096906025e-08, - "loss": 3.866, - "step": 1117500 - }, - { - "epoch": 12.42, - "learning_rate": 9.39603091753104e-08, - "loss": 3.8483, - "step": 1118000 - }, - { - "epoch": 12.42, - "learning_rate": 9.394642738156054e-08, - "loss": 3.8627, - "step": 1118500 - }, - { - "epoch": 12.43, - "learning_rate": 9.393254558781068e-08, - "loss": 3.8488, - "step": 1119000 - }, - { - "epoch": 12.43, - "learning_rate": 9.391866379406081e-08, - "loss": 3.8252, - "step": 1119500 - }, - { - "epoch": 12.44, - "learning_rate": 9.390478200031095e-08, - "loss": 3.8566, - "step": 1120000 - }, - { - "epoch": 12.44, - "learning_rate": 9.389090020656109e-08, - "loss": 3.8523, - "step": 1120500 - }, - { - "epoch": 12.45, - "learning_rate": 9.387701841281122e-08, - "loss": 3.8203, - "step": 1121000 - }, - { - "epoch": 12.45, - "learning_rate": 9.386313661906136e-08, - "loss": 3.8511, - "step": 1121500 - }, - { - "epoch": 12.46, - "learning_rate": 9.38492548253115e-08, - "loss": 3.8347, - "step": 1122000 - }, - { - "epoch": 12.47, - "learning_rate": 9.383537303156163e-08, - "loss": 3.8507, - "step": 1122500 - }, - { - "epoch": 12.47, - "learning_rate": 9.382149123781178e-08, - "loss": 3.8291, - "step": 1123000 - }, - { - "epoch": 12.48, - "learning_rate": 9.380760944406192e-08, - "loss": 3.8277, - "step": 1123500 - }, - { - "epoch": 12.48, - "learning_rate": 9.379372765031206e-08, - "loss": 3.8396, - "step": 1124000 - }, - { - "epoch": 12.49, - "learning_rate": 9.37798458565622e-08, - "loss": 3.8276, - "step": 1124500 - }, - { - "epoch": 12.49, - "learning_rate": 9.376596406281235e-08, - "loss": 3.8473, - "step": 1125000 - }, - { - "epoch": 12.5, - "learning_rate": 9.375208226906248e-08, - "loss": 3.8325, - "step": 1125500 - }, - { - "epoch": 12.5, - "learning_rate": 9.373820047531262e-08, - "loss": 3.8774, - "step": 1126000 - }, - { - "epoch": 12.51, - "learning_rate": 9.372431868156275e-08, - "loss": 3.8307, - "step": 1126500 - }, - { - "epoch": 12.52, - "learning_rate": 9.371043688781289e-08, - "loss": 3.8285, - "step": 1127000 - }, - { - "epoch": 12.52, - "learning_rate": 9.369655509406303e-08, - "loss": 3.8353, - "step": 1127500 - }, - { - "epoch": 12.53, - "learning_rate": 9.368267330031316e-08, - "loss": 3.8449, - "step": 1128000 - }, - { - "epoch": 12.53, - "learning_rate": 9.36687915065633e-08, - "loss": 3.8381, - "step": 1128500 - }, - { - "epoch": 12.54, - "learning_rate": 9.365490971281344e-08, - "loss": 3.8458, - "step": 1129000 - }, - { - "epoch": 12.54, - "learning_rate": 9.364102791906359e-08, - "loss": 3.8363, - "step": 1129500 - }, - { - "epoch": 12.55, - "learning_rate": 9.362714612531373e-08, - "loss": 3.8648, - "step": 1130000 - }, - { - "epoch": 12.55, - "learning_rate": 9.361326433156387e-08, - "loss": 3.8383, - "step": 1130500 - }, - { - "epoch": 12.56, - "learning_rate": 9.3599382537814e-08, - "loss": 3.8339, - "step": 1131000 - }, - { - "epoch": 12.57, - "learning_rate": 9.358550074406414e-08, - "loss": 3.8529, - "step": 1131500 - }, - { - "epoch": 12.57, - "learning_rate": 9.357161895031428e-08, - "loss": 3.8507, - "step": 1132000 - }, - { - "epoch": 12.58, - "learning_rate": 9.355773715656441e-08, - "loss": 3.8485, - "step": 1132500 - }, - { - "epoch": 12.58, - "learning_rate": 9.354385536281456e-08, - "loss": 3.8221, - "step": 1133000 - }, - { - "epoch": 12.59, - "learning_rate": 9.352997356906469e-08, - "loss": 3.8151, - "step": 1133500 - }, - { - "epoch": 12.59, - "learning_rate": 9.351609177531483e-08, - "loss": 3.8608, - "step": 1134000 - }, - { - "epoch": 12.6, - "learning_rate": 9.350220998156497e-08, - "loss": 3.83, - "step": 1134500 - }, - { - "epoch": 12.6, - "learning_rate": 9.348832818781511e-08, - "loss": 3.839, - "step": 1135000 - }, - { - "epoch": 12.61, - "learning_rate": 9.347444639406525e-08, - "loss": 3.8346, - "step": 1135500 - }, - { - "epoch": 12.62, - "learning_rate": 9.34605646003154e-08, - "loss": 3.8454, - "step": 1136000 - }, - { - "epoch": 12.62, - "learning_rate": 9.344668280656553e-08, - "loss": 3.8569, - "step": 1136500 - }, - { - "epoch": 12.63, - "learning_rate": 9.343280101281567e-08, - "loss": 3.83, - "step": 1137000 - }, - { - "epoch": 12.63, - "learning_rate": 9.341891921906581e-08, - "loss": 3.8611, - "step": 1137500 - }, - { - "epoch": 12.64, - "learning_rate": 9.340503742531595e-08, - "loss": 3.855, - "step": 1138000 - }, - { - "epoch": 12.64, - "learning_rate": 9.339115563156608e-08, - "loss": 3.8463, - "step": 1138500 - }, - { - "epoch": 12.65, - "learning_rate": 9.337727383781622e-08, - "loss": 3.8259, - "step": 1139000 - }, - { - "epoch": 12.65, - "learning_rate": 9.336339204406635e-08, - "loss": 3.8309, - "step": 1139500 - }, - { - "epoch": 12.66, - "learning_rate": 9.33495102503165e-08, - "loss": 3.8177, - "step": 1140000 - }, - { - "epoch": 12.67, - "learning_rate": 9.333562845656664e-08, - "loss": 3.8511, - "step": 1140500 - }, - { - "epoch": 12.67, - "learning_rate": 9.332174666281678e-08, - "loss": 3.8532, - "step": 1141000 - }, - { - "epoch": 12.68, - "learning_rate": 9.330786486906692e-08, - "loss": 3.8362, - "step": 1141500 - }, - { - "epoch": 12.68, - "learning_rate": 9.329398307531705e-08, - "loss": 3.8447, - "step": 1142000 - }, - { - "epoch": 12.69, - "learning_rate": 9.328010128156719e-08, - "loss": 3.8472, - "step": 1142500 - }, - { - "epoch": 12.69, - "learning_rate": 9.326621948781734e-08, - "loss": 3.8493, - "step": 1143000 - }, - { - "epoch": 12.7, - "learning_rate": 9.325233769406748e-08, - "loss": 3.8513, - "step": 1143500 - }, - { - "epoch": 12.7, - "learning_rate": 9.323845590031762e-08, - "loss": 3.856, - "step": 1144000 - }, - { - "epoch": 12.71, - "learning_rate": 9.322457410656775e-08, - "loss": 3.837, - "step": 1144500 - }, - { - "epoch": 12.72, - "learning_rate": 9.321069231281789e-08, - "loss": 3.8405, - "step": 1145000 - }, - { - "epoch": 12.72, - "learning_rate": 9.319681051906802e-08, - "loss": 3.8275, - "step": 1145500 - }, - { - "epoch": 12.73, - "learning_rate": 9.318292872531816e-08, - "loss": 3.844, - "step": 1146000 - }, - { - "epoch": 12.73, - "learning_rate": 9.31690469315683e-08, - "loss": 3.8407, - "step": 1146500 - }, - { - "epoch": 12.74, - "learning_rate": 9.315516513781845e-08, - "loss": 3.8365, - "step": 1147000 - }, - { - "epoch": 12.74, - "learning_rate": 9.314128334406858e-08, - "loss": 3.8468, - "step": 1147500 - }, - { - "epoch": 12.75, - "learning_rate": 9.312740155031872e-08, - "loss": 3.8471, - "step": 1148000 - }, - { - "epoch": 12.75, - "learning_rate": 9.311351975656886e-08, - "loss": 3.8566, - "step": 1148500 - }, - { - "epoch": 12.76, - "learning_rate": 9.3099637962819e-08, - "loss": 3.8344, - "step": 1149000 - }, - { - "epoch": 12.77, - "learning_rate": 9.308575616906914e-08, - "loss": 3.8387, - "step": 1149500 - }, - { - "epoch": 12.77, - "learning_rate": 9.307187437531929e-08, - "loss": 3.8406, - "step": 1150000 - }, - { - "epoch": 12.78, - "learning_rate": 9.305799258156942e-08, - "loss": 3.8326, - "step": 1150500 - }, - { - "epoch": 12.78, - "learning_rate": 9.304411078781956e-08, - "loss": 3.8347, - "step": 1151000 - }, - { - "epoch": 12.79, - "learning_rate": 9.303022899406969e-08, - "loss": 3.8513, - "step": 1151500 - }, - { - "epoch": 12.79, - "learning_rate": 9.301634720031983e-08, - "loss": 3.8445, - "step": 1152000 - }, - { - "epoch": 12.8, - "learning_rate": 9.300246540656997e-08, - "loss": 3.833, - "step": 1152500 - }, - { - "epoch": 12.8, - "learning_rate": 9.298858361282011e-08, - "loss": 3.8304, - "step": 1153000 - }, - { - "epoch": 12.81, - "learning_rate": 9.297470181907024e-08, - "loss": 3.8581, - "step": 1153500 - }, - { - "epoch": 12.82, - "learning_rate": 9.296082002532039e-08, - "loss": 3.8312, - "step": 1154000 - }, - { - "epoch": 12.82, - "learning_rate": 9.294693823157053e-08, - "loss": 3.8399, - "step": 1154500 - }, - { - "epoch": 12.83, - "learning_rate": 9.293305643782067e-08, - "loss": 3.8477, - "step": 1155000 - }, - { - "epoch": 12.83, - "learning_rate": 9.291917464407081e-08, - "loss": 3.8314, - "step": 1155500 - }, - { - "epoch": 12.84, - "learning_rate": 9.290529285032094e-08, - "loss": 3.8202, - "step": 1156000 - }, - { - "epoch": 12.84, - "learning_rate": 9.289141105657108e-08, - "loss": 3.8501, - "step": 1156500 - }, - { - "epoch": 12.85, - "learning_rate": 9.287752926282121e-08, - "loss": 3.8339, - "step": 1157000 - }, - { - "epoch": 12.85, - "learning_rate": 9.286364746907135e-08, - "loss": 3.8303, - "step": 1157500 - }, - { - "epoch": 12.86, - "learning_rate": 9.28497656753215e-08, - "loss": 3.836, - "step": 1158000 - }, - { - "epoch": 12.87, - "learning_rate": 9.283588388157164e-08, - "loss": 3.8331, - "step": 1158500 - }, - { - "epoch": 12.87, - "learning_rate": 9.282200208782177e-08, - "loss": 3.8067, - "step": 1159000 - }, - { - "epoch": 12.88, - "learning_rate": 9.280812029407191e-08, - "loss": 3.8322, - "step": 1159500 - }, - { - "epoch": 12.88, - "learning_rate": 9.279423850032205e-08, - "loss": 3.8508, - "step": 1160000 - }, - { - "epoch": 12.89, - "learning_rate": 9.27803567065722e-08, - "loss": 3.8496, - "step": 1160500 - }, - { - "epoch": 12.89, - "learning_rate": 9.276647491282234e-08, - "loss": 3.8249, - "step": 1161000 - }, - { - "epoch": 12.9, - "learning_rate": 9.275259311907248e-08, - "loss": 3.8375, - "step": 1161500 - }, - { - "epoch": 12.9, - "learning_rate": 9.273871132532261e-08, - "loss": 3.8365, - "step": 1162000 - }, - { - "epoch": 12.91, - "learning_rate": 9.272482953157275e-08, - "loss": 3.8506, - "step": 1162500 - }, - { - "epoch": 12.92, - "learning_rate": 9.271094773782288e-08, - "loss": 3.8463, - "step": 1163000 - }, - { - "epoch": 12.92, - "learning_rate": 9.269706594407302e-08, - "loss": 3.8315, - "step": 1163500 - }, - { - "epoch": 12.93, - "learning_rate": 9.268318415032316e-08, - "loss": 3.8338, - "step": 1164000 - }, - { - "epoch": 12.93, - "learning_rate": 9.26693023565733e-08, - "loss": 3.8307, - "step": 1164500 - }, - { - "epoch": 12.94, - "learning_rate": 9.265542056282344e-08, - "loss": 3.8092, - "step": 1165000 - }, - { - "epoch": 12.94, - "learning_rate": 9.264153876907358e-08, - "loss": 3.8233, - "step": 1165500 - }, - { - "epoch": 12.95, - "learning_rate": 9.262765697532372e-08, - "loss": 3.851, - "step": 1166000 - }, - { - "epoch": 12.95, - "learning_rate": 9.261377518157386e-08, - "loss": 3.8444, - "step": 1166500 - }, - { - "epoch": 12.96, - "learning_rate": 9.2599893387824e-08, - "loss": 3.8309, - "step": 1167000 - }, - { - "epoch": 12.97, - "learning_rate": 9.258601159407413e-08, - "loss": 3.8357, - "step": 1167500 - }, - { - "epoch": 12.97, - "learning_rate": 9.257212980032428e-08, - "loss": 3.8593, - "step": 1168000 - }, - { - "epoch": 12.98, - "learning_rate": 9.255824800657442e-08, - "loss": 3.8359, - "step": 1168500 - }, - { - "epoch": 12.98, - "learning_rate": 9.254436621282455e-08, - "loss": 3.8585, - "step": 1169000 - }, - { - "epoch": 12.99, - "learning_rate": 9.253048441907469e-08, - "loss": 3.8193, - "step": 1169500 - }, - { - "epoch": 12.99, - "learning_rate": 9.251660262532482e-08, - "loss": 3.8318, - "step": 1170000 - }, - { - "epoch": 13.0, - "learning_rate": 9.250272083157496e-08, - "loss": 3.8407, - "step": 1170500 - }, - { - "epoch": 13.0, - "eval_loss": 3.8813984394073486, - "eval_runtime": 6.3111, - "eval_samples_per_second": 246.232, - "step": 1170598 - }, - { - "epoch": 13.0, - "learning_rate": 9.24888390378251e-08, - "loss": 3.8446, - "step": 1171000 - }, - { - "epoch": 13.01, - "learning_rate": 9.247495724407525e-08, - "loss": 3.8684, - "step": 1171500 - }, - { - "epoch": 13.02, - "learning_rate": 9.246107545032539e-08, - "loss": 3.844, - "step": 1172000 - }, - { - "epoch": 13.02, - "learning_rate": 9.244719365657553e-08, - "loss": 3.82, - "step": 1172500 - }, - { - "epoch": 13.03, - "learning_rate": 9.243331186282566e-08, - "loss": 3.8427, - "step": 1173000 - }, - { - "epoch": 13.03, - "learning_rate": 9.24194300690758e-08, - "loss": 3.8248, - "step": 1173500 - }, - { - "epoch": 13.04, - "learning_rate": 9.240554827532594e-08, - "loss": 3.8373, - "step": 1174000 - }, - { - "epoch": 13.04, - "learning_rate": 9.239166648157609e-08, - "loss": 3.8277, - "step": 1174500 - }, - { - "epoch": 13.05, - "learning_rate": 9.237778468782622e-08, - "loss": 3.8483, - "step": 1175000 - }, - { - "epoch": 13.05, - "learning_rate": 9.236390289407636e-08, - "loss": 3.8482, - "step": 1175500 - }, - { - "epoch": 13.06, - "learning_rate": 9.235002110032649e-08, - "loss": 3.843, - "step": 1176000 - }, - { - "epoch": 13.07, - "learning_rate": 9.233613930657663e-08, - "loss": 3.8175, - "step": 1176500 - }, - { - "epoch": 13.07, - "learning_rate": 9.232225751282677e-08, - "loss": 3.8232, - "step": 1177000 - }, - { - "epoch": 13.08, - "learning_rate": 9.230837571907691e-08, - "loss": 3.8571, - "step": 1177500 - }, - { - "epoch": 13.08, - "learning_rate": 9.229449392532706e-08, - "loss": 3.8311, - "step": 1178000 - }, - { - "epoch": 13.09, - "learning_rate": 9.228061213157718e-08, - "loss": 3.8371, - "step": 1178500 - }, - { - "epoch": 13.09, - "learning_rate": 9.226673033782733e-08, - "loss": 3.8236, - "step": 1179000 - }, - { - "epoch": 13.1, - "learning_rate": 9.225284854407747e-08, - "loss": 3.838, - "step": 1179500 - }, - { - "epoch": 13.1, - "learning_rate": 9.223896675032761e-08, - "loss": 3.8577, - "step": 1180000 - }, - { - "epoch": 13.11, - "learning_rate": 9.222508495657775e-08, - "loss": 3.825, - "step": 1180500 - }, - { - "epoch": 13.12, - "learning_rate": 9.221120316282788e-08, - "loss": 3.8368, - "step": 1181000 - }, - { - "epoch": 13.12, - "learning_rate": 9.219732136907802e-08, - "loss": 3.8097, - "step": 1181500 - }, - { - "epoch": 13.13, - "learning_rate": 9.218343957532815e-08, - "loss": 3.8258, - "step": 1182000 - }, - { - "epoch": 13.13, - "learning_rate": 9.21695577815783e-08, - "loss": 3.8237, - "step": 1182500 - }, - { - "epoch": 13.14, - "learning_rate": 9.215567598782844e-08, - "loss": 3.8195, - "step": 1183000 - }, - { - "epoch": 13.14, - "learning_rate": 9.214179419407858e-08, - "loss": 3.8304, - "step": 1183500 - }, - { - "epoch": 13.15, - "learning_rate": 9.212791240032872e-08, - "loss": 3.8469, - "step": 1184000 - }, - { - "epoch": 13.15, - "learning_rate": 9.211403060657885e-08, - "loss": 3.8369, - "step": 1184500 - }, - { - "epoch": 13.16, - "learning_rate": 9.2100148812829e-08, - "loss": 3.8422, - "step": 1185000 - }, - { - "epoch": 13.17, - "learning_rate": 9.208626701907914e-08, - "loss": 3.8232, - "step": 1185500 - }, - { - "epoch": 13.17, - "learning_rate": 9.207238522532928e-08, - "loss": 3.8285, - "step": 1186000 - }, - { - "epoch": 13.18, - "learning_rate": 9.205850343157942e-08, - "loss": 3.8418, - "step": 1186500 - }, - { - "epoch": 13.18, - "learning_rate": 9.204462163782955e-08, - "loss": 3.8435, - "step": 1187000 - }, - { - "epoch": 13.19, - "learning_rate": 9.203073984407968e-08, - "loss": 3.8342, - "step": 1187500 - }, - { - "epoch": 13.19, - "learning_rate": 9.201685805032982e-08, - "loss": 3.8415, - "step": 1188000 - }, - { - "epoch": 13.2, - "learning_rate": 9.200297625657996e-08, - "loss": 3.8621, - "step": 1188500 - }, - { - "epoch": 13.2, - "learning_rate": 9.19890944628301e-08, - "loss": 3.8267, - "step": 1189000 - }, - { - "epoch": 13.21, - "learning_rate": 9.197521266908025e-08, - "loss": 3.8353, - "step": 1189500 - }, - { - "epoch": 13.22, - "learning_rate": 9.196133087533038e-08, - "loss": 3.8356, - "step": 1190000 - }, - { - "epoch": 13.22, - "learning_rate": 9.194744908158052e-08, - "loss": 3.8301, - "step": 1190500 - }, - { - "epoch": 13.23, - "learning_rate": 9.193356728783066e-08, - "loss": 3.8331, - "step": 1191000 - }, - { - "epoch": 13.23, - "learning_rate": 9.19196854940808e-08, - "loss": 3.8421, - "step": 1191500 - }, - { - "epoch": 13.24, - "learning_rate": 9.190580370033095e-08, - "loss": 3.8236, - "step": 1192000 - }, - { - "epoch": 13.24, - "learning_rate": 9.189192190658108e-08, - "loss": 3.8194, - "step": 1192500 - }, - { - "epoch": 13.25, - "learning_rate": 9.187804011283122e-08, - "loss": 3.8467, - "step": 1193000 - }, - { - "epoch": 13.25, - "learning_rate": 9.186415831908135e-08, - "loss": 3.8361, - "step": 1193500 - }, - { - "epoch": 13.26, - "learning_rate": 9.185027652533149e-08, - "loss": 3.8425, - "step": 1194000 - }, - { - "epoch": 13.27, - "learning_rate": 9.183639473158163e-08, - "loss": 3.8211, - "step": 1194500 - }, - { - "epoch": 13.27, - "learning_rate": 9.182251293783177e-08, - "loss": 3.8441, - "step": 1195000 - }, - { - "epoch": 13.28, - "learning_rate": 9.18086311440819e-08, - "loss": 3.8334, - "step": 1195500 - }, - { - "epoch": 13.28, - "learning_rate": 9.179474935033204e-08, - "loss": 3.8259, - "step": 1196000 - }, - { - "epoch": 13.29, - "learning_rate": 9.178086755658219e-08, - "loss": 3.8333, - "step": 1196500 - }, - { - "epoch": 13.29, - "learning_rate": 9.176698576283233e-08, - "loss": 3.824, - "step": 1197000 - }, - { - "epoch": 13.3, - "learning_rate": 9.175310396908247e-08, - "loss": 3.8325, - "step": 1197500 - }, - { - "epoch": 13.3, - "learning_rate": 9.173922217533261e-08, - "loss": 3.8378, - "step": 1198000 - }, - { - "epoch": 13.31, - "learning_rate": 9.172534038158274e-08, - "loss": 3.814, - "step": 1198500 - }, - { - "epoch": 13.32, - "learning_rate": 9.171145858783288e-08, - "loss": 3.8298, - "step": 1199000 - }, - { - "epoch": 13.32, - "learning_rate": 9.169757679408301e-08, - "loss": 3.8539, - "step": 1199500 - }, - { - "epoch": 13.33, - "learning_rate": 9.168369500033316e-08, - "loss": 3.8354, - "step": 1200000 - }, - { - "epoch": 13.33, - "learning_rate": 9.16698132065833e-08, - "loss": 3.8356, - "step": 1200500 - }, - { - "epoch": 13.34, - "learning_rate": 9.165593141283343e-08, - "loss": 3.8324, - "step": 1201000 - }, - { - "epoch": 13.34, - "learning_rate": 9.164204961908357e-08, - "loss": 3.8284, - "step": 1201500 - }, - { - "epoch": 13.35, - "learning_rate": 9.162816782533371e-08, - "loss": 3.8494, - "step": 1202000 - }, - { - "epoch": 13.35, - "learning_rate": 9.161428603158385e-08, - "loss": 3.8236, - "step": 1202500 - }, - { - "epoch": 13.36, - "learning_rate": 9.1600404237834e-08, - "loss": 3.8249, - "step": 1203000 - }, - { - "epoch": 13.37, - "learning_rate": 9.158652244408414e-08, - "loss": 3.8159, - "step": 1203500 - }, - { - "epoch": 13.37, - "learning_rate": 9.157264065033427e-08, - "loss": 3.8306, - "step": 1204000 - }, - { - "epoch": 13.38, - "learning_rate": 9.155875885658441e-08, - "loss": 3.848, - "step": 1204500 - }, - { - "epoch": 13.38, - "learning_rate": 9.154487706283455e-08, - "loss": 3.8348, - "step": 1205000 - }, - { - "epoch": 13.39, - "learning_rate": 9.153099526908468e-08, - "loss": 3.8314, - "step": 1205500 - }, - { - "epoch": 13.39, - "learning_rate": 9.151711347533482e-08, - "loss": 3.8299, - "step": 1206000 - }, - { - "epoch": 13.4, - "learning_rate": 9.150323168158495e-08, - "loss": 3.8377, - "step": 1206500 - }, - { - "epoch": 13.4, - "learning_rate": 9.14893498878351e-08, - "loss": 3.8287, - "step": 1207000 - }, - { - "epoch": 13.41, - "learning_rate": 9.147546809408524e-08, - "loss": 3.8361, - "step": 1207500 - }, - { - "epoch": 13.42, - "learning_rate": 9.146158630033538e-08, - "loss": 3.8096, - "step": 1208000 - }, - { - "epoch": 13.42, - "learning_rate": 9.144770450658552e-08, - "loss": 3.8503, - "step": 1208500 - }, - { - "epoch": 13.43, - "learning_rate": 9.143382271283566e-08, - "loss": 3.8176, - "step": 1209000 - }, - { - "epoch": 13.43, - "learning_rate": 9.141994091908579e-08, - "loss": 3.8606, - "step": 1209500 - }, - { - "epoch": 13.44, - "learning_rate": 9.140605912533594e-08, - "loss": 3.8349, - "step": 1210000 - }, - { - "epoch": 13.44, - "learning_rate": 9.139217733158608e-08, - "loss": 3.8188, - "step": 1210500 - }, - { - "epoch": 13.45, - "learning_rate": 9.137829553783622e-08, - "loss": 3.8401, - "step": 1211000 - }, - { - "epoch": 13.45, - "learning_rate": 9.136441374408635e-08, - "loss": 3.8354, - "step": 1211500 - }, - { - "epoch": 13.46, - "learning_rate": 9.135053195033649e-08, - "loss": 3.8282, - "step": 1212000 - }, - { - "epoch": 13.47, - "learning_rate": 9.133665015658662e-08, - "loss": 3.8049, - "step": 1212500 - }, - { - "epoch": 13.47, - "learning_rate": 9.132276836283676e-08, - "loss": 3.8396, - "step": 1213000 - }, - { - "epoch": 13.48, - "learning_rate": 9.13088865690869e-08, - "loss": 3.8318, - "step": 1213500 - }, - { - "epoch": 13.48, - "learning_rate": 9.129500477533705e-08, - "loss": 3.8309, - "step": 1214000 - }, - { - "epoch": 13.49, - "learning_rate": 9.128112298158719e-08, - "loss": 3.8524, - "step": 1214500 - }, - { - "epoch": 13.49, - "learning_rate": 9.126724118783732e-08, - "loss": 3.8368, - "step": 1215000 - }, - { - "epoch": 13.5, - "learning_rate": 9.125335939408746e-08, - "loss": 3.8345, - "step": 1215500 - }, - { - "epoch": 13.5, - "learning_rate": 9.12394776003376e-08, - "loss": 3.8618, - "step": 1216000 - }, - { - "epoch": 13.51, - "learning_rate": 9.122559580658775e-08, - "loss": 3.836, - "step": 1216500 - }, - { - "epoch": 13.52, - "learning_rate": 9.121171401283789e-08, - "loss": 3.8349, - "step": 1217000 - }, - { - "epoch": 13.52, - "learning_rate": 9.119783221908802e-08, - "loss": 3.8282, - "step": 1217500 - }, - { - "epoch": 13.53, - "learning_rate": 9.118395042533815e-08, - "loss": 3.8279, - "step": 1218000 - }, - { - "epoch": 13.53, - "learning_rate": 9.117006863158829e-08, - "loss": 3.842, - "step": 1218500 - }, - { - "epoch": 13.54, - "learning_rate": 9.115618683783843e-08, - "loss": 3.8508, - "step": 1219000 - }, - { - "epoch": 13.54, - "learning_rate": 9.114230504408857e-08, - "loss": 3.828, - "step": 1219500 - }, - { - "epoch": 13.55, - "learning_rate": 9.112842325033871e-08, - "loss": 3.8032, - "step": 1220000 - }, - { - "epoch": 13.55, - "learning_rate": 9.111454145658886e-08, - "loss": 3.8283, - "step": 1220500 - }, - { - "epoch": 13.56, - "learning_rate": 9.110065966283899e-08, - "loss": 3.8415, - "step": 1221000 - }, - { - "epoch": 13.57, - "learning_rate": 9.108677786908913e-08, - "loss": 3.8396, - "step": 1221500 - }, - { - "epoch": 13.57, - "learning_rate": 9.107289607533927e-08, - "loss": 3.8329, - "step": 1222000 - }, - { - "epoch": 13.58, - "learning_rate": 9.105901428158941e-08, - "loss": 3.8312, - "step": 1222500 - }, - { - "epoch": 13.58, - "learning_rate": 9.104513248783955e-08, - "loss": 3.8564, - "step": 1223000 - }, - { - "epoch": 13.59, - "learning_rate": 9.103125069408968e-08, - "loss": 3.8346, - "step": 1223500 - }, - { - "epoch": 13.59, - "learning_rate": 9.101736890033981e-08, - "loss": 3.8216, - "step": 1224000 - }, - { - "epoch": 13.6, - "learning_rate": 9.100348710658996e-08, - "loss": 3.8521, - "step": 1224500 - }, - { - "epoch": 13.6, - "learning_rate": 9.09896053128401e-08, - "loss": 3.8444, - "step": 1225000 - }, - { - "epoch": 13.61, - "learning_rate": 9.097572351909024e-08, - "loss": 3.8353, - "step": 1225500 - }, - { - "epoch": 13.62, - "learning_rate": 9.096184172534038e-08, - "loss": 3.8281, - "step": 1226000 - }, - { - "epoch": 13.62, - "learning_rate": 9.094795993159051e-08, - "loss": 3.8366, - "step": 1226500 - }, - { - "epoch": 13.63, - "learning_rate": 9.093407813784065e-08, - "loss": 3.8381, - "step": 1227000 - }, - { - "epoch": 13.63, - "learning_rate": 9.09201963440908e-08, - "loss": 3.8248, - "step": 1227500 - }, - { - "epoch": 13.64, - "learning_rate": 9.090631455034094e-08, - "loss": 3.8382, - "step": 1228000 - }, - { - "epoch": 13.64, - "learning_rate": 9.089243275659108e-08, - "loss": 3.8441, - "step": 1228500 - }, - { - "epoch": 13.65, - "learning_rate": 9.087855096284121e-08, - "loss": 3.8471, - "step": 1229000 - }, - { - "epoch": 13.65, - "learning_rate": 9.086466916909135e-08, - "loss": 3.8133, - "step": 1229500 - }, - { - "epoch": 13.66, - "learning_rate": 9.085078737534148e-08, - "loss": 3.8399, - "step": 1230000 - }, - { - "epoch": 13.67, - "learning_rate": 9.083690558159162e-08, - "loss": 3.8245, - "step": 1230500 - }, - { - "epoch": 13.67, - "learning_rate": 9.082302378784176e-08, - "loss": 3.8368, - "step": 1231000 - }, - { - "epoch": 13.68, - "learning_rate": 9.080914199409191e-08, - "loss": 3.8226, - "step": 1231500 - }, - { - "epoch": 13.68, - "learning_rate": 9.079526020034204e-08, - "loss": 3.8517, - "step": 1232000 - }, - { - "epoch": 13.69, - "learning_rate": 9.078137840659218e-08, - "loss": 3.8241, - "step": 1232500 - }, - { - "epoch": 13.69, - "learning_rate": 9.076749661284232e-08, - "loss": 3.8246, - "step": 1233000 - }, - { - "epoch": 13.7, - "learning_rate": 9.075361481909246e-08, - "loss": 3.8316, - "step": 1233500 - }, - { - "epoch": 13.7, - "learning_rate": 9.07397330253426e-08, - "loss": 3.8368, - "step": 1234000 - }, - { - "epoch": 13.71, - "learning_rate": 9.072585123159275e-08, - "loss": 3.8381, - "step": 1234500 - }, - { - "epoch": 13.72, - "learning_rate": 9.071196943784288e-08, - "loss": 3.8406, - "step": 1235000 - }, - { - "epoch": 13.72, - "learning_rate": 9.069808764409302e-08, - "loss": 3.8354, - "step": 1235500 - }, - { - "epoch": 13.73, - "learning_rate": 9.068420585034315e-08, - "loss": 3.8179, - "step": 1236000 - }, - { - "epoch": 13.73, - "learning_rate": 9.067032405659329e-08, - "loss": 3.8327, - "step": 1236500 - }, - { - "epoch": 13.74, - "learning_rate": 9.065644226284343e-08, - "loss": 3.819, - "step": 1237000 - }, - { - "epoch": 13.74, - "learning_rate": 9.064256046909356e-08, - "loss": 3.8291, - "step": 1237500 - }, - { - "epoch": 13.75, - "learning_rate": 9.06286786753437e-08, - "loss": 3.825, - "step": 1238000 - }, - { - "epoch": 13.75, - "learning_rate": 9.061479688159385e-08, - "loss": 3.8373, - "step": 1238500 - }, - { - "epoch": 13.76, - "learning_rate": 9.060091508784399e-08, - "loss": 3.8331, - "step": 1239000 - }, - { - "epoch": 13.77, - "learning_rate": 9.058703329409413e-08, - "loss": 3.8387, - "step": 1239500 - }, - { - "epoch": 13.77, - "learning_rate": 9.057315150034427e-08, - "loss": 3.8382, - "step": 1240000 - }, - { - "epoch": 13.78, - "learning_rate": 9.05592697065944e-08, - "loss": 3.8133, - "step": 1240500 - }, - { - "epoch": 13.78, - "learning_rate": 9.054538791284454e-08, - "loss": 3.8421, - "step": 1241000 - }, - { - "epoch": 13.79, - "learning_rate": 9.053150611909469e-08, - "loss": 3.8277, - "step": 1241500 - }, - { - "epoch": 13.79, - "learning_rate": 9.051762432534482e-08, - "loss": 3.8341, - "step": 1242000 - }, - { - "epoch": 13.8, - "learning_rate": 9.050374253159496e-08, - "loss": 3.8259, - "step": 1242500 - }, - { - "epoch": 13.8, - "learning_rate": 9.048986073784509e-08, - "loss": 3.8276, - "step": 1243000 - }, - { - "epoch": 13.81, - "learning_rate": 9.047597894409523e-08, - "loss": 3.8129, - "step": 1243500 - }, - { - "epoch": 13.82, - "learning_rate": 9.046209715034537e-08, - "loss": 3.8465, - "step": 1244000 - }, - { - "epoch": 13.82, - "learning_rate": 9.044821535659551e-08, - "loss": 3.8372, - "step": 1244500 - }, - { - "epoch": 13.83, - "learning_rate": 9.043433356284566e-08, - "loss": 3.8241, - "step": 1245000 - }, - { - "epoch": 13.83, - "learning_rate": 9.04204517690958e-08, - "loss": 3.8219, - "step": 1245500 - }, - { - "epoch": 13.84, - "learning_rate": 9.040656997534593e-08, - "loss": 3.856, - "step": 1246000 - }, - { - "epoch": 13.84, - "learning_rate": 9.039268818159607e-08, - "loss": 3.84, - "step": 1246500 - }, - { - "epoch": 13.85, - "learning_rate": 9.037880638784621e-08, - "loss": 3.8275, - "step": 1247000 - }, - { - "epoch": 13.85, - "learning_rate": 9.036492459409635e-08, - "loss": 3.8464, - "step": 1247500 - }, - { - "epoch": 13.86, - "learning_rate": 9.035104280034648e-08, - "loss": 3.8119, - "step": 1248000 - }, - { - "epoch": 13.87, - "learning_rate": 9.033716100659663e-08, - "loss": 3.8291, - "step": 1248500 - }, - { - "epoch": 13.87, - "learning_rate": 9.032327921284675e-08, - "loss": 3.8286, - "step": 1249000 - }, - { - "epoch": 13.88, - "learning_rate": 9.03093974190969e-08, - "loss": 3.8381, - "step": 1249500 - }, - { - "epoch": 13.88, - "learning_rate": 9.029551562534704e-08, - "loss": 3.8245, - "step": 1250000 - }, - { - "epoch": 13.89, - "learning_rate": 9.028163383159718e-08, - "loss": 3.8428, - "step": 1250500 - }, - { - "epoch": 13.89, - "learning_rate": 9.026775203784732e-08, - "loss": 3.8363, - "step": 1251000 - }, - { - "epoch": 13.9, - "learning_rate": 9.025387024409745e-08, - "loss": 3.8453, - "step": 1251500 - }, - { - "epoch": 13.9, - "learning_rate": 9.02399884503476e-08, - "loss": 3.8276, - "step": 1252000 - }, - { - "epoch": 13.91, - "learning_rate": 9.022610665659774e-08, - "loss": 3.8127, - "step": 1252500 - }, - { - "epoch": 13.92, - "learning_rate": 9.021222486284788e-08, - "loss": 3.802, - "step": 1253000 - }, - { - "epoch": 13.92, - "learning_rate": 9.019834306909802e-08, - "loss": 3.8374, - "step": 1253500 - }, - { - "epoch": 13.93, - "learning_rate": 9.018446127534815e-08, - "loss": 3.8412, - "step": 1254000 - }, - { - "epoch": 13.93, - "learning_rate": 9.017057948159828e-08, - "loss": 3.8401, - "step": 1254500 - }, - { - "epoch": 13.94, - "learning_rate": 9.015669768784842e-08, - "loss": 3.8404, - "step": 1255000 - }, - { - "epoch": 13.94, - "learning_rate": 9.014281589409856e-08, - "loss": 3.827, - "step": 1255500 - }, - { - "epoch": 13.95, - "learning_rate": 9.01289341003487e-08, - "loss": 3.8031, - "step": 1256000 - }, - { - "epoch": 13.95, - "learning_rate": 9.011505230659885e-08, - "loss": 3.8386, - "step": 1256500 - }, - { - "epoch": 13.96, - "learning_rate": 9.010117051284899e-08, - "loss": 3.8371, - "step": 1257000 - }, - { - "epoch": 13.97, - "learning_rate": 9.008728871909912e-08, - "loss": 3.8458, - "step": 1257500 - }, - { - "epoch": 13.97, - "learning_rate": 9.007340692534926e-08, - "loss": 3.8242, - "step": 1258000 - }, - { - "epoch": 13.98, - "learning_rate": 9.00595251315994e-08, - "loss": 3.827, - "step": 1258500 - }, - { - "epoch": 13.98, - "learning_rate": 9.004564333784955e-08, - "loss": 3.8209, - "step": 1259000 - }, - { - "epoch": 13.99, - "learning_rate": 9.003176154409969e-08, - "loss": 3.8208, - "step": 1259500 - }, - { - "epoch": 13.99, - "learning_rate": 9.001787975034982e-08, - "loss": 3.8424, - "step": 1260000 - }, - { - "epoch": 14.0, - "learning_rate": 9.000399795659995e-08, - "loss": 3.8257, - "step": 1260500 - }, - { - "epoch": 14.0, - "eval_loss": 3.875882387161255, - "eval_runtime": 6.3086, - "eval_samples_per_second": 246.331, - "step": 1260644 - }, - { - "epoch": 14.0, - "learning_rate": 8.999011616285009e-08, - "loss": 3.8341, - "step": 1261000 - }, - { - "epoch": 14.01, - "learning_rate": 8.997623436910023e-08, - "loss": 3.829, - "step": 1261500 - }, - { - "epoch": 14.02, - "learning_rate": 8.996235257535037e-08, - "loss": 3.8324, - "step": 1262000 - }, - { - "epoch": 14.02, - "learning_rate": 8.994847078160052e-08, - "loss": 3.8381, - "step": 1262500 - }, - { - "epoch": 14.03, - "learning_rate": 8.993458898785064e-08, - "loss": 3.8326, - "step": 1263000 - }, - { - "epoch": 14.03, - "learning_rate": 8.992070719410079e-08, - "loss": 3.8233, - "step": 1263500 - }, - { - "epoch": 14.04, - "learning_rate": 8.990682540035093e-08, - "loss": 3.8228, - "step": 1264000 - }, - { - "epoch": 14.04, - "learning_rate": 8.989294360660107e-08, - "loss": 3.8312, - "step": 1264500 - }, - { - "epoch": 14.05, - "learning_rate": 8.987906181285121e-08, - "loss": 3.8304, - "step": 1265000 - }, - { - "epoch": 14.05, - "learning_rate": 8.986518001910134e-08, - "loss": 3.832, - "step": 1265500 - }, - { - "epoch": 14.06, - "learning_rate": 8.985129822535149e-08, - "loss": 3.8391, - "step": 1266000 - }, - { - "epoch": 14.07, - "learning_rate": 8.983741643160161e-08, - "loss": 3.8225, - "step": 1266500 - }, - { - "epoch": 14.07, - "learning_rate": 8.982353463785176e-08, - "loss": 3.8233, - "step": 1267000 - }, - { - "epoch": 14.08, - "learning_rate": 8.98096528441019e-08, - "loss": 3.7987, - "step": 1267500 - }, - { - "epoch": 14.08, - "learning_rate": 8.979577105035204e-08, - "loss": 3.8269, - "step": 1268000 - }, - { - "epoch": 14.09, - "learning_rate": 8.978188925660217e-08, - "loss": 3.8375, - "step": 1268500 - }, - { - "epoch": 14.09, - "learning_rate": 8.976800746285231e-08, - "loss": 3.8304, - "step": 1269000 - }, - { - "epoch": 14.1, - "learning_rate": 8.975412566910245e-08, - "loss": 3.826, - "step": 1269500 - }, - { - "epoch": 14.1, - "learning_rate": 8.97402438753526e-08, - "loss": 3.8233, - "step": 1270000 - }, - { - "epoch": 14.11, - "learning_rate": 8.972636208160274e-08, - "loss": 3.8151, - "step": 1270500 - }, - { - "epoch": 14.12, - "learning_rate": 8.971248028785288e-08, - "loss": 3.8232, - "step": 1271000 - }, - { - "epoch": 14.12, - "learning_rate": 8.969859849410301e-08, - "loss": 3.82, - "step": 1271500 - }, - { - "epoch": 14.13, - "learning_rate": 8.968471670035315e-08, - "loss": 3.8402, - "step": 1272000 - }, - { - "epoch": 14.13, - "learning_rate": 8.967083490660328e-08, - "loss": 3.8239, - "step": 1272500 - }, - { - "epoch": 14.14, - "learning_rate": 8.965695311285342e-08, - "loss": 3.8446, - "step": 1273000 - }, - { - "epoch": 14.14, - "learning_rate": 8.964307131910357e-08, - "loss": 3.8217, - "step": 1273500 - }, - { - "epoch": 14.15, - "learning_rate": 8.96291895253537e-08, - "loss": 3.8284, - "step": 1274000 - }, - { - "epoch": 14.15, - "learning_rate": 8.961530773160384e-08, - "loss": 3.8369, - "step": 1274500 - }, - { - "epoch": 14.16, - "learning_rate": 8.960142593785398e-08, - "loss": 3.8373, - "step": 1275000 - }, - { - "epoch": 14.16, - "learning_rate": 8.958754414410412e-08, - "loss": 3.8222, - "step": 1275500 - }, - { - "epoch": 14.17, - "learning_rate": 8.957366235035426e-08, - "loss": 3.8344, - "step": 1276000 - }, - { - "epoch": 14.18, - "learning_rate": 8.95597805566044e-08, - "loss": 3.8185, - "step": 1276500 - }, - { - "epoch": 14.18, - "learning_rate": 8.954589876285454e-08, - "loss": 3.8252, - "step": 1277000 - }, - { - "epoch": 14.19, - "learning_rate": 8.953201696910468e-08, - "loss": 3.8374, - "step": 1277500 - }, - { - "epoch": 14.19, - "learning_rate": 8.951813517535482e-08, - "loss": 3.8445, - "step": 1278000 - }, - { - "epoch": 14.2, - "learning_rate": 8.950425338160495e-08, - "loss": 3.8207, - "step": 1278500 - }, - { - "epoch": 14.2, - "learning_rate": 8.949037158785509e-08, - "loss": 3.8263, - "step": 1279000 - }, - { - "epoch": 14.21, - "learning_rate": 8.947648979410523e-08, - "loss": 3.8335, - "step": 1279500 - }, - { - "epoch": 14.21, - "learning_rate": 8.946260800035536e-08, - "loss": 3.817, - "step": 1280000 - }, - { - "epoch": 14.22, - "learning_rate": 8.94487262066055e-08, - "loss": 3.8187, - "step": 1280500 - }, - { - "epoch": 14.23, - "learning_rate": 8.943484441285565e-08, - "loss": 3.8278, - "step": 1281000 - }, - { - "epoch": 14.23, - "learning_rate": 8.942096261910579e-08, - "loss": 3.8334, - "step": 1281500 - }, - { - "epoch": 14.24, - "learning_rate": 8.940708082535593e-08, - "loss": 3.828, - "step": 1282000 - }, - { - "epoch": 14.24, - "learning_rate": 8.939319903160606e-08, - "loss": 3.8245, - "step": 1282500 - }, - { - "epoch": 14.25, - "learning_rate": 8.93793172378562e-08, - "loss": 3.8187, - "step": 1283000 - }, - { - "epoch": 14.25, - "learning_rate": 8.936543544410635e-08, - "loss": 3.8182, - "step": 1283500 - }, - { - "epoch": 14.26, - "learning_rate": 8.935155365035649e-08, - "loss": 3.8117, - "step": 1284000 - }, - { - "epoch": 14.26, - "learning_rate": 8.933767185660662e-08, - "loss": 3.8316, - "step": 1284500 - }, - { - "epoch": 14.27, - "learning_rate": 8.932379006285676e-08, - "loss": 3.8354, - "step": 1285000 - }, - { - "epoch": 14.28, - "learning_rate": 8.930990826910689e-08, - "loss": 3.8284, - "step": 1285500 - }, - { - "epoch": 14.28, - "learning_rate": 8.929602647535703e-08, - "loss": 3.8349, - "step": 1286000 - }, - { - "epoch": 14.29, - "learning_rate": 8.928214468160717e-08, - "loss": 3.8173, - "step": 1286500 - }, - { - "epoch": 14.29, - "learning_rate": 8.926826288785731e-08, - "loss": 3.8206, - "step": 1287000 - }, - { - "epoch": 14.3, - "learning_rate": 8.925438109410746e-08, - "loss": 3.8299, - "step": 1287500 - }, - { - "epoch": 14.3, - "learning_rate": 8.924049930035759e-08, - "loss": 3.8193, - "step": 1288000 - }, - { - "epoch": 14.31, - "learning_rate": 8.922661750660773e-08, - "loss": 3.8268, - "step": 1288500 - }, - { - "epoch": 14.31, - "learning_rate": 8.921273571285787e-08, - "loss": 3.8106, - "step": 1289000 - }, - { - "epoch": 14.32, - "learning_rate": 8.919885391910801e-08, - "loss": 3.819, - "step": 1289500 - }, - { - "epoch": 14.33, - "learning_rate": 8.918497212535815e-08, - "loss": 3.8268, - "step": 1290000 - }, - { - "epoch": 14.33, - "learning_rate": 8.917109033160828e-08, - "loss": 3.8401, - "step": 1290500 - }, - { - "epoch": 14.34, - "learning_rate": 8.915720853785841e-08, - "loss": 3.8225, - "step": 1291000 - }, - { - "epoch": 14.34, - "learning_rate": 8.914332674410856e-08, - "loss": 3.8301, - "step": 1291500 - }, - { - "epoch": 14.35, - "learning_rate": 8.91294449503587e-08, - "loss": 3.8297, - "step": 1292000 - }, - { - "epoch": 14.35, - "learning_rate": 8.911556315660884e-08, - "loss": 3.8346, - "step": 1292500 - }, - { - "epoch": 14.36, - "learning_rate": 8.910168136285898e-08, - "loss": 3.8534, - "step": 1293000 - }, - { - "epoch": 14.36, - "learning_rate": 8.908779956910912e-08, - "loss": 3.8296, - "step": 1293500 - }, - { - "epoch": 14.37, - "learning_rate": 8.907391777535925e-08, - "loss": 3.8473, - "step": 1294000 - }, - { - "epoch": 14.38, - "learning_rate": 8.90600359816094e-08, - "loss": 3.8173, - "step": 1294500 - }, - { - "epoch": 14.38, - "learning_rate": 8.904615418785954e-08, - "loss": 3.8274, - "step": 1295000 - }, - { - "epoch": 14.39, - "learning_rate": 8.903227239410968e-08, - "loss": 3.8126, - "step": 1295500 - }, - { - "epoch": 14.39, - "learning_rate": 8.901839060035982e-08, - "loss": 3.8168, - "step": 1296000 - }, - { - "epoch": 14.4, - "learning_rate": 8.900450880660995e-08, - "loss": 3.8136, - "step": 1296500 - }, - { - "epoch": 14.4, - "learning_rate": 8.899062701286008e-08, - "loss": 3.8212, - "step": 1297000 - }, - { - "epoch": 14.41, - "learning_rate": 8.897674521911022e-08, - "loss": 3.8245, - "step": 1297500 - }, - { - "epoch": 14.41, - "learning_rate": 8.896286342536037e-08, - "loss": 3.8219, - "step": 1298000 - }, - { - "epoch": 14.42, - "learning_rate": 8.894898163161051e-08, - "loss": 3.8133, - "step": 1298500 - }, - { - "epoch": 14.43, - "learning_rate": 8.893509983786065e-08, - "loss": 3.8344, - "step": 1299000 - }, - { - "epoch": 14.43, - "learning_rate": 8.892121804411078e-08, - "loss": 3.8299, - "step": 1299500 - }, - { - "epoch": 14.44, - "learning_rate": 8.890733625036092e-08, - "loss": 3.8283, - "step": 1300000 - }, - { - "epoch": 14.44, - "learning_rate": 8.889345445661106e-08, - "loss": 3.8263, - "step": 1300500 - }, - { - "epoch": 14.45, - "learning_rate": 8.88795726628612e-08, - "loss": 3.8288, - "step": 1301000 - }, - { - "epoch": 14.45, - "learning_rate": 8.886569086911135e-08, - "loss": 3.8437, - "step": 1301500 - }, - { - "epoch": 14.46, - "learning_rate": 8.885180907536148e-08, - "loss": 3.8196, - "step": 1302000 - }, - { - "epoch": 14.46, - "learning_rate": 8.883792728161162e-08, - "loss": 3.8317, - "step": 1302500 - }, - { - "epoch": 14.47, - "learning_rate": 8.882404548786175e-08, - "loss": 3.8008, - "step": 1303000 - }, - { - "epoch": 14.48, - "learning_rate": 8.881016369411189e-08, - "loss": 3.838, - "step": 1303500 - }, - { - "epoch": 14.48, - "learning_rate": 8.879628190036203e-08, - "loss": 3.8156, - "step": 1304000 - }, - { - "epoch": 14.49, - "learning_rate": 8.878240010661217e-08, - "loss": 3.8096, - "step": 1304500 - }, - { - "epoch": 14.49, - "learning_rate": 8.87685183128623e-08, - "loss": 3.8306, - "step": 1305000 - }, - { - "epoch": 14.5, - "learning_rate": 8.875463651911245e-08, - "loss": 3.8375, - "step": 1305500 - }, - { - "epoch": 14.5, - "learning_rate": 8.874075472536259e-08, - "loss": 3.8216, - "step": 1306000 - }, - { - "epoch": 14.51, - "learning_rate": 8.872687293161273e-08, - "loss": 3.8256, - "step": 1306500 - }, - { - "epoch": 14.51, - "learning_rate": 8.871299113786287e-08, - "loss": 3.8302, - "step": 1307000 - }, - { - "epoch": 14.52, - "learning_rate": 8.869910934411302e-08, - "loss": 3.8363, - "step": 1307500 - }, - { - "epoch": 14.53, - "learning_rate": 8.868522755036314e-08, - "loss": 3.8195, - "step": 1308000 - }, - { - "epoch": 14.53, - "learning_rate": 8.867134575661329e-08, - "loss": 3.8135, - "step": 1308500 - }, - { - "epoch": 14.54, - "learning_rate": 8.865746396286342e-08, - "loss": 3.8315, - "step": 1309000 - }, - { - "epoch": 14.54, - "learning_rate": 8.864358216911356e-08, - "loss": 3.8357, - "step": 1309500 - }, - { - "epoch": 14.55, - "learning_rate": 8.86297003753637e-08, - "loss": 3.8469, - "step": 1310000 - }, - { - "epoch": 14.55, - "learning_rate": 8.861581858161383e-08, - "loss": 3.8455, - "step": 1310500 - }, - { - "epoch": 14.56, - "learning_rate": 8.860193678786397e-08, - "loss": 3.8335, - "step": 1311000 - }, - { - "epoch": 14.56, - "learning_rate": 8.858805499411411e-08, - "loss": 3.8315, - "step": 1311500 - }, - { - "epoch": 14.57, - "learning_rate": 8.857417320036426e-08, - "loss": 3.8066, - "step": 1312000 - }, - { - "epoch": 14.58, - "learning_rate": 8.85602914066144e-08, - "loss": 3.8235, - "step": 1312500 - }, - { - "epoch": 14.58, - "learning_rate": 8.854640961286454e-08, - "loss": 3.7941, - "step": 1313000 - }, - { - "epoch": 14.59, - "learning_rate": 8.853252781911467e-08, - "loss": 3.8239, - "step": 1313500 - }, - { - "epoch": 14.59, - "learning_rate": 8.851864602536481e-08, - "loss": 3.8576, - "step": 1314000 - }, - { - "epoch": 14.6, - "learning_rate": 8.850476423161495e-08, - "loss": 3.8174, - "step": 1314500 - }, - { - "epoch": 14.6, - "learning_rate": 8.849088243786508e-08, - "loss": 3.8036, - "step": 1315000 - }, - { - "epoch": 14.61, - "learning_rate": 8.847700064411523e-08, - "loss": 3.8326, - "step": 1315500 - }, - { - "epoch": 14.61, - "learning_rate": 8.846311885036537e-08, - "loss": 3.8239, - "step": 1316000 - }, - { - "epoch": 14.62, - "learning_rate": 8.84492370566155e-08, - "loss": 3.8235, - "step": 1316500 - }, - { - "epoch": 14.63, - "learning_rate": 8.843535526286564e-08, - "loss": 3.8396, - "step": 1317000 - }, - { - "epoch": 14.63, - "learning_rate": 8.842147346911578e-08, - "loss": 3.8159, - "step": 1317500 - }, - { - "epoch": 14.64, - "learning_rate": 8.840759167536592e-08, - "loss": 3.8475, - "step": 1318000 - }, - { - "epoch": 14.64, - "learning_rate": 8.839370988161607e-08, - "loss": 3.8351, - "step": 1318500 - }, - { - "epoch": 14.65, - "learning_rate": 8.83798280878662e-08, - "loss": 3.8275, - "step": 1319000 - }, - { - "epoch": 14.65, - "learning_rate": 8.836594629411634e-08, - "loss": 3.8283, - "step": 1319500 - }, - { - "epoch": 14.66, - "learning_rate": 8.835206450036648e-08, - "loss": 3.8388, - "step": 1320000 - }, - { - "epoch": 14.66, - "learning_rate": 8.833818270661662e-08, - "loss": 3.8339, - "step": 1320500 - }, - { - "epoch": 14.67, - "learning_rate": 8.832430091286675e-08, - "loss": 3.8296, - "step": 1321000 - }, - { - "epoch": 14.68, - "learning_rate": 8.831041911911689e-08, - "loss": 3.8061, - "step": 1321500 - }, - { - "epoch": 14.68, - "learning_rate": 8.829653732536702e-08, - "loss": 3.8157, - "step": 1322000 - }, - { - "epoch": 14.69, - "learning_rate": 8.828265553161716e-08, - "loss": 3.8295, - "step": 1322500 - }, - { - "epoch": 14.69, - "learning_rate": 8.82687737378673e-08, - "loss": 3.8406, - "step": 1323000 - }, - { - "epoch": 14.7, - "learning_rate": 8.825489194411745e-08, - "loss": 3.8231, - "step": 1323500 - }, - { - "epoch": 14.7, - "learning_rate": 8.824101015036759e-08, - "loss": 3.8234, - "step": 1324000 - }, - { - "epoch": 14.71, - "learning_rate": 8.822712835661772e-08, - "loss": 3.8236, - "step": 1324500 - }, - { - "epoch": 14.71, - "learning_rate": 8.821324656286786e-08, - "loss": 3.8288, - "step": 1325000 - }, - { - "epoch": 14.72, - "learning_rate": 8.8199364769118e-08, - "loss": 3.8349, - "step": 1325500 - }, - { - "epoch": 14.73, - "learning_rate": 8.818548297536815e-08, - "loss": 3.822, - "step": 1326000 - }, - { - "epoch": 14.73, - "learning_rate": 8.817160118161829e-08, - "loss": 3.8215, - "step": 1326500 - }, - { - "epoch": 14.74, - "learning_rate": 8.815771938786842e-08, - "loss": 3.8243, - "step": 1327000 - }, - { - "epoch": 14.74, - "learning_rate": 8.814383759411855e-08, - "loss": 3.8323, - "step": 1327500 - }, - { - "epoch": 14.75, - "learning_rate": 8.812995580036869e-08, - "loss": 3.8057, - "step": 1328000 - }, - { - "epoch": 14.75, - "learning_rate": 8.811607400661883e-08, - "loss": 3.8359, - "step": 1328500 - }, - { - "epoch": 14.76, - "learning_rate": 8.810219221286897e-08, - "loss": 3.8208, - "step": 1329000 - }, - { - "epoch": 14.76, - "learning_rate": 8.808831041911912e-08, - "loss": 3.8197, - "step": 1329500 - }, - { - "epoch": 14.77, - "learning_rate": 8.807442862536926e-08, - "loss": 3.7929, - "step": 1330000 - }, - { - "epoch": 14.78, - "learning_rate": 8.806054683161939e-08, - "loss": 3.8472, - "step": 1330500 - }, - { - "epoch": 14.78, - "learning_rate": 8.804666503786953e-08, - "loss": 3.8148, - "step": 1331000 - }, - { - "epoch": 14.79, - "learning_rate": 8.803278324411967e-08, - "loss": 3.8319, - "step": 1331500 - }, - { - "epoch": 14.79, - "learning_rate": 8.801890145036981e-08, - "loss": 3.8321, - "step": 1332000 - }, - { - "epoch": 14.8, - "learning_rate": 8.800501965661996e-08, - "loss": 3.8233, - "step": 1332500 - }, - { - "epoch": 14.8, - "learning_rate": 8.799113786287009e-08, - "loss": 3.8252, - "step": 1333000 - }, - { - "epoch": 14.81, - "learning_rate": 8.797725606912021e-08, - "loss": 3.8348, - "step": 1333500 - }, - { - "epoch": 14.81, - "learning_rate": 8.796337427537036e-08, - "loss": 3.8349, - "step": 1334000 - }, - { - "epoch": 14.82, - "learning_rate": 8.79494924816205e-08, - "loss": 3.825, - "step": 1334500 - }, - { - "epoch": 14.83, - "learning_rate": 8.793561068787064e-08, - "loss": 3.8209, - "step": 1335000 - }, - { - "epoch": 14.83, - "learning_rate": 8.792172889412078e-08, - "loss": 3.8269, - "step": 1335500 - }, - { - "epoch": 14.84, - "learning_rate": 8.790784710037091e-08, - "loss": 3.8154, - "step": 1336000 - }, - { - "epoch": 14.84, - "learning_rate": 8.789396530662105e-08, - "loss": 3.8206, - "step": 1336500 - }, - { - "epoch": 14.85, - "learning_rate": 8.78800835128712e-08, - "loss": 3.8202, - "step": 1337000 - }, - { - "epoch": 14.85, - "learning_rate": 8.786620171912134e-08, - "loss": 3.8162, - "step": 1337500 - }, - { - "epoch": 14.86, - "learning_rate": 8.785231992537148e-08, - "loss": 3.832, - "step": 1338000 - }, - { - "epoch": 14.86, - "learning_rate": 8.783843813162161e-08, - "loss": 3.8283, - "step": 1338500 - }, - { - "epoch": 14.87, - "learning_rate": 8.782455633787175e-08, - "loss": 3.8234, - "step": 1339000 - }, - { - "epoch": 14.88, - "learning_rate": 8.781067454412188e-08, - "loss": 3.8269, - "step": 1339500 - }, - { - "epoch": 14.88, - "learning_rate": 8.779679275037202e-08, - "loss": 3.8264, - "step": 1340000 - }, - { - "epoch": 14.89, - "learning_rate": 8.778291095662217e-08, - "loss": 3.8198, - "step": 1340500 - }, - { - "epoch": 14.89, - "learning_rate": 8.776902916287231e-08, - "loss": 3.8232, - "step": 1341000 - }, - { - "epoch": 14.9, - "learning_rate": 8.775514736912244e-08, - "loss": 3.8087, - "step": 1341500 - }, - { - "epoch": 14.9, - "learning_rate": 8.774126557537258e-08, - "loss": 3.8175, - "step": 1342000 - }, - { - "epoch": 14.91, - "learning_rate": 8.772738378162272e-08, - "loss": 3.843, - "step": 1342500 - }, - { - "epoch": 14.91, - "learning_rate": 8.771350198787286e-08, - "loss": 3.8228, - "step": 1343000 - }, - { - "epoch": 14.92, - "learning_rate": 8.769962019412301e-08, - "loss": 3.8287, - "step": 1343500 - }, - { - "epoch": 14.93, - "learning_rate": 8.768573840037315e-08, - "loss": 3.825, - "step": 1344000 - }, - { - "epoch": 14.93, - "learning_rate": 8.767185660662328e-08, - "loss": 3.8371, - "step": 1344500 - }, - { - "epoch": 14.94, - "learning_rate": 8.765797481287342e-08, - "loss": 3.8076, - "step": 1345000 - }, - { - "epoch": 14.94, - "learning_rate": 8.764409301912355e-08, - "loss": 3.8097, - "step": 1345500 - }, - { - "epoch": 14.95, - "learning_rate": 8.763021122537369e-08, - "loss": 3.8304, - "step": 1346000 - }, - { - "epoch": 14.95, - "learning_rate": 8.761632943162383e-08, - "loss": 3.8157, - "step": 1346500 - }, - { - "epoch": 14.96, - "learning_rate": 8.760244763787396e-08, - "loss": 3.8159, - "step": 1347000 - }, - { - "epoch": 14.96, - "learning_rate": 8.75885658441241e-08, - "loss": 3.8155, - "step": 1347500 - }, - { - "epoch": 14.97, - "learning_rate": 8.757468405037425e-08, - "loss": 3.818, - "step": 1348000 - }, - { - "epoch": 14.98, - "learning_rate": 8.756080225662439e-08, - "loss": 3.835, - "step": 1348500 - }, - { - "epoch": 14.98, - "learning_rate": 8.754692046287453e-08, - "loss": 3.8305, - "step": 1349000 - }, - { - "epoch": 14.99, - "learning_rate": 8.753303866912467e-08, - "loss": 3.8271, - "step": 1349500 - }, - { - "epoch": 14.99, - "learning_rate": 8.75191568753748e-08, - "loss": 3.8227, - "step": 1350000 - }, - { - "epoch": 15.0, - "learning_rate": 8.750527508162495e-08, - "loss": 3.8244, - "step": 1350500 - }, - { - "epoch": 15.0, - "eval_loss": 3.8713603019714355, - "eval_runtime": 6.3095, - "eval_samples_per_second": 246.297, - "step": 1350690 - }, - { - "epoch": 15.0, - "learning_rate": 8.749139328787509e-08, - "loss": 3.8192, - "step": 1351000 - }, - { - "epoch": 15.01, - "learning_rate": 8.747751149412522e-08, - "loss": 3.8112, - "step": 1351500 - }, - { - "epoch": 15.01, - "learning_rate": 8.746362970037536e-08, - "loss": 3.8426, - "step": 1352000 - }, - { - "epoch": 15.02, - "learning_rate": 8.74497479066255e-08, - "loss": 3.8161, - "step": 1352500 - }, - { - "epoch": 15.03, - "learning_rate": 8.743586611287563e-08, - "loss": 3.8126, - "step": 1353000 - }, - { - "epoch": 15.03, - "learning_rate": 8.742198431912577e-08, - "loss": 3.8203, - "step": 1353500 - }, - { - "epoch": 15.04, - "learning_rate": 8.740810252537591e-08, - "loss": 3.8411, - "step": 1354000 - }, - { - "epoch": 15.04, - "learning_rate": 8.739422073162606e-08, - "loss": 3.834, - "step": 1354500 - }, - { - "epoch": 15.05, - "learning_rate": 8.73803389378762e-08, - "loss": 3.8202, - "step": 1355000 - }, - { - "epoch": 15.05, - "learning_rate": 8.736645714412633e-08, - "loss": 3.8192, - "step": 1355500 - }, - { - "epoch": 15.06, - "learning_rate": 8.735257535037647e-08, - "loss": 3.8115, - "step": 1356000 - }, - { - "epoch": 15.06, - "learning_rate": 8.733869355662661e-08, - "loss": 3.8107, - "step": 1356500 - }, - { - "epoch": 15.07, - "learning_rate": 8.732481176287676e-08, - "loss": 3.8264, - "step": 1357000 - }, - { - "epoch": 15.08, - "learning_rate": 8.731092996912688e-08, - "loss": 3.8289, - "step": 1357500 - }, - { - "epoch": 15.08, - "learning_rate": 8.729704817537703e-08, - "loss": 3.8233, - "step": 1358000 - }, - { - "epoch": 15.09, - "learning_rate": 8.728316638162716e-08, - "loss": 3.8138, - "step": 1358500 - }, - { - "epoch": 15.09, - "learning_rate": 8.72692845878773e-08, - "loss": 3.8318, - "step": 1359000 - }, - { - "epoch": 15.1, - "learning_rate": 8.725540279412744e-08, - "loss": 3.8336, - "step": 1359500 - }, - { - "epoch": 15.1, - "learning_rate": 8.724152100037758e-08, - "loss": 3.8161, - "step": 1360000 - }, - { - "epoch": 15.11, - "learning_rate": 8.722763920662772e-08, - "loss": 3.8088, - "step": 1360500 - }, - { - "epoch": 15.11, - "learning_rate": 8.721375741287785e-08, - "loss": 3.8286, - "step": 1361000 - }, - { - "epoch": 15.12, - "learning_rate": 8.7199875619128e-08, - "loss": 3.8334, - "step": 1361500 - }, - { - "epoch": 15.13, - "learning_rate": 8.718599382537814e-08, - "loss": 3.822, - "step": 1362000 - }, - { - "epoch": 15.13, - "learning_rate": 8.717211203162828e-08, - "loss": 3.8159, - "step": 1362500 - }, - { - "epoch": 15.14, - "learning_rate": 8.715823023787842e-08, - "loss": 3.8314, - "step": 1363000 - }, - { - "epoch": 15.14, - "learning_rate": 8.714434844412855e-08, - "loss": 3.8308, - "step": 1363500 - }, - { - "epoch": 15.15, - "learning_rate": 8.713046665037868e-08, - "loss": 3.8333, - "step": 1364000 - }, - { - "epoch": 15.15, - "learning_rate": 8.711658485662882e-08, - "loss": 3.8294, - "step": 1364500 - }, - { - "epoch": 15.16, - "learning_rate": 8.710270306287897e-08, - "loss": 3.7999, - "step": 1365000 - }, - { - "epoch": 15.16, - "learning_rate": 8.708882126912911e-08, - "loss": 3.823, - "step": 1365500 - }, - { - "epoch": 15.17, - "learning_rate": 8.707493947537925e-08, - "loss": 3.8281, - "step": 1366000 - }, - { - "epoch": 15.18, - "learning_rate": 8.706105768162939e-08, - "loss": 3.8191, - "step": 1366500 - }, - { - "epoch": 15.18, - "learning_rate": 8.704717588787952e-08, - "loss": 3.8142, - "step": 1367000 - }, - { - "epoch": 15.19, - "learning_rate": 8.703329409412966e-08, - "loss": 3.827, - "step": 1367500 - }, - { - "epoch": 15.19, - "learning_rate": 8.70194123003798e-08, - "loss": 3.8198, - "step": 1368000 - }, - { - "epoch": 15.2, - "learning_rate": 8.700553050662995e-08, - "loss": 3.8002, - "step": 1368500 - }, - { - "epoch": 15.2, - "learning_rate": 8.699164871288009e-08, - "loss": 3.8125, - "step": 1369000 - }, - { - "epoch": 15.21, - "learning_rate": 8.697776691913022e-08, - "loss": 3.8256, - "step": 1369500 - }, - { - "epoch": 15.21, - "learning_rate": 8.696388512538035e-08, - "loss": 3.8151, - "step": 1370000 - }, - { - "epoch": 15.22, - "learning_rate": 8.695000333163049e-08, - "loss": 3.842, - "step": 1370500 - }, - { - "epoch": 15.23, - "learning_rate": 8.693612153788063e-08, - "loss": 3.8203, - "step": 1371000 - }, - { - "epoch": 15.23, - "learning_rate": 8.692223974413077e-08, - "loss": 3.8075, - "step": 1371500 - }, - { - "epoch": 15.24, - "learning_rate": 8.690835795038092e-08, - "loss": 3.8278, - "step": 1372000 - }, - { - "epoch": 15.24, - "learning_rate": 8.689447615663105e-08, - "loss": 3.8106, - "step": 1372500 - }, - { - "epoch": 15.25, - "learning_rate": 8.688059436288119e-08, - "loss": 3.8194, - "step": 1373000 - }, - { - "epoch": 15.25, - "learning_rate": 8.686671256913133e-08, - "loss": 3.8153, - "step": 1373500 - }, - { - "epoch": 15.26, - "learning_rate": 8.685283077538147e-08, - "loss": 3.8231, - "step": 1374000 - }, - { - "epoch": 15.26, - "learning_rate": 8.683894898163162e-08, - "loss": 3.8125, - "step": 1374500 - }, - { - "epoch": 15.27, - "learning_rate": 8.682506718788174e-08, - "loss": 3.8376, - "step": 1375000 - }, - { - "epoch": 15.28, - "learning_rate": 8.681118539413189e-08, - "loss": 3.8077, - "step": 1375500 - }, - { - "epoch": 15.28, - "learning_rate": 8.679730360038202e-08, - "loss": 3.8267, - "step": 1376000 - }, - { - "epoch": 15.29, - "learning_rate": 8.678342180663216e-08, - "loss": 3.7899, - "step": 1376500 - }, - { - "epoch": 15.29, - "learning_rate": 8.67695400128823e-08, - "loss": 3.8168, - "step": 1377000 - }, - { - "epoch": 15.3, - "learning_rate": 8.675565821913244e-08, - "loss": 3.8086, - "step": 1377500 - }, - { - "epoch": 15.3, - "learning_rate": 8.674177642538257e-08, - "loss": 3.8206, - "step": 1378000 - }, - { - "epoch": 15.31, - "learning_rate": 8.672789463163271e-08, - "loss": 3.8108, - "step": 1378500 - }, - { - "epoch": 15.31, - "learning_rate": 8.671401283788286e-08, - "loss": 3.8131, - "step": 1379000 - }, - { - "epoch": 15.32, - "learning_rate": 8.6700131044133e-08, - "loss": 3.8269, - "step": 1379500 - }, - { - "epoch": 15.33, - "learning_rate": 8.668624925038314e-08, - "loss": 3.8095, - "step": 1380000 - }, - { - "epoch": 15.33, - "learning_rate": 8.667236745663328e-08, - "loss": 3.8163, - "step": 1380500 - }, - { - "epoch": 15.34, - "learning_rate": 8.665848566288341e-08, - "loss": 3.8233, - "step": 1381000 - }, - { - "epoch": 15.34, - "learning_rate": 8.664460386913355e-08, - "loss": 3.8134, - "step": 1381500 - }, - { - "epoch": 15.35, - "learning_rate": 8.663072207538368e-08, - "loss": 3.8263, - "step": 1382000 - }, - { - "epoch": 15.35, - "learning_rate": 8.661684028163383e-08, - "loss": 3.8218, - "step": 1382500 - }, - { - "epoch": 15.36, - "learning_rate": 8.660295848788397e-08, - "loss": 3.8359, - "step": 1383000 - }, - { - "epoch": 15.36, - "learning_rate": 8.65890766941341e-08, - "loss": 3.8273, - "step": 1383500 - }, - { - "epoch": 15.37, - "learning_rate": 8.657519490038424e-08, - "loss": 3.8112, - "step": 1384000 - }, - { - "epoch": 15.38, - "learning_rate": 8.656131310663438e-08, - "loss": 3.8098, - "step": 1384500 - }, - { - "epoch": 15.38, - "learning_rate": 8.654743131288452e-08, - "loss": 3.8206, - "step": 1385000 - }, - { - "epoch": 15.39, - "learning_rate": 8.653354951913467e-08, - "loss": 3.8287, - "step": 1385500 - }, - { - "epoch": 15.39, - "learning_rate": 8.651966772538481e-08, - "loss": 3.8085, - "step": 1386000 - }, - { - "epoch": 15.4, - "learning_rate": 8.650578593163494e-08, - "loss": 3.8165, - "step": 1386500 - }, - { - "epoch": 15.4, - "learning_rate": 8.649190413788508e-08, - "loss": 3.8133, - "step": 1387000 - }, - { - "epoch": 15.41, - "learning_rate": 8.647802234413522e-08, - "loss": 3.8217, - "step": 1387500 - }, - { - "epoch": 15.41, - "learning_rate": 8.646414055038535e-08, - "loss": 3.8273, - "step": 1388000 - }, - { - "epoch": 15.42, - "learning_rate": 8.645025875663549e-08, - "loss": 3.8122, - "step": 1388500 - }, - { - "epoch": 15.43, - "learning_rate": 8.643637696288564e-08, - "loss": 3.8134, - "step": 1389000 - }, - { - "epoch": 15.43, - "learning_rate": 8.642249516913576e-08, - "loss": 3.813, - "step": 1389500 - }, - { - "epoch": 15.44, - "learning_rate": 8.64086133753859e-08, - "loss": 3.81, - "step": 1390000 - }, - { - "epoch": 15.44, - "learning_rate": 8.639473158163605e-08, - "loss": 3.8207, - "step": 1390500 - }, - { - "epoch": 15.45, - "learning_rate": 8.638084978788619e-08, - "loss": 3.8134, - "step": 1391000 - }, - { - "epoch": 15.45, - "learning_rate": 8.636696799413633e-08, - "loss": 3.8155, - "step": 1391500 - }, - { - "epoch": 15.46, - "learning_rate": 8.635308620038646e-08, - "loss": 3.8221, - "step": 1392000 - }, - { - "epoch": 15.46, - "learning_rate": 8.63392044066366e-08, - "loss": 3.8384, - "step": 1392500 - }, - { - "epoch": 15.47, - "learning_rate": 8.632532261288675e-08, - "loss": 3.8383, - "step": 1393000 - }, - { - "epoch": 15.48, - "learning_rate": 8.631144081913689e-08, - "loss": 3.8442, - "step": 1393500 - }, - { - "epoch": 15.48, - "learning_rate": 8.629755902538702e-08, - "loss": 3.8267, - "step": 1394000 - }, - { - "epoch": 15.49, - "learning_rate": 8.628367723163716e-08, - "loss": 3.8281, - "step": 1394500 - }, - { - "epoch": 15.49, - "learning_rate": 8.626979543788729e-08, - "loss": 3.8247, - "step": 1395000 - }, - { - "epoch": 15.5, - "learning_rate": 8.625591364413743e-08, - "loss": 3.8135, - "step": 1395500 - }, - { - "epoch": 15.5, - "learning_rate": 8.624203185038757e-08, - "loss": 3.8152, - "step": 1396000 - }, - { - "epoch": 15.51, - "learning_rate": 8.622815005663772e-08, - "loss": 3.8322, - "step": 1396500 - }, - { - "epoch": 15.51, - "learning_rate": 8.621426826288786e-08, - "loss": 3.8272, - "step": 1397000 - }, - { - "epoch": 15.52, - "learning_rate": 8.620038646913799e-08, - "loss": 3.8155, - "step": 1397500 - }, - { - "epoch": 15.53, - "learning_rate": 8.618650467538813e-08, - "loss": 3.8181, - "step": 1398000 - }, - { - "epoch": 15.53, - "learning_rate": 8.617262288163827e-08, - "loss": 3.8254, - "step": 1398500 - }, - { - "epoch": 15.54, - "learning_rate": 8.615874108788841e-08, - "loss": 3.8095, - "step": 1399000 - }, - { - "epoch": 15.54, - "learning_rate": 8.614485929413856e-08, - "loss": 3.8181, - "step": 1399500 - }, - { - "epoch": 15.55, - "learning_rate": 8.613097750038869e-08, - "loss": 3.8184, - "step": 1400000 - }, - { - "epoch": 15.55, - "learning_rate": 8.611709570663881e-08, - "loss": 3.8115, - "step": 1400500 - }, - { - "epoch": 15.56, - "learning_rate": 8.610321391288896e-08, - "loss": 3.8327, - "step": 1401000 - }, - { - "epoch": 15.56, - "learning_rate": 8.60893321191391e-08, - "loss": 3.8096, - "step": 1401500 - }, - { - "epoch": 15.57, - "learning_rate": 8.607545032538924e-08, - "loss": 3.8092, - "step": 1402000 - }, - { - "epoch": 15.58, - "learning_rate": 8.606156853163938e-08, - "loss": 3.8297, - "step": 1402500 - }, - { - "epoch": 15.58, - "learning_rate": 8.604768673788953e-08, - "loss": 3.8328, - "step": 1403000 - }, - { - "epoch": 15.59, - "learning_rate": 8.603380494413965e-08, - "loss": 3.8203, - "step": 1403500 - }, - { - "epoch": 15.59, - "learning_rate": 8.60199231503898e-08, - "loss": 3.8042, - "step": 1404000 - }, - { - "epoch": 15.6, - "learning_rate": 8.600604135663994e-08, - "loss": 3.804, - "step": 1404500 - }, - { - "epoch": 15.6, - "learning_rate": 8.599215956289008e-08, - "loss": 3.8493, - "step": 1405000 - }, - { - "epoch": 15.61, - "learning_rate": 8.597827776914021e-08, - "loss": 3.8201, - "step": 1405500 - }, - { - "epoch": 15.61, - "learning_rate": 8.596439597539035e-08, - "loss": 3.8153, - "step": 1406000 - }, - { - "epoch": 15.62, - "learning_rate": 8.595051418164048e-08, - "loss": 3.8193, - "step": 1406500 - }, - { - "epoch": 15.63, - "learning_rate": 8.593663238789062e-08, - "loss": 3.8201, - "step": 1407000 - }, - { - "epoch": 15.63, - "learning_rate": 8.592275059414077e-08, - "loss": 3.8155, - "step": 1407500 - }, - { - "epoch": 15.64, - "learning_rate": 8.590886880039091e-08, - "loss": 3.8088, - "step": 1408000 - }, - { - "epoch": 15.64, - "learning_rate": 8.589498700664105e-08, - "loss": 3.8214, - "step": 1408500 - }, - { - "epoch": 15.65, - "learning_rate": 8.588110521289118e-08, - "loss": 3.806, - "step": 1409000 - }, - { - "epoch": 15.65, - "learning_rate": 8.586722341914132e-08, - "loss": 3.8114, - "step": 1409500 - }, - { - "epoch": 15.66, - "learning_rate": 8.585334162539146e-08, - "loss": 3.8107, - "step": 1410000 - }, - { - "epoch": 15.66, - "learning_rate": 8.583945983164161e-08, - "loss": 3.8379, - "step": 1410500 - }, - { - "epoch": 15.67, - "learning_rate": 8.582557803789175e-08, - "loss": 3.806, - "step": 1411000 - }, - { - "epoch": 15.68, - "learning_rate": 8.581169624414188e-08, - "loss": 3.8209, - "step": 1411500 - }, - { - "epoch": 15.68, - "learning_rate": 8.579781445039202e-08, - "loss": 3.821, - "step": 1412000 - }, - { - "epoch": 15.69, - "learning_rate": 8.578393265664215e-08, - "loss": 3.8236, - "step": 1412500 - }, - { - "epoch": 15.69, - "learning_rate": 8.577005086289229e-08, - "loss": 3.8055, - "step": 1413000 - }, - { - "epoch": 15.7, - "learning_rate": 8.575616906914243e-08, - "loss": 3.8257, - "step": 1413500 - }, - { - "epoch": 15.7, - "learning_rate": 8.574228727539258e-08, - "loss": 3.8202, - "step": 1414000 - }, - { - "epoch": 15.71, - "learning_rate": 8.57284054816427e-08, - "loss": 3.8182, - "step": 1414500 - }, - { - "epoch": 15.71, - "learning_rate": 8.571452368789285e-08, - "loss": 3.8128, - "step": 1415000 - }, - { - "epoch": 15.72, - "learning_rate": 8.570064189414299e-08, - "loss": 3.805, - "step": 1415500 - }, - { - "epoch": 15.73, - "learning_rate": 8.568676010039313e-08, - "loss": 3.8303, - "step": 1416000 - }, - { - "epoch": 15.73, - "learning_rate": 8.567287830664327e-08, - "loss": 3.8159, - "step": 1416500 - }, - { - "epoch": 15.74, - "learning_rate": 8.565899651289342e-08, - "loss": 3.8215, - "step": 1417000 - }, - { - "epoch": 15.74, - "learning_rate": 8.564511471914355e-08, - "loss": 3.8219, - "step": 1417500 - }, - { - "epoch": 15.75, - "learning_rate": 8.563123292539369e-08, - "loss": 3.7965, - "step": 1418000 - }, - { - "epoch": 15.75, - "learning_rate": 8.561735113164382e-08, - "loss": 3.8215, - "step": 1418500 - }, - { - "epoch": 15.76, - "learning_rate": 8.560346933789396e-08, - "loss": 3.8024, - "step": 1419000 - }, - { - "epoch": 15.76, - "learning_rate": 8.55895875441441e-08, - "loss": 3.8235, - "step": 1419500 - }, - { - "epoch": 15.77, - "learning_rate": 8.557570575039423e-08, - "loss": 3.8341, - "step": 1420000 - }, - { - "epoch": 15.78, - "learning_rate": 8.556182395664437e-08, - "loss": 3.8348, - "step": 1420500 - }, - { - "epoch": 15.78, - "learning_rate": 8.554794216289451e-08, - "loss": 3.8106, - "step": 1421000 - }, - { - "epoch": 15.79, - "learning_rate": 8.553406036914466e-08, - "loss": 3.8103, - "step": 1421500 - }, - { - "epoch": 15.79, - "learning_rate": 8.55201785753948e-08, - "loss": 3.8239, - "step": 1422000 - }, - { - "epoch": 15.8, - "learning_rate": 8.550629678164494e-08, - "loss": 3.815, - "step": 1422500 - }, - { - "epoch": 15.8, - "learning_rate": 8.549241498789507e-08, - "loss": 3.8078, - "step": 1423000 - }, - { - "epoch": 15.81, - "learning_rate": 8.547853319414521e-08, - "loss": 3.8191, - "step": 1423500 - }, - { - "epoch": 15.81, - "learning_rate": 8.546465140039536e-08, - "loss": 3.8114, - "step": 1424000 - }, - { - "epoch": 15.82, - "learning_rate": 8.545076960664548e-08, - "loss": 3.8183, - "step": 1424500 - }, - { - "epoch": 15.83, - "learning_rate": 8.543688781289563e-08, - "loss": 3.8089, - "step": 1425000 - }, - { - "epoch": 15.83, - "learning_rate": 8.542300601914577e-08, - "loss": 3.8328, - "step": 1425500 - }, - { - "epoch": 15.84, - "learning_rate": 8.54091242253959e-08, - "loss": 3.8204, - "step": 1426000 - }, - { - "epoch": 15.84, - "learning_rate": 8.539524243164604e-08, - "loss": 3.8005, - "step": 1426500 - }, - { - "epoch": 15.85, - "learning_rate": 8.538136063789618e-08, - "loss": 3.8259, - "step": 1427000 - }, - { - "epoch": 15.85, - "learning_rate": 8.536747884414632e-08, - "loss": 3.8232, - "step": 1427500 - }, - { - "epoch": 15.86, - "learning_rate": 8.535359705039647e-08, - "loss": 3.8196, - "step": 1428000 - }, - { - "epoch": 15.86, - "learning_rate": 8.53397152566466e-08, - "loss": 3.8154, - "step": 1428500 - }, - { - "epoch": 15.87, - "learning_rate": 8.532583346289674e-08, - "loss": 3.8187, - "step": 1429000 - }, - { - "epoch": 15.88, - "learning_rate": 8.531195166914688e-08, - "loss": 3.818, - "step": 1429500 - }, - { - "epoch": 15.88, - "learning_rate": 8.529806987539702e-08, - "loss": 3.8212, - "step": 1430000 - }, - { - "epoch": 15.89, - "learning_rate": 8.528418808164715e-08, - "loss": 3.8055, - "step": 1430500 - }, - { - "epoch": 15.89, - "learning_rate": 8.52703062878973e-08, - "loss": 3.8073, - "step": 1431000 - }, - { - "epoch": 15.9, - "learning_rate": 8.525642449414742e-08, - "loss": 3.8221, - "step": 1431500 - }, - { - "epoch": 15.9, - "learning_rate": 8.524254270039757e-08, - "loss": 3.8145, - "step": 1432000 - }, - { - "epoch": 15.91, - "learning_rate": 8.522866090664771e-08, - "loss": 3.8183, - "step": 1432500 - }, - { - "epoch": 15.91, - "learning_rate": 8.521477911289785e-08, - "loss": 3.8356, - "step": 1433000 - }, - { - "epoch": 15.92, - "learning_rate": 8.520089731914799e-08, - "loss": 3.84, - "step": 1433500 - }, - { - "epoch": 15.93, - "learning_rate": 8.518701552539813e-08, - "loss": 3.8109, - "step": 1434000 - }, - { - "epoch": 15.93, - "learning_rate": 8.517313373164826e-08, - "loss": 3.8183, - "step": 1434500 - }, - { - "epoch": 15.94, - "learning_rate": 8.51592519378984e-08, - "loss": 3.8327, - "step": 1435000 - }, - { - "epoch": 15.94, - "learning_rate": 8.514537014414855e-08, - "loss": 3.8311, - "step": 1435500 - }, - { - "epoch": 15.95, - "learning_rate": 8.513148835039868e-08, - "loss": 3.8188, - "step": 1436000 - }, - { - "epoch": 15.95, - "learning_rate": 8.511760655664882e-08, - "loss": 3.8091, - "step": 1436500 - }, - { - "epoch": 15.96, - "learning_rate": 8.510372476289895e-08, - "loss": 3.8162, - "step": 1437000 - }, - { - "epoch": 15.96, - "learning_rate": 8.508984296914909e-08, - "loss": 3.8364, - "step": 1437500 - }, - { - "epoch": 15.97, - "learning_rate": 8.507596117539923e-08, - "loss": 3.8147, - "step": 1438000 - }, - { - "epoch": 15.98, - "learning_rate": 8.506207938164938e-08, - "loss": 3.8158, - "step": 1438500 - }, - { - "epoch": 15.98, - "learning_rate": 8.504819758789952e-08, - "loss": 3.8208, - "step": 1439000 - }, - { - "epoch": 15.99, - "learning_rate": 8.503431579414966e-08, - "loss": 3.8241, - "step": 1439500 - }, - { - "epoch": 15.99, - "learning_rate": 8.502043400039979e-08, - "loss": 3.8143, - "step": 1440000 - }, - { - "epoch": 16.0, - "learning_rate": 8.500655220664993e-08, - "loss": 3.8116, - "step": 1440500 - }, - { - "epoch": 16.0, - "eval_loss": 3.8666203022003174, - "eval_runtime": 6.3142, - "eval_samples_per_second": 246.112, - "step": 1440736 - }, - { - "epoch": 16.0, - "learning_rate": 8.499267041290007e-08, - "loss": 3.8191, - "step": 1441000 - }, - { - "epoch": 16.01, - "learning_rate": 8.497878861915022e-08, - "loss": 3.8049, - "step": 1441500 - }, - { - "epoch": 16.01, - "learning_rate": 8.496490682540034e-08, - "loss": 3.813, - "step": 1442000 - }, - { - "epoch": 16.02, - "learning_rate": 8.495102503165049e-08, - "loss": 3.8038, - "step": 1442500 - }, - { - "epoch": 16.03, - "learning_rate": 8.493714323790062e-08, - "loss": 3.8249, - "step": 1443000 - }, - { - "epoch": 16.03, - "learning_rate": 8.492326144415076e-08, - "loss": 3.8166, - "step": 1443500 - }, - { - "epoch": 16.04, - "learning_rate": 8.49093796504009e-08, - "loss": 3.8103, - "step": 1444000 - }, - { - "epoch": 16.04, - "learning_rate": 8.489549785665104e-08, - "loss": 3.8248, - "step": 1444500 - }, - { - "epoch": 16.05, - "learning_rate": 8.488161606290118e-08, - "loss": 3.7897, - "step": 1445000 - }, - { - "epoch": 16.05, - "learning_rate": 8.486773426915131e-08, - "loss": 3.8007, - "step": 1445500 - }, - { - "epoch": 16.06, - "learning_rate": 8.485385247540146e-08, - "loss": 3.8071, - "step": 1446000 - }, - { - "epoch": 16.06, - "learning_rate": 8.48399706816516e-08, - "loss": 3.8224, - "step": 1446500 - }, - { - "epoch": 16.07, - "learning_rate": 8.482608888790174e-08, - "loss": 3.8002, - "step": 1447000 - }, - { - "epoch": 16.08, - "learning_rate": 8.481220709415188e-08, - "loss": 3.8207, - "step": 1447500 - }, - { - "epoch": 16.08, - "learning_rate": 8.479832530040201e-08, - "loss": 3.8086, - "step": 1448000 - }, - { - "epoch": 16.09, - "learning_rate": 8.478444350665215e-08, - "loss": 3.8017, - "step": 1448500 - }, - { - "epoch": 16.09, - "learning_rate": 8.477056171290228e-08, - "loss": 3.8285, - "step": 1449000 - }, - { - "epoch": 16.1, - "learning_rate": 8.475667991915243e-08, - "loss": 3.8077, - "step": 1449500 - }, - { - "epoch": 16.1, - "learning_rate": 8.474279812540257e-08, - "loss": 3.788, - "step": 1450000 - }, - { - "epoch": 16.11, - "learning_rate": 8.472891633165271e-08, - "loss": 3.8276, - "step": 1450500 - }, - { - "epoch": 16.11, - "learning_rate": 8.471503453790284e-08, - "loss": 3.834, - "step": 1451000 - }, - { - "epoch": 16.12, - "learning_rate": 8.470115274415298e-08, - "loss": 3.8128, - "step": 1451500 - }, - { - "epoch": 16.13, - "learning_rate": 8.468727095040312e-08, - "loss": 3.8144, - "step": 1452000 - }, - { - "epoch": 16.13, - "learning_rate": 8.467338915665327e-08, - "loss": 3.8088, - "step": 1452500 - }, - { - "epoch": 16.14, - "learning_rate": 8.465950736290341e-08, - "loss": 3.8208, - "step": 1453000 - }, - { - "epoch": 16.14, - "learning_rate": 8.464562556915355e-08, - "loss": 3.8222, - "step": 1453500 - }, - { - "epoch": 16.15, - "learning_rate": 8.463174377540368e-08, - "loss": 3.7955, - "step": 1454000 - }, - { - "epoch": 16.15, - "learning_rate": 8.461786198165382e-08, - "loss": 3.8453, - "step": 1454500 - }, - { - "epoch": 16.16, - "learning_rate": 8.460398018790395e-08, - "loss": 3.8136, - "step": 1455000 - }, - { - "epoch": 16.16, - "learning_rate": 8.459009839415409e-08, - "loss": 3.8154, - "step": 1455500 - }, - { - "epoch": 16.17, - "learning_rate": 8.457621660040424e-08, - "loss": 3.8284, - "step": 1456000 - }, - { - "epoch": 16.18, - "learning_rate": 8.456233480665436e-08, - "loss": 3.8186, - "step": 1456500 - }, - { - "epoch": 16.18, - "learning_rate": 8.45484530129045e-08, - "loss": 3.8002, - "step": 1457000 - }, - { - "epoch": 16.19, - "learning_rate": 8.453457121915465e-08, - "loss": 3.8042, - "step": 1457500 - }, - { - "epoch": 16.19, - "learning_rate": 8.452068942540479e-08, - "loss": 3.8248, - "step": 1458000 - }, - { - "epoch": 16.2, - "learning_rate": 8.450680763165493e-08, - "loss": 3.8115, - "step": 1458500 - }, - { - "epoch": 16.2, - "learning_rate": 8.449292583790508e-08, - "loss": 3.7705, - "step": 1459000 - }, - { - "epoch": 16.21, - "learning_rate": 8.44790440441552e-08, - "loss": 3.812, - "step": 1459500 - }, - { - "epoch": 16.21, - "learning_rate": 8.446516225040535e-08, - "loss": 3.815, - "step": 1460000 - }, - { - "epoch": 16.22, - "learning_rate": 8.445128045665549e-08, - "loss": 3.8179, - "step": 1460500 - }, - { - "epoch": 16.23, - "learning_rate": 8.443739866290562e-08, - "loss": 3.8306, - "step": 1461000 - }, - { - "epoch": 16.23, - "learning_rate": 8.442351686915576e-08, - "loss": 3.8334, - "step": 1461500 - }, - { - "epoch": 16.24, - "learning_rate": 8.44096350754059e-08, - "loss": 3.8042, - "step": 1462000 - }, - { - "epoch": 16.24, - "learning_rate": 8.439575328165603e-08, - "loss": 3.8125, - "step": 1462500 - }, - { - "epoch": 16.25, - "learning_rate": 8.438187148790617e-08, - "loss": 3.7856, - "step": 1463000 - }, - { - "epoch": 16.25, - "learning_rate": 8.436798969415632e-08, - "loss": 3.8237, - "step": 1463500 - }, - { - "epoch": 16.26, - "learning_rate": 8.435410790040646e-08, - "loss": 3.8156, - "step": 1464000 - }, - { - "epoch": 16.26, - "learning_rate": 8.43402261066566e-08, - "loss": 3.8183, - "step": 1464500 - }, - { - "epoch": 16.27, - "learning_rate": 8.432634431290673e-08, - "loss": 3.8305, - "step": 1465000 - }, - { - "epoch": 16.28, - "learning_rate": 8.431246251915687e-08, - "loss": 3.8205, - "step": 1465500 - }, - { - "epoch": 16.28, - "learning_rate": 8.429858072540701e-08, - "loss": 3.8137, - "step": 1466000 - }, - { - "epoch": 16.29, - "learning_rate": 8.428469893165714e-08, - "loss": 3.8264, - "step": 1466500 - }, - { - "epoch": 16.29, - "learning_rate": 8.427081713790729e-08, - "loss": 3.8302, - "step": 1467000 - }, - { - "epoch": 16.3, - "learning_rate": 8.425693534415743e-08, - "loss": 3.8016, - "step": 1467500 - }, - { - "epoch": 16.3, - "learning_rate": 8.424305355040756e-08, - "loss": 3.8179, - "step": 1468000 - }, - { - "epoch": 16.31, - "learning_rate": 8.42291717566577e-08, - "loss": 3.8046, - "step": 1468500 - }, - { - "epoch": 16.31, - "learning_rate": 8.421528996290784e-08, - "loss": 3.7876, - "step": 1469000 - }, - { - "epoch": 16.32, - "learning_rate": 8.420140816915798e-08, - "loss": 3.7978, - "step": 1469500 - }, - { - "epoch": 16.32, - "learning_rate": 8.418752637540813e-08, - "loss": 3.8062, - "step": 1470000 - }, - { - "epoch": 16.33, - "learning_rate": 8.417364458165827e-08, - "loss": 3.8247, - "step": 1470500 - }, - { - "epoch": 16.34, - "learning_rate": 8.41597627879084e-08, - "loss": 3.801, - "step": 1471000 - }, - { - "epoch": 16.34, - "learning_rate": 8.414588099415854e-08, - "loss": 3.8146, - "step": 1471500 - }, - { - "epoch": 16.35, - "learning_rate": 8.413199920040868e-08, - "loss": 3.8123, - "step": 1472000 - }, - { - "epoch": 16.35, - "learning_rate": 8.411811740665881e-08, - "loss": 3.8107, - "step": 1472500 - }, - { - "epoch": 16.36, - "learning_rate": 8.410423561290895e-08, - "loss": 3.8171, - "step": 1473000 - }, - { - "epoch": 16.36, - "learning_rate": 8.409035381915908e-08, - "loss": 3.8245, - "step": 1473500 - }, - { - "epoch": 16.37, - "learning_rate": 8.407647202540922e-08, - "loss": 3.7992, - "step": 1474000 - }, - { - "epoch": 16.37, - "learning_rate": 8.406259023165937e-08, - "loss": 3.8018, - "step": 1474500 - }, - { - "epoch": 16.38, - "learning_rate": 8.404870843790951e-08, - "loss": 3.8194, - "step": 1475000 - }, - { - "epoch": 16.39, - "learning_rate": 8.403482664415965e-08, - "loss": 3.8054, - "step": 1475500 - }, - { - "epoch": 16.39, - "learning_rate": 8.40209448504098e-08, - "loss": 3.8085, - "step": 1476000 - }, - { - "epoch": 16.4, - "learning_rate": 8.400706305665992e-08, - "loss": 3.811, - "step": 1476500 - }, - { - "epoch": 16.4, - "learning_rate": 8.399318126291006e-08, - "loss": 3.8089, - "step": 1477000 - }, - { - "epoch": 16.41, - "learning_rate": 8.397929946916021e-08, - "loss": 3.8236, - "step": 1477500 - }, - { - "epoch": 16.41, - "learning_rate": 8.396541767541035e-08, - "loss": 3.8247, - "step": 1478000 - }, - { - "epoch": 16.42, - "learning_rate": 8.395153588166048e-08, - "loss": 3.7988, - "step": 1478500 - }, - { - "epoch": 16.42, - "learning_rate": 8.393765408791062e-08, - "loss": 3.8104, - "step": 1479000 - }, - { - "epoch": 16.43, - "learning_rate": 8.392377229416075e-08, - "loss": 3.8342, - "step": 1479500 - }, - { - "epoch": 16.44, - "learning_rate": 8.390989050041089e-08, - "loss": 3.809, - "step": 1480000 - }, - { - "epoch": 16.44, - "learning_rate": 8.389600870666103e-08, - "loss": 3.7938, - "step": 1480500 - }, - { - "epoch": 16.45, - "learning_rate": 8.388212691291118e-08, - "loss": 3.8169, - "step": 1481000 - }, - { - "epoch": 16.45, - "learning_rate": 8.386824511916132e-08, - "loss": 3.8089, - "step": 1481500 - }, - { - "epoch": 16.46, - "learning_rate": 8.385436332541145e-08, - "loss": 3.8115, - "step": 1482000 - }, - { - "epoch": 16.46, - "learning_rate": 8.384048153166159e-08, - "loss": 3.8208, - "step": 1482500 - }, - { - "epoch": 16.47, - "learning_rate": 8.382659973791173e-08, - "loss": 3.8192, - "step": 1483000 - }, - { - "epoch": 16.47, - "learning_rate": 8.381271794416187e-08, - "loss": 3.8117, - "step": 1483500 - }, - { - "epoch": 16.48, - "learning_rate": 8.379883615041202e-08, - "loss": 3.8197, - "step": 1484000 - }, - { - "epoch": 16.49, - "learning_rate": 8.378495435666215e-08, - "loss": 3.8187, - "step": 1484500 - }, - { - "epoch": 16.49, - "learning_rate": 8.377107256291229e-08, - "loss": 3.8165, - "step": 1485000 - }, - { - "epoch": 16.5, - "learning_rate": 8.375719076916242e-08, - "loss": 3.7969, - "step": 1485500 - }, - { - "epoch": 16.5, - "learning_rate": 8.374330897541256e-08, - "loss": 3.8254, - "step": 1486000 - }, - { - "epoch": 16.51, - "learning_rate": 8.37294271816627e-08, - "loss": 3.8132, - "step": 1486500 - }, - { - "epoch": 16.51, - "learning_rate": 8.371554538791284e-08, - "loss": 3.8066, - "step": 1487000 - }, - { - "epoch": 16.52, - "learning_rate": 8.370166359416297e-08, - "loss": 3.8076, - "step": 1487500 - }, - { - "epoch": 16.52, - "learning_rate": 8.368778180041312e-08, - "loss": 3.8176, - "step": 1488000 - }, - { - "epoch": 16.53, - "learning_rate": 8.367390000666326e-08, - "loss": 3.7983, - "step": 1488500 - }, - { - "epoch": 16.54, - "learning_rate": 8.36600182129134e-08, - "loss": 3.8272, - "step": 1489000 - }, - { - "epoch": 16.54, - "learning_rate": 8.364613641916354e-08, - "loss": 3.8164, - "step": 1489500 - }, - { - "epoch": 16.55, - "learning_rate": 8.363225462541368e-08, - "loss": 3.8167, - "step": 1490000 - }, - { - "epoch": 16.55, - "learning_rate": 8.361837283166381e-08, - "loss": 3.8066, - "step": 1490500 - }, - { - "epoch": 16.56, - "learning_rate": 8.360449103791396e-08, - "loss": 3.7905, - "step": 1491000 - }, - { - "epoch": 16.56, - "learning_rate": 8.359060924416408e-08, - "loss": 3.8025, - "step": 1491500 - }, - { - "epoch": 16.57, - "learning_rate": 8.357672745041423e-08, - "loss": 3.8094, - "step": 1492000 - }, - { - "epoch": 16.57, - "learning_rate": 8.356284565666437e-08, - "loss": 3.8193, - "step": 1492500 - }, - { - "epoch": 16.58, - "learning_rate": 8.35489638629145e-08, - "loss": 3.8276, - "step": 1493000 - }, - { - "epoch": 16.59, - "learning_rate": 8.353508206916464e-08, - "loss": 3.82, - "step": 1493500 - }, - { - "epoch": 16.59, - "learning_rate": 8.352120027541478e-08, - "loss": 3.8219, - "step": 1494000 - }, - { - "epoch": 16.6, - "learning_rate": 8.350731848166492e-08, - "loss": 3.8418, - "step": 1494500 - }, - { - "epoch": 16.6, - "learning_rate": 8.349343668791507e-08, - "loss": 3.8105, - "step": 1495000 - }, - { - "epoch": 16.61, - "learning_rate": 8.347955489416521e-08, - "loss": 3.8177, - "step": 1495500 - }, - { - "epoch": 16.61, - "learning_rate": 8.346567310041534e-08, - "loss": 3.8042, - "step": 1496000 - }, - { - "epoch": 16.62, - "learning_rate": 8.345179130666548e-08, - "loss": 3.8262, - "step": 1496500 - }, - { - "epoch": 16.62, - "learning_rate": 8.343790951291561e-08, - "loss": 3.8134, - "step": 1497000 - }, - { - "epoch": 16.63, - "learning_rate": 8.342402771916575e-08, - "loss": 3.8195, - "step": 1497500 - }, - { - "epoch": 16.64, - "learning_rate": 8.34101459254159e-08, - "loss": 3.8213, - "step": 1498000 - }, - { - "epoch": 16.64, - "learning_rate": 8.339626413166604e-08, - "loss": 3.8041, - "step": 1498500 - }, - { - "epoch": 16.65, - "learning_rate": 8.338238233791617e-08, - "loss": 3.807, - "step": 1499000 - }, - { - "epoch": 16.65, - "learning_rate": 8.336850054416631e-08, - "loss": 3.808, - "step": 1499500 - }, - { - "epoch": 16.66, - "learning_rate": 8.335461875041645e-08, - "loss": 3.8179, - "step": 1500000 - }, - { - "epoch": 16.66, - "learning_rate": 8.334073695666659e-08, - "loss": 3.8197, - "step": 1500500 - }, - { - "epoch": 16.67, - "learning_rate": 8.332685516291673e-08, - "loss": 3.8118, - "step": 1501000 - }, - { - "epoch": 16.67, - "learning_rate": 8.331297336916686e-08, - "loss": 3.8238, - "step": 1501500 - }, - { - "epoch": 16.68, - "learning_rate": 8.3299091575417e-08, - "loss": 3.8, - "step": 1502000 - }, - { - "epoch": 16.69, - "learning_rate": 8.328520978166715e-08, - "loss": 3.8048, - "step": 1502500 - }, - { - "epoch": 16.69, - "learning_rate": 8.327132798791728e-08, - "loss": 3.821, - "step": 1503000 - }, - { - "epoch": 16.7, - "learning_rate": 8.325744619416742e-08, - "loss": 3.8155, - "step": 1503500 - }, - { - "epoch": 16.7, - "learning_rate": 8.324356440041756e-08, - "loss": 3.819, - "step": 1504000 - }, - { - "epoch": 16.71, - "learning_rate": 8.322968260666769e-08, - "loss": 3.8134, - "step": 1504500 - }, - { - "epoch": 16.71, - "learning_rate": 8.321580081291783e-08, - "loss": 3.8152, - "step": 1505000 - }, - { - "epoch": 16.72, - "learning_rate": 8.320191901916798e-08, - "loss": 3.806, - "step": 1505500 - }, - { - "epoch": 16.72, - "learning_rate": 8.318803722541812e-08, - "loss": 3.8079, - "step": 1506000 - }, - { - "epoch": 16.73, - "learning_rate": 8.317415543166826e-08, - "loss": 3.8318, - "step": 1506500 - }, - { - "epoch": 16.74, - "learning_rate": 8.31602736379184e-08, - "loss": 3.8015, - "step": 1507000 - }, - { - "epoch": 16.74, - "learning_rate": 8.314639184416853e-08, - "loss": 3.8231, - "step": 1507500 - }, - { - "epoch": 16.75, - "learning_rate": 8.313251005041867e-08, - "loss": 3.8106, - "step": 1508000 - }, - { - "epoch": 16.75, - "learning_rate": 8.311862825666882e-08, - "loss": 3.8139, - "step": 1508500 - }, - { - "epoch": 16.76, - "learning_rate": 8.310474646291894e-08, - "loss": 3.8296, - "step": 1509000 - }, - { - "epoch": 16.76, - "learning_rate": 8.309086466916909e-08, - "loss": 3.8036, - "step": 1509500 - }, - { - "epoch": 16.77, - "learning_rate": 8.307698287541922e-08, - "loss": 3.8279, - "step": 1510000 - }, - { - "epoch": 16.77, - "learning_rate": 8.306310108166936e-08, - "loss": 3.8196, - "step": 1510500 - }, - { - "epoch": 16.78, - "learning_rate": 8.30492192879195e-08, - "loss": 3.8155, - "step": 1511000 - }, - { - "epoch": 16.79, - "learning_rate": 8.303533749416964e-08, - "loss": 3.81, - "step": 1511500 - }, - { - "epoch": 16.79, - "learning_rate": 8.302145570041978e-08, - "loss": 3.8034, - "step": 1512000 - }, - { - "epoch": 16.8, - "learning_rate": 8.300757390666993e-08, - "loss": 3.8207, - "step": 1512500 - }, - { - "epoch": 16.8, - "learning_rate": 8.299369211292006e-08, - "loss": 3.8107, - "step": 1513000 - }, - { - "epoch": 16.81, - "learning_rate": 8.29798103191702e-08, - "loss": 3.8125, - "step": 1513500 - }, - { - "epoch": 16.81, - "learning_rate": 8.296592852542034e-08, - "loss": 3.8347, - "step": 1514000 - }, - { - "epoch": 16.82, - "learning_rate": 8.295204673167048e-08, - "loss": 3.8297, - "step": 1514500 - }, - { - "epoch": 16.82, - "learning_rate": 8.293816493792061e-08, - "loss": 3.8204, - "step": 1515000 - }, - { - "epoch": 16.83, - "learning_rate": 8.292428314417075e-08, - "loss": 3.82, - "step": 1515500 - }, - { - "epoch": 16.84, - "learning_rate": 8.291040135042088e-08, - "loss": 3.8202, - "step": 1516000 - }, - { - "epoch": 16.84, - "learning_rate": 8.289651955667103e-08, - "loss": 3.8214, - "step": 1516500 - }, - { - "epoch": 16.85, - "learning_rate": 8.288263776292117e-08, - "loss": 3.7997, - "step": 1517000 - }, - { - "epoch": 16.85, - "learning_rate": 8.286875596917131e-08, - "loss": 3.8186, - "step": 1517500 - }, - { - "epoch": 16.86, - "learning_rate": 8.285487417542145e-08, - "loss": 3.81, - "step": 1518000 - }, - { - "epoch": 16.86, - "learning_rate": 8.284099238167158e-08, - "loss": 3.8207, - "step": 1518500 - }, - { - "epoch": 16.87, - "learning_rate": 8.282711058792172e-08, - "loss": 3.8071, - "step": 1519000 - }, - { - "epoch": 16.87, - "learning_rate": 8.281322879417187e-08, - "loss": 3.8247, - "step": 1519500 - }, - { - "epoch": 16.88, - "learning_rate": 8.279934700042201e-08, - "loss": 3.8202, - "step": 1520000 - }, - { - "epoch": 16.89, - "learning_rate": 8.278546520667215e-08, - "loss": 3.8027, - "step": 1520500 - }, - { - "epoch": 16.89, - "learning_rate": 8.277158341292228e-08, - "loss": 3.8041, - "step": 1521000 - }, - { - "epoch": 16.9, - "learning_rate": 8.275770161917242e-08, - "loss": 3.8283, - "step": 1521500 - }, - { - "epoch": 16.9, - "learning_rate": 8.274381982542255e-08, - "loss": 3.8106, - "step": 1522000 - }, - { - "epoch": 16.91, - "learning_rate": 8.272993803167269e-08, - "loss": 3.8153, - "step": 1522500 - }, - { - "epoch": 16.91, - "learning_rate": 8.271605623792284e-08, - "loss": 3.8071, - "step": 1523000 - }, - { - "epoch": 16.92, - "learning_rate": 8.270217444417298e-08, - "loss": 3.8048, - "step": 1523500 - }, - { - "epoch": 16.92, - "learning_rate": 8.268829265042311e-08, - "loss": 3.8246, - "step": 1524000 - }, - { - "epoch": 16.93, - "learning_rate": 8.267441085667325e-08, - "loss": 3.8208, - "step": 1524500 - }, - { - "epoch": 16.94, - "learning_rate": 8.266052906292339e-08, - "loss": 3.8201, - "step": 1525000 - }, - { - "epoch": 16.94, - "learning_rate": 8.264664726917353e-08, - "loss": 3.8069, - "step": 1525500 - }, - { - "epoch": 16.95, - "learning_rate": 8.263276547542368e-08, - "loss": 3.8019, - "step": 1526000 - }, - { - "epoch": 16.95, - "learning_rate": 8.261888368167382e-08, - "loss": 3.8163, - "step": 1526500 - }, - { - "epoch": 16.96, - "learning_rate": 8.260500188792395e-08, - "loss": 3.7999, - "step": 1527000 - }, - { - "epoch": 16.96, - "learning_rate": 8.259112009417408e-08, - "loss": 3.8106, - "step": 1527500 - }, - { - "epoch": 16.97, - "learning_rate": 8.257723830042422e-08, - "loss": 3.7933, - "step": 1528000 - }, - { - "epoch": 16.97, - "learning_rate": 8.256335650667436e-08, - "loss": 3.8071, - "step": 1528500 - }, - { - "epoch": 16.98, - "learning_rate": 8.25494747129245e-08, - "loss": 3.7952, - "step": 1529000 - }, - { - "epoch": 16.99, - "learning_rate": 8.253559291917465e-08, - "loss": 3.7952, - "step": 1529500 - }, - { - "epoch": 16.99, - "learning_rate": 8.252171112542477e-08, - "loss": 3.8094, - "step": 1530000 - }, - { - "epoch": 17.0, - "learning_rate": 8.250782933167492e-08, - "loss": 3.805, - "step": 1530500 - }, - { - "epoch": 17.0, - "eval_loss": 3.8627829551696777, - "eval_runtime": 6.3079, - "eval_samples_per_second": 246.358, - "step": 1530782 - }, - { - "epoch": 17.0, - "learning_rate": 8.249394753792506e-08, - "loss": 3.8162, - "step": 1531000 - }, - { - "epoch": 17.01, - "learning_rate": 8.24800657441752e-08, - "loss": 3.8088, - "step": 1531500 - }, - { - "epoch": 17.01, - "learning_rate": 8.246618395042534e-08, - "loss": 3.8008, - "step": 1532000 - }, - { - "epoch": 17.02, - "learning_rate": 8.245230215667547e-08, - "loss": 3.8112, - "step": 1532500 - }, - { - "epoch": 17.02, - "learning_rate": 8.243842036292561e-08, - "loss": 3.8323, - "step": 1533000 - }, - { - "epoch": 17.03, - "learning_rate": 8.242453856917574e-08, - "loss": 3.7973, - "step": 1533500 - }, - { - "epoch": 17.04, - "learning_rate": 8.241065677542589e-08, - "loss": 3.7927, - "step": 1534000 - }, - { - "epoch": 17.04, - "learning_rate": 8.239677498167603e-08, - "loss": 3.8046, - "step": 1534500 - }, - { - "epoch": 17.05, - "learning_rate": 8.238289318792617e-08, - "loss": 3.794, - "step": 1535000 - }, - { - "epoch": 17.05, - "learning_rate": 8.23690113941763e-08, - "loss": 3.8122, - "step": 1535500 - }, - { - "epoch": 17.06, - "learning_rate": 8.235512960042644e-08, - "loss": 3.8133, - "step": 1536000 - }, - { - "epoch": 17.06, - "learning_rate": 8.234124780667658e-08, - "loss": 3.8241, - "step": 1536500 - }, - { - "epoch": 17.07, - "learning_rate": 8.232736601292673e-08, - "loss": 3.802, - "step": 1537000 - }, - { - "epoch": 17.07, - "learning_rate": 8.231348421917687e-08, - "loss": 3.8265, - "step": 1537500 - }, - { - "epoch": 17.08, - "learning_rate": 8.2299602425427e-08, - "loss": 3.8211, - "step": 1538000 - }, - { - "epoch": 17.09, - "learning_rate": 8.228572063167714e-08, - "loss": 3.7952, - "step": 1538500 - }, - { - "epoch": 17.09, - "learning_rate": 8.227183883792728e-08, - "loss": 3.8058, - "step": 1539000 - }, - { - "epoch": 17.1, - "learning_rate": 8.225795704417741e-08, - "loss": 3.7884, - "step": 1539500 - }, - { - "epoch": 17.1, - "learning_rate": 8.224407525042755e-08, - "loss": 3.8228, - "step": 1540000 - }, - { - "epoch": 17.11, - "learning_rate": 8.22301934566777e-08, - "loss": 3.8158, - "step": 1540500 - }, - { - "epoch": 17.11, - "learning_rate": 8.221631166292782e-08, - "loss": 3.8083, - "step": 1541000 - }, - { - "epoch": 17.12, - "learning_rate": 8.220242986917797e-08, - "loss": 3.8341, - "step": 1541500 - }, - { - "epoch": 17.12, - "learning_rate": 8.218854807542811e-08, - "loss": 3.7961, - "step": 1542000 - }, - { - "epoch": 17.13, - "learning_rate": 8.217466628167825e-08, - "loss": 3.8178, - "step": 1542500 - }, - { - "epoch": 17.14, - "learning_rate": 8.21607844879284e-08, - "loss": 3.802, - "step": 1543000 - }, - { - "epoch": 17.14, - "learning_rate": 8.214690269417854e-08, - "loss": 3.8048, - "step": 1543500 - }, - { - "epoch": 17.15, - "learning_rate": 8.213302090042866e-08, - "loss": 3.8061, - "step": 1544000 - }, - { - "epoch": 17.15, - "learning_rate": 8.211913910667881e-08, - "loss": 3.8264, - "step": 1544500 - }, - { - "epoch": 17.16, - "learning_rate": 8.210525731292895e-08, - "loss": 3.813, - "step": 1545000 - }, - { - "epoch": 17.16, - "learning_rate": 8.209137551917908e-08, - "loss": 3.7973, - "step": 1545500 - }, - { - "epoch": 17.17, - "learning_rate": 8.207749372542922e-08, - "loss": 3.8024, - "step": 1546000 - }, - { - "epoch": 17.17, - "learning_rate": 8.206361193167935e-08, - "loss": 3.8042, - "step": 1546500 - }, - { - "epoch": 17.18, - "learning_rate": 8.204973013792949e-08, - "loss": 3.803, - "step": 1547000 - }, - { - "epoch": 17.19, - "learning_rate": 8.203584834417963e-08, - "loss": 3.8072, - "step": 1547500 - }, - { - "epoch": 17.19, - "learning_rate": 8.202196655042978e-08, - "loss": 3.8096, - "step": 1548000 - }, - { - "epoch": 17.2, - "learning_rate": 8.200808475667992e-08, - "loss": 3.8117, - "step": 1548500 - }, - { - "epoch": 17.2, - "learning_rate": 8.199420296293006e-08, - "loss": 3.8039, - "step": 1549000 - }, - { - "epoch": 17.21, - "learning_rate": 8.198032116918019e-08, - "loss": 3.7965, - "step": 1549500 - }, - { - "epoch": 17.21, - "learning_rate": 8.196643937543033e-08, - "loss": 3.7996, - "step": 1550000 - }, - { - "epoch": 17.22, - "learning_rate": 8.195255758168047e-08, - "loss": 3.8054, - "step": 1550500 - }, - { - "epoch": 17.22, - "learning_rate": 8.193867578793062e-08, - "loss": 3.802, - "step": 1551000 - }, - { - "epoch": 17.23, - "learning_rate": 8.192479399418075e-08, - "loss": 3.7946, - "step": 1551500 - }, - { - "epoch": 17.24, - "learning_rate": 8.191091220043089e-08, - "loss": 3.8046, - "step": 1552000 - }, - { - "epoch": 17.24, - "learning_rate": 8.189703040668102e-08, - "loss": 3.8187, - "step": 1552500 - }, - { - "epoch": 17.25, - "learning_rate": 8.188314861293116e-08, - "loss": 3.8068, - "step": 1553000 - }, - { - "epoch": 17.25, - "learning_rate": 8.18692668191813e-08, - "loss": 3.8058, - "step": 1553500 - }, - { - "epoch": 17.26, - "learning_rate": 8.185538502543144e-08, - "loss": 3.8246, - "step": 1554000 - }, - { - "epoch": 17.26, - "learning_rate": 8.184150323168159e-08, - "loss": 3.8019, - "step": 1554500 - }, - { - "epoch": 17.27, - "learning_rate": 8.182762143793172e-08, - "loss": 3.8083, - "step": 1555000 - }, - { - "epoch": 17.27, - "learning_rate": 8.181373964418186e-08, - "loss": 3.8115, - "step": 1555500 - }, - { - "epoch": 17.28, - "learning_rate": 8.1799857850432e-08, - "loss": 3.8036, - "step": 1556000 - }, - { - "epoch": 17.29, - "learning_rate": 8.178597605668214e-08, - "loss": 3.7998, - "step": 1556500 - }, - { - "epoch": 17.29, - "learning_rate": 8.177209426293228e-08, - "loss": 3.794, - "step": 1557000 - }, - { - "epoch": 17.3, - "learning_rate": 8.175821246918241e-08, - "loss": 3.8139, - "step": 1557500 - }, - { - "epoch": 17.3, - "learning_rate": 8.174433067543254e-08, - "loss": 3.7824, - "step": 1558000 - }, - { - "epoch": 17.31, - "learning_rate": 8.173044888168268e-08, - "loss": 3.8173, - "step": 1558500 - }, - { - "epoch": 17.31, - "learning_rate": 8.171656708793283e-08, - "loss": 3.809, - "step": 1559000 - }, - { - "epoch": 17.32, - "learning_rate": 8.170268529418297e-08, - "loss": 3.8032, - "step": 1559500 - }, - { - "epoch": 17.32, - "learning_rate": 8.168880350043311e-08, - "loss": 3.8012, - "step": 1560000 - }, - { - "epoch": 17.33, - "learning_rate": 8.167492170668324e-08, - "loss": 3.8079, - "step": 1560500 - }, - { - "epoch": 17.34, - "learning_rate": 8.166103991293338e-08, - "loss": 3.8075, - "step": 1561000 - }, - { - "epoch": 17.34, - "learning_rate": 8.164715811918353e-08, - "loss": 3.7996, - "step": 1561500 - }, - { - "epoch": 17.35, - "learning_rate": 8.163327632543367e-08, - "loss": 3.825, - "step": 1562000 - }, - { - "epoch": 17.35, - "learning_rate": 8.161939453168381e-08, - "loss": 3.8108, - "step": 1562500 - }, - { - "epoch": 17.36, - "learning_rate": 8.160551273793395e-08, - "loss": 3.8177, - "step": 1563000 - }, - { - "epoch": 17.36, - "learning_rate": 8.159163094418408e-08, - "loss": 3.8176, - "step": 1563500 - }, - { - "epoch": 17.37, - "learning_rate": 8.157774915043421e-08, - "loss": 3.8287, - "step": 1564000 - }, - { - "epoch": 17.37, - "learning_rate": 8.156386735668435e-08, - "loss": 3.8034, - "step": 1564500 - }, - { - "epoch": 17.38, - "learning_rate": 8.15499855629345e-08, - "loss": 3.8146, - "step": 1565000 - }, - { - "epoch": 17.39, - "learning_rate": 8.153610376918464e-08, - "loss": 3.809, - "step": 1565500 - }, - { - "epoch": 17.39, - "learning_rate": 8.152222197543478e-08, - "loss": 3.8231, - "step": 1566000 - }, - { - "epoch": 17.4, - "learning_rate": 8.150834018168491e-08, - "loss": 3.8085, - "step": 1566500 - }, - { - "epoch": 17.4, - "learning_rate": 8.149445838793505e-08, - "loss": 3.8029, - "step": 1567000 - }, - { - "epoch": 17.41, - "learning_rate": 8.148057659418519e-08, - "loss": 3.821, - "step": 1567500 - }, - { - "epoch": 17.41, - "learning_rate": 8.146669480043533e-08, - "loss": 3.8029, - "step": 1568000 - }, - { - "epoch": 17.42, - "learning_rate": 8.145281300668548e-08, - "loss": 3.807, - "step": 1568500 - }, - { - "epoch": 17.42, - "learning_rate": 8.14389312129356e-08, - "loss": 3.7985, - "step": 1569000 - }, - { - "epoch": 17.43, - "learning_rate": 8.142504941918575e-08, - "loss": 3.8087, - "step": 1569500 - }, - { - "epoch": 17.44, - "learning_rate": 8.141116762543588e-08, - "loss": 3.8044, - "step": 1570000 - }, - { - "epoch": 17.44, - "learning_rate": 8.139728583168602e-08, - "loss": 3.8027, - "step": 1570500 - }, - { - "epoch": 17.45, - "learning_rate": 8.138340403793616e-08, - "loss": 3.7967, - "step": 1571000 - }, - { - "epoch": 17.45, - "learning_rate": 8.13695222441863e-08, - "loss": 3.8133, - "step": 1571500 - }, - { - "epoch": 17.46, - "learning_rate": 8.135564045043643e-08, - "loss": 3.8169, - "step": 1572000 - }, - { - "epoch": 17.46, - "learning_rate": 8.134175865668658e-08, - "loss": 3.8119, - "step": 1572500 - }, - { - "epoch": 17.47, - "learning_rate": 8.132787686293672e-08, - "loss": 3.8137, - "step": 1573000 - }, - { - "epoch": 17.47, - "learning_rate": 8.131399506918686e-08, - "loss": 3.8217, - "step": 1573500 - }, - { - "epoch": 17.48, - "learning_rate": 8.1300113275437e-08, - "loss": 3.8088, - "step": 1574000 - }, - { - "epoch": 17.49, - "learning_rate": 8.128623148168713e-08, - "loss": 3.8072, - "step": 1574500 - }, - { - "epoch": 17.49, - "learning_rate": 8.127234968793727e-08, - "loss": 3.8164, - "step": 1575000 - }, - { - "epoch": 17.5, - "learning_rate": 8.125846789418742e-08, - "loss": 3.8114, - "step": 1575500 - }, - { - "epoch": 17.5, - "learning_rate": 8.124458610043754e-08, - "loss": 3.8284, - "step": 1576000 - }, - { - "epoch": 17.51, - "learning_rate": 8.123070430668769e-08, - "loss": 3.8031, - "step": 1576500 - }, - { - "epoch": 17.51, - "learning_rate": 8.121682251293783e-08, - "loss": 3.8156, - "step": 1577000 - }, - { - "epoch": 17.52, - "learning_rate": 8.120294071918796e-08, - "loss": 3.8122, - "step": 1577500 - }, - { - "epoch": 17.52, - "learning_rate": 8.11890589254381e-08, - "loss": 3.806, - "step": 1578000 - }, - { - "epoch": 17.53, - "learning_rate": 8.117517713168824e-08, - "loss": 3.8084, - "step": 1578500 - }, - { - "epoch": 17.54, - "learning_rate": 8.116129533793839e-08, - "loss": 3.8027, - "step": 1579000 - }, - { - "epoch": 17.54, - "learning_rate": 8.114741354418853e-08, - "loss": 3.826, - "step": 1579500 - }, - { - "epoch": 17.55, - "learning_rate": 8.113353175043867e-08, - "loss": 3.8197, - "step": 1580000 - }, - { - "epoch": 17.55, - "learning_rate": 8.11196499566888e-08, - "loss": 3.7976, - "step": 1580500 - }, - { - "epoch": 17.56, - "learning_rate": 8.110576816293894e-08, - "loss": 3.8124, - "step": 1581000 - }, - { - "epoch": 17.56, - "learning_rate": 8.109188636918908e-08, - "loss": 3.7982, - "step": 1581500 - }, - { - "epoch": 17.57, - "learning_rate": 8.107800457543921e-08, - "loss": 3.8033, - "step": 1582000 - }, - { - "epoch": 17.57, - "learning_rate": 8.106412278168935e-08, - "loss": 3.8034, - "step": 1582500 - }, - { - "epoch": 17.58, - "learning_rate": 8.105024098793948e-08, - "loss": 3.8005, - "step": 1583000 - }, - { - "epoch": 17.59, - "learning_rate": 8.103635919418963e-08, - "loss": 3.8113, - "step": 1583500 - }, - { - "epoch": 17.59, - "learning_rate": 8.102247740043977e-08, - "loss": 3.8051, - "step": 1584000 - }, - { - "epoch": 17.6, - "learning_rate": 8.100859560668991e-08, - "loss": 3.8114, - "step": 1584500 - }, - { - "epoch": 17.6, - "learning_rate": 8.099471381294005e-08, - "loss": 3.7973, - "step": 1585000 - }, - { - "epoch": 17.61, - "learning_rate": 8.09808320191902e-08, - "loss": 3.8058, - "step": 1585500 - }, - { - "epoch": 17.61, - "learning_rate": 8.096695022544032e-08, - "loss": 3.8052, - "step": 1586000 - }, - { - "epoch": 17.62, - "learning_rate": 8.095306843169047e-08, - "loss": 3.8029, - "step": 1586500 - }, - { - "epoch": 17.62, - "learning_rate": 8.093918663794061e-08, - "loss": 3.8095, - "step": 1587000 - }, - { - "epoch": 17.63, - "learning_rate": 8.092530484419075e-08, - "loss": 3.7888, - "step": 1587500 - }, - { - "epoch": 17.64, - "learning_rate": 8.091142305044088e-08, - "loss": 3.8118, - "step": 1588000 - }, - { - "epoch": 17.64, - "learning_rate": 8.089754125669102e-08, - "loss": 3.8101, - "step": 1588500 - }, - { - "epoch": 17.65, - "learning_rate": 8.088365946294115e-08, - "loss": 3.8108, - "step": 1589000 - }, - { - "epoch": 17.65, - "learning_rate": 8.086977766919129e-08, - "loss": 3.8253, - "step": 1589500 - }, - { - "epoch": 17.66, - "learning_rate": 8.085589587544144e-08, - "loss": 3.7959, - "step": 1590000 - }, - { - "epoch": 17.66, - "learning_rate": 8.084201408169158e-08, - "loss": 3.8004, - "step": 1590500 - }, - { - "epoch": 17.67, - "learning_rate": 8.082813228794172e-08, - "loss": 3.8229, - "step": 1591000 - }, - { - "epoch": 17.67, - "learning_rate": 8.081425049419185e-08, - "loss": 3.7977, - "step": 1591500 - }, - { - "epoch": 17.68, - "learning_rate": 8.080036870044199e-08, - "loss": 3.8166, - "step": 1592000 - }, - { - "epoch": 17.69, - "learning_rate": 8.078648690669213e-08, - "loss": 3.8166, - "step": 1592500 - }, - { - "epoch": 17.69, - "learning_rate": 8.077260511294228e-08, - "loss": 3.8313, - "step": 1593000 - }, - { - "epoch": 17.7, - "learning_rate": 8.075872331919242e-08, - "loss": 3.8007, - "step": 1593500 - }, - { - "epoch": 17.7, - "learning_rate": 8.074484152544255e-08, - "loss": 3.8039, - "step": 1594000 - }, - { - "epoch": 17.71, - "learning_rate": 8.073095973169268e-08, - "loss": 3.8128, - "step": 1594500 - }, - { - "epoch": 17.71, - "learning_rate": 8.071707793794282e-08, - "loss": 3.7996, - "step": 1595000 - }, - { - "epoch": 17.72, - "learning_rate": 8.070319614419296e-08, - "loss": 3.8031, - "step": 1595500 - }, - { - "epoch": 17.72, - "learning_rate": 8.06893143504431e-08, - "loss": 3.8277, - "step": 1596000 - }, - { - "epoch": 17.73, - "learning_rate": 8.067543255669325e-08, - "loss": 3.8008, - "step": 1596500 - }, - { - "epoch": 17.74, - "learning_rate": 8.066155076294337e-08, - "loss": 3.8132, - "step": 1597000 - }, - { - "epoch": 17.74, - "learning_rate": 8.064766896919352e-08, - "loss": 3.8232, - "step": 1597500 - }, - { - "epoch": 17.75, - "learning_rate": 8.063378717544366e-08, - "loss": 3.8208, - "step": 1598000 - }, - { - "epoch": 17.75, - "learning_rate": 8.06199053816938e-08, - "loss": 3.8144, - "step": 1598500 - }, - { - "epoch": 17.76, - "learning_rate": 8.060602358794394e-08, - "loss": 3.825, - "step": 1599000 - }, - { - "epoch": 17.76, - "learning_rate": 8.059214179419409e-08, - "loss": 3.8091, - "step": 1599500 - }, - { - "epoch": 17.77, - "learning_rate": 8.057826000044421e-08, - "loss": 3.8149, - "step": 1600000 - }, - { - "epoch": 17.77, - "learning_rate": 8.056437820669434e-08, - "loss": 3.8123, - "step": 1600500 - }, - { - "epoch": 17.78, - "learning_rate": 8.055049641294449e-08, - "loss": 3.8083, - "step": 1601000 - }, - { - "epoch": 17.79, - "learning_rate": 8.053661461919463e-08, - "loss": 3.8047, - "step": 1601500 - }, - { - "epoch": 17.79, - "learning_rate": 8.052273282544477e-08, - "loss": 3.8127, - "step": 1602000 - }, - { - "epoch": 17.8, - "learning_rate": 8.050885103169491e-08, - "loss": 3.7996, - "step": 1602500 - }, - { - "epoch": 17.8, - "learning_rate": 8.049496923794504e-08, - "loss": 3.8156, - "step": 1603000 - }, - { - "epoch": 17.81, - "learning_rate": 8.048108744419518e-08, - "loss": 3.8025, - "step": 1603500 - }, - { - "epoch": 17.81, - "learning_rate": 8.046720565044533e-08, - "loss": 3.8005, - "step": 1604000 - }, - { - "epoch": 17.82, - "learning_rate": 8.045332385669547e-08, - "loss": 3.8118, - "step": 1604500 - }, - { - "epoch": 17.82, - "learning_rate": 8.043944206294561e-08, - "loss": 3.8133, - "step": 1605000 - }, - { - "epoch": 17.83, - "learning_rate": 8.042556026919574e-08, - "loss": 3.8172, - "step": 1605500 - }, - { - "epoch": 17.84, - "learning_rate": 8.041167847544588e-08, - "loss": 3.7863, - "step": 1606000 - }, - { - "epoch": 17.84, - "learning_rate": 8.039779668169601e-08, - "loss": 3.7791, - "step": 1606500 - }, - { - "epoch": 17.85, - "learning_rate": 8.038391488794615e-08, - "loss": 3.7922, - "step": 1607000 - }, - { - "epoch": 17.85, - "learning_rate": 8.03700330941963e-08, - "loss": 3.7973, - "step": 1607500 - }, - { - "epoch": 17.86, - "learning_rate": 8.035615130044644e-08, - "loss": 3.8094, - "step": 1608000 - }, - { - "epoch": 17.86, - "learning_rate": 8.034226950669657e-08, - "loss": 3.7928, - "step": 1608500 - }, - { - "epoch": 17.87, - "learning_rate": 8.032838771294671e-08, - "loss": 3.8039, - "step": 1609000 - }, - { - "epoch": 17.87, - "learning_rate": 8.031450591919685e-08, - "loss": 3.7949, - "step": 1609500 - }, - { - "epoch": 17.88, - "learning_rate": 8.0300624125447e-08, - "loss": 3.7983, - "step": 1610000 - }, - { - "epoch": 17.89, - "learning_rate": 8.028674233169714e-08, - "loss": 3.8129, - "step": 1610500 - }, - { - "epoch": 17.89, - "learning_rate": 8.027286053794727e-08, - "loss": 3.8007, - "step": 1611000 - }, - { - "epoch": 17.9, - "learning_rate": 8.025897874419741e-08, - "loss": 3.8029, - "step": 1611500 - }, - { - "epoch": 17.9, - "learning_rate": 8.024509695044755e-08, - "loss": 3.8091, - "step": 1612000 - }, - { - "epoch": 17.91, - "learning_rate": 8.023121515669768e-08, - "loss": 3.8038, - "step": 1612500 - }, - { - "epoch": 17.91, - "learning_rate": 8.021733336294782e-08, - "loss": 3.806, - "step": 1613000 - }, - { - "epoch": 17.92, - "learning_rate": 8.020345156919796e-08, - "loss": 3.8024, - "step": 1613500 - }, - { - "epoch": 17.92, - "learning_rate": 8.018956977544809e-08, - "loss": 3.801, - "step": 1614000 - }, - { - "epoch": 17.93, - "learning_rate": 8.017568798169823e-08, - "loss": 3.8097, - "step": 1614500 - }, - { - "epoch": 17.94, - "learning_rate": 8.016180618794838e-08, - "loss": 3.7985, - "step": 1615000 - }, - { - "epoch": 17.94, - "learning_rate": 8.014792439419852e-08, - "loss": 3.8051, - "step": 1615500 - }, - { - "epoch": 17.95, - "learning_rate": 8.013404260044866e-08, - "loss": 3.8034, - "step": 1616000 - }, - { - "epoch": 17.95, - "learning_rate": 8.01201608066988e-08, - "loss": 3.8052, - "step": 1616500 - }, - { - "epoch": 17.96, - "learning_rate": 8.010627901294893e-08, - "loss": 3.8057, - "step": 1617000 - }, - { - "epoch": 17.96, - "learning_rate": 8.009239721919907e-08, - "loss": 3.7846, - "step": 1617500 - }, - { - "epoch": 17.97, - "learning_rate": 8.007851542544922e-08, - "loss": 3.7851, - "step": 1618000 - }, - { - "epoch": 17.97, - "learning_rate": 8.006463363169935e-08, - "loss": 3.8106, - "step": 1618500 - }, - { - "epoch": 17.98, - "learning_rate": 8.005075183794949e-08, - "loss": 3.8231, - "step": 1619000 - }, - { - "epoch": 17.99, - "learning_rate": 8.003687004419962e-08, - "loss": 3.8266, - "step": 1619500 - }, - { - "epoch": 17.99, - "learning_rate": 8.002298825044976e-08, - "loss": 3.8091, - "step": 1620000 - }, - { - "epoch": 18.0, - "learning_rate": 8.00091064566999e-08, - "loss": 3.8298, - "step": 1620500 - }, - { - "epoch": 18.0, - "eval_loss": 3.8592498302459717, - "eval_runtime": 6.3075, - "eval_samples_per_second": 246.375, - "step": 1620828 - }, - { - "epoch": 18.0, - "learning_rate": 7.999522466295004e-08, - "loss": 3.8243, - "step": 1621000 - }, - { - "epoch": 18.01, - "learning_rate": 7.998134286920019e-08, - "loss": 3.8152, - "step": 1621500 - }, - { - "epoch": 18.01, - "learning_rate": 7.996746107545033e-08, - "loss": 3.793, - "step": 1622000 - }, - { - "epoch": 18.02, - "learning_rate": 7.995357928170046e-08, - "loss": 3.8098, - "step": 1622500 - }, - { - "epoch": 18.02, - "learning_rate": 7.99396974879506e-08, - "loss": 3.8123, - "step": 1623000 - }, - { - "epoch": 18.03, - "learning_rate": 7.992581569420074e-08, - "loss": 3.7995, - "step": 1623500 - }, - { - "epoch": 18.04, - "learning_rate": 7.991193390045088e-08, - "loss": 3.8055, - "step": 1624000 - }, - { - "epoch": 18.04, - "learning_rate": 7.989805210670101e-08, - "loss": 3.8155, - "step": 1624500 - }, - { - "epoch": 18.05, - "learning_rate": 7.988417031295116e-08, - "loss": 3.7985, - "step": 1625000 - }, - { - "epoch": 18.05, - "learning_rate": 7.987028851920128e-08, - "loss": 3.7892, - "step": 1625500 - }, - { - "epoch": 18.06, - "learning_rate": 7.985640672545143e-08, - "loss": 3.8237, - "step": 1626000 - }, - { - "epoch": 18.06, - "learning_rate": 7.984252493170157e-08, - "loss": 3.7959, - "step": 1626500 - }, - { - "epoch": 18.07, - "learning_rate": 7.982864313795171e-08, - "loss": 3.7915, - "step": 1627000 - }, - { - "epoch": 18.07, - "learning_rate": 7.981476134420185e-08, - "loss": 3.7881, - "step": 1627500 - }, - { - "epoch": 18.08, - "learning_rate": 7.980087955045198e-08, - "loss": 3.8044, - "step": 1628000 - }, - { - "epoch": 18.09, - "learning_rate": 7.978699775670213e-08, - "loss": 3.7891, - "step": 1628500 - }, - { - "epoch": 18.09, - "learning_rate": 7.977311596295227e-08, - "loss": 3.7869, - "step": 1629000 - }, - { - "epoch": 18.1, - "learning_rate": 7.975923416920241e-08, - "loss": 3.8062, - "step": 1629500 - }, - { - "epoch": 18.1, - "learning_rate": 7.974535237545255e-08, - "loss": 3.8081, - "step": 1630000 - }, - { - "epoch": 18.11, - "learning_rate": 7.973147058170268e-08, - "loss": 3.8021, - "step": 1630500 - }, - { - "epoch": 18.11, - "learning_rate": 7.971758878795281e-08, - "loss": 3.7914, - "step": 1631000 - }, - { - "epoch": 18.12, - "learning_rate": 7.970370699420295e-08, - "loss": 3.7964, - "step": 1631500 - }, - { - "epoch": 18.12, - "learning_rate": 7.96898252004531e-08, - "loss": 3.7964, - "step": 1632000 - }, - { - "epoch": 18.13, - "learning_rate": 7.967594340670324e-08, - "loss": 3.814, - "step": 1632500 - }, - { - "epoch": 18.14, - "learning_rate": 7.966206161295338e-08, - "loss": 3.8009, - "step": 1633000 - }, - { - "epoch": 18.14, - "learning_rate": 7.964817981920351e-08, - "loss": 3.8109, - "step": 1633500 - }, - { - "epoch": 18.15, - "learning_rate": 7.963429802545365e-08, - "loss": 3.8077, - "step": 1634000 - }, - { - "epoch": 18.15, - "learning_rate": 7.962041623170379e-08, - "loss": 3.8147, - "step": 1634500 - }, - { - "epoch": 18.16, - "learning_rate": 7.960653443795393e-08, - "loss": 3.8093, - "step": 1635000 - }, - { - "epoch": 18.16, - "learning_rate": 7.959265264420408e-08, - "loss": 3.8039, - "step": 1635500 - }, - { - "epoch": 18.17, - "learning_rate": 7.957877085045422e-08, - "loss": 3.8128, - "step": 1636000 - }, - { - "epoch": 18.17, - "learning_rate": 7.956488905670435e-08, - "loss": 3.7914, - "step": 1636500 - }, - { - "epoch": 18.18, - "learning_rate": 7.955100726295448e-08, - "loss": 3.798, - "step": 1637000 - }, - { - "epoch": 18.19, - "learning_rate": 7.953712546920462e-08, - "loss": 3.8156, - "step": 1637500 - }, - { - "epoch": 18.19, - "learning_rate": 7.952324367545476e-08, - "loss": 3.8045, - "step": 1638000 - }, - { - "epoch": 18.2, - "learning_rate": 7.95093618817049e-08, - "loss": 3.798, - "step": 1638500 - }, - { - "epoch": 18.2, - "learning_rate": 7.949548008795505e-08, - "loss": 3.7958, - "step": 1639000 - }, - { - "epoch": 18.21, - "learning_rate": 7.948159829420518e-08, - "loss": 3.7975, - "step": 1639500 - }, - { - "epoch": 18.21, - "learning_rate": 7.946771650045532e-08, - "loss": 3.8075, - "step": 1640000 - }, - { - "epoch": 18.22, - "learning_rate": 7.945383470670546e-08, - "loss": 3.7894, - "step": 1640500 - }, - { - "epoch": 18.22, - "learning_rate": 7.94399529129556e-08, - "loss": 3.8022, - "step": 1641000 - }, - { - "epoch": 18.23, - "learning_rate": 7.942607111920574e-08, - "loss": 3.8065, - "step": 1641500 - }, - { - "epoch": 18.24, - "learning_rate": 7.941218932545587e-08, - "loss": 3.8134, - "step": 1642000 - }, - { - "epoch": 18.24, - "learning_rate": 7.939830753170602e-08, - "loss": 3.7995, - "step": 1642500 - }, - { - "epoch": 18.25, - "learning_rate": 7.938442573795614e-08, - "loss": 3.8211, - "step": 1643000 - }, - { - "epoch": 18.25, - "learning_rate": 7.937054394420629e-08, - "loss": 3.8057, - "step": 1643500 - }, - { - "epoch": 18.26, - "learning_rate": 7.935666215045643e-08, - "loss": 3.8048, - "step": 1644000 - }, - { - "epoch": 18.26, - "learning_rate": 7.934278035670657e-08, - "loss": 3.8267, - "step": 1644500 - }, - { - "epoch": 18.27, - "learning_rate": 7.93288985629567e-08, - "loss": 3.8134, - "step": 1645000 - }, - { - "epoch": 18.27, - "learning_rate": 7.931501676920684e-08, - "loss": 3.7961, - "step": 1645500 - }, - { - "epoch": 18.28, - "learning_rate": 7.930113497545699e-08, - "loss": 3.8012, - "step": 1646000 - }, - { - "epoch": 18.29, - "learning_rate": 7.928725318170713e-08, - "loss": 3.8117, - "step": 1646500 - }, - { - "epoch": 18.29, - "learning_rate": 7.927337138795727e-08, - "loss": 3.8043, - "step": 1647000 - }, - { - "epoch": 18.3, - "learning_rate": 7.92594895942074e-08, - "loss": 3.8062, - "step": 1647500 - }, - { - "epoch": 18.3, - "learning_rate": 7.924560780045754e-08, - "loss": 3.7995, - "step": 1648000 - }, - { - "epoch": 18.31, - "learning_rate": 7.923172600670768e-08, - "loss": 3.7896, - "step": 1648500 - }, - { - "epoch": 18.31, - "learning_rate": 7.921784421295781e-08, - "loss": 3.7983, - "step": 1649000 - }, - { - "epoch": 18.32, - "learning_rate": 7.920396241920795e-08, - "loss": 3.8059, - "step": 1649500 - }, - { - "epoch": 18.32, - "learning_rate": 7.91900806254581e-08, - "loss": 3.7998, - "step": 1650000 - }, - { - "epoch": 18.33, - "learning_rate": 7.917619883170823e-08, - "loss": 3.8103, - "step": 1650500 - }, - { - "epoch": 18.34, - "learning_rate": 7.916231703795837e-08, - "loss": 3.8165, - "step": 1651000 - }, - { - "epoch": 18.34, - "learning_rate": 7.914843524420851e-08, - "loss": 3.8103, - "step": 1651500 - }, - { - "epoch": 18.35, - "learning_rate": 7.913455345045865e-08, - "loss": 3.7898, - "step": 1652000 - }, - { - "epoch": 18.35, - "learning_rate": 7.91206716567088e-08, - "loss": 3.8264, - "step": 1652500 - }, - { - "epoch": 18.36, - "learning_rate": 7.910678986295894e-08, - "loss": 3.801, - "step": 1653000 - }, - { - "epoch": 18.36, - "learning_rate": 7.909290806920907e-08, - "loss": 3.8075, - "step": 1653500 - }, - { - "epoch": 18.37, - "learning_rate": 7.907902627545921e-08, - "loss": 3.8058, - "step": 1654000 - }, - { - "epoch": 18.37, - "learning_rate": 7.906514448170935e-08, - "loss": 3.7893, - "step": 1654500 - }, - { - "epoch": 18.38, - "learning_rate": 7.905126268795948e-08, - "loss": 3.7998, - "step": 1655000 - }, - { - "epoch": 18.39, - "learning_rate": 7.903738089420962e-08, - "loss": 3.8315, - "step": 1655500 - }, - { - "epoch": 18.39, - "learning_rate": 7.902349910045975e-08, - "loss": 3.7957, - "step": 1656000 - }, - { - "epoch": 18.4, - "learning_rate": 7.90096173067099e-08, - "loss": 3.816, - "step": 1656500 - }, - { - "epoch": 18.4, - "learning_rate": 7.899573551296004e-08, - "loss": 3.7955, - "step": 1657000 - }, - { - "epoch": 18.41, - "learning_rate": 7.898185371921018e-08, - "loss": 3.8179, - "step": 1657500 - }, - { - "epoch": 18.41, - "learning_rate": 7.896797192546032e-08, - "loss": 3.7936, - "step": 1658000 - }, - { - "epoch": 18.42, - "learning_rate": 7.895409013171046e-08, - "loss": 3.8086, - "step": 1658500 - }, - { - "epoch": 18.42, - "learning_rate": 7.894020833796059e-08, - "loss": 3.8047, - "step": 1659000 - }, - { - "epoch": 18.43, - "learning_rate": 7.892632654421073e-08, - "loss": 3.7983, - "step": 1659500 - }, - { - "epoch": 18.44, - "learning_rate": 7.891244475046088e-08, - "loss": 3.8145, - "step": 1660000 - }, - { - "epoch": 18.44, - "learning_rate": 7.889856295671102e-08, - "loss": 3.7944, - "step": 1660500 - }, - { - "epoch": 18.45, - "learning_rate": 7.888468116296115e-08, - "loss": 3.8025, - "step": 1661000 - }, - { - "epoch": 18.45, - "learning_rate": 7.887079936921129e-08, - "loss": 3.79, - "step": 1661500 - }, - { - "epoch": 18.46, - "learning_rate": 7.885691757546142e-08, - "loss": 3.8295, - "step": 1662000 - }, - { - "epoch": 18.46, - "learning_rate": 7.884303578171156e-08, - "loss": 3.8068, - "step": 1662500 - }, - { - "epoch": 18.47, - "learning_rate": 7.88291539879617e-08, - "loss": 3.811, - "step": 1663000 - }, - { - "epoch": 18.47, - "learning_rate": 7.881527219421185e-08, - "loss": 3.7964, - "step": 1663500 - }, - { - "epoch": 18.48, - "learning_rate": 7.880139040046199e-08, - "loss": 3.8171, - "step": 1664000 - }, - { - "epoch": 18.48, - "learning_rate": 7.878750860671212e-08, - "loss": 3.8098, - "step": 1664500 - }, - { - "epoch": 18.49, - "learning_rate": 7.877362681296226e-08, - "loss": 3.7987, - "step": 1665000 - }, - { - "epoch": 18.5, - "learning_rate": 7.87597450192124e-08, - "loss": 3.8104, - "step": 1665500 - }, - { - "epoch": 18.5, - "learning_rate": 7.874586322546254e-08, - "loss": 3.8168, - "step": 1666000 - }, - { - "epoch": 18.51, - "learning_rate": 7.873198143171269e-08, - "loss": 3.8133, - "step": 1666500 - }, - { - "epoch": 18.51, - "learning_rate": 7.871809963796281e-08, - "loss": 3.799, - "step": 1667000 - }, - { - "epoch": 18.52, - "learning_rate": 7.870421784421294e-08, - "loss": 3.8096, - "step": 1667500 - }, - { - "epoch": 18.52, - "learning_rate": 7.869033605046309e-08, - "loss": 3.8316, - "step": 1668000 - }, - { - "epoch": 18.53, - "learning_rate": 7.867645425671323e-08, - "loss": 3.7886, - "step": 1668500 - }, - { - "epoch": 18.53, - "learning_rate": 7.866257246296337e-08, - "loss": 3.8021, - "step": 1669000 - }, - { - "epoch": 18.54, - "learning_rate": 7.864869066921351e-08, - "loss": 3.7834, - "step": 1669500 - }, - { - "epoch": 18.55, - "learning_rate": 7.863480887546364e-08, - "loss": 3.8008, - "step": 1670000 - }, - { - "epoch": 18.55, - "learning_rate": 7.862092708171378e-08, - "loss": 3.8158, - "step": 1670500 - }, - { - "epoch": 18.56, - "learning_rate": 7.860704528796393e-08, - "loss": 3.7952, - "step": 1671000 - }, - { - "epoch": 18.56, - "learning_rate": 7.859316349421407e-08, - "loss": 3.8093, - "step": 1671500 - }, - { - "epoch": 18.57, - "learning_rate": 7.857928170046421e-08, - "loss": 3.8114, - "step": 1672000 - }, - { - "epoch": 18.57, - "learning_rate": 7.856539990671435e-08, - "loss": 3.7972, - "step": 1672500 - }, - { - "epoch": 18.58, - "learning_rate": 7.855151811296448e-08, - "loss": 3.7824, - "step": 1673000 - }, - { - "epoch": 18.58, - "learning_rate": 7.853763631921461e-08, - "loss": 3.7884, - "step": 1673500 - }, - { - "epoch": 18.59, - "learning_rate": 7.852375452546475e-08, - "loss": 3.8032, - "step": 1674000 - }, - { - "epoch": 18.6, - "learning_rate": 7.85098727317149e-08, - "loss": 3.8057, - "step": 1674500 - }, - { - "epoch": 18.6, - "learning_rate": 7.849599093796504e-08, - "loss": 3.8021, - "step": 1675000 - }, - { - "epoch": 18.61, - "learning_rate": 7.848210914421518e-08, - "loss": 3.8104, - "step": 1675500 - }, - { - "epoch": 18.61, - "learning_rate": 7.846822735046531e-08, - "loss": 3.8176, - "step": 1676000 - }, - { - "epoch": 18.62, - "learning_rate": 7.845434555671545e-08, - "loss": 3.7965, - "step": 1676500 - }, - { - "epoch": 18.62, - "learning_rate": 7.84404637629656e-08, - "loss": 3.7893, - "step": 1677000 - }, - { - "epoch": 18.63, - "learning_rate": 7.842658196921574e-08, - "loss": 3.8028, - "step": 1677500 - }, - { - "epoch": 18.63, - "learning_rate": 7.841270017546588e-08, - "loss": 3.8143, - "step": 1678000 - }, - { - "epoch": 18.64, - "learning_rate": 7.839881838171601e-08, - "loss": 3.7986, - "step": 1678500 - }, - { - "epoch": 18.65, - "learning_rate": 7.838493658796615e-08, - "loss": 3.8016, - "step": 1679000 - }, - { - "epoch": 18.65, - "learning_rate": 7.837105479421628e-08, - "loss": 3.7878, - "step": 1679500 - }, - { - "epoch": 18.66, - "learning_rate": 7.835717300046642e-08, - "loss": 3.8143, - "step": 1680000 - }, - { - "epoch": 18.66, - "learning_rate": 7.834329120671656e-08, - "loss": 3.8011, - "step": 1680500 - }, - { - "epoch": 18.67, - "learning_rate": 7.83294094129667e-08, - "loss": 3.8107, - "step": 1681000 - }, - { - "epoch": 18.67, - "learning_rate": 7.831552761921683e-08, - "loss": 3.8064, - "step": 1681500 - }, - { - "epoch": 18.68, - "learning_rate": 7.830164582546698e-08, - "loss": 3.8115, - "step": 1682000 - }, - { - "epoch": 18.68, - "learning_rate": 7.828776403171712e-08, - "loss": 3.7995, - "step": 1682500 - }, - { - "epoch": 18.69, - "learning_rate": 7.827388223796726e-08, - "loss": 3.7899, - "step": 1683000 - }, - { - "epoch": 18.7, - "learning_rate": 7.82600004442174e-08, - "loss": 3.8011, - "step": 1683500 - }, - { - "epoch": 18.7, - "learning_rate": 7.824611865046755e-08, - "loss": 3.8036, - "step": 1684000 - }, - { - "epoch": 18.71, - "learning_rate": 7.823223685671767e-08, - "loss": 3.7953, - "step": 1684500 - }, - { - "epoch": 18.71, - "learning_rate": 7.821835506296782e-08, - "loss": 3.8062, - "step": 1685000 - }, - { - "epoch": 18.72, - "learning_rate": 7.820447326921795e-08, - "loss": 3.8018, - "step": 1685500 - }, - { - "epoch": 18.72, - "learning_rate": 7.819059147546809e-08, - "loss": 3.8173, - "step": 1686000 - }, - { - "epoch": 18.73, - "learning_rate": 7.817670968171823e-08, - "loss": 3.7973, - "step": 1686500 - }, - { - "epoch": 18.73, - "learning_rate": 7.816282788796836e-08, - "loss": 3.7905, - "step": 1687000 - }, - { - "epoch": 18.74, - "learning_rate": 7.81489460942185e-08, - "loss": 3.7987, - "step": 1687500 - }, - { - "epoch": 18.75, - "learning_rate": 7.813506430046864e-08, - "loss": 3.7968, - "step": 1688000 - }, - { - "epoch": 18.75, - "learning_rate": 7.812118250671879e-08, - "loss": 3.8173, - "step": 1688500 - }, - { - "epoch": 18.76, - "learning_rate": 7.810730071296893e-08, - "loss": 3.8063, - "step": 1689000 - }, - { - "epoch": 18.76, - "learning_rate": 7.809341891921907e-08, - "loss": 3.7957, - "step": 1689500 - }, - { - "epoch": 18.77, - "learning_rate": 7.80795371254692e-08, - "loss": 3.8068, - "step": 1690000 - }, - { - "epoch": 18.77, - "learning_rate": 7.806565533171934e-08, - "loss": 3.78, - "step": 1690500 - }, - { - "epoch": 18.78, - "learning_rate": 7.805177353796948e-08, - "loss": 3.8095, - "step": 1691000 - }, - { - "epoch": 18.78, - "learning_rate": 7.803789174421961e-08, - "loss": 3.8058, - "step": 1691500 - }, - { - "epoch": 18.79, - "learning_rate": 7.802400995046976e-08, - "loss": 3.7942, - "step": 1692000 - }, - { - "epoch": 18.8, - "learning_rate": 7.801012815671988e-08, - "loss": 3.8032, - "step": 1692500 - }, - { - "epoch": 18.8, - "learning_rate": 7.799624636297003e-08, - "loss": 3.7896, - "step": 1693000 - }, - { - "epoch": 18.81, - "learning_rate": 7.798236456922017e-08, - "loss": 3.7838, - "step": 1693500 - }, - { - "epoch": 18.81, - "learning_rate": 7.796848277547031e-08, - "loss": 3.7953, - "step": 1694000 - }, - { - "epoch": 18.82, - "learning_rate": 7.795460098172045e-08, - "loss": 3.7927, - "step": 1694500 - }, - { - "epoch": 18.82, - "learning_rate": 7.79407191879706e-08, - "loss": 3.7847, - "step": 1695000 - }, - { - "epoch": 18.83, - "learning_rate": 7.792683739422073e-08, - "loss": 3.8211, - "step": 1695500 - }, - { - "epoch": 18.83, - "learning_rate": 7.791295560047087e-08, - "loss": 3.8017, - "step": 1696000 - }, - { - "epoch": 18.84, - "learning_rate": 7.789907380672101e-08, - "loss": 3.7961, - "step": 1696500 - }, - { - "epoch": 18.85, - "learning_rate": 7.788519201297115e-08, - "loss": 3.8013, - "step": 1697000 - }, - { - "epoch": 18.85, - "learning_rate": 7.787131021922128e-08, - "loss": 3.8001, - "step": 1697500 - }, - { - "epoch": 18.86, - "learning_rate": 7.785742842547142e-08, - "loss": 3.814, - "step": 1698000 - }, - { - "epoch": 18.86, - "learning_rate": 7.784354663172155e-08, - "loss": 3.7919, - "step": 1698500 - }, - { - "epoch": 18.87, - "learning_rate": 7.78296648379717e-08, - "loss": 3.794, - "step": 1699000 - }, - { - "epoch": 18.87, - "learning_rate": 7.781578304422184e-08, - "loss": 3.7969, - "step": 1699500 - }, - { - "epoch": 18.88, - "learning_rate": 7.780190125047198e-08, - "loss": 3.7963, - "step": 1700000 - }, - { - "epoch": 18.88, - "learning_rate": 7.778801945672212e-08, - "loss": 3.7987, - "step": 1700500 - }, - { - "epoch": 18.89, - "learning_rate": 7.777413766297225e-08, - "loss": 3.7847, - "step": 1701000 - }, - { - "epoch": 18.9, - "learning_rate": 7.776025586922239e-08, - "loss": 3.8126, - "step": 1701500 - }, - { - "epoch": 18.9, - "learning_rate": 7.774637407547254e-08, - "loss": 3.8, - "step": 1702000 - }, - { - "epoch": 18.91, - "learning_rate": 7.773249228172268e-08, - "loss": 3.8206, - "step": 1702500 - }, - { - "epoch": 18.91, - "learning_rate": 7.771861048797282e-08, - "loss": 3.7828, - "step": 1703000 - }, - { - "epoch": 18.92, - "learning_rate": 7.770472869422295e-08, - "loss": 3.8022, - "step": 1703500 - }, - { - "epoch": 18.92, - "learning_rate": 7.769084690047308e-08, - "loss": 3.7976, - "step": 1704000 - }, - { - "epoch": 18.93, - "learning_rate": 7.767696510672322e-08, - "loss": 3.7949, - "step": 1704500 - }, - { - "epoch": 18.93, - "learning_rate": 7.766308331297336e-08, - "loss": 3.7969, - "step": 1705000 - }, - { - "epoch": 18.94, - "learning_rate": 7.76492015192235e-08, - "loss": 3.8113, - "step": 1705500 - }, - { - "epoch": 18.95, - "learning_rate": 7.763531972547365e-08, - "loss": 3.791, - "step": 1706000 - }, - { - "epoch": 18.95, - "learning_rate": 7.762143793172378e-08, - "loss": 3.7971, - "step": 1706500 - }, - { - "epoch": 18.96, - "learning_rate": 7.760755613797392e-08, - "loss": 3.7941, - "step": 1707000 - }, - { - "epoch": 18.96, - "learning_rate": 7.759367434422406e-08, - "loss": 3.8058, - "step": 1707500 - }, - { - "epoch": 18.97, - "learning_rate": 7.75797925504742e-08, - "loss": 3.8023, - "step": 1708000 - }, - { - "epoch": 18.97, - "learning_rate": 7.756591075672434e-08, - "loss": 3.8226, - "step": 1708500 - }, - { - "epoch": 18.98, - "learning_rate": 7.755202896297449e-08, - "loss": 3.7875, - "step": 1709000 - }, - { - "epoch": 18.98, - "learning_rate": 7.753814716922462e-08, - "loss": 3.8119, - "step": 1709500 - }, - { - "epoch": 18.99, - "learning_rate": 7.752426537547475e-08, - "loss": 3.8151, - "step": 1710000 - }, - { - "epoch": 19.0, - "learning_rate": 7.751038358172489e-08, - "loss": 3.8161, - "step": 1710500 - }, - { - "epoch": 19.0, - "eval_loss": 3.8565142154693604, - "eval_runtime": 6.3064, - "eval_samples_per_second": 246.416, - "step": 1710874 - }, - { - "epoch": 19.0, - "learning_rate": 7.749650178797503e-08, - "loss": 3.8229, - "step": 1711000 - }, - { - "epoch": 19.01, - "learning_rate": 7.748261999422517e-08, - "loss": 3.794, - "step": 1711500 - }, - { - "epoch": 19.01, - "learning_rate": 7.746873820047531e-08, - "loss": 3.7888, - "step": 1712000 - }, - { - "epoch": 19.02, - "learning_rate": 7.745485640672544e-08, - "loss": 3.7998, - "step": 1712500 - }, - { - "epoch": 19.02, - "learning_rate": 7.744097461297559e-08, - "loss": 3.7976, - "step": 1713000 - }, - { - "epoch": 19.03, - "learning_rate": 7.742709281922573e-08, - "loss": 3.7674, - "step": 1713500 - }, - { - "epoch": 19.03, - "learning_rate": 7.741321102547587e-08, - "loss": 3.7792, - "step": 1714000 - }, - { - "epoch": 19.04, - "learning_rate": 7.739932923172601e-08, - "loss": 3.7915, - "step": 1714500 - }, - { - "epoch": 19.05, - "learning_rate": 7.738544743797614e-08, - "loss": 3.8065, - "step": 1715000 - }, - { - "epoch": 19.05, - "learning_rate": 7.737156564422628e-08, - "loss": 3.8095, - "step": 1715500 - }, - { - "epoch": 19.06, - "learning_rate": 7.735768385047641e-08, - "loss": 3.816, - "step": 1716000 - }, - { - "epoch": 19.06, - "learning_rate": 7.734380205672655e-08, - "loss": 3.8047, - "step": 1716500 - }, - { - "epoch": 19.07, - "learning_rate": 7.73299202629767e-08, - "loss": 3.7947, - "step": 1717000 - }, - { - "epoch": 19.07, - "learning_rate": 7.731603846922684e-08, - "loss": 3.8103, - "step": 1717500 - }, - { - "epoch": 19.08, - "learning_rate": 7.730215667547697e-08, - "loss": 3.7911, - "step": 1718000 - }, - { - "epoch": 19.08, - "learning_rate": 7.728827488172711e-08, - "loss": 3.8011, - "step": 1718500 - }, - { - "epoch": 19.09, - "learning_rate": 7.727439308797725e-08, - "loss": 3.8143, - "step": 1719000 - }, - { - "epoch": 19.1, - "learning_rate": 7.72605112942274e-08, - "loss": 3.8125, - "step": 1719500 - }, - { - "epoch": 19.1, - "learning_rate": 7.724662950047754e-08, - "loss": 3.8094, - "step": 1720000 - }, - { - "epoch": 19.11, - "learning_rate": 7.723274770672768e-08, - "loss": 3.8021, - "step": 1720500 - }, - { - "epoch": 19.11, - "learning_rate": 7.721886591297781e-08, - "loss": 3.8011, - "step": 1721000 - }, - { - "epoch": 19.12, - "learning_rate": 7.720498411922795e-08, - "loss": 3.7851, - "step": 1721500 - }, - { - "epoch": 19.12, - "learning_rate": 7.719110232547808e-08, - "loss": 3.8019, - "step": 1722000 - }, - { - "epoch": 19.13, - "learning_rate": 7.717722053172822e-08, - "loss": 3.8182, - "step": 1722500 - }, - { - "epoch": 19.13, - "learning_rate": 7.716333873797836e-08, - "loss": 3.8107, - "step": 1723000 - }, - { - "epoch": 19.14, - "learning_rate": 7.71494569442285e-08, - "loss": 3.7883, - "step": 1723500 - }, - { - "epoch": 19.15, - "learning_rate": 7.713557515047864e-08, - "loss": 3.7994, - "step": 1724000 - }, - { - "epoch": 19.15, - "learning_rate": 7.712169335672878e-08, - "loss": 3.7832, - "step": 1724500 - }, - { - "epoch": 19.16, - "learning_rate": 7.710781156297892e-08, - "loss": 3.7814, - "step": 1725000 - }, - { - "epoch": 19.16, - "learning_rate": 7.709392976922906e-08, - "loss": 3.8007, - "step": 1725500 - }, - { - "epoch": 19.17, - "learning_rate": 7.70800479754792e-08, - "loss": 3.7963, - "step": 1726000 - }, - { - "epoch": 19.17, - "learning_rate": 7.706616618172933e-08, - "loss": 3.8004, - "step": 1726500 - }, - { - "epoch": 19.18, - "learning_rate": 7.705228438797948e-08, - "loss": 3.8011, - "step": 1727000 - }, - { - "epoch": 19.18, - "learning_rate": 7.703840259422962e-08, - "loss": 3.794, - "step": 1727500 - }, - { - "epoch": 19.19, - "learning_rate": 7.702452080047975e-08, - "loss": 3.7903, - "step": 1728000 - }, - { - "epoch": 19.2, - "learning_rate": 7.701063900672989e-08, - "loss": 3.8178, - "step": 1728500 - }, - { - "epoch": 19.2, - "learning_rate": 7.699675721298002e-08, - "loss": 3.7936, - "step": 1729000 - }, - { - "epoch": 19.21, - "learning_rate": 7.698287541923016e-08, - "loss": 3.7773, - "step": 1729500 - }, - { - "epoch": 19.21, - "learning_rate": 7.69689936254803e-08, - "loss": 3.8125, - "step": 1730000 - }, - { - "epoch": 19.22, - "learning_rate": 7.695511183173045e-08, - "loss": 3.8203, - "step": 1730500 - }, - { - "epoch": 19.22, - "learning_rate": 7.694123003798059e-08, - "loss": 3.7956, - "step": 1731000 - }, - { - "epoch": 19.23, - "learning_rate": 7.692734824423073e-08, - "loss": 3.7872, - "step": 1731500 - }, - { - "epoch": 19.23, - "learning_rate": 7.691346645048086e-08, - "loss": 3.8037, - "step": 1732000 - }, - { - "epoch": 19.24, - "learning_rate": 7.6899584656731e-08, - "loss": 3.7933, - "step": 1732500 - }, - { - "epoch": 19.25, - "learning_rate": 7.688570286298114e-08, - "loss": 3.8028, - "step": 1733000 - }, - { - "epoch": 19.25, - "learning_rate": 7.687182106923129e-08, - "loss": 3.7937, - "step": 1733500 - }, - { - "epoch": 19.26, - "learning_rate": 7.685793927548141e-08, - "loss": 3.7934, - "step": 1734000 - }, - { - "epoch": 19.26, - "learning_rate": 7.684405748173156e-08, - "loss": 3.7865, - "step": 1734500 - }, - { - "epoch": 19.27, - "learning_rate": 7.683017568798169e-08, - "loss": 3.7982, - "step": 1735000 - }, - { - "epoch": 19.27, - "learning_rate": 7.681629389423183e-08, - "loss": 3.7828, - "step": 1735500 - }, - { - "epoch": 19.28, - "learning_rate": 7.680241210048197e-08, - "loss": 3.7969, - "step": 1736000 - }, - { - "epoch": 19.28, - "learning_rate": 7.678853030673211e-08, - "loss": 3.7867, - "step": 1736500 - }, - { - "epoch": 19.29, - "learning_rate": 7.677464851298226e-08, - "loss": 3.7937, - "step": 1737000 - }, - { - "epoch": 19.3, - "learning_rate": 7.676076671923238e-08, - "loss": 3.8099, - "step": 1737500 - }, - { - "epoch": 19.3, - "learning_rate": 7.674688492548253e-08, - "loss": 3.8038, - "step": 1738000 - }, - { - "epoch": 19.31, - "learning_rate": 7.673300313173267e-08, - "loss": 3.805, - "step": 1738500 - }, - { - "epoch": 19.31, - "learning_rate": 7.671912133798281e-08, - "loss": 3.7951, - "step": 1739000 - }, - { - "epoch": 19.32, - "learning_rate": 7.670523954423295e-08, - "loss": 3.7824, - "step": 1739500 - }, - { - "epoch": 19.32, - "learning_rate": 7.669135775048308e-08, - "loss": 3.8023, - "step": 1740000 - }, - { - "epoch": 19.33, - "learning_rate": 7.667747595673321e-08, - "loss": 3.8166, - "step": 1740500 - }, - { - "epoch": 19.33, - "learning_rate": 7.666359416298335e-08, - "loss": 3.8183, - "step": 1741000 - }, - { - "epoch": 19.34, - "learning_rate": 7.66497123692335e-08, - "loss": 3.8044, - "step": 1741500 - }, - { - "epoch": 19.35, - "learning_rate": 7.663583057548364e-08, - "loss": 3.795, - "step": 1742000 - }, - { - "epoch": 19.35, - "learning_rate": 7.662194878173378e-08, - "loss": 3.8058, - "step": 1742500 - }, - { - "epoch": 19.36, - "learning_rate": 7.660806698798391e-08, - "loss": 3.7886, - "step": 1743000 - }, - { - "epoch": 19.36, - "learning_rate": 7.659418519423405e-08, - "loss": 3.7869, - "step": 1743500 - }, - { - "epoch": 19.37, - "learning_rate": 7.65803034004842e-08, - "loss": 3.8041, - "step": 1744000 - }, - { - "epoch": 19.37, - "learning_rate": 7.656642160673434e-08, - "loss": 3.7995, - "step": 1744500 - }, - { - "epoch": 19.38, - "learning_rate": 7.655253981298448e-08, - "loss": 3.8083, - "step": 1745000 - }, - { - "epoch": 19.38, - "learning_rate": 7.653865801923462e-08, - "loss": 3.7982, - "step": 1745500 - }, - { - "epoch": 19.39, - "learning_rate": 7.652477622548475e-08, - "loss": 3.8015, - "step": 1746000 - }, - { - "epoch": 19.4, - "learning_rate": 7.651089443173488e-08, - "loss": 3.7959, - "step": 1746500 - }, - { - "epoch": 19.4, - "learning_rate": 7.649701263798502e-08, - "loss": 3.8234, - "step": 1747000 - }, - { - "epoch": 19.41, - "learning_rate": 7.648313084423516e-08, - "loss": 3.824, - "step": 1747500 - }, - { - "epoch": 19.41, - "learning_rate": 7.64692490504853e-08, - "loss": 3.7907, - "step": 1748000 - }, - { - "epoch": 19.42, - "learning_rate": 7.645536725673545e-08, - "loss": 3.7896, - "step": 1748500 - }, - { - "epoch": 19.42, - "learning_rate": 7.644148546298558e-08, - "loss": 3.8168, - "step": 1749000 - }, - { - "epoch": 19.43, - "learning_rate": 7.642760366923572e-08, - "loss": 3.8147, - "step": 1749500 - }, - { - "epoch": 19.43, - "learning_rate": 7.641372187548586e-08, - "loss": 3.8147, - "step": 1750000 - }, - { - "epoch": 19.44, - "learning_rate": 7.6399840081736e-08, - "loss": 3.7868, - "step": 1750500 - }, - { - "epoch": 19.45, - "learning_rate": 7.638595828798615e-08, - "loss": 3.7938, - "step": 1751000 - }, - { - "epoch": 19.45, - "learning_rate": 7.637207649423628e-08, - "loss": 3.8054, - "step": 1751500 - }, - { - "epoch": 19.46, - "learning_rate": 7.635819470048642e-08, - "loss": 3.8005, - "step": 1752000 - }, - { - "epoch": 19.46, - "learning_rate": 7.634431290673655e-08, - "loss": 3.7959, - "step": 1752500 - }, - { - "epoch": 19.47, - "learning_rate": 7.633043111298669e-08, - "loss": 3.7931, - "step": 1753000 - }, - { - "epoch": 19.47, - "learning_rate": 7.631654931923683e-08, - "loss": 3.7825, - "step": 1753500 - }, - { - "epoch": 19.48, - "learning_rate": 7.630266752548697e-08, - "loss": 3.8054, - "step": 1754000 - }, - { - "epoch": 19.48, - "learning_rate": 7.62887857317371e-08, - "loss": 3.7934, - "step": 1754500 - }, - { - "epoch": 19.49, - "learning_rate": 7.627490393798724e-08, - "loss": 3.798, - "step": 1755000 - }, - { - "epoch": 19.5, - "learning_rate": 7.626102214423739e-08, - "loss": 3.8198, - "step": 1755500 - }, - { - "epoch": 19.5, - "learning_rate": 7.624714035048753e-08, - "loss": 3.7971, - "step": 1756000 - }, - { - "epoch": 19.51, - "learning_rate": 7.623325855673767e-08, - "loss": 3.7956, - "step": 1756500 - }, - { - "epoch": 19.51, - "learning_rate": 7.621937676298781e-08, - "loss": 3.7888, - "step": 1757000 - }, - { - "epoch": 19.52, - "learning_rate": 7.620549496923794e-08, - "loss": 3.7896, - "step": 1757500 - }, - { - "epoch": 19.52, - "learning_rate": 7.619161317548808e-08, - "loss": 3.799, - "step": 1758000 - }, - { - "epoch": 19.53, - "learning_rate": 7.617773138173821e-08, - "loss": 3.7932, - "step": 1758500 - }, - { - "epoch": 19.53, - "learning_rate": 7.616384958798836e-08, - "loss": 3.8061, - "step": 1759000 - }, - { - "epoch": 19.54, - "learning_rate": 7.61499677942385e-08, - "loss": 3.8021, - "step": 1759500 - }, - { - "epoch": 19.55, - "learning_rate": 7.613608600048863e-08, - "loss": 3.7909, - "step": 1760000 - }, - { - "epoch": 19.55, - "learning_rate": 7.612220420673877e-08, - "loss": 3.8025, - "step": 1760500 - }, - { - "epoch": 19.56, - "learning_rate": 7.610832241298891e-08, - "loss": 3.7997, - "step": 1761000 - }, - { - "epoch": 19.56, - "learning_rate": 7.609444061923905e-08, - "loss": 3.7918, - "step": 1761500 - }, - { - "epoch": 19.57, - "learning_rate": 7.60805588254892e-08, - "loss": 3.8004, - "step": 1762000 - }, - { - "epoch": 19.57, - "learning_rate": 7.606667703173934e-08, - "loss": 3.7896, - "step": 1762500 - }, - { - "epoch": 19.58, - "learning_rate": 7.605279523798947e-08, - "loss": 3.7981, - "step": 1763000 - }, - { - "epoch": 19.58, - "learning_rate": 7.603891344423961e-08, - "loss": 3.8126, - "step": 1763500 - }, - { - "epoch": 19.59, - "learning_rate": 7.602503165048975e-08, - "loss": 3.7757, - "step": 1764000 - }, - { - "epoch": 19.6, - "learning_rate": 7.601114985673988e-08, - "loss": 3.8005, - "step": 1764500 - }, - { - "epoch": 19.6, - "learning_rate": 7.599726806299002e-08, - "loss": 3.7864, - "step": 1765000 - }, - { - "epoch": 19.61, - "learning_rate": 7.598338626924015e-08, - "loss": 3.8066, - "step": 1765500 - }, - { - "epoch": 19.61, - "learning_rate": 7.59695044754903e-08, - "loss": 3.7919, - "step": 1766000 - }, - { - "epoch": 19.62, - "learning_rate": 7.595562268174044e-08, - "loss": 3.8104, - "step": 1766500 - }, - { - "epoch": 19.62, - "learning_rate": 7.594174088799058e-08, - "loss": 3.7901, - "step": 1767000 - }, - { - "epoch": 19.63, - "learning_rate": 7.592785909424072e-08, - "loss": 3.7801, - "step": 1767500 - }, - { - "epoch": 19.63, - "learning_rate": 7.591397730049086e-08, - "loss": 3.7929, - "step": 1768000 - }, - { - "epoch": 19.64, - "learning_rate": 7.590009550674099e-08, - "loss": 3.8095, - "step": 1768500 - }, - { - "epoch": 19.65, - "learning_rate": 7.588621371299114e-08, - "loss": 3.7961, - "step": 1769000 - }, - { - "epoch": 19.65, - "learning_rate": 7.587233191924128e-08, - "loss": 3.8034, - "step": 1769500 - }, - { - "epoch": 19.66, - "learning_rate": 7.585845012549142e-08, - "loss": 3.7942, - "step": 1770000 - }, - { - "epoch": 19.66, - "learning_rate": 7.584456833174155e-08, - "loss": 3.8079, - "step": 1770500 - }, - { - "epoch": 19.67, - "learning_rate": 7.583068653799169e-08, - "loss": 3.7797, - "step": 1771000 - }, - { - "epoch": 19.67, - "learning_rate": 7.581680474424182e-08, - "loss": 3.8036, - "step": 1771500 - }, - { - "epoch": 19.68, - "learning_rate": 7.580292295049196e-08, - "loss": 3.797, - "step": 1772000 - }, - { - "epoch": 19.68, - "learning_rate": 7.57890411567421e-08, - "loss": 3.8003, - "step": 1772500 - }, - { - "epoch": 19.69, - "learning_rate": 7.577515936299225e-08, - "loss": 3.7965, - "step": 1773000 - }, - { - "epoch": 19.7, - "learning_rate": 7.576127756924239e-08, - "loss": 3.7857, - "step": 1773500 - }, - { - "epoch": 19.7, - "learning_rate": 7.574739577549252e-08, - "loss": 3.8118, - "step": 1774000 - }, - { - "epoch": 19.71, - "learning_rate": 7.573351398174266e-08, - "loss": 3.805, - "step": 1774500 - }, - { - "epoch": 19.71, - "learning_rate": 7.57196321879928e-08, - "loss": 3.7956, - "step": 1775000 - }, - { - "epoch": 19.72, - "learning_rate": 7.570575039424294e-08, - "loss": 3.811, - "step": 1775500 - }, - { - "epoch": 19.72, - "learning_rate": 7.569186860049309e-08, - "loss": 3.8026, - "step": 1776000 - }, - { - "epoch": 19.73, - "learning_rate": 7.567798680674322e-08, - "loss": 3.7898, - "step": 1776500 - }, - { - "epoch": 19.73, - "learning_rate": 7.566410501299335e-08, - "loss": 3.7926, - "step": 1777000 - }, - { - "epoch": 19.74, - "learning_rate": 7.565022321924349e-08, - "loss": 3.7924, - "step": 1777500 - }, - { - "epoch": 19.75, - "learning_rate": 7.563634142549363e-08, - "loss": 3.7859, - "step": 1778000 - }, - { - "epoch": 19.75, - "learning_rate": 7.562245963174377e-08, - "loss": 3.8176, - "step": 1778500 - }, - { - "epoch": 19.76, - "learning_rate": 7.560857783799391e-08, - "loss": 3.7954, - "step": 1779000 - }, - { - "epoch": 19.76, - "learning_rate": 7.559469604424406e-08, - "loss": 3.7931, - "step": 1779500 - }, - { - "epoch": 19.77, - "learning_rate": 7.558081425049419e-08, - "loss": 3.8269, - "step": 1780000 - }, - { - "epoch": 19.77, - "learning_rate": 7.556693245674433e-08, - "loss": 3.7931, - "step": 1780500 - }, - { - "epoch": 19.78, - "learning_rate": 7.555305066299447e-08, - "loss": 3.8076, - "step": 1781000 - }, - { - "epoch": 19.78, - "learning_rate": 7.553916886924461e-08, - "loss": 3.7931, - "step": 1781500 - }, - { - "epoch": 19.79, - "learning_rate": 7.552528707549474e-08, - "loss": 3.7941, - "step": 1782000 - }, - { - "epoch": 19.8, - "learning_rate": 7.551140528174488e-08, - "loss": 3.7918, - "step": 1782500 - }, - { - "epoch": 19.8, - "learning_rate": 7.549752348799501e-08, - "loss": 3.7983, - "step": 1783000 - }, - { - "epoch": 19.81, - "learning_rate": 7.548364169424516e-08, - "loss": 3.7864, - "step": 1783500 - }, - { - "epoch": 19.81, - "learning_rate": 7.54697599004953e-08, - "loss": 3.7954, - "step": 1784000 - }, - { - "epoch": 19.82, - "learning_rate": 7.545587810674544e-08, - "loss": 3.7967, - "step": 1784500 - }, - { - "epoch": 19.82, - "learning_rate": 7.544199631299558e-08, - "loss": 3.787, - "step": 1785000 - }, - { - "epoch": 19.83, - "learning_rate": 7.542811451924571e-08, - "loss": 3.7962, - "step": 1785500 - }, - { - "epoch": 19.83, - "learning_rate": 7.541423272549585e-08, - "loss": 3.7912, - "step": 1786000 - }, - { - "epoch": 19.84, - "learning_rate": 7.5400350931746e-08, - "loss": 3.7921, - "step": 1786500 - }, - { - "epoch": 19.85, - "learning_rate": 7.538646913799614e-08, - "loss": 3.8113, - "step": 1787000 - }, - { - "epoch": 19.85, - "learning_rate": 7.537258734424628e-08, - "loss": 3.809, - "step": 1787500 - }, - { - "epoch": 19.86, - "learning_rate": 7.535870555049641e-08, - "loss": 3.8026, - "step": 1788000 - }, - { - "epoch": 19.86, - "learning_rate": 7.534482375674655e-08, - "loss": 3.7974, - "step": 1788500 - }, - { - "epoch": 19.87, - "learning_rate": 7.533094196299668e-08, - "loss": 3.7963, - "step": 1789000 - }, - { - "epoch": 19.87, - "learning_rate": 7.531706016924682e-08, - "loss": 3.8083, - "step": 1789500 - }, - { - "epoch": 19.88, - "learning_rate": 7.530317837549696e-08, - "loss": 3.7901, - "step": 1790000 - }, - { - "epoch": 19.88, - "learning_rate": 7.528929658174711e-08, - "loss": 3.8087, - "step": 1790500 - }, - { - "epoch": 19.89, - "learning_rate": 7.527541478799724e-08, - "loss": 3.8044, - "step": 1791000 - }, - { - "epoch": 19.9, - "learning_rate": 7.526153299424738e-08, - "loss": 3.8059, - "step": 1791500 - }, - { - "epoch": 19.9, - "learning_rate": 7.524765120049752e-08, - "loss": 3.8079, - "step": 1792000 - }, - { - "epoch": 19.91, - "learning_rate": 7.523376940674766e-08, - "loss": 3.812, - "step": 1792500 - }, - { - "epoch": 19.91, - "learning_rate": 7.52198876129978e-08, - "loss": 3.8006, - "step": 1793000 - }, - { - "epoch": 19.92, - "learning_rate": 7.520600581924795e-08, - "loss": 3.7945, - "step": 1793500 - }, - { - "epoch": 19.92, - "learning_rate": 7.519212402549808e-08, - "loss": 3.8061, - "step": 1794000 - }, - { - "epoch": 19.93, - "learning_rate": 7.517824223174822e-08, - "loss": 3.791, - "step": 1794500 - }, - { - "epoch": 19.93, - "learning_rate": 7.516436043799835e-08, - "loss": 3.7976, - "step": 1795000 - }, - { - "epoch": 19.94, - "learning_rate": 7.515047864424849e-08, - "loss": 3.7774, - "step": 1795500 - }, - { - "epoch": 19.95, - "learning_rate": 7.513659685049863e-08, - "loss": 3.7975, - "step": 1796000 - }, - { - "epoch": 19.95, - "learning_rate": 7.512271505674876e-08, - "loss": 3.7811, - "step": 1796500 - }, - { - "epoch": 19.96, - "learning_rate": 7.51088332629989e-08, - "loss": 3.8031, - "step": 1797000 - }, - { - "epoch": 19.96, - "learning_rate": 7.509495146924905e-08, - "loss": 3.785, - "step": 1797500 - }, - { - "epoch": 19.97, - "learning_rate": 7.508106967549919e-08, - "loss": 3.8107, - "step": 1798000 - }, - { - "epoch": 19.97, - "learning_rate": 7.506718788174933e-08, - "loss": 3.7846, - "step": 1798500 - }, - { - "epoch": 19.98, - "learning_rate": 7.505330608799947e-08, - "loss": 3.7925, - "step": 1799000 - }, - { - "epoch": 19.98, - "learning_rate": 7.50394242942496e-08, - "loss": 3.786, - "step": 1799500 - }, - { - "epoch": 19.99, - "learning_rate": 7.502554250049974e-08, - "loss": 3.8075, - "step": 1800000 - }, - { - "epoch": 20.0, - "learning_rate": 7.501166070674989e-08, - "loss": 3.8053, - "step": 1800500 - }, - { - "epoch": 20.0, - "eval_loss": 3.853641986846924, - "eval_runtime": 6.3154, - "eval_samples_per_second": 246.067, - "step": 1800920 - }, - { - "epoch": 20.0, - "learning_rate": 7.499777891300002e-08, - "loss": 3.7879, - "step": 1801000 - }, - { - "epoch": 20.01, - "learning_rate": 7.498389711925016e-08, - "loss": 3.8016, - "step": 1801500 - }, - { - "epoch": 20.01, - "learning_rate": 7.497001532550029e-08, - "loss": 3.7886, - "step": 1802000 - }, - { - "epoch": 20.02, - "learning_rate": 7.495613353175043e-08, - "loss": 3.7989, - "step": 1802500 - }, - { - "epoch": 20.02, - "learning_rate": 7.494225173800057e-08, - "loss": 3.7949, - "step": 1803000 - }, - { - "epoch": 20.03, - "learning_rate": 7.492836994425071e-08, - "loss": 3.8031, - "step": 1803500 - }, - { - "epoch": 20.03, - "learning_rate": 7.491448815050086e-08, - "loss": 3.804, - "step": 1804000 - }, - { - "epoch": 20.04, - "learning_rate": 7.4900606356751e-08, - "loss": 3.8101, - "step": 1804500 - }, - { - "epoch": 20.05, - "learning_rate": 7.488672456300113e-08, - "loss": 3.8003, - "step": 1805000 - }, - { - "epoch": 20.05, - "learning_rate": 7.487284276925127e-08, - "loss": 3.8068, - "step": 1805500 - }, - { - "epoch": 20.06, - "learning_rate": 7.485896097550141e-08, - "loss": 3.7904, - "step": 1806000 - }, - { - "epoch": 20.06, - "learning_rate": 7.484507918175155e-08, - "loss": 3.7845, - "step": 1806500 - }, - { - "epoch": 20.07, - "learning_rate": 7.483119738800168e-08, - "loss": 3.8028, - "step": 1807000 - }, - { - "epoch": 20.07, - "learning_rate": 7.481731559425182e-08, - "loss": 3.7913, - "step": 1807500 - }, - { - "epoch": 20.08, - "learning_rate": 7.480343380050195e-08, - "loss": 3.7935, - "step": 1808000 - }, - { - "epoch": 20.08, - "learning_rate": 7.47895520067521e-08, - "loss": 3.8113, - "step": 1808500 - }, - { - "epoch": 20.09, - "learning_rate": 7.477567021300224e-08, - "loss": 3.7969, - "step": 1809000 - }, - { - "epoch": 20.1, - "learning_rate": 7.476178841925238e-08, - "loss": 3.8077, - "step": 1809500 - }, - { - "epoch": 20.1, - "learning_rate": 7.474790662550252e-08, - "loss": 3.7805, - "step": 1810000 - }, - { - "epoch": 20.11, - "learning_rate": 7.473402483175265e-08, - "loss": 3.8041, - "step": 1810500 - }, - { - "epoch": 20.11, - "learning_rate": 7.47201430380028e-08, - "loss": 3.7961, - "step": 1811000 - }, - { - "epoch": 20.12, - "learning_rate": 7.470626124425294e-08, - "loss": 3.7882, - "step": 1811500 - }, - { - "epoch": 20.12, - "learning_rate": 7.469237945050308e-08, - "loss": 3.8015, - "step": 1812000 - }, - { - "epoch": 20.13, - "learning_rate": 7.467849765675321e-08, - "loss": 3.7987, - "step": 1812500 - }, - { - "epoch": 20.13, - "learning_rate": 7.466461586300335e-08, - "loss": 3.7985, - "step": 1813000 - }, - { - "epoch": 20.14, - "learning_rate": 7.465073406925348e-08, - "loss": 3.7907, - "step": 1813500 - }, - { - "epoch": 20.15, - "learning_rate": 7.463685227550362e-08, - "loss": 3.7879, - "step": 1814000 - }, - { - "epoch": 20.15, - "learning_rate": 7.462297048175376e-08, - "loss": 3.8134, - "step": 1814500 - }, - { - "epoch": 20.16, - "learning_rate": 7.46090886880039e-08, - "loss": 3.7943, - "step": 1815000 - }, - { - "epoch": 20.16, - "learning_rate": 7.459520689425405e-08, - "loss": 3.784, - "step": 1815500 - }, - { - "epoch": 20.17, - "learning_rate": 7.458132510050419e-08, - "loss": 3.7972, - "step": 1816000 - }, - { - "epoch": 20.17, - "learning_rate": 7.456744330675432e-08, - "loss": 3.7978, - "step": 1816500 - }, - { - "epoch": 20.18, - "learning_rate": 7.455356151300446e-08, - "loss": 3.7801, - "step": 1817000 - }, - { - "epoch": 20.18, - "learning_rate": 7.45396797192546e-08, - "loss": 3.8055, - "step": 1817500 - }, - { - "epoch": 20.19, - "learning_rate": 7.452579792550475e-08, - "loss": 3.7792, - "step": 1818000 - }, - { - "epoch": 20.2, - "learning_rate": 7.451191613175488e-08, - "loss": 3.8062, - "step": 1818500 - }, - { - "epoch": 20.2, - "learning_rate": 7.449803433800502e-08, - "loss": 3.7984, - "step": 1819000 - }, - { - "epoch": 20.21, - "learning_rate": 7.448415254425515e-08, - "loss": 3.7905, - "step": 1819500 - }, - { - "epoch": 20.21, - "learning_rate": 7.447027075050529e-08, - "loss": 3.7896, - "step": 1820000 - }, - { - "epoch": 20.22, - "learning_rate": 7.445638895675543e-08, - "loss": 3.8079, - "step": 1820500 - }, - { - "epoch": 20.22, - "learning_rate": 7.444250716300557e-08, - "loss": 3.7922, - "step": 1821000 - }, - { - "epoch": 20.23, - "learning_rate": 7.442862536925572e-08, - "loss": 3.8041, - "step": 1821500 - }, - { - "epoch": 20.23, - "learning_rate": 7.441474357550584e-08, - "loss": 3.7892, - "step": 1822000 - }, - { - "epoch": 20.24, - "learning_rate": 7.440086178175599e-08, - "loss": 3.7927, - "step": 1822500 - }, - { - "epoch": 20.25, - "learning_rate": 7.438697998800613e-08, - "loss": 3.7934, - "step": 1823000 - }, - { - "epoch": 20.25, - "learning_rate": 7.437309819425627e-08, - "loss": 3.7822, - "step": 1823500 - }, - { - "epoch": 20.26, - "learning_rate": 7.435921640050641e-08, - "loss": 3.7961, - "step": 1824000 - }, - { - "epoch": 20.26, - "learning_rate": 7.434533460675654e-08, - "loss": 3.7783, - "step": 1824500 - }, - { - "epoch": 20.27, - "learning_rate": 7.433145281300668e-08, - "loss": 3.7934, - "step": 1825000 - }, - { - "epoch": 20.27, - "learning_rate": 7.431757101925681e-08, - "loss": 3.8011, - "step": 1825500 - }, - { - "epoch": 20.28, - "learning_rate": 7.430368922550696e-08, - "loss": 3.7755, - "step": 1826000 - }, - { - "epoch": 20.28, - "learning_rate": 7.42898074317571e-08, - "loss": 3.7925, - "step": 1826500 - }, - { - "epoch": 20.29, - "learning_rate": 7.427592563800724e-08, - "loss": 3.8185, - "step": 1827000 - }, - { - "epoch": 20.3, - "learning_rate": 7.426204384425737e-08, - "loss": 3.7931, - "step": 1827500 - }, - { - "epoch": 20.3, - "learning_rate": 7.424816205050751e-08, - "loss": 3.7789, - "step": 1828000 - }, - { - "epoch": 20.31, - "learning_rate": 7.423428025675765e-08, - "loss": 3.8054, - "step": 1828500 - }, - { - "epoch": 20.31, - "learning_rate": 7.42203984630078e-08, - "loss": 3.7812, - "step": 1829000 - }, - { - "epoch": 20.32, - "learning_rate": 7.420651666925794e-08, - "loss": 3.7771, - "step": 1829500 - }, - { - "epoch": 20.32, - "learning_rate": 7.419263487550808e-08, - "loss": 3.7851, - "step": 1830000 - }, - { - "epoch": 20.33, - "learning_rate": 7.417875308175821e-08, - "loss": 3.7804, - "step": 1830500 - }, - { - "epoch": 20.33, - "learning_rate": 7.416487128800835e-08, - "loss": 3.7709, - "step": 1831000 - }, - { - "epoch": 20.34, - "learning_rate": 7.415098949425848e-08, - "loss": 3.8039, - "step": 1831500 - }, - { - "epoch": 20.35, - "learning_rate": 7.413710770050862e-08, - "loss": 3.7956, - "step": 1832000 - }, - { - "epoch": 20.35, - "learning_rate": 7.412322590675877e-08, - "loss": 3.795, - "step": 1832500 - }, - { - "epoch": 20.36, - "learning_rate": 7.41093441130089e-08, - "loss": 3.8189, - "step": 1833000 - }, - { - "epoch": 20.36, - "learning_rate": 7.409546231925904e-08, - "loss": 3.8009, - "step": 1833500 - }, - { - "epoch": 20.37, - "learning_rate": 7.408158052550918e-08, - "loss": 3.7999, - "step": 1834000 - }, - { - "epoch": 20.37, - "learning_rate": 7.406769873175932e-08, - "loss": 3.801, - "step": 1834500 - }, - { - "epoch": 20.38, - "learning_rate": 7.405381693800946e-08, - "loss": 3.783, - "step": 1835000 - }, - { - "epoch": 20.38, - "learning_rate": 7.40399351442596e-08, - "loss": 3.7943, - "step": 1835500 - }, - { - "epoch": 20.39, - "learning_rate": 7.402605335050974e-08, - "loss": 3.7877, - "step": 1836000 - }, - { - "epoch": 20.4, - "learning_rate": 7.401217155675988e-08, - "loss": 3.8005, - "step": 1836500 - }, - { - "epoch": 20.4, - "learning_rate": 7.399828976301002e-08, - "loss": 3.7856, - "step": 1837000 - }, - { - "epoch": 20.41, - "learning_rate": 7.398440796926015e-08, - "loss": 3.791, - "step": 1837500 - }, - { - "epoch": 20.41, - "learning_rate": 7.397052617551029e-08, - "loss": 3.7982, - "step": 1838000 - }, - { - "epoch": 20.42, - "learning_rate": 7.395664438176043e-08, - "loss": 3.7989, - "step": 1838500 - }, - { - "epoch": 20.42, - "learning_rate": 7.394276258801056e-08, - "loss": 3.7937, - "step": 1839000 - }, - { - "epoch": 20.43, - "learning_rate": 7.39288807942607e-08, - "loss": 3.7971, - "step": 1839500 - }, - { - "epoch": 20.43, - "learning_rate": 7.391499900051085e-08, - "loss": 3.7732, - "step": 1840000 - }, - { - "epoch": 20.44, - "learning_rate": 7.390111720676099e-08, - "loss": 3.7934, - "step": 1840500 - }, - { - "epoch": 20.45, - "learning_rate": 7.388723541301113e-08, - "loss": 3.8004, - "step": 1841000 - }, - { - "epoch": 20.45, - "learning_rate": 7.387335361926126e-08, - "loss": 3.7911, - "step": 1841500 - }, - { - "epoch": 20.46, - "learning_rate": 7.38594718255114e-08, - "loss": 3.7887, - "step": 1842000 - }, - { - "epoch": 20.46, - "learning_rate": 7.384559003176155e-08, - "loss": 3.7934, - "step": 1842500 - }, - { - "epoch": 20.47, - "learning_rate": 7.383170823801167e-08, - "loss": 3.7929, - "step": 1843000 - }, - { - "epoch": 20.47, - "learning_rate": 7.381782644426182e-08, - "loss": 3.7775, - "step": 1843500 - }, - { - "epoch": 20.48, - "learning_rate": 7.380394465051196e-08, - "loss": 3.7854, - "step": 1844000 - }, - { - "epoch": 20.48, - "learning_rate": 7.379006285676209e-08, - "loss": 3.7935, - "step": 1844500 - }, - { - "epoch": 20.49, - "learning_rate": 7.377618106301223e-08, - "loss": 3.7867, - "step": 1845000 - }, - { - "epoch": 20.5, - "learning_rate": 7.376229926926237e-08, - "loss": 3.7832, - "step": 1845500 - }, - { - "epoch": 20.5, - "learning_rate": 7.374841747551251e-08, - "loss": 3.8162, - "step": 1846000 - }, - { - "epoch": 20.51, - "learning_rate": 7.373453568176266e-08, - "loss": 3.7905, - "step": 1846500 - }, - { - "epoch": 20.51, - "learning_rate": 7.372065388801279e-08, - "loss": 3.8053, - "step": 1847000 - }, - { - "epoch": 20.52, - "learning_rate": 7.370677209426293e-08, - "loss": 3.7959, - "step": 1847500 - }, - { - "epoch": 20.52, - "learning_rate": 7.369289030051307e-08, - "loss": 3.7998, - "step": 1848000 - }, - { - "epoch": 20.53, - "learning_rate": 7.367900850676321e-08, - "loss": 3.7926, - "step": 1848500 - }, - { - "epoch": 20.53, - "learning_rate": 7.366512671301334e-08, - "loss": 3.7766, - "step": 1849000 - }, - { - "epoch": 20.54, - "learning_rate": 7.365124491926348e-08, - "loss": 3.791, - "step": 1849500 - }, - { - "epoch": 20.55, - "learning_rate": 7.363736312551361e-08, - "loss": 3.8024, - "step": 1850000 - }, - { - "epoch": 20.55, - "learning_rate": 7.362348133176376e-08, - "loss": 3.8076, - "step": 1850500 - }, - { - "epoch": 20.56, - "learning_rate": 7.36095995380139e-08, - "loss": 3.7737, - "step": 1851000 - }, - { - "epoch": 20.56, - "learning_rate": 7.359571774426404e-08, - "loss": 3.7721, - "step": 1851500 - }, - { - "epoch": 20.57, - "learning_rate": 7.358183595051418e-08, - "loss": 3.8007, - "step": 1852000 - }, - { - "epoch": 20.57, - "learning_rate": 7.356795415676432e-08, - "loss": 3.7948, - "step": 1852500 - }, - { - "epoch": 20.58, - "learning_rate": 7.355407236301445e-08, - "loss": 3.7961, - "step": 1853000 - }, - { - "epoch": 20.58, - "learning_rate": 7.35401905692646e-08, - "loss": 3.7918, - "step": 1853500 - }, - { - "epoch": 20.59, - "learning_rate": 7.352630877551474e-08, - "loss": 3.8145, - "step": 1854000 - }, - { - "epoch": 20.6, - "learning_rate": 7.351242698176488e-08, - "loss": 3.7967, - "step": 1854500 - }, - { - "epoch": 20.6, - "learning_rate": 7.349854518801501e-08, - "loss": 3.8052, - "step": 1855000 - }, - { - "epoch": 20.61, - "learning_rate": 7.348466339426515e-08, - "loss": 3.8081, - "step": 1855500 - }, - { - "epoch": 20.61, - "learning_rate": 7.347078160051528e-08, - "loss": 3.7957, - "step": 1856000 - }, - { - "epoch": 20.62, - "learning_rate": 7.345689980676542e-08, - "loss": 3.779, - "step": 1856500 - }, - { - "epoch": 20.62, - "learning_rate": 7.344301801301556e-08, - "loss": 3.7836, - "step": 1857000 - }, - { - "epoch": 20.63, - "learning_rate": 7.342913621926571e-08, - "loss": 3.8091, - "step": 1857500 - }, - { - "epoch": 20.63, - "learning_rate": 7.341525442551585e-08, - "loss": 3.8083, - "step": 1858000 - }, - { - "epoch": 20.64, - "learning_rate": 7.340137263176598e-08, - "loss": 3.8133, - "step": 1858500 - }, - { - "epoch": 20.65, - "learning_rate": 7.338749083801612e-08, - "loss": 3.7899, - "step": 1859000 - }, - { - "epoch": 20.65, - "learning_rate": 7.337360904426626e-08, - "loss": 3.7923, - "step": 1859500 - }, - { - "epoch": 20.66, - "learning_rate": 7.33597272505164e-08, - "loss": 3.7969, - "step": 1860000 - }, - { - "epoch": 20.66, - "learning_rate": 7.334584545676655e-08, - "loss": 3.7909, - "step": 1860500 - }, - { - "epoch": 20.67, - "learning_rate": 7.333196366301668e-08, - "loss": 3.7936, - "step": 1861000 - }, - { - "epoch": 20.67, - "learning_rate": 7.331808186926682e-08, - "loss": 3.7971, - "step": 1861500 - }, - { - "epoch": 20.68, - "learning_rate": 7.330420007551695e-08, - "loss": 3.792, - "step": 1862000 - }, - { - "epoch": 20.68, - "learning_rate": 7.329031828176709e-08, - "loss": 3.7835, - "step": 1862500 - }, - { - "epoch": 20.69, - "learning_rate": 7.327643648801723e-08, - "loss": 3.8016, - "step": 1863000 - }, - { - "epoch": 20.69, - "learning_rate": 7.326255469426737e-08, - "loss": 3.8168, - "step": 1863500 - }, - { - "epoch": 20.7, - "learning_rate": 7.32486729005175e-08, - "loss": 3.8032, - "step": 1864000 - }, - { - "epoch": 20.71, - "learning_rate": 7.323479110676765e-08, - "loss": 3.8184, - "step": 1864500 - }, - { - "epoch": 20.71, - "learning_rate": 7.322090931301779e-08, - "loss": 3.7962, - "step": 1865000 - }, - { - "epoch": 20.72, - "learning_rate": 7.320702751926793e-08, - "loss": 3.7996, - "step": 1865500 - }, - { - "epoch": 20.72, - "learning_rate": 7.319314572551807e-08, - "loss": 3.7789, - "step": 1866000 - }, - { - "epoch": 20.73, - "learning_rate": 7.317926393176821e-08, - "loss": 3.7911, - "step": 1866500 - }, - { - "epoch": 20.73, - "learning_rate": 7.316538213801834e-08, - "loss": 3.805, - "step": 1867000 - }, - { - "epoch": 20.74, - "learning_rate": 7.315150034426849e-08, - "loss": 3.79, - "step": 1867500 - }, - { - "epoch": 20.74, - "learning_rate": 7.313761855051862e-08, - "loss": 3.7989, - "step": 1868000 - }, - { - "epoch": 20.75, - "learning_rate": 7.312373675676876e-08, - "loss": 3.7835, - "step": 1868500 - }, - { - "epoch": 20.76, - "learning_rate": 7.31098549630189e-08, - "loss": 3.8064, - "step": 1869000 - }, - { - "epoch": 20.76, - "learning_rate": 7.309597316926903e-08, - "loss": 3.7765, - "step": 1869500 - }, - { - "epoch": 20.77, - "learning_rate": 7.308209137551917e-08, - "loss": 3.7731, - "step": 1870000 - }, - { - "epoch": 20.77, - "learning_rate": 7.306820958176931e-08, - "loss": 3.8029, - "step": 1870500 - }, - { - "epoch": 20.78, - "learning_rate": 7.305432778801946e-08, - "loss": 3.7952, - "step": 1871000 - }, - { - "epoch": 20.78, - "learning_rate": 7.30404459942696e-08, - "loss": 3.7889, - "step": 1871500 - }, - { - "epoch": 20.79, - "learning_rate": 7.302656420051974e-08, - "loss": 3.8002, - "step": 1872000 - }, - { - "epoch": 20.79, - "learning_rate": 7.301268240676987e-08, - "loss": 3.7875, - "step": 1872500 - }, - { - "epoch": 20.8, - "learning_rate": 7.299880061302001e-08, - "loss": 3.8035, - "step": 1873000 - }, - { - "epoch": 20.81, - "learning_rate": 7.298491881927014e-08, - "loss": 3.7853, - "step": 1873500 - }, - { - "epoch": 20.81, - "learning_rate": 7.297103702552028e-08, - "loss": 3.7991, - "step": 1874000 - }, - { - "epoch": 20.82, - "learning_rate": 7.295715523177043e-08, - "loss": 3.7909, - "step": 1874500 - }, - { - "epoch": 20.82, - "learning_rate": 7.294327343802057e-08, - "loss": 3.7939, - "step": 1875000 - }, - { - "epoch": 20.83, - "learning_rate": 7.29293916442707e-08, - "loss": 3.7988, - "step": 1875500 - }, - { - "epoch": 20.83, - "learning_rate": 7.291550985052084e-08, - "loss": 3.7876, - "step": 1876000 - }, - { - "epoch": 20.84, - "learning_rate": 7.290162805677098e-08, - "loss": 3.7942, - "step": 1876500 - }, - { - "epoch": 20.84, - "learning_rate": 7.288774626302112e-08, - "loss": 3.8014, - "step": 1877000 - }, - { - "epoch": 20.85, - "learning_rate": 7.287386446927127e-08, - "loss": 3.7714, - "step": 1877500 - }, - { - "epoch": 20.86, - "learning_rate": 7.28599826755214e-08, - "loss": 3.7876, - "step": 1878000 - }, - { - "epoch": 20.86, - "learning_rate": 7.284610088177154e-08, - "loss": 3.7936, - "step": 1878500 - }, - { - "epoch": 20.87, - "learning_rate": 7.283221908802168e-08, - "loss": 3.788, - "step": 1879000 - }, - { - "epoch": 20.87, - "learning_rate": 7.281833729427181e-08, - "loss": 3.8013, - "step": 1879500 - }, - { - "epoch": 20.88, - "learning_rate": 7.280445550052195e-08, - "loss": 3.7853, - "step": 1880000 - }, - { - "epoch": 20.88, - "learning_rate": 7.279057370677209e-08, - "loss": 3.776, - "step": 1880500 - }, - { - "epoch": 20.89, - "learning_rate": 7.277669191302222e-08, - "loss": 3.801, - "step": 1881000 - }, - { - "epoch": 20.89, - "learning_rate": 7.276281011927236e-08, - "loss": 3.795, - "step": 1881500 - }, - { - "epoch": 20.9, - "learning_rate": 7.27489283255225e-08, - "loss": 3.8004, - "step": 1882000 - }, - { - "epoch": 20.91, - "learning_rate": 7.273504653177265e-08, - "loss": 3.7954, - "step": 1882500 - }, - { - "epoch": 20.91, - "learning_rate": 7.272116473802279e-08, - "loss": 3.806, - "step": 1883000 - }, - { - "epoch": 20.92, - "learning_rate": 7.270728294427292e-08, - "loss": 3.7901, - "step": 1883500 - }, - { - "epoch": 20.92, - "learning_rate": 7.269340115052306e-08, - "loss": 3.7892, - "step": 1884000 - }, - { - "epoch": 20.93, - "learning_rate": 7.26795193567732e-08, - "loss": 3.7932, - "step": 1884500 - }, - { - "epoch": 20.93, - "learning_rate": 7.266563756302335e-08, - "loss": 3.7816, - "step": 1885000 - }, - { - "epoch": 20.94, - "learning_rate": 7.265175576927348e-08, - "loss": 3.7863, - "step": 1885500 - }, - { - "epoch": 20.94, - "learning_rate": 7.263787397552362e-08, - "loss": 3.7845, - "step": 1886000 - }, - { - "epoch": 20.95, - "learning_rate": 7.262399218177375e-08, - "loss": 3.7904, - "step": 1886500 - }, - { - "epoch": 20.96, - "learning_rate": 7.261011038802389e-08, - "loss": 3.7817, - "step": 1887000 - }, - { - "epoch": 20.96, - "learning_rate": 7.259622859427403e-08, - "loss": 3.7957, - "step": 1887500 - }, - { - "epoch": 20.97, - "learning_rate": 7.258234680052417e-08, - "loss": 3.8018, - "step": 1888000 - }, - { - "epoch": 20.97, - "learning_rate": 7.256846500677432e-08, - "loss": 3.8093, - "step": 1888500 - }, - { - "epoch": 20.98, - "learning_rate": 7.255458321302446e-08, - "loss": 3.8011, - "step": 1889000 - }, - { - "epoch": 20.98, - "learning_rate": 7.254070141927459e-08, - "loss": 3.8015, - "step": 1889500 - }, - { - "epoch": 20.99, - "learning_rate": 7.252681962552473e-08, - "loss": 3.7908, - "step": 1890000 - }, - { - "epoch": 20.99, - "learning_rate": 7.251293783177487e-08, - "loss": 3.7689, - "step": 1890500 - }, - { - "epoch": 21.0, - "eval_loss": 3.8509271144866943, - "eval_runtime": 6.3078, - "eval_samples_per_second": 246.362, - "step": 1890966 - }, - { - "epoch": 21.0, - "learning_rate": 7.249905603802501e-08, - "loss": 3.7892, - "step": 1891000 - }, - { - "epoch": 21.01, - "learning_rate": 7.248517424427514e-08, - "loss": 3.7717, - "step": 1891500 - }, - { - "epoch": 21.01, - "learning_rate": 7.247129245052529e-08, - "loss": 3.812, - "step": 1892000 - }, - { - "epoch": 21.02, - "learning_rate": 7.245741065677541e-08, - "loss": 3.8008, - "step": 1892500 - }, - { - "epoch": 21.02, - "learning_rate": 7.244352886302556e-08, - "loss": 3.8002, - "step": 1893000 - }, - { - "epoch": 21.03, - "learning_rate": 7.24296470692757e-08, - "loss": 3.7934, - "step": 1893500 - }, - { - "epoch": 21.03, - "learning_rate": 7.241576527552584e-08, - "loss": 3.7885, - "step": 1894000 - }, - { - "epoch": 21.04, - "learning_rate": 7.240188348177598e-08, - "loss": 3.796, - "step": 1894500 - }, - { - "epoch": 21.04, - "learning_rate": 7.238800168802611e-08, - "loss": 3.784, - "step": 1895000 - }, - { - "epoch": 21.05, - "learning_rate": 7.237411989427625e-08, - "loss": 3.8008, - "step": 1895500 - }, - { - "epoch": 21.06, - "learning_rate": 7.23602381005264e-08, - "loss": 3.7905, - "step": 1896000 - }, - { - "epoch": 21.06, - "learning_rate": 7.234635630677654e-08, - "loss": 3.8028, - "step": 1896500 - }, - { - "epoch": 21.07, - "learning_rate": 7.233247451302668e-08, - "loss": 3.7682, - "step": 1897000 - }, - { - "epoch": 21.07, - "learning_rate": 7.231859271927681e-08, - "loss": 3.801, - "step": 1897500 - }, - { - "epoch": 21.08, - "learning_rate": 7.230471092552695e-08, - "loss": 3.7704, - "step": 1898000 - }, - { - "epoch": 21.08, - "learning_rate": 7.229082913177708e-08, - "loss": 3.7966, - "step": 1898500 - }, - { - "epoch": 21.09, - "learning_rate": 7.227694733802722e-08, - "loss": 3.7775, - "step": 1899000 - }, - { - "epoch": 21.09, - "learning_rate": 7.226306554427737e-08, - "loss": 3.7871, - "step": 1899500 - }, - { - "epoch": 21.1, - "learning_rate": 7.224918375052751e-08, - "loss": 3.7854, - "step": 1900000 - }, - { - "epoch": 21.11, - "learning_rate": 7.223530195677764e-08, - "loss": 3.7765, - "step": 1900500 - }, - { - "epoch": 21.11, - "learning_rate": 7.222142016302778e-08, - "loss": 3.8002, - "step": 1901000 - }, - { - "epoch": 21.12, - "learning_rate": 7.220753836927792e-08, - "loss": 3.7949, - "step": 1901500 - }, - { - "epoch": 21.12, - "learning_rate": 7.219365657552806e-08, - "loss": 3.7935, - "step": 1902000 - }, - { - "epoch": 21.13, - "learning_rate": 7.21797747817782e-08, - "loss": 3.8165, - "step": 1902500 - }, - { - "epoch": 21.13, - "learning_rate": 7.216589298802835e-08, - "loss": 3.7842, - "step": 1903000 - }, - { - "epoch": 21.14, - "learning_rate": 7.215201119427848e-08, - "loss": 3.8086, - "step": 1903500 - }, - { - "epoch": 21.14, - "learning_rate": 7.213812940052861e-08, - "loss": 3.7924, - "step": 1904000 - }, - { - "epoch": 21.15, - "learning_rate": 7.212424760677875e-08, - "loss": 3.7946, - "step": 1904500 - }, - { - "epoch": 21.16, - "learning_rate": 7.211036581302889e-08, - "loss": 3.8132, - "step": 1905000 - }, - { - "epoch": 21.16, - "learning_rate": 7.209648401927903e-08, - "loss": 3.8024, - "step": 1905500 - }, - { - "epoch": 21.17, - "learning_rate": 7.208260222552916e-08, - "loss": 3.7942, - "step": 1906000 - }, - { - "epoch": 21.17, - "learning_rate": 7.20687204317793e-08, - "loss": 3.8022, - "step": 1906500 - }, - { - "epoch": 21.18, - "learning_rate": 7.205483863802945e-08, - "loss": 3.7964, - "step": 1907000 - }, - { - "epoch": 21.18, - "learning_rate": 7.204095684427959e-08, - "loss": 3.7667, - "step": 1907500 - }, - { - "epoch": 21.19, - "learning_rate": 7.202707505052973e-08, - "loss": 3.7926, - "step": 1908000 - }, - { - "epoch": 21.19, - "learning_rate": 7.201319325677987e-08, - "loss": 3.7961, - "step": 1908500 - }, - { - "epoch": 21.2, - "learning_rate": 7.199931146303e-08, - "loss": 3.8026, - "step": 1909000 - }, - { - "epoch": 21.21, - "learning_rate": 7.198542966928015e-08, - "loss": 3.7988, - "step": 1909500 - }, - { - "epoch": 21.21, - "learning_rate": 7.197154787553027e-08, - "loss": 3.7841, - "step": 1910000 - }, - { - "epoch": 21.22, - "learning_rate": 7.195766608178042e-08, - "loss": 3.7996, - "step": 1910500 - }, - { - "epoch": 21.22, - "learning_rate": 7.194378428803056e-08, - "loss": 3.797, - "step": 1911000 - }, - { - "epoch": 21.23, - "learning_rate": 7.19299024942807e-08, - "loss": 3.7973, - "step": 1911500 - }, - { - "epoch": 21.23, - "learning_rate": 7.191602070053083e-08, - "loss": 3.8092, - "step": 1912000 - }, - { - "epoch": 21.24, - "learning_rate": 7.190213890678097e-08, - "loss": 3.7942, - "step": 1912500 - }, - { - "epoch": 21.24, - "learning_rate": 7.188825711303111e-08, - "loss": 3.7885, - "step": 1913000 - }, - { - "epoch": 21.25, - "learning_rate": 7.187437531928126e-08, - "loss": 3.7892, - "step": 1913500 - }, - { - "epoch": 21.26, - "learning_rate": 7.18604935255314e-08, - "loss": 3.7898, - "step": 1914000 - }, - { - "epoch": 21.26, - "learning_rate": 7.184661173178153e-08, - "loss": 3.7825, - "step": 1914500 - }, - { - "epoch": 21.27, - "learning_rate": 7.183272993803167e-08, - "loss": 3.7905, - "step": 1915000 - }, - { - "epoch": 21.27, - "learning_rate": 7.181884814428181e-08, - "loss": 3.7796, - "step": 1915500 - }, - { - "epoch": 21.28, - "learning_rate": 7.180496635053194e-08, - "loss": 3.8, - "step": 1916000 - }, - { - "epoch": 21.28, - "learning_rate": 7.179108455678208e-08, - "loss": 3.7928, - "step": 1916500 - }, - { - "epoch": 21.29, - "learning_rate": 7.177720276303223e-08, - "loss": 3.7943, - "step": 1917000 - }, - { - "epoch": 21.29, - "learning_rate": 7.176332096928236e-08, - "loss": 3.7971, - "step": 1917500 - }, - { - "epoch": 21.3, - "learning_rate": 7.17494391755325e-08, - "loss": 3.7886, - "step": 1918000 - }, - { - "epoch": 21.31, - "learning_rate": 7.173555738178264e-08, - "loss": 3.7806, - "step": 1918500 - }, - { - "epoch": 21.31, - "learning_rate": 7.172167558803278e-08, - "loss": 3.7901, - "step": 1919000 - }, - { - "epoch": 21.32, - "learning_rate": 7.170779379428292e-08, - "loss": 3.7816, - "step": 1919500 - }, - { - "epoch": 21.32, - "learning_rate": 7.169391200053305e-08, - "loss": 3.8032, - "step": 1920000 - }, - { - "epoch": 21.33, - "learning_rate": 7.16800302067832e-08, - "loss": 3.7795, - "step": 1920500 - }, - { - "epoch": 21.33, - "learning_rate": 7.166614841303334e-08, - "loss": 3.7688, - "step": 1921000 - }, - { - "epoch": 21.34, - "learning_rate": 7.165226661928348e-08, - "loss": 3.7695, - "step": 1921500 - }, - { - "epoch": 21.34, - "learning_rate": 7.163838482553361e-08, - "loss": 3.8006, - "step": 1922000 - }, - { - "epoch": 21.35, - "learning_rate": 7.162450303178375e-08, - "loss": 3.8097, - "step": 1922500 - }, - { - "epoch": 21.36, - "learning_rate": 7.161062123803388e-08, - "loss": 3.7796, - "step": 1923000 - }, - { - "epoch": 21.36, - "learning_rate": 7.159673944428402e-08, - "loss": 3.7949, - "step": 1923500 - }, - { - "epoch": 21.37, - "learning_rate": 7.158285765053417e-08, - "loss": 3.776, - "step": 1924000 - }, - { - "epoch": 21.37, - "learning_rate": 7.156897585678431e-08, - "loss": 3.78, - "step": 1924500 - }, - { - "epoch": 21.38, - "learning_rate": 7.155509406303445e-08, - "loss": 3.7884, - "step": 1925000 - }, - { - "epoch": 21.38, - "learning_rate": 7.154121226928459e-08, - "loss": 3.803, - "step": 1925500 - }, - { - "epoch": 21.39, - "learning_rate": 7.152733047553472e-08, - "loss": 3.786, - "step": 1926000 - }, - { - "epoch": 21.39, - "learning_rate": 7.151344868178486e-08, - "loss": 3.7773, - "step": 1926500 - }, - { - "epoch": 21.4, - "learning_rate": 7.1499566888035e-08, - "loss": 3.798, - "step": 1927000 - }, - { - "epoch": 21.41, - "learning_rate": 7.148568509428515e-08, - "loss": 3.7925, - "step": 1927500 - }, - { - "epoch": 21.41, - "learning_rate": 7.147180330053528e-08, - "loss": 3.7903, - "step": 1928000 - }, - { - "epoch": 21.42, - "learning_rate": 7.145792150678542e-08, - "loss": 3.7757, - "step": 1928500 - }, - { - "epoch": 21.42, - "learning_rate": 7.144403971303555e-08, - "loss": 3.7916, - "step": 1929000 - }, - { - "epoch": 21.43, - "learning_rate": 7.143015791928569e-08, - "loss": 3.7897, - "step": 1929500 - }, - { - "epoch": 21.43, - "learning_rate": 7.141627612553583e-08, - "loss": 3.789, - "step": 1930000 - }, - { - "epoch": 21.44, - "learning_rate": 7.140239433178597e-08, - "loss": 3.8001, - "step": 1930500 - }, - { - "epoch": 21.44, - "learning_rate": 7.138851253803612e-08, - "loss": 3.7804, - "step": 1931000 - }, - { - "epoch": 21.45, - "learning_rate": 7.137463074428625e-08, - "loss": 3.7953, - "step": 1931500 - }, - { - "epoch": 21.46, - "learning_rate": 7.136074895053639e-08, - "loss": 3.7868, - "step": 1932000 - }, - { - "epoch": 21.46, - "learning_rate": 7.134686715678653e-08, - "loss": 3.7864, - "step": 1932500 - }, - { - "epoch": 21.47, - "learning_rate": 7.133298536303667e-08, - "loss": 3.8067, - "step": 1933000 - }, - { - "epoch": 21.47, - "learning_rate": 7.131910356928682e-08, - "loss": 3.7949, - "step": 1933500 - }, - { - "epoch": 21.48, - "learning_rate": 7.130522177553694e-08, - "loss": 3.8007, - "step": 1934000 - }, - { - "epoch": 21.48, - "learning_rate": 7.129133998178707e-08, - "loss": 3.7754, - "step": 1934500 - }, - { - "epoch": 21.49, - "learning_rate": 7.127745818803722e-08, - "loss": 3.8005, - "step": 1935000 - }, - { - "epoch": 21.49, - "learning_rate": 7.126357639428736e-08, - "loss": 3.7777, - "step": 1935500 - }, - { - "epoch": 21.5, - "learning_rate": 7.12496946005375e-08, - "loss": 3.7963, - "step": 1936000 - }, - { - "epoch": 21.51, - "learning_rate": 7.123581280678764e-08, - "loss": 3.7851, - "step": 1936500 - }, - { - "epoch": 21.51, - "learning_rate": 7.122193101303777e-08, - "loss": 3.7674, - "step": 1937000 - }, - { - "epoch": 21.52, - "learning_rate": 7.120804921928791e-08, - "loss": 3.8032, - "step": 1937500 - }, - { - "epoch": 21.52, - "learning_rate": 7.119416742553806e-08, - "loss": 3.7782, - "step": 1938000 - }, - { - "epoch": 21.53, - "learning_rate": 7.11802856317882e-08, - "loss": 3.7864, - "step": 1938500 - }, - { - "epoch": 21.53, - "learning_rate": 7.116640383803834e-08, - "loss": 3.789, - "step": 1939000 - }, - { - "epoch": 21.54, - "learning_rate": 7.115252204428848e-08, - "loss": 3.7701, - "step": 1939500 - }, - { - "epoch": 21.54, - "learning_rate": 7.113864025053861e-08, - "loss": 3.7921, - "step": 1940000 - }, - { - "epoch": 21.55, - "learning_rate": 7.112475845678874e-08, - "loss": 3.7854, - "step": 1940500 - }, - { - "epoch": 21.56, - "learning_rate": 7.111087666303888e-08, - "loss": 3.7882, - "step": 1941000 - }, - { - "epoch": 21.56, - "learning_rate": 7.109699486928903e-08, - "loss": 3.7841, - "step": 1941500 - }, - { - "epoch": 21.57, - "learning_rate": 7.108311307553917e-08, - "loss": 3.7935, - "step": 1942000 - }, - { - "epoch": 21.57, - "learning_rate": 7.10692312817893e-08, - "loss": 3.7878, - "step": 1942500 - }, - { - "epoch": 21.58, - "learning_rate": 7.105534948803944e-08, - "loss": 3.787, - "step": 1943000 - }, - { - "epoch": 21.58, - "learning_rate": 7.104146769428958e-08, - "loss": 3.8077, - "step": 1943500 - }, - { - "epoch": 21.59, - "learning_rate": 7.102758590053972e-08, - "loss": 3.7806, - "step": 1944000 - }, - { - "epoch": 21.59, - "learning_rate": 7.101370410678987e-08, - "loss": 3.7905, - "step": 1944500 - }, - { - "epoch": 21.6, - "learning_rate": 7.099982231304001e-08, - "loss": 3.7657, - "step": 1945000 - }, - { - "epoch": 21.61, - "learning_rate": 7.098594051929014e-08, - "loss": 3.796, - "step": 1945500 - }, - { - "epoch": 21.61, - "learning_rate": 7.097205872554028e-08, - "loss": 3.7807, - "step": 1946000 - }, - { - "epoch": 21.62, - "learning_rate": 7.095817693179041e-08, - "loss": 3.7795, - "step": 1946500 - }, - { - "epoch": 21.62, - "learning_rate": 7.094429513804055e-08, - "loss": 3.8025, - "step": 1947000 - }, - { - "epoch": 21.63, - "learning_rate": 7.093041334429069e-08, - "loss": 3.8012, - "step": 1947500 - }, - { - "epoch": 21.63, - "learning_rate": 7.091653155054083e-08, - "loss": 3.79, - "step": 1948000 - }, - { - "epoch": 21.64, - "learning_rate": 7.090264975679096e-08, - "loss": 3.7857, - "step": 1948500 - }, - { - "epoch": 21.64, - "learning_rate": 7.08887679630411e-08, - "loss": 3.7686, - "step": 1949000 - }, - { - "epoch": 21.65, - "learning_rate": 7.087488616929125e-08, - "loss": 3.7948, - "step": 1949500 - }, - { - "epoch": 21.66, - "learning_rate": 7.086100437554139e-08, - "loss": 3.7862, - "step": 1950000 - }, - { - "epoch": 21.66, - "learning_rate": 7.084712258179153e-08, - "loss": 3.7964, - "step": 1950500 - }, - { - "epoch": 21.67, - "learning_rate": 7.083324078804166e-08, - "loss": 3.7797, - "step": 1951000 - }, - { - "epoch": 21.67, - "learning_rate": 7.08193589942918e-08, - "loss": 3.8038, - "step": 1951500 - }, - { - "epoch": 21.68, - "learning_rate": 7.080547720054195e-08, - "loss": 3.7925, - "step": 1952000 - }, - { - "epoch": 21.68, - "learning_rate": 7.079159540679208e-08, - "loss": 3.7775, - "step": 1952500 - }, - { - "epoch": 21.69, - "learning_rate": 7.077771361304222e-08, - "loss": 3.788, - "step": 1953000 - }, - { - "epoch": 21.69, - "learning_rate": 7.076383181929236e-08, - "loss": 3.7817, - "step": 1953500 - }, - { - "epoch": 21.7, - "learning_rate": 7.074995002554249e-08, - "loss": 3.7919, - "step": 1954000 - }, - { - "epoch": 21.71, - "learning_rate": 7.073606823179263e-08, - "loss": 3.761, - "step": 1954500 - }, - { - "epoch": 21.71, - "learning_rate": 7.072218643804277e-08, - "loss": 3.8048, - "step": 1955000 - }, - { - "epoch": 21.72, - "learning_rate": 7.070830464429292e-08, - "loss": 3.801, - "step": 1955500 - }, - { - "epoch": 21.72, - "learning_rate": 7.069442285054306e-08, - "loss": 3.7954, - "step": 1956000 - }, - { - "epoch": 21.73, - "learning_rate": 7.068054105679319e-08, - "loss": 3.7729, - "step": 1956500 - }, - { - "epoch": 21.73, - "learning_rate": 7.066665926304333e-08, - "loss": 3.7781, - "step": 1957000 - }, - { - "epoch": 21.74, - "learning_rate": 7.065277746929347e-08, - "loss": 3.7812, - "step": 1957500 - }, - { - "epoch": 21.74, - "learning_rate": 7.063889567554361e-08, - "loss": 3.7885, - "step": 1958000 - }, - { - "epoch": 21.75, - "learning_rate": 7.062501388179374e-08, - "loss": 3.7942, - "step": 1958500 - }, - { - "epoch": 21.76, - "learning_rate": 7.061113208804389e-08, - "loss": 3.7899, - "step": 1959000 - }, - { - "epoch": 21.76, - "learning_rate": 7.059725029429401e-08, - "loss": 3.7894, - "step": 1959500 - }, - { - "epoch": 21.77, - "learning_rate": 7.058336850054416e-08, - "loss": 3.8002, - "step": 1960000 - }, - { - "epoch": 21.77, - "learning_rate": 7.05694867067943e-08, - "loss": 3.7883, - "step": 1960500 - }, - { - "epoch": 21.78, - "learning_rate": 7.055560491304444e-08, - "loss": 3.8024, - "step": 1961000 - }, - { - "epoch": 21.78, - "learning_rate": 7.054172311929458e-08, - "loss": 3.8021, - "step": 1961500 - }, - { - "epoch": 21.79, - "learning_rate": 7.052784132554473e-08, - "loss": 3.7879, - "step": 1962000 - }, - { - "epoch": 21.79, - "learning_rate": 7.051395953179485e-08, - "loss": 3.7841, - "step": 1962500 - }, - { - "epoch": 21.8, - "learning_rate": 7.0500077738045e-08, - "loss": 3.8009, - "step": 1963000 - }, - { - "epoch": 21.81, - "learning_rate": 7.048619594429514e-08, - "loss": 3.8139, - "step": 1963500 - }, - { - "epoch": 21.81, - "learning_rate": 7.047231415054528e-08, - "loss": 3.7907, - "step": 1964000 - }, - { - "epoch": 21.82, - "learning_rate": 7.045843235679541e-08, - "loss": 3.7904, - "step": 1964500 - }, - { - "epoch": 21.82, - "learning_rate": 7.044455056304554e-08, - "loss": 3.7757, - "step": 1965000 - }, - { - "epoch": 21.83, - "learning_rate": 7.043066876929568e-08, - "loss": 3.7891, - "step": 1965500 - }, - { - "epoch": 21.83, - "learning_rate": 7.041678697554582e-08, - "loss": 3.7992, - "step": 1966000 - }, - { - "epoch": 21.84, - "learning_rate": 7.040290518179597e-08, - "loss": 3.7669, - "step": 1966500 - }, - { - "epoch": 21.84, - "learning_rate": 7.038902338804611e-08, - "loss": 3.809, - "step": 1967000 - }, - { - "epoch": 21.85, - "learning_rate": 7.037514159429625e-08, - "loss": 3.7909, - "step": 1967500 - }, - { - "epoch": 21.86, - "learning_rate": 7.036125980054638e-08, - "loss": 3.7907, - "step": 1968000 - }, - { - "epoch": 21.86, - "learning_rate": 7.034737800679652e-08, - "loss": 3.7807, - "step": 1968500 - }, - { - "epoch": 21.87, - "learning_rate": 7.033349621304666e-08, - "loss": 3.787, - "step": 1969000 - }, - { - "epoch": 21.87, - "learning_rate": 7.031961441929681e-08, - "loss": 3.7761, - "step": 1969500 - }, - { - "epoch": 21.88, - "learning_rate": 7.030573262554695e-08, - "loss": 3.779, - "step": 1970000 - }, - { - "epoch": 21.88, - "learning_rate": 7.029185083179708e-08, - "loss": 3.8079, - "step": 1970500 - }, - { - "epoch": 21.89, - "learning_rate": 7.027796903804721e-08, - "loss": 3.7868, - "step": 1971000 - }, - { - "epoch": 21.89, - "learning_rate": 7.026408724429735e-08, - "loss": 3.7868, - "step": 1971500 - }, - { - "epoch": 21.9, - "learning_rate": 7.025020545054749e-08, - "loss": 3.7996, - "step": 1972000 - }, - { - "epoch": 21.91, - "learning_rate": 7.023632365679763e-08, - "loss": 3.7828, - "step": 1972500 - }, - { - "epoch": 21.91, - "learning_rate": 7.022244186304778e-08, - "loss": 3.7873, - "step": 1973000 - }, - { - "epoch": 21.92, - "learning_rate": 7.02085600692979e-08, - "loss": 3.7874, - "step": 1973500 - }, - { - "epoch": 21.92, - "learning_rate": 7.019467827554805e-08, - "loss": 3.7938, - "step": 1974000 - }, - { - "epoch": 21.93, - "learning_rate": 7.018079648179819e-08, - "loss": 3.7869, - "step": 1974500 - }, - { - "epoch": 21.93, - "learning_rate": 7.016691468804833e-08, - "loss": 3.798, - "step": 1975000 - }, - { - "epoch": 21.94, - "learning_rate": 7.015303289429847e-08, - "loss": 3.7919, - "step": 1975500 - }, - { - "epoch": 21.94, - "learning_rate": 7.013915110054862e-08, - "loss": 3.7861, - "step": 1976000 - }, - { - "epoch": 21.95, - "learning_rate": 7.012526930679875e-08, - "loss": 3.795, - "step": 1976500 - }, - { - "epoch": 21.96, - "learning_rate": 7.011138751304887e-08, - "loss": 3.7829, - "step": 1977000 - }, - { - "epoch": 21.96, - "learning_rate": 7.009750571929902e-08, - "loss": 3.8138, - "step": 1977500 - }, - { - "epoch": 21.97, - "learning_rate": 7.008362392554916e-08, - "loss": 3.7756, - "step": 1978000 - }, - { - "epoch": 21.97, - "learning_rate": 7.00697421317993e-08, - "loss": 3.7906, - "step": 1978500 - }, - { - "epoch": 21.98, - "learning_rate": 7.005586033804943e-08, - "loss": 3.79, - "step": 1979000 - }, - { - "epoch": 21.98, - "learning_rate": 7.004197854429957e-08, - "loss": 3.8046, - "step": 1979500 - }, - { - "epoch": 21.99, - "learning_rate": 7.002809675054971e-08, - "loss": 3.7864, - "step": 1980000 - }, - { - "epoch": 21.99, - "learning_rate": 7.001421495679986e-08, - "loss": 3.7936, - "step": 1980500 - }, - { - "epoch": 22.0, - "learning_rate": 7.000033316305e-08, - "loss": 3.7851, - "step": 1981000 - }, - { - "epoch": 22.0, - "eval_loss": 3.8484787940979004, - "eval_runtime": 6.3174, - "eval_samples_per_second": 245.986, - "step": 1981012 - }, - { - "epoch": 22.01, - "learning_rate": 6.998645136930014e-08, - "loss": 3.7871, - "step": 1981500 - }, - { - "epoch": 22.01, - "learning_rate": 6.997256957555027e-08, - "loss": 3.7924, - "step": 1982000 - }, - { - "epoch": 22.02, - "learning_rate": 6.995868778180041e-08, - "loss": 3.7953, - "step": 1982500 - }, - { - "epoch": 22.02, - "learning_rate": 6.994480598805054e-08, - "loss": 3.7907, - "step": 1983000 - }, - { - "epoch": 22.03, - "learning_rate": 6.993092419430068e-08, - "loss": 3.7666, - "step": 1983500 - }, - { - "epoch": 22.03, - "learning_rate": 6.991704240055083e-08, - "loss": 3.7855, - "step": 1984000 - }, - { - "epoch": 22.04, - "learning_rate": 6.990316060680097e-08, - "loss": 3.7738, - "step": 1984500 - }, - { - "epoch": 22.04, - "learning_rate": 6.98892788130511e-08, - "loss": 3.7925, - "step": 1985000 - }, - { - "epoch": 22.05, - "learning_rate": 6.987539701930124e-08, - "loss": 3.7909, - "step": 1985500 - }, - { - "epoch": 22.06, - "learning_rate": 6.986151522555138e-08, - "loss": 3.8002, - "step": 1986000 - }, - { - "epoch": 22.06, - "learning_rate": 6.984763343180152e-08, - "loss": 3.7835, - "step": 1986500 - }, - { - "epoch": 22.07, - "learning_rate": 6.983375163805167e-08, - "loss": 3.7926, - "step": 1987000 - }, - { - "epoch": 22.07, - "learning_rate": 6.98198698443018e-08, - "loss": 3.7992, - "step": 1987500 - }, - { - "epoch": 22.08, - "learning_rate": 6.980598805055194e-08, - "loss": 3.7971, - "step": 1988000 - }, - { - "epoch": 22.08, - "learning_rate": 6.979210625680208e-08, - "loss": 3.7985, - "step": 1988500 - }, - { - "epoch": 22.09, - "learning_rate": 6.977822446305221e-08, - "loss": 3.7874, - "step": 1989000 - }, - { - "epoch": 22.09, - "learning_rate": 6.976434266930235e-08, - "loss": 3.7873, - "step": 1989500 - }, - { - "epoch": 22.1, - "learning_rate": 6.97504608755525e-08, - "loss": 3.7859, - "step": 1990000 - }, - { - "epoch": 22.11, - "learning_rate": 6.973657908180262e-08, - "loss": 3.8092, - "step": 1990500 - }, - { - "epoch": 22.11, - "learning_rate": 6.972269728805277e-08, - "loss": 3.7872, - "step": 1991000 - }, - { - "epoch": 22.12, - "learning_rate": 6.970881549430291e-08, - "loss": 3.7697, - "step": 1991500 - }, - { - "epoch": 22.12, - "learning_rate": 6.969493370055305e-08, - "loss": 3.7745, - "step": 1992000 - }, - { - "epoch": 22.13, - "learning_rate": 6.968105190680319e-08, - "loss": 3.7949, - "step": 1992500 - }, - { - "epoch": 22.13, - "learning_rate": 6.966717011305333e-08, - "loss": 3.7931, - "step": 1993000 - }, - { - "epoch": 22.14, - "learning_rate": 6.965328831930346e-08, - "loss": 3.7968, - "step": 1993500 - }, - { - "epoch": 22.14, - "learning_rate": 6.96394065255536e-08, - "loss": 3.7767, - "step": 1994000 - }, - { - "epoch": 22.15, - "learning_rate": 6.962552473180375e-08, - "loss": 3.8058, - "step": 1994500 - }, - { - "epoch": 22.16, - "learning_rate": 6.961164293805388e-08, - "loss": 3.7957, - "step": 1995000 - }, - { - "epoch": 22.16, - "learning_rate": 6.959776114430402e-08, - "loss": 3.7939, - "step": 1995500 - }, - { - "epoch": 22.17, - "learning_rate": 6.958387935055415e-08, - "loss": 3.7841, - "step": 1996000 - }, - { - "epoch": 22.17, - "learning_rate": 6.956999755680429e-08, - "loss": 3.7881, - "step": 1996500 - }, - { - "epoch": 22.18, - "learning_rate": 6.955611576305443e-08, - "loss": 3.7743, - "step": 1997000 - }, - { - "epoch": 22.18, - "learning_rate": 6.954223396930457e-08, - "loss": 3.8086, - "step": 1997500 - }, - { - "epoch": 22.19, - "learning_rate": 6.952835217555472e-08, - "loss": 3.7994, - "step": 1998000 - }, - { - "epoch": 22.19, - "learning_rate": 6.951447038180486e-08, - "loss": 3.8022, - "step": 1998500 - }, - { - "epoch": 22.2, - "learning_rate": 6.950058858805499e-08, - "loss": 3.7674, - "step": 1999000 - }, - { - "epoch": 22.21, - "learning_rate": 6.948670679430513e-08, - "loss": 3.7945, - "step": 1999500 - }, - { - "epoch": 22.21, - "learning_rate": 6.947282500055527e-08, - "loss": 3.7764, - "step": 2000000 - }, - { - "epoch": 22.22, - "learning_rate": 6.945894320680542e-08, - "loss": 3.7923, - "step": 2000500 - }, - { - "epoch": 22.22, - "learning_rate": 6.944506141305554e-08, - "loss": 3.7838, - "step": 2001000 - }, - { - "epoch": 22.23, - "learning_rate": 6.943117961930567e-08, - "loss": 3.7779, - "step": 2001500 - }, - { - "epoch": 22.23, - "learning_rate": 6.941729782555582e-08, - "loss": 3.7668, - "step": 2002000 - }, - { - "epoch": 22.24, - "learning_rate": 6.940341603180596e-08, - "loss": 3.7736, - "step": 2002500 - }, - { - "epoch": 22.24, - "learning_rate": 6.93895342380561e-08, - "loss": 3.7946, - "step": 2003000 - }, - { - "epoch": 22.25, - "learning_rate": 6.937565244430624e-08, - "loss": 3.8017, - "step": 2003500 - }, - { - "epoch": 22.26, - "learning_rate": 6.936177065055638e-08, - "loss": 3.79, - "step": 2004000 - }, - { - "epoch": 22.26, - "learning_rate": 6.934788885680651e-08, - "loss": 3.7758, - "step": 2004500 - }, - { - "epoch": 22.27, - "learning_rate": 6.933400706305666e-08, - "loss": 3.7753, - "step": 2005000 - }, - { - "epoch": 22.27, - "learning_rate": 6.93201252693068e-08, - "loss": 3.7827, - "step": 2005500 - }, - { - "epoch": 22.28, - "learning_rate": 6.930624347555694e-08, - "loss": 3.7772, - "step": 2006000 - }, - { - "epoch": 22.28, - "learning_rate": 6.929236168180708e-08, - "loss": 3.7935, - "step": 2006500 - }, - { - "epoch": 22.29, - "learning_rate": 6.927847988805721e-08, - "loss": 3.7779, - "step": 2007000 - }, - { - "epoch": 22.29, - "learning_rate": 6.926459809430734e-08, - "loss": 3.796, - "step": 2007500 - }, - { - "epoch": 22.3, - "learning_rate": 6.925071630055748e-08, - "loss": 3.762, - "step": 2008000 - }, - { - "epoch": 22.31, - "learning_rate": 6.923683450680763e-08, - "loss": 3.7926, - "step": 2008500 - }, - { - "epoch": 22.31, - "learning_rate": 6.922295271305777e-08, - "loss": 3.7823, - "step": 2009000 - }, - { - "epoch": 22.32, - "learning_rate": 6.920907091930791e-08, - "loss": 3.802, - "step": 2009500 - }, - { - "epoch": 22.32, - "learning_rate": 6.919518912555804e-08, - "loss": 3.7753, - "step": 2010000 - }, - { - "epoch": 22.33, - "learning_rate": 6.918130733180818e-08, - "loss": 3.7734, - "step": 2010500 - }, - { - "epoch": 22.33, - "learning_rate": 6.916742553805832e-08, - "loss": 3.7947, - "step": 2011000 - }, - { - "epoch": 22.34, - "learning_rate": 6.915354374430847e-08, - "loss": 3.7967, - "step": 2011500 - }, - { - "epoch": 22.34, - "learning_rate": 6.913966195055861e-08, - "loss": 3.7954, - "step": 2012000 - }, - { - "epoch": 22.35, - "learning_rate": 6.912578015680875e-08, - "loss": 3.7812, - "step": 2012500 - }, - { - "epoch": 22.36, - "learning_rate": 6.911189836305888e-08, - "loss": 3.7872, - "step": 2013000 - }, - { - "epoch": 22.36, - "learning_rate": 6.909801656930901e-08, - "loss": 3.7915, - "step": 2013500 - }, - { - "epoch": 22.37, - "learning_rate": 6.908413477555915e-08, - "loss": 3.7899, - "step": 2014000 - }, - { - "epoch": 22.37, - "learning_rate": 6.907025298180929e-08, - "loss": 3.7875, - "step": 2014500 - }, - { - "epoch": 22.38, - "learning_rate": 6.905637118805944e-08, - "loss": 3.7868, - "step": 2015000 - }, - { - "epoch": 22.38, - "learning_rate": 6.904248939430956e-08, - "loss": 3.8169, - "step": 2015500 - }, - { - "epoch": 22.39, - "learning_rate": 6.90286076005597e-08, - "loss": 3.7773, - "step": 2016000 - }, - { - "epoch": 22.39, - "learning_rate": 6.901472580680985e-08, - "loss": 3.7923, - "step": 2016500 - }, - { - "epoch": 22.4, - "learning_rate": 6.900084401305999e-08, - "loss": 3.7977, - "step": 2017000 - }, - { - "epoch": 22.41, - "learning_rate": 6.898696221931013e-08, - "loss": 3.7897, - "step": 2017500 - }, - { - "epoch": 22.41, - "learning_rate": 6.897308042556028e-08, - "loss": 3.7929, - "step": 2018000 - }, - { - "epoch": 22.42, - "learning_rate": 6.89591986318104e-08, - "loss": 3.8075, - "step": 2018500 - }, - { - "epoch": 22.42, - "learning_rate": 6.894531683806055e-08, - "loss": 3.7867, - "step": 2019000 - }, - { - "epoch": 22.43, - "learning_rate": 6.893143504431068e-08, - "loss": 3.7928, - "step": 2019500 - }, - { - "epoch": 22.43, - "learning_rate": 6.891755325056082e-08, - "loss": 3.7794, - "step": 2020000 - }, - { - "epoch": 22.44, - "learning_rate": 6.890367145681096e-08, - "loss": 3.7838, - "step": 2020500 - }, - { - "epoch": 22.44, - "learning_rate": 6.88897896630611e-08, - "loss": 3.7674, - "step": 2021000 - }, - { - "epoch": 22.45, - "learning_rate": 6.887590786931123e-08, - "loss": 3.7865, - "step": 2021500 - }, - { - "epoch": 22.46, - "learning_rate": 6.886202607556137e-08, - "loss": 3.7721, - "step": 2022000 - }, - { - "epoch": 22.46, - "learning_rate": 6.884814428181152e-08, - "loss": 3.7869, - "step": 2022500 - }, - { - "epoch": 22.47, - "learning_rate": 6.883426248806166e-08, - "loss": 3.785, - "step": 2023000 - }, - { - "epoch": 22.47, - "learning_rate": 6.88203806943118e-08, - "loss": 3.7929, - "step": 2023500 - }, - { - "epoch": 22.48, - "learning_rate": 6.880649890056193e-08, - "loss": 3.7761, - "step": 2024000 - }, - { - "epoch": 22.48, - "learning_rate": 6.879261710681207e-08, - "loss": 3.7886, - "step": 2024500 - }, - { - "epoch": 22.49, - "learning_rate": 6.877873531306221e-08, - "loss": 3.7965, - "step": 2025000 - }, - { - "epoch": 22.49, - "learning_rate": 6.876485351931234e-08, - "loss": 3.7879, - "step": 2025500 - }, - { - "epoch": 22.5, - "learning_rate": 6.875097172556249e-08, - "loss": 3.7966, - "step": 2026000 - }, - { - "epoch": 22.51, - "learning_rate": 6.873708993181263e-08, - "loss": 3.7729, - "step": 2026500 - }, - { - "epoch": 22.51, - "learning_rate": 6.872320813806276e-08, - "loss": 3.7856, - "step": 2027000 - }, - { - "epoch": 22.52, - "learning_rate": 6.87093263443129e-08, - "loss": 3.7729, - "step": 2027500 - }, - { - "epoch": 22.52, - "learning_rate": 6.869544455056304e-08, - "loss": 3.7935, - "step": 2028000 - }, - { - "epoch": 22.53, - "learning_rate": 6.868156275681318e-08, - "loss": 3.7977, - "step": 2028500 - }, - { - "epoch": 22.53, - "learning_rate": 6.866768096306333e-08, - "loss": 3.7955, - "step": 2029000 - }, - { - "epoch": 22.54, - "learning_rate": 6.865379916931347e-08, - "loss": 3.7826, - "step": 2029500 - }, - { - "epoch": 22.54, - "learning_rate": 6.86399173755636e-08, - "loss": 3.7743, - "step": 2030000 - }, - { - "epoch": 22.55, - "learning_rate": 6.862603558181374e-08, - "loss": 3.8023, - "step": 2030500 - }, - { - "epoch": 22.56, - "learning_rate": 6.861215378806388e-08, - "loss": 3.7918, - "step": 2031000 - }, - { - "epoch": 22.56, - "learning_rate": 6.859827199431401e-08, - "loss": 3.7863, - "step": 2031500 - }, - { - "epoch": 22.57, - "learning_rate": 6.858439020056415e-08, - "loss": 3.7613, - "step": 2032000 - }, - { - "epoch": 22.57, - "learning_rate": 6.857050840681428e-08, - "loss": 3.7797, - "step": 2032500 - }, - { - "epoch": 22.58, - "learning_rate": 6.855662661306442e-08, - "loss": 3.798, - "step": 2033000 - }, - { - "epoch": 22.58, - "learning_rate": 6.854274481931457e-08, - "loss": 3.7839, - "step": 2033500 - }, - { - "epoch": 22.59, - "learning_rate": 6.852886302556471e-08, - "loss": 3.7743, - "step": 2034000 - }, - { - "epoch": 22.59, - "learning_rate": 6.851498123181485e-08, - "loss": 3.7922, - "step": 2034500 - }, - { - "epoch": 22.6, - "learning_rate": 6.850109943806499e-08, - "loss": 3.7975, - "step": 2035000 - }, - { - "epoch": 22.61, - "learning_rate": 6.848721764431512e-08, - "loss": 3.8052, - "step": 2035500 - }, - { - "epoch": 22.61, - "learning_rate": 6.847333585056526e-08, - "loss": 3.7909, - "step": 2036000 - }, - { - "epoch": 22.62, - "learning_rate": 6.845945405681541e-08, - "loss": 3.7927, - "step": 2036500 - }, - { - "epoch": 22.62, - "learning_rate": 6.844557226306555e-08, - "loss": 3.7935, - "step": 2037000 - }, - { - "epoch": 22.63, - "learning_rate": 6.843169046931568e-08, - "loss": 3.7803, - "step": 2037500 - }, - { - "epoch": 22.63, - "learning_rate": 6.841780867556581e-08, - "loss": 3.7838, - "step": 2038000 - }, - { - "epoch": 22.64, - "learning_rate": 6.840392688181595e-08, - "loss": 3.7787, - "step": 2038500 - }, - { - "epoch": 22.64, - "learning_rate": 6.839004508806609e-08, - "loss": 3.7879, - "step": 2039000 - }, - { - "epoch": 22.65, - "learning_rate": 6.837616329431623e-08, - "loss": 3.7746, - "step": 2039500 - }, - { - "epoch": 22.66, - "learning_rate": 6.836228150056638e-08, - "loss": 3.784, - "step": 2040000 - }, - { - "epoch": 22.66, - "learning_rate": 6.834839970681652e-08, - "loss": 3.7763, - "step": 2040500 - }, - { - "epoch": 22.67, - "learning_rate": 6.833451791306665e-08, - "loss": 3.7761, - "step": 2041000 - }, - { - "epoch": 22.67, - "learning_rate": 6.832063611931679e-08, - "loss": 3.7993, - "step": 2041500 - }, - { - "epoch": 22.68, - "learning_rate": 6.830675432556693e-08, - "loss": 3.8021, - "step": 2042000 - }, - { - "epoch": 22.68, - "learning_rate": 6.829287253181707e-08, - "loss": 3.7867, - "step": 2042500 - }, - { - "epoch": 22.69, - "learning_rate": 6.827899073806722e-08, - "loss": 3.7863, - "step": 2043000 - }, - { - "epoch": 22.69, - "learning_rate": 6.826510894431735e-08, - "loss": 3.789, - "step": 2043500 - }, - { - "epoch": 22.7, - "learning_rate": 6.825122715056747e-08, - "loss": 3.7828, - "step": 2044000 - }, - { - "epoch": 22.71, - "learning_rate": 6.823734535681762e-08, - "loss": 3.7976, - "step": 2044500 - }, - { - "epoch": 22.71, - "learning_rate": 6.822346356306776e-08, - "loss": 3.7772, - "step": 2045000 - }, - { - "epoch": 22.72, - "learning_rate": 6.82095817693179e-08, - "loss": 3.7953, - "step": 2045500 - }, - { - "epoch": 22.72, - "learning_rate": 6.819569997556804e-08, - "loss": 3.7684, - "step": 2046000 - }, - { - "epoch": 22.73, - "learning_rate": 6.818181818181817e-08, - "loss": 3.7981, - "step": 2046500 - }, - { - "epoch": 22.73, - "learning_rate": 6.816793638806831e-08, - "loss": 3.7904, - "step": 2047000 - }, - { - "epoch": 22.74, - "learning_rate": 6.815405459431846e-08, - "loss": 3.7839, - "step": 2047500 - }, - { - "epoch": 22.74, - "learning_rate": 6.81401728005686e-08, - "loss": 3.7794, - "step": 2048000 - }, - { - "epoch": 22.75, - "learning_rate": 6.812629100681874e-08, - "loss": 3.78, - "step": 2048500 - }, - { - "epoch": 22.76, - "learning_rate": 6.811240921306888e-08, - "loss": 3.7841, - "step": 2049000 - }, - { - "epoch": 22.76, - "learning_rate": 6.809852741931901e-08, - "loss": 3.7841, - "step": 2049500 - }, - { - "epoch": 22.77, - "learning_rate": 6.808464562556914e-08, - "loss": 3.7819, - "step": 2050000 - }, - { - "epoch": 22.77, - "learning_rate": 6.807076383181928e-08, - "loss": 3.786, - "step": 2050500 - }, - { - "epoch": 22.78, - "learning_rate": 6.805688203806943e-08, - "loss": 3.8044, - "step": 2051000 - }, - { - "epoch": 22.78, - "learning_rate": 6.804300024431957e-08, - "loss": 3.7804, - "step": 2051500 - }, - { - "epoch": 22.79, - "learning_rate": 6.80291184505697e-08, - "loss": 3.7735, - "step": 2052000 - }, - { - "epoch": 22.79, - "learning_rate": 6.801523665681984e-08, - "loss": 3.8017, - "step": 2052500 - }, - { - "epoch": 22.8, - "learning_rate": 6.800135486306998e-08, - "loss": 3.7766, - "step": 2053000 - }, - { - "epoch": 22.81, - "learning_rate": 6.798747306932012e-08, - "loss": 3.7809, - "step": 2053500 - }, - { - "epoch": 22.81, - "learning_rate": 6.797359127557027e-08, - "loss": 3.7856, - "step": 2054000 - }, - { - "epoch": 22.82, - "learning_rate": 6.795970948182041e-08, - "loss": 3.7727, - "step": 2054500 - }, - { - "epoch": 22.82, - "learning_rate": 6.794582768807054e-08, - "loss": 3.798, - "step": 2055000 - }, - { - "epoch": 22.83, - "learning_rate": 6.793194589432068e-08, - "loss": 3.7815, - "step": 2055500 - }, - { - "epoch": 22.83, - "learning_rate": 6.791806410057081e-08, - "loss": 3.7905, - "step": 2056000 - }, - { - "epoch": 22.84, - "learning_rate": 6.790418230682095e-08, - "loss": 3.7875, - "step": 2056500 - }, - { - "epoch": 22.84, - "learning_rate": 6.78903005130711e-08, - "loss": 3.7908, - "step": 2057000 - }, - { - "epoch": 22.85, - "learning_rate": 6.787641871932124e-08, - "loss": 3.7915, - "step": 2057500 - }, - { - "epoch": 22.85, - "learning_rate": 6.786253692557137e-08, - "loss": 3.7781, - "step": 2058000 - }, - { - "epoch": 22.86, - "learning_rate": 6.784865513182151e-08, - "loss": 3.7852, - "step": 2058500 - }, - { - "epoch": 22.87, - "learning_rate": 6.783477333807165e-08, - "loss": 3.796, - "step": 2059000 - }, - { - "epoch": 22.87, - "learning_rate": 6.782089154432179e-08, - "loss": 3.7695, - "step": 2059500 - }, - { - "epoch": 22.88, - "learning_rate": 6.780700975057193e-08, - "loss": 3.7731, - "step": 2060000 - }, - { - "epoch": 22.88, - "learning_rate": 6.779312795682206e-08, - "loss": 3.7799, - "step": 2060500 - }, - { - "epoch": 22.89, - "learning_rate": 6.77792461630722e-08, - "loss": 3.7956, - "step": 2061000 - }, - { - "epoch": 22.89, - "learning_rate": 6.776536436932235e-08, - "loss": 3.7715, - "step": 2061500 - }, - { - "epoch": 22.9, - "learning_rate": 6.775148257557248e-08, - "loss": 3.7685, - "step": 2062000 - }, - { - "epoch": 22.9, - "learning_rate": 6.773760078182262e-08, - "loss": 3.7821, - "step": 2062500 - }, - { - "epoch": 22.91, - "learning_rate": 6.772371898807276e-08, - "loss": 3.7799, - "step": 2063000 - }, - { - "epoch": 22.92, - "learning_rate": 6.770983719432289e-08, - "loss": 3.7798, - "step": 2063500 - }, - { - "epoch": 22.92, - "learning_rate": 6.769595540057303e-08, - "loss": 3.8001, - "step": 2064000 - }, - { - "epoch": 22.93, - "learning_rate": 6.768207360682318e-08, - "loss": 3.761, - "step": 2064500 - }, - { - "epoch": 22.93, - "learning_rate": 6.766819181307332e-08, - "loss": 3.7929, - "step": 2065000 - }, - { - "epoch": 22.94, - "learning_rate": 6.765431001932346e-08, - "loss": 3.7812, - "step": 2065500 - }, - { - "epoch": 22.94, - "learning_rate": 6.76404282255736e-08, - "loss": 3.7751, - "step": 2066000 - }, - { - "epoch": 22.95, - "learning_rate": 6.762654643182373e-08, - "loss": 3.778, - "step": 2066500 - }, - { - "epoch": 22.95, - "learning_rate": 6.761266463807387e-08, - "loss": 3.7746, - "step": 2067000 - }, - { - "epoch": 22.96, - "learning_rate": 6.759878284432402e-08, - "loss": 3.7818, - "step": 2067500 - }, - { - "epoch": 22.97, - "learning_rate": 6.758490105057414e-08, - "loss": 3.7983, - "step": 2068000 - }, - { - "epoch": 22.97, - "learning_rate": 6.757101925682429e-08, - "loss": 3.7863, - "step": 2068500 - }, - { - "epoch": 22.98, - "learning_rate": 6.755713746307442e-08, - "loss": 3.7871, - "step": 2069000 - }, - { - "epoch": 22.98, - "learning_rate": 6.754325566932456e-08, - "loss": 3.8034, - "step": 2069500 - }, - { - "epoch": 22.99, - "learning_rate": 6.75293738755747e-08, - "loss": 3.7894, - "step": 2070000 - }, - { - "epoch": 22.99, - "learning_rate": 6.751549208182484e-08, - "loss": 3.789, - "step": 2070500 - }, - { - "epoch": 23.0, - "learning_rate": 6.750161028807498e-08, - "loss": 3.7924, - "step": 2071000 - }, - { - "epoch": 23.0, - "eval_loss": 3.8470423221588135, - "eval_runtime": 6.312, - "eval_samples_per_second": 246.197, - "step": 2071058 - }, - { - "epoch": 23.0, - "learning_rate": 6.748772849432513e-08, - "loss": 3.7791, - "step": 2071500 - }, - { - "epoch": 23.01, - "learning_rate": 6.747384670057526e-08, - "loss": 3.788, - "step": 2072000 - }, - { - "epoch": 23.02, - "learning_rate": 6.74599649068254e-08, - "loss": 3.7828, - "step": 2072500 - }, - { - "epoch": 23.02, - "learning_rate": 6.744608311307554e-08, - "loss": 3.781, - "step": 2073000 - }, - { - "epoch": 23.03, - "learning_rate": 6.743220131932568e-08, - "loss": 3.7739, - "step": 2073500 - }, - { - "epoch": 23.03, - "learning_rate": 6.741831952557581e-08, - "loss": 3.7775, - "step": 2074000 - }, - { - "epoch": 23.04, - "learning_rate": 6.740443773182594e-08, - "loss": 3.7998, - "step": 2074500 - }, - { - "epoch": 23.04, - "learning_rate": 6.739055593807608e-08, - "loss": 3.7705, - "step": 2075000 - }, - { - "epoch": 23.05, - "learning_rate": 6.737667414432623e-08, - "loss": 3.7864, - "step": 2075500 - }, - { - "epoch": 23.05, - "learning_rate": 6.736279235057637e-08, - "loss": 3.7928, - "step": 2076000 - }, - { - "epoch": 23.06, - "learning_rate": 6.734891055682651e-08, - "loss": 3.7908, - "step": 2076500 - }, - { - "epoch": 23.07, - "learning_rate": 6.733502876307665e-08, - "loss": 3.778, - "step": 2077000 - }, - { - "epoch": 23.07, - "learning_rate": 6.732114696932678e-08, - "loss": 3.7924, - "step": 2077500 - }, - { - "epoch": 23.08, - "learning_rate": 6.730726517557692e-08, - "loss": 3.79, - "step": 2078000 - }, - { - "epoch": 23.08, - "learning_rate": 6.729338338182707e-08, - "loss": 3.781, - "step": 2078500 - }, - { - "epoch": 23.09, - "learning_rate": 6.727950158807721e-08, - "loss": 3.7905, - "step": 2079000 - }, - { - "epoch": 23.09, - "learning_rate": 6.726561979432735e-08, - "loss": 3.7688, - "step": 2079500 - }, - { - "epoch": 23.1, - "learning_rate": 6.725173800057748e-08, - "loss": 3.7778, - "step": 2080000 - }, - { - "epoch": 23.1, - "learning_rate": 6.723785620682761e-08, - "loss": 3.7955, - "step": 2080500 - }, - { - "epoch": 23.11, - "learning_rate": 6.722397441307775e-08, - "loss": 3.7835, - "step": 2081000 - }, - { - "epoch": 23.12, - "learning_rate": 6.721009261932789e-08, - "loss": 3.7987, - "step": 2081500 - }, - { - "epoch": 23.12, - "learning_rate": 6.719621082557804e-08, - "loss": 3.7814, - "step": 2082000 - }, - { - "epoch": 23.13, - "learning_rate": 6.718232903182818e-08, - "loss": 3.7963, - "step": 2082500 - }, - { - "epoch": 23.13, - "learning_rate": 6.71684472380783e-08, - "loss": 3.7843, - "step": 2083000 - }, - { - "epoch": 23.14, - "learning_rate": 6.715456544432845e-08, - "loss": 3.7916, - "step": 2083500 - }, - { - "epoch": 23.14, - "learning_rate": 6.714068365057859e-08, - "loss": 3.7684, - "step": 2084000 - }, - { - "epoch": 23.15, - "learning_rate": 6.712680185682873e-08, - "loss": 3.784, - "step": 2084500 - }, - { - "epoch": 23.15, - "learning_rate": 6.711292006307888e-08, - "loss": 3.7906, - "step": 2085000 - }, - { - "epoch": 23.16, - "learning_rate": 6.709903826932902e-08, - "loss": 3.8045, - "step": 2085500 - }, - { - "epoch": 23.17, - "learning_rate": 6.708515647557915e-08, - "loss": 3.7896, - "step": 2086000 - }, - { - "epoch": 23.17, - "learning_rate": 6.707127468182928e-08, - "loss": 3.7908, - "step": 2086500 - }, - { - "epoch": 23.18, - "learning_rate": 6.705739288807942e-08, - "loss": 3.7766, - "step": 2087000 - }, - { - "epoch": 23.18, - "learning_rate": 6.704351109432956e-08, - "loss": 3.7789, - "step": 2087500 - }, - { - "epoch": 23.19, - "learning_rate": 6.70296293005797e-08, - "loss": 3.7694, - "step": 2088000 - }, - { - "epoch": 23.19, - "learning_rate": 6.701574750682984e-08, - "loss": 3.7662, - "step": 2088500 - }, - { - "epoch": 23.2, - "learning_rate": 6.700186571307997e-08, - "loss": 3.7678, - "step": 2089000 - }, - { - "epoch": 23.2, - "learning_rate": 6.698798391933012e-08, - "loss": 3.7835, - "step": 2089500 - }, - { - "epoch": 23.21, - "learning_rate": 6.697410212558026e-08, - "loss": 3.7938, - "step": 2090000 - }, - { - "epoch": 23.22, - "learning_rate": 6.69602203318304e-08, - "loss": 3.8115, - "step": 2090500 - }, - { - "epoch": 23.22, - "learning_rate": 6.694633853808054e-08, - "loss": 3.7777, - "step": 2091000 - }, - { - "epoch": 23.23, - "learning_rate": 6.693245674433067e-08, - "loss": 3.7758, - "step": 2091500 - }, - { - "epoch": 23.23, - "learning_rate": 6.691857495058081e-08, - "loss": 3.8004, - "step": 2092000 - }, - { - "epoch": 23.24, - "learning_rate": 6.690469315683094e-08, - "loss": 3.7832, - "step": 2092500 - }, - { - "epoch": 23.24, - "learning_rate": 6.689081136308109e-08, - "loss": 3.788, - "step": 2093000 - }, - { - "epoch": 23.25, - "learning_rate": 6.687692956933123e-08, - "loss": 3.7774, - "step": 2093500 - }, - { - "epoch": 23.25, - "learning_rate": 6.686304777558137e-08, - "loss": 3.786, - "step": 2094000 - }, - { - "epoch": 23.26, - "learning_rate": 6.68491659818315e-08, - "loss": 3.7752, - "step": 2094500 - }, - { - "epoch": 23.27, - "learning_rate": 6.683528418808164e-08, - "loss": 3.7946, - "step": 2095000 - }, - { - "epoch": 23.27, - "learning_rate": 6.682140239433178e-08, - "loss": 3.7814, - "step": 2095500 - }, - { - "epoch": 23.28, - "learning_rate": 6.680752060058193e-08, - "loss": 3.7875, - "step": 2096000 - }, - { - "epoch": 23.28, - "learning_rate": 6.679363880683207e-08, - "loss": 3.7734, - "step": 2096500 - }, - { - "epoch": 23.29, - "learning_rate": 6.67797570130822e-08, - "loss": 3.7906, - "step": 2097000 - }, - { - "epoch": 23.29, - "learning_rate": 6.676587521933234e-08, - "loss": 3.7844, - "step": 2097500 - }, - { - "epoch": 23.3, - "learning_rate": 6.675199342558248e-08, - "loss": 3.7729, - "step": 2098000 - }, - { - "epoch": 23.3, - "learning_rate": 6.673811163183261e-08, - "loss": 3.7776, - "step": 2098500 - }, - { - "epoch": 23.31, - "learning_rate": 6.672422983808275e-08, - "loss": 3.7633, - "step": 2099000 - }, - { - "epoch": 23.32, - "learning_rate": 6.67103480443329e-08, - "loss": 3.7919, - "step": 2099500 - }, - { - "epoch": 23.32, - "learning_rate": 6.669646625058302e-08, - "loss": 3.7902, - "step": 2100000 - }, - { - "epoch": 23.33, - "learning_rate": 6.668258445683317e-08, - "loss": 3.7825, - "step": 2100500 - }, - { - "epoch": 23.33, - "learning_rate": 6.666870266308331e-08, - "loss": 3.779, - "step": 2101000 - }, - { - "epoch": 23.34, - "learning_rate": 6.665482086933345e-08, - "loss": 3.7957, - "step": 2101500 - }, - { - "epoch": 23.34, - "learning_rate": 6.66409390755836e-08, - "loss": 3.7888, - "step": 2102000 - }, - { - "epoch": 23.35, - "learning_rate": 6.662705728183374e-08, - "loss": 3.7831, - "step": 2102500 - }, - { - "epoch": 23.35, - "learning_rate": 6.661317548808386e-08, - "loss": 3.7662, - "step": 2103000 - }, - { - "epoch": 23.36, - "learning_rate": 6.659929369433401e-08, - "loss": 3.7787, - "step": 2103500 - }, - { - "epoch": 23.37, - "learning_rate": 6.658541190058415e-08, - "loss": 3.7825, - "step": 2104000 - }, - { - "epoch": 23.37, - "learning_rate": 6.657153010683428e-08, - "loss": 3.7757, - "step": 2104500 - }, - { - "epoch": 23.38, - "learning_rate": 6.655764831308442e-08, - "loss": 3.7915, - "step": 2105000 - }, - { - "epoch": 23.38, - "learning_rate": 6.654376651933455e-08, - "loss": 3.7791, - "step": 2105500 - }, - { - "epoch": 23.39, - "learning_rate": 6.652988472558469e-08, - "loss": 3.7841, - "step": 2106000 - }, - { - "epoch": 23.39, - "learning_rate": 6.651600293183483e-08, - "loss": 3.7806, - "step": 2106500 - }, - { - "epoch": 23.4, - "learning_rate": 6.650212113808498e-08, - "loss": 3.7761, - "step": 2107000 - }, - { - "epoch": 23.4, - "learning_rate": 6.648823934433512e-08, - "loss": 3.7916, - "step": 2107500 - }, - { - "epoch": 23.41, - "learning_rate": 6.647435755058526e-08, - "loss": 3.7705, - "step": 2108000 - }, - { - "epoch": 23.42, - "learning_rate": 6.646047575683539e-08, - "loss": 3.7778, - "step": 2108500 - }, - { - "epoch": 23.42, - "learning_rate": 6.644659396308553e-08, - "loss": 3.7875, - "step": 2109000 - }, - { - "epoch": 23.43, - "learning_rate": 6.643271216933567e-08, - "loss": 3.794, - "step": 2109500 - }, - { - "epoch": 23.43, - "learning_rate": 6.641883037558582e-08, - "loss": 3.7824, - "step": 2110000 - }, - { - "epoch": 23.44, - "learning_rate": 6.640494858183595e-08, - "loss": 3.7773, - "step": 2110500 - }, - { - "epoch": 23.44, - "learning_rate": 6.639106678808607e-08, - "loss": 3.7975, - "step": 2111000 - }, - { - "epoch": 23.45, - "learning_rate": 6.637718499433622e-08, - "loss": 3.8004, - "step": 2111500 - }, - { - "epoch": 23.45, - "learning_rate": 6.636330320058636e-08, - "loss": 3.7899, - "step": 2112000 - }, - { - "epoch": 23.46, - "learning_rate": 6.63494214068365e-08, - "loss": 3.7871, - "step": 2112500 - }, - { - "epoch": 23.47, - "learning_rate": 6.633553961308664e-08, - "loss": 3.7853, - "step": 2113000 - }, - { - "epoch": 23.47, - "learning_rate": 6.632165781933679e-08, - "loss": 3.7827, - "step": 2113500 - }, - { - "epoch": 23.48, - "learning_rate": 6.630777602558692e-08, - "loss": 3.7907, - "step": 2114000 - }, - { - "epoch": 23.48, - "learning_rate": 6.629389423183706e-08, - "loss": 3.7943, - "step": 2114500 - }, - { - "epoch": 23.49, - "learning_rate": 6.62800124380872e-08, - "loss": 3.793, - "step": 2115000 - }, - { - "epoch": 23.49, - "learning_rate": 6.626613064433734e-08, - "loss": 3.7769, - "step": 2115500 - }, - { - "epoch": 23.5, - "learning_rate": 6.625224885058748e-08, - "loss": 3.7895, - "step": 2116000 - }, - { - "epoch": 23.5, - "learning_rate": 6.623836705683761e-08, - "loss": 3.7738, - "step": 2116500 - }, - { - "epoch": 23.51, - "learning_rate": 6.622448526308774e-08, - "loss": 3.7806, - "step": 2117000 - }, - { - "epoch": 23.52, - "learning_rate": 6.621060346933788e-08, - "loss": 3.7863, - "step": 2117500 - }, - { - "epoch": 23.52, - "learning_rate": 6.619672167558803e-08, - "loss": 3.7868, - "step": 2118000 - }, - { - "epoch": 23.53, - "learning_rate": 6.618283988183817e-08, - "loss": 3.7666, - "step": 2118500 - }, - { - "epoch": 23.53, - "learning_rate": 6.616895808808831e-08, - "loss": 3.8007, - "step": 2119000 - }, - { - "epoch": 23.54, - "learning_rate": 6.615507629433844e-08, - "loss": 3.7921, - "step": 2119500 - }, - { - "epoch": 23.54, - "learning_rate": 6.614119450058858e-08, - "loss": 3.7645, - "step": 2120000 - }, - { - "epoch": 23.55, - "learning_rate": 6.612731270683872e-08, - "loss": 3.7755, - "step": 2120500 - }, - { - "epoch": 23.55, - "learning_rate": 6.611343091308887e-08, - "loss": 3.7808, - "step": 2121000 - }, - { - "epoch": 23.56, - "learning_rate": 6.609954911933901e-08, - "loss": 3.7815, - "step": 2121500 - }, - { - "epoch": 23.57, - "learning_rate": 6.608566732558914e-08, - "loss": 3.8045, - "step": 2122000 - }, - { - "epoch": 23.57, - "learning_rate": 6.607178553183928e-08, - "loss": 3.7643, - "step": 2122500 - }, - { - "epoch": 23.58, - "learning_rate": 6.605790373808941e-08, - "loss": 3.7958, - "step": 2123000 - }, - { - "epoch": 23.58, - "learning_rate": 6.604402194433955e-08, - "loss": 3.8002, - "step": 2123500 - }, - { - "epoch": 23.59, - "learning_rate": 6.60301401505897e-08, - "loss": 3.7913, - "step": 2124000 - }, - { - "epoch": 23.59, - "learning_rate": 6.601625835683984e-08, - "loss": 3.7967, - "step": 2124500 - }, - { - "epoch": 23.6, - "learning_rate": 6.600237656308998e-08, - "loss": 3.7851, - "step": 2125000 - }, - { - "epoch": 23.6, - "learning_rate": 6.598849476934011e-08, - "loss": 3.7898, - "step": 2125500 - }, - { - "epoch": 23.61, - "learning_rate": 6.597461297559025e-08, - "loss": 3.7706, - "step": 2126000 - }, - { - "epoch": 23.62, - "learning_rate": 6.596073118184039e-08, - "loss": 3.7717, - "step": 2126500 - }, - { - "epoch": 23.62, - "learning_rate": 6.594684938809053e-08, - "loss": 3.793, - "step": 2127000 - }, - { - "epoch": 23.63, - "learning_rate": 6.593296759434068e-08, - "loss": 3.7624, - "step": 2127500 - }, - { - "epoch": 23.63, - "learning_rate": 6.59190858005908e-08, - "loss": 3.8061, - "step": 2128000 - }, - { - "epoch": 23.64, - "learning_rate": 6.590520400684095e-08, - "loss": 3.7868, - "step": 2128500 - }, - { - "epoch": 23.64, - "learning_rate": 6.589132221309108e-08, - "loss": 3.7649, - "step": 2129000 - }, - { - "epoch": 23.65, - "learning_rate": 6.587744041934122e-08, - "loss": 3.7737, - "step": 2129500 - }, - { - "epoch": 23.65, - "learning_rate": 6.586355862559136e-08, - "loss": 3.7802, - "step": 2130000 - }, - { - "epoch": 23.66, - "learning_rate": 6.58496768318415e-08, - "loss": 3.777, - "step": 2130500 - }, - { - "epoch": 23.67, - "learning_rate": 6.583579503809163e-08, - "loss": 3.7966, - "step": 2131000 - }, - { - "epoch": 23.67, - "learning_rate": 6.582191324434178e-08, - "loss": 3.784, - "step": 2131500 - }, - { - "epoch": 23.68, - "learning_rate": 6.580803145059192e-08, - "loss": 3.7752, - "step": 2132000 - }, - { - "epoch": 23.68, - "learning_rate": 6.579414965684206e-08, - "loss": 3.7828, - "step": 2132500 - }, - { - "epoch": 23.69, - "learning_rate": 6.57802678630922e-08, - "loss": 3.7681, - "step": 2133000 - }, - { - "epoch": 23.69, - "learning_rate": 6.576638606934233e-08, - "loss": 3.7744, - "step": 2133500 - }, - { - "epoch": 23.7, - "learning_rate": 6.575250427559247e-08, - "loss": 3.7893, - "step": 2134000 - }, - { - "epoch": 23.7, - "learning_rate": 6.573862248184262e-08, - "loss": 3.77, - "step": 2134500 - }, - { - "epoch": 23.71, - "learning_rate": 6.572474068809274e-08, - "loss": 3.7745, - "step": 2135000 - }, - { - "epoch": 23.72, - "learning_rate": 6.571085889434289e-08, - "loss": 3.786, - "step": 2135500 - }, - { - "epoch": 23.72, - "learning_rate": 6.569697710059303e-08, - "loss": 3.7672, - "step": 2136000 - }, - { - "epoch": 23.73, - "learning_rate": 6.568309530684316e-08, - "loss": 3.8036, - "step": 2136500 - }, - { - "epoch": 23.73, - "learning_rate": 6.56692135130933e-08, - "loss": 3.7874, - "step": 2137000 - }, - { - "epoch": 23.74, - "learning_rate": 6.565533171934344e-08, - "loss": 3.765, - "step": 2137500 - }, - { - "epoch": 23.74, - "learning_rate": 6.564144992559358e-08, - "loss": 3.7837, - "step": 2138000 - }, - { - "epoch": 23.75, - "learning_rate": 6.562756813184373e-08, - "loss": 3.7807, - "step": 2138500 - }, - { - "epoch": 23.75, - "learning_rate": 6.561368633809387e-08, - "loss": 3.7927, - "step": 2139000 - }, - { - "epoch": 23.76, - "learning_rate": 6.5599804544344e-08, - "loss": 3.768, - "step": 2139500 - }, - { - "epoch": 23.77, - "learning_rate": 6.558592275059414e-08, - "loss": 3.7887, - "step": 2140000 - }, - { - "epoch": 23.77, - "learning_rate": 6.557204095684428e-08, - "loss": 3.775, - "step": 2140500 - }, - { - "epoch": 23.78, - "learning_rate": 6.555815916309441e-08, - "loss": 3.7894, - "step": 2141000 - }, - { - "epoch": 23.78, - "learning_rate": 6.554427736934455e-08, - "loss": 3.7793, - "step": 2141500 - }, - { - "epoch": 23.79, - "learning_rate": 6.553039557559468e-08, - "loss": 3.7877, - "step": 2142000 - }, - { - "epoch": 23.79, - "learning_rate": 6.551651378184483e-08, - "loss": 3.7821, - "step": 2142500 - }, - { - "epoch": 23.8, - "learning_rate": 6.550263198809497e-08, - "loss": 3.7771, - "step": 2143000 - }, - { - "epoch": 23.8, - "learning_rate": 6.548875019434511e-08, - "loss": 3.7745, - "step": 2143500 - }, - { - "epoch": 23.81, - "learning_rate": 6.547486840059525e-08, - "loss": 3.7832, - "step": 2144000 - }, - { - "epoch": 23.82, - "learning_rate": 6.54609866068454e-08, - "loss": 3.7857, - "step": 2144500 - }, - { - "epoch": 23.82, - "learning_rate": 6.544710481309552e-08, - "loss": 3.7874, - "step": 2145000 - }, - { - "epoch": 23.83, - "learning_rate": 6.543322301934567e-08, - "loss": 3.7822, - "step": 2145500 - }, - { - "epoch": 23.83, - "learning_rate": 6.541934122559581e-08, - "loss": 3.789, - "step": 2146000 - }, - { - "epoch": 23.84, - "learning_rate": 6.540545943184595e-08, - "loss": 3.7924, - "step": 2146500 - }, - { - "epoch": 23.84, - "learning_rate": 6.539157763809608e-08, - "loss": 3.7907, - "step": 2147000 - }, - { - "epoch": 23.85, - "learning_rate": 6.537769584434621e-08, - "loss": 3.7795, - "step": 2147500 - }, - { - "epoch": 23.85, - "learning_rate": 6.536381405059635e-08, - "loss": 3.7665, - "step": 2148000 - }, - { - "epoch": 23.86, - "learning_rate": 6.534993225684649e-08, - "loss": 3.7786, - "step": 2148500 - }, - { - "epoch": 23.87, - "learning_rate": 6.533605046309664e-08, - "loss": 3.7822, - "step": 2149000 - }, - { - "epoch": 23.87, - "learning_rate": 6.532216866934678e-08, - "loss": 3.7764, - "step": 2149500 - }, - { - "epoch": 23.88, - "learning_rate": 6.530828687559692e-08, - "loss": 3.7871, - "step": 2150000 - }, - { - "epoch": 23.88, - "learning_rate": 6.529440508184705e-08, - "loss": 3.7928, - "step": 2150500 - }, - { - "epoch": 23.89, - "learning_rate": 6.528052328809719e-08, - "loss": 3.787, - "step": 2151000 - }, - { - "epoch": 23.89, - "learning_rate": 6.526664149434733e-08, - "loss": 3.7874, - "step": 2151500 - }, - { - "epoch": 23.9, - "learning_rate": 6.525275970059748e-08, - "loss": 3.7823, - "step": 2152000 - }, - { - "epoch": 23.9, - "learning_rate": 6.52388779068476e-08, - "loss": 3.7776, - "step": 2152500 - }, - { - "epoch": 23.91, - "learning_rate": 6.522499611309775e-08, - "loss": 3.7992, - "step": 2153000 - }, - { - "epoch": 23.92, - "learning_rate": 6.521111431934788e-08, - "loss": 3.7724, - "step": 2153500 - }, - { - "epoch": 23.92, - "learning_rate": 6.519723252559802e-08, - "loss": 3.7752, - "step": 2154000 - }, - { - "epoch": 23.93, - "learning_rate": 6.518335073184816e-08, - "loss": 3.7777, - "step": 2154500 - }, - { - "epoch": 23.93, - "learning_rate": 6.51694689380983e-08, - "loss": 3.7808, - "step": 2155000 - }, - { - "epoch": 23.94, - "learning_rate": 6.515558714434845e-08, - "loss": 3.7812, - "step": 2155500 - }, - { - "epoch": 23.94, - "learning_rate": 6.514170535059857e-08, - "loss": 3.7915, - "step": 2156000 - }, - { - "epoch": 23.95, - "learning_rate": 6.512782355684872e-08, - "loss": 3.7822, - "step": 2156500 - }, - { - "epoch": 23.95, - "learning_rate": 6.511394176309886e-08, - "loss": 3.7834, - "step": 2157000 - }, - { - "epoch": 23.96, - "learning_rate": 6.5100059969349e-08, - "loss": 3.7774, - "step": 2157500 - }, - { - "epoch": 23.97, - "learning_rate": 6.508617817559914e-08, - "loss": 3.7848, - "step": 2158000 - }, - { - "epoch": 23.97, - "learning_rate": 6.507229638184927e-08, - "loss": 3.7703, - "step": 2158500 - }, - { - "epoch": 23.98, - "learning_rate": 6.505841458809941e-08, - "loss": 3.7652, - "step": 2159000 - }, - { - "epoch": 23.98, - "learning_rate": 6.504453279434954e-08, - "loss": 3.7697, - "step": 2159500 - }, - { - "epoch": 23.99, - "learning_rate": 6.503065100059969e-08, - "loss": 3.7894, - "step": 2160000 - }, - { - "epoch": 23.99, - "learning_rate": 6.501676920684983e-08, - "loss": 3.7768, - "step": 2160500 - }, - { - "epoch": 24.0, - "learning_rate": 6.500288741309997e-08, - "loss": 3.7882, - "step": 2161000 - }, - { - "epoch": 24.0, - "eval_loss": 3.845104455947876, - "eval_runtime": 6.3115, - "eval_samples_per_second": 246.219, - "step": 2161104 - }, - { - "epoch": 24.0, - "learning_rate": 6.498900561935011e-08, - "loss": 3.799, - "step": 2161500 - }, - { - "epoch": 24.01, - "learning_rate": 6.497512382560024e-08, - "loss": 3.8033, - "step": 2162000 - }, - { - "epoch": 24.02, - "learning_rate": 6.496124203185038e-08, - "loss": 3.7733, - "step": 2162500 - }, - { - "epoch": 24.02, - "learning_rate": 6.494736023810053e-08, - "loss": 3.7673, - "step": 2163000 - }, - { - "epoch": 24.03, - "learning_rate": 6.493347844435067e-08, - "loss": 3.7686, - "step": 2163500 - }, - { - "epoch": 24.03, - "learning_rate": 6.491959665060081e-08, - "loss": 3.7849, - "step": 2164000 - }, - { - "epoch": 24.04, - "learning_rate": 6.490571485685094e-08, - "loss": 3.7871, - "step": 2164500 - }, - { - "epoch": 24.04, - "learning_rate": 6.489183306310108e-08, - "loss": 3.7774, - "step": 2165000 - }, - { - "epoch": 24.05, - "learning_rate": 6.487795126935121e-08, - "loss": 3.7842, - "step": 2165500 - }, - { - "epoch": 24.05, - "learning_rate": 6.486406947560135e-08, - "loss": 3.781, - "step": 2166000 - }, - { - "epoch": 24.06, - "learning_rate": 6.48501876818515e-08, - "loss": 3.7829, - "step": 2166500 - }, - { - "epoch": 24.07, - "learning_rate": 6.483630588810164e-08, - "loss": 3.7887, - "step": 2167000 - }, - { - "epoch": 24.07, - "learning_rate": 6.482242409435177e-08, - "loss": 3.8049, - "step": 2167500 - }, - { - "epoch": 24.08, - "learning_rate": 6.480854230060191e-08, - "loss": 3.773, - "step": 2168000 - }, - { - "epoch": 24.08, - "learning_rate": 6.479466050685205e-08, - "loss": 3.7746, - "step": 2168500 - }, - { - "epoch": 24.09, - "learning_rate": 6.47807787131022e-08, - "loss": 3.7972, - "step": 2169000 - }, - { - "epoch": 24.09, - "learning_rate": 6.476689691935234e-08, - "loss": 3.792, - "step": 2169500 - }, - { - "epoch": 24.1, - "learning_rate": 6.475301512560246e-08, - "loss": 3.7721, - "step": 2170000 - }, - { - "epoch": 24.1, - "learning_rate": 6.473913333185261e-08, - "loss": 3.7841, - "step": 2170500 - }, - { - "epoch": 24.11, - "learning_rate": 6.472525153810275e-08, - "loss": 3.7675, - "step": 2171000 - }, - { - "epoch": 24.12, - "learning_rate": 6.471136974435288e-08, - "loss": 3.7843, - "step": 2171500 - }, - { - "epoch": 24.12, - "learning_rate": 6.469748795060302e-08, - "loss": 3.7789, - "step": 2172000 - }, - { - "epoch": 24.13, - "learning_rate": 6.468360615685316e-08, - "loss": 3.7668, - "step": 2172500 - }, - { - "epoch": 24.13, - "learning_rate": 6.466972436310329e-08, - "loss": 3.7814, - "step": 2173000 - }, - { - "epoch": 24.14, - "learning_rate": 6.465584256935343e-08, - "loss": 3.7736, - "step": 2173500 - }, - { - "epoch": 24.14, - "learning_rate": 6.464196077560358e-08, - "loss": 3.7837, - "step": 2174000 - }, - { - "epoch": 24.15, - "learning_rate": 6.462807898185372e-08, - "loss": 3.806, - "step": 2174500 - }, - { - "epoch": 24.15, - "learning_rate": 6.461419718810386e-08, - "loss": 3.7826, - "step": 2175000 - }, - { - "epoch": 24.16, - "learning_rate": 6.4600315394354e-08, - "loss": 3.7926, - "step": 2175500 - }, - { - "epoch": 24.17, - "learning_rate": 6.458643360060413e-08, - "loss": 3.7778, - "step": 2176000 - }, - { - "epoch": 24.17, - "learning_rate": 6.457255180685427e-08, - "loss": 3.7669, - "step": 2176500 - }, - { - "epoch": 24.18, - "learning_rate": 6.455867001310442e-08, - "loss": 3.7813, - "step": 2177000 - }, - { - "epoch": 24.18, - "learning_rate": 6.454478821935455e-08, - "loss": 3.7727, - "step": 2177500 - }, - { - "epoch": 24.19, - "learning_rate": 6.453090642560469e-08, - "loss": 3.7971, - "step": 2178000 - }, - { - "epoch": 24.19, - "learning_rate": 6.451702463185482e-08, - "loss": 3.7803, - "step": 2178500 - }, - { - "epoch": 24.2, - "learning_rate": 6.450314283810496e-08, - "loss": 3.7864, - "step": 2179000 - }, - { - "epoch": 24.2, - "learning_rate": 6.44892610443551e-08, - "loss": 3.7819, - "step": 2179500 - }, - { - "epoch": 24.21, - "learning_rate": 6.447537925060524e-08, - "loss": 3.7894, - "step": 2180000 - }, - { - "epoch": 24.22, - "learning_rate": 6.446149745685539e-08, - "loss": 3.7786, - "step": 2180500 - }, - { - "epoch": 24.22, - "learning_rate": 6.444761566310553e-08, - "loss": 3.7788, - "step": 2181000 - }, - { - "epoch": 24.23, - "learning_rate": 6.443373386935566e-08, - "loss": 3.7876, - "step": 2181500 - }, - { - "epoch": 24.23, - "learning_rate": 6.44198520756058e-08, - "loss": 3.7829, - "step": 2182000 - }, - { - "epoch": 24.24, - "learning_rate": 6.440597028185594e-08, - "loss": 3.7617, - "step": 2182500 - }, - { - "epoch": 24.24, - "learning_rate": 6.439208848810607e-08, - "loss": 3.7806, - "step": 2183000 - }, - { - "epoch": 24.25, - "learning_rate": 6.437820669435621e-08, - "loss": 3.7923, - "step": 2183500 - }, - { - "epoch": 24.25, - "learning_rate": 6.436432490060636e-08, - "loss": 3.7772, - "step": 2184000 - }, - { - "epoch": 24.26, - "learning_rate": 6.435044310685648e-08, - "loss": 3.7799, - "step": 2184500 - }, - { - "epoch": 24.27, - "learning_rate": 6.433656131310663e-08, - "loss": 3.787, - "step": 2185000 - }, - { - "epoch": 24.27, - "learning_rate": 6.432267951935677e-08, - "loss": 3.787, - "step": 2185500 - }, - { - "epoch": 24.28, - "learning_rate": 6.430879772560691e-08, - "loss": 3.7804, - "step": 2186000 - }, - { - "epoch": 24.28, - "learning_rate": 6.429491593185705e-08, - "loss": 3.7947, - "step": 2186500 - }, - { - "epoch": 24.29, - "learning_rate": 6.428103413810718e-08, - "loss": 3.7832, - "step": 2187000 - }, - { - "epoch": 24.29, - "learning_rate": 6.426715234435733e-08, - "loss": 3.7852, - "step": 2187500 - }, - { - "epoch": 24.3, - "learning_rate": 6.425327055060747e-08, - "loss": 3.7644, - "step": 2188000 - }, - { - "epoch": 24.3, - "learning_rate": 6.423938875685761e-08, - "loss": 3.8106, - "step": 2188500 - }, - { - "epoch": 24.31, - "learning_rate": 6.422550696310774e-08, - "loss": 3.7837, - "step": 2189000 - }, - { - "epoch": 24.32, - "learning_rate": 6.421162516935788e-08, - "loss": 3.7755, - "step": 2189500 - }, - { - "epoch": 24.32, - "learning_rate": 6.419774337560801e-08, - "loss": 3.7884, - "step": 2190000 - }, - { - "epoch": 24.33, - "learning_rate": 6.418386158185815e-08, - "loss": 3.7811, - "step": 2190500 - }, - { - "epoch": 24.33, - "learning_rate": 6.41699797881083e-08, - "loss": 3.7565, - "step": 2191000 - }, - { - "epoch": 24.34, - "learning_rate": 6.415609799435844e-08, - "loss": 3.7716, - "step": 2191500 - }, - { - "epoch": 24.34, - "learning_rate": 6.414221620060858e-08, - "loss": 3.7875, - "step": 2192000 - }, - { - "epoch": 24.35, - "learning_rate": 6.412833440685871e-08, - "loss": 3.7719, - "step": 2192500 - }, - { - "epoch": 24.35, - "learning_rate": 6.411445261310885e-08, - "loss": 3.7735, - "step": 2193000 - }, - { - "epoch": 24.36, - "learning_rate": 6.410057081935899e-08, - "loss": 3.7724, - "step": 2193500 - }, - { - "epoch": 24.37, - "learning_rate": 6.408668902560913e-08, - "loss": 3.7767, - "step": 2194000 - }, - { - "epoch": 24.37, - "learning_rate": 6.407280723185928e-08, - "loss": 3.787, - "step": 2194500 - }, - { - "epoch": 24.38, - "learning_rate": 6.40589254381094e-08, - "loss": 3.778, - "step": 2195000 - }, - { - "epoch": 24.38, - "learning_rate": 6.404504364435955e-08, - "loss": 3.7974, - "step": 2195500 - }, - { - "epoch": 24.39, - "learning_rate": 6.403116185060968e-08, - "loss": 3.7689, - "step": 2196000 - }, - { - "epoch": 24.39, - "learning_rate": 6.401728005685982e-08, - "loss": 3.7784, - "step": 2196500 - }, - { - "epoch": 24.4, - "learning_rate": 6.400339826310996e-08, - "loss": 3.7827, - "step": 2197000 - }, - { - "epoch": 24.4, - "learning_rate": 6.39895164693601e-08, - "loss": 3.7858, - "step": 2197500 - }, - { - "epoch": 24.41, - "learning_rate": 6.397563467561025e-08, - "loss": 3.7733, - "step": 2198000 - }, - { - "epoch": 24.42, - "learning_rate": 6.396175288186038e-08, - "loss": 3.7667, - "step": 2198500 - }, - { - "epoch": 24.42, - "learning_rate": 6.394787108811052e-08, - "loss": 3.7876, - "step": 2199000 - }, - { - "epoch": 24.43, - "learning_rate": 6.393398929436066e-08, - "loss": 3.7564, - "step": 2199500 - }, - { - "epoch": 24.43, - "learning_rate": 6.39201075006108e-08, - "loss": 3.7918, - "step": 2200000 - }, - { - "epoch": 24.44, - "learning_rate": 6.390622570686094e-08, - "loss": 3.7811, - "step": 2200500 - }, - { - "epoch": 24.44, - "learning_rate": 6.389234391311107e-08, - "loss": 3.7851, - "step": 2201000 - }, - { - "epoch": 24.45, - "learning_rate": 6.387846211936122e-08, - "loss": 3.7748, - "step": 2201500 - }, - { - "epoch": 24.45, - "learning_rate": 6.386458032561134e-08, - "loss": 3.7831, - "step": 2202000 - }, - { - "epoch": 24.46, - "learning_rate": 6.385069853186149e-08, - "loss": 3.7806, - "step": 2202500 - }, - { - "epoch": 24.47, - "learning_rate": 6.383681673811163e-08, - "loss": 3.7971, - "step": 2203000 - }, - { - "epoch": 24.47, - "learning_rate": 6.382293494436177e-08, - "loss": 3.7712, - "step": 2203500 - }, - { - "epoch": 24.48, - "learning_rate": 6.38090531506119e-08, - "loss": 3.7673, - "step": 2204000 - }, - { - "epoch": 24.48, - "learning_rate": 6.379517135686204e-08, - "loss": 3.7984, - "step": 2204500 - }, - { - "epoch": 24.49, - "learning_rate": 6.378128956311219e-08, - "loss": 3.7709, - "step": 2205000 - }, - { - "epoch": 24.49, - "learning_rate": 6.376740776936233e-08, - "loss": 3.7762, - "step": 2205500 - }, - { - "epoch": 24.5, - "learning_rate": 6.375352597561247e-08, - "loss": 3.7866, - "step": 2206000 - }, - { - "epoch": 24.5, - "learning_rate": 6.37396441818626e-08, - "loss": 3.7719, - "step": 2206500 - }, - { - "epoch": 24.51, - "learning_rate": 6.372576238811274e-08, - "loss": 3.7738, - "step": 2207000 - }, - { - "epoch": 24.52, - "learning_rate": 6.371188059436288e-08, - "loss": 3.7794, - "step": 2207500 - }, - { - "epoch": 24.52, - "learning_rate": 6.369799880061301e-08, - "loss": 3.7505, - "step": 2208000 - }, - { - "epoch": 24.53, - "learning_rate": 6.368411700686315e-08, - "loss": 3.7902, - "step": 2208500 - }, - { - "epoch": 24.53, - "learning_rate": 6.36702352131133e-08, - "loss": 3.7845, - "step": 2209000 - }, - { - "epoch": 24.54, - "learning_rate": 6.365635341936343e-08, - "loss": 3.7939, - "step": 2209500 - }, - { - "epoch": 24.54, - "learning_rate": 6.364247162561357e-08, - "loss": 3.7785, - "step": 2210000 - }, - { - "epoch": 24.55, - "learning_rate": 6.362858983186371e-08, - "loss": 3.7825, - "step": 2210500 - }, - { - "epoch": 24.55, - "learning_rate": 6.361470803811385e-08, - "loss": 3.7744, - "step": 2211000 - }, - { - "epoch": 24.56, - "learning_rate": 6.3600826244364e-08, - "loss": 3.7761, - "step": 2211500 - }, - { - "epoch": 24.57, - "learning_rate": 6.358694445061414e-08, - "loss": 3.7914, - "step": 2212000 - }, - { - "epoch": 24.57, - "learning_rate": 6.357306265686427e-08, - "loss": 3.7731, - "step": 2212500 - }, - { - "epoch": 24.58, - "learning_rate": 6.355918086311441e-08, - "loss": 3.7857, - "step": 2213000 - }, - { - "epoch": 24.58, - "learning_rate": 6.354529906936455e-08, - "loss": 3.7786, - "step": 2213500 - }, - { - "epoch": 24.59, - "learning_rate": 6.353141727561468e-08, - "loss": 3.7713, - "step": 2214000 - }, - { - "epoch": 24.59, - "learning_rate": 6.351753548186482e-08, - "loss": 3.7961, - "step": 2214500 - }, - { - "epoch": 24.6, - "learning_rate": 6.350365368811495e-08, - "loss": 3.7708, - "step": 2215000 - }, - { - "epoch": 24.6, - "learning_rate": 6.348977189436509e-08, - "loss": 3.7718, - "step": 2215500 - }, - { - "epoch": 24.61, - "learning_rate": 6.347589010061524e-08, - "loss": 3.7713, - "step": 2216000 - }, - { - "epoch": 24.62, - "learning_rate": 6.346200830686538e-08, - "loss": 3.763, - "step": 2216500 - }, - { - "epoch": 24.62, - "learning_rate": 6.344812651311552e-08, - "loss": 3.7869, - "step": 2217000 - }, - { - "epoch": 24.63, - "learning_rate": 6.343424471936566e-08, - "loss": 3.7868, - "step": 2217500 - }, - { - "epoch": 24.63, - "learning_rate": 6.342036292561579e-08, - "loss": 3.7583, - "step": 2218000 - }, - { - "epoch": 24.64, - "learning_rate": 6.340648113186593e-08, - "loss": 3.7679, - "step": 2218500 - }, - { - "epoch": 24.64, - "learning_rate": 6.339259933811608e-08, - "loss": 3.7673, - "step": 2219000 - }, - { - "epoch": 24.65, - "learning_rate": 6.33787175443662e-08, - "loss": 3.7673, - "step": 2219500 - }, - { - "epoch": 24.65, - "learning_rate": 6.336483575061635e-08, - "loss": 3.7909, - "step": 2220000 - }, - { - "epoch": 24.66, - "learning_rate": 6.335095395686649e-08, - "loss": 3.764, - "step": 2220500 - }, - { - "epoch": 24.67, - "learning_rate": 6.333707216311662e-08, - "loss": 3.7871, - "step": 2221000 - }, - { - "epoch": 24.67, - "learning_rate": 6.332319036936676e-08, - "loss": 3.7685, - "step": 2221500 - }, - { - "epoch": 24.68, - "learning_rate": 6.33093085756169e-08, - "loss": 3.7835, - "step": 2222000 - }, - { - "epoch": 24.68, - "learning_rate": 6.329542678186705e-08, - "loss": 3.7625, - "step": 2222500 - }, - { - "epoch": 24.69, - "learning_rate": 6.328154498811719e-08, - "loss": 3.7959, - "step": 2223000 - }, - { - "epoch": 24.69, - "learning_rate": 6.326766319436732e-08, - "loss": 3.7993, - "step": 2223500 - }, - { - "epoch": 24.7, - "learning_rate": 6.325378140061746e-08, - "loss": 3.7785, - "step": 2224000 - }, - { - "epoch": 24.7, - "learning_rate": 6.32398996068676e-08, - "loss": 3.7664, - "step": 2224500 - }, - { - "epoch": 24.71, - "learning_rate": 6.322601781311774e-08, - "loss": 3.7902, - "step": 2225000 - }, - { - "epoch": 24.72, - "learning_rate": 6.321213601936787e-08, - "loss": 3.7818, - "step": 2225500 - }, - { - "epoch": 24.72, - "learning_rate": 6.319825422561801e-08, - "loss": 3.7948, - "step": 2226000 - }, - { - "epoch": 24.73, - "learning_rate": 6.318437243186814e-08, - "loss": 3.7724, - "step": 2226500 - }, - { - "epoch": 24.73, - "learning_rate": 6.317049063811829e-08, - "loss": 3.7673, - "step": 2227000 - }, - { - "epoch": 24.74, - "learning_rate": 6.315660884436843e-08, - "loss": 3.7821, - "step": 2227500 - }, - { - "epoch": 24.74, - "learning_rate": 6.314272705061857e-08, - "loss": 3.7784, - "step": 2228000 - }, - { - "epoch": 24.75, - "learning_rate": 6.312884525686871e-08, - "loss": 3.781, - "step": 2228500 - }, - { - "epoch": 24.75, - "learning_rate": 6.311496346311884e-08, - "loss": 3.7779, - "step": 2229000 - }, - { - "epoch": 24.76, - "learning_rate": 6.310108166936898e-08, - "loss": 3.799, - "step": 2229500 - }, - { - "epoch": 24.77, - "learning_rate": 6.308719987561913e-08, - "loss": 3.7793, - "step": 2230000 - }, - { - "epoch": 24.77, - "learning_rate": 6.307331808186927e-08, - "loss": 3.7945, - "step": 2230500 - }, - { - "epoch": 24.78, - "learning_rate": 6.305943628811941e-08, - "loss": 3.7786, - "step": 2231000 - }, - { - "epoch": 24.78, - "learning_rate": 6.304555449436954e-08, - "loss": 3.7963, - "step": 2231500 - }, - { - "epoch": 24.79, - "learning_rate": 6.303167270061968e-08, - "loss": 3.7816, - "step": 2232000 - }, - { - "epoch": 24.79, - "learning_rate": 6.301779090686981e-08, - "loss": 3.7791, - "step": 2232500 - }, - { - "epoch": 24.8, - "learning_rate": 6.300390911311995e-08, - "loss": 3.7793, - "step": 2233000 - }, - { - "epoch": 24.8, - "learning_rate": 6.29900273193701e-08, - "loss": 3.7623, - "step": 2233500 - }, - { - "epoch": 24.81, - "learning_rate": 6.297614552562024e-08, - "loss": 3.7672, - "step": 2234000 - }, - { - "epoch": 24.82, - "learning_rate": 6.296226373187038e-08, - "loss": 3.7618, - "step": 2234500 - }, - { - "epoch": 24.82, - "learning_rate": 6.294838193812051e-08, - "loss": 3.7773, - "step": 2235000 - }, - { - "epoch": 24.83, - "learning_rate": 6.293450014437065e-08, - "loss": 3.7935, - "step": 2235500 - }, - { - "epoch": 24.83, - "learning_rate": 6.29206183506208e-08, - "loss": 3.7926, - "step": 2236000 - }, - { - "epoch": 24.84, - "learning_rate": 6.290673655687094e-08, - "loss": 3.7756, - "step": 2236500 - }, - { - "epoch": 24.84, - "learning_rate": 6.289285476312108e-08, - "loss": 3.7582, - "step": 2237000 - }, - { - "epoch": 24.85, - "learning_rate": 6.287897296937121e-08, - "loss": 3.758, - "step": 2237500 - }, - { - "epoch": 24.85, - "learning_rate": 6.286509117562135e-08, - "loss": 3.7717, - "step": 2238000 - }, - { - "epoch": 24.86, - "learning_rate": 6.285120938187148e-08, - "loss": 3.7701, - "step": 2238500 - }, - { - "epoch": 24.87, - "learning_rate": 6.283732758812162e-08, - "loss": 3.7683, - "step": 2239000 - }, - { - "epoch": 24.87, - "learning_rate": 6.282344579437176e-08, - "loss": 3.7978, - "step": 2239500 - }, - { - "epoch": 24.88, - "learning_rate": 6.28095640006219e-08, - "loss": 3.7998, - "step": 2240000 - }, - { - "epoch": 24.88, - "learning_rate": 6.279568220687203e-08, - "loss": 3.7641, - "step": 2240500 - }, - { - "epoch": 24.89, - "learning_rate": 6.278180041312218e-08, - "loss": 3.798, - "step": 2241000 - }, - { - "epoch": 24.89, - "learning_rate": 6.276791861937232e-08, - "loss": 3.7828, - "step": 2241500 - }, - { - "epoch": 24.9, - "learning_rate": 6.275403682562246e-08, - "loss": 3.7854, - "step": 2242000 - }, - { - "epoch": 24.9, - "learning_rate": 6.27401550318726e-08, - "loss": 3.7846, - "step": 2242500 - }, - { - "epoch": 24.91, - "learning_rate": 6.272627323812275e-08, - "loss": 3.7684, - "step": 2243000 - }, - { - "epoch": 24.92, - "learning_rate": 6.271239144437287e-08, - "loss": 3.7712, - "step": 2243500 - }, - { - "epoch": 24.92, - "learning_rate": 6.269850965062302e-08, - "loss": 3.7804, - "step": 2244000 - }, - { - "epoch": 24.93, - "learning_rate": 6.268462785687315e-08, - "loss": 3.7827, - "step": 2244500 - }, - { - "epoch": 24.93, - "learning_rate": 6.267074606312329e-08, - "loss": 3.7814, - "step": 2245000 - }, - { - "epoch": 24.94, - "learning_rate": 6.265686426937343e-08, - "loss": 3.7627, - "step": 2245500 - }, - { - "epoch": 24.94, - "learning_rate": 6.264298247562356e-08, - "loss": 3.7555, - "step": 2246000 - }, - { - "epoch": 24.95, - "learning_rate": 6.26291006818737e-08, - "loss": 3.804, - "step": 2246500 - }, - { - "epoch": 24.95, - "learning_rate": 6.261521888812384e-08, - "loss": 3.7932, - "step": 2247000 - }, - { - "epoch": 24.96, - "learning_rate": 6.260133709437399e-08, - "loss": 3.7669, - "step": 2247500 - }, - { - "epoch": 24.97, - "learning_rate": 6.258745530062413e-08, - "loss": 3.7754, - "step": 2248000 - }, - { - "epoch": 24.97, - "learning_rate": 6.257357350687427e-08, - "loss": 3.7734, - "step": 2248500 - }, - { - "epoch": 24.98, - "learning_rate": 6.25596917131244e-08, - "loss": 3.7856, - "step": 2249000 - }, - { - "epoch": 24.98, - "learning_rate": 6.254580991937454e-08, - "loss": 3.7783, - "step": 2249500 - }, - { - "epoch": 24.99, - "learning_rate": 6.253192812562467e-08, - "loss": 3.7844, - "step": 2250000 - }, - { - "epoch": 24.99, - "learning_rate": 6.251804633187481e-08, - "loss": 3.7854, - "step": 2250500 - }, - { - "epoch": 25.0, - "learning_rate": 6.250416453812496e-08, - "loss": 3.7824, - "step": 2251000 - }, - { - "epoch": 25.0, - "eval_loss": 3.843087911605835, - "eval_runtime": 6.3026, - "eval_samples_per_second": 246.566, - "step": 2251150 - }, - { - "epoch": 25.0, - "learning_rate": 6.249028274437508e-08, - "loss": 3.7844, - "step": 2251500 - }, - { - "epoch": 25.01, - "learning_rate": 6.247640095062523e-08, - "loss": 3.7535, - "step": 2252000 - }, - { - "epoch": 25.01, - "learning_rate": 6.246251915687537e-08, - "loss": 3.7942, - "step": 2252500 - }, - { - "epoch": 25.02, - "learning_rate": 6.244863736312551e-08, - "loss": 3.7737, - "step": 2253000 - }, - { - "epoch": 25.03, - "learning_rate": 6.243475556937565e-08, - "loss": 3.7851, - "step": 2253500 - }, - { - "epoch": 25.03, - "learning_rate": 6.24208737756258e-08, - "loss": 3.789, - "step": 2254000 - }, - { - "epoch": 25.04, - "learning_rate": 6.240699198187593e-08, - "loss": 3.7767, - "step": 2254500 - }, - { - "epoch": 25.04, - "learning_rate": 6.239311018812607e-08, - "loss": 3.7702, - "step": 2255000 - }, - { - "epoch": 25.05, - "learning_rate": 6.237922839437621e-08, - "loss": 3.7632, - "step": 2255500 - }, - { - "epoch": 25.05, - "learning_rate": 6.236534660062634e-08, - "loss": 3.7774, - "step": 2256000 - }, - { - "epoch": 25.06, - "learning_rate": 6.235146480687648e-08, - "loss": 3.7659, - "step": 2256500 - }, - { - "epoch": 25.06, - "learning_rate": 6.233758301312662e-08, - "loss": 3.7828, - "step": 2257000 - }, - { - "epoch": 25.07, - "learning_rate": 6.232370121937675e-08, - "loss": 3.7881, - "step": 2257500 - }, - { - "epoch": 25.08, - "learning_rate": 6.23098194256269e-08, - "loss": 3.7728, - "step": 2258000 - }, - { - "epoch": 25.08, - "learning_rate": 6.229593763187704e-08, - "loss": 3.7864, - "step": 2258500 - }, - { - "epoch": 25.09, - "learning_rate": 6.228205583812718e-08, - "loss": 3.7813, - "step": 2259000 - }, - { - "epoch": 25.09, - "learning_rate": 6.226817404437732e-08, - "loss": 3.7711, - "step": 2259500 - }, - { - "epoch": 25.1, - "learning_rate": 6.225429225062745e-08, - "loss": 3.7546, - "step": 2260000 - }, - { - "epoch": 25.1, - "learning_rate": 6.224041045687759e-08, - "loss": 3.7778, - "step": 2260500 - }, - { - "epoch": 25.11, - "learning_rate": 6.222652866312773e-08, - "loss": 3.7861, - "step": 2261000 - }, - { - "epoch": 25.11, - "learning_rate": 6.221264686937788e-08, - "loss": 3.7819, - "step": 2261500 - }, - { - "epoch": 25.12, - "learning_rate": 6.2198765075628e-08, - "loss": 3.768, - "step": 2262000 - }, - { - "epoch": 25.13, - "learning_rate": 6.218488328187815e-08, - "loss": 3.7939, - "step": 2262500 - }, - { - "epoch": 25.13, - "learning_rate": 6.217100148812828e-08, - "loss": 3.7827, - "step": 2263000 - }, - { - "epoch": 25.14, - "learning_rate": 6.215711969437842e-08, - "loss": 3.792, - "step": 2263500 - }, - { - "epoch": 25.14, - "learning_rate": 6.214323790062856e-08, - "loss": 3.7551, - "step": 2264000 - }, - { - "epoch": 25.15, - "learning_rate": 6.21293561068787e-08, - "loss": 3.7861, - "step": 2264500 - }, - { - "epoch": 25.15, - "learning_rate": 6.211547431312885e-08, - "loss": 3.7695, - "step": 2265000 - }, - { - "epoch": 25.16, - "learning_rate": 6.210159251937898e-08, - "loss": 3.7738, - "step": 2265500 - }, - { - "epoch": 25.16, - "learning_rate": 6.208771072562912e-08, - "loss": 3.7672, - "step": 2266000 - }, - { - "epoch": 25.17, - "learning_rate": 6.207382893187926e-08, - "loss": 3.7886, - "step": 2266500 - }, - { - "epoch": 25.18, - "learning_rate": 6.20599471381294e-08, - "loss": 3.7748, - "step": 2267000 - }, - { - "epoch": 25.18, - "learning_rate": 6.204606534437954e-08, - "loss": 3.7885, - "step": 2267500 - }, - { - "epoch": 25.19, - "learning_rate": 6.203218355062967e-08, - "loss": 3.7758, - "step": 2268000 - }, - { - "epoch": 25.19, - "learning_rate": 6.201830175687982e-08, - "loss": 3.775, - "step": 2268500 - }, - { - "epoch": 25.2, - "learning_rate": 6.200441996312994e-08, - "loss": 3.7799, - "step": 2269000 - }, - { - "epoch": 25.2, - "learning_rate": 6.199053816938009e-08, - "loss": 3.7692, - "step": 2269500 - }, - { - "epoch": 25.21, - "learning_rate": 6.197665637563023e-08, - "loss": 3.7766, - "step": 2270000 - }, - { - "epoch": 25.21, - "learning_rate": 6.196277458188037e-08, - "loss": 3.7793, - "step": 2270500 - }, - { - "epoch": 25.22, - "learning_rate": 6.194889278813051e-08, - "loss": 3.7781, - "step": 2271000 - }, - { - "epoch": 25.23, - "learning_rate": 6.193501099438064e-08, - "loss": 3.7762, - "step": 2271500 - }, - { - "epoch": 25.23, - "learning_rate": 6.192112920063079e-08, - "loss": 3.7535, - "step": 2272000 - }, - { - "epoch": 25.24, - "learning_rate": 6.190724740688093e-08, - "loss": 3.7742, - "step": 2272500 - }, - { - "epoch": 25.24, - "learning_rate": 6.189336561313107e-08, - "loss": 3.8031, - "step": 2273000 - }, - { - "epoch": 25.25, - "learning_rate": 6.187948381938121e-08, - "loss": 3.7771, - "step": 2273500 - }, - { - "epoch": 25.25, - "learning_rate": 6.186560202563134e-08, - "loss": 3.7847, - "step": 2274000 - }, - { - "epoch": 25.26, - "learning_rate": 6.185172023188148e-08, - "loss": 3.7708, - "step": 2274500 - }, - { - "epoch": 25.26, - "learning_rate": 6.183783843813161e-08, - "loss": 3.7818, - "step": 2275000 - }, - { - "epoch": 25.27, - "learning_rate": 6.182395664438175e-08, - "loss": 3.7806, - "step": 2275500 - }, - { - "epoch": 25.28, - "learning_rate": 6.18100748506319e-08, - "loss": 3.78, - "step": 2276000 - }, - { - "epoch": 25.28, - "learning_rate": 6.179619305688204e-08, - "loss": 3.773, - "step": 2276500 - }, - { - "epoch": 25.29, - "learning_rate": 6.178231126313217e-08, - "loss": 3.7871, - "step": 2277000 - }, - { - "epoch": 25.29, - "learning_rate": 6.176842946938231e-08, - "loss": 3.7756, - "step": 2277500 - }, - { - "epoch": 25.3, - "learning_rate": 6.175454767563245e-08, - "loss": 3.7719, - "step": 2278000 - }, - { - "epoch": 25.3, - "learning_rate": 6.17406658818826e-08, - "loss": 3.7703, - "step": 2278500 - }, - { - "epoch": 25.31, - "learning_rate": 6.172678408813274e-08, - "loss": 3.7784, - "step": 2279000 - }, - { - "epoch": 25.31, - "learning_rate": 6.171290229438288e-08, - "loss": 3.7751, - "step": 2279500 - }, - { - "epoch": 25.32, - "learning_rate": 6.169902050063301e-08, - "loss": 3.7622, - "step": 2280000 - }, - { - "epoch": 25.33, - "learning_rate": 6.168513870688314e-08, - "loss": 3.7569, - "step": 2280500 - }, - { - "epoch": 25.33, - "learning_rate": 6.167125691313328e-08, - "loss": 3.7805, - "step": 2281000 - }, - { - "epoch": 25.34, - "learning_rate": 6.165737511938342e-08, - "loss": 3.7769, - "step": 2281500 - }, - { - "epoch": 25.34, - "learning_rate": 6.164349332563356e-08, - "loss": 3.7592, - "step": 2282000 - }, - { - "epoch": 25.35, - "learning_rate": 6.16296115318837e-08, - "loss": 3.7694, - "step": 2282500 - }, - { - "epoch": 25.35, - "learning_rate": 6.161572973813384e-08, - "loss": 3.771, - "step": 2283000 - }, - { - "epoch": 25.36, - "learning_rate": 6.160184794438398e-08, - "loss": 3.7988, - "step": 2283500 - }, - { - "epoch": 25.36, - "learning_rate": 6.158796615063412e-08, - "loss": 3.7844, - "step": 2284000 - }, - { - "epoch": 25.37, - "learning_rate": 6.157408435688426e-08, - "loss": 3.7726, - "step": 2284500 - }, - { - "epoch": 25.38, - "learning_rate": 6.15602025631344e-08, - "loss": 3.7683, - "step": 2285000 - }, - { - "epoch": 25.38, - "learning_rate": 6.154632076938453e-08, - "loss": 3.7863, - "step": 2285500 - }, - { - "epoch": 25.39, - "learning_rate": 6.153243897563468e-08, - "loss": 3.7739, - "step": 2286000 - }, - { - "epoch": 25.39, - "learning_rate": 6.15185571818848e-08, - "loss": 3.7684, - "step": 2286500 - }, - { - "epoch": 25.4, - "learning_rate": 6.150467538813495e-08, - "loss": 3.7718, - "step": 2287000 - }, - { - "epoch": 25.4, - "learning_rate": 6.149079359438509e-08, - "loss": 3.775, - "step": 2287500 - }, - { - "epoch": 25.41, - "learning_rate": 6.147691180063522e-08, - "loss": 3.7615, - "step": 2288000 - }, - { - "epoch": 25.41, - "learning_rate": 6.146303000688536e-08, - "loss": 3.7717, - "step": 2288500 - }, - { - "epoch": 25.42, - "learning_rate": 6.14491482131355e-08, - "loss": 3.7709, - "step": 2289000 - }, - { - "epoch": 25.43, - "learning_rate": 6.143526641938565e-08, - "loss": 3.7846, - "step": 2289500 - }, - { - "epoch": 25.43, - "learning_rate": 6.142138462563579e-08, - "loss": 3.7955, - "step": 2290000 - }, - { - "epoch": 25.44, - "learning_rate": 6.140750283188593e-08, - "loss": 3.7863, - "step": 2290500 - }, - { - "epoch": 25.44, - "learning_rate": 6.139362103813606e-08, - "loss": 3.7812, - "step": 2291000 - }, - { - "epoch": 25.45, - "learning_rate": 6.13797392443862e-08, - "loss": 3.7727, - "step": 2291500 - }, - { - "epoch": 25.45, - "learning_rate": 6.136585745063634e-08, - "loss": 3.7847, - "step": 2292000 - }, - { - "epoch": 25.46, - "learning_rate": 6.135197565688647e-08, - "loss": 3.7922, - "step": 2292500 - }, - { - "epoch": 25.46, - "learning_rate": 6.133809386313661e-08, - "loss": 3.7741, - "step": 2293000 - }, - { - "epoch": 25.47, - "learning_rate": 6.132421206938676e-08, - "loss": 3.809, - "step": 2293500 - }, - { - "epoch": 25.48, - "learning_rate": 6.131033027563689e-08, - "loss": 3.7726, - "step": 2294000 - }, - { - "epoch": 25.48, - "learning_rate": 6.129644848188703e-08, - "loss": 3.7901, - "step": 2294500 - }, - { - "epoch": 25.49, - "learning_rate": 6.128256668813717e-08, - "loss": 3.7835, - "step": 2295000 - }, - { - "epoch": 25.49, - "learning_rate": 6.126868489438731e-08, - "loss": 3.7768, - "step": 2295500 - }, - { - "epoch": 25.5, - "learning_rate": 6.125480310063746e-08, - "loss": 3.7663, - "step": 2296000 - }, - { - "epoch": 25.5, - "learning_rate": 6.124092130688758e-08, - "loss": 3.7675, - "step": 2296500 - }, - { - "epoch": 25.51, - "learning_rate": 6.122703951313773e-08, - "loss": 3.7719, - "step": 2297000 - }, - { - "epoch": 25.51, - "learning_rate": 6.121315771938787e-08, - "loss": 3.7881, - "step": 2297500 - }, - { - "epoch": 25.52, - "learning_rate": 6.119927592563801e-08, - "loss": 3.7921, - "step": 2298000 - }, - { - "epoch": 25.53, - "learning_rate": 6.118539413188814e-08, - "loss": 3.7846, - "step": 2298500 - }, - { - "epoch": 25.53, - "learning_rate": 6.117151233813828e-08, - "loss": 3.791, - "step": 2299000 - }, - { - "epoch": 25.54, - "learning_rate": 6.115763054438841e-08, - "loss": 3.7707, - "step": 2299500 - }, - { - "epoch": 25.54, - "learning_rate": 6.114374875063855e-08, - "loss": 3.7853, - "step": 2300000 - }, - { - "epoch": 25.55, - "learning_rate": 6.11298669568887e-08, - "loss": 3.7911, - "step": 2300500 - }, - { - "epoch": 25.55, - "learning_rate": 6.111598516313884e-08, - "loss": 3.7801, - "step": 2301000 - }, - { - "epoch": 25.56, - "learning_rate": 6.110210336938898e-08, - "loss": 3.7801, - "step": 2301500 - }, - { - "epoch": 25.56, - "learning_rate": 6.108822157563911e-08, - "loss": 3.7603, - "step": 2302000 - }, - { - "epoch": 25.57, - "learning_rate": 6.107433978188925e-08, - "loss": 3.7835, - "step": 2302500 - }, - { - "epoch": 25.58, - "learning_rate": 6.10604579881394e-08, - "loss": 3.7807, - "step": 2303000 - }, - { - "epoch": 25.58, - "learning_rate": 6.104657619438954e-08, - "loss": 3.7854, - "step": 2303500 - }, - { - "epoch": 25.59, - "learning_rate": 6.103269440063968e-08, - "loss": 3.8086, - "step": 2304000 - }, - { - "epoch": 25.59, - "learning_rate": 6.101881260688981e-08, - "loss": 3.7595, - "step": 2304500 - }, - { - "epoch": 25.6, - "learning_rate": 6.100493081313995e-08, - "loss": 3.7767, - "step": 2305000 - }, - { - "epoch": 25.6, - "learning_rate": 6.099104901939008e-08, - "loss": 3.7811, - "step": 2305500 - }, - { - "epoch": 25.61, - "learning_rate": 6.097716722564022e-08, - "loss": 3.7619, - "step": 2306000 - }, - { - "epoch": 25.61, - "learning_rate": 6.096328543189036e-08, - "loss": 3.7782, - "step": 2306500 - }, - { - "epoch": 25.62, - "learning_rate": 6.09494036381405e-08, - "loss": 3.7546, - "step": 2307000 - }, - { - "epoch": 25.63, - "learning_rate": 6.093552184439065e-08, - "loss": 3.789, - "step": 2307500 - }, - { - "epoch": 25.63, - "learning_rate": 6.092164005064078e-08, - "loss": 3.7785, - "step": 2308000 - }, - { - "epoch": 25.64, - "learning_rate": 6.090775825689092e-08, - "loss": 3.7831, - "step": 2308500 - }, - { - "epoch": 25.64, - "learning_rate": 6.089387646314106e-08, - "loss": 3.7747, - "step": 2309000 - }, - { - "epoch": 25.65, - "learning_rate": 6.08799946693912e-08, - "loss": 3.7653, - "step": 2309500 - }, - { - "epoch": 25.65, - "learning_rate": 6.086611287564135e-08, - "loss": 3.7682, - "step": 2310000 - }, - { - "epoch": 25.66, - "learning_rate": 6.085223108189147e-08, - "loss": 3.7801, - "step": 2310500 - }, - { - "epoch": 25.66, - "learning_rate": 6.08383492881416e-08, - "loss": 3.7543, - "step": 2311000 - }, - { - "epoch": 25.67, - "learning_rate": 6.082446749439175e-08, - "loss": 3.8091, - "step": 2311500 - }, - { - "epoch": 25.68, - "learning_rate": 6.081058570064189e-08, - "loss": 3.7603, - "step": 2312000 - }, - { - "epoch": 25.68, - "learning_rate": 6.079670390689203e-08, - "loss": 3.7774, - "step": 2312500 - }, - { - "epoch": 25.69, - "learning_rate": 6.078282211314217e-08, - "loss": 3.7646, - "step": 2313000 - }, - { - "epoch": 25.69, - "learning_rate": 6.07689403193923e-08, - "loss": 3.7538, - "step": 2313500 - }, - { - "epoch": 25.7, - "learning_rate": 6.075505852564244e-08, - "loss": 3.7725, - "step": 2314000 - }, - { - "epoch": 25.7, - "learning_rate": 6.074117673189259e-08, - "loss": 3.7809, - "step": 2314500 - }, - { - "epoch": 25.71, - "learning_rate": 6.072729493814273e-08, - "loss": 3.7843, - "step": 2315000 - }, - { - "epoch": 25.71, - "learning_rate": 6.071341314439287e-08, - "loss": 3.7941, - "step": 2315500 - }, - { - "epoch": 25.72, - "learning_rate": 6.069953135064301e-08, - "loss": 3.7652, - "step": 2316000 - }, - { - "epoch": 25.73, - "learning_rate": 6.068564955689314e-08, - "loss": 3.7785, - "step": 2316500 - }, - { - "epoch": 25.73, - "learning_rate": 6.067176776314327e-08, - "loss": 3.7539, - "step": 2317000 - }, - { - "epoch": 25.74, - "learning_rate": 6.065788596939341e-08, - "loss": 3.7901, - "step": 2317500 - }, - { - "epoch": 25.74, - "learning_rate": 6.064400417564356e-08, - "loss": 3.7908, - "step": 2318000 - }, - { - "epoch": 25.75, - "learning_rate": 6.06301223818937e-08, - "loss": 3.7813, - "step": 2318500 - }, - { - "epoch": 25.75, - "learning_rate": 6.061624058814383e-08, - "loss": 3.7818, - "step": 2319000 - }, - { - "epoch": 25.76, - "learning_rate": 6.060235879439397e-08, - "loss": 3.7954, - "step": 2319500 - }, - { - "epoch": 25.76, - "learning_rate": 6.058847700064411e-08, - "loss": 3.7612, - "step": 2320000 - }, - { - "epoch": 25.77, - "learning_rate": 6.057459520689425e-08, - "loss": 3.7849, - "step": 2320500 - }, - { - "epoch": 25.78, - "learning_rate": 6.05607134131444e-08, - "loss": 3.793, - "step": 2321000 - }, - { - "epoch": 25.78, - "learning_rate": 6.054683161939454e-08, - "loss": 3.7967, - "step": 2321500 - }, - { - "epoch": 25.79, - "learning_rate": 6.053294982564467e-08, - "loss": 3.7936, - "step": 2322000 - }, - { - "epoch": 25.79, - "learning_rate": 6.051906803189481e-08, - "loss": 3.7671, - "step": 2322500 - }, - { - "epoch": 25.8, - "learning_rate": 6.050518623814494e-08, - "loss": 3.7697, - "step": 2323000 - }, - { - "epoch": 25.8, - "learning_rate": 6.049130444439508e-08, - "loss": 3.7808, - "step": 2323500 - }, - { - "epoch": 25.81, - "learning_rate": 6.047742265064522e-08, - "loss": 3.7815, - "step": 2324000 - }, - { - "epoch": 25.81, - "learning_rate": 6.046354085689535e-08, - "loss": 3.77, - "step": 2324500 - }, - { - "epoch": 25.82, - "learning_rate": 6.04496590631455e-08, - "loss": 3.7696, - "step": 2325000 - }, - { - "epoch": 25.83, - "learning_rate": 6.043577726939564e-08, - "loss": 3.7941, - "step": 2325500 - }, - { - "epoch": 25.83, - "learning_rate": 6.042189547564578e-08, - "loss": 3.7838, - "step": 2326000 - }, - { - "epoch": 25.84, - "learning_rate": 6.040801368189592e-08, - "loss": 3.7897, - "step": 2326500 - }, - { - "epoch": 25.84, - "learning_rate": 6.039413188814606e-08, - "loss": 3.7522, - "step": 2327000 - }, - { - "epoch": 25.85, - "learning_rate": 6.038025009439619e-08, - "loss": 3.7578, - "step": 2327500 - }, - { - "epoch": 25.85, - "learning_rate": 6.036636830064634e-08, - "loss": 3.7898, - "step": 2328000 - }, - { - "epoch": 25.86, - "learning_rate": 6.035248650689648e-08, - "loss": 3.7778, - "step": 2328500 - }, - { - "epoch": 25.86, - "learning_rate": 6.03386047131466e-08, - "loss": 3.7821, - "step": 2329000 - }, - { - "epoch": 25.87, - "learning_rate": 6.032472291939675e-08, - "loss": 3.7623, - "step": 2329500 - }, - { - "epoch": 25.88, - "learning_rate": 6.031084112564689e-08, - "loss": 3.7655, - "step": 2330000 - }, - { - "epoch": 25.88, - "learning_rate": 6.029695933189702e-08, - "loss": 3.7794, - "step": 2330500 - }, - { - "epoch": 25.89, - "learning_rate": 6.028307753814716e-08, - "loss": 3.7804, - "step": 2331000 - }, - { - "epoch": 25.89, - "learning_rate": 6.02691957443973e-08, - "loss": 3.787, - "step": 2331500 - }, - { - "epoch": 25.9, - "learning_rate": 6.025531395064745e-08, - "loss": 3.7841, - "step": 2332000 - }, - { - "epoch": 25.9, - "learning_rate": 6.024143215689759e-08, - "loss": 3.7678, - "step": 2332500 - }, - { - "epoch": 25.91, - "learning_rate": 6.022755036314772e-08, - "loss": 3.7612, - "step": 2333000 - }, - { - "epoch": 25.91, - "learning_rate": 6.021366856939786e-08, - "loss": 3.7752, - "step": 2333500 - }, - { - "epoch": 25.92, - "learning_rate": 6.0199786775648e-08, - "loss": 3.7913, - "step": 2334000 - }, - { - "epoch": 25.93, - "learning_rate": 6.018590498189814e-08, - "loss": 3.7827, - "step": 2334500 - }, - { - "epoch": 25.93, - "learning_rate": 6.017202318814827e-08, - "loss": 3.7714, - "step": 2335000 - }, - { - "epoch": 25.94, - "learning_rate": 6.015814139439842e-08, - "loss": 3.7884, - "step": 2335500 - }, - { - "epoch": 25.94, - "learning_rate": 6.014425960064855e-08, - "loss": 3.7875, - "step": 2336000 - }, - { - "epoch": 25.95, - "learning_rate": 6.013037780689869e-08, - "loss": 3.7807, - "step": 2336500 - }, - { - "epoch": 25.95, - "learning_rate": 6.011649601314883e-08, - "loss": 3.7625, - "step": 2337000 - }, - { - "epoch": 25.96, - "learning_rate": 6.010261421939897e-08, - "loss": 3.7606, - "step": 2337500 - }, - { - "epoch": 25.96, - "learning_rate": 6.008873242564911e-08, - "loss": 3.7747, - "step": 2338000 - }, - { - "epoch": 25.97, - "learning_rate": 6.007485063189926e-08, - "loss": 3.7736, - "step": 2338500 - }, - { - "epoch": 25.98, - "learning_rate": 6.006096883814939e-08, - "loss": 3.7746, - "step": 2339000 - }, - { - "epoch": 25.98, - "learning_rate": 6.004708704439953e-08, - "loss": 3.7684, - "step": 2339500 - }, - { - "epoch": 25.99, - "learning_rate": 6.003320525064967e-08, - "loss": 3.7687, - "step": 2340000 - }, - { - "epoch": 25.99, - "learning_rate": 6.001932345689981e-08, - "loss": 3.7736, - "step": 2340500 - }, - { - "epoch": 26.0, - "learning_rate": 6.000544166314994e-08, - "loss": 3.7579, - "step": 2341000 - }, - { - "epoch": 26.0, - "eval_loss": 3.8411214351654053, - "eval_runtime": 6.2991, - "eval_samples_per_second": 246.7, - "step": 2341196 - }, - { - "epoch": 26.0, - "learning_rate": 5.999155986940007e-08, - "loss": 3.7662, - "step": 2341500 - }, - { - "epoch": 26.01, - "learning_rate": 5.997767807565021e-08, - "loss": 3.785, - "step": 2342000 - }, - { - "epoch": 26.01, - "learning_rate": 5.996379628190035e-08, - "loss": 3.7827, - "step": 2342500 - }, - { - "epoch": 26.02, - "learning_rate": 5.99499144881505e-08, - "loss": 3.7691, - "step": 2343000 - }, - { - "epoch": 26.03, - "learning_rate": 5.993603269440064e-08, - "loss": 3.7776, - "step": 2343500 - }, - { - "epoch": 26.03, - "learning_rate": 5.992215090065078e-08, - "loss": 3.7821, - "step": 2344000 - }, - { - "epoch": 26.04, - "learning_rate": 5.990826910690091e-08, - "loss": 3.7668, - "step": 2344500 - }, - { - "epoch": 26.04, - "learning_rate": 5.989438731315105e-08, - "loss": 3.7789, - "step": 2345000 - }, - { - "epoch": 26.05, - "learning_rate": 5.98805055194012e-08, - "loss": 3.7851, - "step": 2345500 - }, - { - "epoch": 26.05, - "learning_rate": 5.986662372565134e-08, - "loss": 3.7689, - "step": 2346000 - }, - { - "epoch": 26.06, - "learning_rate": 5.985274193190148e-08, - "loss": 3.7674, - "step": 2346500 - }, - { - "epoch": 26.06, - "learning_rate": 5.983886013815161e-08, - "loss": 3.773, - "step": 2347000 - }, - { - "epoch": 26.07, - "learning_rate": 5.982497834440174e-08, - "loss": 3.7795, - "step": 2347500 - }, - { - "epoch": 26.08, - "learning_rate": 5.981109655065188e-08, - "loss": 3.7839, - "step": 2348000 - }, - { - "epoch": 26.08, - "learning_rate": 5.979721475690202e-08, - "loss": 3.7577, - "step": 2348500 - }, - { - "epoch": 26.09, - "learning_rate": 5.978333296315216e-08, - "loss": 3.7677, - "step": 2349000 - }, - { - "epoch": 26.09, - "learning_rate": 5.976945116940231e-08, - "loss": 3.7729, - "step": 2349500 - }, - { - "epoch": 26.1, - "learning_rate": 5.975556937565244e-08, - "loss": 3.7759, - "step": 2350000 - }, - { - "epoch": 26.1, - "learning_rate": 5.974168758190258e-08, - "loss": 3.7739, - "step": 2350500 - }, - { - "epoch": 26.11, - "learning_rate": 5.972780578815272e-08, - "loss": 3.7726, - "step": 2351000 - }, - { - "epoch": 26.11, - "learning_rate": 5.971392399440286e-08, - "loss": 3.7724, - "step": 2351500 - }, - { - "epoch": 26.12, - "learning_rate": 5.9700042200653e-08, - "loss": 3.7776, - "step": 2352000 - }, - { - "epoch": 26.13, - "learning_rate": 5.968616040690315e-08, - "loss": 3.7806, - "step": 2352500 - }, - { - "epoch": 26.13, - "learning_rate": 5.967227861315328e-08, - "loss": 3.7977, - "step": 2353000 - }, - { - "epoch": 26.14, - "learning_rate": 5.96583968194034e-08, - "loss": 3.7766, - "step": 2353500 - }, - { - "epoch": 26.14, - "learning_rate": 5.964451502565355e-08, - "loss": 3.7802, - "step": 2354000 - }, - { - "epoch": 26.15, - "learning_rate": 5.963063323190369e-08, - "loss": 3.7702, - "step": 2354500 - }, - { - "epoch": 26.15, - "learning_rate": 5.961675143815383e-08, - "loss": 3.7937, - "step": 2355000 - }, - { - "epoch": 26.16, - "learning_rate": 5.960286964440396e-08, - "loss": 3.7515, - "step": 2355500 - }, - { - "epoch": 26.16, - "learning_rate": 5.9588987850654103e-08, - "loss": 3.7776, - "step": 2356000 - }, - { - "epoch": 26.17, - "learning_rate": 5.9575106056904246e-08, - "loss": 3.7774, - "step": 2356500 - }, - { - "epoch": 26.18, - "learning_rate": 5.956122426315439e-08, - "loss": 3.7552, - "step": 2357000 - }, - { - "epoch": 26.18, - "learning_rate": 5.9547342469404524e-08, - "loss": 3.7695, - "step": 2357500 - }, - { - "epoch": 26.19, - "learning_rate": 5.9533460675654666e-08, - "loss": 3.7692, - "step": 2358000 - }, - { - "epoch": 26.19, - "learning_rate": 5.9519578881904795e-08, - "loss": 3.7857, - "step": 2358500 - }, - { - "epoch": 26.2, - "learning_rate": 5.950569708815494e-08, - "loss": 3.7701, - "step": 2359000 - }, - { - "epoch": 26.2, - "learning_rate": 5.949181529440508e-08, - "loss": 3.7657, - "step": 2359500 - }, - { - "epoch": 26.21, - "learning_rate": 5.947793350065522e-08, - "loss": 3.7755, - "step": 2360000 - }, - { - "epoch": 26.21, - "learning_rate": 5.946405170690536e-08, - "loss": 3.7925, - "step": 2360500 - }, - { - "epoch": 26.22, - "learning_rate": 5.945016991315549e-08, - "loss": 3.7727, - "step": 2361000 - }, - { - "epoch": 26.23, - "learning_rate": 5.943628811940563e-08, - "loss": 3.7822, - "step": 2361500 - }, - { - "epoch": 26.23, - "learning_rate": 5.942240632565577e-08, - "loss": 3.7901, - "step": 2362000 - }, - { - "epoch": 26.24, - "learning_rate": 5.940852453190591e-08, - "loss": 3.7575, - "step": 2362500 - }, - { - "epoch": 26.24, - "learning_rate": 5.9394642738156055e-08, - "loss": 3.7709, - "step": 2363000 - }, - { - "epoch": 26.25, - "learning_rate": 5.938076094440619e-08, - "loss": 3.7666, - "step": 2363500 - }, - { - "epoch": 26.25, - "learning_rate": 5.9366879150656327e-08, - "loss": 3.7754, - "step": 2364000 - }, - { - "epoch": 26.26, - "learning_rate": 5.935299735690646e-08, - "loss": 3.7759, - "step": 2364500 - }, - { - "epoch": 26.26, - "learning_rate": 5.9339115563156605e-08, - "loss": 3.7902, - "step": 2365000 - }, - { - "epoch": 26.27, - "learning_rate": 5.932523376940675e-08, - "loss": 3.7736, - "step": 2365500 - }, - { - "epoch": 26.28, - "learning_rate": 5.931135197565689e-08, - "loss": 3.7741, - "step": 2366000 - }, - { - "epoch": 26.28, - "learning_rate": 5.9297470181907025e-08, - "loss": 3.7795, - "step": 2366500 - }, - { - "epoch": 26.29, - "learning_rate": 5.928358838815716e-08, - "loss": 3.7559, - "step": 2367000 - }, - { - "epoch": 26.29, - "learning_rate": 5.9269706594407296e-08, - "loss": 3.7731, - "step": 2367500 - }, - { - "epoch": 26.3, - "learning_rate": 5.925582480065744e-08, - "loss": 3.7804, - "step": 2368000 - }, - { - "epoch": 26.3, - "learning_rate": 5.924194300690758e-08, - "loss": 3.7877, - "step": 2368500 - }, - { - "epoch": 26.31, - "learning_rate": 5.922806121315772e-08, - "loss": 3.771, - "step": 2369000 - }, - { - "epoch": 26.31, - "learning_rate": 5.921417941940785e-08, - "loss": 3.76, - "step": 2369500 - }, - { - "epoch": 26.32, - "learning_rate": 5.9200297625657994e-08, - "loss": 3.7905, - "step": 2370000 - }, - { - "epoch": 26.33, - "learning_rate": 5.918641583190813e-08, - "loss": 3.7537, - "step": 2370500 - }, - { - "epoch": 26.33, - "learning_rate": 5.917253403815827e-08, - "loss": 3.7657, - "step": 2371000 - }, - { - "epoch": 26.34, - "learning_rate": 5.9158652244408414e-08, - "loss": 3.7689, - "step": 2371500 - }, - { - "epoch": 26.34, - "learning_rate": 5.9144770450658557e-08, - "loss": 3.7825, - "step": 2372000 - }, - { - "epoch": 26.35, - "learning_rate": 5.9130888656908686e-08, - "loss": 3.778, - "step": 2372500 - }, - { - "epoch": 26.35, - "learning_rate": 5.911700686315883e-08, - "loss": 3.7542, - "step": 2373000 - }, - { - "epoch": 26.36, - "learning_rate": 5.9103125069408963e-08, - "loss": 3.7827, - "step": 2373500 - }, - { - "epoch": 26.36, - "learning_rate": 5.9089243275659106e-08, - "loss": 3.7815, - "step": 2374000 - }, - { - "epoch": 26.37, - "learning_rate": 5.907536148190925e-08, - "loss": 3.7804, - "step": 2374500 - }, - { - "epoch": 26.38, - "learning_rate": 5.906147968815939e-08, - "loss": 3.7731, - "step": 2375000 - }, - { - "epoch": 26.38, - "learning_rate": 5.904759789440952e-08, - "loss": 3.765, - "step": 2375500 - }, - { - "epoch": 26.39, - "learning_rate": 5.903371610065966e-08, - "loss": 3.7802, - "step": 2376000 - }, - { - "epoch": 26.39, - "learning_rate": 5.90198343069098e-08, - "loss": 3.7828, - "step": 2376500 - }, - { - "epoch": 26.4, - "learning_rate": 5.900595251315994e-08, - "loss": 3.7764, - "step": 2377000 - }, - { - "epoch": 26.4, - "learning_rate": 5.899207071941008e-08, - "loss": 3.7724, - "step": 2377500 - }, - { - "epoch": 26.41, - "learning_rate": 5.897818892566021e-08, - "loss": 3.7816, - "step": 2378000 - }, - { - "epoch": 26.41, - "learning_rate": 5.896430713191035e-08, - "loss": 3.7665, - "step": 2378500 - }, - { - "epoch": 26.42, - "learning_rate": 5.8950425338160495e-08, - "loss": 3.7869, - "step": 2379000 - }, - { - "epoch": 26.43, - "learning_rate": 5.893654354441063e-08, - "loss": 3.7587, - "step": 2379500 - }, - { - "epoch": 26.43, - "learning_rate": 5.892266175066077e-08, - "loss": 3.7765, - "step": 2380000 - }, - { - "epoch": 26.44, - "learning_rate": 5.8908779956910915e-08, - "loss": 3.7869, - "step": 2380500 - }, - { - "epoch": 26.44, - "learning_rate": 5.8894898163161044e-08, - "loss": 3.7881, - "step": 2381000 - }, - { - "epoch": 26.45, - "learning_rate": 5.888101636941119e-08, - "loss": 3.7907, - "step": 2381500 - }, - { - "epoch": 26.45, - "learning_rate": 5.886713457566133e-08, - "loss": 3.775, - "step": 2382000 - }, - { - "epoch": 26.46, - "learning_rate": 5.8853252781911465e-08, - "loss": 3.7772, - "step": 2382500 - }, - { - "epoch": 26.46, - "learning_rate": 5.883937098816161e-08, - "loss": 3.776, - "step": 2383000 - }, - { - "epoch": 26.47, - "learning_rate": 5.8825489194411736e-08, - "loss": 3.7497, - "step": 2383500 - }, - { - "epoch": 26.48, - "learning_rate": 5.881160740066188e-08, - "loss": 3.7557, - "step": 2384000 - }, - { - "epoch": 26.48, - "learning_rate": 5.879772560691202e-08, - "loss": 3.7666, - "step": 2384500 - }, - { - "epoch": 26.49, - "learning_rate": 5.878384381316216e-08, - "loss": 3.7546, - "step": 2385000 - }, - { - "epoch": 26.49, - "learning_rate": 5.87699620194123e-08, - "loss": 3.7955, - "step": 2385500 - }, - { - "epoch": 26.5, - "learning_rate": 5.875608022566244e-08, - "loss": 3.7892, - "step": 2386000 - }, - { - "epoch": 26.5, - "learning_rate": 5.874219843191257e-08, - "loss": 3.8018, - "step": 2386500 - }, - { - "epoch": 26.51, - "learning_rate": 5.872831663816271e-08, - "loss": 3.7711, - "step": 2387000 - }, - { - "epoch": 26.51, - "learning_rate": 5.8714434844412854e-08, - "loss": 3.7864, - "step": 2387500 - }, - { - "epoch": 26.52, - "learning_rate": 5.870055305066299e-08, - "loss": 3.7797, - "step": 2388000 - }, - { - "epoch": 26.53, - "learning_rate": 5.868667125691313e-08, - "loss": 3.7586, - "step": 2388500 - }, - { - "epoch": 26.53, - "learning_rate": 5.8672789463163274e-08, - "loss": 3.7873, - "step": 2389000 - }, - { - "epoch": 26.54, - "learning_rate": 5.8658907669413403e-08, - "loss": 3.7757, - "step": 2389500 - }, - { - "epoch": 26.54, - "learning_rate": 5.8645025875663546e-08, - "loss": 3.7946, - "step": 2390000 - }, - { - "epoch": 26.55, - "learning_rate": 5.863114408191369e-08, - "loss": 3.7511, - "step": 2390500 - }, - { - "epoch": 26.55, - "learning_rate": 5.8617262288163824e-08, - "loss": 3.7815, - "step": 2391000 - }, - { - "epoch": 26.56, - "learning_rate": 5.8603380494413966e-08, - "loss": 3.7808, - "step": 2391500 - }, - { - "epoch": 26.56, - "learning_rate": 5.8589498700664095e-08, - "loss": 3.7613, - "step": 2392000 - }, - { - "epoch": 26.57, - "learning_rate": 5.857561690691424e-08, - "loss": 3.7756, - "step": 2392500 - }, - { - "epoch": 26.58, - "learning_rate": 5.856173511316438e-08, - "loss": 3.7898, - "step": 2393000 - }, - { - "epoch": 26.58, - "learning_rate": 5.854785331941452e-08, - "loss": 3.7744, - "step": 2393500 - }, - { - "epoch": 26.59, - "learning_rate": 5.853397152566466e-08, - "loss": 3.7509, - "step": 2394000 - }, - { - "epoch": 26.59, - "learning_rate": 5.85200897319148e-08, - "loss": 3.7573, - "step": 2394500 - }, - { - "epoch": 26.6, - "learning_rate": 5.850620793816493e-08, - "loss": 3.7545, - "step": 2395000 - }, - { - "epoch": 26.6, - "learning_rate": 5.849232614441507e-08, - "loss": 3.7856, - "step": 2395500 - }, - { - "epoch": 26.61, - "learning_rate": 5.847844435066521e-08, - "loss": 3.7902, - "step": 2396000 - }, - { - "epoch": 26.61, - "learning_rate": 5.8464562556915355e-08, - "loss": 3.7875, - "step": 2396500 - }, - { - "epoch": 26.62, - "learning_rate": 5.845068076316549e-08, - "loss": 3.7675, - "step": 2397000 - }, - { - "epoch": 26.63, - "learning_rate": 5.8436798969415633e-08, - "loss": 3.7775, - "step": 2397500 - }, - { - "epoch": 26.63, - "learning_rate": 5.842291717566576e-08, - "loss": 3.7969, - "step": 2398000 - }, - { - "epoch": 26.64, - "learning_rate": 5.8409035381915905e-08, - "loss": 3.7738, - "step": 2398500 - }, - { - "epoch": 26.64, - "learning_rate": 5.839515358816605e-08, - "loss": 3.775, - "step": 2399000 - }, - { - "epoch": 26.65, - "learning_rate": 5.838127179441619e-08, - "loss": 3.7892, - "step": 2399500 - }, - { - "epoch": 26.65, - "learning_rate": 5.8367390000666325e-08, - "loss": 3.7576, - "step": 2400000 - }, - { - "epoch": 26.66, - "learning_rate": 5.835350820691646e-08, - "loss": 3.7613, - "step": 2400500 - }, - { - "epoch": 26.66, - "learning_rate": 5.8339626413166596e-08, - "loss": 3.7818, - "step": 2401000 - }, - { - "epoch": 26.67, - "learning_rate": 5.832574461941674e-08, - "loss": 3.7678, - "step": 2401500 - }, - { - "epoch": 26.68, - "learning_rate": 5.831186282566688e-08, - "loss": 3.7704, - "step": 2402000 - }, - { - "epoch": 26.68, - "learning_rate": 5.829798103191702e-08, - "loss": 3.7732, - "step": 2402500 - }, - { - "epoch": 26.69, - "learning_rate": 5.828409923816716e-08, - "loss": 3.7856, - "step": 2403000 - }, - { - "epoch": 26.69, - "learning_rate": 5.8270217444417294e-08, - "loss": 3.753, - "step": 2403500 - }, - { - "epoch": 26.7, - "learning_rate": 5.825633565066743e-08, - "loss": 3.7683, - "step": 2404000 - }, - { - "epoch": 26.7, - "learning_rate": 5.824245385691757e-08, - "loss": 3.7785, - "step": 2404500 - }, - { - "epoch": 26.71, - "learning_rate": 5.8228572063167714e-08, - "loss": 3.7612, - "step": 2405000 - }, - { - "epoch": 26.71, - "learning_rate": 5.8214690269417857e-08, - "loss": 3.8029, - "step": 2405500 - }, - { - "epoch": 26.72, - "learning_rate": 5.8200808475667986e-08, - "loss": 3.7592, - "step": 2406000 - }, - { - "epoch": 26.73, - "learning_rate": 5.818692668191813e-08, - "loss": 3.7703, - "step": 2406500 - }, - { - "epoch": 26.73, - "learning_rate": 5.8173044888168264e-08, - "loss": 3.8063, - "step": 2407000 - }, - { - "epoch": 26.74, - "learning_rate": 5.8159163094418406e-08, - "loss": 3.7556, - "step": 2407500 - }, - { - "epoch": 26.74, - "learning_rate": 5.814528130066855e-08, - "loss": 3.7629, - "step": 2408000 - }, - { - "epoch": 26.75, - "learning_rate": 5.813139950691869e-08, - "loss": 3.7713, - "step": 2408500 - }, - { - "epoch": 26.75, - "learning_rate": 5.811751771316882e-08, - "loss": 3.7854, - "step": 2409000 - }, - { - "epoch": 26.76, - "learning_rate": 5.810363591941896e-08, - "loss": 3.7742, - "step": 2409500 - }, - { - "epoch": 26.76, - "learning_rate": 5.80897541256691e-08, - "loss": 3.7741, - "step": 2410000 - }, - { - "epoch": 26.77, - "learning_rate": 5.807587233191924e-08, - "loss": 3.7968, - "step": 2410500 - }, - { - "epoch": 26.78, - "learning_rate": 5.806199053816938e-08, - "loss": 3.786, - "step": 2411000 - }, - { - "epoch": 26.78, - "learning_rate": 5.8048108744419524e-08, - "loss": 3.7779, - "step": 2411500 - }, - { - "epoch": 26.79, - "learning_rate": 5.803422695066965e-08, - "loss": 3.7668, - "step": 2412000 - }, - { - "epoch": 26.79, - "learning_rate": 5.8020345156919795e-08, - "loss": 3.7873, - "step": 2412500 - }, - { - "epoch": 26.8, - "learning_rate": 5.800646336316993e-08, - "loss": 3.7635, - "step": 2413000 - }, - { - "epoch": 26.8, - "learning_rate": 5.799258156942007e-08, - "loss": 3.7599, - "step": 2413500 - }, - { - "epoch": 26.81, - "learning_rate": 5.7978699775670216e-08, - "loss": 3.7678, - "step": 2414000 - }, - { - "epoch": 26.81, - "learning_rate": 5.7964817981920345e-08, - "loss": 3.794, - "step": 2414500 - }, - { - "epoch": 26.82, - "learning_rate": 5.795093618817049e-08, - "loss": 3.7688, - "step": 2415000 - }, - { - "epoch": 26.83, - "learning_rate": 5.793705439442063e-08, - "loss": 3.7707, - "step": 2415500 - }, - { - "epoch": 26.83, - "learning_rate": 5.7923172600670765e-08, - "loss": 3.7452, - "step": 2416000 - }, - { - "epoch": 26.84, - "learning_rate": 5.790929080692091e-08, - "loss": 3.7764, - "step": 2416500 - }, - { - "epoch": 26.84, - "learning_rate": 5.789540901317105e-08, - "loss": 3.7758, - "step": 2417000 - }, - { - "epoch": 26.85, - "learning_rate": 5.788152721942118e-08, - "loss": 3.7747, - "step": 2417500 - }, - { - "epoch": 26.85, - "learning_rate": 5.786764542567132e-08, - "loss": 3.7881, - "step": 2418000 - }, - { - "epoch": 26.86, - "learning_rate": 5.7853763631921456e-08, - "loss": 3.7616, - "step": 2418500 - }, - { - "epoch": 26.86, - "learning_rate": 5.78398818381716e-08, - "loss": 3.7473, - "step": 2419000 - }, - { - "epoch": 26.87, - "learning_rate": 5.782600004442174e-08, - "loss": 3.7767, - "step": 2419500 - }, - { - "epoch": 26.88, - "learning_rate": 5.781211825067187e-08, - "loss": 3.7812, - "step": 2420000 - }, - { - "epoch": 26.88, - "learning_rate": 5.779823645692201e-08, - "loss": 3.7537, - "step": 2420500 - }, - { - "epoch": 26.89, - "learning_rate": 5.7784354663172154e-08, - "loss": 3.7691, - "step": 2421000 - }, - { - "epoch": 26.89, - "learning_rate": 5.777047286942229e-08, - "loss": 3.7589, - "step": 2421500 - }, - { - "epoch": 26.9, - "learning_rate": 5.775659107567243e-08, - "loss": 3.7915, - "step": 2422000 - }, - { - "epoch": 26.9, - "learning_rate": 5.7742709281922574e-08, - "loss": 3.7548, - "step": 2422500 - }, - { - "epoch": 26.91, - "learning_rate": 5.7728827488172704e-08, - "loss": 3.7735, - "step": 2423000 - }, - { - "epoch": 26.91, - "learning_rate": 5.7714945694422846e-08, - "loss": 3.7637, - "step": 2423500 - }, - { - "epoch": 26.92, - "learning_rate": 5.770106390067299e-08, - "loss": 3.7853, - "step": 2424000 - }, - { - "epoch": 26.93, - "learning_rate": 5.7687182106923124e-08, - "loss": 3.7713, - "step": 2424500 - }, - { - "epoch": 26.93, - "learning_rate": 5.7673300313173266e-08, - "loss": 3.769, - "step": 2425000 - }, - { - "epoch": 26.94, - "learning_rate": 5.765941851942341e-08, - "loss": 3.7731, - "step": 2425500 - }, - { - "epoch": 26.94, - "learning_rate": 5.764553672567354e-08, - "loss": 3.7869, - "step": 2426000 - }, - { - "epoch": 26.95, - "learning_rate": 5.763165493192368e-08, - "loss": 3.7782, - "step": 2426500 - }, - { - "epoch": 26.95, - "learning_rate": 5.761777313817382e-08, - "loss": 3.7669, - "step": 2427000 - }, - { - "epoch": 26.96, - "learning_rate": 5.760389134442396e-08, - "loss": 3.7904, - "step": 2427500 - }, - { - "epoch": 26.96, - "learning_rate": 5.75900095506741e-08, - "loss": 3.7676, - "step": 2428000 - }, - { - "epoch": 26.97, - "learning_rate": 5.757612775692423e-08, - "loss": 3.7673, - "step": 2428500 - }, - { - "epoch": 26.98, - "learning_rate": 5.756224596317437e-08, - "loss": 3.7503, - "step": 2429000 - }, - { - "epoch": 26.98, - "learning_rate": 5.754836416942451e-08, - "loss": 3.7921, - "step": 2429500 - }, - { - "epoch": 26.99, - "learning_rate": 5.7534482375674656e-08, - "loss": 3.7747, - "step": 2430000 - }, - { - "epoch": 26.99, - "learning_rate": 5.752060058192479e-08, - "loss": 3.7642, - "step": 2430500 - }, - { - "epoch": 27.0, - "learning_rate": 5.7506718788174933e-08, - "loss": 3.7778, - "step": 2431000 - }, - { - "epoch": 27.0, - "eval_loss": 3.839319944381714, - "eval_runtime": 6.3037, - "eval_samples_per_second": 246.521, - "step": 2431242 - }, - { - "epoch": 27.0, - "learning_rate": 5.749283699442506e-08, - "loss": 3.7644, - "step": 2431500 - }, - { - "epoch": 27.01, - "learning_rate": 5.7478955200675205e-08, - "loss": 3.7694, - "step": 2432000 - }, - { - "epoch": 27.01, - "learning_rate": 5.746507340692535e-08, - "loss": 3.7794, - "step": 2432500 - }, - { - "epoch": 27.02, - "learning_rate": 5.745119161317549e-08, - "loss": 3.7575, - "step": 2433000 - }, - { - "epoch": 27.03, - "learning_rate": 5.7437309819425625e-08, - "loss": 3.786, - "step": 2433500 - }, - { - "epoch": 27.03, - "learning_rate": 5.742342802567577e-08, - "loss": 3.7761, - "step": 2434000 - }, - { - "epoch": 27.04, - "learning_rate": 5.7409546231925896e-08, - "loss": 3.7711, - "step": 2434500 - }, - { - "epoch": 27.04, - "learning_rate": 5.739566443817604e-08, - "loss": 3.7599, - "step": 2435000 - }, - { - "epoch": 27.05, - "learning_rate": 5.738178264442618e-08, - "loss": 3.7726, - "step": 2435500 - }, - { - "epoch": 27.05, - "learning_rate": 5.736790085067632e-08, - "loss": 3.7816, - "step": 2436000 - }, - { - "epoch": 27.06, - "learning_rate": 5.735401905692646e-08, - "loss": 3.7926, - "step": 2436500 - }, - { - "epoch": 27.06, - "learning_rate": 5.7340137263176594e-08, - "loss": 3.77, - "step": 2437000 - }, - { - "epoch": 27.07, - "learning_rate": 5.732625546942673e-08, - "loss": 3.7489, - "step": 2437500 - }, - { - "epoch": 27.08, - "learning_rate": 5.731237367567687e-08, - "loss": 3.7666, - "step": 2438000 - }, - { - "epoch": 27.08, - "learning_rate": 5.7298491881927014e-08, - "loss": 3.7658, - "step": 2438500 - }, - { - "epoch": 27.09, - "learning_rate": 5.728461008817716e-08, - "loss": 3.785, - "step": 2439000 - }, - { - "epoch": 27.09, - "learning_rate": 5.727072829442729e-08, - "loss": 3.7628, - "step": 2439500 - }, - { - "epoch": 27.1, - "learning_rate": 5.725684650067743e-08, - "loss": 3.7657, - "step": 2440000 - }, - { - "epoch": 27.1, - "learning_rate": 5.7242964706927564e-08, - "loss": 3.7782, - "step": 2440500 - }, - { - "epoch": 27.11, - "learning_rate": 5.7229082913177706e-08, - "loss": 3.7781, - "step": 2441000 - }, - { - "epoch": 27.11, - "learning_rate": 5.721520111942785e-08, - "loss": 3.7833, - "step": 2441500 - }, - { - "epoch": 27.12, - "learning_rate": 5.720131932567799e-08, - "loss": 3.7717, - "step": 2442000 - }, - { - "epoch": 27.13, - "learning_rate": 5.718743753192812e-08, - "loss": 3.7749, - "step": 2442500 - }, - { - "epoch": 27.13, - "learning_rate": 5.717355573817826e-08, - "loss": 3.7478, - "step": 2443000 - }, - { - "epoch": 27.14, - "learning_rate": 5.71596739444284e-08, - "loss": 3.7702, - "step": 2443500 - }, - { - "epoch": 27.14, - "learning_rate": 5.714579215067854e-08, - "loss": 3.7641, - "step": 2444000 - }, - { - "epoch": 27.15, - "learning_rate": 5.713191035692868e-08, - "loss": 3.7758, - "step": 2444500 - }, - { - "epoch": 27.15, - "learning_rate": 5.7118028563178824e-08, - "loss": 3.7961, - "step": 2445000 - }, - { - "epoch": 27.16, - "learning_rate": 5.710414676942895e-08, - "loss": 3.7872, - "step": 2445500 - }, - { - "epoch": 27.16, - "learning_rate": 5.7090264975679095e-08, - "loss": 3.7932, - "step": 2446000 - }, - { - "epoch": 27.17, - "learning_rate": 5.707638318192923e-08, - "loss": 3.7592, - "step": 2446500 - }, - { - "epoch": 27.17, - "learning_rate": 5.7062501388179373e-08, - "loss": 3.761, - "step": 2447000 - }, - { - "epoch": 27.18, - "learning_rate": 5.7048619594429516e-08, - "loss": 3.7795, - "step": 2447500 - }, - { - "epoch": 27.19, - "learning_rate": 5.703473780067966e-08, - "loss": 3.7765, - "step": 2448000 - }, - { - "epoch": 27.19, - "learning_rate": 5.702085600692979e-08, - "loss": 3.7743, - "step": 2448500 - }, - { - "epoch": 27.2, - "learning_rate": 5.700697421317992e-08, - "loss": 3.7592, - "step": 2449000 - }, - { - "epoch": 27.2, - "learning_rate": 5.6993092419430065e-08, - "loss": 3.7796, - "step": 2449500 - }, - { - "epoch": 27.21, - "learning_rate": 5.697921062568021e-08, - "loss": 3.773, - "step": 2450000 - }, - { - "epoch": 27.21, - "learning_rate": 5.696532883193035e-08, - "loss": 3.768, - "step": 2450500 - }, - { - "epoch": 27.22, - "learning_rate": 5.695144703818048e-08, - "loss": 3.7485, - "step": 2451000 - }, - { - "epoch": 27.22, - "learning_rate": 5.693756524443062e-08, - "loss": 3.7807, - "step": 2451500 - }, - { - "epoch": 27.23, - "learning_rate": 5.6923683450680756e-08, - "loss": 3.7811, - "step": 2452000 - }, - { - "epoch": 27.24, - "learning_rate": 5.69098016569309e-08, - "loss": 3.7592, - "step": 2452500 - }, - { - "epoch": 27.24, - "learning_rate": 5.689591986318104e-08, - "loss": 3.7698, - "step": 2453000 - }, - { - "epoch": 27.25, - "learning_rate": 5.688203806943118e-08, - "loss": 3.7714, - "step": 2453500 - }, - { - "epoch": 27.25, - "learning_rate": 5.686815627568131e-08, - "loss": 3.77, - "step": 2454000 - }, - { - "epoch": 27.26, - "learning_rate": 5.6854274481931454e-08, - "loss": 3.7745, - "step": 2454500 - }, - { - "epoch": 27.26, - "learning_rate": 5.684039268818159e-08, - "loss": 3.7805, - "step": 2455000 - }, - { - "epoch": 27.27, - "learning_rate": 5.682651089443173e-08, - "loss": 3.7796, - "step": 2455500 - }, - { - "epoch": 27.27, - "learning_rate": 5.6812629100681875e-08, - "loss": 3.7723, - "step": 2456000 - }, - { - "epoch": 27.28, - "learning_rate": 5.6798747306932004e-08, - "loss": 3.7536, - "step": 2456500 - }, - { - "epoch": 27.29, - "learning_rate": 5.6784865513182146e-08, - "loss": 3.7881, - "step": 2457000 - }, - { - "epoch": 27.29, - "learning_rate": 5.677098371943229e-08, - "loss": 3.7827, - "step": 2457500 - }, - { - "epoch": 27.3, - "learning_rate": 5.6757101925682424e-08, - "loss": 3.7577, - "step": 2458000 - }, - { - "epoch": 27.3, - "learning_rate": 5.6743220131932566e-08, - "loss": 3.7379, - "step": 2458500 - }, - { - "epoch": 27.31, - "learning_rate": 5.672933833818271e-08, - "loss": 3.7722, - "step": 2459000 - }, - { - "epoch": 27.31, - "learning_rate": 5.671545654443284e-08, - "loss": 3.7742, - "step": 2459500 - }, - { - "epoch": 27.32, - "learning_rate": 5.670157475068298e-08, - "loss": 3.7498, - "step": 2460000 - }, - { - "epoch": 27.32, - "learning_rate": 5.668769295693312e-08, - "loss": 3.7747, - "step": 2460500 - }, - { - "epoch": 27.33, - "learning_rate": 5.667381116318326e-08, - "loss": 3.7791, - "step": 2461000 - }, - { - "epoch": 27.34, - "learning_rate": 5.66599293694334e-08, - "loss": 3.7614, - "step": 2461500 - }, - { - "epoch": 27.34, - "learning_rate": 5.664604757568354e-08, - "loss": 3.7726, - "step": 2462000 - }, - { - "epoch": 27.35, - "learning_rate": 5.663216578193367e-08, - "loss": 3.7659, - "step": 2462500 - }, - { - "epoch": 27.35, - "learning_rate": 5.6618283988183813e-08, - "loss": 3.7647, - "step": 2463000 - }, - { - "epoch": 27.36, - "learning_rate": 5.6604402194433956e-08, - "loss": 3.7823, - "step": 2463500 - }, - { - "epoch": 27.36, - "learning_rate": 5.659052040068409e-08, - "loss": 3.747, - "step": 2464000 - }, - { - "epoch": 27.37, - "learning_rate": 5.6576638606934233e-08, - "loss": 3.7872, - "step": 2464500 - }, - { - "epoch": 27.37, - "learning_rate": 5.656275681318436e-08, - "loss": 3.7819, - "step": 2465000 - }, - { - "epoch": 27.38, - "learning_rate": 5.6548875019434505e-08, - "loss": 3.779, - "step": 2465500 - }, - { - "epoch": 27.39, - "learning_rate": 5.653499322568465e-08, - "loss": 3.7772, - "step": 2466000 - }, - { - "epoch": 27.39, - "learning_rate": 5.652111143193479e-08, - "loss": 3.7762, - "step": 2466500 - }, - { - "epoch": 27.4, - "learning_rate": 5.6507229638184925e-08, - "loss": 3.7859, - "step": 2467000 - }, - { - "epoch": 27.4, - "learning_rate": 5.649334784443507e-08, - "loss": 3.7633, - "step": 2467500 - }, - { - "epoch": 27.41, - "learning_rate": 5.6479466050685196e-08, - "loss": 3.7559, - "step": 2468000 - }, - { - "epoch": 27.41, - "learning_rate": 5.646558425693534e-08, - "loss": 3.7681, - "step": 2468500 - }, - { - "epoch": 27.42, - "learning_rate": 5.645170246318548e-08, - "loss": 3.7661, - "step": 2469000 - }, - { - "epoch": 27.42, - "learning_rate": 5.643782066943562e-08, - "loss": 3.7617, - "step": 2469500 - }, - { - "epoch": 27.43, - "learning_rate": 5.642393887568576e-08, - "loss": 3.7621, - "step": 2470000 - }, - { - "epoch": 27.44, - "learning_rate": 5.64100570819359e-08, - "loss": 3.7585, - "step": 2470500 - }, - { - "epoch": 27.44, - "learning_rate": 5.639617528818603e-08, - "loss": 3.7719, - "step": 2471000 - }, - { - "epoch": 27.45, - "learning_rate": 5.638229349443617e-08, - "loss": 3.7611, - "step": 2471500 - }, - { - "epoch": 27.45, - "learning_rate": 5.6368411700686315e-08, - "loss": 3.7682, - "step": 2472000 - }, - { - "epoch": 27.46, - "learning_rate": 5.635452990693646e-08, - "loss": 3.7916, - "step": 2472500 - }, - { - "epoch": 27.46, - "learning_rate": 5.634064811318659e-08, - "loss": 3.7689, - "step": 2473000 - }, - { - "epoch": 27.47, - "learning_rate": 5.632676631943673e-08, - "loss": 3.7816, - "step": 2473500 - }, - { - "epoch": 27.47, - "learning_rate": 5.6312884525686864e-08, - "loss": 3.772, - "step": 2474000 - }, - { - "epoch": 27.48, - "learning_rate": 5.6299002731937006e-08, - "loss": 3.7815, - "step": 2474500 - }, - { - "epoch": 27.49, - "learning_rate": 5.628512093818715e-08, - "loss": 3.7819, - "step": 2475000 - }, - { - "epoch": 27.49, - "learning_rate": 5.627123914443729e-08, - "loss": 3.7742, - "step": 2475500 - }, - { - "epoch": 27.5, - "learning_rate": 5.6257357350687426e-08, - "loss": 3.7708, - "step": 2476000 - }, - { - "epoch": 27.5, - "learning_rate": 5.624347555693756e-08, - "loss": 3.7639, - "step": 2476500 - }, - { - "epoch": 27.51, - "learning_rate": 5.62295937631877e-08, - "loss": 3.7575, - "step": 2477000 - }, - { - "epoch": 27.51, - "learning_rate": 5.621571196943784e-08, - "loss": 3.7897, - "step": 2477500 - }, - { - "epoch": 27.52, - "learning_rate": 5.620183017568798e-08, - "loss": 3.7764, - "step": 2478000 - }, - { - "epoch": 27.52, - "learning_rate": 5.6187948381938124e-08, - "loss": 3.7682, - "step": 2478500 - }, - { - "epoch": 27.53, - "learning_rate": 5.617406658818825e-08, - "loss": 3.7814, - "step": 2479000 - }, - { - "epoch": 27.54, - "learning_rate": 5.616018479443839e-08, - "loss": 3.7725, - "step": 2479500 - }, - { - "epoch": 27.54, - "learning_rate": 5.614630300068853e-08, - "loss": 3.7657, - "step": 2480000 - }, - { - "epoch": 27.55, - "learning_rate": 5.6132421206938673e-08, - "loss": 3.7681, - "step": 2480500 - }, - { - "epoch": 27.55, - "learning_rate": 5.6118539413188816e-08, - "loss": 3.7775, - "step": 2481000 - }, - { - "epoch": 27.56, - "learning_rate": 5.610465761943896e-08, - "loss": 3.7931, - "step": 2481500 - }, - { - "epoch": 27.56, - "learning_rate": 5.609077582568909e-08, - "loss": 3.8074, - "step": 2482000 - }, - { - "epoch": 27.57, - "learning_rate": 5.607689403193922e-08, - "loss": 3.783, - "step": 2482500 - }, - { - "epoch": 27.57, - "learning_rate": 5.6063012238189365e-08, - "loss": 3.7563, - "step": 2483000 - }, - { - "epoch": 27.58, - "learning_rate": 5.604913044443951e-08, - "loss": 3.766, - "step": 2483500 - }, - { - "epoch": 27.59, - "learning_rate": 5.603524865068965e-08, - "loss": 3.7731, - "step": 2484000 - }, - { - "epoch": 27.59, - "learning_rate": 5.602136685693979e-08, - "loss": 3.7682, - "step": 2484500 - }, - { - "epoch": 27.6, - "learning_rate": 5.600748506318992e-08, - "loss": 3.7857, - "step": 2485000 - }, - { - "epoch": 27.6, - "learning_rate": 5.5993603269440056e-08, - "loss": 3.7857, - "step": 2485500 - }, - { - "epoch": 27.61, - "learning_rate": 5.59797214756902e-08, - "loss": 3.765, - "step": 2486000 - }, - { - "epoch": 27.61, - "learning_rate": 5.596583968194034e-08, - "loss": 3.7875, - "step": 2486500 - }, - { - "epoch": 27.62, - "learning_rate": 5.595195788819048e-08, - "loss": 3.7687, - "step": 2487000 - }, - { - "epoch": 27.62, - "learning_rate": 5.593807609444061e-08, - "loss": 3.7513, - "step": 2487500 - }, - { - "epoch": 27.63, - "learning_rate": 5.5924194300690754e-08, - "loss": 3.7733, - "step": 2488000 - }, - { - "epoch": 27.64, - "learning_rate": 5.591031250694089e-08, - "loss": 3.7782, - "step": 2488500 - }, - { - "epoch": 27.64, - "learning_rate": 5.589643071319103e-08, - "loss": 3.7516, - "step": 2489000 - }, - { - "epoch": 27.65, - "learning_rate": 5.5882548919441175e-08, - "loss": 3.7798, - "step": 2489500 - }, - { - "epoch": 27.65, - "learning_rate": 5.586866712569132e-08, - "loss": 3.7559, - "step": 2490000 - }, - { - "epoch": 27.66, - "learning_rate": 5.5854785331941446e-08, - "loss": 3.7728, - "step": 2490500 - }, - { - "epoch": 27.66, - "learning_rate": 5.584090353819159e-08, - "loss": 3.7841, - "step": 2491000 - }, - { - "epoch": 27.67, - "learning_rate": 5.5827021744441724e-08, - "loss": 3.7556, - "step": 2491500 - }, - { - "epoch": 27.67, - "learning_rate": 5.5813139950691866e-08, - "loss": 3.7736, - "step": 2492000 - }, - { - "epoch": 27.68, - "learning_rate": 5.579925815694201e-08, - "loss": 3.7714, - "step": 2492500 - }, - { - "epoch": 27.69, - "learning_rate": 5.578537636319215e-08, - "loss": 3.7686, - "step": 2493000 - }, - { - "epoch": 27.69, - "learning_rate": 5.577149456944228e-08, - "loss": 3.7593, - "step": 2493500 - }, - { - "epoch": 27.7, - "learning_rate": 5.575761277569242e-08, - "loss": 3.7764, - "step": 2494000 - }, - { - "epoch": 27.7, - "learning_rate": 5.574373098194256e-08, - "loss": 3.7686, - "step": 2494500 - }, - { - "epoch": 27.71, - "learning_rate": 5.57298491881927e-08, - "loss": 3.7731, - "step": 2495000 - }, - { - "epoch": 27.71, - "learning_rate": 5.571596739444284e-08, - "loss": 3.7633, - "step": 2495500 - }, - { - "epoch": 27.72, - "learning_rate": 5.570208560069297e-08, - "loss": 3.7755, - "step": 2496000 - }, - { - "epoch": 27.72, - "learning_rate": 5.5688203806943113e-08, - "loss": 3.775, - "step": 2496500 - }, - { - "epoch": 27.73, - "learning_rate": 5.5674322013193256e-08, - "loss": 3.7678, - "step": 2497000 - }, - { - "epoch": 27.74, - "learning_rate": 5.566044021944339e-08, - "loss": 3.7694, - "step": 2497500 - }, - { - "epoch": 27.74, - "learning_rate": 5.5646558425693534e-08, - "loss": 3.7903, - "step": 2498000 - }, - { - "epoch": 27.75, - "learning_rate": 5.5632676631943676e-08, - "loss": 3.7774, - "step": 2498500 - }, - { - "epoch": 27.75, - "learning_rate": 5.5618794838193805e-08, - "loss": 3.7595, - "step": 2499000 - }, - { - "epoch": 27.76, - "learning_rate": 5.560491304444395e-08, - "loss": 3.7634, - "step": 2499500 - }, - { - "epoch": 27.76, - "learning_rate": 5.559103125069409e-08, - "loss": 3.7895, - "step": 2500000 - }, - { - "epoch": 27.77, - "learning_rate": 5.5577149456944225e-08, - "loss": 3.7898, - "step": 2500500 - }, - { - "epoch": 27.77, - "learning_rate": 5.556326766319437e-08, - "loss": 3.7875, - "step": 2501000 - }, - { - "epoch": 27.78, - "learning_rate": 5.5549385869444496e-08, - "loss": 3.7768, - "step": 2501500 - }, - { - "epoch": 27.79, - "learning_rate": 5.553550407569464e-08, - "loss": 3.7725, - "step": 2502000 - }, - { - "epoch": 27.79, - "learning_rate": 5.552162228194478e-08, - "loss": 3.7867, - "step": 2502500 - }, - { - "epoch": 27.8, - "learning_rate": 5.550774048819492e-08, - "loss": 3.7657, - "step": 2503000 - }, - { - "epoch": 27.8, - "learning_rate": 5.549385869444506e-08, - "loss": 3.7743, - "step": 2503500 - }, - { - "epoch": 27.81, - "learning_rate": 5.54799769006952e-08, - "loss": 3.7864, - "step": 2504000 - }, - { - "epoch": 27.81, - "learning_rate": 5.546609510694533e-08, - "loss": 3.7541, - "step": 2504500 - }, - { - "epoch": 27.82, - "learning_rate": 5.545221331319547e-08, - "loss": 3.7544, - "step": 2505000 - }, - { - "epoch": 27.82, - "learning_rate": 5.5438331519445615e-08, - "loss": 3.7779, - "step": 2505500 - }, - { - "epoch": 27.83, - "learning_rate": 5.542444972569576e-08, - "loss": 3.7703, - "step": 2506000 - }, - { - "epoch": 27.84, - "learning_rate": 5.541056793194589e-08, - "loss": 3.7863, - "step": 2506500 - }, - { - "epoch": 27.84, - "learning_rate": 5.5396686138196035e-08, - "loss": 3.7769, - "step": 2507000 - }, - { - "epoch": 27.85, - "learning_rate": 5.5382804344446164e-08, - "loss": 3.7691, - "step": 2507500 - }, - { - "epoch": 27.85, - "learning_rate": 5.5368922550696306e-08, - "loss": 3.7773, - "step": 2508000 - }, - { - "epoch": 27.86, - "learning_rate": 5.535504075694645e-08, - "loss": 3.7756, - "step": 2508500 - }, - { - "epoch": 27.86, - "learning_rate": 5.534115896319659e-08, - "loss": 3.7769, - "step": 2509000 - }, - { - "epoch": 27.87, - "learning_rate": 5.5327277169446726e-08, - "loss": 3.7518, - "step": 2509500 - }, - { - "epoch": 27.87, - "learning_rate": 5.5313395375696855e-08, - "loss": 3.7714, - "step": 2510000 - }, - { - "epoch": 27.88, - "learning_rate": 5.5299513581947e-08, - "loss": 3.7649, - "step": 2510500 - }, - { - "epoch": 27.89, - "learning_rate": 5.528563178819714e-08, - "loss": 3.7651, - "step": 2511000 - }, - { - "epoch": 27.89, - "learning_rate": 5.527174999444728e-08, - "loss": 3.7759, - "step": 2511500 - }, - { - "epoch": 27.9, - "learning_rate": 5.5257868200697424e-08, - "loss": 3.7776, - "step": 2512000 - }, - { - "epoch": 27.9, - "learning_rate": 5.524398640694756e-08, - "loss": 3.7678, - "step": 2512500 - }, - { - "epoch": 27.91, - "learning_rate": 5.523010461319769e-08, - "loss": 3.7774, - "step": 2513000 - }, - { - "epoch": 27.91, - "learning_rate": 5.521622281944783e-08, - "loss": 3.7678, - "step": 2513500 - }, - { - "epoch": 27.92, - "learning_rate": 5.5202341025697974e-08, - "loss": 3.7747, - "step": 2514000 - }, - { - "epoch": 27.92, - "learning_rate": 5.5188459231948116e-08, - "loss": 3.7619, - "step": 2514500 - }, - { - "epoch": 27.93, - "learning_rate": 5.517457743819826e-08, - "loss": 3.7457, - "step": 2515000 - }, - { - "epoch": 27.94, - "learning_rate": 5.516069564444839e-08, - "loss": 3.7715, - "step": 2515500 - }, - { - "epoch": 27.94, - "learning_rate": 5.514681385069852e-08, - "loss": 3.7597, - "step": 2516000 - }, - { - "epoch": 27.95, - "learning_rate": 5.5132932056948665e-08, - "loss": 3.7613, - "step": 2516500 - }, - { - "epoch": 27.95, - "learning_rate": 5.511905026319881e-08, - "loss": 3.7777, - "step": 2517000 - }, - { - "epoch": 27.96, - "learning_rate": 5.510516846944895e-08, - "loss": 3.7765, - "step": 2517500 - }, - { - "epoch": 27.96, - "learning_rate": 5.509128667569909e-08, - "loss": 3.77, - "step": 2518000 - }, - { - "epoch": 27.97, - "learning_rate": 5.507740488194922e-08, - "loss": 3.7971, - "step": 2518500 - }, - { - "epoch": 27.97, - "learning_rate": 5.5063523088199356e-08, - "loss": 3.7718, - "step": 2519000 - }, - { - "epoch": 27.98, - "learning_rate": 5.50496412944495e-08, - "loss": 3.7833, - "step": 2519500 - }, - { - "epoch": 27.99, - "learning_rate": 5.503575950069964e-08, - "loss": 3.7656, - "step": 2520000 - }, - { - "epoch": 27.99, - "learning_rate": 5.502187770694978e-08, - "loss": 3.7643, - "step": 2520500 - }, - { - "epoch": 28.0, - "learning_rate": 5.5007995913199926e-08, - "loss": 3.7503, - "step": 2521000 - }, - { - "epoch": 28.0, - "eval_loss": 3.837568759918213, - "eval_runtime": 6.3037, - "eval_samples_per_second": 246.521, - "step": 2521288 - }, - { - "epoch": 28.0, - "learning_rate": 5.4994114119450055e-08, - "loss": 3.7668, - "step": 2521500 - }, - { - "epoch": 28.01, - "learning_rate": 5.498023232570019e-08, - "loss": 3.7821, - "step": 2522000 - }, - { - "epoch": 28.01, - "learning_rate": 5.496635053195033e-08, - "loss": 3.7631, - "step": 2522500 - }, - { - "epoch": 28.02, - "learning_rate": 5.4952468738200475e-08, - "loss": 3.8038, - "step": 2523000 - }, - { - "epoch": 28.02, - "learning_rate": 5.493858694445062e-08, - "loss": 3.7596, - "step": 2523500 - }, - { - "epoch": 28.03, - "learning_rate": 5.4924705150700746e-08, - "loss": 3.7611, - "step": 2524000 - }, - { - "epoch": 28.04, - "learning_rate": 5.491082335695089e-08, - "loss": 3.7857, - "step": 2524500 - }, - { - "epoch": 28.04, - "learning_rate": 5.4896941563201024e-08, - "loss": 3.7609, - "step": 2525000 - }, - { - "epoch": 28.05, - "learning_rate": 5.4883059769451166e-08, - "loss": 3.7681, - "step": 2525500 - }, - { - "epoch": 28.05, - "learning_rate": 5.486917797570131e-08, - "loss": 3.7794, - "step": 2526000 - }, - { - "epoch": 28.06, - "learning_rate": 5.485529618195145e-08, - "loss": 3.7833, - "step": 2526500 - }, - { - "epoch": 28.06, - "learning_rate": 5.484141438820158e-08, - "loss": 3.7641, - "step": 2527000 - }, - { - "epoch": 28.07, - "learning_rate": 5.482753259445172e-08, - "loss": 3.7664, - "step": 2527500 - }, - { - "epoch": 28.07, - "learning_rate": 5.481365080070186e-08, - "loss": 3.7649, - "step": 2528000 - }, - { - "epoch": 28.08, - "learning_rate": 5.4799769006952e-08, - "loss": 3.7749, - "step": 2528500 - }, - { - "epoch": 28.09, - "learning_rate": 5.478588721320214e-08, - "loss": 3.7693, - "step": 2529000 - }, - { - "epoch": 28.09, - "learning_rate": 5.4772005419452284e-08, - "loss": 3.7652, - "step": 2529500 - }, - { - "epoch": 28.1, - "learning_rate": 5.4758123625702413e-08, - "loss": 3.7573, - "step": 2530000 - }, - { - "epoch": 28.1, - "learning_rate": 5.4744241831952556e-08, - "loss": 3.7508, - "step": 2530500 - }, - { - "epoch": 28.11, - "learning_rate": 5.473036003820269e-08, - "loss": 3.7849, - "step": 2531000 - }, - { - "epoch": 28.11, - "learning_rate": 5.4716478244452834e-08, - "loss": 3.7644, - "step": 2531500 - }, - { - "epoch": 28.12, - "learning_rate": 5.4702596450702976e-08, - "loss": 3.7669, - "step": 2532000 - }, - { - "epoch": 28.12, - "learning_rate": 5.4688714656953105e-08, - "loss": 3.7671, - "step": 2532500 - }, - { - "epoch": 28.13, - "learning_rate": 5.467483286320325e-08, - "loss": 3.7686, - "step": 2533000 - }, - { - "epoch": 28.14, - "learning_rate": 5.466095106945339e-08, - "loss": 3.7706, - "step": 2533500 - }, - { - "epoch": 28.14, - "learning_rate": 5.4647069275703525e-08, - "loss": 3.7591, - "step": 2534000 - }, - { - "epoch": 28.15, - "learning_rate": 5.463318748195367e-08, - "loss": 3.7771, - "step": 2534500 - }, - { - "epoch": 28.15, - "learning_rate": 5.461930568820381e-08, - "loss": 3.7745, - "step": 2535000 - }, - { - "epoch": 28.16, - "learning_rate": 5.460542389445394e-08, - "loss": 3.7868, - "step": 2535500 - }, - { - "epoch": 28.16, - "learning_rate": 5.459154210070408e-08, - "loss": 3.7652, - "step": 2536000 - }, - { - "epoch": 28.17, - "learning_rate": 5.457766030695422e-08, - "loss": 3.7696, - "step": 2536500 - }, - { - "epoch": 28.17, - "learning_rate": 5.456377851320436e-08, - "loss": 3.7645, - "step": 2537000 - }, - { - "epoch": 28.18, - "learning_rate": 5.45498967194545e-08, - "loss": 3.7591, - "step": 2537500 - }, - { - "epoch": 28.19, - "learning_rate": 5.453601492570463e-08, - "loss": 3.7625, - "step": 2538000 - }, - { - "epoch": 28.19, - "learning_rate": 5.452213313195477e-08, - "loss": 3.7703, - "step": 2538500 - }, - { - "epoch": 28.2, - "learning_rate": 5.4508251338204915e-08, - "loss": 3.7596, - "step": 2539000 - }, - { - "epoch": 28.2, - "learning_rate": 5.449436954445506e-08, - "loss": 3.7648, - "step": 2539500 - }, - { - "epoch": 28.21, - "learning_rate": 5.448048775070519e-08, - "loss": 3.7689, - "step": 2540000 - }, - { - "epoch": 28.21, - "learning_rate": 5.4466605956955335e-08, - "loss": 3.7598, - "step": 2540500 - }, - { - "epoch": 28.22, - "learning_rate": 5.4452724163205464e-08, - "loss": 3.7659, - "step": 2541000 - }, - { - "epoch": 28.22, - "learning_rate": 5.4438842369455606e-08, - "loss": 3.7738, - "step": 2541500 - }, - { - "epoch": 28.23, - "learning_rate": 5.442496057570575e-08, - "loss": 3.7589, - "step": 2542000 - }, - { - "epoch": 28.24, - "learning_rate": 5.441107878195589e-08, - "loss": 3.7769, - "step": 2542500 - }, - { - "epoch": 28.24, - "learning_rate": 5.4397196988206026e-08, - "loss": 3.7698, - "step": 2543000 - }, - { - "epoch": 28.25, - "learning_rate": 5.438331519445617e-08, - "loss": 3.773, - "step": 2543500 - }, - { - "epoch": 28.25, - "learning_rate": 5.43694334007063e-08, - "loss": 3.7597, - "step": 2544000 - }, - { - "epoch": 28.26, - "learning_rate": 5.435555160695644e-08, - "loss": 3.7762, - "step": 2544500 - }, - { - "epoch": 28.26, - "learning_rate": 5.434166981320658e-08, - "loss": 3.7725, - "step": 2545000 - }, - { - "epoch": 28.27, - "learning_rate": 5.4327788019456724e-08, - "loss": 3.773, - "step": 2545500 - }, - { - "epoch": 28.27, - "learning_rate": 5.431390622570686e-08, - "loss": 3.7687, - "step": 2546000 - }, - { - "epoch": 28.28, - "learning_rate": 5.430002443195699e-08, - "loss": 3.7913, - "step": 2546500 - }, - { - "epoch": 28.29, - "learning_rate": 5.428614263820713e-08, - "loss": 3.7714, - "step": 2547000 - }, - { - "epoch": 28.29, - "learning_rate": 5.4272260844457274e-08, - "loss": 3.7808, - "step": 2547500 - }, - { - "epoch": 28.3, - "learning_rate": 5.4258379050707416e-08, - "loss": 3.784, - "step": 2548000 - }, - { - "epoch": 28.3, - "learning_rate": 5.424449725695756e-08, - "loss": 3.7787, - "step": 2548500 - }, - { - "epoch": 28.31, - "learning_rate": 5.4230615463207694e-08, - "loss": 3.7822, - "step": 2549000 - }, - { - "epoch": 28.31, - "learning_rate": 5.421673366945782e-08, - "loss": 3.7806, - "step": 2549500 - }, - { - "epoch": 28.32, - "learning_rate": 5.4202851875707965e-08, - "loss": 3.7723, - "step": 2550000 - }, - { - "epoch": 28.32, - "learning_rate": 5.418897008195811e-08, - "loss": 3.7749, - "step": 2550500 - }, - { - "epoch": 28.33, - "learning_rate": 5.417508828820825e-08, - "loss": 3.757, - "step": 2551000 - }, - { - "epoch": 28.34, - "learning_rate": 5.416120649445839e-08, - "loss": 3.7731, - "step": 2551500 - }, - { - "epoch": 28.34, - "learning_rate": 5.414732470070852e-08, - "loss": 3.7605, - "step": 2552000 - }, - { - "epoch": 28.35, - "learning_rate": 5.4133442906958657e-08, - "loss": 3.7846, - "step": 2552500 - }, - { - "epoch": 28.35, - "learning_rate": 5.41195611132088e-08, - "loss": 3.7525, - "step": 2553000 - }, - { - "epoch": 28.36, - "learning_rate": 5.410567931945894e-08, - "loss": 3.7738, - "step": 2553500 - }, - { - "epoch": 28.36, - "learning_rate": 5.4091797525709083e-08, - "loss": 3.7572, - "step": 2554000 - }, - { - "epoch": 28.37, - "learning_rate": 5.4077915731959226e-08, - "loss": 3.77, - "step": 2554500 - }, - { - "epoch": 28.37, - "learning_rate": 5.4064033938209355e-08, - "loss": 3.7914, - "step": 2555000 - }, - { - "epoch": 28.38, - "learning_rate": 5.405015214445949e-08, - "loss": 3.7527, - "step": 2555500 - }, - { - "epoch": 28.39, - "learning_rate": 5.403627035070963e-08, - "loss": 3.7717, - "step": 2556000 - }, - { - "epoch": 28.39, - "learning_rate": 5.4022388556959775e-08, - "loss": 3.7775, - "step": 2556500 - }, - { - "epoch": 28.4, - "learning_rate": 5.400850676320992e-08, - "loss": 3.7579, - "step": 2557000 - }, - { - "epoch": 28.4, - "learning_rate": 5.399462496946006e-08, - "loss": 3.7778, - "step": 2557500 - }, - { - "epoch": 28.41, - "learning_rate": 5.398074317571019e-08, - "loss": 3.7643, - "step": 2558000 - }, - { - "epoch": 28.41, - "learning_rate": 5.3966861381960324e-08, - "loss": 3.785, - "step": 2558500 - }, - { - "epoch": 28.42, - "learning_rate": 5.3952979588210466e-08, - "loss": 3.7529, - "step": 2559000 - }, - { - "epoch": 28.42, - "learning_rate": 5.393909779446061e-08, - "loss": 3.7797, - "step": 2559500 - }, - { - "epoch": 28.43, - "learning_rate": 5.392521600071075e-08, - "loss": 3.7663, - "step": 2560000 - }, - { - "epoch": 28.44, - "learning_rate": 5.391133420696088e-08, - "loss": 3.7744, - "step": 2560500 - }, - { - "epoch": 28.44, - "learning_rate": 5.389745241321102e-08, - "loss": 3.7721, - "step": 2561000 - }, - { - "epoch": 28.45, - "learning_rate": 5.388357061946116e-08, - "loss": 3.7684, - "step": 2561500 - }, - { - "epoch": 28.45, - "learning_rate": 5.38696888257113e-08, - "loss": 3.7405, - "step": 2562000 - }, - { - "epoch": 28.46, - "learning_rate": 5.385580703196144e-08, - "loss": 3.7801, - "step": 2562500 - }, - { - "epoch": 28.46, - "learning_rate": 5.3841925238211585e-08, - "loss": 3.7697, - "step": 2563000 - }, - { - "epoch": 28.47, - "learning_rate": 5.3828043444461714e-08, - "loss": 3.7516, - "step": 2563500 - }, - { - "epoch": 28.47, - "learning_rate": 5.3814161650711856e-08, - "loss": 3.7671, - "step": 2564000 - }, - { - "epoch": 28.48, - "learning_rate": 5.380027985696199e-08, - "loss": 3.7682, - "step": 2564500 - }, - { - "epoch": 28.49, - "learning_rate": 5.3786398063212134e-08, - "loss": 3.7763, - "step": 2565000 - }, - { - "epoch": 28.49, - "learning_rate": 5.3772516269462276e-08, - "loss": 3.7905, - "step": 2565500 - }, - { - "epoch": 28.5, - "learning_rate": 5.375863447571242e-08, - "loss": 3.7611, - "step": 2566000 - }, - { - "epoch": 28.5, - "learning_rate": 5.374475268196255e-08, - "loss": 3.7803, - "step": 2566500 - }, - { - "epoch": 28.51, - "learning_rate": 5.373087088821269e-08, - "loss": 3.7443, - "step": 2567000 - }, - { - "epoch": 28.51, - "learning_rate": 5.3716989094462825e-08, - "loss": 3.7675, - "step": 2567500 - }, - { - "epoch": 28.52, - "learning_rate": 5.370310730071297e-08, - "loss": 3.759, - "step": 2568000 - }, - { - "epoch": 28.52, - "learning_rate": 5.368922550696311e-08, - "loss": 3.7613, - "step": 2568500 - }, - { - "epoch": 28.53, - "learning_rate": 5.367534371321324e-08, - "loss": 3.7768, - "step": 2569000 - }, - { - "epoch": 28.54, - "learning_rate": 5.366146191946338e-08, - "loss": 3.7792, - "step": 2569500 - }, - { - "epoch": 28.54, - "learning_rate": 5.364758012571352e-08, - "loss": 3.7777, - "step": 2570000 - }, - { - "epoch": 28.55, - "learning_rate": 5.363369833196366e-08, - "loss": 3.7599, - "step": 2570500 - }, - { - "epoch": 28.55, - "learning_rate": 5.36198165382138e-08, - "loss": 3.76, - "step": 2571000 - }, - { - "epoch": 28.56, - "learning_rate": 5.3605934744463943e-08, - "loss": 3.7725, - "step": 2571500 - }, - { - "epoch": 28.56, - "learning_rate": 5.359205295071407e-08, - "loss": 3.7684, - "step": 2572000 - }, - { - "epoch": 28.57, - "learning_rate": 5.3578171156964215e-08, - "loss": 3.7799, - "step": 2572500 - }, - { - "epoch": 28.57, - "learning_rate": 5.356428936321436e-08, - "loss": 3.7608, - "step": 2573000 - }, - { - "epoch": 28.58, - "learning_rate": 5.355040756946449e-08, - "loss": 3.7667, - "step": 2573500 - }, - { - "epoch": 28.59, - "learning_rate": 5.3536525775714635e-08, - "loss": 3.7679, - "step": 2574000 - }, - { - "epoch": 28.59, - "learning_rate": 5.3522643981964764e-08, - "loss": 3.7654, - "step": 2574500 - }, - { - "epoch": 28.6, - "learning_rate": 5.3508762188214906e-08, - "loss": 3.7837, - "step": 2575000 - }, - { - "epoch": 28.6, - "learning_rate": 5.349488039446505e-08, - "loss": 3.7658, - "step": 2575500 - }, - { - "epoch": 28.61, - "learning_rate": 5.348099860071519e-08, - "loss": 3.7712, - "step": 2576000 - }, - { - "epoch": 28.61, - "learning_rate": 5.3467116806965326e-08, - "loss": 3.7651, - "step": 2576500 - }, - { - "epoch": 28.62, - "learning_rate": 5.345323501321547e-08, - "loss": 3.7747, - "step": 2577000 - }, - { - "epoch": 28.62, - "learning_rate": 5.34393532194656e-08, - "loss": 3.7698, - "step": 2577500 - }, - { - "epoch": 28.63, - "learning_rate": 5.342547142571574e-08, - "loss": 3.775, - "step": 2578000 - }, - { - "epoch": 28.64, - "learning_rate": 5.341158963196588e-08, - "loss": 3.766, - "step": 2578500 - }, - { - "epoch": 28.64, - "learning_rate": 5.3397707838216024e-08, - "loss": 3.769, - "step": 2579000 - }, - { - "epoch": 28.65, - "learning_rate": 5.338382604446616e-08, - "loss": 3.7623, - "step": 2579500 - }, - { - "epoch": 28.65, - "learning_rate": 5.33699442507163e-08, - "loss": 3.7738, - "step": 2580000 - }, - { - "epoch": 28.66, - "learning_rate": 5.335606245696643e-08, - "loss": 3.7875, - "step": 2580500 - }, - { - "epoch": 28.66, - "learning_rate": 5.3342180663216574e-08, - "loss": 3.7705, - "step": 2581000 - }, - { - "epoch": 28.67, - "learning_rate": 5.3328298869466716e-08, - "loss": 3.7491, - "step": 2581500 - }, - { - "epoch": 28.67, - "learning_rate": 5.331441707571686e-08, - "loss": 3.8024, - "step": 2582000 - }, - { - "epoch": 28.68, - "learning_rate": 5.3300535281966994e-08, - "loss": 3.7934, - "step": 2582500 - }, - { - "epoch": 28.69, - "learning_rate": 5.328665348821712e-08, - "loss": 3.7658, - "step": 2583000 - }, - { - "epoch": 28.69, - "learning_rate": 5.3272771694467265e-08, - "loss": 3.7845, - "step": 2583500 - }, - { - "epoch": 28.7, - "learning_rate": 5.325888990071741e-08, - "loss": 3.7534, - "step": 2584000 - }, - { - "epoch": 28.7, - "learning_rate": 5.324500810696755e-08, - "loss": 3.7584, - "step": 2584500 - }, - { - "epoch": 28.71, - "learning_rate": 5.323112631321769e-08, - "loss": 3.7484, - "step": 2585000 - }, - { - "epoch": 28.71, - "learning_rate": 5.321724451946783e-08, - "loss": 3.7678, - "step": 2585500 - }, - { - "epoch": 28.72, - "learning_rate": 5.3203362725717957e-08, - "loss": 3.7768, - "step": 2586000 - }, - { - "epoch": 28.72, - "learning_rate": 5.31894809319681e-08, - "loss": 3.7751, - "step": 2586500 - }, - { - "epoch": 28.73, - "learning_rate": 5.317559913821824e-08, - "loss": 3.7677, - "step": 2587000 - }, - { - "epoch": 28.74, - "learning_rate": 5.3161717344468383e-08, - "loss": 3.7652, - "step": 2587500 - }, - { - "epoch": 28.74, - "learning_rate": 5.3147835550718526e-08, - "loss": 3.7557, - "step": 2588000 - }, - { - "epoch": 28.75, - "learning_rate": 5.313395375696866e-08, - "loss": 3.7673, - "step": 2588500 - }, - { - "epoch": 28.75, - "learning_rate": 5.312007196321879e-08, - "loss": 3.7609, - "step": 2589000 - }, - { - "epoch": 28.76, - "learning_rate": 5.310619016946893e-08, - "loss": 3.7527, - "step": 2589500 - }, - { - "epoch": 28.76, - "learning_rate": 5.3092308375719075e-08, - "loss": 3.7541, - "step": 2590000 - }, - { - "epoch": 28.77, - "learning_rate": 5.307842658196922e-08, - "loss": 3.7651, - "step": 2590500 - }, - { - "epoch": 28.77, - "learning_rate": 5.306454478821936e-08, - "loss": 3.7737, - "step": 2591000 - }, - { - "epoch": 28.78, - "learning_rate": 5.305066299446949e-08, - "loss": 3.7792, - "step": 2591500 - }, - { - "epoch": 28.79, - "learning_rate": 5.3036781200719624e-08, - "loss": 3.7542, - "step": 2592000 - }, - { - "epoch": 28.79, - "learning_rate": 5.3022899406969766e-08, - "loss": 3.7635, - "step": 2592500 - }, - { - "epoch": 28.8, - "learning_rate": 5.300901761321991e-08, - "loss": 3.7749, - "step": 2593000 - }, - { - "epoch": 28.8, - "learning_rate": 5.299513581947005e-08, - "loss": 3.7702, - "step": 2593500 - }, - { - "epoch": 28.81, - "learning_rate": 5.298125402572019e-08, - "loss": 3.775, - "step": 2594000 - }, - { - "epoch": 28.81, - "learning_rate": 5.296737223197032e-08, - "loss": 3.7669, - "step": 2594500 - }, - { - "epoch": 28.82, - "learning_rate": 5.295349043822046e-08, - "loss": 3.7697, - "step": 2595000 - }, - { - "epoch": 28.82, - "learning_rate": 5.29396086444706e-08, - "loss": 3.7737, - "step": 2595500 - }, - { - "epoch": 28.83, - "learning_rate": 5.292572685072074e-08, - "loss": 3.7578, - "step": 2596000 - }, - { - "epoch": 28.84, - "learning_rate": 5.2911845056970885e-08, - "loss": 3.7476, - "step": 2596500 - }, - { - "epoch": 28.84, - "learning_rate": 5.2897963263221014e-08, - "loss": 3.7765, - "step": 2597000 - }, - { - "epoch": 28.85, - "learning_rate": 5.2884081469471156e-08, - "loss": 3.7741, - "step": 2597500 - }, - { - "epoch": 28.85, - "learning_rate": 5.287019967572129e-08, - "loss": 3.7794, - "step": 2598000 - }, - { - "epoch": 28.86, - "learning_rate": 5.2856317881971434e-08, - "loss": 3.7458, - "step": 2598500 - }, - { - "epoch": 28.86, - "learning_rate": 5.2842436088221576e-08, - "loss": 3.7572, - "step": 2599000 - }, - { - "epoch": 28.87, - "learning_rate": 5.282855429447172e-08, - "loss": 3.7682, - "step": 2599500 - }, - { - "epoch": 28.87, - "learning_rate": 5.281467250072185e-08, - "loss": 3.7551, - "step": 2600000 - }, - { - "epoch": 28.88, - "learning_rate": 5.280079070697199e-08, - "loss": 3.7694, - "step": 2600500 - }, - { - "epoch": 28.89, - "learning_rate": 5.2786908913222125e-08, - "loss": 3.7704, - "step": 2601000 - }, - { - "epoch": 28.89, - "learning_rate": 5.277302711947227e-08, - "loss": 3.776, - "step": 2601500 - }, - { - "epoch": 28.9, - "learning_rate": 5.275914532572241e-08, - "loss": 3.7691, - "step": 2602000 - }, - { - "epoch": 28.9, - "learning_rate": 5.274526353197255e-08, - "loss": 3.7667, - "step": 2602500 - }, - { - "epoch": 28.91, - "learning_rate": 5.273138173822268e-08, - "loss": 3.7694, - "step": 2603000 - }, - { - "epoch": 28.91, - "learning_rate": 5.2717499944472823e-08, - "loss": 3.7928, - "step": 2603500 - }, - { - "epoch": 28.92, - "learning_rate": 5.270361815072296e-08, - "loss": 3.7794, - "step": 2604000 - }, - { - "epoch": 28.92, - "learning_rate": 5.26897363569731e-08, - "loss": 3.766, - "step": 2604500 - }, - { - "epoch": 28.93, - "learning_rate": 5.2675854563223244e-08, - "loss": 3.7884, - "step": 2605000 - }, - { - "epoch": 28.94, - "learning_rate": 5.266197276947337e-08, - "loss": 3.7749, - "step": 2605500 - }, - { - "epoch": 28.94, - "learning_rate": 5.2648090975723515e-08, - "loss": 3.7596, - "step": 2606000 - }, - { - "epoch": 28.95, - "learning_rate": 5.263420918197366e-08, - "loss": 3.7671, - "step": 2606500 - }, - { - "epoch": 28.95, - "learning_rate": 5.262032738822379e-08, - "loss": 3.7743, - "step": 2607000 - }, - { - "epoch": 28.96, - "learning_rate": 5.2606445594473935e-08, - "loss": 3.775, - "step": 2607500 - }, - { - "epoch": 28.96, - "learning_rate": 5.259256380072408e-08, - "loss": 3.7653, - "step": 2608000 - }, - { - "epoch": 28.97, - "learning_rate": 5.2578682006974206e-08, - "loss": 3.7734, - "step": 2608500 - }, - { - "epoch": 28.97, - "learning_rate": 5.256480021322435e-08, - "loss": 3.7429, - "step": 2609000 - }, - { - "epoch": 28.98, - "learning_rate": 5.255091841947449e-08, - "loss": 3.7762, - "step": 2609500 - }, - { - "epoch": 28.99, - "learning_rate": 5.2537036625724626e-08, - "loss": 3.7587, - "step": 2610000 - }, - { - "epoch": 28.99, - "learning_rate": 5.252315483197477e-08, - "loss": 3.7841, - "step": 2610500 - }, - { - "epoch": 29.0, - "learning_rate": 5.25092730382249e-08, - "loss": 3.7746, - "step": 2611000 - }, - { - "epoch": 29.0, - "eval_loss": 3.836146116256714, - "eval_runtime": 6.2989, - "eval_samples_per_second": 246.709, - "step": 2611334 - }, - { - "epoch": 29.0, - "learning_rate": 5.249539124447504e-08, - "loss": 3.7836, - "step": 2611500 - }, - { - "epoch": 29.01, - "learning_rate": 5.248150945072518e-08, - "loss": 3.7712, - "step": 2612000 - }, - { - "epoch": 29.01, - "learning_rate": 5.2467627656975325e-08, - "loss": 3.77, - "step": 2612500 - }, - { - "epoch": 29.02, - "learning_rate": 5.245374586322546e-08, - "loss": 3.7509, - "step": 2613000 - }, - { - "epoch": 29.02, - "learning_rate": 5.24398640694756e-08, - "loss": 3.7809, - "step": 2613500 - }, - { - "epoch": 29.03, - "learning_rate": 5.242598227572573e-08, - "loss": 3.7669, - "step": 2614000 - }, - { - "epoch": 29.04, - "learning_rate": 5.2412100481975874e-08, - "loss": 3.7776, - "step": 2614500 - }, - { - "epoch": 29.04, - "learning_rate": 5.2398218688226016e-08, - "loss": 3.7686, - "step": 2615000 - }, - { - "epoch": 29.05, - "learning_rate": 5.238433689447616e-08, - "loss": 3.767, - "step": 2615500 - }, - { - "epoch": 29.05, - "learning_rate": 5.2370455100726294e-08, - "loss": 3.7638, - "step": 2616000 - }, - { - "epoch": 29.06, - "learning_rate": 5.2356573306976436e-08, - "loss": 3.7723, - "step": 2616500 - }, - { - "epoch": 29.06, - "learning_rate": 5.2342691513226565e-08, - "loss": 3.7813, - "step": 2617000 - }, - { - "epoch": 29.07, - "learning_rate": 5.232880971947671e-08, - "loss": 3.7649, - "step": 2617500 - }, - { - "epoch": 29.07, - "learning_rate": 5.231492792572685e-08, - "loss": 3.7858, - "step": 2618000 - }, - { - "epoch": 29.08, - "learning_rate": 5.230104613197699e-08, - "loss": 3.7564, - "step": 2618500 - }, - { - "epoch": 29.09, - "learning_rate": 5.228716433822713e-08, - "loss": 3.7837, - "step": 2619000 - }, - { - "epoch": 29.09, - "learning_rate": 5.227328254447726e-08, - "loss": 3.7526, - "step": 2619500 - }, - { - "epoch": 29.1, - "learning_rate": 5.22594007507274e-08, - "loss": 3.7426, - "step": 2620000 - }, - { - "epoch": 29.1, - "learning_rate": 5.224551895697754e-08, - "loss": 3.7511, - "step": 2620500 - }, - { - "epoch": 29.11, - "learning_rate": 5.2231637163227683e-08, - "loss": 3.7685, - "step": 2621000 - }, - { - "epoch": 29.11, - "learning_rate": 5.2217755369477826e-08, - "loss": 3.7696, - "step": 2621500 - }, - { - "epoch": 29.12, - "learning_rate": 5.220387357572796e-08, - "loss": 3.7784, - "step": 2622000 - }, - { - "epoch": 29.12, - "learning_rate": 5.218999178197809e-08, - "loss": 3.7704, - "step": 2622500 - }, - { - "epoch": 29.13, - "learning_rate": 5.217610998822823e-08, - "loss": 3.7666, - "step": 2623000 - }, - { - "epoch": 29.14, - "learning_rate": 5.2162228194478375e-08, - "loss": 3.7656, - "step": 2623500 - }, - { - "epoch": 29.14, - "learning_rate": 5.214834640072852e-08, - "loss": 3.776, - "step": 2624000 - }, - { - "epoch": 29.15, - "learning_rate": 5.213446460697866e-08, - "loss": 3.7656, - "step": 2624500 - }, - { - "epoch": 29.15, - "learning_rate": 5.2120582813228795e-08, - "loss": 3.7611, - "step": 2625000 - }, - { - "epoch": 29.16, - "learning_rate": 5.2106701019478924e-08, - "loss": 3.7619, - "step": 2625500 - }, - { - "epoch": 29.16, - "learning_rate": 5.2092819225729066e-08, - "loss": 3.7549, - "step": 2626000 - }, - { - "epoch": 29.17, - "learning_rate": 5.207893743197921e-08, - "loss": 3.756, - "step": 2626500 - }, - { - "epoch": 29.17, - "learning_rate": 5.206505563822935e-08, - "loss": 3.775, - "step": 2627000 - }, - { - "epoch": 29.18, - "learning_rate": 5.205117384447949e-08, - "loss": 3.7486, - "step": 2627500 - }, - { - "epoch": 29.19, - "learning_rate": 5.203729205072962e-08, - "loss": 3.7472, - "step": 2628000 - }, - { - "epoch": 29.19, - "learning_rate": 5.202341025697976e-08, - "loss": 3.7592, - "step": 2628500 - }, - { - "epoch": 29.2, - "learning_rate": 5.20095284632299e-08, - "loss": 3.7796, - "step": 2629000 - }, - { - "epoch": 29.2, - "learning_rate": 5.199564666948004e-08, - "loss": 3.7748, - "step": 2629500 - }, - { - "epoch": 29.21, - "learning_rate": 5.1981764875730185e-08, - "loss": 3.7732, - "step": 2630000 - }, - { - "epoch": 29.21, - "learning_rate": 5.196788308198033e-08, - "loss": 3.7515, - "step": 2630500 - }, - { - "epoch": 29.22, - "learning_rate": 5.1954001288230456e-08, - "loss": 3.7697, - "step": 2631000 - }, - { - "epoch": 29.22, - "learning_rate": 5.194011949448059e-08, - "loss": 3.7469, - "step": 2631500 - }, - { - "epoch": 29.23, - "learning_rate": 5.1926237700730734e-08, - "loss": 3.7817, - "step": 2632000 - }, - { - "epoch": 29.24, - "learning_rate": 5.1912355906980876e-08, - "loss": 3.7532, - "step": 2632500 - }, - { - "epoch": 29.24, - "learning_rate": 5.189847411323102e-08, - "loss": 3.7805, - "step": 2633000 - }, - { - "epoch": 29.25, - "learning_rate": 5.188459231948115e-08, - "loss": 3.7704, - "step": 2633500 - }, - { - "epoch": 29.25, - "learning_rate": 5.187071052573129e-08, - "loss": 3.7754, - "step": 2634000 - }, - { - "epoch": 29.26, - "learning_rate": 5.1856828731981425e-08, - "loss": 3.7734, - "step": 2634500 - }, - { - "epoch": 29.26, - "learning_rate": 5.184294693823157e-08, - "loss": 3.7508, - "step": 2635000 - }, - { - "epoch": 29.27, - "learning_rate": 5.182906514448171e-08, - "loss": 3.771, - "step": 2635500 - }, - { - "epoch": 29.27, - "learning_rate": 5.181518335073185e-08, - "loss": 3.77, - "step": 2636000 - }, - { - "epoch": 29.28, - "learning_rate": 5.180130155698198e-08, - "loss": 3.7555, - "step": 2636500 - }, - { - "epoch": 29.29, - "learning_rate": 5.1787419763232123e-08, - "loss": 3.7738, - "step": 2637000 - }, - { - "epoch": 29.29, - "learning_rate": 5.177353796948226e-08, - "loss": 3.7749, - "step": 2637500 - }, - { - "epoch": 29.3, - "learning_rate": 5.17596561757324e-08, - "loss": 3.7692, - "step": 2638000 - }, - { - "epoch": 29.3, - "learning_rate": 5.1745774381982544e-08, - "loss": 3.7758, - "step": 2638500 - }, - { - "epoch": 29.31, - "learning_rate": 5.1731892588232686e-08, - "loss": 3.7551, - "step": 2639000 - }, - { - "epoch": 29.31, - "learning_rate": 5.1718010794482815e-08, - "loss": 3.7752, - "step": 2639500 - }, - { - "epoch": 29.32, - "learning_rate": 5.170412900073296e-08, - "loss": 3.7765, - "step": 2640000 - }, - { - "epoch": 29.32, - "learning_rate": 5.169024720698309e-08, - "loss": 3.7751, - "step": 2640500 - }, - { - "epoch": 29.33, - "learning_rate": 5.1676365413233235e-08, - "loss": 3.7584, - "step": 2641000 - }, - { - "epoch": 29.34, - "learning_rate": 5.166248361948338e-08, - "loss": 3.7634, - "step": 2641500 - }, - { - "epoch": 29.34, - "learning_rate": 5.1648601825733506e-08, - "loss": 3.781, - "step": 2642000 - }, - { - "epoch": 29.35, - "learning_rate": 5.163472003198365e-08, - "loss": 3.7671, - "step": 2642500 - }, - { - "epoch": 29.35, - "learning_rate": 5.162083823823379e-08, - "loss": 3.7683, - "step": 2643000 - }, - { - "epoch": 29.36, - "learning_rate": 5.1606956444483927e-08, - "loss": 3.7547, - "step": 2643500 - }, - { - "epoch": 29.36, - "learning_rate": 5.159307465073407e-08, - "loss": 3.7499, - "step": 2644000 - }, - { - "epoch": 29.37, - "learning_rate": 5.157919285698421e-08, - "loss": 3.781, - "step": 2644500 - }, - { - "epoch": 29.37, - "learning_rate": 5.156531106323434e-08, - "loss": 3.7786, - "step": 2645000 - }, - { - "epoch": 29.38, - "learning_rate": 5.155142926948448e-08, - "loss": 3.7796, - "step": 2645500 - }, - { - "epoch": 29.38, - "learning_rate": 5.1537547475734625e-08, - "loss": 3.7593, - "step": 2646000 - }, - { - "epoch": 29.39, - "learning_rate": 5.152366568198476e-08, - "loss": 3.7875, - "step": 2646500 - }, - { - "epoch": 29.4, - "learning_rate": 5.15097838882349e-08, - "loss": 3.7678, - "step": 2647000 - }, - { - "epoch": 29.4, - "learning_rate": 5.1495902094485045e-08, - "loss": 3.7732, - "step": 2647500 - }, - { - "epoch": 29.41, - "learning_rate": 5.1482020300735174e-08, - "loss": 3.7924, - "step": 2648000 - }, - { - "epoch": 29.41, - "learning_rate": 5.1468138506985316e-08, - "loss": 3.7593, - "step": 2648500 - }, - { - "epoch": 29.42, - "learning_rate": 5.145425671323546e-08, - "loss": 3.7542, - "step": 2649000 - }, - { - "epoch": 29.42, - "learning_rate": 5.1440374919485594e-08, - "loss": 3.7581, - "step": 2649500 - }, - { - "epoch": 29.43, - "learning_rate": 5.1426493125735736e-08, - "loss": 3.7624, - "step": 2650000 - }, - { - "epoch": 29.43, - "learning_rate": 5.1412611331985865e-08, - "loss": 3.7558, - "step": 2650500 - }, - { - "epoch": 29.44, - "learning_rate": 5.139872953823601e-08, - "loss": 3.7723, - "step": 2651000 - }, - { - "epoch": 29.45, - "learning_rate": 5.138484774448615e-08, - "loss": 3.7777, - "step": 2651500 - }, - { - "epoch": 29.45, - "learning_rate": 5.137096595073629e-08, - "loss": 3.7681, - "step": 2652000 - }, - { - "epoch": 29.46, - "learning_rate": 5.135708415698643e-08, - "loss": 3.7632, - "step": 2652500 - }, - { - "epoch": 29.46, - "learning_rate": 5.134320236323657e-08, - "loss": 3.7764, - "step": 2653000 - }, - { - "epoch": 29.47, - "learning_rate": 5.13293205694867e-08, - "loss": 3.7549, - "step": 2653500 - }, - { - "epoch": 29.47, - "learning_rate": 5.131543877573684e-08, - "loss": 3.78, - "step": 2654000 - }, - { - "epoch": 29.48, - "learning_rate": 5.1301556981986984e-08, - "loss": 3.7548, - "step": 2654500 - }, - { - "epoch": 29.48, - "learning_rate": 5.1287675188237126e-08, - "loss": 3.7605, - "step": 2655000 - }, - { - "epoch": 29.49, - "learning_rate": 5.127379339448726e-08, - "loss": 3.7659, - "step": 2655500 - }, - { - "epoch": 29.5, - "learning_rate": 5.125991160073739e-08, - "loss": 3.7814, - "step": 2656000 - }, - { - "epoch": 29.5, - "learning_rate": 5.124602980698753e-08, - "loss": 3.7644, - "step": 2656500 - }, - { - "epoch": 29.51, - "learning_rate": 5.1232148013237675e-08, - "loss": 3.7524, - "step": 2657000 - }, - { - "epoch": 29.51, - "learning_rate": 5.121826621948782e-08, - "loss": 3.7788, - "step": 2657500 - }, - { - "epoch": 29.52, - "learning_rate": 5.120438442573796e-08, - "loss": 3.7704, - "step": 2658000 - }, - { - "epoch": 29.52, - "learning_rate": 5.1190502631988095e-08, - "loss": 3.7571, - "step": 2658500 - }, - { - "epoch": 29.53, - "learning_rate": 5.1176620838238224e-08, - "loss": 3.7555, - "step": 2659000 - }, - { - "epoch": 29.53, - "learning_rate": 5.1162739044488367e-08, - "loss": 3.7691, - "step": 2659500 - }, - { - "epoch": 29.54, - "learning_rate": 5.114885725073851e-08, - "loss": 3.7753, - "step": 2660000 - }, - { - "epoch": 29.55, - "learning_rate": 5.113497545698865e-08, - "loss": 3.7795, - "step": 2660500 - }, - { - "epoch": 29.55, - "learning_rate": 5.112109366323879e-08, - "loss": 3.7595, - "step": 2661000 - }, - { - "epoch": 29.56, - "learning_rate": 5.110721186948893e-08, - "loss": 3.775, - "step": 2661500 - }, - { - "epoch": 29.56, - "learning_rate": 5.109333007573906e-08, - "loss": 3.7522, - "step": 2662000 - }, - { - "epoch": 29.57, - "learning_rate": 5.10794482819892e-08, - "loss": 3.7524, - "step": 2662500 - }, - { - "epoch": 29.57, - "learning_rate": 5.106556648823934e-08, - "loss": 3.7649, - "step": 2663000 - }, - { - "epoch": 29.58, - "learning_rate": 5.1051684694489485e-08, - "loss": 3.7763, - "step": 2663500 - }, - { - "epoch": 29.58, - "learning_rate": 5.103780290073963e-08, - "loss": 3.75, - "step": 2664000 - }, - { - "epoch": 29.59, - "learning_rate": 5.1023921106989756e-08, - "loss": 3.7703, - "step": 2664500 - }, - { - "epoch": 29.6, - "learning_rate": 5.101003931323989e-08, - "loss": 3.7751, - "step": 2665000 - }, - { - "epoch": 29.6, - "learning_rate": 5.0996157519490034e-08, - "loss": 3.7531, - "step": 2665500 - }, - { - "epoch": 29.61, - "learning_rate": 5.0982275725740176e-08, - "loss": 3.7502, - "step": 2666000 - }, - { - "epoch": 29.61, - "learning_rate": 5.096839393199032e-08, - "loss": 3.7666, - "step": 2666500 - }, - { - "epoch": 29.62, - "learning_rate": 5.095451213824046e-08, - "loss": 3.7664, - "step": 2667000 - }, - { - "epoch": 29.62, - "learning_rate": 5.094063034449059e-08, - "loss": 3.7618, - "step": 2667500 - }, - { - "epoch": 29.63, - "learning_rate": 5.0926748550740725e-08, - "loss": 3.7658, - "step": 2668000 - }, - { - "epoch": 29.63, - "learning_rate": 5.091286675699087e-08, - "loss": 3.7573, - "step": 2668500 - }, - { - "epoch": 29.64, - "learning_rate": 5.089898496324101e-08, - "loss": 3.7585, - "step": 2669000 - }, - { - "epoch": 29.65, - "learning_rate": 5.088510316949115e-08, - "loss": 3.7745, - "step": 2669500 - }, - { - "epoch": 29.65, - "learning_rate": 5.087122137574128e-08, - "loss": 3.7827, - "step": 2670000 - }, - { - "epoch": 29.66, - "learning_rate": 5.0857339581991424e-08, - "loss": 3.7803, - "step": 2670500 - }, - { - "epoch": 29.66, - "learning_rate": 5.084345778824156e-08, - "loss": 3.7764, - "step": 2671000 - }, - { - "epoch": 29.67, - "learning_rate": 5.08295759944917e-08, - "loss": 3.7813, - "step": 2671500 - }, - { - "epoch": 29.67, - "learning_rate": 5.0815694200741844e-08, - "loss": 3.7388, - "step": 2672000 - }, - { - "epoch": 29.68, - "learning_rate": 5.0801812406991986e-08, - "loss": 3.7636, - "step": 2672500 - }, - { - "epoch": 29.68, - "learning_rate": 5.0787930613242115e-08, - "loss": 3.7608, - "step": 2673000 - }, - { - "epoch": 29.69, - "learning_rate": 5.077404881949226e-08, - "loss": 3.7752, - "step": 2673500 - }, - { - "epoch": 29.7, - "learning_rate": 5.076016702574239e-08, - "loss": 3.7728, - "step": 2674000 - }, - { - "epoch": 29.7, - "learning_rate": 5.0746285231992535e-08, - "loss": 3.7556, - "step": 2674500 - }, - { - "epoch": 29.71, - "learning_rate": 5.073240343824268e-08, - "loss": 3.7533, - "step": 2675000 - }, - { - "epoch": 29.71, - "learning_rate": 5.071852164449282e-08, - "loss": 3.79, - "step": 2675500 - }, - { - "epoch": 29.72, - "learning_rate": 5.070463985074295e-08, - "loss": 3.7713, - "step": 2676000 - }, - { - "epoch": 29.72, - "learning_rate": 5.069075805699309e-08, - "loss": 3.7606, - "step": 2676500 - }, - { - "epoch": 29.73, - "learning_rate": 5.0676876263243227e-08, - "loss": 3.7742, - "step": 2677000 - }, - { - "epoch": 29.73, - "learning_rate": 5.066299446949337e-08, - "loss": 3.762, - "step": 2677500 - }, - { - "epoch": 29.74, - "learning_rate": 5.064911267574351e-08, - "loss": 3.7516, - "step": 2678000 - }, - { - "epoch": 29.75, - "learning_rate": 5.063523088199364e-08, - "loss": 3.774, - "step": 2678500 - }, - { - "epoch": 29.75, - "learning_rate": 5.062134908824378e-08, - "loss": 3.7729, - "step": 2679000 - }, - { - "epoch": 29.76, - "learning_rate": 5.0607467294493925e-08, - "loss": 3.7587, - "step": 2679500 - }, - { - "epoch": 29.76, - "learning_rate": 5.059358550074406e-08, - "loss": 3.7804, - "step": 2680000 - }, - { - "epoch": 29.77, - "learning_rate": 5.05797037069942e-08, - "loss": 3.7754, - "step": 2680500 - }, - { - "epoch": 29.77, - "learning_rate": 5.0565821913244345e-08, - "loss": 3.7708, - "step": 2681000 - }, - { - "epoch": 29.78, - "learning_rate": 5.0551940119494474e-08, - "loss": 3.7724, - "step": 2681500 - }, - { - "epoch": 29.78, - "learning_rate": 5.0538058325744616e-08, - "loss": 3.7744, - "step": 2682000 - }, - { - "epoch": 29.79, - "learning_rate": 5.052417653199476e-08, - "loss": 3.7566, - "step": 2682500 - }, - { - "epoch": 29.8, - "learning_rate": 5.0510294738244894e-08, - "loss": 3.7669, - "step": 2683000 - }, - { - "epoch": 29.8, - "learning_rate": 5.0496412944495036e-08, - "loss": 3.7627, - "step": 2683500 - }, - { - "epoch": 29.81, - "learning_rate": 5.048253115074518e-08, - "loss": 3.7537, - "step": 2684000 - }, - { - "epoch": 29.81, - "learning_rate": 5.046864935699531e-08, - "loss": 3.7803, - "step": 2684500 - }, - { - "epoch": 29.82, - "learning_rate": 5.045476756324545e-08, - "loss": 3.7685, - "step": 2685000 - }, - { - "epoch": 29.82, - "learning_rate": 5.044088576949559e-08, - "loss": 3.7589, - "step": 2685500 - }, - { - "epoch": 29.83, - "learning_rate": 5.042700397574573e-08, - "loss": 3.758, - "step": 2686000 - }, - { - "epoch": 29.83, - "learning_rate": 5.041312218199587e-08, - "loss": 3.772, - "step": 2686500 - }, - { - "epoch": 29.84, - "learning_rate": 5.0399240388246e-08, - "loss": 3.7625, - "step": 2687000 - }, - { - "epoch": 29.85, - "learning_rate": 5.038535859449614e-08, - "loss": 3.7611, - "step": 2687500 - }, - { - "epoch": 29.85, - "learning_rate": 5.0371476800746284e-08, - "loss": 3.7872, - "step": 2688000 - }, - { - "epoch": 29.86, - "learning_rate": 5.0357595006996426e-08, - "loss": 3.7626, - "step": 2688500 - }, - { - "epoch": 29.86, - "learning_rate": 5.034371321324656e-08, - "loss": 3.7752, - "step": 2689000 - }, - { - "epoch": 29.87, - "learning_rate": 5.0329831419496704e-08, - "loss": 3.7508, - "step": 2689500 - }, - { - "epoch": 29.87, - "learning_rate": 5.031594962574683e-08, - "loss": 3.7767, - "step": 2690000 - }, - { - "epoch": 29.88, - "learning_rate": 5.0302067831996975e-08, - "loss": 3.7667, - "step": 2690500 - }, - { - "epoch": 29.88, - "learning_rate": 5.028818603824712e-08, - "loss": 3.759, - "step": 2691000 - }, - { - "epoch": 29.89, - "learning_rate": 5.027430424449726e-08, - "loss": 3.7686, - "step": 2691500 - }, - { - "epoch": 29.9, - "learning_rate": 5.0260422450747395e-08, - "loss": 3.7747, - "step": 2692000 - }, - { - "epoch": 29.9, - "learning_rate": 5.0246540656997524e-08, - "loss": 3.7592, - "step": 2692500 - }, - { - "epoch": 29.91, - "learning_rate": 5.0232658863247667e-08, - "loss": 3.7826, - "step": 2693000 - }, - { - "epoch": 29.91, - "learning_rate": 5.021877706949781e-08, - "loss": 3.7786, - "step": 2693500 - }, - { - "epoch": 29.92, - "learning_rate": 5.020489527574795e-08, - "loss": 3.7856, - "step": 2694000 - }, - { - "epoch": 29.92, - "learning_rate": 5.0191013481998093e-08, - "loss": 3.7665, - "step": 2694500 - }, - { - "epoch": 29.93, - "learning_rate": 5.017713168824823e-08, - "loss": 3.7503, - "step": 2695000 - }, - { - "epoch": 29.93, - "learning_rate": 5.016324989449836e-08, - "loss": 3.77, - "step": 2695500 - }, - { - "epoch": 29.94, - "learning_rate": 5.01493681007485e-08, - "loss": 3.7702, - "step": 2696000 - }, - { - "epoch": 29.95, - "learning_rate": 5.013548630699864e-08, - "loss": 3.7729, - "step": 2696500 - }, - { - "epoch": 29.95, - "learning_rate": 5.0121604513248785e-08, - "loss": 3.7511, - "step": 2697000 - }, - { - "epoch": 29.96, - "learning_rate": 5.010772271949893e-08, - "loss": 3.7647, - "step": 2697500 - }, - { - "epoch": 29.96, - "learning_rate": 5.009384092574906e-08, - "loss": 3.7785, - "step": 2698000 - }, - { - "epoch": 29.97, - "learning_rate": 5.007995913199919e-08, - "loss": 3.7635, - "step": 2698500 - }, - { - "epoch": 29.97, - "learning_rate": 5.0066077338249334e-08, - "loss": 3.7817, - "step": 2699000 - }, - { - "epoch": 29.98, - "learning_rate": 5.0052195544499476e-08, - "loss": 3.7431, - "step": 2699500 - }, - { - "epoch": 29.98, - "learning_rate": 5.003831375074962e-08, - "loss": 3.751, - "step": 2700000 - }, - { - "epoch": 29.99, - "learning_rate": 5.0024431956999754e-08, - "loss": 3.7688, - "step": 2700500 - }, - { - "epoch": 30.0, - "learning_rate": 5.001055016324989e-08, - "loss": 3.7721, - "step": 2701000 - }, - { - "epoch": 30.0, - "eval_loss": 3.8352878093719482, - "eval_runtime": 6.305, - "eval_samples_per_second": 246.469, - "step": 2701380 - }, - { - "epoch": 30.0, - "learning_rate": 4.999666836950003e-08, - "loss": 3.7702, - "step": 2701500 - }, - { - "epoch": 30.01, - "learning_rate": 4.998278657575017e-08, - "loss": 3.7899, - "step": 2702000 - }, - { - "epoch": 30.01, - "learning_rate": 4.996890478200031e-08, - "loss": 3.7609, - "step": 2702500 - }, - { - "epoch": 30.02, - "learning_rate": 4.9955022988250446e-08, - "loss": 3.7615, - "step": 2703000 - }, - { - "epoch": 30.02, - "learning_rate": 4.994114119450059e-08, - "loss": 3.7746, - "step": 2703500 - }, - { - "epoch": 30.03, - "learning_rate": 4.9927259400750724e-08, - "loss": 3.7662, - "step": 2704000 - }, - { - "epoch": 30.03, - "learning_rate": 4.991337760700086e-08, - "loss": 3.7601, - "step": 2704500 - }, - { - "epoch": 30.04, - "learning_rate": 4.9899495813251e-08, - "loss": 3.7351, - "step": 2705000 - }, - { - "epoch": 30.05, - "learning_rate": 4.9885614019501144e-08, - "loss": 3.77, - "step": 2705500 - }, - { - "epoch": 30.05, - "learning_rate": 4.987173222575128e-08, - "loss": 3.7664, - "step": 2706000 - }, - { - "epoch": 30.06, - "learning_rate": 4.985785043200142e-08, - "loss": 3.7691, - "step": 2706500 - }, - { - "epoch": 30.06, - "learning_rate": 4.984396863825156e-08, - "loss": 3.7685, - "step": 2707000 - }, - { - "epoch": 30.07, - "learning_rate": 4.983008684450169e-08, - "loss": 3.7541, - "step": 2707500 - }, - { - "epoch": 30.07, - "learning_rate": 4.9816205050751835e-08, - "loss": 3.7622, - "step": 2708000 - }, - { - "epoch": 30.08, - "learning_rate": 4.980232325700198e-08, - "loss": 3.7779, - "step": 2708500 - }, - { - "epoch": 30.08, - "learning_rate": 4.978844146325211e-08, - "loss": 3.7647, - "step": 2709000 - }, - { - "epoch": 30.09, - "learning_rate": 4.9774559669502255e-08, - "loss": 3.7736, - "step": 2709500 - }, - { - "epoch": 30.1, - "learning_rate": 4.976067787575239e-08, - "loss": 3.7728, - "step": 2710000 - }, - { - "epoch": 30.1, - "learning_rate": 4.974679608200253e-08, - "loss": 3.7479, - "step": 2710500 - }, - { - "epoch": 30.11, - "learning_rate": 4.973291428825267e-08, - "loss": 3.7517, - "step": 2711000 - }, - { - "epoch": 30.11, - "learning_rate": 4.9719032494502805e-08, - "loss": 3.7607, - "step": 2711500 - }, - { - "epoch": 30.12, - "learning_rate": 4.970515070075295e-08, - "loss": 3.7558, - "step": 2712000 - }, - { - "epoch": 30.12, - "learning_rate": 4.969126890700309e-08, - "loss": 3.7703, - "step": 2712500 - }, - { - "epoch": 30.13, - "learning_rate": 4.9677387113253225e-08, - "loss": 3.7723, - "step": 2713000 - }, - { - "epoch": 30.13, - "learning_rate": 4.966350531950336e-08, - "loss": 3.7644, - "step": 2713500 - }, - { - "epoch": 30.14, - "learning_rate": 4.96496235257535e-08, - "loss": 3.7544, - "step": 2714000 - }, - { - "epoch": 30.15, - "learning_rate": 4.963574173200364e-08, - "loss": 3.7594, - "step": 2714500 - }, - { - "epoch": 30.15, - "learning_rate": 4.962185993825378e-08, - "loss": 3.769, - "step": 2715000 - }, - { - "epoch": 30.16, - "learning_rate": 4.960797814450392e-08, - "loss": 3.7682, - "step": 2715500 - }, - { - "epoch": 30.16, - "learning_rate": 4.959409635075406e-08, - "loss": 3.7493, - "step": 2716000 - }, - { - "epoch": 30.17, - "learning_rate": 4.9580214557004194e-08, - "loss": 3.761, - "step": 2716500 - }, - { - "epoch": 30.17, - "learning_rate": 4.9566332763254336e-08, - "loss": 3.7622, - "step": 2717000 - }, - { - "epoch": 30.18, - "learning_rate": 4.955245096950447e-08, - "loss": 3.7678, - "step": 2717500 - }, - { - "epoch": 30.18, - "learning_rate": 4.9538569175754614e-08, - "loss": 3.7566, - "step": 2718000 - }, - { - "epoch": 30.19, - "learning_rate": 4.952468738200475e-08, - "loss": 3.773, - "step": 2718500 - }, - { - "epoch": 30.2, - "learning_rate": 4.951080558825489e-08, - "loss": 3.7724, - "step": 2719000 - }, - { - "epoch": 30.2, - "learning_rate": 4.949692379450503e-08, - "loss": 3.7571, - "step": 2719500 - }, - { - "epoch": 30.21, - "learning_rate": 4.9483042000755164e-08, - "loss": 3.787, - "step": 2720000 - }, - { - "epoch": 30.21, - "learning_rate": 4.9469160207005306e-08, - "loss": 3.7637, - "step": 2720500 - }, - { - "epoch": 30.22, - "learning_rate": 4.945527841325545e-08, - "loss": 3.7551, - "step": 2721000 - }, - { - "epoch": 30.22, - "learning_rate": 4.9441396619505584e-08, - "loss": 3.7619, - "step": 2721500 - }, - { - "epoch": 30.23, - "learning_rate": 4.9427514825755726e-08, - "loss": 3.7712, - "step": 2722000 - }, - { - "epoch": 30.23, - "learning_rate": 4.941363303200586e-08, - "loss": 3.751, - "step": 2722500 - }, - { - "epoch": 30.24, - "learning_rate": 4.9399751238256e-08, - "loss": 3.7662, - "step": 2723000 - }, - { - "epoch": 30.25, - "learning_rate": 4.938586944450614e-08, - "loss": 3.7664, - "step": 2723500 - }, - { - "epoch": 30.25, - "learning_rate": 4.937198765075628e-08, - "loss": 3.7866, - "step": 2724000 - }, - { - "epoch": 30.26, - "learning_rate": 4.935810585700642e-08, - "loss": 3.771, - "step": 2724500 - }, - { - "epoch": 30.26, - "learning_rate": 4.934422406325656e-08, - "loss": 3.7731, - "step": 2725000 - }, - { - "epoch": 30.27, - "learning_rate": 4.933034226950669e-08, - "loss": 3.7862, - "step": 2725500 - }, - { - "epoch": 30.27, - "learning_rate": 4.931646047575683e-08, - "loss": 3.7815, - "step": 2726000 - }, - { - "epoch": 30.28, - "learning_rate": 4.930257868200697e-08, - "loss": 3.7546, - "step": 2726500 - }, - { - "epoch": 30.28, - "learning_rate": 4.928869688825711e-08, - "loss": 3.7842, - "step": 2727000 - }, - { - "epoch": 30.29, - "learning_rate": 4.927481509450725e-08, - "loss": 3.7531, - "step": 2727500 - }, - { - "epoch": 30.3, - "learning_rate": 4.9260933300757393e-08, - "loss": 3.7476, - "step": 2728000 - }, - { - "epoch": 30.3, - "learning_rate": 4.924705150700752e-08, - "loss": 3.7653, - "step": 2728500 - }, - { - "epoch": 30.31, - "learning_rate": 4.9233169713257665e-08, - "loss": 3.7563, - "step": 2729000 - }, - { - "epoch": 30.31, - "learning_rate": 4.921928791950781e-08, - "loss": 3.7709, - "step": 2729500 - }, - { - "epoch": 30.32, - "learning_rate": 4.920540612575794e-08, - "loss": 3.7798, - "step": 2730000 - }, - { - "epoch": 30.32, - "learning_rate": 4.9191524332008085e-08, - "loss": 3.7673, - "step": 2730500 - }, - { - "epoch": 30.33, - "learning_rate": 4.917764253825822e-08, - "loss": 3.7492, - "step": 2731000 - }, - { - "epoch": 30.33, - "learning_rate": 4.9163760744508356e-08, - "loss": 3.7526, - "step": 2731500 - }, - { - "epoch": 30.34, - "learning_rate": 4.91498789507585e-08, - "loss": 3.763, - "step": 2732000 - }, - { - "epoch": 30.35, - "learning_rate": 4.9135997157008634e-08, - "loss": 3.7607, - "step": 2732500 - }, - { - "epoch": 30.35, - "learning_rate": 4.9122115363258776e-08, - "loss": 3.7733, - "step": 2733000 - }, - { - "epoch": 30.36, - "learning_rate": 4.910823356950892e-08, - "loss": 3.7695, - "step": 2733500 - }, - { - "epoch": 30.36, - "learning_rate": 4.9094351775759054e-08, - "loss": 3.7745, - "step": 2734000 - }, - { - "epoch": 30.37, - "learning_rate": 4.908046998200919e-08, - "loss": 3.7843, - "step": 2734500 - }, - { - "epoch": 30.37, - "learning_rate": 4.906658818825933e-08, - "loss": 3.7719, - "step": 2735000 - }, - { - "epoch": 30.38, - "learning_rate": 4.905270639450947e-08, - "loss": 3.7753, - "step": 2735500 - }, - { - "epoch": 30.38, - "learning_rate": 4.903882460075961e-08, - "loss": 3.7879, - "step": 2736000 - }, - { - "epoch": 30.39, - "learning_rate": 4.902494280700975e-08, - "loss": 3.7513, - "step": 2736500 - }, - { - "epoch": 30.4, - "learning_rate": 4.901106101325989e-08, - "loss": 3.7607, - "step": 2737000 - }, - { - "epoch": 30.4, - "learning_rate": 4.8997179219510024e-08, - "loss": 3.7629, - "step": 2737500 - }, - { - "epoch": 30.41, - "learning_rate": 4.8983297425760166e-08, - "loss": 3.7629, - "step": 2738000 - }, - { - "epoch": 30.41, - "learning_rate": 4.89694156320103e-08, - "loss": 3.7572, - "step": 2738500 - }, - { - "epoch": 30.42, - "learning_rate": 4.8955533838260444e-08, - "loss": 3.7711, - "step": 2739000 - }, - { - "epoch": 30.42, - "learning_rate": 4.894165204451058e-08, - "loss": 3.7762, - "step": 2739500 - }, - { - "epoch": 30.43, - "learning_rate": 4.892777025076072e-08, - "loss": 3.7607, - "step": 2740000 - }, - { - "epoch": 30.43, - "learning_rate": 4.891388845701086e-08, - "loss": 3.7783, - "step": 2740500 - }, - { - "epoch": 30.44, - "learning_rate": 4.890000666326099e-08, - "loss": 3.7566, - "step": 2741000 - }, - { - "epoch": 30.45, - "learning_rate": 4.8886124869511135e-08, - "loss": 3.7483, - "step": 2741500 - }, - { - "epoch": 30.45, - "learning_rate": 4.887224307576128e-08, - "loss": 3.765, - "step": 2742000 - }, - { - "epoch": 30.46, - "learning_rate": 4.885836128201141e-08, - "loss": 3.7673, - "step": 2742500 - }, - { - "epoch": 30.46, - "learning_rate": 4.8844479488261556e-08, - "loss": 3.7414, - "step": 2743000 - }, - { - "epoch": 30.47, - "learning_rate": 4.883059769451169e-08, - "loss": 3.7608, - "step": 2743500 - }, - { - "epoch": 30.47, - "learning_rate": 4.881671590076183e-08, - "loss": 3.7728, - "step": 2744000 - }, - { - "epoch": 30.48, - "learning_rate": 4.880283410701197e-08, - "loss": 3.7762, - "step": 2744500 - }, - { - "epoch": 30.48, - "learning_rate": 4.878895231326211e-08, - "loss": 3.7671, - "step": 2745000 - }, - { - "epoch": 30.49, - "learning_rate": 4.877507051951225e-08, - "loss": 3.7777, - "step": 2745500 - }, - { - "epoch": 30.5, - "learning_rate": 4.876118872576239e-08, - "loss": 3.7601, - "step": 2746000 - }, - { - "epoch": 30.5, - "learning_rate": 4.8747306932012525e-08, - "loss": 3.7615, - "step": 2746500 - }, - { - "epoch": 30.51, - "learning_rate": 4.873342513826266e-08, - "loss": 3.7783, - "step": 2747000 - }, - { - "epoch": 30.51, - "learning_rate": 4.87195433445128e-08, - "loss": 3.7406, - "step": 2747500 - }, - { - "epoch": 30.52, - "learning_rate": 4.870566155076294e-08, - "loss": 3.7574, - "step": 2748000 - }, - { - "epoch": 30.52, - "learning_rate": 4.869177975701308e-08, - "loss": 3.7689, - "step": 2748500 - }, - { - "epoch": 30.53, - "learning_rate": 4.867789796326322e-08, - "loss": 3.7399, - "step": 2749000 - }, - { - "epoch": 30.53, - "learning_rate": 4.866401616951336e-08, - "loss": 3.7592, - "step": 2749500 - }, - { - "epoch": 30.54, - "learning_rate": 4.8650134375763494e-08, - "loss": 3.7522, - "step": 2750000 - }, - { - "epoch": 30.55, - "learning_rate": 4.8636252582013637e-08, - "loss": 3.7614, - "step": 2750500 - }, - { - "epoch": 30.55, - "learning_rate": 4.862237078826377e-08, - "loss": 3.7633, - "step": 2751000 - }, - { - "epoch": 30.56, - "learning_rate": 4.8608488994513914e-08, - "loss": 3.7892, - "step": 2751500 - }, - { - "epoch": 30.56, - "learning_rate": 4.859460720076406e-08, - "loss": 3.753, - "step": 2752000 - }, - { - "epoch": 30.57, - "learning_rate": 4.858072540701419e-08, - "loss": 3.7537, - "step": 2752500 - }, - { - "epoch": 30.57, - "learning_rate": 4.856684361326433e-08, - "loss": 3.7605, - "step": 2753000 - }, - { - "epoch": 30.58, - "learning_rate": 4.855296181951447e-08, - "loss": 3.7797, - "step": 2753500 - }, - { - "epoch": 30.58, - "learning_rate": 4.8539080025764606e-08, - "loss": 3.7838, - "step": 2754000 - }, - { - "epoch": 30.59, - "learning_rate": 4.852519823201475e-08, - "loss": 3.7578, - "step": 2754500 - }, - { - "epoch": 30.6, - "learning_rate": 4.8511316438264884e-08, - "loss": 3.7689, - "step": 2755000 - }, - { - "epoch": 30.6, - "learning_rate": 4.8497434644515026e-08, - "loss": 3.7545, - "step": 2755500 - }, - { - "epoch": 30.61, - "learning_rate": 4.848355285076516e-08, - "loss": 3.7466, - "step": 2756000 - }, - { - "epoch": 30.61, - "learning_rate": 4.84696710570153e-08, - "loss": 3.7481, - "step": 2756500 - }, - { - "epoch": 30.62, - "learning_rate": 4.845578926326544e-08, - "loss": 3.7532, - "step": 2757000 - }, - { - "epoch": 30.62, - "learning_rate": 4.844190746951558e-08, - "loss": 3.764, - "step": 2757500 - }, - { - "epoch": 30.63, - "learning_rate": 4.842802567576572e-08, - "loss": 3.7788, - "step": 2758000 - }, - { - "epoch": 30.63, - "learning_rate": 4.841414388201586e-08, - "loss": 3.7984, - "step": 2758500 - }, - { - "epoch": 30.64, - "learning_rate": 4.8400262088265995e-08, - "loss": 3.7503, - "step": 2759000 - }, - { - "epoch": 30.65, - "learning_rate": 4.838638029451613e-08, - "loss": 3.7653, - "step": 2759500 - }, - { - "epoch": 30.65, - "learning_rate": 4.8372498500766273e-08, - "loss": 3.745, - "step": 2760000 - }, - { - "epoch": 30.66, - "learning_rate": 4.8358616707016416e-08, - "loss": 3.7655, - "step": 2760500 - }, - { - "epoch": 30.66, - "learning_rate": 4.834473491326655e-08, - "loss": 3.7547, - "step": 2761000 - }, - { - "epoch": 30.67, - "learning_rate": 4.833085311951669e-08, - "loss": 3.7802, - "step": 2761500 - }, - { - "epoch": 30.67, - "learning_rate": 4.831697132576682e-08, - "loss": 3.7676, - "step": 2762000 - }, - { - "epoch": 30.68, - "learning_rate": 4.8303089532016965e-08, - "loss": 3.7852, - "step": 2762500 - }, - { - "epoch": 30.68, - "learning_rate": 4.828920773826711e-08, - "loss": 3.7764, - "step": 2763000 - }, - { - "epoch": 30.69, - "learning_rate": 4.827532594451724e-08, - "loss": 3.7755, - "step": 2763500 - }, - { - "epoch": 30.7, - "learning_rate": 4.8261444150767385e-08, - "loss": 3.785, - "step": 2764000 - }, - { - "epoch": 30.7, - "learning_rate": 4.824756235701752e-08, - "loss": 3.7597, - "step": 2764500 - }, - { - "epoch": 30.71, - "learning_rate": 4.8233680563267656e-08, - "loss": 3.7547, - "step": 2765000 - }, - { - "epoch": 30.71, - "learning_rate": 4.82197987695178e-08, - "loss": 3.7777, - "step": 2765500 - }, - { - "epoch": 30.72, - "learning_rate": 4.820591697576794e-08, - "loss": 3.7606, - "step": 2766000 - }, - { - "epoch": 30.72, - "learning_rate": 4.8192035182018076e-08, - "loss": 3.7601, - "step": 2766500 - }, - { - "epoch": 30.73, - "learning_rate": 4.817815338826822e-08, - "loss": 3.7643, - "step": 2767000 - }, - { - "epoch": 30.73, - "learning_rate": 4.8164271594518354e-08, - "loss": 3.7669, - "step": 2767500 - }, - { - "epoch": 30.74, - "learning_rate": 4.815038980076849e-08, - "loss": 3.7455, - "step": 2768000 - }, - { - "epoch": 30.75, - "learning_rate": 4.813650800701863e-08, - "loss": 3.7513, - "step": 2768500 - }, - { - "epoch": 30.75, - "learning_rate": 4.812262621326877e-08, - "loss": 3.7447, - "step": 2769000 - }, - { - "epoch": 30.76, - "learning_rate": 4.810874441951891e-08, - "loss": 3.7422, - "step": 2769500 - }, - { - "epoch": 30.76, - "learning_rate": 4.809486262576905e-08, - "loss": 3.7514, - "step": 2770000 - }, - { - "epoch": 30.77, - "learning_rate": 4.808098083201919e-08, - "loss": 3.7671, - "step": 2770500 - }, - { - "epoch": 30.77, - "learning_rate": 4.8067099038269324e-08, - "loss": 3.7531, - "step": 2771000 - }, - { - "epoch": 30.78, - "learning_rate": 4.8053217244519466e-08, - "loss": 3.7625, - "step": 2771500 - }, - { - "epoch": 30.78, - "learning_rate": 4.80393354507696e-08, - "loss": 3.7783, - "step": 2772000 - }, - { - "epoch": 30.79, - "learning_rate": 4.8025453657019744e-08, - "loss": 3.7741, - "step": 2772500 - }, - { - "epoch": 30.8, - "learning_rate": 4.8011571863269886e-08, - "loss": 3.7691, - "step": 2773000 - }, - { - "epoch": 30.8, - "learning_rate": 4.799769006952002e-08, - "loss": 3.7629, - "step": 2773500 - }, - { - "epoch": 30.81, - "learning_rate": 4.798380827577016e-08, - "loss": 3.7543, - "step": 2774000 - }, - { - "epoch": 30.81, - "learning_rate": 4.79699264820203e-08, - "loss": 3.738, - "step": 2774500 - }, - { - "epoch": 30.82, - "learning_rate": 4.7956044688270435e-08, - "loss": 3.752, - "step": 2775000 - }, - { - "epoch": 30.82, - "learning_rate": 4.794216289452058e-08, - "loss": 3.7767, - "step": 2775500 - }, - { - "epoch": 30.83, - "learning_rate": 4.792828110077072e-08, - "loss": 3.7817, - "step": 2776000 - }, - { - "epoch": 30.83, - "learning_rate": 4.7914399307020856e-08, - "loss": 3.7611, - "step": 2776500 - }, - { - "epoch": 30.84, - "learning_rate": 4.790051751327099e-08, - "loss": 3.7582, - "step": 2777000 - }, - { - "epoch": 30.85, - "learning_rate": 4.788663571952113e-08, - "loss": 3.7605, - "step": 2777500 - }, - { - "epoch": 30.85, - "learning_rate": 4.787275392577127e-08, - "loss": 3.7772, - "step": 2778000 - }, - { - "epoch": 30.86, - "learning_rate": 4.785887213202141e-08, - "loss": 3.7713, - "step": 2778500 - }, - { - "epoch": 30.86, - "learning_rate": 4.784499033827155e-08, - "loss": 3.7601, - "step": 2779000 - }, - { - "epoch": 30.87, - "learning_rate": 4.783110854452169e-08, - "loss": 3.7735, - "step": 2779500 - }, - { - "epoch": 30.87, - "learning_rate": 4.7817226750771825e-08, - "loss": 3.7685, - "step": 2780000 - }, - { - "epoch": 30.88, - "learning_rate": 4.780334495702196e-08, - "loss": 3.7622, - "step": 2780500 - }, - { - "epoch": 30.88, - "learning_rate": 4.77894631632721e-08, - "loss": 3.7698, - "step": 2781000 - }, - { - "epoch": 30.89, - "learning_rate": 4.7775581369522245e-08, - "loss": 3.774, - "step": 2781500 - }, - { - "epoch": 30.9, - "learning_rate": 4.776169957577238e-08, - "loss": 3.7798, - "step": 2782000 - }, - { - "epoch": 30.9, - "learning_rate": 4.774781778202252e-08, - "loss": 3.7505, - "step": 2782500 - }, - { - "epoch": 30.91, - "learning_rate": 4.773393598827266e-08, - "loss": 3.7601, - "step": 2783000 - }, - { - "epoch": 30.91, - "learning_rate": 4.7720054194522794e-08, - "loss": 3.7691, - "step": 2783500 - }, - { - "epoch": 30.92, - "learning_rate": 4.7706172400772937e-08, - "loss": 3.7786, - "step": 2784000 - }, - { - "epoch": 30.92, - "learning_rate": 4.769229060702307e-08, - "loss": 3.7866, - "step": 2784500 - }, - { - "epoch": 30.93, - "learning_rate": 4.7678408813273215e-08, - "loss": 3.7668, - "step": 2785000 - }, - { - "epoch": 30.93, - "learning_rate": 4.766452701952336e-08, - "loss": 3.7619, - "step": 2785500 - }, - { - "epoch": 30.94, - "learning_rate": 4.765064522577349e-08, - "loss": 3.7586, - "step": 2786000 - }, - { - "epoch": 30.95, - "learning_rate": 4.763676343202363e-08, - "loss": 3.7724, - "step": 2786500 - }, - { - "epoch": 30.95, - "learning_rate": 4.762288163827377e-08, - "loss": 3.7729, - "step": 2787000 - }, - { - "epoch": 30.96, - "learning_rate": 4.7608999844523906e-08, - "loss": 3.762, - "step": 2787500 - }, - { - "epoch": 30.96, - "learning_rate": 4.759511805077405e-08, - "loss": 3.7745, - "step": 2788000 - }, - { - "epoch": 30.97, - "learning_rate": 4.758123625702419e-08, - "loss": 3.7544, - "step": 2788500 - }, - { - "epoch": 30.97, - "learning_rate": 4.7567354463274326e-08, - "loss": 3.7569, - "step": 2789000 - }, - { - "epoch": 30.98, - "learning_rate": 4.755347266952446e-08, - "loss": 3.7644, - "step": 2789500 - }, - { - "epoch": 30.98, - "learning_rate": 4.7539590875774604e-08, - "loss": 3.77, - "step": 2790000 - }, - { - "epoch": 30.99, - "learning_rate": 4.752570908202474e-08, - "loss": 3.7653, - "step": 2790500 - }, - { - "epoch": 31.0, - "learning_rate": 4.751182728827488e-08, - "loss": 3.7616, - "step": 2791000 - }, - { - "epoch": 31.0, - "eval_loss": 3.8338966369628906, - "eval_runtime": 6.3047, - "eval_samples_per_second": 246.482, - "step": 2791426 - }, - { - "epoch": 31.0, - "learning_rate": 4.749794549452502e-08, - "loss": 3.7581, - "step": 2791500 - }, - { - "epoch": 31.01, - "learning_rate": 4.748406370077515e-08, - "loss": 3.7536, - "step": 2792000 - }, - { - "epoch": 31.01, - "learning_rate": 4.7470181907025296e-08, - "loss": 3.7805, - "step": 2792500 - }, - { - "epoch": 31.02, - "learning_rate": 4.745630011327543e-08, - "loss": 3.7684, - "step": 2793000 - }, - { - "epoch": 31.02, - "learning_rate": 4.7442418319525573e-08, - "loss": 3.7698, - "step": 2793500 - }, - { - "epoch": 31.03, - "learning_rate": 4.7428536525775716e-08, - "loss": 3.7578, - "step": 2794000 - }, - { - "epoch": 31.03, - "learning_rate": 4.741465473202585e-08, - "loss": 3.7723, - "step": 2794500 - }, - { - "epoch": 31.04, - "learning_rate": 4.740077293827599e-08, - "loss": 3.7905, - "step": 2795000 - }, - { - "epoch": 31.05, - "learning_rate": 4.738689114452613e-08, - "loss": 3.7624, - "step": 2795500 - }, - { - "epoch": 31.05, - "learning_rate": 4.7373009350776265e-08, - "loss": 3.7682, - "step": 2796000 - }, - { - "epoch": 31.06, - "learning_rate": 4.735912755702641e-08, - "loss": 3.7636, - "step": 2796500 - }, - { - "epoch": 31.06, - "learning_rate": 4.734524576327655e-08, - "loss": 3.7786, - "step": 2797000 - }, - { - "epoch": 31.07, - "learning_rate": 4.7331363969526685e-08, - "loss": 3.7601, - "step": 2797500 - }, - { - "epoch": 31.07, - "learning_rate": 4.731748217577682e-08, - "loss": 3.7745, - "step": 2798000 - }, - { - "epoch": 31.08, - "learning_rate": 4.7303600382026956e-08, - "loss": 3.7621, - "step": 2798500 - }, - { - "epoch": 31.08, - "learning_rate": 4.72897185882771e-08, - "loss": 3.783, - "step": 2799000 - }, - { - "epoch": 31.09, - "learning_rate": 4.727583679452724e-08, - "loss": 3.7673, - "step": 2799500 - }, - { - "epoch": 31.1, - "learning_rate": 4.7261955000777377e-08, - "loss": 3.7703, - "step": 2800000 - }, - { - "epoch": 31.1, - "learning_rate": 4.724807320702752e-08, - "loss": 3.7615, - "step": 2800500 - }, - { - "epoch": 31.11, - "learning_rate": 4.7234191413277654e-08, - "loss": 3.751, - "step": 2801000 - }, - { - "epoch": 31.11, - "learning_rate": 4.722030961952779e-08, - "loss": 3.7532, - "step": 2801500 - }, - { - "epoch": 31.12, - "learning_rate": 4.720642782577793e-08, - "loss": 3.7535, - "step": 2802000 - }, - { - "epoch": 31.12, - "learning_rate": 4.7192546032028075e-08, - "loss": 3.7627, - "step": 2802500 - }, - { - "epoch": 31.13, - "learning_rate": 4.717866423827821e-08, - "loss": 3.755, - "step": 2803000 - }, - { - "epoch": 31.13, - "learning_rate": 4.716478244452835e-08, - "loss": 3.7459, - "step": 2803500 - }, - { - "epoch": 31.14, - "learning_rate": 4.715090065077849e-08, - "loss": 3.755, - "step": 2804000 - }, - { - "epoch": 31.15, - "learning_rate": 4.7137018857028624e-08, - "loss": 3.7662, - "step": 2804500 - }, - { - "epoch": 31.15, - "learning_rate": 4.7123137063278766e-08, - "loss": 3.7722, - "step": 2805000 - }, - { - "epoch": 31.16, - "learning_rate": 4.710925526952891e-08, - "loss": 3.767, - "step": 2805500 - }, - { - "epoch": 31.16, - "learning_rate": 4.7095373475779044e-08, - "loss": 3.7613, - "step": 2806000 - }, - { - "epoch": 31.17, - "learning_rate": 4.7081491682029186e-08, - "loss": 3.7614, - "step": 2806500 - }, - { - "epoch": 31.17, - "learning_rate": 4.706760988827932e-08, - "loss": 3.7504, - "step": 2807000 - }, - { - "epoch": 31.18, - "learning_rate": 4.705372809452946e-08, - "loss": 3.7697, - "step": 2807500 - }, - { - "epoch": 31.18, - "learning_rate": 4.70398463007796e-08, - "loss": 3.7538, - "step": 2808000 - }, - { - "epoch": 31.19, - "learning_rate": 4.7025964507029735e-08, - "loss": 3.7673, - "step": 2808500 - }, - { - "epoch": 31.2, - "learning_rate": 4.701208271327988e-08, - "loss": 3.7548, - "step": 2809000 - }, - { - "epoch": 31.2, - "learning_rate": 4.699820091953002e-08, - "loss": 3.7716, - "step": 2809500 - }, - { - "epoch": 31.21, - "learning_rate": 4.6984319125780156e-08, - "loss": 3.7516, - "step": 2810000 - }, - { - "epoch": 31.21, - "learning_rate": 4.697043733203029e-08, - "loss": 3.7762, - "step": 2810500 - }, - { - "epoch": 31.22, - "learning_rate": 4.6956555538280434e-08, - "loss": 3.7706, - "step": 2811000 - }, - { - "epoch": 31.22, - "learning_rate": 4.694267374453057e-08, - "loss": 3.778, - "step": 2811500 - }, - { - "epoch": 31.23, - "learning_rate": 4.692879195078071e-08, - "loss": 3.7708, - "step": 2812000 - }, - { - "epoch": 31.23, - "learning_rate": 4.6914910157030854e-08, - "loss": 3.7671, - "step": 2812500 - }, - { - "epoch": 31.24, - "learning_rate": 4.690102836328099e-08, - "loss": 3.7641, - "step": 2813000 - }, - { - "epoch": 31.25, - "learning_rate": 4.6887146569531125e-08, - "loss": 3.7745, - "step": 2813500 - }, - { - "epoch": 31.25, - "learning_rate": 4.687326477578126e-08, - "loss": 3.7439, - "step": 2814000 - }, - { - "epoch": 31.26, - "learning_rate": 4.68593829820314e-08, - "loss": 3.7455, - "step": 2814500 - }, - { - "epoch": 31.26, - "learning_rate": 4.6845501188281545e-08, - "loss": 3.7705, - "step": 2815000 - }, - { - "epoch": 31.27, - "learning_rate": 4.683161939453168e-08, - "loss": 3.7672, - "step": 2815500 - }, - { - "epoch": 31.27, - "learning_rate": 4.681773760078182e-08, - "loss": 3.7728, - "step": 2816000 - }, - { - "epoch": 31.28, - "learning_rate": 4.680385580703196e-08, - "loss": 3.7733, - "step": 2816500 - }, - { - "epoch": 31.28, - "learning_rate": 4.6789974013282094e-08, - "loss": 3.7375, - "step": 2817000 - }, - { - "epoch": 31.29, - "learning_rate": 4.677609221953224e-08, - "loss": 3.7623, - "step": 2817500 - }, - { - "epoch": 31.3, - "learning_rate": 4.676221042578238e-08, - "loss": 3.7534, - "step": 2818000 - }, - { - "epoch": 31.3, - "learning_rate": 4.6748328632032515e-08, - "loss": 3.7799, - "step": 2818500 - }, - { - "epoch": 31.31, - "learning_rate": 4.673444683828266e-08, - "loss": 3.748, - "step": 2819000 - }, - { - "epoch": 31.31, - "learning_rate": 4.672056504453279e-08, - "loss": 3.7637, - "step": 2819500 - }, - { - "epoch": 31.32, - "learning_rate": 4.670668325078293e-08, - "loss": 3.7581, - "step": 2820000 - }, - { - "epoch": 31.32, - "learning_rate": 4.669280145703307e-08, - "loss": 3.7697, - "step": 2820500 - }, - { - "epoch": 31.33, - "learning_rate": 4.6678919663283206e-08, - "loss": 3.7616, - "step": 2821000 - }, - { - "epoch": 31.33, - "learning_rate": 4.666503786953335e-08, - "loss": 3.7594, - "step": 2821500 - }, - { - "epoch": 31.34, - "learning_rate": 4.665115607578349e-08, - "loss": 3.7485, - "step": 2822000 - }, - { - "epoch": 31.35, - "learning_rate": 4.663727428203362e-08, - "loss": 3.7614, - "step": 2822500 - }, - { - "epoch": 31.35, - "learning_rate": 4.662339248828376e-08, - "loss": 3.7604, - "step": 2823000 - }, - { - "epoch": 31.36, - "learning_rate": 4.6609510694533904e-08, - "loss": 3.7758, - "step": 2823500 - }, - { - "epoch": 31.36, - "learning_rate": 4.659562890078404e-08, - "loss": 3.7505, - "step": 2824000 - }, - { - "epoch": 31.37, - "learning_rate": 4.658174710703418e-08, - "loss": 3.7504, - "step": 2824500 - }, - { - "epoch": 31.37, - "learning_rate": 4.6567865313284324e-08, - "loss": 3.7532, - "step": 2825000 - }, - { - "epoch": 31.38, - "learning_rate": 4.6553983519534453e-08, - "loss": 3.7561, - "step": 2825500 - }, - { - "epoch": 31.38, - "learning_rate": 4.6540101725784596e-08, - "loss": 3.7645, - "step": 2826000 - }, - { - "epoch": 31.39, - "learning_rate": 4.652621993203474e-08, - "loss": 3.7773, - "step": 2826500 - }, - { - "epoch": 31.4, - "learning_rate": 4.6512338138284874e-08, - "loss": 3.769, - "step": 2827000 - }, - { - "epoch": 31.4, - "learning_rate": 4.6498456344535016e-08, - "loss": 3.7582, - "step": 2827500 - }, - { - "epoch": 31.41, - "learning_rate": 4.648457455078515e-08, - "loss": 3.7702, - "step": 2828000 - }, - { - "epoch": 31.41, - "learning_rate": 4.647069275703529e-08, - "loss": 3.7762, - "step": 2828500 - }, - { - "epoch": 31.42, - "learning_rate": 4.645681096328543e-08, - "loss": 3.7649, - "step": 2829000 - }, - { - "epoch": 31.42, - "learning_rate": 4.6442929169535565e-08, - "loss": 3.7741, - "step": 2829500 - }, - { - "epoch": 31.43, - "learning_rate": 4.642904737578571e-08, - "loss": 3.781, - "step": 2830000 - }, - { - "epoch": 31.43, - "learning_rate": 4.641516558203585e-08, - "loss": 3.7713, - "step": 2830500 - }, - { - "epoch": 31.44, - "learning_rate": 4.6401283788285985e-08, - "loss": 3.763, - "step": 2831000 - }, - { - "epoch": 31.45, - "learning_rate": 4.638740199453612e-08, - "loss": 3.7363, - "step": 2831500 - }, - { - "epoch": 31.45, - "learning_rate": 4.637352020078626e-08, - "loss": 3.773, - "step": 2832000 - }, - { - "epoch": 31.46, - "learning_rate": 4.63596384070364e-08, - "loss": 3.761, - "step": 2832500 - }, - { - "epoch": 31.46, - "learning_rate": 4.634575661328654e-08, - "loss": 3.7713, - "step": 2833000 - }, - { - "epoch": 31.47, - "learning_rate": 4.633187481953668e-08, - "loss": 3.7553, - "step": 2833500 - }, - { - "epoch": 31.47, - "learning_rate": 4.631799302578682e-08, - "loss": 3.7568, - "step": 2834000 - }, - { - "epoch": 31.48, - "learning_rate": 4.6304111232036955e-08, - "loss": 3.7464, - "step": 2834500 - }, - { - "epoch": 31.48, - "learning_rate": 4.62902294382871e-08, - "loss": 3.7585, - "step": 2835000 - }, - { - "epoch": 31.49, - "learning_rate": 4.627634764453723e-08, - "loss": 3.751, - "step": 2835500 - }, - { - "epoch": 31.5, - "learning_rate": 4.6262465850787375e-08, - "loss": 3.7632, - "step": 2836000 - }, - { - "epoch": 31.5, - "learning_rate": 4.624858405703751e-08, - "loss": 3.7599, - "step": 2836500 - }, - { - "epoch": 31.51, - "learning_rate": 4.623470226328765e-08, - "loss": 3.7642, - "step": 2837000 - }, - { - "epoch": 31.51, - "learning_rate": 4.622082046953779e-08, - "loss": 3.7762, - "step": 2837500 - }, - { - "epoch": 31.52, - "learning_rate": 4.6206938675787924e-08, - "loss": 3.7565, - "step": 2838000 - }, - { - "epoch": 31.52, - "learning_rate": 4.6193056882038066e-08, - "loss": 3.7649, - "step": 2838500 - }, - { - "epoch": 31.53, - "learning_rate": 4.617917508828821e-08, - "loss": 3.7484, - "step": 2839000 - }, - { - "epoch": 31.53, - "learning_rate": 4.6165293294538344e-08, - "loss": 3.7614, - "step": 2839500 - }, - { - "epoch": 31.54, - "learning_rate": 4.6151411500788486e-08, - "loss": 3.7485, - "step": 2840000 - }, - { - "epoch": 31.54, - "learning_rate": 4.613752970703862e-08, - "loss": 3.7689, - "step": 2840500 - }, - { - "epoch": 31.55, - "learning_rate": 4.612364791328876e-08, - "loss": 3.7763, - "step": 2841000 - }, - { - "epoch": 31.56, - "learning_rate": 4.61097661195389e-08, - "loss": 3.7628, - "step": 2841500 - }, - { - "epoch": 31.56, - "learning_rate": 4.609588432578904e-08, - "loss": 3.7629, - "step": 2842000 - }, - { - "epoch": 31.57, - "learning_rate": 4.608200253203918e-08, - "loss": 3.7501, - "step": 2842500 - }, - { - "epoch": 31.57, - "learning_rate": 4.606812073828932e-08, - "loss": 3.7719, - "step": 2843000 - }, - { - "epoch": 31.58, - "learning_rate": 4.6054238944539456e-08, - "loss": 3.7434, - "step": 2843500 - }, - { - "epoch": 31.58, - "learning_rate": 4.604035715078959e-08, - "loss": 3.7637, - "step": 2844000 - }, - { - "epoch": 31.59, - "learning_rate": 4.6026475357039734e-08, - "loss": 3.761, - "step": 2844500 - }, - { - "epoch": 31.59, - "learning_rate": 4.601259356328987e-08, - "loss": 3.7501, - "step": 2845000 - }, - { - "epoch": 31.6, - "learning_rate": 4.599871176954001e-08, - "loss": 3.7439, - "step": 2845500 - }, - { - "epoch": 31.61, - "learning_rate": 4.5984829975790154e-08, - "loss": 3.7883, - "step": 2846000 - }, - { - "epoch": 31.61, - "learning_rate": 4.597094818204029e-08, - "loss": 3.7409, - "step": 2846500 - }, - { - "epoch": 31.62, - "learning_rate": 4.5957066388290425e-08, - "loss": 3.76, - "step": 2847000 - }, - { - "epoch": 31.62, - "learning_rate": 4.594318459454057e-08, - "loss": 3.7451, - "step": 2847500 - }, - { - "epoch": 31.63, - "learning_rate": 4.59293028007907e-08, - "loss": 3.7725, - "step": 2848000 - }, - { - "epoch": 31.63, - "learning_rate": 4.5915421007040845e-08, - "loss": 3.7908, - "step": 2848500 - }, - { - "epoch": 31.64, - "learning_rate": 4.590153921329099e-08, - "loss": 3.7723, - "step": 2849000 - }, - { - "epoch": 31.64, - "learning_rate": 4.588765741954112e-08, - "loss": 3.7503, - "step": 2849500 - }, - { - "epoch": 31.65, - "learning_rate": 4.587377562579126e-08, - "loss": 3.7759, - "step": 2850000 - }, - { - "epoch": 31.66, - "learning_rate": 4.5859893832041395e-08, - "loss": 3.7487, - "step": 2850500 - }, - { - "epoch": 31.66, - "learning_rate": 4.584601203829154e-08, - "loss": 3.7704, - "step": 2851000 - }, - { - "epoch": 31.67, - "learning_rate": 4.583213024454168e-08, - "loss": 3.7585, - "step": 2851500 - }, - { - "epoch": 31.67, - "learning_rate": 4.5818248450791815e-08, - "loss": 3.7587, - "step": 2852000 - }, - { - "epoch": 31.68, - "learning_rate": 4.580436665704196e-08, - "loss": 3.7733, - "step": 2852500 - }, - { - "epoch": 31.68, - "learning_rate": 4.579048486329209e-08, - "loss": 3.77, - "step": 2853000 - }, - { - "epoch": 31.69, - "learning_rate": 4.577660306954223e-08, - "loss": 3.7599, - "step": 2853500 - }, - { - "epoch": 31.69, - "learning_rate": 4.576272127579237e-08, - "loss": 3.7651, - "step": 2854000 - }, - { - "epoch": 31.7, - "learning_rate": 4.574883948204251e-08, - "loss": 3.7514, - "step": 2854500 - }, - { - "epoch": 31.71, - "learning_rate": 4.573495768829265e-08, - "loss": 3.7541, - "step": 2855000 - }, - { - "epoch": 31.71, - "learning_rate": 4.572107589454279e-08, - "loss": 3.7595, - "step": 2855500 - }, - { - "epoch": 31.72, - "learning_rate": 4.5707194100792926e-08, - "loss": 3.7714, - "step": 2856000 - }, - { - "epoch": 31.72, - "learning_rate": 4.569331230704306e-08, - "loss": 3.7711, - "step": 2856500 - }, - { - "epoch": 31.73, - "learning_rate": 4.5679430513293204e-08, - "loss": 3.7599, - "step": 2857000 - }, - { - "epoch": 31.73, - "learning_rate": 4.566554871954334e-08, - "loss": 3.7643, - "step": 2857500 - }, - { - "epoch": 31.74, - "learning_rate": 4.565166692579348e-08, - "loss": 3.7623, - "step": 2858000 - }, - { - "epoch": 31.74, - "learning_rate": 4.5637785132043624e-08, - "loss": 3.7573, - "step": 2858500 - }, - { - "epoch": 31.75, - "learning_rate": 4.5623903338293753e-08, - "loss": 3.7769, - "step": 2859000 - }, - { - "epoch": 31.76, - "learning_rate": 4.5610021544543896e-08, - "loss": 3.7722, - "step": 2859500 - }, - { - "epoch": 31.76, - "learning_rate": 4.559613975079404e-08, - "loss": 3.7551, - "step": 2860000 - }, - { - "epoch": 31.77, - "learning_rate": 4.5582257957044174e-08, - "loss": 3.7694, - "step": 2860500 - }, - { - "epoch": 31.77, - "learning_rate": 4.5568376163294316e-08, - "loss": 3.7382, - "step": 2861000 - }, - { - "epoch": 31.78, - "learning_rate": 4.555449436954446e-08, - "loss": 3.7636, - "step": 2861500 - }, - { - "epoch": 31.78, - "learning_rate": 4.554061257579459e-08, - "loss": 3.7406, - "step": 2862000 - }, - { - "epoch": 31.79, - "learning_rate": 4.552673078204473e-08, - "loss": 3.7686, - "step": 2862500 - }, - { - "epoch": 31.79, - "learning_rate": 4.551284898829487e-08, - "loss": 3.7664, - "step": 2863000 - }, - { - "epoch": 31.8, - "learning_rate": 4.549896719454501e-08, - "loss": 3.7655, - "step": 2863500 - }, - { - "epoch": 31.81, - "learning_rate": 4.548508540079515e-08, - "loss": 3.7701, - "step": 2864000 - }, - { - "epoch": 31.81, - "learning_rate": 4.5471203607045285e-08, - "loss": 3.7878, - "step": 2864500 - }, - { - "epoch": 31.82, - "learning_rate": 4.545732181329542e-08, - "loss": 3.7815, - "step": 2865000 - }, - { - "epoch": 31.82, - "learning_rate": 4.544344001954556e-08, - "loss": 3.7646, - "step": 2865500 - }, - { - "epoch": 31.83, - "learning_rate": 4.54295582257957e-08, - "loss": 3.7722, - "step": 2866000 - }, - { - "epoch": 31.83, - "learning_rate": 4.541567643204584e-08, - "loss": 3.7622, - "step": 2866500 - }, - { - "epoch": 31.84, - "learning_rate": 4.5401794638295983e-08, - "loss": 3.7374, - "step": 2867000 - }, - { - "epoch": 31.84, - "learning_rate": 4.538791284454612e-08, - "loss": 3.778, - "step": 2867500 - }, - { - "epoch": 31.85, - "learning_rate": 4.5374031050796255e-08, - "loss": 3.7671, - "step": 2868000 - }, - { - "epoch": 31.86, - "learning_rate": 4.53601492570464e-08, - "loss": 3.7698, - "step": 2868500 - }, - { - "epoch": 31.86, - "learning_rate": 4.534626746329653e-08, - "loss": 3.7464, - "step": 2869000 - }, - { - "epoch": 31.87, - "learning_rate": 4.5332385669546675e-08, - "loss": 3.7634, - "step": 2869500 - }, - { - "epoch": 31.87, - "learning_rate": 4.531850387579682e-08, - "loss": 3.7616, - "step": 2870000 - }, - { - "epoch": 31.88, - "learning_rate": 4.530462208204695e-08, - "loss": 3.7455, - "step": 2870500 - }, - { - "epoch": 31.88, - "learning_rate": 4.529074028829709e-08, - "loss": 3.7615, - "step": 2871000 - }, - { - "epoch": 31.89, - "learning_rate": 4.527685849454723e-08, - "loss": 3.7356, - "step": 2871500 - }, - { - "epoch": 31.89, - "learning_rate": 4.5262976700797366e-08, - "loss": 3.7739, - "step": 2872000 - }, - { - "epoch": 31.9, - "learning_rate": 4.524909490704751e-08, - "loss": 3.7543, - "step": 2872500 - }, - { - "epoch": 31.91, - "learning_rate": 4.5235213113297644e-08, - "loss": 3.7721, - "step": 2873000 - }, - { - "epoch": 31.91, - "learning_rate": 4.5221331319547786e-08, - "loss": 3.7474, - "step": 2873500 - }, - { - "epoch": 31.92, - "learning_rate": 4.520744952579792e-08, - "loss": 3.7654, - "step": 2874000 - }, - { - "epoch": 31.92, - "learning_rate": 4.519356773204806e-08, - "loss": 3.7854, - "step": 2874500 - }, - { - "epoch": 31.93, - "learning_rate": 4.51796859382982e-08, - "loss": 3.7678, - "step": 2875000 - }, - { - "epoch": 31.93, - "learning_rate": 4.516580414454834e-08, - "loss": 3.7674, - "step": 2875500 - }, - { - "epoch": 31.94, - "learning_rate": 4.515192235079848e-08, - "loss": 3.7666, - "step": 2876000 - }, - { - "epoch": 31.94, - "learning_rate": 4.513804055704862e-08, - "loss": 3.7623, - "step": 2876500 - }, - { - "epoch": 31.95, - "learning_rate": 4.5124158763298756e-08, - "loss": 3.7535, - "step": 2877000 - }, - { - "epoch": 31.96, - "learning_rate": 4.511027696954889e-08, - "loss": 3.7513, - "step": 2877500 - }, - { - "epoch": 31.96, - "learning_rate": 4.5096395175799034e-08, - "loss": 3.7576, - "step": 2878000 - }, - { - "epoch": 31.97, - "learning_rate": 4.5082513382049176e-08, - "loss": 3.7583, - "step": 2878500 - }, - { - "epoch": 31.97, - "learning_rate": 4.506863158829931e-08, - "loss": 3.7719, - "step": 2879000 - }, - { - "epoch": 31.98, - "learning_rate": 4.5054749794549454e-08, - "loss": 3.7655, - "step": 2879500 - }, - { - "epoch": 31.98, - "learning_rate": 4.504086800079959e-08, - "loss": 3.7603, - "step": 2880000 - }, - { - "epoch": 31.99, - "learning_rate": 4.5026986207049725e-08, - "loss": 3.761, - "step": 2880500 - }, - { - "epoch": 31.99, - "learning_rate": 4.501310441329987e-08, - "loss": 3.7671, - "step": 2881000 - }, - { - "epoch": 32.0, - "eval_loss": 3.8329031467437744, - "eval_runtime": 6.2965, - "eval_samples_per_second": 246.805, - "step": 2881472 - }, - { - "epoch": 32.0, - "learning_rate": 4.499922261955e-08, - "loss": 3.7718, - "step": 2881500 - }, - { - "epoch": 32.01, - "learning_rate": 4.4985340825800145e-08, - "loss": 3.7558, - "step": 2882000 - }, - { - "epoch": 32.01, - "learning_rate": 4.497145903205029e-08, - "loss": 3.7661, - "step": 2882500 - }, - { - "epoch": 32.02, - "learning_rate": 4.495757723830042e-08, - "loss": 3.7756, - "step": 2883000 - }, - { - "epoch": 32.02, - "learning_rate": 4.494369544455056e-08, - "loss": 3.7572, - "step": 2883500 - }, - { - "epoch": 32.03, - "learning_rate": 4.49298136508007e-08, - "loss": 3.7516, - "step": 2884000 - }, - { - "epoch": 32.03, - "learning_rate": 4.491593185705084e-08, - "loss": 3.7526, - "step": 2884500 - }, - { - "epoch": 32.04, - "learning_rate": 4.490205006330098e-08, - "loss": 3.7636, - "step": 2885000 - }, - { - "epoch": 32.04, - "learning_rate": 4.488816826955112e-08, - "loss": 3.7707, - "step": 2885500 - }, - { - "epoch": 32.05, - "learning_rate": 4.487428647580126e-08, - "loss": 3.7604, - "step": 2886000 - }, - { - "epoch": 32.06, - "learning_rate": 4.486040468205139e-08, - "loss": 3.7571, - "step": 2886500 - }, - { - "epoch": 32.06, - "learning_rate": 4.484652288830153e-08, - "loss": 3.7739, - "step": 2887000 - }, - { - "epoch": 32.07, - "learning_rate": 4.483264109455167e-08, - "loss": 3.7697, - "step": 2887500 - }, - { - "epoch": 32.07, - "learning_rate": 4.481875930080181e-08, - "loss": 3.7586, - "step": 2888000 - }, - { - "epoch": 32.08, - "learning_rate": 4.480487750705195e-08, - "loss": 3.7649, - "step": 2888500 - }, - { - "epoch": 32.08, - "learning_rate": 4.479099571330209e-08, - "loss": 3.7708, - "step": 2889000 - }, - { - "epoch": 32.09, - "learning_rate": 4.4777113919552226e-08, - "loss": 3.7477, - "step": 2889500 - }, - { - "epoch": 32.09, - "learning_rate": 4.476323212580236e-08, - "loss": 3.7805, - "step": 2890000 - }, - { - "epoch": 32.1, - "learning_rate": 4.4749350332052504e-08, - "loss": 3.7628, - "step": 2890500 - }, - { - "epoch": 32.11, - "learning_rate": 4.4735468538302647e-08, - "loss": 3.7618, - "step": 2891000 - }, - { - "epoch": 32.11, - "learning_rate": 4.472158674455278e-08, - "loss": 3.7862, - "step": 2891500 - }, - { - "epoch": 32.12, - "learning_rate": 4.4707704950802924e-08, - "loss": 3.7491, - "step": 2892000 - }, - { - "epoch": 32.12, - "learning_rate": 4.469382315705306e-08, - "loss": 3.755, - "step": 2892500 - }, - { - "epoch": 32.13, - "learning_rate": 4.4679941363303196e-08, - "loss": 3.757, - "step": 2893000 - }, - { - "epoch": 32.13, - "learning_rate": 4.466605956955334e-08, - "loss": 3.771, - "step": 2893500 - }, - { - "epoch": 32.14, - "learning_rate": 4.4652177775803474e-08, - "loss": 3.7449, - "step": 2894000 - }, - { - "epoch": 32.14, - "learning_rate": 4.4638295982053616e-08, - "loss": 3.7597, - "step": 2894500 - }, - { - "epoch": 32.15, - "learning_rate": 4.462441418830376e-08, - "loss": 3.7508, - "step": 2895000 - }, - { - "epoch": 32.16, - "learning_rate": 4.461053239455389e-08, - "loss": 3.7661, - "step": 2895500 - }, - { - "epoch": 32.16, - "learning_rate": 4.459665060080403e-08, - "loss": 3.758, - "step": 2896000 - }, - { - "epoch": 32.17, - "learning_rate": 4.458276880705417e-08, - "loss": 3.7647, - "step": 2896500 - }, - { - "epoch": 32.17, - "learning_rate": 4.456888701330431e-08, - "loss": 3.7464, - "step": 2897000 - }, - { - "epoch": 32.18, - "learning_rate": 4.455500521955445e-08, - "loss": 3.7596, - "step": 2897500 - }, - { - "epoch": 32.18, - "learning_rate": 4.454112342580459e-08, - "loss": 3.756, - "step": 2898000 - }, - { - "epoch": 32.19, - "learning_rate": 4.452724163205472e-08, - "loss": 3.764, - "step": 2898500 - }, - { - "epoch": 32.19, - "learning_rate": 4.451335983830486e-08, - "loss": 3.7585, - "step": 2899000 - }, - { - "epoch": 32.2, - "learning_rate": 4.4499478044555006e-08, - "loss": 3.7627, - "step": 2899500 - }, - { - "epoch": 32.21, - "learning_rate": 4.448559625080514e-08, - "loss": 3.7478, - "step": 2900000 - }, - { - "epoch": 32.21, - "learning_rate": 4.4471714457055283e-08, - "loss": 3.7645, - "step": 2900500 - }, - { - "epoch": 32.22, - "learning_rate": 4.445783266330542e-08, - "loss": 3.7541, - "step": 2901000 - }, - { - "epoch": 32.22, - "learning_rate": 4.4443950869555555e-08, - "loss": 3.7643, - "step": 2901500 - }, - { - "epoch": 32.23, - "learning_rate": 4.44300690758057e-08, - "loss": 3.7763, - "step": 2902000 - }, - { - "epoch": 32.23, - "learning_rate": 4.441618728205583e-08, - "loss": 3.7658, - "step": 2902500 - }, - { - "epoch": 32.24, - "learning_rate": 4.4402305488305975e-08, - "loss": 3.7637, - "step": 2903000 - }, - { - "epoch": 32.24, - "learning_rate": 4.438842369455612e-08, - "loss": 3.7652, - "step": 2903500 - }, - { - "epoch": 32.25, - "learning_rate": 4.437454190080625e-08, - "loss": 3.7585, - "step": 2904000 - }, - { - "epoch": 32.26, - "learning_rate": 4.436066010705639e-08, - "loss": 3.7384, - "step": 2904500 - }, - { - "epoch": 32.26, - "learning_rate": 4.434677831330653e-08, - "loss": 3.7683, - "step": 2905000 - }, - { - "epoch": 32.27, - "learning_rate": 4.4332896519556666e-08, - "loss": 3.7554, - "step": 2905500 - }, - { - "epoch": 32.27, - "learning_rate": 4.431901472580681e-08, - "loss": 3.7596, - "step": 2906000 - }, - { - "epoch": 32.28, - "learning_rate": 4.430513293205695e-08, - "loss": 3.7407, - "step": 2906500 - }, - { - "epoch": 32.28, - "learning_rate": 4.4291251138307087e-08, - "loss": 3.7728, - "step": 2907000 - }, - { - "epoch": 32.29, - "learning_rate": 4.427736934455722e-08, - "loss": 3.7572, - "step": 2907500 - }, - { - "epoch": 32.29, - "learning_rate": 4.4263487550807364e-08, - "loss": 3.737, - "step": 2908000 - }, - { - "epoch": 32.3, - "learning_rate": 4.42496057570575e-08, - "loss": 3.7615, - "step": 2908500 - }, - { - "epoch": 32.31, - "learning_rate": 4.423572396330764e-08, - "loss": 3.7604, - "step": 2909000 - }, - { - "epoch": 32.31, - "learning_rate": 4.422184216955778e-08, - "loss": 3.7491, - "step": 2909500 - }, - { - "epoch": 32.32, - "learning_rate": 4.420796037580792e-08, - "loss": 3.7687, - "step": 2910000 - }, - { - "epoch": 32.32, - "learning_rate": 4.4194078582058056e-08, - "loss": 3.7661, - "step": 2910500 - }, - { - "epoch": 32.33, - "learning_rate": 4.418019678830819e-08, - "loss": 3.7766, - "step": 2911000 - }, - { - "epoch": 32.33, - "learning_rate": 4.4166314994558334e-08, - "loss": 3.765, - "step": 2911500 - }, - { - "epoch": 32.34, - "learning_rate": 4.4152433200808476e-08, - "loss": 3.7821, - "step": 2912000 - }, - { - "epoch": 32.34, - "learning_rate": 4.413855140705861e-08, - "loss": 3.7942, - "step": 2912500 - }, - { - "epoch": 32.35, - "learning_rate": 4.4124669613308754e-08, - "loss": 3.767, - "step": 2913000 - }, - { - "epoch": 32.36, - "learning_rate": 4.411078781955889e-08, - "loss": 3.7776, - "step": 2913500 - }, - { - "epoch": 32.36, - "learning_rate": 4.4096906025809025e-08, - "loss": 3.7549, - "step": 2914000 - }, - { - "epoch": 32.37, - "learning_rate": 4.408302423205917e-08, - "loss": 3.7848, - "step": 2914500 - }, - { - "epoch": 32.37, - "learning_rate": 4.406914243830931e-08, - "loss": 3.7703, - "step": 2915000 - }, - { - "epoch": 32.38, - "learning_rate": 4.4055260644559445e-08, - "loss": 3.7769, - "step": 2915500 - }, - { - "epoch": 32.38, - "learning_rate": 4.404137885080959e-08, - "loss": 3.7403, - "step": 2916000 - }, - { - "epoch": 32.39, - "learning_rate": 4.4027497057059723e-08, - "loss": 3.7579, - "step": 2916500 - }, - { - "epoch": 32.39, - "learning_rate": 4.401361526330986e-08, - "loss": 3.7853, - "step": 2917000 - }, - { - "epoch": 32.4, - "learning_rate": 4.399973346956e-08, - "loss": 3.7661, - "step": 2917500 - }, - { - "epoch": 32.41, - "learning_rate": 4.398585167581014e-08, - "loss": 3.7605, - "step": 2918000 - }, - { - "epoch": 32.41, - "learning_rate": 4.397196988206028e-08, - "loss": 3.7604, - "step": 2918500 - }, - { - "epoch": 32.42, - "learning_rate": 4.395808808831042e-08, - "loss": 3.7866, - "step": 2919000 - }, - { - "epoch": 32.42, - "learning_rate": 4.394420629456056e-08, - "loss": 3.7694, - "step": 2919500 - }, - { - "epoch": 32.43, - "learning_rate": 4.393032450081069e-08, - "loss": 3.7735, - "step": 2920000 - }, - { - "epoch": 32.43, - "learning_rate": 4.3916442707060835e-08, - "loss": 3.7436, - "step": 2920500 - }, - { - "epoch": 32.44, - "learning_rate": 4.390256091331097e-08, - "loss": 3.7456, - "step": 2921000 - }, - { - "epoch": 32.44, - "learning_rate": 4.388867911956111e-08, - "loss": 3.7682, - "step": 2921500 - }, - { - "epoch": 32.45, - "learning_rate": 4.3874797325811255e-08, - "loss": 3.7458, - "step": 2922000 - }, - { - "epoch": 32.46, - "learning_rate": 4.386091553206139e-08, - "loss": 3.7691, - "step": 2922500 - }, - { - "epoch": 32.46, - "learning_rate": 4.3847033738311526e-08, - "loss": 3.7418, - "step": 2923000 - }, - { - "epoch": 32.47, - "learning_rate": 4.383315194456166e-08, - "loss": 3.7662, - "step": 2923500 - }, - { - "epoch": 32.47, - "learning_rate": 4.3819270150811804e-08, - "loss": 3.7551, - "step": 2924000 - }, - { - "epoch": 32.48, - "learning_rate": 4.3805388357061947e-08, - "loss": 3.7528, - "step": 2924500 - }, - { - "epoch": 32.48, - "learning_rate": 4.379150656331208e-08, - "loss": 3.7577, - "step": 2925000 - }, - { - "epoch": 32.49, - "learning_rate": 4.3777624769562225e-08, - "loss": 3.7692, - "step": 2925500 - }, - { - "epoch": 32.49, - "learning_rate": 4.376374297581236e-08, - "loss": 3.7541, - "step": 2926000 - }, - { - "epoch": 32.5, - "learning_rate": 4.3749861182062496e-08, - "loss": 3.7732, - "step": 2926500 - }, - { - "epoch": 32.51, - "learning_rate": 4.373597938831264e-08, - "loss": 3.7502, - "step": 2927000 - }, - { - "epoch": 32.51, - "learning_rate": 4.372209759456278e-08, - "loss": 3.7676, - "step": 2927500 - }, - { - "epoch": 32.52, - "learning_rate": 4.3708215800812916e-08, - "loss": 3.7692, - "step": 2928000 - }, - { - "epoch": 32.52, - "learning_rate": 4.369433400706306e-08, - "loss": 3.7638, - "step": 2928500 - }, - { - "epoch": 32.53, - "learning_rate": 4.3680452213313194e-08, - "loss": 3.7326, - "step": 2929000 - }, - { - "epoch": 32.53, - "learning_rate": 4.366657041956333e-08, - "loss": 3.7494, - "step": 2929500 - }, - { - "epoch": 32.54, - "learning_rate": 4.365268862581347e-08, - "loss": 3.7645, - "step": 2930000 - }, - { - "epoch": 32.54, - "learning_rate": 4.3638806832063614e-08, - "loss": 3.7549, - "step": 2930500 - }, - { - "epoch": 32.55, - "learning_rate": 4.362492503831375e-08, - "loss": 3.7675, - "step": 2931000 - }, - { - "epoch": 32.56, - "learning_rate": 4.3611043244563885e-08, - "loss": 3.7538, - "step": 2931500 - }, - { - "epoch": 32.56, - "learning_rate": 4.359716145081402e-08, - "loss": 3.78, - "step": 2932000 - }, - { - "epoch": 32.57, - "learning_rate": 4.3583279657064163e-08, - "loss": 3.7514, - "step": 2932500 - }, - { - "epoch": 32.57, - "learning_rate": 4.3569397863314306e-08, - "loss": 3.7748, - "step": 2933000 - }, - { - "epoch": 32.58, - "learning_rate": 4.355551606956444e-08, - "loss": 3.7552, - "step": 2933500 - }, - { - "epoch": 32.58, - "learning_rate": 4.3541634275814584e-08, - "loss": 3.7586, - "step": 2934000 - }, - { - "epoch": 32.59, - "learning_rate": 4.352775248206472e-08, - "loss": 3.7554, - "step": 2934500 - }, - { - "epoch": 32.59, - "learning_rate": 4.3513870688314855e-08, - "loss": 3.7595, - "step": 2935000 - }, - { - "epoch": 32.6, - "learning_rate": 4.3499988894565e-08, - "loss": 3.7534, - "step": 2935500 - }, - { - "epoch": 32.61, - "learning_rate": 4.348610710081514e-08, - "loss": 3.7545, - "step": 2936000 - }, - { - "epoch": 32.61, - "learning_rate": 4.3472225307065275e-08, - "loss": 3.7649, - "step": 2936500 - }, - { - "epoch": 32.62, - "learning_rate": 4.345834351331542e-08, - "loss": 3.748, - "step": 2937000 - }, - { - "epoch": 32.62, - "learning_rate": 4.344446171956555e-08, - "loss": 3.7764, - "step": 2937500 - }, - { - "epoch": 32.63, - "learning_rate": 4.343057992581569e-08, - "loss": 3.7488, - "step": 2938000 - }, - { - "epoch": 32.63, - "learning_rate": 4.341669813206583e-08, - "loss": 3.7843, - "step": 2938500 - }, - { - "epoch": 32.64, - "learning_rate": 4.3402816338315966e-08, - "loss": 3.7544, - "step": 2939000 - }, - { - "epoch": 32.64, - "learning_rate": 4.338893454456611e-08, - "loss": 3.7571, - "step": 2939500 - }, - { - "epoch": 32.65, - "learning_rate": 4.337505275081625e-08, - "loss": 3.7696, - "step": 2940000 - }, - { - "epoch": 32.66, - "learning_rate": 4.3361170957066387e-08, - "loss": 3.7596, - "step": 2940500 - }, - { - "epoch": 32.66, - "learning_rate": 4.334728916331652e-08, - "loss": 3.7791, - "step": 2941000 - }, - { - "epoch": 32.67, - "learning_rate": 4.3333407369566665e-08, - "loss": 3.7632, - "step": 2941500 - }, - { - "epoch": 32.67, - "learning_rate": 4.33195255758168e-08, - "loss": 3.7645, - "step": 2942000 - }, - { - "epoch": 32.68, - "learning_rate": 4.330564378206694e-08, - "loss": 3.769, - "step": 2942500 - }, - { - "epoch": 32.68, - "learning_rate": 4.3291761988317085e-08, - "loss": 3.763, - "step": 2943000 - }, - { - "epoch": 32.69, - "learning_rate": 4.327788019456722e-08, - "loss": 3.763, - "step": 2943500 - }, - { - "epoch": 32.69, - "learning_rate": 4.3263998400817356e-08, - "loss": 3.7635, - "step": 2944000 - }, - { - "epoch": 32.7, - "learning_rate": 4.32501166070675e-08, - "loss": 3.7683, - "step": 2944500 - }, - { - "epoch": 32.71, - "learning_rate": 4.3236234813317634e-08, - "loss": 3.7428, - "step": 2945000 - }, - { - "epoch": 32.71, - "learning_rate": 4.3222353019567776e-08, - "loss": 3.7381, - "step": 2945500 - }, - { - "epoch": 32.72, - "learning_rate": 4.320847122581791e-08, - "loss": 3.7751, - "step": 2946000 - }, - { - "epoch": 32.72, - "learning_rate": 4.3194589432068054e-08, - "loss": 3.7728, - "step": 2946500 - }, - { - "epoch": 32.73, - "learning_rate": 4.318070763831819e-08, - "loss": 3.7699, - "step": 2947000 - }, - { - "epoch": 32.73, - "learning_rate": 4.3166825844568325e-08, - "loss": 3.7628, - "step": 2947500 - }, - { - "epoch": 32.74, - "learning_rate": 4.315294405081847e-08, - "loss": 3.757, - "step": 2948000 - }, - { - "epoch": 32.74, - "learning_rate": 4.313906225706861e-08, - "loss": 3.7636, - "step": 2948500 - }, - { - "epoch": 32.75, - "learning_rate": 4.3125180463318746e-08, - "loss": 3.7597, - "step": 2949000 - }, - { - "epoch": 32.76, - "learning_rate": 4.311129866956889e-08, - "loss": 3.7664, - "step": 2949500 - }, - { - "epoch": 32.76, - "learning_rate": 4.3097416875819023e-08, - "loss": 3.7609, - "step": 2950000 - }, - { - "epoch": 32.77, - "learning_rate": 4.308353508206916e-08, - "loss": 3.7506, - "step": 2950500 - }, - { - "epoch": 32.77, - "learning_rate": 4.30696532883193e-08, - "loss": 3.7589, - "step": 2951000 - }, - { - "epoch": 32.78, - "learning_rate": 4.3055771494569444e-08, - "loss": 3.7634, - "step": 2951500 - }, - { - "epoch": 32.78, - "learning_rate": 4.304188970081958e-08, - "loss": 3.757, - "step": 2952000 - }, - { - "epoch": 32.79, - "learning_rate": 4.302800790706972e-08, - "loss": 3.7416, - "step": 2952500 - }, - { - "epoch": 32.79, - "learning_rate": 4.301412611331986e-08, - "loss": 3.7597, - "step": 2953000 - }, - { - "epoch": 32.8, - "learning_rate": 4.300024431956999e-08, - "loss": 3.7543, - "step": 2953500 - }, - { - "epoch": 32.81, - "learning_rate": 4.2986362525820135e-08, - "loss": 3.7661, - "step": 2954000 - }, - { - "epoch": 32.81, - "learning_rate": 4.297248073207027e-08, - "loss": 3.7557, - "step": 2954500 - }, - { - "epoch": 32.82, - "learning_rate": 4.295859893832041e-08, - "loss": 3.7564, - "step": 2955000 - }, - { - "epoch": 32.82, - "learning_rate": 4.2944717144570555e-08, - "loss": 3.7538, - "step": 2955500 - }, - { - "epoch": 32.83, - "learning_rate": 4.293083535082069e-08, - "loss": 3.7671, - "step": 2956000 - }, - { - "epoch": 32.83, - "learning_rate": 4.2916953557070827e-08, - "loss": 3.7622, - "step": 2956500 - }, - { - "epoch": 32.84, - "learning_rate": 4.290307176332097e-08, - "loss": 3.7508, - "step": 2957000 - }, - { - "epoch": 32.84, - "learning_rate": 4.2889189969571104e-08, - "loss": 3.7583, - "step": 2957500 - }, - { - "epoch": 32.85, - "learning_rate": 4.287530817582125e-08, - "loss": 3.7672, - "step": 2958000 - }, - { - "epoch": 32.86, - "learning_rate": 4.286142638207139e-08, - "loss": 3.7523, - "step": 2958500 - }, - { - "epoch": 32.86, - "learning_rate": 4.2847544588321525e-08, - "loss": 3.7561, - "step": 2959000 - }, - { - "epoch": 32.87, - "learning_rate": 4.283366279457166e-08, - "loss": 3.7496, - "step": 2959500 - }, - { - "epoch": 32.87, - "learning_rate": 4.28197810008218e-08, - "loss": 3.7694, - "step": 2960000 - }, - { - "epoch": 32.88, - "learning_rate": 4.280589920707194e-08, - "loss": 3.7495, - "step": 2960500 - }, - { - "epoch": 32.88, - "learning_rate": 4.279201741332208e-08, - "loss": 3.7545, - "step": 2961000 - }, - { - "epoch": 32.89, - "learning_rate": 4.2778135619572216e-08, - "loss": 3.7625, - "step": 2961500 - }, - { - "epoch": 32.89, - "learning_rate": 4.276425382582235e-08, - "loss": 3.7661, - "step": 2962000 - }, - { - "epoch": 32.9, - "learning_rate": 4.2750372032072494e-08, - "loss": 3.7644, - "step": 2962500 - }, - { - "epoch": 32.91, - "learning_rate": 4.273649023832263e-08, - "loss": 3.7606, - "step": 2963000 - }, - { - "epoch": 32.91, - "learning_rate": 4.272260844457277e-08, - "loss": 3.7532, - "step": 2963500 - }, - { - "epoch": 32.92, - "learning_rate": 4.2708726650822914e-08, - "loss": 3.7812, - "step": 2964000 - }, - { - "epoch": 32.92, - "learning_rate": 4.269484485707305e-08, - "loss": 3.7498, - "step": 2964500 - }, - { - "epoch": 32.93, - "learning_rate": 4.2680963063323186e-08, - "loss": 3.7682, - "step": 2965000 - }, - { - "epoch": 32.93, - "learning_rate": 4.266708126957333e-08, - "loss": 3.7603, - "step": 2965500 - }, - { - "epoch": 32.94, - "learning_rate": 4.2653199475823463e-08, - "loss": 3.7575, - "step": 2966000 - }, - { - "epoch": 32.94, - "learning_rate": 4.2639317682073606e-08, - "loss": 3.7574, - "step": 2966500 - }, - { - "epoch": 32.95, - "learning_rate": 4.262543588832375e-08, - "loss": 3.7667, - "step": 2967000 - }, - { - "epoch": 32.96, - "learning_rate": 4.2611554094573884e-08, - "loss": 3.767, - "step": 2967500 - }, - { - "epoch": 32.96, - "learning_rate": 4.259767230082402e-08, - "loss": 3.7593, - "step": 2968000 - }, - { - "epoch": 32.97, - "learning_rate": 4.2583790507074155e-08, - "loss": 3.7385, - "step": 2968500 - }, - { - "epoch": 32.97, - "learning_rate": 4.25699087133243e-08, - "loss": 3.7377, - "step": 2969000 - }, - { - "epoch": 32.98, - "learning_rate": 4.255602691957444e-08, - "loss": 3.7575, - "step": 2969500 - }, - { - "epoch": 32.98, - "learning_rate": 4.2542145125824575e-08, - "loss": 3.7583, - "step": 2970000 - }, - { - "epoch": 32.99, - "learning_rate": 4.252826333207472e-08, - "loss": 3.7637, - "step": 2970500 - }, - { - "epoch": 32.99, - "learning_rate": 4.251438153832485e-08, - "loss": 3.7625, - "step": 2971000 - }, - { - "epoch": 33.0, - "learning_rate": 4.250049974457499e-08, - "loss": 3.7628, - "step": 2971500 - }, - { - "epoch": 33.0, - "eval_loss": 3.8318238258361816, - "eval_runtime": 6.2995, - "eval_samples_per_second": 246.685, - "step": 2971518 - }, - { - "epoch": 33.01, - "learning_rate": 4.248661795082513e-08, - "loss": 3.7672, - "step": 2972000 - }, - { - "epoch": 33.01, - "learning_rate": 4.247273615707527e-08, - "loss": 3.7588, - "step": 2972500 - }, - { - "epoch": 33.02, - "learning_rate": 4.245885436332541e-08, - "loss": 3.7506, - "step": 2973000 - }, - { - "epoch": 33.02, - "learning_rate": 4.244497256957555e-08, - "loss": 3.7592, - "step": 2973500 - }, - { - "epoch": 33.03, - "learning_rate": 4.243109077582569e-08, - "loss": 3.7514, - "step": 2974000 - }, - { - "epoch": 33.03, - "learning_rate": 4.241720898207582e-08, - "loss": 3.7465, - "step": 2974500 - }, - { - "epoch": 33.04, - "learning_rate": 4.2403327188325965e-08, - "loss": 3.7638, - "step": 2975000 - }, - { - "epoch": 33.04, - "learning_rate": 4.23894453945761e-08, - "loss": 3.7594, - "step": 2975500 - }, - { - "epoch": 33.05, - "learning_rate": 4.237556360082624e-08, - "loss": 3.751, - "step": 2976000 - }, - { - "epoch": 33.06, - "learning_rate": 4.2361681807076385e-08, - "loss": 3.7603, - "step": 2976500 - }, - { - "epoch": 33.06, - "learning_rate": 4.234780001332652e-08, - "loss": 3.7406, - "step": 2977000 - }, - { - "epoch": 33.07, - "learning_rate": 4.2333918219576656e-08, - "loss": 3.7563, - "step": 2977500 - }, - { - "epoch": 33.07, - "learning_rate": 4.23200364258268e-08, - "loss": 3.7728, - "step": 2978000 - }, - { - "epoch": 33.08, - "learning_rate": 4.2306154632076934e-08, - "loss": 3.7653, - "step": 2978500 - }, - { - "epoch": 33.08, - "learning_rate": 4.2292272838327076e-08, - "loss": 3.7594, - "step": 2979000 - }, - { - "epoch": 33.09, - "learning_rate": 4.227839104457722e-08, - "loss": 3.7651, - "step": 2979500 - }, - { - "epoch": 33.09, - "learning_rate": 4.2264509250827354e-08, - "loss": 3.7488, - "step": 2980000 - }, - { - "epoch": 33.1, - "learning_rate": 4.225062745707749e-08, - "loss": 3.7471, - "step": 2980500 - }, - { - "epoch": 33.11, - "learning_rate": 4.223674566332763e-08, - "loss": 3.7588, - "step": 2981000 - }, - { - "epoch": 33.11, - "learning_rate": 4.222286386957777e-08, - "loss": 3.7564, - "step": 2981500 - }, - { - "epoch": 33.12, - "learning_rate": 4.220898207582791e-08, - "loss": 3.7624, - "step": 2982000 - }, - { - "epoch": 33.12, - "learning_rate": 4.2195100282078046e-08, - "loss": 3.7513, - "step": 2982500 - }, - { - "epoch": 33.13, - "learning_rate": 4.218121848832819e-08, - "loss": 3.7534, - "step": 2983000 - }, - { - "epoch": 33.13, - "learning_rate": 4.2167336694578324e-08, - "loss": 3.7573, - "step": 2983500 - }, - { - "epoch": 33.14, - "learning_rate": 4.215345490082846e-08, - "loss": 3.746, - "step": 2984000 - }, - { - "epoch": 33.14, - "learning_rate": 4.21395731070786e-08, - "loss": 3.7505, - "step": 2984500 - }, - { - "epoch": 33.15, - "learning_rate": 4.2125691313328744e-08, - "loss": 3.7718, - "step": 2985000 - }, - { - "epoch": 33.16, - "learning_rate": 4.211180951957888e-08, - "loss": 3.7519, - "step": 2985500 - }, - { - "epoch": 33.16, - "learning_rate": 4.209792772582902e-08, - "loss": 3.7223, - "step": 2986000 - }, - { - "epoch": 33.17, - "learning_rate": 4.208404593207916e-08, - "loss": 3.762, - "step": 2986500 - }, - { - "epoch": 33.17, - "learning_rate": 4.207016413832929e-08, - "loss": 3.7671, - "step": 2987000 - }, - { - "epoch": 33.18, - "learning_rate": 4.2056282344579435e-08, - "loss": 3.7652, - "step": 2987500 - }, - { - "epoch": 33.18, - "learning_rate": 4.204240055082958e-08, - "loss": 3.7673, - "step": 2988000 - }, - { - "epoch": 33.19, - "learning_rate": 4.202851875707971e-08, - "loss": 3.7631, - "step": 2988500 - }, - { - "epoch": 33.19, - "learning_rate": 4.2014636963329855e-08, - "loss": 3.7392, - "step": 2989000 - }, - { - "epoch": 33.2, - "learning_rate": 4.200075516957999e-08, - "loss": 3.7444, - "step": 2989500 - }, - { - "epoch": 33.21, - "learning_rate": 4.1986873375830127e-08, - "loss": 3.7564, - "step": 2990000 - }, - { - "epoch": 33.21, - "learning_rate": 4.197299158208027e-08, - "loss": 3.7541, - "step": 2990500 - }, - { - "epoch": 33.22, - "learning_rate": 4.1959109788330405e-08, - "loss": 3.7734, - "step": 2991000 - }, - { - "epoch": 33.22, - "learning_rate": 4.194522799458055e-08, - "loss": 3.7489, - "step": 2991500 - }, - { - "epoch": 33.23, - "learning_rate": 4.193134620083069e-08, - "loss": 3.7701, - "step": 2992000 - }, - { - "epoch": 33.23, - "learning_rate": 4.1917464407080825e-08, - "loss": 3.7663, - "step": 2992500 - }, - { - "epoch": 33.24, - "learning_rate": 4.190358261333096e-08, - "loss": 3.7604, - "step": 2993000 - }, - { - "epoch": 33.24, - "learning_rate": 4.18897008195811e-08, - "loss": 3.7661, - "step": 2993500 - }, - { - "epoch": 33.25, - "learning_rate": 4.187581902583124e-08, - "loss": 3.7687, - "step": 2994000 - }, - { - "epoch": 33.26, - "learning_rate": 4.186193723208138e-08, - "loss": 3.7297, - "step": 2994500 - }, - { - "epoch": 33.26, - "learning_rate": 4.184805543833152e-08, - "loss": 3.7729, - "step": 2995000 - }, - { - "epoch": 33.27, - "learning_rate": 4.183417364458165e-08, - "loss": 3.7532, - "step": 2995500 - }, - { - "epoch": 33.27, - "learning_rate": 4.1820291850831794e-08, - "loss": 3.7651, - "step": 2996000 - }, - { - "epoch": 33.28, - "learning_rate": 4.1806410057081936e-08, - "loss": 3.7615, - "step": 2996500 - }, - { - "epoch": 33.28, - "learning_rate": 4.179252826333207e-08, - "loss": 3.7717, - "step": 2997000 - }, - { - "epoch": 33.29, - "learning_rate": 4.1778646469582214e-08, - "loss": 3.7716, - "step": 2997500 - }, - { - "epoch": 33.29, - "learning_rate": 4.176476467583235e-08, - "loss": 3.7686, - "step": 2998000 - }, - { - "epoch": 33.3, - "learning_rate": 4.1750882882082486e-08, - "loss": 3.7659, - "step": 2998500 - }, - { - "epoch": 33.31, - "learning_rate": 4.173700108833263e-08, - "loss": 3.765, - "step": 2999000 - }, - { - "epoch": 33.31, - "learning_rate": 4.1723119294582763e-08, - "loss": 3.759, - "step": 2999500 - }, - { - "epoch": 33.32, - "learning_rate": 4.1709237500832906e-08, - "loss": 3.7735, - "step": 3000000 - }, - { - "epoch": 33.32, - "learning_rate": 4.169535570708305e-08, - "loss": 3.7671, - "step": 3000500 - }, - { - "epoch": 33.33, - "learning_rate": 4.1681473913333184e-08, - "loss": 3.7485, - "step": 3001000 - }, - { - "epoch": 33.33, - "learning_rate": 4.166759211958332e-08, - "loss": 3.7685, - "step": 3001500 - }, - { - "epoch": 33.34, - "learning_rate": 4.165371032583346e-08, - "loss": 3.7545, - "step": 3002000 - }, - { - "epoch": 33.34, - "learning_rate": 4.16398285320836e-08, - "loss": 3.7649, - "step": 3002500 - }, - { - "epoch": 33.35, - "learning_rate": 4.162594673833374e-08, - "loss": 3.7882, - "step": 3003000 - }, - { - "epoch": 33.36, - "learning_rate": 4.161206494458388e-08, - "loss": 3.7508, - "step": 3003500 - }, - { - "epoch": 33.36, - "learning_rate": 4.159818315083402e-08, - "loss": 3.7391, - "step": 3004000 - }, - { - "epoch": 33.37, - "learning_rate": 4.158430135708415e-08, - "loss": 3.7653, - "step": 3004500 - }, - { - "epoch": 33.37, - "learning_rate": 4.157041956333429e-08, - "loss": 3.769, - "step": 3005000 - }, - { - "epoch": 33.38, - "learning_rate": 4.155653776958443e-08, - "loss": 3.7645, - "step": 3005500 - }, - { - "epoch": 33.38, - "learning_rate": 4.154265597583457e-08, - "loss": 3.7727, - "step": 3006000 - }, - { - "epoch": 33.39, - "learning_rate": 4.152877418208471e-08, - "loss": 3.7668, - "step": 3006500 - }, - { - "epoch": 33.39, - "learning_rate": 4.151489238833485e-08, - "loss": 3.747, - "step": 3007000 - }, - { - "epoch": 33.4, - "learning_rate": 4.150101059458499e-08, - "loss": 3.7493, - "step": 3007500 - }, - { - "epoch": 33.41, - "learning_rate": 4.148712880083512e-08, - "loss": 3.7632, - "step": 3008000 - }, - { - "epoch": 33.41, - "learning_rate": 4.1473247007085265e-08, - "loss": 3.7494, - "step": 3008500 - }, - { - "epoch": 33.42, - "learning_rate": 4.145936521333541e-08, - "loss": 3.7536, - "step": 3009000 - }, - { - "epoch": 33.42, - "learning_rate": 4.144548341958554e-08, - "loss": 3.7584, - "step": 3009500 - }, - { - "epoch": 33.43, - "learning_rate": 4.1431601625835685e-08, - "loss": 3.7868, - "step": 3010000 - }, - { - "epoch": 33.43, - "learning_rate": 4.141771983208582e-08, - "loss": 3.7545, - "step": 3010500 - }, - { - "epoch": 33.44, - "learning_rate": 4.1403838038335956e-08, - "loss": 3.7691, - "step": 3011000 - }, - { - "epoch": 33.44, - "learning_rate": 4.13899562445861e-08, - "loss": 3.7371, - "step": 3011500 - }, - { - "epoch": 33.45, - "learning_rate": 4.1376074450836234e-08, - "loss": 3.7668, - "step": 3012000 - }, - { - "epoch": 33.46, - "learning_rate": 4.1362192657086376e-08, - "loss": 3.7444, - "step": 3012500 - }, - { - "epoch": 33.46, - "learning_rate": 4.134831086333652e-08, - "loss": 3.747, - "step": 3013000 - }, - { - "epoch": 33.47, - "learning_rate": 4.1334429069586654e-08, - "loss": 3.7682, - "step": 3013500 - }, - { - "epoch": 33.47, - "learning_rate": 4.132054727583679e-08, - "loss": 3.7794, - "step": 3014000 - }, - { - "epoch": 33.48, - "learning_rate": 4.130666548208693e-08, - "loss": 3.7429, - "step": 3014500 - }, - { - "epoch": 33.48, - "learning_rate": 4.129278368833707e-08, - "loss": 3.7653, - "step": 3015000 - }, - { - "epoch": 33.49, - "learning_rate": 4.127890189458721e-08, - "loss": 3.7317, - "step": 3015500 - }, - { - "epoch": 33.49, - "learning_rate": 4.126502010083735e-08, - "loss": 3.7666, - "step": 3016000 - }, - { - "epoch": 33.5, - "learning_rate": 4.125113830708749e-08, - "loss": 3.7334, - "step": 3016500 - }, - { - "epoch": 33.51, - "learning_rate": 4.1237256513337624e-08, - "loss": 3.7695, - "step": 3017000 - }, - { - "epoch": 33.51, - "learning_rate": 4.1223374719587766e-08, - "loss": 3.7699, - "step": 3017500 - }, - { - "epoch": 33.52, - "learning_rate": 4.12094929258379e-08, - "loss": 3.7669, - "step": 3018000 - }, - { - "epoch": 33.52, - "learning_rate": 4.1195611132088044e-08, - "loss": 3.7645, - "step": 3018500 - }, - { - "epoch": 33.53, - "learning_rate": 4.118172933833818e-08, - "loss": 3.7424, - "step": 3019000 - }, - { - "epoch": 33.53, - "learning_rate": 4.116784754458832e-08, - "loss": 3.7601, - "step": 3019500 - }, - { - "epoch": 33.54, - "learning_rate": 4.115396575083846e-08, - "loss": 3.7823, - "step": 3020000 - }, - { - "epoch": 33.54, - "learning_rate": 4.114008395708859e-08, - "loss": 3.7645, - "step": 3020500 - }, - { - "epoch": 33.55, - "learning_rate": 4.1126202163338735e-08, - "loss": 3.7502, - "step": 3021000 - }, - { - "epoch": 33.56, - "learning_rate": 4.111232036958888e-08, - "loss": 3.7572, - "step": 3021500 - }, - { - "epoch": 33.56, - "learning_rate": 4.109843857583901e-08, - "loss": 3.7665, - "step": 3022000 - }, - { - "epoch": 33.57, - "learning_rate": 4.1084556782089155e-08, - "loss": 3.7527, - "step": 3022500 - }, - { - "epoch": 33.57, - "learning_rate": 4.107067498833929e-08, - "loss": 3.758, - "step": 3023000 - }, - { - "epoch": 33.58, - "learning_rate": 4.105679319458943e-08, - "loss": 3.7579, - "step": 3023500 - }, - { - "epoch": 33.58, - "learning_rate": 4.104291140083957e-08, - "loss": 3.7492, - "step": 3024000 - }, - { - "epoch": 33.59, - "learning_rate": 4.102902960708971e-08, - "loss": 3.7726, - "step": 3024500 - }, - { - "epoch": 33.59, - "learning_rate": 4.101514781333985e-08, - "loss": 3.7638, - "step": 3025000 - }, - { - "epoch": 33.6, - "learning_rate": 4.100126601958999e-08, - "loss": 3.7738, - "step": 3025500 - }, - { - "epoch": 33.61, - "learning_rate": 4.0987384225840125e-08, - "loss": 3.7373, - "step": 3026000 - }, - { - "epoch": 33.61, - "learning_rate": 4.097350243209026e-08, - "loss": 3.7331, - "step": 3026500 - }, - { - "epoch": 33.62, - "learning_rate": 4.09596206383404e-08, - "loss": 3.7807, - "step": 3027000 - }, - { - "epoch": 33.62, - "learning_rate": 4.094573884459054e-08, - "loss": 3.7439, - "step": 3027500 - }, - { - "epoch": 33.63, - "learning_rate": 4.093185705084068e-08, - "loss": 3.7509, - "step": 3028000 - }, - { - "epoch": 33.63, - "learning_rate": 4.091797525709082e-08, - "loss": 3.7572, - "step": 3028500 - }, - { - "epoch": 33.64, - "learning_rate": 4.090409346334095e-08, - "loss": 3.7641, - "step": 3029000 - }, - { - "epoch": 33.64, - "learning_rate": 4.0890211669591094e-08, - "loss": 3.7662, - "step": 3029500 - }, - { - "epoch": 33.65, - "learning_rate": 4.0876329875841236e-08, - "loss": 3.7678, - "step": 3030000 - }, - { - "epoch": 33.66, - "learning_rate": 4.086244808209137e-08, - "loss": 3.7716, - "step": 3030500 - }, - { - "epoch": 33.66, - "learning_rate": 4.0848566288341514e-08, - "loss": 3.7802, - "step": 3031000 - }, - { - "epoch": 33.67, - "learning_rate": 4.0834684494591657e-08, - "loss": 3.7705, - "step": 3031500 - }, - { - "epoch": 33.67, - "learning_rate": 4.0820802700841786e-08, - "loss": 3.7669, - "step": 3032000 - }, - { - "epoch": 33.68, - "learning_rate": 4.080692090709193e-08, - "loss": 3.7599, - "step": 3032500 - }, - { - "epoch": 33.68, - "learning_rate": 4.079303911334207e-08, - "loss": 3.7531, - "step": 3033000 - }, - { - "epoch": 33.69, - "learning_rate": 4.0779157319592206e-08, - "loss": 3.767, - "step": 3033500 - }, - { - "epoch": 33.69, - "learning_rate": 4.076527552584235e-08, - "loss": 3.7454, - "step": 3034000 - }, - { - "epoch": 33.7, - "learning_rate": 4.0751393732092484e-08, - "loss": 3.7582, - "step": 3034500 - }, - { - "epoch": 33.7, - "learning_rate": 4.073751193834262e-08, - "loss": 3.7813, - "step": 3035000 - }, - { - "epoch": 33.71, - "learning_rate": 4.072363014459276e-08, - "loss": 3.778, - "step": 3035500 - }, - { - "epoch": 33.72, - "learning_rate": 4.07097483508429e-08, - "loss": 3.7557, - "step": 3036000 - }, - { - "epoch": 33.72, - "learning_rate": 4.069586655709304e-08, - "loss": 3.7444, - "step": 3036500 - }, - { - "epoch": 33.73, - "learning_rate": 4.068198476334318e-08, - "loss": 3.7545, - "step": 3037000 - }, - { - "epoch": 33.73, - "learning_rate": 4.066810296959332e-08, - "loss": 3.7755, - "step": 3037500 - }, - { - "epoch": 33.74, - "learning_rate": 4.065422117584345e-08, - "loss": 3.7734, - "step": 3038000 - }, - { - "epoch": 33.74, - "learning_rate": 4.0640339382093595e-08, - "loss": 3.7723, - "step": 3038500 - }, - { - "epoch": 33.75, - "learning_rate": 4.062645758834373e-08, - "loss": 3.7541, - "step": 3039000 - }, - { - "epoch": 33.75, - "learning_rate": 4.061257579459387e-08, - "loss": 3.7627, - "step": 3039500 - }, - { - "epoch": 33.76, - "learning_rate": 4.0598694000844016e-08, - "loss": 3.7728, - "step": 3040000 - }, - { - "epoch": 33.77, - "learning_rate": 4.058481220709415e-08, - "loss": 3.7335, - "step": 3040500 - }, - { - "epoch": 33.77, - "learning_rate": 4.057093041334429e-08, - "loss": 3.7653, - "step": 3041000 - }, - { - "epoch": 33.78, - "learning_rate": 4.055704861959442e-08, - "loss": 3.767, - "step": 3041500 - }, - { - "epoch": 33.78, - "learning_rate": 4.0543166825844565e-08, - "loss": 3.782, - "step": 3042000 - }, - { - "epoch": 33.79, - "learning_rate": 4.052928503209471e-08, - "loss": 3.7644, - "step": 3042500 - }, - { - "epoch": 33.79, - "learning_rate": 4.051540323834484e-08, - "loss": 3.7563, - "step": 3043000 - }, - { - "epoch": 33.8, - "learning_rate": 4.0501521444594985e-08, - "loss": 3.7428, - "step": 3043500 - }, - { - "epoch": 33.8, - "learning_rate": 4.048763965084512e-08, - "loss": 3.7448, - "step": 3044000 - }, - { - "epoch": 33.81, - "learning_rate": 4.0473757857095256e-08, - "loss": 3.758, - "step": 3044500 - }, - { - "epoch": 33.82, - "learning_rate": 4.04598760633454e-08, - "loss": 3.7472, - "step": 3045000 - }, - { - "epoch": 33.82, - "learning_rate": 4.044599426959554e-08, - "loss": 3.751, - "step": 3045500 - }, - { - "epoch": 33.83, - "learning_rate": 4.0432112475845676e-08, - "loss": 3.755, - "step": 3046000 - }, - { - "epoch": 33.83, - "learning_rate": 4.041823068209582e-08, - "loss": 3.7539, - "step": 3046500 - }, - { - "epoch": 33.84, - "learning_rate": 4.0404348888345954e-08, - "loss": 3.7658, - "step": 3047000 - }, - { - "epoch": 33.84, - "learning_rate": 4.039046709459609e-08, - "loss": 3.7604, - "step": 3047500 - }, - { - "epoch": 33.85, - "learning_rate": 4.037658530084623e-08, - "loss": 3.7448, - "step": 3048000 - }, - { - "epoch": 33.85, - "learning_rate": 4.036270350709637e-08, - "loss": 3.7496, - "step": 3048500 - }, - { - "epoch": 33.86, - "learning_rate": 4.034882171334651e-08, - "loss": 3.7755, - "step": 3049000 - }, - { - "epoch": 33.87, - "learning_rate": 4.033493991959665e-08, - "loss": 3.7601, - "step": 3049500 - }, - { - "epoch": 33.87, - "learning_rate": 4.032105812584679e-08, - "loss": 3.7645, - "step": 3050000 - }, - { - "epoch": 33.88, - "learning_rate": 4.0307176332096924e-08, - "loss": 3.7533, - "step": 3050500 - }, - { - "epoch": 33.88, - "learning_rate": 4.0293294538347066e-08, - "loss": 3.7591, - "step": 3051000 - }, - { - "epoch": 33.89, - "learning_rate": 4.02794127445972e-08, - "loss": 3.7464, - "step": 3051500 - }, - { - "epoch": 33.89, - "learning_rate": 4.0265530950847344e-08, - "loss": 3.7609, - "step": 3052000 - }, - { - "epoch": 33.9, - "learning_rate": 4.0251649157097486e-08, - "loss": 3.7481, - "step": 3052500 - }, - { - "epoch": 33.9, - "learning_rate": 4.023776736334762e-08, - "loss": 3.7715, - "step": 3053000 - }, - { - "epoch": 33.91, - "learning_rate": 4.022388556959776e-08, - "loss": 3.7559, - "step": 3053500 - }, - { - "epoch": 33.92, - "learning_rate": 4.02100037758479e-08, - "loss": 3.7524, - "step": 3054000 - }, - { - "epoch": 33.92, - "learning_rate": 4.0196121982098035e-08, - "loss": 3.7324, - "step": 3054500 - }, - { - "epoch": 33.93, - "learning_rate": 4.018224018834818e-08, - "loss": 3.7638, - "step": 3055000 - }, - { - "epoch": 33.93, - "learning_rate": 4.016835839459832e-08, - "loss": 3.7416, - "step": 3055500 - }, - { - "epoch": 33.94, - "learning_rate": 4.0154476600848456e-08, - "loss": 3.7695, - "step": 3056000 - }, - { - "epoch": 33.94, - "learning_rate": 4.014059480709859e-08, - "loss": 3.7618, - "step": 3056500 - }, - { - "epoch": 33.95, - "learning_rate": 4.012671301334873e-08, - "loss": 3.7486, - "step": 3057000 - }, - { - "epoch": 33.95, - "learning_rate": 4.011283121959887e-08, - "loss": 3.7653, - "step": 3057500 - }, - { - "epoch": 33.96, - "learning_rate": 4.009894942584901e-08, - "loss": 3.7647, - "step": 3058000 - }, - { - "epoch": 33.97, - "learning_rate": 4.008506763209915e-08, - "loss": 3.7524, - "step": 3058500 - }, - { - "epoch": 33.97, - "learning_rate": 4.007118583834929e-08, - "loss": 3.7609, - "step": 3059000 - }, - { - "epoch": 33.98, - "learning_rate": 4.0057304044599425e-08, - "loss": 3.7714, - "step": 3059500 - }, - { - "epoch": 33.98, - "learning_rate": 4.004342225084956e-08, - "loss": 3.7555, - "step": 3060000 - }, - { - "epoch": 33.99, - "learning_rate": 4.00295404570997e-08, - "loss": 3.792, - "step": 3060500 - }, - { - "epoch": 33.99, - "learning_rate": 4.0015658663349845e-08, - "loss": 3.762, - "step": 3061000 - }, - { - "epoch": 34.0, - "learning_rate": 4.000177686959998e-08, - "loss": 3.7541, - "step": 3061500 - }, - { - "epoch": 34.0, - "eval_loss": 3.8307807445526123, - "eval_runtime": 6.3013, - "eval_samples_per_second": 246.615, - "step": 3061564 - }, - { - "epoch": 34.0, - "learning_rate": 3.998789507585012e-08, - "loss": 3.7586, - "step": 3062000 - }, - { - "epoch": 34.01, - "learning_rate": 3.997401328210026e-08, - "loss": 3.7673, - "step": 3062500 - }, - { - "epoch": 34.02, - "learning_rate": 3.9960131488350394e-08, - "loss": 3.7677, - "step": 3063000 - }, - { - "epoch": 34.02, - "learning_rate": 3.9946249694600537e-08, - "loss": 3.7803, - "step": 3063500 - }, - { - "epoch": 34.03, - "learning_rate": 3.993236790085067e-08, - "loss": 3.7327, - "step": 3064000 - }, - { - "epoch": 34.03, - "learning_rate": 3.9918486107100814e-08, - "loss": 3.7614, - "step": 3064500 - }, - { - "epoch": 34.04, - "learning_rate": 3.990460431335096e-08, - "loss": 3.7546, - "step": 3065000 - }, - { - "epoch": 34.04, - "learning_rate": 3.9890722519601086e-08, - "loss": 3.7374, - "step": 3065500 - }, - { - "epoch": 34.05, - "learning_rate": 3.987684072585123e-08, - "loss": 3.7578, - "step": 3066000 - }, - { - "epoch": 34.05, - "learning_rate": 3.986295893210137e-08, - "loss": 3.7645, - "step": 3066500 - }, - { - "epoch": 34.06, - "learning_rate": 3.9849077138351506e-08, - "loss": 3.7533, - "step": 3067000 - }, - { - "epoch": 34.07, - "learning_rate": 3.983519534460165e-08, - "loss": 3.7707, - "step": 3067500 - }, - { - "epoch": 34.07, - "learning_rate": 3.982131355085179e-08, - "loss": 3.77, - "step": 3068000 - }, - { - "epoch": 34.08, - "learning_rate": 3.980743175710192e-08, - "loss": 3.7595, - "step": 3068500 - }, - { - "epoch": 34.08, - "learning_rate": 3.979354996335206e-08, - "loss": 3.7671, - "step": 3069000 - }, - { - "epoch": 34.09, - "learning_rate": 3.9779668169602204e-08, - "loss": 3.7681, - "step": 3069500 - }, - { - "epoch": 34.09, - "learning_rate": 3.976578637585234e-08, - "loss": 3.7587, - "step": 3070000 - }, - { - "epoch": 34.1, - "learning_rate": 3.975190458210248e-08, - "loss": 3.7568, - "step": 3070500 - }, - { - "epoch": 34.1, - "learning_rate": 3.973802278835262e-08, - "loss": 3.7506, - "step": 3071000 - }, - { - "epoch": 34.11, - "learning_rate": 3.972414099460275e-08, - "loss": 3.7685, - "step": 3071500 - }, - { - "epoch": 34.12, - "learning_rate": 3.9710259200852895e-08, - "loss": 3.7535, - "step": 3072000 - }, - { - "epoch": 34.12, - "learning_rate": 3.969637740710303e-08, - "loss": 3.7549, - "step": 3072500 - }, - { - "epoch": 34.13, - "learning_rate": 3.9682495613353173e-08, - "loss": 3.738, - "step": 3073000 - }, - { - "epoch": 34.13, - "learning_rate": 3.9668613819603316e-08, - "loss": 3.7708, - "step": 3073500 - }, - { - "epoch": 34.14, - "learning_rate": 3.965473202585345e-08, - "loss": 3.76, - "step": 3074000 - }, - { - "epoch": 34.14, - "learning_rate": 3.964085023210359e-08, - "loss": 3.7726, - "step": 3074500 - }, - { - "epoch": 34.15, - "learning_rate": 3.962696843835373e-08, - "loss": 3.7713, - "step": 3075000 - }, - { - "epoch": 34.15, - "learning_rate": 3.9613086644603865e-08, - "loss": 3.7556, - "step": 3075500 - }, - { - "epoch": 34.16, - "learning_rate": 3.959920485085401e-08, - "loss": 3.7419, - "step": 3076000 - }, - { - "epoch": 34.17, - "learning_rate": 3.958532305710415e-08, - "loss": 3.7425, - "step": 3076500 - }, - { - "epoch": 34.17, - "learning_rate": 3.9571441263354285e-08, - "loss": 3.7538, - "step": 3077000 - }, - { - "epoch": 34.18, - "learning_rate": 3.955755946960442e-08, - "loss": 3.7672, - "step": 3077500 - }, - { - "epoch": 34.18, - "learning_rate": 3.9543677675854556e-08, - "loss": 3.7443, - "step": 3078000 - }, - { - "epoch": 34.19, - "learning_rate": 3.95297958821047e-08, - "loss": 3.7455, - "step": 3078500 - }, - { - "epoch": 34.19, - "learning_rate": 3.951591408835484e-08, - "loss": 3.7496, - "step": 3079000 - }, - { - "epoch": 34.2, - "learning_rate": 3.9502032294604976e-08, - "loss": 3.7551, - "step": 3079500 - }, - { - "epoch": 34.2, - "learning_rate": 3.948815050085512e-08, - "loss": 3.7451, - "step": 3080000 - }, - { - "epoch": 34.21, - "learning_rate": 3.9474268707105254e-08, - "loss": 3.746, - "step": 3080500 - }, - { - "epoch": 34.22, - "learning_rate": 3.946038691335539e-08, - "loss": 3.7701, - "step": 3081000 - }, - { - "epoch": 34.22, - "learning_rate": 3.944650511960553e-08, - "loss": 3.7606, - "step": 3081500 - }, - { - "epoch": 34.23, - "learning_rate": 3.9432623325855675e-08, - "loss": 3.7756, - "step": 3082000 - }, - { - "epoch": 34.23, - "learning_rate": 3.941874153210581e-08, - "loss": 3.7615, - "step": 3082500 - }, - { - "epoch": 34.24, - "learning_rate": 3.940485973835595e-08, - "loss": 3.7339, - "step": 3083000 - }, - { - "epoch": 34.24, - "learning_rate": 3.939097794460609e-08, - "loss": 3.7747, - "step": 3083500 - }, - { - "epoch": 34.25, - "learning_rate": 3.9377096150856224e-08, - "loss": 3.7732, - "step": 3084000 - }, - { - "epoch": 34.25, - "learning_rate": 3.9363214357106366e-08, - "loss": 3.7539, - "step": 3084500 - }, - { - "epoch": 34.26, - "learning_rate": 3.934933256335651e-08, - "loss": 3.7646, - "step": 3085000 - }, - { - "epoch": 34.27, - "learning_rate": 3.9335450769606644e-08, - "loss": 3.7511, - "step": 3085500 - }, - { - "epoch": 34.27, - "learning_rate": 3.9321568975856786e-08, - "loss": 3.751, - "step": 3086000 - }, - { - "epoch": 34.28, - "learning_rate": 3.930768718210692e-08, - "loss": 3.7548, - "step": 3086500 - }, - { - "epoch": 34.28, - "learning_rate": 3.929380538835706e-08, - "loss": 3.7585, - "step": 3087000 - }, - { - "epoch": 34.29, - "learning_rate": 3.92799235946072e-08, - "loss": 3.7645, - "step": 3087500 - }, - { - "epoch": 34.29, - "learning_rate": 3.9266041800857335e-08, - "loss": 3.7591, - "step": 3088000 - }, - { - "epoch": 34.3, - "learning_rate": 3.925216000710748e-08, - "loss": 3.7538, - "step": 3088500 - }, - { - "epoch": 34.3, - "learning_rate": 3.923827821335762e-08, - "loss": 3.7505, - "step": 3089000 - }, - { - "epoch": 34.31, - "learning_rate": 3.9224396419607756e-08, - "loss": 3.7544, - "step": 3089500 - }, - { - "epoch": 34.32, - "learning_rate": 3.921051462585789e-08, - "loss": 3.765, - "step": 3090000 - }, - { - "epoch": 34.32, - "learning_rate": 3.9196632832108034e-08, - "loss": 3.7463, - "step": 3090500 - }, - { - "epoch": 34.33, - "learning_rate": 3.918275103835817e-08, - "loss": 3.7681, - "step": 3091000 - }, - { - "epoch": 34.33, - "learning_rate": 3.916886924460831e-08, - "loss": 3.7592, - "step": 3091500 - }, - { - "epoch": 34.34, - "learning_rate": 3.9154987450858454e-08, - "loss": 3.7626, - "step": 3092000 - }, - { - "epoch": 34.34, - "learning_rate": 3.914110565710859e-08, - "loss": 3.7506, - "step": 3092500 - }, - { - "epoch": 34.35, - "learning_rate": 3.9127223863358725e-08, - "loss": 3.7755, - "step": 3093000 - }, - { - "epoch": 34.35, - "learning_rate": 3.911334206960886e-08, - "loss": 3.7635, - "step": 3093500 - }, - { - "epoch": 34.36, - "learning_rate": 3.9099460275859e-08, - "loss": 3.7514, - "step": 3094000 - }, - { - "epoch": 34.37, - "learning_rate": 3.9085578482109145e-08, - "loss": 3.7677, - "step": 3094500 - }, - { - "epoch": 34.37, - "learning_rate": 3.907169668835928e-08, - "loss": 3.7773, - "step": 3095000 - }, - { - "epoch": 34.38, - "learning_rate": 3.905781489460942e-08, - "loss": 3.7555, - "step": 3095500 - }, - { - "epoch": 34.38, - "learning_rate": 3.904393310085956e-08, - "loss": 3.735, - "step": 3096000 - }, - { - "epoch": 34.39, - "learning_rate": 3.9030051307109694e-08, - "loss": 3.7562, - "step": 3096500 - }, - { - "epoch": 34.39, - "learning_rate": 3.9016169513359837e-08, - "loss": 3.7827, - "step": 3097000 - }, - { - "epoch": 34.4, - "learning_rate": 3.900228771960998e-08, - "loss": 3.7614, - "step": 3097500 - }, - { - "epoch": 34.4, - "learning_rate": 3.8988405925860115e-08, - "loss": 3.759, - "step": 3098000 - }, - { - "epoch": 34.41, - "learning_rate": 3.897452413211026e-08, - "loss": 3.7534, - "step": 3098500 - }, - { - "epoch": 34.42, - "learning_rate": 3.896064233836039e-08, - "loss": 3.7772, - "step": 3099000 - }, - { - "epoch": 34.42, - "learning_rate": 3.894676054461053e-08, - "loss": 3.7426, - "step": 3099500 - }, - { - "epoch": 34.43, - "learning_rate": 3.893287875086067e-08, - "loss": 3.7605, - "step": 3100000 - }, - { - "epoch": 34.43, - "learning_rate": 3.8918996957110806e-08, - "loss": 3.7555, - "step": 3100500 - }, - { - "epoch": 34.44, - "learning_rate": 3.890511516336095e-08, - "loss": 3.7471, - "step": 3101000 - }, - { - "epoch": 34.44, - "learning_rate": 3.889123336961109e-08, - "loss": 3.7554, - "step": 3101500 - }, - { - "epoch": 34.45, - "learning_rate": 3.887735157586122e-08, - "loss": 3.7529, - "step": 3102000 - }, - { - "epoch": 34.45, - "learning_rate": 3.886346978211136e-08, - "loss": 3.767, - "step": 3102500 - }, - { - "epoch": 34.46, - "learning_rate": 3.8849587988361504e-08, - "loss": 3.7566, - "step": 3103000 - }, - { - "epoch": 34.47, - "learning_rate": 3.883570619461164e-08, - "loss": 3.7764, - "step": 3103500 - }, - { - "epoch": 34.47, - "learning_rate": 3.882182440086178e-08, - "loss": 3.7602, - "step": 3104000 - }, - { - "epoch": 34.48, - "learning_rate": 3.880794260711192e-08, - "loss": 3.7342, - "step": 3104500 - }, - { - "epoch": 34.48, - "learning_rate": 3.879406081336205e-08, - "loss": 3.7563, - "step": 3105000 - }, - { - "epoch": 34.49, - "learning_rate": 3.8780179019612196e-08, - "loss": 3.755, - "step": 3105500 - }, - { - "epoch": 34.49, - "learning_rate": 3.876629722586234e-08, - "loss": 3.7632, - "step": 3106000 - }, - { - "epoch": 34.5, - "learning_rate": 3.8752415432112473e-08, - "loss": 3.7747, - "step": 3106500 - }, - { - "epoch": 34.5, - "learning_rate": 3.8738533638362616e-08, - "loss": 3.7706, - "step": 3107000 - }, - { - "epoch": 34.51, - "learning_rate": 3.872465184461275e-08, - "loss": 3.7522, - "step": 3107500 - }, - { - "epoch": 34.52, - "learning_rate": 3.871077005086289e-08, - "loss": 3.7508, - "step": 3108000 - }, - { - "epoch": 34.52, - "learning_rate": 3.869688825711303e-08, - "loss": 3.7621, - "step": 3108500 - }, - { - "epoch": 34.53, - "learning_rate": 3.8683006463363165e-08, - "loss": 3.7503, - "step": 3109000 - }, - { - "epoch": 34.53, - "learning_rate": 3.866912466961331e-08, - "loss": 3.7479, - "step": 3109500 - }, - { - "epoch": 34.54, - "learning_rate": 3.865524287586345e-08, - "loss": 3.7465, - "step": 3110000 - }, - { - "epoch": 34.54, - "learning_rate": 3.8641361082113585e-08, - "loss": 3.7474, - "step": 3110500 - }, - { - "epoch": 34.55, - "learning_rate": 3.862747928836372e-08, - "loss": 3.7562, - "step": 3111000 - }, - { - "epoch": 34.55, - "learning_rate": 3.861359749461386e-08, - "loss": 3.7551, - "step": 3111500 - }, - { - "epoch": 34.56, - "learning_rate": 3.8599715700864e-08, - "loss": 3.7683, - "step": 3112000 - }, - { - "epoch": 34.57, - "learning_rate": 3.858583390711414e-08, - "loss": 3.7422, - "step": 3112500 - }, - { - "epoch": 34.57, - "learning_rate": 3.857195211336428e-08, - "loss": 3.7582, - "step": 3113000 - }, - { - "epoch": 34.58, - "learning_rate": 3.855807031961442e-08, - "loss": 3.7583, - "step": 3113500 - }, - { - "epoch": 34.58, - "learning_rate": 3.8544188525864554e-08, - "loss": 3.764, - "step": 3114000 - }, - { - "epoch": 34.59, - "learning_rate": 3.85303067321147e-08, - "loss": 3.7664, - "step": 3114500 - }, - { - "epoch": 34.59, - "learning_rate": 3.851642493836483e-08, - "loss": 3.7628, - "step": 3115000 - }, - { - "epoch": 34.6, - "learning_rate": 3.8502543144614975e-08, - "loss": 3.7654, - "step": 3115500 - }, - { - "epoch": 34.6, - "learning_rate": 3.848866135086511e-08, - "loss": 3.7717, - "step": 3116000 - }, - { - "epoch": 34.61, - "learning_rate": 3.847477955711525e-08, - "loss": 3.755, - "step": 3116500 - }, - { - "epoch": 34.62, - "learning_rate": 3.846089776336539e-08, - "loss": 3.768, - "step": 3117000 - }, - { - "epoch": 34.62, - "learning_rate": 3.8447015969615524e-08, - "loss": 3.7646, - "step": 3117500 - }, - { - "epoch": 34.63, - "learning_rate": 3.8433134175865666e-08, - "loss": 3.7688, - "step": 3118000 - }, - { - "epoch": 34.63, - "learning_rate": 3.841925238211581e-08, - "loss": 3.7484, - "step": 3118500 - }, - { - "epoch": 34.64, - "learning_rate": 3.8405370588365944e-08, - "loss": 3.7559, - "step": 3119000 - }, - { - "epoch": 34.64, - "learning_rate": 3.8391488794616086e-08, - "loss": 3.7616, - "step": 3119500 - }, - { - "epoch": 34.65, - "learning_rate": 3.837760700086622e-08, - "loss": 3.763, - "step": 3120000 - }, - { - "epoch": 34.65, - "learning_rate": 3.836372520711636e-08, - "loss": 3.7592, - "step": 3120500 - }, - { - "epoch": 34.66, - "learning_rate": 3.83498434133665e-08, - "loss": 3.7625, - "step": 3121000 - }, - { - "epoch": 34.67, - "learning_rate": 3.833596161961664e-08, - "loss": 3.7424, - "step": 3121500 - }, - { - "epoch": 34.67, - "learning_rate": 3.832207982586678e-08, - "loss": 3.7649, - "step": 3122000 - }, - { - "epoch": 34.68, - "learning_rate": 3.830819803211692e-08, - "loss": 3.7609, - "step": 3122500 - }, - { - "epoch": 34.68, - "learning_rate": 3.8294316238367056e-08, - "loss": 3.767, - "step": 3123000 - }, - { - "epoch": 34.69, - "learning_rate": 3.828043444461719e-08, - "loss": 3.752, - "step": 3123500 - }, - { - "epoch": 34.69, - "learning_rate": 3.8266552650867334e-08, - "loss": 3.7593, - "step": 3124000 - }, - { - "epoch": 34.7, - "learning_rate": 3.825267085711747e-08, - "loss": 3.7542, - "step": 3124500 - }, - { - "epoch": 34.7, - "learning_rate": 3.823878906336761e-08, - "loss": 3.7822, - "step": 3125000 - }, - { - "epoch": 34.71, - "learning_rate": 3.8224907269617754e-08, - "loss": 3.7662, - "step": 3125500 - }, - { - "epoch": 34.72, - "learning_rate": 3.821102547586789e-08, - "loss": 3.7438, - "step": 3126000 - }, - { - "epoch": 34.72, - "learning_rate": 3.8197143682118025e-08, - "loss": 3.7587, - "step": 3126500 - }, - { - "epoch": 34.73, - "learning_rate": 3.818326188836817e-08, - "loss": 3.7748, - "step": 3127000 - }, - { - "epoch": 34.73, - "learning_rate": 3.81693800946183e-08, - "loss": 3.7873, - "step": 3127500 - }, - { - "epoch": 34.74, - "learning_rate": 3.8155498300868445e-08, - "loss": 3.7582, - "step": 3128000 - }, - { - "epoch": 34.74, - "learning_rate": 3.814161650711859e-08, - "loss": 3.7499, - "step": 3128500 - }, - { - "epoch": 34.75, - "learning_rate": 3.812773471336872e-08, - "loss": 3.7235, - "step": 3129000 - }, - { - "epoch": 34.75, - "learning_rate": 3.811385291961886e-08, - "loss": 3.7482, - "step": 3129500 - }, - { - "epoch": 34.76, - "learning_rate": 3.8099971125868994e-08, - "loss": 3.7629, - "step": 3130000 - }, - { - "epoch": 34.77, - "learning_rate": 3.808608933211914e-08, - "loss": 3.7643, - "step": 3130500 - }, - { - "epoch": 34.77, - "learning_rate": 3.807220753836928e-08, - "loss": 3.751, - "step": 3131000 - }, - { - "epoch": 34.78, - "learning_rate": 3.8058325744619415e-08, - "loss": 3.7472, - "step": 3131500 - }, - { - "epoch": 34.78, - "learning_rate": 3.804444395086956e-08, - "loss": 3.7704, - "step": 3132000 - }, - { - "epoch": 34.79, - "learning_rate": 3.803056215711969e-08, - "loss": 3.7546, - "step": 3132500 - }, - { - "epoch": 34.79, - "learning_rate": 3.801668036336983e-08, - "loss": 3.7607, - "step": 3133000 - }, - { - "epoch": 34.8, - "learning_rate": 3.800279856961997e-08, - "loss": 3.7579, - "step": 3133500 - }, - { - "epoch": 34.8, - "learning_rate": 3.798891677587011e-08, - "loss": 3.7418, - "step": 3134000 - }, - { - "epoch": 34.81, - "learning_rate": 3.797503498212025e-08, - "loss": 3.765, - "step": 3134500 - }, - { - "epoch": 34.82, - "learning_rate": 3.7961153188370384e-08, - "loss": 3.7546, - "step": 3135000 - }, - { - "epoch": 34.82, - "learning_rate": 3.7947271394620526e-08, - "loss": 3.7518, - "step": 3135500 - }, - { - "epoch": 34.83, - "learning_rate": 3.793338960087066e-08, - "loss": 3.7348, - "step": 3136000 - }, - { - "epoch": 34.83, - "learning_rate": 3.7919507807120804e-08, - "loss": 3.7574, - "step": 3136500 - }, - { - "epoch": 34.84, - "learning_rate": 3.790562601337094e-08, - "loss": 3.7515, - "step": 3137000 - }, - { - "epoch": 34.84, - "learning_rate": 3.789174421962108e-08, - "loss": 3.7448, - "step": 3137500 - }, - { - "epoch": 34.85, - "learning_rate": 3.787786242587122e-08, - "loss": 3.7546, - "step": 3138000 - }, - { - "epoch": 34.85, - "learning_rate": 3.7863980632121353e-08, - "loss": 3.7513, - "step": 3138500 - }, - { - "epoch": 34.86, - "learning_rate": 3.7850098838371496e-08, - "loss": 3.7429, - "step": 3139000 - }, - { - "epoch": 34.87, - "learning_rate": 3.783621704462164e-08, - "loss": 3.7597, - "step": 3139500 - }, - { - "epoch": 34.87, - "learning_rate": 3.7822335250871774e-08, - "loss": 3.7675, - "step": 3140000 - }, - { - "epoch": 34.88, - "learning_rate": 3.7808453457121916e-08, - "loss": 3.7708, - "step": 3140500 - }, - { - "epoch": 34.88, - "learning_rate": 3.779457166337205e-08, - "loss": 3.7486, - "step": 3141000 - }, - { - "epoch": 34.89, - "learning_rate": 3.778068986962219e-08, - "loss": 3.7719, - "step": 3141500 - }, - { - "epoch": 34.89, - "learning_rate": 3.776680807587233e-08, - "loss": 3.7507, - "step": 3142000 - }, - { - "epoch": 34.9, - "learning_rate": 3.775292628212247e-08, - "loss": 3.7551, - "step": 3142500 - }, - { - "epoch": 34.9, - "learning_rate": 3.773904448837261e-08, - "loss": 3.7521, - "step": 3143000 - }, - { - "epoch": 34.91, - "learning_rate": 3.772516269462275e-08, - "loss": 3.7683, - "step": 3143500 - }, - { - "epoch": 34.92, - "learning_rate": 3.7711280900872885e-08, - "loss": 3.7665, - "step": 3144000 - }, - { - "epoch": 34.92, - "learning_rate": 3.769739910712302e-08, - "loss": 3.7494, - "step": 3144500 - }, - { - "epoch": 34.93, - "learning_rate": 3.768351731337316e-08, - "loss": 3.7558, - "step": 3145000 - }, - { - "epoch": 34.93, - "learning_rate": 3.76696355196233e-08, - "loss": 3.7423, - "step": 3145500 - }, - { - "epoch": 34.94, - "learning_rate": 3.765575372587344e-08, - "loss": 3.7395, - "step": 3146000 - }, - { - "epoch": 34.94, - "learning_rate": 3.764187193212358e-08, - "loss": 3.7369, - "step": 3146500 - }, - { - "epoch": 34.95, - "learning_rate": 3.762799013837372e-08, - "loss": 3.7865, - "step": 3147000 - }, - { - "epoch": 34.95, - "learning_rate": 3.7614108344623855e-08, - "loss": 3.7693, - "step": 3147500 - }, - { - "epoch": 34.96, - "learning_rate": 3.7600226550874e-08, - "loss": 3.7596, - "step": 3148000 - }, - { - "epoch": 34.97, - "learning_rate": 3.758634475712413e-08, - "loss": 3.7511, - "step": 3148500 - }, - { - "epoch": 34.97, - "learning_rate": 3.7572462963374275e-08, - "loss": 3.7729, - "step": 3149000 - }, - { - "epoch": 34.98, - "learning_rate": 3.755858116962442e-08, - "loss": 3.7677, - "step": 3149500 - }, - { - "epoch": 34.98, - "learning_rate": 3.754469937587455e-08, - "loss": 3.7457, - "step": 3150000 - }, - { - "epoch": 34.99, - "learning_rate": 3.753081758212469e-08, - "loss": 3.7405, - "step": 3150500 - }, - { - "epoch": 34.99, - "learning_rate": 3.751693578837483e-08, - "loss": 3.7475, - "step": 3151000 - }, - { - "epoch": 35.0, - "learning_rate": 3.7503053994624966e-08, - "loss": 3.7551, - "step": 3151500 - }, - { - "epoch": 35.0, - "eval_loss": 3.830213785171509, - "eval_runtime": 6.3055, - "eval_samples_per_second": 246.453, - "step": 3151610 - }, - { - "epoch": 35.0, - "learning_rate": 3.748917220087511e-08, - "loss": 3.7463, - "step": 3152000 - }, - { - "epoch": 35.01, - "learning_rate": 3.7475290407125244e-08, - "loss": 3.7487, - "step": 3152500 - }, - { - "epoch": 35.02, - "learning_rate": 3.7461408613375386e-08, - "loss": 3.7578, - "step": 3153000 - }, - { - "epoch": 35.02, - "learning_rate": 3.744752681962552e-08, - "loss": 3.7618, - "step": 3153500 - }, - { - "epoch": 35.03, - "learning_rate": 3.743364502587566e-08, - "loss": 3.7607, - "step": 3154000 - }, - { - "epoch": 35.03, - "learning_rate": 3.74197632321258e-08, - "loss": 3.7737, - "step": 3154500 - }, - { - "epoch": 35.04, - "learning_rate": 3.740588143837594e-08, - "loss": 3.7488, - "step": 3155000 - }, - { - "epoch": 35.04, - "learning_rate": 3.739199964462608e-08, - "loss": 3.7399, - "step": 3155500 - }, - { - "epoch": 35.05, - "learning_rate": 3.737811785087622e-08, - "loss": 3.7491, - "step": 3156000 - }, - { - "epoch": 35.05, - "learning_rate": 3.7364236057126356e-08, - "loss": 3.7565, - "step": 3156500 - }, - { - "epoch": 35.06, - "learning_rate": 3.735035426337649e-08, - "loss": 3.756, - "step": 3157000 - }, - { - "epoch": 35.07, - "learning_rate": 3.7336472469626634e-08, - "loss": 3.7755, - "step": 3157500 - }, - { - "epoch": 35.07, - "learning_rate": 3.7322590675876776e-08, - "loss": 3.7493, - "step": 3158000 - }, - { - "epoch": 35.08, - "learning_rate": 3.730870888212691e-08, - "loss": 3.7618, - "step": 3158500 - }, - { - "epoch": 35.08, - "learning_rate": 3.7294827088377054e-08, - "loss": 3.7549, - "step": 3159000 - }, - { - "epoch": 35.09, - "learning_rate": 3.728094529462719e-08, - "loss": 3.7502, - "step": 3159500 - }, - { - "epoch": 35.09, - "learning_rate": 3.7267063500877325e-08, - "loss": 3.7477, - "step": 3160000 - }, - { - "epoch": 35.1, - "learning_rate": 3.725318170712747e-08, - "loss": 3.734, - "step": 3160500 - }, - { - "epoch": 35.1, - "learning_rate": 3.72392999133776e-08, - "loss": 3.741, - "step": 3161000 - }, - { - "epoch": 35.11, - "learning_rate": 3.7225418119627745e-08, - "loss": 3.7469, - "step": 3161500 - }, - { - "epoch": 35.12, - "learning_rate": 3.721153632587789e-08, - "loss": 3.7429, - "step": 3162000 - }, - { - "epoch": 35.12, - "learning_rate": 3.719765453212802e-08, - "loss": 3.7518, - "step": 3162500 - }, - { - "epoch": 35.13, - "learning_rate": 3.718377273837816e-08, - "loss": 3.7553, - "step": 3163000 - }, - { - "epoch": 35.13, - "learning_rate": 3.71698909446283e-08, - "loss": 3.7537, - "step": 3163500 - }, - { - "epoch": 35.14, - "learning_rate": 3.715600915087844e-08, - "loss": 3.7698, - "step": 3164000 - }, - { - "epoch": 35.14, - "learning_rate": 3.714212735712858e-08, - "loss": 3.7763, - "step": 3164500 - }, - { - "epoch": 35.15, - "learning_rate": 3.712824556337872e-08, - "loss": 3.7581, - "step": 3165000 - }, - { - "epoch": 35.15, - "learning_rate": 3.711436376962885e-08, - "loss": 3.773, - "step": 3165500 - }, - { - "epoch": 35.16, - "learning_rate": 3.710048197587899e-08, - "loss": 3.7381, - "step": 3166000 - }, - { - "epoch": 35.17, - "learning_rate": 3.708660018212913e-08, - "loss": 3.7466, - "step": 3166500 - }, - { - "epoch": 35.17, - "learning_rate": 3.707271838837927e-08, - "loss": 3.7495, - "step": 3167000 - }, - { - "epoch": 35.18, - "learning_rate": 3.705883659462941e-08, - "loss": 3.7502, - "step": 3167500 - }, - { - "epoch": 35.18, - "learning_rate": 3.704495480087955e-08, - "loss": 3.7641, - "step": 3168000 - }, - { - "epoch": 35.19, - "learning_rate": 3.7031073007129684e-08, - "loss": 3.7522, - "step": 3168500 - }, - { - "epoch": 35.19, - "learning_rate": 3.7017191213379826e-08, - "loss": 3.759, - "step": 3169000 - }, - { - "epoch": 35.2, - "learning_rate": 3.700330941962996e-08, - "loss": 3.7438, - "step": 3169500 - }, - { - "epoch": 35.2, - "learning_rate": 3.6989427625880104e-08, - "loss": 3.7538, - "step": 3170000 - }, - { - "epoch": 35.21, - "learning_rate": 3.6975545832130247e-08, - "loss": 3.7502, - "step": 3170500 - }, - { - "epoch": 35.22, - "learning_rate": 3.696166403838038e-08, - "loss": 3.7547, - "step": 3171000 - }, - { - "epoch": 35.22, - "learning_rate": 3.694778224463052e-08, - "loss": 3.7578, - "step": 3171500 - }, - { - "epoch": 35.23, - "learning_rate": 3.693390045088066e-08, - "loss": 3.7497, - "step": 3172000 - }, - { - "epoch": 35.23, - "learning_rate": 3.6920018657130796e-08, - "loss": 3.7629, - "step": 3172500 - }, - { - "epoch": 35.24, - "learning_rate": 3.690613686338094e-08, - "loss": 3.7435, - "step": 3173000 - }, - { - "epoch": 35.24, - "learning_rate": 3.6892255069631074e-08, - "loss": 3.7534, - "step": 3173500 - }, - { - "epoch": 35.25, - "learning_rate": 3.6878373275881216e-08, - "loss": 3.7687, - "step": 3174000 - }, - { - "epoch": 35.25, - "learning_rate": 3.686449148213135e-08, - "loss": 3.7672, - "step": 3174500 - }, - { - "epoch": 35.26, - "learning_rate": 3.685060968838149e-08, - "loss": 3.7835, - "step": 3175000 - }, - { - "epoch": 35.27, - "learning_rate": 3.683672789463163e-08, - "loss": 3.7439, - "step": 3175500 - }, - { - "epoch": 35.27, - "learning_rate": 3.682284610088177e-08, - "loss": 3.744, - "step": 3176000 - }, - { - "epoch": 35.28, - "learning_rate": 3.680896430713191e-08, - "loss": 3.7513, - "step": 3176500 - }, - { - "epoch": 35.28, - "learning_rate": 3.679508251338205e-08, - "loss": 3.7625, - "step": 3177000 - }, - { - "epoch": 35.29, - "learning_rate": 3.6781200719632185e-08, - "loss": 3.7385, - "step": 3177500 - }, - { - "epoch": 35.29, - "learning_rate": 3.676731892588232e-08, - "loss": 3.7522, - "step": 3178000 - }, - { - "epoch": 35.3, - "learning_rate": 3.675343713213246e-08, - "loss": 3.7561, - "step": 3178500 - }, - { - "epoch": 35.3, - "learning_rate": 3.6739555338382605e-08, - "loss": 3.7456, - "step": 3179000 - }, - { - "epoch": 35.31, - "learning_rate": 3.672567354463274e-08, - "loss": 3.7454, - "step": 3179500 - }, - { - "epoch": 35.32, - "learning_rate": 3.6711791750882883e-08, - "loss": 3.7511, - "step": 3180000 - }, - { - "epoch": 35.32, - "learning_rate": 3.669790995713302e-08, - "loss": 3.7618, - "step": 3180500 - }, - { - "epoch": 35.33, - "learning_rate": 3.6684028163383155e-08, - "loss": 3.7713, - "step": 3181000 - }, - { - "epoch": 35.33, - "learning_rate": 3.66701463696333e-08, - "loss": 3.7798, - "step": 3181500 - }, - { - "epoch": 35.34, - "learning_rate": 3.665626457588343e-08, - "loss": 3.7554, - "step": 3182000 - }, - { - "epoch": 35.34, - "learning_rate": 3.6642382782133575e-08, - "loss": 3.7655, - "step": 3182500 - }, - { - "epoch": 35.35, - "learning_rate": 3.662850098838372e-08, - "loss": 3.7565, - "step": 3183000 - }, - { - "epoch": 35.35, - "learning_rate": 3.661461919463385e-08, - "loss": 3.7595, - "step": 3183500 - }, - { - "epoch": 35.36, - "learning_rate": 3.660073740088399e-08, - "loss": 3.764, - "step": 3184000 - }, - { - "epoch": 35.37, - "learning_rate": 3.658685560713413e-08, - "loss": 3.7733, - "step": 3184500 - }, - { - "epoch": 35.37, - "learning_rate": 3.6572973813384266e-08, - "loss": 3.7582, - "step": 3185000 - }, - { - "epoch": 35.38, - "learning_rate": 3.655909201963441e-08, - "loss": 3.7737, - "step": 3185500 - }, - { - "epoch": 35.38, - "learning_rate": 3.654521022588455e-08, - "loss": 3.7383, - "step": 3186000 - }, - { - "epoch": 35.39, - "learning_rate": 3.6531328432134686e-08, - "loss": 3.7705, - "step": 3186500 - }, - { - "epoch": 35.39, - "learning_rate": 3.651744663838482e-08, - "loss": 3.7517, - "step": 3187000 - }, - { - "epoch": 35.4, - "learning_rate": 3.6503564844634964e-08, - "loss": 3.7594, - "step": 3187500 - }, - { - "epoch": 35.4, - "learning_rate": 3.64896830508851e-08, - "loss": 3.7564, - "step": 3188000 - }, - { - "epoch": 35.41, - "learning_rate": 3.647580125713524e-08, - "loss": 3.7401, - "step": 3188500 - }, - { - "epoch": 35.42, - "learning_rate": 3.646191946338538e-08, - "loss": 3.7408, - "step": 3189000 - }, - { - "epoch": 35.42, - "learning_rate": 3.644803766963552e-08, - "loss": 3.7641, - "step": 3189500 - }, - { - "epoch": 35.43, - "learning_rate": 3.6434155875885656e-08, - "loss": 3.7475, - "step": 3190000 - }, - { - "epoch": 35.43, - "learning_rate": 3.642027408213579e-08, - "loss": 3.7772, - "step": 3190500 - }, - { - "epoch": 35.44, - "learning_rate": 3.6406392288385934e-08, - "loss": 3.7566, - "step": 3191000 - }, - { - "epoch": 35.44, - "learning_rate": 3.6392510494636076e-08, - "loss": 3.7662, - "step": 3191500 - }, - { - "epoch": 35.45, - "learning_rate": 3.637862870088621e-08, - "loss": 3.7541, - "step": 3192000 - }, - { - "epoch": 35.45, - "learning_rate": 3.6364746907136354e-08, - "loss": 3.77, - "step": 3192500 - }, - { - "epoch": 35.46, - "learning_rate": 3.635086511338649e-08, - "loss": 3.7598, - "step": 3193000 - }, - { - "epoch": 35.47, - "learning_rate": 3.6336983319636625e-08, - "loss": 3.761, - "step": 3193500 - }, - { - "epoch": 35.47, - "learning_rate": 3.632310152588677e-08, - "loss": 3.7398, - "step": 3194000 - }, - { - "epoch": 35.48, - "learning_rate": 3.630921973213691e-08, - "loss": 3.748, - "step": 3194500 - }, - { - "epoch": 35.48, - "learning_rate": 3.6295337938387045e-08, - "loss": 3.753, - "step": 3195000 - }, - { - "epoch": 35.49, - "learning_rate": 3.628145614463719e-08, - "loss": 3.769, - "step": 3195500 - }, - { - "epoch": 35.49, - "learning_rate": 3.626757435088732e-08, - "loss": 3.7737, - "step": 3196000 - }, - { - "epoch": 35.5, - "learning_rate": 3.625369255713746e-08, - "loss": 3.7464, - "step": 3196500 - }, - { - "epoch": 35.5, - "learning_rate": 3.62398107633876e-08, - "loss": 3.7455, - "step": 3197000 - }, - { - "epoch": 35.51, - "learning_rate": 3.622592896963774e-08, - "loss": 3.7634, - "step": 3197500 - }, - { - "epoch": 35.52, - "learning_rate": 3.621204717588788e-08, - "loss": 3.7662, - "step": 3198000 - }, - { - "epoch": 35.52, - "learning_rate": 3.619816538213802e-08, - "loss": 3.7411, - "step": 3198500 - }, - { - "epoch": 35.53, - "learning_rate": 3.618428358838815e-08, - "loss": 3.7484, - "step": 3199000 - }, - { - "epoch": 35.53, - "learning_rate": 3.617040179463829e-08, - "loss": 3.7596, - "step": 3199500 - }, - { - "epoch": 35.54, - "learning_rate": 3.6156520000888435e-08, - "loss": 3.748, - "step": 3200000 - }, - { - "epoch": 35.54, - "learning_rate": 3.614263820713857e-08, - "loss": 3.7569, - "step": 3200500 - }, - { - "epoch": 35.55, - "learning_rate": 3.612875641338871e-08, - "loss": 3.7558, - "step": 3201000 - }, - { - "epoch": 35.55, - "learning_rate": 3.6114874619638855e-08, - "loss": 3.7606, - "step": 3201500 - }, - { - "epoch": 35.56, - "learning_rate": 3.6100992825888984e-08, - "loss": 3.7583, - "step": 3202000 - }, - { - "epoch": 35.57, - "learning_rate": 3.6087111032139126e-08, - "loss": 3.7683, - "step": 3202500 - }, - { - "epoch": 35.57, - "learning_rate": 3.607322923838926e-08, - "loss": 3.7568, - "step": 3203000 - }, - { - "epoch": 35.58, - "learning_rate": 3.6059347444639404e-08, - "loss": 3.769, - "step": 3203500 - }, - { - "epoch": 35.58, - "learning_rate": 3.6045465650889547e-08, - "loss": 3.7725, - "step": 3204000 - }, - { - "epoch": 35.59, - "learning_rate": 3.603158385713968e-08, - "loss": 3.757, - "step": 3204500 - }, - { - "epoch": 35.59, - "learning_rate": 3.601770206338982e-08, - "loss": 3.7577, - "step": 3205000 - }, - { - "epoch": 35.6, - "learning_rate": 3.600382026963996e-08, - "loss": 3.76, - "step": 3205500 - }, - { - "epoch": 35.6, - "learning_rate": 3.5989938475890096e-08, - "loss": 3.7528, - "step": 3206000 - }, - { - "epoch": 35.61, - "learning_rate": 3.597605668214024e-08, - "loss": 3.7507, - "step": 3206500 - }, - { - "epoch": 35.62, - "learning_rate": 3.596217488839038e-08, - "loss": 3.7656, - "step": 3207000 - }, - { - "epoch": 35.62, - "learning_rate": 3.5948293094640516e-08, - "loss": 3.7352, - "step": 3207500 - }, - { - "epoch": 35.63, - "learning_rate": 3.593441130089065e-08, - "loss": 3.759, - "step": 3208000 - }, - { - "epoch": 35.63, - "learning_rate": 3.5920529507140794e-08, - "loss": 3.7655, - "step": 3208500 - }, - { - "epoch": 35.64, - "learning_rate": 3.590664771339093e-08, - "loss": 3.754, - "step": 3209000 - }, - { - "epoch": 35.64, - "learning_rate": 3.589276591964107e-08, - "loss": 3.73, - "step": 3209500 - }, - { - "epoch": 35.65, - "learning_rate": 3.5878884125891214e-08, - "loss": 3.747, - "step": 3210000 - }, - { - "epoch": 35.65, - "learning_rate": 3.586500233214135e-08, - "loss": 3.7393, - "step": 3210500 - }, - { - "epoch": 35.66, - "learning_rate": 3.5851120538391485e-08, - "loss": 3.7643, - "step": 3211000 - }, - { - "epoch": 35.67, - "learning_rate": 3.583723874464162e-08, - "loss": 3.7619, - "step": 3211500 - }, - { - "epoch": 35.67, - "learning_rate": 3.582335695089176e-08, - "loss": 3.7634, - "step": 3212000 - }, - { - "epoch": 35.68, - "learning_rate": 3.5809475157141906e-08, - "loss": 3.7449, - "step": 3212500 - }, - { - "epoch": 35.68, - "learning_rate": 3.579559336339204e-08, - "loss": 3.7526, - "step": 3213000 - }, - { - "epoch": 35.69, - "learning_rate": 3.5781711569642183e-08, - "loss": 3.7451, - "step": 3213500 - }, - { - "epoch": 35.69, - "learning_rate": 3.576782977589232e-08, - "loss": 3.7564, - "step": 3214000 - }, - { - "epoch": 35.7, - "learning_rate": 3.5753947982142455e-08, - "loss": 3.7436, - "step": 3214500 - }, - { - "epoch": 35.7, - "learning_rate": 3.57400661883926e-08, - "loss": 3.7375, - "step": 3215000 - }, - { - "epoch": 35.71, - "learning_rate": 3.572618439464274e-08, - "loss": 3.7577, - "step": 3215500 - }, - { - "epoch": 35.72, - "learning_rate": 3.5712302600892875e-08, - "loss": 3.7375, - "step": 3216000 - }, - { - "epoch": 35.72, - "learning_rate": 3.569842080714302e-08, - "loss": 3.7672, - "step": 3216500 - }, - { - "epoch": 35.73, - "learning_rate": 3.568453901339315e-08, - "loss": 3.7586, - "step": 3217000 - }, - { - "epoch": 35.73, - "learning_rate": 3.567065721964329e-08, - "loss": 3.7487, - "step": 3217500 - }, - { - "epoch": 35.74, - "learning_rate": 3.565677542589343e-08, - "loss": 3.7439, - "step": 3218000 - }, - { - "epoch": 35.74, - "learning_rate": 3.5642893632143566e-08, - "loss": 3.7588, - "step": 3218500 - }, - { - "epoch": 35.75, - "learning_rate": 3.562901183839371e-08, - "loss": 3.7543, - "step": 3219000 - }, - { - "epoch": 35.75, - "learning_rate": 3.561513004464385e-08, - "loss": 3.7582, - "step": 3219500 - }, - { - "epoch": 35.76, - "learning_rate": 3.5601248250893987e-08, - "loss": 3.7641, - "step": 3220000 - }, - { - "epoch": 35.77, - "learning_rate": 3.558736645714412e-08, - "loss": 3.7602, - "step": 3220500 - }, - { - "epoch": 35.77, - "learning_rate": 3.5573484663394264e-08, - "loss": 3.7685, - "step": 3221000 - }, - { - "epoch": 35.78, - "learning_rate": 3.55596028696444e-08, - "loss": 3.7328, - "step": 3221500 - }, - { - "epoch": 35.78, - "learning_rate": 3.554572107589454e-08, - "loss": 3.7602, - "step": 3222000 - }, - { - "epoch": 35.79, - "learning_rate": 3.5531839282144685e-08, - "loss": 3.742, - "step": 3222500 - }, - { - "epoch": 35.79, - "learning_rate": 3.551795748839482e-08, - "loss": 3.7656, - "step": 3223000 - }, - { - "epoch": 35.8, - "learning_rate": 3.5504075694644956e-08, - "loss": 3.7798, - "step": 3223500 - }, - { - "epoch": 35.8, - "learning_rate": 3.54901939008951e-08, - "loss": 3.7635, - "step": 3224000 - }, - { - "epoch": 35.81, - "learning_rate": 3.5476312107145234e-08, - "loss": 3.7718, - "step": 3224500 - }, - { - "epoch": 35.82, - "learning_rate": 3.5462430313395376e-08, - "loss": 3.7622, - "step": 3225000 - }, - { - "epoch": 35.82, - "learning_rate": 3.544854851964551e-08, - "loss": 3.7606, - "step": 3225500 - }, - { - "epoch": 35.83, - "learning_rate": 3.5434666725895654e-08, - "loss": 3.7641, - "step": 3226000 - }, - { - "epoch": 35.83, - "learning_rate": 3.542078493214579e-08, - "loss": 3.7644, - "step": 3226500 - }, - { - "epoch": 35.84, - "learning_rate": 3.5406903138395925e-08, - "loss": 3.7601, - "step": 3227000 - }, - { - "epoch": 35.84, - "learning_rate": 3.539302134464607e-08, - "loss": 3.7502, - "step": 3227500 - }, - { - "epoch": 35.85, - "learning_rate": 3.537913955089621e-08, - "loss": 3.7508, - "step": 3228000 - }, - { - "epoch": 35.85, - "learning_rate": 3.5365257757146345e-08, - "loss": 3.7565, - "step": 3228500 - }, - { - "epoch": 35.86, - "learning_rate": 3.535137596339649e-08, - "loss": 3.76, - "step": 3229000 - }, - { - "epoch": 35.87, - "learning_rate": 3.5337494169646623e-08, - "loss": 3.7603, - "step": 3229500 - }, - { - "epoch": 35.87, - "learning_rate": 3.532361237589676e-08, - "loss": 3.7698, - "step": 3230000 - }, - { - "epoch": 35.88, - "learning_rate": 3.53097305821469e-08, - "loss": 3.7701, - "step": 3230500 - }, - { - "epoch": 35.88, - "learning_rate": 3.5295848788397044e-08, - "loss": 3.7565, - "step": 3231000 - }, - { - "epoch": 35.89, - "learning_rate": 3.528196699464718e-08, - "loss": 3.7539, - "step": 3231500 - }, - { - "epoch": 35.89, - "learning_rate": 3.526808520089732e-08, - "loss": 3.7697, - "step": 3232000 - }, - { - "epoch": 35.9, - "learning_rate": 3.525420340714745e-08, - "loss": 3.7601, - "step": 3232500 - }, - { - "epoch": 35.9, - "learning_rate": 3.524032161339759e-08, - "loss": 3.7525, - "step": 3233000 - }, - { - "epoch": 35.91, - "learning_rate": 3.5226439819647735e-08, - "loss": 3.7544, - "step": 3233500 - }, - { - "epoch": 35.91, - "learning_rate": 3.521255802589787e-08, - "loss": 3.773, - "step": 3234000 - }, - { - "epoch": 35.92, - "learning_rate": 3.519867623214801e-08, - "loss": 3.7572, - "step": 3234500 - }, - { - "epoch": 35.93, - "learning_rate": 3.5184794438398155e-08, - "loss": 3.7558, - "step": 3235000 - }, - { - "epoch": 35.93, - "learning_rate": 3.5170912644648284e-08, - "loss": 3.756, - "step": 3235500 - }, - { - "epoch": 35.94, - "learning_rate": 3.5157030850898427e-08, - "loss": 3.7628, - "step": 3236000 - }, - { - "epoch": 35.94, - "learning_rate": 3.514314905714857e-08, - "loss": 3.7487, - "step": 3236500 - }, - { - "epoch": 35.95, - "learning_rate": 3.5129267263398704e-08, - "loss": 3.7499, - "step": 3237000 - }, - { - "epoch": 35.95, - "learning_rate": 3.5115385469648847e-08, - "loss": 3.7341, - "step": 3237500 - }, - { - "epoch": 35.96, - "learning_rate": 3.510150367589899e-08, - "loss": 3.7722, - "step": 3238000 - }, - { - "epoch": 35.96, - "learning_rate": 3.508762188214912e-08, - "loss": 3.7618, - "step": 3238500 - }, - { - "epoch": 35.97, - "learning_rate": 3.507374008839926e-08, - "loss": 3.7518, - "step": 3239000 - }, - { - "epoch": 35.98, - "learning_rate": 3.50598582946494e-08, - "loss": 3.7574, - "step": 3239500 - }, - { - "epoch": 35.98, - "learning_rate": 3.504597650089954e-08, - "loss": 3.7626, - "step": 3240000 - }, - { - "epoch": 35.99, - "learning_rate": 3.503209470714968e-08, - "loss": 3.7609, - "step": 3240500 - }, - { - "epoch": 35.99, - "learning_rate": 3.5018212913399816e-08, - "loss": 3.7792, - "step": 3241000 - }, - { - "epoch": 36.0, - "learning_rate": 3.500433111964995e-08, - "loss": 3.7314, - "step": 3241500 - }, - { - "epoch": 36.0, - "eval_loss": 3.829056978225708, - "eval_runtime": 6.3058, - "eval_samples_per_second": 246.44, - "step": 3241656 - }, - { - "epoch": 36.0, - "learning_rate": 3.4990449325900094e-08, - "loss": 3.7589, - "step": 3242000 - }, - { - "epoch": 36.01, - "learning_rate": 3.497656753215023e-08, - "loss": 3.7573, - "step": 3242500 - }, - { - "epoch": 36.01, - "learning_rate": 3.496268573840037e-08, - "loss": 3.755, - "step": 3243000 - }, - { - "epoch": 36.02, - "learning_rate": 3.4948803944650514e-08, - "loss": 3.7551, - "step": 3243500 - }, - { - "epoch": 36.03, - "learning_rate": 3.493492215090065e-08, - "loss": 3.7473, - "step": 3244000 - }, - { - "epoch": 36.03, - "learning_rate": 3.4921040357150785e-08, - "loss": 3.7427, - "step": 3244500 - }, - { - "epoch": 36.04, - "learning_rate": 3.490715856340093e-08, - "loss": 3.7479, - "step": 3245000 - }, - { - "epoch": 36.04, - "learning_rate": 3.4893276769651063e-08, - "loss": 3.7498, - "step": 3245500 - }, - { - "epoch": 36.05, - "learning_rate": 3.4879394975901206e-08, - "loss": 3.76, - "step": 3246000 - }, - { - "epoch": 36.05, - "learning_rate": 3.486551318215135e-08, - "loss": 3.7631, - "step": 3246500 - }, - { - "epoch": 36.06, - "learning_rate": 3.4851631388401484e-08, - "loss": 3.7717, - "step": 3247000 - }, - { - "epoch": 36.06, - "learning_rate": 3.483774959465162e-08, - "loss": 3.7592, - "step": 3247500 - }, - { - "epoch": 36.07, - "learning_rate": 3.4823867800901755e-08, - "loss": 3.7684, - "step": 3248000 - }, - { - "epoch": 36.08, - "learning_rate": 3.48099860071519e-08, - "loss": 3.7549, - "step": 3248500 - }, - { - "epoch": 36.08, - "learning_rate": 3.479610421340204e-08, - "loss": 3.7557, - "step": 3249000 - }, - { - "epoch": 36.09, - "learning_rate": 3.4782222419652175e-08, - "loss": 3.7463, - "step": 3249500 - }, - { - "epoch": 36.09, - "learning_rate": 3.476834062590232e-08, - "loss": 3.7789, - "step": 3250000 - }, - { - "epoch": 36.1, - "learning_rate": 3.475445883215245e-08, - "loss": 3.7622, - "step": 3250500 - }, - { - "epoch": 36.1, - "learning_rate": 3.474057703840259e-08, - "loss": 3.7508, - "step": 3251000 - }, - { - "epoch": 36.11, - "learning_rate": 3.472669524465273e-08, - "loss": 3.764, - "step": 3251500 - }, - { - "epoch": 36.11, - "learning_rate": 3.471281345090287e-08, - "loss": 3.7582, - "step": 3252000 - }, - { - "epoch": 36.12, - "learning_rate": 3.469893165715301e-08, - "loss": 3.747, - "step": 3252500 - }, - { - "epoch": 36.13, - "learning_rate": 3.468504986340315e-08, - "loss": 3.7286, - "step": 3253000 - }, - { - "epoch": 36.13, - "learning_rate": 3.4671168069653287e-08, - "loss": 3.7528, - "step": 3253500 - }, - { - "epoch": 36.14, - "learning_rate": 3.465728627590342e-08, - "loss": 3.7389, - "step": 3254000 - }, - { - "epoch": 36.14, - "learning_rate": 3.4643404482153565e-08, - "loss": 3.7494, - "step": 3254500 - }, - { - "epoch": 36.15, - "learning_rate": 3.46295226884037e-08, - "loss": 3.7599, - "step": 3255000 - }, - { - "epoch": 36.15, - "learning_rate": 3.461564089465384e-08, - "loss": 3.7641, - "step": 3255500 - }, - { - "epoch": 36.16, - "learning_rate": 3.4601759100903985e-08, - "loss": 3.7531, - "step": 3256000 - }, - { - "epoch": 36.16, - "learning_rate": 3.458787730715412e-08, - "loss": 3.7652, - "step": 3256500 - }, - { - "epoch": 36.17, - "learning_rate": 3.4573995513404256e-08, - "loss": 3.76, - "step": 3257000 - }, - { - "epoch": 36.18, - "learning_rate": 3.45601137196544e-08, - "loss": 3.7339, - "step": 3257500 - }, - { - "epoch": 36.18, - "learning_rate": 3.4546231925904534e-08, - "loss": 3.7297, - "step": 3258000 - }, - { - "epoch": 36.19, - "learning_rate": 3.4532350132154676e-08, - "loss": 3.7563, - "step": 3258500 - }, - { - "epoch": 36.19, - "learning_rate": 3.451846833840482e-08, - "loss": 3.7446, - "step": 3259000 - }, - { - "epoch": 36.2, - "learning_rate": 3.4504586544654954e-08, - "loss": 3.761, - "step": 3259500 - }, - { - "epoch": 36.2, - "learning_rate": 3.449070475090509e-08, - "loss": 3.7579, - "step": 3260000 - }, - { - "epoch": 36.21, - "learning_rate": 3.447682295715523e-08, - "loss": 3.7581, - "step": 3260500 - }, - { - "epoch": 36.21, - "learning_rate": 3.446294116340537e-08, - "loss": 3.7704, - "step": 3261000 - }, - { - "epoch": 36.22, - "learning_rate": 3.444905936965551e-08, - "loss": 3.754, - "step": 3261500 - }, - { - "epoch": 36.23, - "learning_rate": 3.4435177575905646e-08, - "loss": 3.7573, - "step": 3262000 - }, - { - "epoch": 36.23, - "learning_rate": 3.442129578215579e-08, - "loss": 3.7276, - "step": 3262500 - }, - { - "epoch": 36.24, - "learning_rate": 3.4407413988405923e-08, - "loss": 3.7663, - "step": 3263000 - }, - { - "epoch": 36.24, - "learning_rate": 3.439353219465606e-08, - "loss": 3.7356, - "step": 3263500 - }, - { - "epoch": 36.25, - "learning_rate": 3.43796504009062e-08, - "loss": 3.7452, - "step": 3264000 - }, - { - "epoch": 36.25, - "learning_rate": 3.4365768607156344e-08, - "loss": 3.756, - "step": 3264500 - }, - { - "epoch": 36.26, - "learning_rate": 3.435188681340648e-08, - "loss": 3.7943, - "step": 3265000 - }, - { - "epoch": 36.26, - "learning_rate": 3.433800501965662e-08, - "loss": 3.7502, - "step": 3265500 - }, - { - "epoch": 36.27, - "learning_rate": 3.432412322590676e-08, - "loss": 3.7672, - "step": 3266000 - }, - { - "epoch": 36.28, - "learning_rate": 3.431024143215689e-08, - "loss": 3.7641, - "step": 3266500 - }, - { - "epoch": 36.28, - "learning_rate": 3.4296359638407035e-08, - "loss": 3.7484, - "step": 3267000 - }, - { - "epoch": 36.29, - "learning_rate": 3.428247784465718e-08, - "loss": 3.7545, - "step": 3267500 - }, - { - "epoch": 36.29, - "learning_rate": 3.426859605090731e-08, - "loss": 3.7515, - "step": 3268000 - }, - { - "epoch": 36.3, - "learning_rate": 3.4254714257157455e-08, - "loss": 3.7541, - "step": 3268500 - }, - { - "epoch": 36.3, - "learning_rate": 3.4240832463407584e-08, - "loss": 3.7685, - "step": 3269000 - }, - { - "epoch": 36.31, - "learning_rate": 3.4226950669657727e-08, - "loss": 3.7552, - "step": 3269500 - }, - { - "epoch": 36.31, - "learning_rate": 3.421306887590787e-08, - "loss": 3.7673, - "step": 3270000 - }, - { - "epoch": 36.32, - "learning_rate": 3.4199187082158004e-08, - "loss": 3.7421, - "step": 3270500 - }, - { - "epoch": 36.33, - "learning_rate": 3.418530528840815e-08, - "loss": 3.7581, - "step": 3271000 - }, - { - "epoch": 36.33, - "learning_rate": 3.417142349465829e-08, - "loss": 3.7389, - "step": 3271500 - }, - { - "epoch": 36.34, - "learning_rate": 3.415754170090842e-08, - "loss": 3.7568, - "step": 3272000 - }, - { - "epoch": 36.34, - "learning_rate": 3.414365990715856e-08, - "loss": 3.7645, - "step": 3272500 - }, - { - "epoch": 36.35, - "learning_rate": 3.41297781134087e-08, - "loss": 3.7609, - "step": 3273000 - }, - { - "epoch": 36.35, - "learning_rate": 3.411589631965884e-08, - "loss": 3.7816, - "step": 3273500 - }, - { - "epoch": 36.36, - "learning_rate": 3.410201452590898e-08, - "loss": 3.7398, - "step": 3274000 - }, - { - "epoch": 36.36, - "learning_rate": 3.4088132732159116e-08, - "loss": 3.7734, - "step": 3274500 - }, - { - "epoch": 36.37, - "learning_rate": 3.407425093840925e-08, - "loss": 3.76, - "step": 3275000 - }, - { - "epoch": 36.38, - "learning_rate": 3.4060369144659394e-08, - "loss": 3.7396, - "step": 3275500 - }, - { - "epoch": 36.38, - "learning_rate": 3.4046487350909536e-08, - "loss": 3.7485, - "step": 3276000 - }, - { - "epoch": 36.39, - "learning_rate": 3.403260555715967e-08, - "loss": 3.7517, - "step": 3276500 - }, - { - "epoch": 36.39, - "learning_rate": 3.4018723763409814e-08, - "loss": 3.7601, - "step": 3277000 - }, - { - "epoch": 36.4, - "learning_rate": 3.400484196965995e-08, - "loss": 3.7581, - "step": 3277500 - }, - { - "epoch": 36.4, - "learning_rate": 3.3990960175910086e-08, - "loss": 3.7576, - "step": 3278000 - }, - { - "epoch": 36.41, - "learning_rate": 3.397707838216023e-08, - "loss": 3.7629, - "step": 3278500 - }, - { - "epoch": 36.41, - "learning_rate": 3.3963196588410363e-08, - "loss": 3.7686, - "step": 3279000 - }, - { - "epoch": 36.42, - "learning_rate": 3.3949314794660506e-08, - "loss": 3.757, - "step": 3279500 - }, - { - "epoch": 36.43, - "learning_rate": 3.393543300091065e-08, - "loss": 3.7583, - "step": 3280000 - }, - { - "epoch": 36.43, - "learning_rate": 3.3921551207160784e-08, - "loss": 3.7525, - "step": 3280500 - }, - { - "epoch": 36.44, - "learning_rate": 3.390766941341092e-08, - "loss": 3.7442, - "step": 3281000 - }, - { - "epoch": 36.44, - "learning_rate": 3.389378761966106e-08, - "loss": 3.7442, - "step": 3281500 - }, - { - "epoch": 36.45, - "learning_rate": 3.38799058259112e-08, - "loss": 3.777, - "step": 3282000 - }, - { - "epoch": 36.45, - "learning_rate": 3.386602403216134e-08, - "loss": 3.7565, - "step": 3282500 - }, - { - "epoch": 36.46, - "learning_rate": 3.385214223841148e-08, - "loss": 3.7799, - "step": 3283000 - }, - { - "epoch": 36.46, - "learning_rate": 3.383826044466162e-08, - "loss": 3.7727, - "step": 3283500 - }, - { - "epoch": 36.47, - "learning_rate": 3.382437865091175e-08, - "loss": 3.7478, - "step": 3284000 - }, - { - "epoch": 36.48, - "learning_rate": 3.381049685716189e-08, - "loss": 3.749, - "step": 3284500 - }, - { - "epoch": 36.48, - "learning_rate": 3.379661506341203e-08, - "loss": 3.7545, - "step": 3285000 - }, - { - "epoch": 36.49, - "learning_rate": 3.378273326966217e-08, - "loss": 3.7678, - "step": 3285500 - }, - { - "epoch": 36.49, - "learning_rate": 3.376885147591231e-08, - "loss": 3.7347, - "step": 3286000 - }, - { - "epoch": 36.5, - "learning_rate": 3.375496968216245e-08, - "loss": 3.7648, - "step": 3286500 - }, - { - "epoch": 36.5, - "learning_rate": 3.374108788841259e-08, - "loss": 3.7561, - "step": 3287000 - }, - { - "epoch": 36.51, - "learning_rate": 3.372720609466272e-08, - "loss": 3.7523, - "step": 3287500 - }, - { - "epoch": 36.51, - "learning_rate": 3.3713324300912865e-08, - "loss": 3.7595, - "step": 3288000 - }, - { - "epoch": 36.52, - "learning_rate": 3.369944250716301e-08, - "loss": 3.7514, - "step": 3288500 - }, - { - "epoch": 36.53, - "learning_rate": 3.368556071341314e-08, - "loss": 3.7581, - "step": 3289000 - }, - { - "epoch": 36.53, - "learning_rate": 3.3671678919663285e-08, - "loss": 3.7511, - "step": 3289500 - }, - { - "epoch": 36.54, - "learning_rate": 3.365779712591342e-08, - "loss": 3.7545, - "step": 3290000 - }, - { - "epoch": 36.54, - "learning_rate": 3.3643915332163556e-08, - "loss": 3.7466, - "step": 3290500 - }, - { - "epoch": 36.55, - "learning_rate": 3.36300335384137e-08, - "loss": 3.7731, - "step": 3291000 - }, - { - "epoch": 36.55, - "learning_rate": 3.3616151744663834e-08, - "loss": 3.7558, - "step": 3291500 - }, - { - "epoch": 36.56, - "learning_rate": 3.3602269950913976e-08, - "loss": 3.7731, - "step": 3292000 - }, - { - "epoch": 36.56, - "learning_rate": 3.358838815716412e-08, - "loss": 3.761, - "step": 3292500 - }, - { - "epoch": 36.57, - "learning_rate": 3.3574506363414254e-08, - "loss": 3.7406, - "step": 3293000 - }, - { - "epoch": 36.58, - "learning_rate": 3.356062456966439e-08, - "loss": 3.7477, - "step": 3293500 - }, - { - "epoch": 36.58, - "learning_rate": 3.354674277591453e-08, - "loss": 3.7609, - "step": 3294000 - }, - { - "epoch": 36.59, - "learning_rate": 3.353286098216467e-08, - "loss": 3.7491, - "step": 3294500 - }, - { - "epoch": 36.59, - "learning_rate": 3.351897918841481e-08, - "loss": 3.7619, - "step": 3295000 - }, - { - "epoch": 36.6, - "learning_rate": 3.350509739466495e-08, - "loss": 3.7434, - "step": 3295500 - }, - { - "epoch": 36.6, - "learning_rate": 3.349121560091509e-08, - "loss": 3.7527, - "step": 3296000 - }, - { - "epoch": 36.61, - "learning_rate": 3.3477333807165224e-08, - "loss": 3.75, - "step": 3296500 - }, - { - "epoch": 36.61, - "learning_rate": 3.3463452013415366e-08, - "loss": 3.7352, - "step": 3297000 - }, - { - "epoch": 36.62, - "learning_rate": 3.34495702196655e-08, - "loss": 3.7685, - "step": 3297500 - }, - { - "epoch": 36.63, - "learning_rate": 3.3435688425915644e-08, - "loss": 3.759, - "step": 3298000 - }, - { - "epoch": 36.63, - "learning_rate": 3.342180663216578e-08, - "loss": 3.758, - "step": 3298500 - }, - { - "epoch": 36.64, - "learning_rate": 3.340792483841592e-08, - "loss": 3.7538, - "step": 3299000 - }, - { - "epoch": 36.64, - "learning_rate": 3.339404304466606e-08, - "loss": 3.7657, - "step": 3299500 - }, - { - "epoch": 36.65, - "learning_rate": 3.338016125091619e-08, - "loss": 3.7775, - "step": 3300000 - }, - { - "epoch": 36.65, - "learning_rate": 3.3366279457166335e-08, - "loss": 3.7829, - "step": 3300500 - }, - { - "epoch": 36.66, - "learning_rate": 3.335239766341648e-08, - "loss": 3.7693, - "step": 3301000 - }, - { - "epoch": 36.66, - "learning_rate": 3.333851586966661e-08, - "loss": 3.7751, - "step": 3301500 - }, - { - "epoch": 36.67, - "learning_rate": 3.3324634075916755e-08, - "loss": 3.741, - "step": 3302000 - }, - { - "epoch": 36.68, - "learning_rate": 3.331075228216689e-08, - "loss": 3.7541, - "step": 3302500 - }, - { - "epoch": 36.68, - "learning_rate": 3.3296870488417027e-08, - "loss": 3.7384, - "step": 3303000 - }, - { - "epoch": 36.69, - "learning_rate": 3.328298869466717e-08, - "loss": 3.7503, - "step": 3303500 - }, - { - "epoch": 36.69, - "learning_rate": 3.326910690091731e-08, - "loss": 3.7439, - "step": 3304000 - }, - { - "epoch": 36.7, - "learning_rate": 3.325522510716745e-08, - "loss": 3.7484, - "step": 3304500 - }, - { - "epoch": 36.7, - "learning_rate": 3.324134331341759e-08, - "loss": 3.7363, - "step": 3305000 - }, - { - "epoch": 36.71, - "learning_rate": 3.3227461519667725e-08, - "loss": 3.7508, - "step": 3305500 - }, - { - "epoch": 36.71, - "learning_rate": 3.321357972591786e-08, - "loss": 3.7536, - "step": 3306000 - }, - { - "epoch": 36.72, - "learning_rate": 3.3199697932168e-08, - "loss": 3.7773, - "step": 3306500 - }, - { - "epoch": 36.73, - "learning_rate": 3.318581613841814e-08, - "loss": 3.7575, - "step": 3307000 - }, - { - "epoch": 36.73, - "learning_rate": 3.317193434466828e-08, - "loss": 3.7656, - "step": 3307500 - }, - { - "epoch": 36.74, - "learning_rate": 3.3158052550918416e-08, - "loss": 3.7407, - "step": 3308000 - }, - { - "epoch": 36.74, - "learning_rate": 3.314417075716855e-08, - "loss": 3.7642, - "step": 3308500 - }, - { - "epoch": 36.75, - "learning_rate": 3.3130288963418694e-08, - "loss": 3.749, - "step": 3309000 - }, - { - "epoch": 36.75, - "learning_rate": 3.3116407169668836e-08, - "loss": 3.7476, - "step": 3309500 - }, - { - "epoch": 36.76, - "learning_rate": 3.310252537591897e-08, - "loss": 3.74, - "step": 3310000 - }, - { - "epoch": 36.76, - "learning_rate": 3.3088643582169114e-08, - "loss": 3.7618, - "step": 3310500 - }, - { - "epoch": 36.77, - "learning_rate": 3.307476178841925e-08, - "loss": 3.7678, - "step": 3311000 - }, - { - "epoch": 36.78, - "learning_rate": 3.3060879994669386e-08, - "loss": 3.7612, - "step": 3311500 - }, - { - "epoch": 36.78, - "learning_rate": 3.304699820091953e-08, - "loss": 3.7596, - "step": 3312000 - }, - { - "epoch": 36.79, - "learning_rate": 3.303311640716967e-08, - "loss": 3.7429, - "step": 3312500 - }, - { - "epoch": 36.79, - "learning_rate": 3.3019234613419806e-08, - "loss": 3.7553, - "step": 3313000 - }, - { - "epoch": 36.8, - "learning_rate": 3.300535281966995e-08, - "loss": 3.748, - "step": 3313500 - }, - { - "epoch": 36.8, - "learning_rate": 3.2991471025920084e-08, - "loss": 3.7643, - "step": 3314000 - }, - { - "epoch": 36.81, - "learning_rate": 3.297758923217022e-08, - "loss": 3.7431, - "step": 3314500 - }, - { - "epoch": 36.81, - "learning_rate": 3.296370743842036e-08, - "loss": 3.7361, - "step": 3315000 - }, - { - "epoch": 36.82, - "learning_rate": 3.29498256446705e-08, - "loss": 3.729, - "step": 3315500 - }, - { - "epoch": 36.83, - "learning_rate": 3.293594385092064e-08, - "loss": 3.7562, - "step": 3316000 - }, - { - "epoch": 36.83, - "learning_rate": 3.292206205717078e-08, - "loss": 3.7377, - "step": 3316500 - }, - { - "epoch": 36.84, - "learning_rate": 3.290818026342092e-08, - "loss": 3.7573, - "step": 3317000 - }, - { - "epoch": 36.84, - "learning_rate": 3.289429846967105e-08, - "loss": 3.7563, - "step": 3317500 - }, - { - "epoch": 36.85, - "learning_rate": 3.2880416675921195e-08, - "loss": 3.7548, - "step": 3318000 - }, - { - "epoch": 36.85, - "learning_rate": 3.286653488217133e-08, - "loss": 3.757, - "step": 3318500 - }, - { - "epoch": 36.86, - "learning_rate": 3.285265308842147e-08, - "loss": 3.7806, - "step": 3319000 - }, - { - "epoch": 36.86, - "learning_rate": 3.2838771294671615e-08, - "loss": 3.7635, - "step": 3319500 - }, - { - "epoch": 36.87, - "learning_rate": 3.282488950092175e-08, - "loss": 3.7515, - "step": 3320000 - }, - { - "epoch": 36.88, - "learning_rate": 3.281100770717189e-08, - "loss": 3.7389, - "step": 3320500 - }, - { - "epoch": 36.88, - "learning_rate": 3.279712591342202e-08, - "loss": 3.7553, - "step": 3321000 - }, - { - "epoch": 36.89, - "learning_rate": 3.2783244119672165e-08, - "loss": 3.7461, - "step": 3321500 - }, - { - "epoch": 36.89, - "learning_rate": 3.276936232592231e-08, - "loss": 3.7716, - "step": 3322000 - }, - { - "epoch": 36.9, - "learning_rate": 3.275548053217244e-08, - "loss": 3.7601, - "step": 3322500 - }, - { - "epoch": 36.9, - "learning_rate": 3.2741598738422585e-08, - "loss": 3.7518, - "step": 3323000 - }, - { - "epoch": 36.91, - "learning_rate": 3.272771694467272e-08, - "loss": 3.7527, - "step": 3323500 - }, - { - "epoch": 36.91, - "learning_rate": 3.2713835150922856e-08, - "loss": 3.7434, - "step": 3324000 - }, - { - "epoch": 36.92, - "learning_rate": 3.2699953357173e-08, - "loss": 3.7581, - "step": 3324500 - }, - { - "epoch": 36.93, - "learning_rate": 3.268607156342314e-08, - "loss": 3.7519, - "step": 3325000 - }, - { - "epoch": 36.93, - "learning_rate": 3.2672189769673276e-08, - "loss": 3.753, - "step": 3325500 - }, - { - "epoch": 36.94, - "learning_rate": 3.265830797592342e-08, - "loss": 3.7534, - "step": 3326000 - }, - { - "epoch": 36.94, - "learning_rate": 3.2644426182173554e-08, - "loss": 3.7557, - "step": 3326500 - }, - { - "epoch": 36.95, - "learning_rate": 3.263054438842369e-08, - "loss": 3.7656, - "step": 3327000 - }, - { - "epoch": 36.95, - "learning_rate": 3.261666259467383e-08, - "loss": 3.7492, - "step": 3327500 - }, - { - "epoch": 36.96, - "learning_rate": 3.260278080092397e-08, - "loss": 3.7355, - "step": 3328000 - }, - { - "epoch": 36.96, - "learning_rate": 3.258889900717411e-08, - "loss": 3.7599, - "step": 3328500 - }, - { - "epoch": 36.97, - "learning_rate": 3.257501721342425e-08, - "loss": 3.761, - "step": 3329000 - }, - { - "epoch": 36.98, - "learning_rate": 3.256113541967439e-08, - "loss": 3.7553, - "step": 3329500 - }, - { - "epoch": 36.98, - "learning_rate": 3.2547253625924524e-08, - "loss": 3.7394, - "step": 3330000 - }, - { - "epoch": 36.99, - "learning_rate": 3.2533371832174666e-08, - "loss": 3.7374, - "step": 3330500 - }, - { - "epoch": 36.99, - "learning_rate": 3.25194900384248e-08, - "loss": 3.7448, - "step": 3331000 - }, - { - "epoch": 37.0, - "learning_rate": 3.2505608244674944e-08, - "loss": 3.7357, - "step": 3331500 - }, - { - "epoch": 37.0, - "eval_loss": 3.8285868167877197, - "eval_runtime": 6.3016, - "eval_samples_per_second": 246.605, - "step": 3331702 - }, - { - "epoch": 37.0, - "learning_rate": 3.2491726450925086e-08, - "loss": 3.7674, - "step": 3332000 - }, - { - "epoch": 37.01, - "learning_rate": 3.247784465717522e-08, - "loss": 3.7627, - "step": 3332500 - }, - { - "epoch": 37.01, - "learning_rate": 3.246396286342536e-08, - "loss": 3.7528, - "step": 3333000 - }, - { - "epoch": 37.02, - "learning_rate": 3.24500810696755e-08, - "loss": 3.758, - "step": 3333500 - }, - { - "epoch": 37.03, - "learning_rate": 3.2436199275925635e-08, - "loss": 3.7433, - "step": 3334000 - }, - { - "epoch": 37.03, - "learning_rate": 3.242231748217578e-08, - "loss": 3.7402, - "step": 3334500 - }, - { - "epoch": 37.04, - "learning_rate": 3.240843568842592e-08, - "loss": 3.7674, - "step": 3335000 - }, - { - "epoch": 37.04, - "learning_rate": 3.2394553894676055e-08, - "loss": 3.7388, - "step": 3335500 - }, - { - "epoch": 37.05, - "learning_rate": 3.238067210092619e-08, - "loss": 3.7443, - "step": 3336000 - }, - { - "epoch": 37.05, - "learning_rate": 3.236679030717633e-08, - "loss": 3.7599, - "step": 3336500 - }, - { - "epoch": 37.06, - "learning_rate": 3.235290851342647e-08, - "loss": 3.7855, - "step": 3337000 - }, - { - "epoch": 37.06, - "learning_rate": 3.233902671967661e-08, - "loss": 3.7608, - "step": 3337500 - }, - { - "epoch": 37.07, - "learning_rate": 3.232514492592675e-08, - "loss": 3.7549, - "step": 3338000 - }, - { - "epoch": 37.08, - "learning_rate": 3.231126313217688e-08, - "loss": 3.748, - "step": 3338500 - }, - { - "epoch": 37.08, - "learning_rate": 3.2297381338427025e-08, - "loss": 3.7428, - "step": 3339000 - }, - { - "epoch": 37.09, - "learning_rate": 3.228349954467716e-08, - "loss": 3.7516, - "step": 3339500 - }, - { - "epoch": 37.09, - "learning_rate": 3.22696177509273e-08, - "loss": 3.7639, - "step": 3340000 - }, - { - "epoch": 37.1, - "learning_rate": 3.2255735957177445e-08, - "loss": 3.752, - "step": 3340500 - }, - { - "epoch": 37.1, - "learning_rate": 3.224185416342758e-08, - "loss": 3.7565, - "step": 3341000 - }, - { - "epoch": 37.11, - "learning_rate": 3.2227972369677716e-08, - "loss": 3.7648, - "step": 3341500 - }, - { - "epoch": 37.11, - "learning_rate": 3.221409057592786e-08, - "loss": 3.7706, - "step": 3342000 - }, - { - "epoch": 37.12, - "learning_rate": 3.2200208782177994e-08, - "loss": 3.7527, - "step": 3342500 - }, - { - "epoch": 37.13, - "learning_rate": 3.2186326988428136e-08, - "loss": 3.7359, - "step": 3343000 - }, - { - "epoch": 37.13, - "learning_rate": 3.217244519467827e-08, - "loss": 3.7478, - "step": 3343500 - }, - { - "epoch": 37.14, - "learning_rate": 3.2158563400928414e-08, - "loss": 3.7665, - "step": 3344000 - }, - { - "epoch": 37.14, - "learning_rate": 3.214468160717855e-08, - "loss": 3.7741, - "step": 3344500 - }, - { - "epoch": 37.15, - "learning_rate": 3.2130799813428686e-08, - "loss": 3.7599, - "step": 3345000 - }, - { - "epoch": 37.15, - "learning_rate": 3.211691801967883e-08, - "loss": 3.7647, - "step": 3345500 - }, - { - "epoch": 37.16, - "learning_rate": 3.210303622592897e-08, - "loss": 3.7407, - "step": 3346000 - }, - { - "epoch": 37.16, - "learning_rate": 3.2089154432179106e-08, - "loss": 3.7566, - "step": 3346500 - }, - { - "epoch": 37.17, - "learning_rate": 3.207527263842925e-08, - "loss": 3.7688, - "step": 3347000 - }, - { - "epoch": 37.18, - "learning_rate": 3.2061390844679384e-08, - "loss": 3.7577, - "step": 3347500 - }, - { - "epoch": 37.18, - "learning_rate": 3.204750905092952e-08, - "loss": 3.7662, - "step": 3348000 - }, - { - "epoch": 37.19, - "learning_rate": 3.203362725717966e-08, - "loss": 3.759, - "step": 3348500 - }, - { - "epoch": 37.19, - "learning_rate": 3.2019745463429804e-08, - "loss": 3.749, - "step": 3349000 - }, - { - "epoch": 37.2, - "learning_rate": 3.200586366967994e-08, - "loss": 3.7605, - "step": 3349500 - }, - { - "epoch": 37.2, - "learning_rate": 3.199198187593008e-08, - "loss": 3.7458, - "step": 3350000 - }, - { - "epoch": 37.21, - "learning_rate": 3.197810008218022e-08, - "loss": 3.7652, - "step": 3350500 - }, - { - "epoch": 37.21, - "learning_rate": 3.196421828843035e-08, - "loss": 3.7664, - "step": 3351000 - }, - { - "epoch": 37.22, - "learning_rate": 3.1950336494680495e-08, - "loss": 3.759, - "step": 3351500 - }, - { - "epoch": 37.23, - "learning_rate": 3.193645470093063e-08, - "loss": 3.7555, - "step": 3352000 - }, - { - "epoch": 37.23, - "learning_rate": 3.1922572907180773e-08, - "loss": 3.7478, - "step": 3352500 - }, - { - "epoch": 37.24, - "learning_rate": 3.1908691113430916e-08, - "loss": 3.7581, - "step": 3353000 - }, - { - "epoch": 37.24, - "learning_rate": 3.189480931968105e-08, - "loss": 3.7562, - "step": 3353500 - }, - { - "epoch": 37.25, - "learning_rate": 3.188092752593119e-08, - "loss": 3.7527, - "step": 3354000 - }, - { - "epoch": 37.25, - "learning_rate": 3.186704573218133e-08, - "loss": 3.762, - "step": 3354500 - }, - { - "epoch": 37.26, - "learning_rate": 3.1853163938431465e-08, - "loss": 3.7496, - "step": 3355000 - }, - { - "epoch": 37.26, - "learning_rate": 3.183928214468161e-08, - "loss": 3.7467, - "step": 3355500 - }, - { - "epoch": 37.27, - "learning_rate": 3.182540035093175e-08, - "loss": 3.7363, - "step": 3356000 - }, - { - "epoch": 37.28, - "learning_rate": 3.1811518557181885e-08, - "loss": 3.7561, - "step": 3356500 - }, - { - "epoch": 37.28, - "learning_rate": 3.179763676343202e-08, - "loss": 3.7629, - "step": 3357000 - }, - { - "epoch": 37.29, - "learning_rate": 3.1783754969682156e-08, - "loss": 3.7565, - "step": 3357500 - }, - { - "epoch": 37.29, - "learning_rate": 3.17698731759323e-08, - "loss": 3.7675, - "step": 3358000 - }, - { - "epoch": 37.3, - "learning_rate": 3.175599138218244e-08, - "loss": 3.7512, - "step": 3358500 - }, - { - "epoch": 37.3, - "learning_rate": 3.1742109588432576e-08, - "loss": 3.755, - "step": 3359000 - }, - { - "epoch": 37.31, - "learning_rate": 3.172822779468272e-08, - "loss": 3.7664, - "step": 3359500 - }, - { - "epoch": 37.31, - "learning_rate": 3.1714346000932854e-08, - "loss": 3.7526, - "step": 3360000 - }, - { - "epoch": 37.32, - "learning_rate": 3.170046420718299e-08, - "loss": 3.7588, - "step": 3360500 - }, - { - "epoch": 37.33, - "learning_rate": 3.168658241343313e-08, - "loss": 3.7548, - "step": 3361000 - }, - { - "epoch": 37.33, - "learning_rate": 3.1672700619683275e-08, - "loss": 3.7603, - "step": 3361500 - }, - { - "epoch": 37.34, - "learning_rate": 3.165881882593341e-08, - "loss": 3.7507, - "step": 3362000 - }, - { - "epoch": 37.34, - "learning_rate": 3.164493703218355e-08, - "loss": 3.7634, - "step": 3362500 - }, - { - "epoch": 37.35, - "learning_rate": 3.163105523843369e-08, - "loss": 3.7463, - "step": 3363000 - }, - { - "epoch": 37.35, - "learning_rate": 3.1617173444683824e-08, - "loss": 3.7564, - "step": 3363500 - }, - { - "epoch": 37.36, - "learning_rate": 3.1603291650933966e-08, - "loss": 3.7643, - "step": 3364000 - }, - { - "epoch": 37.36, - "learning_rate": 3.158940985718411e-08, - "loss": 3.7553, - "step": 3364500 - }, - { - "epoch": 37.37, - "learning_rate": 3.1575528063434244e-08, - "loss": 3.7384, - "step": 3365000 - }, - { - "epoch": 37.38, - "learning_rate": 3.1561646269684386e-08, - "loss": 3.7471, - "step": 3365500 - }, - { - "epoch": 37.38, - "learning_rate": 3.154776447593452e-08, - "loss": 3.7494, - "step": 3366000 - }, - { - "epoch": 37.39, - "learning_rate": 3.153388268218466e-08, - "loss": 3.7442, - "step": 3366500 - }, - { - "epoch": 37.39, - "learning_rate": 3.15200008884348e-08, - "loss": 3.7769, - "step": 3367000 - }, - { - "epoch": 37.4, - "learning_rate": 3.1506119094684935e-08, - "loss": 3.7337, - "step": 3367500 - }, - { - "epoch": 37.4, - "learning_rate": 3.149223730093508e-08, - "loss": 3.7813, - "step": 3368000 - }, - { - "epoch": 37.41, - "learning_rate": 3.147835550718522e-08, - "loss": 3.7316, - "step": 3368500 - }, - { - "epoch": 37.41, - "learning_rate": 3.146447371343535e-08, - "loss": 3.7523, - "step": 3369000 - }, - { - "epoch": 37.42, - "learning_rate": 3.145059191968549e-08, - "loss": 3.7453, - "step": 3369500 - }, - { - "epoch": 37.43, - "learning_rate": 3.1436710125935633e-08, - "loss": 3.7436, - "step": 3370000 - }, - { - "epoch": 37.43, - "learning_rate": 3.142282833218577e-08, - "loss": 3.7641, - "step": 3370500 - }, - { - "epoch": 37.44, - "learning_rate": 3.140894653843591e-08, - "loss": 3.7519, - "step": 3371000 - }, - { - "epoch": 37.44, - "learning_rate": 3.1395064744686054e-08, - "loss": 3.7592, - "step": 3371500 - }, - { - "epoch": 37.45, - "learning_rate": 3.138118295093618e-08, - "loss": 3.7397, - "step": 3372000 - }, - { - "epoch": 37.45, - "learning_rate": 3.1367301157186325e-08, - "loss": 3.7388, - "step": 3372500 - }, - { - "epoch": 37.46, - "learning_rate": 3.135341936343646e-08, - "loss": 3.7509, - "step": 3373000 - }, - { - "epoch": 37.46, - "learning_rate": 3.13395375696866e-08, - "loss": 3.7596, - "step": 3373500 - }, - { - "epoch": 37.47, - "learning_rate": 3.1325655775936745e-08, - "loss": 3.7439, - "step": 3374000 - }, - { - "epoch": 37.48, - "learning_rate": 3.131177398218688e-08, - "loss": 3.7576, - "step": 3374500 - }, - { - "epoch": 37.48, - "learning_rate": 3.1297892188437016e-08, - "loss": 3.7385, - "step": 3375000 - }, - { - "epoch": 37.49, - "learning_rate": 3.128401039468716e-08, - "loss": 3.7527, - "step": 3375500 - }, - { - "epoch": 37.49, - "learning_rate": 3.1270128600937294e-08, - "loss": 3.7459, - "step": 3376000 - }, - { - "epoch": 37.5, - "learning_rate": 3.1256246807187437e-08, - "loss": 3.7583, - "step": 3376500 - }, - { - "epoch": 37.5, - "learning_rate": 3.124236501343758e-08, - "loss": 3.7548, - "step": 3377000 - }, - { - "epoch": 37.51, - "learning_rate": 3.1228483219687714e-08, - "loss": 3.7378, - "step": 3377500 - }, - { - "epoch": 37.51, - "learning_rate": 3.121460142593785e-08, - "loss": 3.7376, - "step": 3378000 - }, - { - "epoch": 37.52, - "learning_rate": 3.120071963218799e-08, - "loss": 3.7718, - "step": 3378500 - }, - { - "epoch": 37.53, - "learning_rate": 3.118683783843813e-08, - "loss": 3.746, - "step": 3379000 - }, - { - "epoch": 37.53, - "learning_rate": 3.117295604468827e-08, - "loss": 3.7418, - "step": 3379500 - }, - { - "epoch": 37.54, - "learning_rate": 3.1159074250938406e-08, - "loss": 3.745, - "step": 3380000 - }, - { - "epoch": 37.54, - "learning_rate": 3.114519245718855e-08, - "loss": 3.7314, - "step": 3380500 - }, - { - "epoch": 37.55, - "learning_rate": 3.1131310663438684e-08, - "loss": 3.7621, - "step": 3381000 - }, - { - "epoch": 37.55, - "learning_rate": 3.111742886968882e-08, - "loss": 3.7492, - "step": 3381500 - }, - { - "epoch": 37.56, - "learning_rate": 3.110354707593896e-08, - "loss": 3.7492, - "step": 3382000 - }, - { - "epoch": 37.56, - "learning_rate": 3.1089665282189104e-08, - "loss": 3.7376, - "step": 3382500 - }, - { - "epoch": 37.57, - "learning_rate": 3.107578348843924e-08, - "loss": 3.7621, - "step": 3383000 - }, - { - "epoch": 37.58, - "learning_rate": 3.106190169468938e-08, - "loss": 3.7644, - "step": 3383500 - }, - { - "epoch": 37.58, - "learning_rate": 3.104801990093952e-08, - "loss": 3.7444, - "step": 3384000 - }, - { - "epoch": 37.59, - "learning_rate": 3.103413810718965e-08, - "loss": 3.7507, - "step": 3384500 - }, - { - "epoch": 37.59, - "learning_rate": 3.1020256313439795e-08, - "loss": 3.7446, - "step": 3385000 - }, - { - "epoch": 37.6, - "learning_rate": 3.100637451968994e-08, - "loss": 3.7623, - "step": 3385500 - }, - { - "epoch": 37.6, - "learning_rate": 3.0992492725940073e-08, - "loss": 3.7418, - "step": 3386000 - }, - { - "epoch": 37.61, - "learning_rate": 3.0978610932190216e-08, - "loss": 3.7487, - "step": 3386500 - }, - { - "epoch": 37.61, - "learning_rate": 3.096472913844035e-08, - "loss": 3.7393, - "step": 3387000 - }, - { - "epoch": 37.62, - "learning_rate": 3.095084734469049e-08, - "loss": 3.7549, - "step": 3387500 - }, - { - "epoch": 37.63, - "learning_rate": 3.093696555094063e-08, - "loss": 3.7511, - "step": 3388000 - }, - { - "epoch": 37.63, - "learning_rate": 3.0923083757190765e-08, - "loss": 3.754, - "step": 3388500 - }, - { - "epoch": 37.64, - "learning_rate": 3.090920196344091e-08, - "loss": 3.7522, - "step": 3389000 - }, - { - "epoch": 37.64, - "learning_rate": 3.089532016969105e-08, - "loss": 3.7507, - "step": 3389500 - }, - { - "epoch": 37.65, - "learning_rate": 3.0881438375941185e-08, - "loss": 3.757, - "step": 3390000 - }, - { - "epoch": 37.65, - "learning_rate": 3.086755658219132e-08, - "loss": 3.7612, - "step": 3390500 - }, - { - "epoch": 37.66, - "learning_rate": 3.085367478844146e-08, - "loss": 3.7552, - "step": 3391000 - }, - { - "epoch": 37.66, - "learning_rate": 3.08397929946916e-08, - "loss": 3.7644, - "step": 3391500 - }, - { - "epoch": 37.67, - "learning_rate": 3.082591120094174e-08, - "loss": 3.7576, - "step": 3392000 - }, - { - "epoch": 37.68, - "learning_rate": 3.081202940719188e-08, - "loss": 3.7415, - "step": 3392500 - }, - { - "epoch": 37.68, - "learning_rate": 3.079814761344202e-08, - "loss": 3.7595, - "step": 3393000 - }, - { - "epoch": 37.69, - "learning_rate": 3.0784265819692154e-08, - "loss": 3.7499, - "step": 3393500 - }, - { - "epoch": 37.69, - "learning_rate": 3.0770384025942297e-08, - "loss": 3.7425, - "step": 3394000 - }, - { - "epoch": 37.7, - "learning_rate": 3.075650223219243e-08, - "loss": 3.7547, - "step": 3394500 - }, - { - "epoch": 37.7, - "learning_rate": 3.0742620438442575e-08, - "loss": 3.7531, - "step": 3395000 - }, - { - "epoch": 37.71, - "learning_rate": 3.072873864469271e-08, - "loss": 3.7303, - "step": 3395500 - }, - { - "epoch": 37.71, - "learning_rate": 3.071485685094285e-08, - "loss": 3.7471, - "step": 3396000 - }, - { - "epoch": 37.72, - "learning_rate": 3.070097505719299e-08, - "loss": 3.7319, - "step": 3396500 - }, - { - "epoch": 37.73, - "learning_rate": 3.0687093263443124e-08, - "loss": 3.7544, - "step": 3397000 - }, - { - "epoch": 37.73, - "learning_rate": 3.0673211469693266e-08, - "loss": 3.7547, - "step": 3397500 - }, - { - "epoch": 37.74, - "learning_rate": 3.065932967594341e-08, - "loss": 3.7591, - "step": 3398000 - }, - { - "epoch": 37.74, - "learning_rate": 3.0645447882193544e-08, - "loss": 3.7565, - "step": 3398500 - }, - { - "epoch": 37.75, - "learning_rate": 3.0631566088443686e-08, - "loss": 3.7447, - "step": 3399000 - }, - { - "epoch": 37.75, - "learning_rate": 3.061768429469382e-08, - "loss": 3.744, - "step": 3399500 - }, - { - "epoch": 37.76, - "learning_rate": 3.060380250094396e-08, - "loss": 3.7565, - "step": 3400000 - }, - { - "epoch": 37.76, - "learning_rate": 3.05899207071941e-08, - "loss": 3.7659, - "step": 3400500 - }, - { - "epoch": 37.77, - "learning_rate": 3.057603891344424e-08, - "loss": 3.7541, - "step": 3401000 - }, - { - "epoch": 37.78, - "learning_rate": 3.056215711969438e-08, - "loss": 3.769, - "step": 3401500 - }, - { - "epoch": 37.78, - "learning_rate": 3.054827532594452e-08, - "loss": 3.7546, - "step": 3402000 - }, - { - "epoch": 37.79, - "learning_rate": 3.053439353219465e-08, - "loss": 3.7453, - "step": 3402500 - }, - { - "epoch": 37.79, - "learning_rate": 3.052051173844479e-08, - "loss": 3.743, - "step": 3403000 - }, - { - "epoch": 37.8, - "learning_rate": 3.0506629944694934e-08, - "loss": 3.745, - "step": 3403500 - }, - { - "epoch": 37.8, - "learning_rate": 3.049274815094507e-08, - "loss": 3.7554, - "step": 3404000 - }, - { - "epoch": 37.81, - "learning_rate": 3.047886635719521e-08, - "loss": 3.7689, - "step": 3404500 - }, - { - "epoch": 37.81, - "learning_rate": 3.0464984563445354e-08, - "loss": 3.7539, - "step": 3405000 - }, - { - "epoch": 37.82, - "learning_rate": 3.045110276969548e-08, - "loss": 3.734, - "step": 3405500 - }, - { - "epoch": 37.83, - "learning_rate": 3.0437220975945625e-08, - "loss": 3.7547, - "step": 3406000 - }, - { - "epoch": 37.83, - "learning_rate": 3.042333918219577e-08, - "loss": 3.76, - "step": 3406500 - }, - { - "epoch": 37.84, - "learning_rate": 3.04094573884459e-08, - "loss": 3.7463, - "step": 3407000 - }, - { - "epoch": 37.84, - "learning_rate": 3.0395575594696045e-08, - "loss": 3.7683, - "step": 3407500 - }, - { - "epoch": 37.85, - "learning_rate": 3.038169380094619e-08, - "loss": 3.7528, - "step": 3408000 - }, - { - "epoch": 37.85, - "learning_rate": 3.0367812007196316e-08, - "loss": 3.7601, - "step": 3408500 - }, - { - "epoch": 37.86, - "learning_rate": 3.035393021344646e-08, - "loss": 3.7699, - "step": 3409000 - }, - { - "epoch": 37.86, - "learning_rate": 3.0340048419696594e-08, - "loss": 3.7559, - "step": 3409500 - }, - { - "epoch": 37.87, - "learning_rate": 3.0326166625946737e-08, - "loss": 3.7554, - "step": 3410000 - }, - { - "epoch": 37.88, - "learning_rate": 3.031228483219688e-08, - "loss": 3.7509, - "step": 3410500 - }, - { - "epoch": 37.88, - "learning_rate": 3.0298403038447015e-08, - "loss": 3.7593, - "step": 3411000 - }, - { - "epoch": 37.89, - "learning_rate": 3.028452124469715e-08, - "loss": 3.7492, - "step": 3411500 - }, - { - "epoch": 37.89, - "learning_rate": 3.027063945094729e-08, - "loss": 3.7598, - "step": 3412000 - }, - { - "epoch": 37.9, - "learning_rate": 3.025675765719743e-08, - "loss": 3.7339, - "step": 3412500 - }, - { - "epoch": 37.9, - "learning_rate": 3.024287586344757e-08, - "loss": 3.7586, - "step": 3413000 - }, - { - "epoch": 37.91, - "learning_rate": 3.022899406969771e-08, - "loss": 3.7677, - "step": 3413500 - }, - { - "epoch": 37.91, - "learning_rate": 3.021511227594785e-08, - "loss": 3.754, - "step": 3414000 - }, - { - "epoch": 37.92, - "learning_rate": 3.0201230482197984e-08, - "loss": 3.7471, - "step": 3414500 - }, - { - "epoch": 37.93, - "learning_rate": 3.0187348688448126e-08, - "loss": 3.7551, - "step": 3415000 - }, - { - "epoch": 37.93, - "learning_rate": 3.017346689469826e-08, - "loss": 3.7552, - "step": 3415500 - }, - { - "epoch": 37.94, - "learning_rate": 3.0159585100948404e-08, - "loss": 3.7701, - "step": 3416000 - }, - { - "epoch": 37.94, - "learning_rate": 3.014570330719854e-08, - "loss": 3.7689, - "step": 3416500 - }, - { - "epoch": 37.95, - "learning_rate": 3.013182151344868e-08, - "loss": 3.7536, - "step": 3417000 - }, - { - "epoch": 37.95, - "learning_rate": 3.011793971969882e-08, - "loss": 3.7631, - "step": 3417500 - }, - { - "epoch": 37.96, - "learning_rate": 3.010405792594895e-08, - "loss": 3.7546, - "step": 3418000 - }, - { - "epoch": 37.96, - "learning_rate": 3.0090176132199096e-08, - "loss": 3.7525, - "step": 3418500 - }, - { - "epoch": 37.97, - "learning_rate": 3.007629433844924e-08, - "loss": 3.7527, - "step": 3419000 - }, - { - "epoch": 37.98, - "learning_rate": 3.0062412544699373e-08, - "loss": 3.731, - "step": 3419500 - }, - { - "epoch": 37.98, - "learning_rate": 3.0048530750949516e-08, - "loss": 3.7615, - "step": 3420000 - }, - { - "epoch": 37.99, - "learning_rate": 3.003464895719965e-08, - "loss": 3.7465, - "step": 3420500 - }, - { - "epoch": 37.99, - "learning_rate": 3.002076716344979e-08, - "loss": 3.7562, - "step": 3421000 - }, - { - "epoch": 38.0, - "learning_rate": 3.000688536969993e-08, - "loss": 3.7605, - "step": 3421500 - }, - { - "epoch": 38.0, - "eval_loss": 3.8280832767486572, - "eval_runtime": 6.3072, - "eval_samples_per_second": 246.384, - "step": 3421748 - }, - { - "epoch": 38.0, - "learning_rate": 2.999300357595007e-08, - "loss": 3.7658, - "step": 3422000 - }, - { - "epoch": 38.01, - "learning_rate": 2.997912178220021e-08, - "loss": 3.7584, - "step": 3422500 - }, - { - "epoch": 38.01, - "learning_rate": 2.996523998845035e-08, - "loss": 3.7427, - "step": 3423000 - }, - { - "epoch": 38.02, - "learning_rate": 2.9951358194700485e-08, - "loss": 3.735, - "step": 3423500 - }, - { - "epoch": 38.03, - "learning_rate": 2.993747640095062e-08, - "loss": 3.735, - "step": 3424000 - }, - { - "epoch": 38.03, - "learning_rate": 2.992359460720076e-08, - "loss": 3.739, - "step": 3424500 - }, - { - "epoch": 38.04, - "learning_rate": 2.99097128134509e-08, - "loss": 3.7649, - "step": 3425000 - }, - { - "epoch": 38.04, - "learning_rate": 2.989583101970104e-08, - "loss": 3.7447, - "step": 3425500 - }, - { - "epoch": 38.05, - "learning_rate": 2.988194922595118e-08, - "loss": 3.7576, - "step": 3426000 - }, - { - "epoch": 38.05, - "learning_rate": 2.986806743220132e-08, - "loss": 3.7477, - "step": 3426500 - }, - { - "epoch": 38.06, - "learning_rate": 2.9854185638451454e-08, - "loss": 3.7629, - "step": 3427000 - }, - { - "epoch": 38.06, - "learning_rate": 2.98403038447016e-08, - "loss": 3.7544, - "step": 3427500 - }, - { - "epoch": 38.07, - "learning_rate": 2.982642205095173e-08, - "loss": 3.7341, - "step": 3428000 - }, - { - "epoch": 38.07, - "learning_rate": 2.9812540257201875e-08, - "loss": 3.7559, - "step": 3428500 - }, - { - "epoch": 38.08, - "learning_rate": 2.9798658463452014e-08, - "loss": 3.7492, - "step": 3429000 - }, - { - "epoch": 38.09, - "learning_rate": 2.978477666970215e-08, - "loss": 3.7555, - "step": 3429500 - }, - { - "epoch": 38.09, - "learning_rate": 2.977089487595229e-08, - "loss": 3.7478, - "step": 3430000 - }, - { - "epoch": 38.1, - "learning_rate": 2.975701308220243e-08, - "loss": 3.764, - "step": 3430500 - }, - { - "epoch": 38.1, - "learning_rate": 2.9743131288452566e-08, - "loss": 3.749, - "step": 3431000 - }, - { - "epoch": 38.11, - "learning_rate": 2.972924949470271e-08, - "loss": 3.7642, - "step": 3431500 - }, - { - "epoch": 38.11, - "learning_rate": 2.9715367700952844e-08, - "loss": 3.7605, - "step": 3432000 - }, - { - "epoch": 38.12, - "learning_rate": 2.9701485907202983e-08, - "loss": 3.7785, - "step": 3432500 - }, - { - "epoch": 38.12, - "learning_rate": 2.9687604113453125e-08, - "loss": 3.7455, - "step": 3433000 - }, - { - "epoch": 38.13, - "learning_rate": 2.967372231970326e-08, - "loss": 3.7649, - "step": 3433500 - }, - { - "epoch": 38.14, - "learning_rate": 2.96598405259534e-08, - "loss": 3.7552, - "step": 3434000 - }, - { - "epoch": 38.14, - "learning_rate": 2.9645958732203542e-08, - "loss": 3.7456, - "step": 3434500 - }, - { - "epoch": 38.15, - "learning_rate": 2.9632076938453678e-08, - "loss": 3.7597, - "step": 3435000 - }, - { - "epoch": 38.15, - "learning_rate": 2.9618195144703817e-08, - "loss": 3.7324, - "step": 3435500 - }, - { - "epoch": 38.16, - "learning_rate": 2.960431335095396e-08, - "loss": 3.7703, - "step": 3436000 - }, - { - "epoch": 38.16, - "learning_rate": 2.9590431557204095e-08, - "loss": 3.7595, - "step": 3436500 - }, - { - "epoch": 38.17, - "learning_rate": 2.9576549763454234e-08, - "loss": 3.733, - "step": 3437000 - }, - { - "epoch": 38.17, - "learning_rate": 2.9562667969704376e-08, - "loss": 3.7789, - "step": 3437500 - }, - { - "epoch": 38.18, - "learning_rate": 2.9548786175954508e-08, - "loss": 3.7548, - "step": 3438000 - }, - { - "epoch": 38.19, - "learning_rate": 2.953490438220465e-08, - "loss": 3.7639, - "step": 3438500 - }, - { - "epoch": 38.19, - "learning_rate": 2.9521022588454786e-08, - "loss": 3.773, - "step": 3439000 - }, - { - "epoch": 38.2, - "learning_rate": 2.9507140794704925e-08, - "loss": 3.7394, - "step": 3439500 - }, - { - "epoch": 38.2, - "learning_rate": 2.9493259000955067e-08, - "loss": 3.7502, - "step": 3440000 - }, - { - "epoch": 38.21, - "learning_rate": 2.9479377207205203e-08, - "loss": 3.7541, - "step": 3440500 - }, - { - "epoch": 38.21, - "learning_rate": 2.9465495413455342e-08, - "loss": 3.7506, - "step": 3441000 - }, - { - "epoch": 38.22, - "learning_rate": 2.9451613619705484e-08, - "loss": 3.7602, - "step": 3441500 - }, - { - "epoch": 38.22, - "learning_rate": 2.943773182595562e-08, - "loss": 3.7532, - "step": 3442000 - }, - { - "epoch": 38.23, - "learning_rate": 2.942385003220576e-08, - "loss": 3.7475, - "step": 3442500 - }, - { - "epoch": 38.24, - "learning_rate": 2.94099682384559e-08, - "loss": 3.7576, - "step": 3443000 - }, - { - "epoch": 38.24, - "learning_rate": 2.9396086444706037e-08, - "loss": 3.749, - "step": 3443500 - }, - { - "epoch": 38.25, - "learning_rate": 2.9382204650956176e-08, - "loss": 3.7464, - "step": 3444000 - }, - { - "epoch": 38.25, - "learning_rate": 2.9368322857206318e-08, - "loss": 3.7677, - "step": 3444500 - }, - { - "epoch": 38.26, - "learning_rate": 2.9354441063456454e-08, - "loss": 3.7481, - "step": 3445000 - }, - { - "epoch": 38.26, - "learning_rate": 2.9340559269706593e-08, - "loss": 3.7454, - "step": 3445500 - }, - { - "epoch": 38.27, - "learning_rate": 2.9326677475956728e-08, - "loss": 3.766, - "step": 3446000 - }, - { - "epoch": 38.27, - "learning_rate": 2.931279568220687e-08, - "loss": 3.746, - "step": 3446500 - }, - { - "epoch": 38.28, - "learning_rate": 2.929891388845701e-08, - "loss": 3.7496, - "step": 3447000 - }, - { - "epoch": 38.29, - "learning_rate": 2.9285032094707145e-08, - "loss": 3.7606, - "step": 3447500 - }, - { - "epoch": 38.29, - "learning_rate": 2.9271150300957287e-08, - "loss": 3.7598, - "step": 3448000 - }, - { - "epoch": 38.3, - "learning_rate": 2.9257268507207426e-08, - "loss": 3.7538, - "step": 3448500 - }, - { - "epoch": 38.3, - "learning_rate": 2.9243386713457562e-08, - "loss": 3.7618, - "step": 3449000 - }, - { - "epoch": 38.31, - "learning_rate": 2.9229504919707704e-08, - "loss": 3.7546, - "step": 3449500 - }, - { - "epoch": 38.31, - "learning_rate": 2.9215623125957843e-08, - "loss": 3.7478, - "step": 3450000 - }, - { - "epoch": 38.32, - "learning_rate": 2.920174133220798e-08, - "loss": 3.7631, - "step": 3450500 - }, - { - "epoch": 38.32, - "learning_rate": 2.918785953845812e-08, - "loss": 3.76, - "step": 3451000 - }, - { - "epoch": 38.33, - "learning_rate": 2.917397774470826e-08, - "loss": 3.7539, - "step": 3451500 - }, - { - "epoch": 38.34, - "learning_rate": 2.9160095950958396e-08, - "loss": 3.745, - "step": 3452000 - }, - { - "epoch": 38.34, - "learning_rate": 2.9146214157208538e-08, - "loss": 3.7284, - "step": 3452500 - }, - { - "epoch": 38.35, - "learning_rate": 2.9132332363458674e-08, - "loss": 3.7579, - "step": 3453000 - }, - { - "epoch": 38.35, - "learning_rate": 2.9118450569708813e-08, - "loss": 3.7519, - "step": 3453500 - }, - { - "epoch": 38.36, - "learning_rate": 2.9104568775958955e-08, - "loss": 3.7567, - "step": 3454000 - }, - { - "epoch": 38.36, - "learning_rate": 2.909068698220909e-08, - "loss": 3.7407, - "step": 3454500 - }, - { - "epoch": 38.37, - "learning_rate": 2.907680518845923e-08, - "loss": 3.7542, - "step": 3455000 - }, - { - "epoch": 38.37, - "learning_rate": 2.906292339470937e-08, - "loss": 3.7651, - "step": 3455500 - }, - { - "epoch": 38.38, - "learning_rate": 2.9049041600959507e-08, - "loss": 3.7661, - "step": 3456000 - }, - { - "epoch": 38.39, - "learning_rate": 2.9035159807209646e-08, - "loss": 3.7559, - "step": 3456500 - }, - { - "epoch": 38.39, - "learning_rate": 2.902127801345979e-08, - "loss": 3.7596, - "step": 3457000 - }, - { - "epoch": 38.4, - "learning_rate": 2.9007396219709924e-08, - "loss": 3.753, - "step": 3457500 - }, - { - "epoch": 38.4, - "learning_rate": 2.8993514425960063e-08, - "loss": 3.7527, - "step": 3458000 - }, - { - "epoch": 38.41, - "learning_rate": 2.8979632632210205e-08, - "loss": 3.7503, - "step": 3458500 - }, - { - "epoch": 38.41, - "learning_rate": 2.896575083846034e-08, - "loss": 3.7453, - "step": 3459000 - }, - { - "epoch": 38.42, - "learning_rate": 2.895186904471048e-08, - "loss": 3.7524, - "step": 3459500 - }, - { - "epoch": 38.42, - "learning_rate": 2.8937987250960622e-08, - "loss": 3.7489, - "step": 3460000 - }, - { - "epoch": 38.43, - "learning_rate": 2.8924105457210758e-08, - "loss": 3.745, - "step": 3460500 - }, - { - "epoch": 38.44, - "learning_rate": 2.8910223663460897e-08, - "loss": 3.765, - "step": 3461000 - }, - { - "epoch": 38.44, - "learning_rate": 2.8896341869711032e-08, - "loss": 3.7431, - "step": 3461500 - }, - { - "epoch": 38.45, - "learning_rate": 2.8882460075961175e-08, - "loss": 3.7287, - "step": 3462000 - }, - { - "epoch": 38.45, - "learning_rate": 2.8868578282211314e-08, - "loss": 3.7557, - "step": 3462500 - }, - { - "epoch": 38.46, - "learning_rate": 2.885469648846145e-08, - "loss": 3.7507, - "step": 3463000 - }, - { - "epoch": 38.46, - "learning_rate": 2.884081469471159e-08, - "loss": 3.7603, - "step": 3463500 - }, - { - "epoch": 38.47, - "learning_rate": 2.882693290096173e-08, - "loss": 3.7438, - "step": 3464000 - }, - { - "epoch": 38.47, - "learning_rate": 2.8813051107211866e-08, - "loss": 3.7376, - "step": 3464500 - }, - { - "epoch": 38.48, - "learning_rate": 2.879916931346201e-08, - "loss": 3.7416, - "step": 3465000 - }, - { - "epoch": 38.49, - "learning_rate": 2.8785287519712147e-08, - "loss": 3.7641, - "step": 3465500 - }, - { - "epoch": 38.49, - "learning_rate": 2.8771405725962283e-08, - "loss": 3.7652, - "step": 3466000 - }, - { - "epoch": 38.5, - "learning_rate": 2.8757523932212425e-08, - "loss": 3.7758, - "step": 3466500 - }, - { - "epoch": 38.5, - "learning_rate": 2.8743642138462564e-08, - "loss": 3.7463, - "step": 3467000 - }, - { - "epoch": 38.51, - "learning_rate": 2.87297603447127e-08, - "loss": 3.7515, - "step": 3467500 - }, - { - "epoch": 38.51, - "learning_rate": 2.8715878550962842e-08, - "loss": 3.7448, - "step": 3468000 - }, - { - "epoch": 38.52, - "learning_rate": 2.8701996757212975e-08, - "loss": 3.7526, - "step": 3468500 - }, - { - "epoch": 38.52, - "learning_rate": 2.8688114963463117e-08, - "loss": 3.7561, - "step": 3469000 - }, - { - "epoch": 38.53, - "learning_rate": 2.867423316971326e-08, - "loss": 3.7667, - "step": 3469500 - }, - { - "epoch": 38.54, - "learning_rate": 2.866035137596339e-08, - "loss": 3.7514, - "step": 3470000 - }, - { - "epoch": 38.54, - "learning_rate": 2.8646469582213534e-08, - "loss": 3.7546, - "step": 3470500 - }, - { - "epoch": 38.55, - "learning_rate": 2.8632587788463676e-08, - "loss": 3.7418, - "step": 3471000 - }, - { - "epoch": 38.55, - "learning_rate": 2.8618705994713808e-08, - "loss": 3.7401, - "step": 3471500 - }, - { - "epoch": 38.56, - "learning_rate": 2.860482420096395e-08, - "loss": 3.7646, - "step": 3472000 - }, - { - "epoch": 38.56, - "learning_rate": 2.8590942407214093e-08, - "loss": 3.751, - "step": 3472500 - }, - { - "epoch": 38.57, - "learning_rate": 2.8577060613464225e-08, - "loss": 3.7566, - "step": 3473000 - }, - { - "epoch": 38.57, - "learning_rate": 2.8563178819714367e-08, - "loss": 3.7523, - "step": 3473500 - }, - { - "epoch": 38.58, - "learning_rate": 2.854929702596451e-08, - "loss": 3.7539, - "step": 3474000 - }, - { - "epoch": 38.59, - "learning_rate": 2.8535415232214642e-08, - "loss": 3.7471, - "step": 3474500 - }, - { - "epoch": 38.59, - "learning_rate": 2.8521533438464784e-08, - "loss": 3.744, - "step": 3475000 - }, - { - "epoch": 38.6, - "learning_rate": 2.850765164471492e-08, - "loss": 3.7605, - "step": 3475500 - }, - { - "epoch": 38.6, - "learning_rate": 2.849376985096506e-08, - "loss": 3.7393, - "step": 3476000 - }, - { - "epoch": 38.61, - "learning_rate": 2.84798880572152e-08, - "loss": 3.7382, - "step": 3476500 - }, - { - "epoch": 38.61, - "learning_rate": 2.8466006263465337e-08, - "loss": 3.736, - "step": 3477000 - }, - { - "epoch": 38.62, - "learning_rate": 2.8452124469715476e-08, - "loss": 3.7554, - "step": 3477500 - }, - { - "epoch": 38.62, - "learning_rate": 2.8438242675965618e-08, - "loss": 3.7476, - "step": 3478000 - }, - { - "epoch": 38.63, - "learning_rate": 2.8424360882215754e-08, - "loss": 3.7556, - "step": 3478500 - }, - { - "epoch": 38.64, - "learning_rate": 2.8410479088465893e-08, - "loss": 3.7514, - "step": 3479000 - }, - { - "epoch": 38.64, - "learning_rate": 2.8396597294716035e-08, - "loss": 3.7473, - "step": 3479500 - }, - { - "epoch": 38.65, - "learning_rate": 2.838271550096617e-08, - "loss": 3.7356, - "step": 3480000 - }, - { - "epoch": 38.65, - "learning_rate": 2.836883370721631e-08, - "loss": 3.7608, - "step": 3480500 - }, - { - "epoch": 38.66, - "learning_rate": 2.8354951913466452e-08, - "loss": 3.7694, - "step": 3481000 - }, - { - "epoch": 38.66, - "learning_rate": 2.8341070119716587e-08, - "loss": 3.7454, - "step": 3481500 - }, - { - "epoch": 38.67, - "learning_rate": 2.8327188325966726e-08, - "loss": 3.7566, - "step": 3482000 - }, - { - "epoch": 38.67, - "learning_rate": 2.8313306532216862e-08, - "loss": 3.743, - "step": 3482500 - }, - { - "epoch": 38.68, - "learning_rate": 2.8299424738467004e-08, - "loss": 3.745, - "step": 3483000 - }, - { - "epoch": 38.69, - "learning_rate": 2.8285542944717143e-08, - "loss": 3.7596, - "step": 3483500 - }, - { - "epoch": 38.69, - "learning_rate": 2.827166115096728e-08, - "loss": 3.7651, - "step": 3484000 - }, - { - "epoch": 38.7, - "learning_rate": 2.825777935721742e-08, - "loss": 3.752, - "step": 3484500 - }, - { - "epoch": 38.7, - "learning_rate": 2.824389756346756e-08, - "loss": 3.746, - "step": 3485000 - }, - { - "epoch": 38.71, - "learning_rate": 2.8230015769717696e-08, - "loss": 3.7606, - "step": 3485500 - }, - { - "epoch": 38.71, - "learning_rate": 2.8216133975967838e-08, - "loss": 3.7707, - "step": 3486000 - }, - { - "epoch": 38.72, - "learning_rate": 2.8202252182217977e-08, - "loss": 3.742, - "step": 3486500 - }, - { - "epoch": 38.72, - "learning_rate": 2.8188370388468113e-08, - "loss": 3.745, - "step": 3487000 - }, - { - "epoch": 38.73, - "learning_rate": 2.8174488594718255e-08, - "loss": 3.7541, - "step": 3487500 - }, - { - "epoch": 38.74, - "learning_rate": 2.8160606800968394e-08, - "loss": 3.7565, - "step": 3488000 - }, - { - "epoch": 38.74, - "learning_rate": 2.814672500721853e-08, - "loss": 3.7212, - "step": 3488500 - }, - { - "epoch": 38.75, - "learning_rate": 2.8132843213468672e-08, - "loss": 3.7444, - "step": 3489000 - }, - { - "epoch": 38.75, - "learning_rate": 2.811896141971881e-08, - "loss": 3.75, - "step": 3489500 - }, - { - "epoch": 38.76, - "learning_rate": 2.8105079625968946e-08, - "loss": 3.7609, - "step": 3490000 - }, - { - "epoch": 38.76, - "learning_rate": 2.809119783221909e-08, - "loss": 3.7626, - "step": 3490500 - }, - { - "epoch": 38.77, - "learning_rate": 2.8077316038469224e-08, - "loss": 3.7413, - "step": 3491000 - }, - { - "epoch": 38.77, - "learning_rate": 2.8063434244719363e-08, - "loss": 3.739, - "step": 3491500 - }, - { - "epoch": 38.78, - "learning_rate": 2.8049552450969505e-08, - "loss": 3.7489, - "step": 3492000 - }, - { - "epoch": 38.79, - "learning_rate": 2.803567065721964e-08, - "loss": 3.7706, - "step": 3492500 - }, - { - "epoch": 38.79, - "learning_rate": 2.802178886346978e-08, - "loss": 3.7609, - "step": 3493000 - }, - { - "epoch": 38.8, - "learning_rate": 2.8007907069719922e-08, - "loss": 3.7575, - "step": 3493500 - }, - { - "epoch": 38.8, - "learning_rate": 2.7994025275970058e-08, - "loss": 3.7854, - "step": 3494000 - }, - { - "epoch": 38.81, - "learning_rate": 2.7980143482220197e-08, - "loss": 3.7427, - "step": 3494500 - }, - { - "epoch": 38.81, - "learning_rate": 2.796626168847034e-08, - "loss": 3.7524, - "step": 3495000 - }, - { - "epoch": 38.82, - "learning_rate": 2.7952379894720475e-08, - "loss": 3.7357, - "step": 3495500 - }, - { - "epoch": 38.82, - "learning_rate": 2.7938498100970614e-08, - "loss": 3.7485, - "step": 3496000 - }, - { - "epoch": 38.83, - "learning_rate": 2.7924616307220756e-08, - "loss": 3.7661, - "step": 3496500 - }, - { - "epoch": 38.84, - "learning_rate": 2.7910734513470892e-08, - "loss": 3.7673, - "step": 3497000 - }, - { - "epoch": 38.84, - "learning_rate": 2.789685271972103e-08, - "loss": 3.7539, - "step": 3497500 - }, - { - "epoch": 38.85, - "learning_rate": 2.7882970925971166e-08, - "loss": 3.7616, - "step": 3498000 - }, - { - "epoch": 38.85, - "learning_rate": 2.786908913222131e-08, - "loss": 3.7651, - "step": 3498500 - }, - { - "epoch": 38.86, - "learning_rate": 2.7855207338471448e-08, - "loss": 3.7554, - "step": 3499000 - }, - { - "epoch": 38.86, - "learning_rate": 2.7841325544721583e-08, - "loss": 3.7668, - "step": 3499500 - }, - { - "epoch": 38.87, - "learning_rate": 2.7827443750971725e-08, - "loss": 3.7682, - "step": 3500000 - }, - { - "epoch": 38.87, - "learning_rate": 2.7813561957221864e-08, - "loss": 3.7428, - "step": 3500500 - }, - { - "epoch": 38.88, - "learning_rate": 2.7799680163472e-08, - "loss": 3.7513, - "step": 3501000 - }, - { - "epoch": 38.89, - "learning_rate": 2.7785798369722142e-08, - "loss": 3.7702, - "step": 3501500 - }, - { - "epoch": 38.89, - "learning_rate": 2.777191657597228e-08, - "loss": 3.7647, - "step": 3502000 - }, - { - "epoch": 38.9, - "learning_rate": 2.7758034782222417e-08, - "loss": 3.7353, - "step": 3502500 - }, - { - "epoch": 38.9, - "learning_rate": 2.774415298847256e-08, - "loss": 3.7527, - "step": 3503000 - }, - { - "epoch": 38.91, - "learning_rate": 2.7730271194722698e-08, - "loss": 3.7537, - "step": 3503500 - }, - { - "epoch": 38.91, - "learning_rate": 2.7716389400972834e-08, - "loss": 3.7481, - "step": 3504000 - }, - { - "epoch": 38.92, - "learning_rate": 2.7702507607222976e-08, - "loss": 3.7445, - "step": 3504500 - }, - { - "epoch": 38.92, - "learning_rate": 2.768862581347311e-08, - "loss": 3.7493, - "step": 3505000 - }, - { - "epoch": 38.93, - "learning_rate": 2.767474401972325e-08, - "loss": 3.7441, - "step": 3505500 - }, - { - "epoch": 38.94, - "learning_rate": 2.7660862225973393e-08, - "loss": 3.7656, - "step": 3506000 - }, - { - "epoch": 38.94, - "learning_rate": 2.7646980432223525e-08, - "loss": 3.7527, - "step": 3506500 - }, - { - "epoch": 38.95, - "learning_rate": 2.7633098638473667e-08, - "loss": 3.7352, - "step": 3507000 - }, - { - "epoch": 38.95, - "learning_rate": 2.761921684472381e-08, - "loss": 3.7693, - "step": 3507500 - }, - { - "epoch": 38.96, - "learning_rate": 2.7605335050973942e-08, - "loss": 3.7429, - "step": 3508000 - }, - { - "epoch": 38.96, - "learning_rate": 2.7591453257224084e-08, - "loss": 3.7384, - "step": 3508500 - }, - { - "epoch": 38.97, - "learning_rate": 2.7577571463474227e-08, - "loss": 3.748, - "step": 3509000 - }, - { - "epoch": 38.97, - "learning_rate": 2.756368966972436e-08, - "loss": 3.7727, - "step": 3509500 - }, - { - "epoch": 38.98, - "learning_rate": 2.75498078759745e-08, - "loss": 3.7509, - "step": 3510000 - }, - { - "epoch": 38.99, - "learning_rate": 2.7535926082224643e-08, - "loss": 3.7379, - "step": 3510500 - }, - { - "epoch": 38.99, - "learning_rate": 2.7522044288474776e-08, - "loss": 3.7386, - "step": 3511000 - }, - { - "epoch": 39.0, - "learning_rate": 2.7508162494724918e-08, - "loss": 3.7521, - "step": 3511500 - }, - { - "epoch": 39.0, - "eval_loss": 3.82729172706604, - "eval_runtime": 6.3024, - "eval_samples_per_second": 246.573, - "step": 3511794 - }, - { - "epoch": 39.0, - "learning_rate": 2.7494280700975054e-08, - "loss": 3.7364, - "step": 3512000 - }, - { - "epoch": 39.01, - "learning_rate": 2.7480398907225193e-08, - "loss": 3.7451, - "step": 3512500 - }, - { - "epoch": 39.01, - "learning_rate": 2.7466517113475335e-08, - "loss": 3.7426, - "step": 3513000 - }, - { - "epoch": 39.02, - "learning_rate": 2.745263531972547e-08, - "loss": 3.764, - "step": 3513500 - }, - { - "epoch": 39.02, - "learning_rate": 2.743875352597561e-08, - "loss": 3.7632, - "step": 3514000 - }, - { - "epoch": 39.03, - "learning_rate": 2.7424871732225752e-08, - "loss": 3.7508, - "step": 3514500 - }, - { - "epoch": 39.04, - "learning_rate": 2.7410989938475887e-08, - "loss": 3.7539, - "step": 3515000 - }, - { - "epoch": 39.04, - "learning_rate": 2.7397108144726026e-08, - "loss": 3.7615, - "step": 3515500 - }, - { - "epoch": 39.05, - "learning_rate": 2.738322635097617e-08, - "loss": 3.7502, - "step": 3516000 - }, - { - "epoch": 39.05, - "learning_rate": 2.7369344557226304e-08, - "loss": 3.7465, - "step": 3516500 - }, - { - "epoch": 39.06, - "learning_rate": 2.7355462763476443e-08, - "loss": 3.7516, - "step": 3517000 - }, - { - "epoch": 39.06, - "learning_rate": 2.7341580969726586e-08, - "loss": 3.7589, - "step": 3517500 - }, - { - "epoch": 39.07, - "learning_rate": 2.732769917597672e-08, - "loss": 3.7642, - "step": 3518000 - }, - { - "epoch": 39.07, - "learning_rate": 2.731381738222686e-08, - "loss": 3.7521, - "step": 3518500 - }, - { - "epoch": 39.08, - "learning_rate": 2.7299935588477002e-08, - "loss": 3.7461, - "step": 3519000 - }, - { - "epoch": 39.09, - "learning_rate": 2.7286053794727138e-08, - "loss": 3.7551, - "step": 3519500 - }, - { - "epoch": 39.09, - "learning_rate": 2.7272172000977277e-08, - "loss": 3.7707, - "step": 3520000 - }, - { - "epoch": 39.1, - "learning_rate": 2.7258290207227413e-08, - "loss": 3.7479, - "step": 3520500 - }, - { - "epoch": 39.1, - "learning_rate": 2.7244408413477555e-08, - "loss": 3.7665, - "step": 3521000 - }, - { - "epoch": 39.11, - "learning_rate": 2.7230526619727694e-08, - "loss": 3.7669, - "step": 3521500 - }, - { - "epoch": 39.11, - "learning_rate": 2.721664482597783e-08, - "loss": 3.731, - "step": 3522000 - }, - { - "epoch": 39.12, - "learning_rate": 2.7202763032227972e-08, - "loss": 3.7581, - "step": 3522500 - }, - { - "epoch": 39.12, - "learning_rate": 2.718888123847811e-08, - "loss": 3.7739, - "step": 3523000 - }, - { - "epoch": 39.13, - "learning_rate": 2.7174999444728246e-08, - "loss": 3.7391, - "step": 3523500 - }, - { - "epoch": 39.14, - "learning_rate": 2.716111765097839e-08, - "loss": 3.7496, - "step": 3524000 - }, - { - "epoch": 39.14, - "learning_rate": 2.7147235857228528e-08, - "loss": 3.7448, - "step": 3524500 - }, - { - "epoch": 39.15, - "learning_rate": 2.7133354063478663e-08, - "loss": 3.7557, - "step": 3525000 - }, - { - "epoch": 39.15, - "learning_rate": 2.7119472269728806e-08, - "loss": 3.734, - "step": 3525500 - }, - { - "epoch": 39.16, - "learning_rate": 2.7105590475978944e-08, - "loss": 3.7516, - "step": 3526000 - }, - { - "epoch": 39.16, - "learning_rate": 2.709170868222908e-08, - "loss": 3.7597, - "step": 3526500 - }, - { - "epoch": 39.17, - "learning_rate": 2.7077826888479222e-08, - "loss": 3.7613, - "step": 3527000 - }, - { - "epoch": 39.17, - "learning_rate": 2.7063945094729358e-08, - "loss": 3.7499, - "step": 3527500 - }, - { - "epoch": 39.18, - "learning_rate": 2.7050063300979497e-08, - "loss": 3.7534, - "step": 3528000 - }, - { - "epoch": 39.19, - "learning_rate": 2.703618150722964e-08, - "loss": 3.7728, - "step": 3528500 - }, - { - "epoch": 39.19, - "learning_rate": 2.7022299713479775e-08, - "loss": 3.7418, - "step": 3529000 - }, - { - "epoch": 39.2, - "learning_rate": 2.7008417919729914e-08, - "loss": 3.7534, - "step": 3529500 - }, - { - "epoch": 39.2, - "learning_rate": 2.6994536125980056e-08, - "loss": 3.7543, - "step": 3530000 - }, - { - "epoch": 39.21, - "learning_rate": 2.6980654332230192e-08, - "loss": 3.7337, - "step": 3530500 - }, - { - "epoch": 39.21, - "learning_rate": 2.696677253848033e-08, - "loss": 3.7574, - "step": 3531000 - }, - { - "epoch": 39.22, - "learning_rate": 2.6952890744730473e-08, - "loss": 3.7431, - "step": 3531500 - }, - { - "epoch": 39.22, - "learning_rate": 2.693900895098061e-08, - "loss": 3.7423, - "step": 3532000 - }, - { - "epoch": 39.23, - "learning_rate": 2.6925127157230748e-08, - "loss": 3.7417, - "step": 3532500 - }, - { - "epoch": 39.24, - "learning_rate": 2.691124536348089e-08, - "loss": 3.7629, - "step": 3533000 - }, - { - "epoch": 39.24, - "learning_rate": 2.6897363569731026e-08, - "loss": 3.7343, - "step": 3533500 - }, - { - "epoch": 39.25, - "learning_rate": 2.6883481775981164e-08, - "loss": 3.7511, - "step": 3534000 - }, - { - "epoch": 39.25, - "learning_rate": 2.68695999822313e-08, - "loss": 3.7429, - "step": 3534500 - }, - { - "epoch": 39.26, - "learning_rate": 2.6855718188481442e-08, - "loss": 3.7562, - "step": 3535000 - }, - { - "epoch": 39.26, - "learning_rate": 2.684183639473158e-08, - "loss": 3.752, - "step": 3535500 - }, - { - "epoch": 39.27, - "learning_rate": 2.6827954600981717e-08, - "loss": 3.7493, - "step": 3536000 - }, - { - "epoch": 39.27, - "learning_rate": 2.681407280723186e-08, - "loss": 3.7525, - "step": 3536500 - }, - { - "epoch": 39.28, - "learning_rate": 2.6800191013481998e-08, - "loss": 3.7383, - "step": 3537000 - }, - { - "epoch": 39.29, - "learning_rate": 2.6786309219732134e-08, - "loss": 3.7495, - "step": 3537500 - }, - { - "epoch": 39.29, - "learning_rate": 2.6772427425982276e-08, - "loss": 3.7643, - "step": 3538000 - }, - { - "epoch": 39.3, - "learning_rate": 2.6758545632232415e-08, - "loss": 3.7354, - "step": 3538500 - }, - { - "epoch": 39.3, - "learning_rate": 2.674466383848255e-08, - "loss": 3.7526, - "step": 3539000 - }, - { - "epoch": 39.31, - "learning_rate": 2.6730782044732693e-08, - "loss": 3.7405, - "step": 3539500 - }, - { - "epoch": 39.31, - "learning_rate": 2.6716900250982832e-08, - "loss": 3.7404, - "step": 3540000 - }, - { - "epoch": 39.32, - "learning_rate": 2.6703018457232968e-08, - "loss": 3.7521, - "step": 3540500 - }, - { - "epoch": 39.32, - "learning_rate": 2.668913666348311e-08, - "loss": 3.728, - "step": 3541000 - }, - { - "epoch": 39.33, - "learning_rate": 2.6675254869733242e-08, - "loss": 3.7332, - "step": 3541500 - }, - { - "epoch": 39.34, - "learning_rate": 2.6661373075983384e-08, - "loss": 3.7437, - "step": 3542000 - }, - { - "epoch": 39.34, - "learning_rate": 2.6647491282233527e-08, - "loss": 3.7499, - "step": 3542500 - }, - { - "epoch": 39.35, - "learning_rate": 2.663360948848366e-08, - "loss": 3.7398, - "step": 3543000 - }, - { - "epoch": 39.35, - "learning_rate": 2.66197276947338e-08, - "loss": 3.7479, - "step": 3543500 - }, - { - "epoch": 39.36, - "learning_rate": 2.6605845900983944e-08, - "loss": 3.7543, - "step": 3544000 - }, - { - "epoch": 39.36, - "learning_rate": 2.6591964107234076e-08, - "loss": 3.7407, - "step": 3544500 - }, - { - "epoch": 39.37, - "learning_rate": 2.6578082313484218e-08, - "loss": 3.7523, - "step": 3545000 - }, - { - "epoch": 39.37, - "learning_rate": 2.656420051973436e-08, - "loss": 3.7489, - "step": 3545500 - }, - { - "epoch": 39.38, - "learning_rate": 2.6550318725984493e-08, - "loss": 3.7331, - "step": 3546000 - }, - { - "epoch": 39.39, - "learning_rate": 2.6536436932234635e-08, - "loss": 3.7488, - "step": 3546500 - }, - { - "epoch": 39.39, - "learning_rate": 2.6522555138484777e-08, - "loss": 3.7547, - "step": 3547000 - }, - { - "epoch": 39.4, - "learning_rate": 2.650867334473491e-08, - "loss": 3.7471, - "step": 3547500 - }, - { - "epoch": 39.4, - "learning_rate": 2.6494791550985052e-08, - "loss": 3.7585, - "step": 3548000 - }, - { - "epoch": 39.41, - "learning_rate": 2.6480909757235188e-08, - "loss": 3.7486, - "step": 3548500 - }, - { - "epoch": 39.41, - "learning_rate": 2.6467027963485327e-08, - "loss": 3.7359, - "step": 3549000 - }, - { - "epoch": 39.42, - "learning_rate": 2.645314616973547e-08, - "loss": 3.7748, - "step": 3549500 - }, - { - "epoch": 39.42, - "learning_rate": 2.6439264375985604e-08, - "loss": 3.7561, - "step": 3550000 - }, - { - "epoch": 39.43, - "learning_rate": 2.6425382582235743e-08, - "loss": 3.7611, - "step": 3550500 - }, - { - "epoch": 39.44, - "learning_rate": 2.6411500788485886e-08, - "loss": 3.7402, - "step": 3551000 - }, - { - "epoch": 39.44, - "learning_rate": 2.639761899473602e-08, - "loss": 3.7467, - "step": 3551500 - }, - { - "epoch": 39.45, - "learning_rate": 2.638373720098616e-08, - "loss": 3.7491, - "step": 3552000 - }, - { - "epoch": 39.45, - "learning_rate": 2.6369855407236303e-08, - "loss": 3.7595, - "step": 3552500 - }, - { - "epoch": 39.46, - "learning_rate": 2.6355973613486438e-08, - "loss": 3.7478, - "step": 3553000 - }, - { - "epoch": 39.46, - "learning_rate": 2.6342091819736577e-08, - "loss": 3.7376, - "step": 3553500 - }, - { - "epoch": 39.47, - "learning_rate": 2.632821002598672e-08, - "loss": 3.7532, - "step": 3554000 - }, - { - "epoch": 39.47, - "learning_rate": 2.6314328232236855e-08, - "loss": 3.7675, - "step": 3554500 - }, - { - "epoch": 39.48, - "learning_rate": 2.6300446438486994e-08, - "loss": 3.7457, - "step": 3555000 - }, - { - "epoch": 39.49, - "learning_rate": 2.6286564644737136e-08, - "loss": 3.7635, - "step": 3555500 - }, - { - "epoch": 39.49, - "learning_rate": 2.6272682850987272e-08, - "loss": 3.7608, - "step": 3556000 - }, - { - "epoch": 39.5, - "learning_rate": 2.625880105723741e-08, - "loss": 3.7744, - "step": 3556500 - }, - { - "epoch": 39.5, - "learning_rate": 2.6244919263487546e-08, - "loss": 3.752, - "step": 3557000 - }, - { - "epoch": 39.51, - "learning_rate": 2.623103746973769e-08, - "loss": 3.7555, - "step": 3557500 - }, - { - "epoch": 39.51, - "learning_rate": 2.6217155675987828e-08, - "loss": 3.7638, - "step": 3558000 - }, - { - "epoch": 39.52, - "learning_rate": 2.6203273882237963e-08, - "loss": 3.77, - "step": 3558500 - }, - { - "epoch": 39.52, - "learning_rate": 2.6189392088488106e-08, - "loss": 3.7632, - "step": 3559000 - }, - { - "epoch": 39.53, - "learning_rate": 2.6175510294738245e-08, - "loss": 3.7377, - "step": 3559500 - }, - { - "epoch": 39.54, - "learning_rate": 2.616162850098838e-08, - "loss": 3.7491, - "step": 3560000 - }, - { - "epoch": 39.54, - "learning_rate": 2.6147746707238522e-08, - "loss": 3.7468, - "step": 3560500 - }, - { - "epoch": 39.55, - "learning_rate": 2.613386491348866e-08, - "loss": 3.7298, - "step": 3561000 - }, - { - "epoch": 39.55, - "learning_rate": 2.6119983119738797e-08, - "loss": 3.7588, - "step": 3561500 - }, - { - "epoch": 39.56, - "learning_rate": 2.610610132598894e-08, - "loss": 3.7634, - "step": 3562000 - }, - { - "epoch": 39.56, - "learning_rate": 2.6092219532239078e-08, - "loss": 3.7626, - "step": 3562500 - }, - { - "epoch": 39.57, - "learning_rate": 2.6078337738489214e-08, - "loss": 3.7645, - "step": 3563000 - }, - { - "epoch": 39.57, - "learning_rate": 2.6064455944739356e-08, - "loss": 3.7373, - "step": 3563500 - }, - { - "epoch": 39.58, - "learning_rate": 2.6050574150989492e-08, - "loss": 3.7687, - "step": 3564000 - }, - { - "epoch": 39.59, - "learning_rate": 2.603669235723963e-08, - "loss": 3.7589, - "step": 3564500 - }, - { - "epoch": 39.59, - "learning_rate": 2.6022810563489773e-08, - "loss": 3.75, - "step": 3565000 - }, - { - "epoch": 39.6, - "learning_rate": 2.600892876973991e-08, - "loss": 3.7374, - "step": 3565500 - }, - { - "epoch": 39.6, - "learning_rate": 2.5995046975990048e-08, - "loss": 3.7585, - "step": 3566000 - }, - { - "epoch": 39.61, - "learning_rate": 2.598116518224019e-08, - "loss": 3.7565, - "step": 3566500 - }, - { - "epoch": 39.61, - "learning_rate": 2.5967283388490326e-08, - "loss": 3.7653, - "step": 3567000 - }, - { - "epoch": 39.62, - "learning_rate": 2.5953401594740465e-08, - "loss": 3.7511, - "step": 3567500 - }, - { - "epoch": 39.62, - "learning_rate": 2.5939519800990607e-08, - "loss": 3.7425, - "step": 3568000 - }, - { - "epoch": 39.63, - "learning_rate": 2.5925638007240742e-08, - "loss": 3.7602, - "step": 3568500 - }, - { - "epoch": 39.64, - "learning_rate": 2.591175621349088e-08, - "loss": 3.74, - "step": 3569000 - }, - { - "epoch": 39.64, - "learning_rate": 2.5897874419741024e-08, - "loss": 3.7633, - "step": 3569500 - }, - { - "epoch": 39.65, - "learning_rate": 2.588399262599116e-08, - "loss": 3.7459, - "step": 3570000 - }, - { - "epoch": 39.65, - "learning_rate": 2.5870110832241298e-08, - "loss": 3.7427, - "step": 3570500 - }, - { - "epoch": 39.66, - "learning_rate": 2.5856229038491434e-08, - "loss": 3.7378, - "step": 3571000 - }, - { - "epoch": 39.66, - "learning_rate": 2.5842347244741576e-08, - "loss": 3.7611, - "step": 3571500 - }, - { - "epoch": 39.67, - "learning_rate": 2.5828465450991715e-08, - "loss": 3.7641, - "step": 3572000 - }, - { - "epoch": 39.67, - "learning_rate": 2.581458365724185e-08, - "loss": 3.7389, - "step": 3572500 - }, - { - "epoch": 39.68, - "learning_rate": 2.5800701863491993e-08, - "loss": 3.7494, - "step": 3573000 - }, - { - "epoch": 39.69, - "learning_rate": 2.5786820069742132e-08, - "loss": 3.7645, - "step": 3573500 - }, - { - "epoch": 39.69, - "learning_rate": 2.5772938275992268e-08, - "loss": 3.7459, - "step": 3574000 - }, - { - "epoch": 39.7, - "learning_rate": 2.575905648224241e-08, - "loss": 3.7464, - "step": 3574500 - }, - { - "epoch": 39.7, - "learning_rate": 2.574517468849255e-08, - "loss": 3.7359, - "step": 3575000 - }, - { - "epoch": 39.71, - "learning_rate": 2.5731292894742685e-08, - "loss": 3.735, - "step": 3575500 - }, - { - "epoch": 39.71, - "learning_rate": 2.5717411100992827e-08, - "loss": 3.7456, - "step": 3576000 - }, - { - "epoch": 39.72, - "learning_rate": 2.5703529307242966e-08, - "loss": 3.7683, - "step": 3576500 - }, - { - "epoch": 39.72, - "learning_rate": 2.56896475134931e-08, - "loss": 3.7525, - "step": 3577000 - }, - { - "epoch": 39.73, - "learning_rate": 2.5675765719743244e-08, - "loss": 3.7698, - "step": 3577500 - }, - { - "epoch": 39.74, - "learning_rate": 2.5661883925993376e-08, - "loss": 3.7659, - "step": 3578000 - }, - { - "epoch": 39.74, - "learning_rate": 2.5648002132243518e-08, - "loss": 3.7428, - "step": 3578500 - }, - { - "epoch": 39.75, - "learning_rate": 2.5634120338493657e-08, - "loss": 3.7521, - "step": 3579000 - }, - { - "epoch": 39.75, - "learning_rate": 2.5620238544743793e-08, - "loss": 3.7483, - "step": 3579500 - }, - { - "epoch": 39.76, - "learning_rate": 2.5606356750993935e-08, - "loss": 3.762, - "step": 3580000 - }, - { - "epoch": 39.76, - "learning_rate": 2.5592474957244074e-08, - "loss": 3.7581, - "step": 3580500 - }, - { - "epoch": 39.77, - "learning_rate": 2.557859316349421e-08, - "loss": 3.7611, - "step": 3581000 - }, - { - "epoch": 39.77, - "learning_rate": 2.5564711369744352e-08, - "loss": 3.7358, - "step": 3581500 - }, - { - "epoch": 39.78, - "learning_rate": 2.555082957599449e-08, - "loss": 3.7613, - "step": 3582000 - }, - { - "epoch": 39.79, - "learning_rate": 2.5536947782244627e-08, - "loss": 3.7352, - "step": 3582500 - }, - { - "epoch": 39.79, - "learning_rate": 2.552306598849477e-08, - "loss": 3.7403, - "step": 3583000 - }, - { - "epoch": 39.8, - "learning_rate": 2.5509184194744908e-08, - "loss": 3.7634, - "step": 3583500 - }, - { - "epoch": 39.8, - "learning_rate": 2.5495302400995043e-08, - "loss": 3.7427, - "step": 3584000 - }, - { - "epoch": 39.81, - "learning_rate": 2.5481420607245186e-08, - "loss": 3.7565, - "step": 3584500 - }, - { - "epoch": 39.81, - "learning_rate": 2.5467538813495325e-08, - "loss": 3.7362, - "step": 3585000 - }, - { - "epoch": 39.82, - "learning_rate": 2.545365701974546e-08, - "loss": 3.7578, - "step": 3585500 - }, - { - "epoch": 39.82, - "learning_rate": 2.5439775225995603e-08, - "loss": 3.768, - "step": 3586000 - }, - { - "epoch": 39.83, - "learning_rate": 2.5425893432245738e-08, - "loss": 3.7456, - "step": 3586500 - }, - { - "epoch": 39.84, - "learning_rate": 2.5412011638495877e-08, - "loss": 3.7613, - "step": 3587000 - }, - { - "epoch": 39.84, - "learning_rate": 2.539812984474602e-08, - "loss": 3.7482, - "step": 3587500 - }, - { - "epoch": 39.85, - "learning_rate": 2.5384248050996155e-08, - "loss": 3.7567, - "step": 3588000 - }, - { - "epoch": 39.85, - "learning_rate": 2.5370366257246294e-08, - "loss": 3.7471, - "step": 3588500 - }, - { - "epoch": 39.86, - "learning_rate": 2.5356484463496436e-08, - "loss": 3.7614, - "step": 3589000 - }, - { - "epoch": 39.86, - "learning_rate": 2.5342602669746572e-08, - "loss": 3.7504, - "step": 3589500 - }, - { - "epoch": 39.87, - "learning_rate": 2.532872087599671e-08, - "loss": 3.7457, - "step": 3590000 - }, - { - "epoch": 39.87, - "learning_rate": 2.5314839082246853e-08, - "loss": 3.7433, - "step": 3590500 - }, - { - "epoch": 39.88, - "learning_rate": 2.530095728849699e-08, - "loss": 3.7718, - "step": 3591000 - }, - { - "epoch": 39.89, - "learning_rate": 2.5287075494747128e-08, - "loss": 3.749, - "step": 3591500 - }, - { - "epoch": 39.89, - "learning_rate": 2.527319370099727e-08, - "loss": 3.746, - "step": 3592000 - }, - { - "epoch": 39.9, - "learning_rate": 2.5259311907247406e-08, - "loss": 3.7566, - "step": 3592500 - }, - { - "epoch": 39.9, - "learning_rate": 2.5245430113497545e-08, - "loss": 3.7559, - "step": 3593000 - }, - { - "epoch": 39.91, - "learning_rate": 2.523154831974768e-08, - "loss": 3.7397, - "step": 3593500 - }, - { - "epoch": 39.91, - "learning_rate": 2.5217666525997823e-08, - "loss": 3.7608, - "step": 3594000 - }, - { - "epoch": 39.92, - "learning_rate": 2.520378473224796e-08, - "loss": 3.755, - "step": 3594500 - }, - { - "epoch": 39.92, - "learning_rate": 2.5189902938498097e-08, - "loss": 3.7598, - "step": 3595000 - }, - { - "epoch": 39.93, - "learning_rate": 2.517602114474824e-08, - "loss": 3.7525, - "step": 3595500 - }, - { - "epoch": 39.94, - "learning_rate": 2.516213935099838e-08, - "loss": 3.7196, - "step": 3596000 - }, - { - "epoch": 39.94, - "learning_rate": 2.5148257557248514e-08, - "loss": 3.7425, - "step": 3596500 - }, - { - "epoch": 39.95, - "learning_rate": 2.5134375763498656e-08, - "loss": 3.7719, - "step": 3597000 - }, - { - "epoch": 39.95, - "learning_rate": 2.5120493969748795e-08, - "loss": 3.7451, - "step": 3597500 - }, - { - "epoch": 39.96, - "learning_rate": 2.510661217599893e-08, - "loss": 3.7648, - "step": 3598000 - }, - { - "epoch": 39.96, - "learning_rate": 2.5092730382249073e-08, - "loss": 3.7464, - "step": 3598500 - }, - { - "epoch": 39.97, - "learning_rate": 2.5078848588499212e-08, - "loss": 3.7555, - "step": 3599000 - }, - { - "epoch": 39.97, - "learning_rate": 2.5064966794749348e-08, - "loss": 3.7586, - "step": 3599500 - }, - { - "epoch": 39.98, - "learning_rate": 2.505108500099949e-08, - "loss": 3.7495, - "step": 3600000 - }, - { - "epoch": 39.99, - "learning_rate": 2.5037203207249626e-08, - "loss": 3.767, - "step": 3600500 - }, - { - "epoch": 39.99, - "learning_rate": 2.5023321413499765e-08, - "loss": 3.7543, - "step": 3601000 - }, - { - "epoch": 40.0, - "learning_rate": 2.5009439619749907e-08, - "loss": 3.7413, - "step": 3601500 - }, - { - "epoch": 40.0, - "eval_loss": 3.8263349533081055, - "eval_runtime": 6.3013, - "eval_samples_per_second": 246.615, - "step": 3601840 - }, - { - "epoch": 40.0, - "learning_rate": 2.4995557826000043e-08, - "loss": 3.7471, - "step": 3602000 - }, - { - "epoch": 40.01, - "learning_rate": 2.498167603225018e-08, - "loss": 3.7482, - "step": 3602500 - }, - { - "epoch": 40.01, - "learning_rate": 2.496779423850032e-08, - "loss": 3.7522, - "step": 3603000 - }, - { - "epoch": 40.02, - "learning_rate": 2.495391244475046e-08, - "loss": 3.7459, - "step": 3603500 - }, - { - "epoch": 40.02, - "learning_rate": 2.49400306510006e-08, - "loss": 3.748, - "step": 3604000 - }, - { - "epoch": 40.03, - "learning_rate": 2.4926148857250737e-08, - "loss": 3.7795, - "step": 3604500 - }, - { - "epoch": 40.04, - "learning_rate": 2.4912267063500876e-08, - "loss": 3.7414, - "step": 3605000 - }, - { - "epoch": 40.04, - "learning_rate": 2.4898385269751015e-08, - "loss": 3.7623, - "step": 3605500 - }, - { - "epoch": 40.05, - "learning_rate": 2.4884503476001154e-08, - "loss": 3.7607, - "step": 3606000 - }, - { - "epoch": 40.05, - "learning_rate": 2.4870621682251293e-08, - "loss": 3.7598, - "step": 3606500 - }, - { - "epoch": 40.06, - "learning_rate": 2.4856739888501432e-08, - "loss": 3.7501, - "step": 3607000 - }, - { - "epoch": 40.06, - "learning_rate": 2.484285809475157e-08, - "loss": 3.7554, - "step": 3607500 - }, - { - "epoch": 40.07, - "learning_rate": 2.482897630100171e-08, - "loss": 3.7579, - "step": 3608000 - }, - { - "epoch": 40.07, - "learning_rate": 2.481509450725185e-08, - "loss": 3.7378, - "step": 3608500 - }, - { - "epoch": 40.08, - "learning_rate": 2.4801212713501988e-08, - "loss": 3.7683, - "step": 3609000 - }, - { - "epoch": 40.09, - "learning_rate": 2.4787330919752124e-08, - "loss": 3.7673, - "step": 3609500 - }, - { - "epoch": 40.09, - "learning_rate": 2.4773449126002263e-08, - "loss": 3.7278, - "step": 3610000 - }, - { - "epoch": 40.1, - "learning_rate": 2.4759567332252405e-08, - "loss": 3.7523, - "step": 3610500 - }, - { - "epoch": 40.1, - "learning_rate": 2.474568553850254e-08, - "loss": 3.7508, - "step": 3611000 - }, - { - "epoch": 40.11, - "learning_rate": 2.473180374475268e-08, - "loss": 3.7685, - "step": 3611500 - }, - { - "epoch": 40.11, - "learning_rate": 2.471792195100282e-08, - "loss": 3.7623, - "step": 3612000 - }, - { - "epoch": 40.12, - "learning_rate": 2.4704040157252957e-08, - "loss": 3.7518, - "step": 3612500 - }, - { - "epoch": 40.12, - "learning_rate": 2.4690158363503096e-08, - "loss": 3.7546, - "step": 3613000 - }, - { - "epoch": 40.13, - "learning_rate": 2.4676276569753235e-08, - "loss": 3.7676, - "step": 3613500 - }, - { - "epoch": 40.14, - "learning_rate": 2.4662394776003374e-08, - "loss": 3.7538, - "step": 3614000 - }, - { - "epoch": 40.14, - "learning_rate": 2.4648512982253513e-08, - "loss": 3.7411, - "step": 3614500 - }, - { - "epoch": 40.15, - "learning_rate": 2.4634631188503652e-08, - "loss": 3.761, - "step": 3615000 - }, - { - "epoch": 40.15, - "learning_rate": 2.462074939475379e-08, - "loss": 3.7577, - "step": 3615500 - }, - { - "epoch": 40.16, - "learning_rate": 2.460686760100393e-08, - "loss": 3.7587, - "step": 3616000 - }, - { - "epoch": 40.16, - "learning_rate": 2.459298580725407e-08, - "loss": 3.7592, - "step": 3616500 - }, - { - "epoch": 40.17, - "learning_rate": 2.4579104013504208e-08, - "loss": 3.7605, - "step": 3617000 - }, - { - "epoch": 40.17, - "learning_rate": 2.4565222219754347e-08, - "loss": 3.7463, - "step": 3617500 - }, - { - "epoch": 40.18, - "learning_rate": 2.4551340426004486e-08, - "loss": 3.7551, - "step": 3618000 - }, - { - "epoch": 40.19, - "learning_rate": 2.4537458632254625e-08, - "loss": 3.7524, - "step": 3618500 - }, - { - "epoch": 40.19, - "learning_rate": 2.4523576838504764e-08, - "loss": 3.7378, - "step": 3619000 - }, - { - "epoch": 40.2, - "learning_rate": 2.4509695044754903e-08, - "loss": 3.7618, - "step": 3619500 - }, - { - "epoch": 40.2, - "learning_rate": 2.449581325100504e-08, - "loss": 3.7568, - "step": 3620000 - }, - { - "epoch": 40.21, - "learning_rate": 2.4481931457255177e-08, - "loss": 3.7693, - "step": 3620500 - }, - { - "epoch": 40.21, - "learning_rate": 2.446804966350532e-08, - "loss": 3.743, - "step": 3621000 - }, - { - "epoch": 40.22, - "learning_rate": 2.445416786975546e-08, - "loss": 3.7329, - "step": 3621500 - }, - { - "epoch": 40.22, - "learning_rate": 2.4440286076005594e-08, - "loss": 3.7491, - "step": 3622000 - }, - { - "epoch": 40.23, - "learning_rate": 2.4426404282255736e-08, - "loss": 3.7615, - "step": 3622500 - }, - { - "epoch": 40.23, - "learning_rate": 2.4412522488505875e-08, - "loss": 3.7561, - "step": 3623000 - }, - { - "epoch": 40.24, - "learning_rate": 2.439864069475601e-08, - "loss": 3.7551, - "step": 3623500 - }, - { - "epoch": 40.25, - "learning_rate": 2.438475890100615e-08, - "loss": 3.7568, - "step": 3624000 - }, - { - "epoch": 40.25, - "learning_rate": 2.4370877107256292e-08, - "loss": 3.7457, - "step": 3624500 - }, - { - "epoch": 40.26, - "learning_rate": 2.4356995313506428e-08, - "loss": 3.7534, - "step": 3625000 - }, - { - "epoch": 40.26, - "learning_rate": 2.4343113519756567e-08, - "loss": 3.7401, - "step": 3625500 - }, - { - "epoch": 40.27, - "learning_rate": 2.432923172600671e-08, - "loss": 3.752, - "step": 3626000 - }, - { - "epoch": 40.27, - "learning_rate": 2.4315349932256845e-08, - "loss": 3.7561, - "step": 3626500 - }, - { - "epoch": 40.28, - "learning_rate": 2.4301468138506984e-08, - "loss": 3.7625, - "step": 3627000 - }, - { - "epoch": 40.28, - "learning_rate": 2.4287586344757126e-08, - "loss": 3.7336, - "step": 3627500 - }, - { - "epoch": 40.29, - "learning_rate": 2.427370455100726e-08, - "loss": 3.7718, - "step": 3628000 - }, - { - "epoch": 40.3, - "learning_rate": 2.42598227572574e-08, - "loss": 3.7393, - "step": 3628500 - }, - { - "epoch": 40.3, - "learning_rate": 2.424594096350754e-08, - "loss": 3.7511, - "step": 3629000 - }, - { - "epoch": 40.31, - "learning_rate": 2.423205916975768e-08, - "loss": 3.7438, - "step": 3629500 - }, - { - "epoch": 40.31, - "learning_rate": 2.4218177376007817e-08, - "loss": 3.7411, - "step": 3630000 - }, - { - "epoch": 40.32, - "learning_rate": 2.4204295582257956e-08, - "loss": 3.765, - "step": 3630500 - }, - { - "epoch": 40.32, - "learning_rate": 2.4190413788508095e-08, - "loss": 3.7321, - "step": 3631000 - }, - { - "epoch": 40.33, - "learning_rate": 2.4176531994758234e-08, - "loss": 3.7451, - "step": 3631500 - }, - { - "epoch": 40.33, - "learning_rate": 2.4162650201008373e-08, - "loss": 3.7369, - "step": 3632000 - }, - { - "epoch": 40.34, - "learning_rate": 2.414876840725851e-08, - "loss": 3.76, - "step": 3632500 - }, - { - "epoch": 40.35, - "learning_rate": 2.413488661350865e-08, - "loss": 3.7482, - "step": 3633000 - }, - { - "epoch": 40.35, - "learning_rate": 2.412100481975879e-08, - "loss": 3.7557, - "step": 3633500 - }, - { - "epoch": 40.36, - "learning_rate": 2.4107123026008926e-08, - "loss": 3.7295, - "step": 3634000 - }, - { - "epoch": 40.36, - "learning_rate": 2.4093241232259068e-08, - "loss": 3.7425, - "step": 3634500 - }, - { - "epoch": 40.37, - "learning_rate": 2.4079359438509207e-08, - "loss": 3.7603, - "step": 3635000 - }, - { - "epoch": 40.37, - "learning_rate": 2.4065477644759343e-08, - "loss": 3.7547, - "step": 3635500 - }, - { - "epoch": 40.38, - "learning_rate": 2.405159585100948e-08, - "loss": 3.7487, - "step": 3636000 - }, - { - "epoch": 40.38, - "learning_rate": 2.4037714057259624e-08, - "loss": 3.7447, - "step": 3636500 - }, - { - "epoch": 40.39, - "learning_rate": 2.402383226350976e-08, - "loss": 3.7536, - "step": 3637000 - }, - { - "epoch": 40.4, - "learning_rate": 2.40099504697599e-08, - "loss": 3.7613, - "step": 3637500 - }, - { - "epoch": 40.4, - "learning_rate": 2.399606867601004e-08, - "loss": 3.7338, - "step": 3638000 - }, - { - "epoch": 40.41, - "learning_rate": 2.3982186882260176e-08, - "loss": 3.752, - "step": 3638500 - }, - { - "epoch": 40.41, - "learning_rate": 2.3968305088510315e-08, - "loss": 3.7721, - "step": 3639000 - }, - { - "epoch": 40.42, - "learning_rate": 2.3954423294760454e-08, - "loss": 3.7477, - "step": 3639500 - }, - { - "epoch": 40.42, - "learning_rate": 2.3940541501010593e-08, - "loss": 3.751, - "step": 3640000 - }, - { - "epoch": 40.43, - "learning_rate": 2.3926659707260732e-08, - "loss": 3.7506, - "step": 3640500 - }, - { - "epoch": 40.43, - "learning_rate": 2.391277791351087e-08, - "loss": 3.76, - "step": 3641000 - }, - { - "epoch": 40.44, - "learning_rate": 2.389889611976101e-08, - "loss": 3.7506, - "step": 3641500 - }, - { - "epoch": 40.45, - "learning_rate": 2.388501432601115e-08, - "loss": 3.7783, - "step": 3642000 - }, - { - "epoch": 40.45, - "learning_rate": 2.3871132532261288e-08, - "loss": 3.7562, - "step": 3642500 - }, - { - "epoch": 40.46, - "learning_rate": 2.3857250738511424e-08, - "loss": 3.731, - "step": 3643000 - }, - { - "epoch": 40.46, - "learning_rate": 2.3843368944761566e-08, - "loss": 3.7573, - "step": 3643500 - }, - { - "epoch": 40.47, - "learning_rate": 2.3829487151011705e-08, - "loss": 3.7492, - "step": 3644000 - }, - { - "epoch": 40.47, - "learning_rate": 2.381560535726184e-08, - "loss": 3.7654, - "step": 3644500 - }, - { - "epoch": 40.48, - "learning_rate": 2.3801723563511983e-08, - "loss": 3.749, - "step": 3645000 - }, - { - "epoch": 40.48, - "learning_rate": 2.3787841769762122e-08, - "loss": 3.7503, - "step": 3645500 - }, - { - "epoch": 40.49, - "learning_rate": 2.3773959976012257e-08, - "loss": 3.7483, - "step": 3646000 - }, - { - "epoch": 40.5, - "learning_rate": 2.3760078182262396e-08, - "loss": 3.7522, - "step": 3646500 - }, - { - "epoch": 40.5, - "learning_rate": 2.374619638851254e-08, - "loss": 3.7469, - "step": 3647000 - }, - { - "epoch": 40.51, - "learning_rate": 2.3732314594762674e-08, - "loss": 3.7486, - "step": 3647500 - }, - { - "epoch": 40.51, - "learning_rate": 2.3718432801012813e-08, - "loss": 3.764, - "step": 3648000 - }, - { - "epoch": 40.52, - "learning_rate": 2.3704551007262955e-08, - "loss": 3.7301, - "step": 3648500 - }, - { - "epoch": 40.52, - "learning_rate": 2.369066921351309e-08, - "loss": 3.7509, - "step": 3649000 - }, - { - "epoch": 40.53, - "learning_rate": 2.367678741976323e-08, - "loss": 3.7427, - "step": 3649500 - }, - { - "epoch": 40.53, - "learning_rate": 2.366290562601337e-08, - "loss": 3.7599, - "step": 3650000 - }, - { - "epoch": 40.54, - "learning_rate": 2.3649023832263508e-08, - "loss": 3.7573, - "step": 3650500 - }, - { - "epoch": 40.55, - "learning_rate": 2.3635142038513647e-08, - "loss": 3.7548, - "step": 3651000 - }, - { - "epoch": 40.55, - "learning_rate": 2.3621260244763786e-08, - "loss": 3.725, - "step": 3651500 - }, - { - "epoch": 40.56, - "learning_rate": 2.3607378451013925e-08, - "loss": 3.7511, - "step": 3652000 - }, - { - "epoch": 40.56, - "learning_rate": 2.3593496657264064e-08, - "loss": 3.7554, - "step": 3652500 - }, - { - "epoch": 40.57, - "learning_rate": 2.3579614863514203e-08, - "loss": 3.7447, - "step": 3653000 - }, - { - "epoch": 40.57, - "learning_rate": 2.3565733069764342e-08, - "loss": 3.7581, - "step": 3653500 - }, - { - "epoch": 40.58, - "learning_rate": 2.355185127601448e-08, - "loss": 3.7376, - "step": 3654000 - }, - { - "epoch": 40.58, - "learning_rate": 2.353796948226462e-08, - "loss": 3.7443, - "step": 3654500 - }, - { - "epoch": 40.59, - "learning_rate": 2.352408768851476e-08, - "loss": 3.7425, - "step": 3655000 - }, - { - "epoch": 40.6, - "learning_rate": 2.3510205894764898e-08, - "loss": 3.7513, - "step": 3655500 - }, - { - "epoch": 40.6, - "learning_rate": 2.3496324101015036e-08, - "loss": 3.7469, - "step": 3656000 - }, - { - "epoch": 40.61, - "learning_rate": 2.3482442307265175e-08, - "loss": 3.7767, - "step": 3656500 - }, - { - "epoch": 40.61, - "learning_rate": 2.346856051351531e-08, - "loss": 3.7529, - "step": 3657000 - }, - { - "epoch": 40.62, - "learning_rate": 2.3454678719765453e-08, - "loss": 3.7441, - "step": 3657500 - }, - { - "epoch": 40.62, - "learning_rate": 2.3440796926015592e-08, - "loss": 3.7323, - "step": 3658000 - }, - { - "epoch": 40.63, - "learning_rate": 2.3426915132265728e-08, - "loss": 3.7337, - "step": 3658500 - }, - { - "epoch": 40.63, - "learning_rate": 2.341303333851587e-08, - "loss": 3.755, - "step": 3659000 - }, - { - "epoch": 40.64, - "learning_rate": 2.339915154476601e-08, - "loss": 3.7399, - "step": 3659500 - }, - { - "epoch": 40.65, - "learning_rate": 2.3385269751016145e-08, - "loss": 3.7402, - "step": 3660000 - }, - { - "epoch": 40.65, - "learning_rate": 2.3371387957266287e-08, - "loss": 3.7521, - "step": 3660500 - }, - { - "epoch": 40.66, - "learning_rate": 2.3357506163516426e-08, - "loss": 3.7618, - "step": 3661000 - }, - { - "epoch": 40.66, - "learning_rate": 2.3343624369766562e-08, - "loss": 3.747, - "step": 3661500 - }, - { - "epoch": 40.67, - "learning_rate": 2.33297425760167e-08, - "loss": 3.7524, - "step": 3662000 - }, - { - "epoch": 40.67, - "learning_rate": 2.3315860782266843e-08, - "loss": 3.7458, - "step": 3662500 - }, - { - "epoch": 40.68, - "learning_rate": 2.330197898851698e-08, - "loss": 3.7401, - "step": 3663000 - }, - { - "epoch": 40.68, - "learning_rate": 2.3288097194767118e-08, - "loss": 3.7383, - "step": 3663500 - }, - { - "epoch": 40.69, - "learning_rate": 2.3274215401017256e-08, - "loss": 3.7525, - "step": 3664000 - }, - { - "epoch": 40.7, - "learning_rate": 2.3260333607267395e-08, - "loss": 3.7486, - "step": 3664500 - }, - { - "epoch": 40.7, - "learning_rate": 2.3246451813517534e-08, - "loss": 3.7263, - "step": 3665000 - }, - { - "epoch": 40.71, - "learning_rate": 2.3232570019767673e-08, - "loss": 3.7393, - "step": 3665500 - }, - { - "epoch": 40.71, - "learning_rate": 2.3218688226017812e-08, - "loss": 3.7416, - "step": 3666000 - }, - { - "epoch": 40.72, - "learning_rate": 2.320480643226795e-08, - "loss": 3.7504, - "step": 3666500 - }, - { - "epoch": 40.72, - "learning_rate": 2.319092463851809e-08, - "loss": 3.7592, - "step": 3667000 - }, - { - "epoch": 40.73, - "learning_rate": 2.317704284476823e-08, - "loss": 3.7557, - "step": 3667500 - }, - { - "epoch": 40.73, - "learning_rate": 2.3163161051018368e-08, - "loss": 3.7418, - "step": 3668000 - }, - { - "epoch": 40.74, - "learning_rate": 2.3149279257268507e-08, - "loss": 3.7464, - "step": 3668500 - }, - { - "epoch": 40.75, - "learning_rate": 2.3135397463518643e-08, - "loss": 3.7624, - "step": 3669000 - }, - { - "epoch": 40.75, - "learning_rate": 2.3121515669768785e-08, - "loss": 3.7725, - "step": 3669500 - }, - { - "epoch": 40.76, - "learning_rate": 2.3107633876018924e-08, - "loss": 3.7491, - "step": 3670000 - }, - { - "epoch": 40.76, - "learning_rate": 2.309375208226906e-08, - "loss": 3.7542, - "step": 3670500 - }, - { - "epoch": 40.77, - "learning_rate": 2.3079870288519202e-08, - "loss": 3.7444, - "step": 3671000 - }, - { - "epoch": 40.77, - "learning_rate": 2.306598849476934e-08, - "loss": 3.7429, - "step": 3671500 - }, - { - "epoch": 40.78, - "learning_rate": 2.3052106701019476e-08, - "loss": 3.7363, - "step": 3672000 - }, - { - "epoch": 40.78, - "learning_rate": 2.3038224907269615e-08, - "loss": 3.7357, - "step": 3672500 - }, - { - "epoch": 40.79, - "learning_rate": 2.3024343113519758e-08, - "loss": 3.7491, - "step": 3673000 - }, - { - "epoch": 40.8, - "learning_rate": 2.3010461319769893e-08, - "loss": 3.7528, - "step": 3673500 - }, - { - "epoch": 40.8, - "learning_rate": 2.2996579526020032e-08, - "loss": 3.7521, - "step": 3674000 - }, - { - "epoch": 40.81, - "learning_rate": 2.2982697732270175e-08, - "loss": 3.7462, - "step": 3674500 - }, - { - "epoch": 40.81, - "learning_rate": 2.296881593852031e-08, - "loss": 3.7502, - "step": 3675000 - }, - { - "epoch": 40.82, - "learning_rate": 2.295493414477045e-08, - "loss": 3.7353, - "step": 3675500 - }, - { - "epoch": 40.82, - "learning_rate": 2.2941052351020588e-08, - "loss": 3.7356, - "step": 3676000 - }, - { - "epoch": 40.83, - "learning_rate": 2.2927170557270727e-08, - "loss": 3.7673, - "step": 3676500 - }, - { - "epoch": 40.83, - "learning_rate": 2.2913288763520866e-08, - "loss": 3.7283, - "step": 3677000 - }, - { - "epoch": 40.84, - "learning_rate": 2.2899406969771005e-08, - "loss": 3.7487, - "step": 3677500 - }, - { - "epoch": 40.85, - "learning_rate": 2.2885525176021144e-08, - "loss": 3.7555, - "step": 3678000 - }, - { - "epoch": 40.85, - "learning_rate": 2.2871643382271283e-08, - "loss": 3.7416, - "step": 3678500 - }, - { - "epoch": 40.86, - "learning_rate": 2.2857761588521422e-08, - "loss": 3.7408, - "step": 3679000 - }, - { - "epoch": 40.86, - "learning_rate": 2.2843879794771557e-08, - "loss": 3.7335, - "step": 3679500 - }, - { - "epoch": 40.87, - "learning_rate": 2.28299980010217e-08, - "loss": 3.7644, - "step": 3680000 - }, - { - "epoch": 40.87, - "learning_rate": 2.281611620727184e-08, - "loss": 3.7474, - "step": 3680500 - }, - { - "epoch": 40.88, - "learning_rate": 2.2802234413521974e-08, - "loss": 3.7358, - "step": 3681000 - }, - { - "epoch": 40.88, - "learning_rate": 2.2788352619772117e-08, - "loss": 3.7395, - "step": 3681500 - }, - { - "epoch": 40.89, - "learning_rate": 2.2774470826022256e-08, - "loss": 3.7353, - "step": 3682000 - }, - { - "epoch": 40.9, - "learning_rate": 2.276058903227239e-08, - "loss": 3.7454, - "step": 3682500 - }, - { - "epoch": 40.9, - "learning_rate": 2.274670723852253e-08, - "loss": 3.7577, - "step": 3683000 - }, - { - "epoch": 40.91, - "learning_rate": 2.2732825444772672e-08, - "loss": 3.7287, - "step": 3683500 - }, - { - "epoch": 40.91, - "learning_rate": 2.2718943651022808e-08, - "loss": 3.745, - "step": 3684000 - }, - { - "epoch": 40.92, - "learning_rate": 2.2705061857272947e-08, - "loss": 3.7586, - "step": 3684500 - }, - { - "epoch": 40.92, - "learning_rate": 2.269118006352309e-08, - "loss": 3.7679, - "step": 3685000 - }, - { - "epoch": 40.93, - "learning_rate": 2.2677298269773225e-08, - "loss": 3.7601, - "step": 3685500 - }, - { - "epoch": 40.93, - "learning_rate": 2.2663416476023364e-08, - "loss": 3.7607, - "step": 3686000 - }, - { - "epoch": 40.94, - "learning_rate": 2.2649534682273503e-08, - "loss": 3.7488, - "step": 3686500 - }, - { - "epoch": 40.95, - "learning_rate": 2.2635652888523642e-08, - "loss": 3.7738, - "step": 3687000 - }, - { - "epoch": 40.95, - "learning_rate": 2.262177109477378e-08, - "loss": 3.7532, - "step": 3687500 - }, - { - "epoch": 40.96, - "learning_rate": 2.260788930102392e-08, - "loss": 3.7496, - "step": 3688000 - }, - { - "epoch": 40.96, - "learning_rate": 2.259400750727406e-08, - "loss": 3.7555, - "step": 3688500 - }, - { - "epoch": 40.97, - "learning_rate": 2.2580125713524198e-08, - "loss": 3.7515, - "step": 3689000 - }, - { - "epoch": 40.97, - "learning_rate": 2.2566243919774337e-08, - "loss": 3.7609, - "step": 3689500 - }, - { - "epoch": 40.98, - "learning_rate": 2.2552362126024476e-08, - "loss": 3.7799, - "step": 3690000 - }, - { - "epoch": 40.98, - "learning_rate": 2.2538480332274614e-08, - "loss": 3.7663, - "step": 3690500 - }, - { - "epoch": 40.99, - "learning_rate": 2.2524598538524753e-08, - "loss": 3.7547, - "step": 3691000 - }, - { - "epoch": 41.0, - "learning_rate": 2.2510716744774892e-08, - "loss": 3.7379, - "step": 3691500 - }, - { - "epoch": 41.0, - "eval_loss": 3.826106548309326, - "eval_runtime": 6.3033, - "eval_samples_per_second": 246.537, - "step": 3691886 - }, - { - "epoch": 41.0, - "learning_rate": 2.249683495102503e-08, - "loss": 3.7603, - "step": 3692000 - }, - { - "epoch": 41.01, - "learning_rate": 2.248295315727517e-08, - "loss": 3.7643, - "step": 3692500 - }, - { - "epoch": 41.01, - "learning_rate": 2.246907136352531e-08, - "loss": 3.7455, - "step": 3693000 - }, - { - "epoch": 41.02, - "learning_rate": 2.2455189569775448e-08, - "loss": 3.7449, - "step": 3693500 - }, - { - "epoch": 41.02, - "learning_rate": 2.2441307776025587e-08, - "loss": 3.7448, - "step": 3694000 - }, - { - "epoch": 41.03, - "learning_rate": 2.2427425982275726e-08, - "loss": 3.7556, - "step": 3694500 - }, - { - "epoch": 41.03, - "learning_rate": 2.2413544188525862e-08, - "loss": 3.7418, - "step": 3695000 - }, - { - "epoch": 41.04, - "learning_rate": 2.2399662394776004e-08, - "loss": 3.7766, - "step": 3695500 - }, - { - "epoch": 41.05, - "learning_rate": 2.238578060102614e-08, - "loss": 3.7308, - "step": 3696000 - }, - { - "epoch": 41.05, - "learning_rate": 2.237189880727628e-08, - "loss": 3.7448, - "step": 3696500 - }, - { - "epoch": 41.06, - "learning_rate": 2.235801701352642e-08, - "loss": 3.7586, - "step": 3697000 - }, - { - "epoch": 41.06, - "learning_rate": 2.2344135219776557e-08, - "loss": 3.7438, - "step": 3697500 - }, - { - "epoch": 41.07, - "learning_rate": 2.2330253426026695e-08, - "loss": 3.7499, - "step": 3698000 - }, - { - "epoch": 41.07, - "learning_rate": 2.2316371632276834e-08, - "loss": 3.7575, - "step": 3698500 - }, - { - "epoch": 41.08, - "learning_rate": 2.2302489838526973e-08, - "loss": 3.7529, - "step": 3699000 - }, - { - "epoch": 41.08, - "learning_rate": 2.2288608044777112e-08, - "loss": 3.7491, - "step": 3699500 - }, - { - "epoch": 41.09, - "learning_rate": 2.227472625102725e-08, - "loss": 3.7335, - "step": 3700000 - }, - { - "epoch": 41.1, - "learning_rate": 2.226084445727739e-08, - "loss": 3.7453, - "step": 3700500 - }, - { - "epoch": 41.1, - "learning_rate": 2.224696266352753e-08, - "loss": 3.7685, - "step": 3701000 - }, - { - "epoch": 41.11, - "learning_rate": 2.2233080869777668e-08, - "loss": 3.7471, - "step": 3701500 - }, - { - "epoch": 41.11, - "learning_rate": 2.2219199076027807e-08, - "loss": 3.7441, - "step": 3702000 - }, - { - "epoch": 41.12, - "learning_rate": 2.2205317282277946e-08, - "loss": 3.7526, - "step": 3702500 - }, - { - "epoch": 41.12, - "learning_rate": 2.2191435488528085e-08, - "loss": 3.7518, - "step": 3703000 - }, - { - "epoch": 41.13, - "learning_rate": 2.2177553694778224e-08, - "loss": 3.7561, - "step": 3703500 - }, - { - "epoch": 41.13, - "learning_rate": 2.2163671901028363e-08, - "loss": 3.7603, - "step": 3704000 - }, - { - "epoch": 41.14, - "learning_rate": 2.2149790107278502e-08, - "loss": 3.7537, - "step": 3704500 - }, - { - "epoch": 41.15, - "learning_rate": 2.213590831352864e-08, - "loss": 3.7676, - "step": 3705000 - }, - { - "epoch": 41.15, - "learning_rate": 2.2122026519778777e-08, - "loss": 3.7581, - "step": 3705500 - }, - { - "epoch": 41.16, - "learning_rate": 2.210814472602892e-08, - "loss": 3.734, - "step": 3706000 - }, - { - "epoch": 41.16, - "learning_rate": 2.2094262932279058e-08, - "loss": 3.767, - "step": 3706500 - }, - { - "epoch": 41.17, - "learning_rate": 2.2080381138529193e-08, - "loss": 3.7578, - "step": 3707000 - }, - { - "epoch": 41.17, - "learning_rate": 2.2066499344779336e-08, - "loss": 3.74, - "step": 3707500 - }, - { - "epoch": 41.18, - "learning_rate": 2.2052617551029475e-08, - "loss": 3.7551, - "step": 3708000 - }, - { - "epoch": 41.18, - "learning_rate": 2.203873575727961e-08, - "loss": 3.7496, - "step": 3708500 - }, - { - "epoch": 41.19, - "learning_rate": 2.202485396352975e-08, - "loss": 3.7714, - "step": 3709000 - }, - { - "epoch": 41.2, - "learning_rate": 2.201097216977989e-08, - "loss": 3.7273, - "step": 3709500 - }, - { - "epoch": 41.2, - "learning_rate": 2.1997090376030027e-08, - "loss": 3.7518, - "step": 3710000 - }, - { - "epoch": 41.21, - "learning_rate": 2.1983208582280166e-08, - "loss": 3.7663, - "step": 3710500 - }, - { - "epoch": 41.21, - "learning_rate": 2.1969326788530308e-08, - "loss": 3.7453, - "step": 3711000 - }, - { - "epoch": 41.22, - "learning_rate": 2.1955444994780444e-08, - "loss": 3.7401, - "step": 3711500 - }, - { - "epoch": 41.22, - "learning_rate": 2.1941563201030583e-08, - "loss": 3.7375, - "step": 3712000 - }, - { - "epoch": 41.23, - "learning_rate": 2.1927681407280722e-08, - "loss": 3.7493, - "step": 3712500 - }, - { - "epoch": 41.23, - "learning_rate": 2.191379961353086e-08, - "loss": 3.7453, - "step": 3713000 - }, - { - "epoch": 41.24, - "learning_rate": 2.1899917819781e-08, - "loss": 3.756, - "step": 3713500 - }, - { - "epoch": 41.25, - "learning_rate": 2.188603602603114e-08, - "loss": 3.7488, - "step": 3714000 - }, - { - "epoch": 41.25, - "learning_rate": 2.1872154232281278e-08, - "loss": 3.7675, - "step": 3714500 - }, - { - "epoch": 41.26, - "learning_rate": 2.1858272438531417e-08, - "loss": 3.7388, - "step": 3715000 - }, - { - "epoch": 41.26, - "learning_rate": 2.1844390644781556e-08, - "loss": 3.733, - "step": 3715500 - }, - { - "epoch": 41.27, - "learning_rate": 2.183050885103169e-08, - "loss": 3.7553, - "step": 3716000 - }, - { - "epoch": 41.27, - "learning_rate": 2.1816627057281834e-08, - "loss": 3.7665, - "step": 3716500 - }, - { - "epoch": 41.28, - "learning_rate": 2.1802745263531972e-08, - "loss": 3.7475, - "step": 3717000 - }, - { - "epoch": 41.28, - "learning_rate": 2.1788863469782108e-08, - "loss": 3.7631, - "step": 3717500 - }, - { - "epoch": 41.29, - "learning_rate": 2.177498167603225e-08, - "loss": 3.7534, - "step": 3718000 - }, - { - "epoch": 41.3, - "learning_rate": 2.176109988228239e-08, - "loss": 3.7793, - "step": 3718500 - }, - { - "epoch": 41.3, - "learning_rate": 2.1747218088532525e-08, - "loss": 3.7399, - "step": 3719000 - }, - { - "epoch": 41.31, - "learning_rate": 2.1733336294782664e-08, - "loss": 3.7517, - "step": 3719500 - }, - { - "epoch": 41.31, - "learning_rate": 2.1719454501032806e-08, - "loss": 3.7603, - "step": 3720000 - }, - { - "epoch": 41.32, - "learning_rate": 2.1705572707282942e-08, - "loss": 3.7614, - "step": 3720500 - }, - { - "epoch": 41.32, - "learning_rate": 2.169169091353308e-08, - "loss": 3.7555, - "step": 3721000 - }, - { - "epoch": 41.33, - "learning_rate": 2.1677809119783223e-08, - "loss": 3.7324, - "step": 3721500 - }, - { - "epoch": 41.33, - "learning_rate": 2.166392732603336e-08, - "loss": 3.7434, - "step": 3722000 - }, - { - "epoch": 41.34, - "learning_rate": 2.1650045532283498e-08, - "loss": 3.744, - "step": 3722500 - }, - { - "epoch": 41.35, - "learning_rate": 2.163616373853364e-08, - "loss": 3.7342, - "step": 3723000 - }, - { - "epoch": 41.35, - "learning_rate": 2.1622281944783776e-08, - "loss": 3.7592, - "step": 3723500 - }, - { - "epoch": 41.36, - "learning_rate": 2.1608400151033915e-08, - "loss": 3.7443, - "step": 3724000 - }, - { - "epoch": 41.36, - "learning_rate": 2.1594518357284054e-08, - "loss": 3.7478, - "step": 3724500 - }, - { - "epoch": 41.37, - "learning_rate": 2.1580636563534192e-08, - "loss": 3.7618, - "step": 3725000 - }, - { - "epoch": 41.37, - "learning_rate": 2.156675476978433e-08, - "loss": 3.7597, - "step": 3725500 - }, - { - "epoch": 41.38, - "learning_rate": 2.155287297603447e-08, - "loss": 3.759, - "step": 3726000 - }, - { - "epoch": 41.38, - "learning_rate": 2.153899118228461e-08, - "loss": 3.7281, - "step": 3726500 - }, - { - "epoch": 41.39, - "learning_rate": 2.1525109388534748e-08, - "loss": 3.738, - "step": 3727000 - }, - { - "epoch": 41.4, - "learning_rate": 2.1511227594784887e-08, - "loss": 3.7371, - "step": 3727500 - }, - { - "epoch": 41.4, - "learning_rate": 2.1497345801035023e-08, - "loss": 3.7489, - "step": 3728000 - }, - { - "epoch": 41.41, - "learning_rate": 2.1483464007285165e-08, - "loss": 3.756, - "step": 3728500 - }, - { - "epoch": 41.41, - "learning_rate": 2.1469582213535304e-08, - "loss": 3.7448, - "step": 3729000 - }, - { - "epoch": 41.42, - "learning_rate": 2.145570041978544e-08, - "loss": 3.7548, - "step": 3729500 - }, - { - "epoch": 41.42, - "learning_rate": 2.1441818626035582e-08, - "loss": 3.7433, - "step": 3730000 - }, - { - "epoch": 41.43, - "learning_rate": 2.142793683228572e-08, - "loss": 3.7573, - "step": 3730500 - }, - { - "epoch": 41.43, - "learning_rate": 2.1414055038535857e-08, - "loss": 3.758, - "step": 3731000 - }, - { - "epoch": 41.44, - "learning_rate": 2.1400173244785996e-08, - "loss": 3.7446, - "step": 3731500 - }, - { - "epoch": 41.45, - "learning_rate": 2.1386291451036138e-08, - "loss": 3.7514, - "step": 3732000 - }, - { - "epoch": 41.45, - "learning_rate": 2.1372409657286273e-08, - "loss": 3.7488, - "step": 3732500 - }, - { - "epoch": 41.46, - "learning_rate": 2.1358527863536412e-08, - "loss": 3.7364, - "step": 3733000 - }, - { - "epoch": 41.46, - "learning_rate": 2.1344646069786555e-08, - "loss": 3.7523, - "step": 3733500 - }, - { - "epoch": 41.47, - "learning_rate": 2.133076427603669e-08, - "loss": 3.7517, - "step": 3734000 - }, - { - "epoch": 41.47, - "learning_rate": 2.131688248228683e-08, - "loss": 3.7579, - "step": 3734500 - }, - { - "epoch": 41.48, - "learning_rate": 2.1303000688536968e-08, - "loss": 3.7446, - "step": 3735000 - }, - { - "epoch": 41.48, - "learning_rate": 2.1289118894787107e-08, - "loss": 3.7363, - "step": 3735500 - }, - { - "epoch": 41.49, - "learning_rate": 2.1275237101037246e-08, - "loss": 3.752, - "step": 3736000 - }, - { - "epoch": 41.5, - "learning_rate": 2.1261355307287385e-08, - "loss": 3.7526, - "step": 3736500 - }, - { - "epoch": 41.5, - "learning_rate": 2.1247473513537524e-08, - "loss": 3.7606, - "step": 3737000 - }, - { - "epoch": 41.51, - "learning_rate": 2.1233591719787663e-08, - "loss": 3.7543, - "step": 3737500 - }, - { - "epoch": 41.51, - "learning_rate": 2.1219709926037802e-08, - "loss": 3.7508, - "step": 3738000 - }, - { - "epoch": 41.52, - "learning_rate": 2.120582813228794e-08, - "loss": 3.7646, - "step": 3738500 - }, - { - "epoch": 41.52, - "learning_rate": 2.119194633853808e-08, - "loss": 3.7461, - "step": 3739000 - }, - { - "epoch": 41.53, - "learning_rate": 2.117806454478822e-08, - "loss": 3.7575, - "step": 3739500 - }, - { - "epoch": 41.53, - "learning_rate": 2.1164182751038358e-08, - "loss": 3.7605, - "step": 3740000 - }, - { - "epoch": 41.54, - "learning_rate": 2.1150300957288497e-08, - "loss": 3.7633, - "step": 3740500 - }, - { - "epoch": 41.55, - "learning_rate": 2.1136419163538636e-08, - "loss": 3.7253, - "step": 3741000 - }, - { - "epoch": 41.55, - "learning_rate": 2.1122537369788775e-08, - "loss": 3.7594, - "step": 3741500 - }, - { - "epoch": 41.56, - "learning_rate": 2.110865557603891e-08, - "loss": 3.7596, - "step": 3742000 - }, - { - "epoch": 41.56, - "learning_rate": 2.1094773782289053e-08, - "loss": 3.7436, - "step": 3742500 - }, - { - "epoch": 41.57, - "learning_rate": 2.108089198853919e-08, - "loss": 3.7416, - "step": 3743000 - }, - { - "epoch": 41.57, - "learning_rate": 2.1067010194789327e-08, - "loss": 3.7358, - "step": 3743500 - }, - { - "epoch": 41.58, - "learning_rate": 2.105312840103947e-08, - "loss": 3.7479, - "step": 3744000 - }, - { - "epoch": 41.58, - "learning_rate": 2.103924660728961e-08, - "loss": 3.7513, - "step": 3744500 - }, - { - "epoch": 41.59, - "learning_rate": 2.1025364813539744e-08, - "loss": 3.7635, - "step": 3745000 - }, - { - "epoch": 41.6, - "learning_rate": 2.1011483019789883e-08, - "loss": 3.7684, - "step": 3745500 - }, - { - "epoch": 41.6, - "learning_rate": 2.0997601226040025e-08, - "loss": 3.7582, - "step": 3746000 - }, - { - "epoch": 41.61, - "learning_rate": 2.098371943229016e-08, - "loss": 3.7426, - "step": 3746500 - }, - { - "epoch": 41.61, - "learning_rate": 2.09698376385403e-08, - "loss": 3.752, - "step": 3747000 - }, - { - "epoch": 41.62, - "learning_rate": 2.0955955844790442e-08, - "loss": 3.7571, - "step": 3747500 - }, - { - "epoch": 41.62, - "learning_rate": 2.0942074051040578e-08, - "loss": 3.7346, - "step": 3748000 - }, - { - "epoch": 41.63, - "learning_rate": 2.0928192257290717e-08, - "loss": 3.7188, - "step": 3748500 - }, - { - "epoch": 41.63, - "learning_rate": 2.0914310463540856e-08, - "loss": 3.761, - "step": 3749000 - }, - { - "epoch": 41.64, - "learning_rate": 2.0900428669790995e-08, - "loss": 3.7587, - "step": 3749500 - }, - { - "epoch": 41.65, - "learning_rate": 2.0886546876041134e-08, - "loss": 3.7376, - "step": 3750000 - }, - { - "epoch": 41.65, - "learning_rate": 2.0872665082291273e-08, - "loss": 3.7643, - "step": 3750500 - }, - { - "epoch": 41.66, - "learning_rate": 2.085878328854141e-08, - "loss": 3.7413, - "step": 3751000 - }, - { - "epoch": 41.66, - "learning_rate": 2.084490149479155e-08, - "loss": 3.7477, - "step": 3751500 - }, - { - "epoch": 41.67, - "learning_rate": 2.083101970104169e-08, - "loss": 3.7441, - "step": 3752000 - }, - { - "epoch": 41.67, - "learning_rate": 2.081713790729183e-08, - "loss": 3.7458, - "step": 3752500 - }, - { - "epoch": 41.68, - "learning_rate": 2.0803256113541967e-08, - "loss": 3.748, - "step": 3753000 - }, - { - "epoch": 41.68, - "learning_rate": 2.0789374319792106e-08, - "loss": 3.7536, - "step": 3753500 - }, - { - "epoch": 41.69, - "learning_rate": 2.0775492526042242e-08, - "loss": 3.7538, - "step": 3754000 - }, - { - "epoch": 41.7, - "learning_rate": 2.0761610732292384e-08, - "loss": 3.7593, - "step": 3754500 - }, - { - "epoch": 41.7, - "learning_rate": 2.0747728938542523e-08, - "loss": 3.7641, - "step": 3755000 - }, - { - "epoch": 41.71, - "learning_rate": 2.073384714479266e-08, - "loss": 3.752, - "step": 3755500 - }, - { - "epoch": 41.71, - "learning_rate": 2.07199653510428e-08, - "loss": 3.7499, - "step": 3756000 - }, - { - "epoch": 41.72, - "learning_rate": 2.070608355729294e-08, - "loss": 3.7624, - "step": 3756500 - }, - { - "epoch": 41.72, - "learning_rate": 2.0692201763543076e-08, - "loss": 3.7275, - "step": 3757000 - }, - { - "epoch": 41.73, - "learning_rate": 2.0678319969793215e-08, - "loss": 3.7608, - "step": 3757500 - }, - { - "epoch": 41.73, - "learning_rate": 2.0664438176043357e-08, - "loss": 3.7511, - "step": 3758000 - }, - { - "epoch": 41.74, - "learning_rate": 2.0650556382293493e-08, - "loss": 3.7449, - "step": 3758500 - }, - { - "epoch": 41.75, - "learning_rate": 2.063667458854363e-08, - "loss": 3.7463, - "step": 3759000 - }, - { - "epoch": 41.75, - "learning_rate": 2.0622792794793774e-08, - "loss": 3.768, - "step": 3759500 - }, - { - "epoch": 41.76, - "learning_rate": 2.060891100104391e-08, - "loss": 3.752, - "step": 3760000 - }, - { - "epoch": 41.76, - "learning_rate": 2.059502920729405e-08, - "loss": 3.7528, - "step": 3760500 - }, - { - "epoch": 41.77, - "learning_rate": 2.0581147413544187e-08, - "loss": 3.7624, - "step": 3761000 - }, - { - "epoch": 41.77, - "learning_rate": 2.0567265619794326e-08, - "loss": 3.7313, - "step": 3761500 - }, - { - "epoch": 41.78, - "learning_rate": 2.0553383826044465e-08, - "loss": 3.7451, - "step": 3762000 - }, - { - "epoch": 41.78, - "learning_rate": 2.0539502032294604e-08, - "loss": 3.7545, - "step": 3762500 - }, - { - "epoch": 41.79, - "learning_rate": 2.0525620238544743e-08, - "loss": 3.7478, - "step": 3763000 - }, - { - "epoch": 41.8, - "learning_rate": 2.0511738444794882e-08, - "loss": 3.7376, - "step": 3763500 - }, - { - "epoch": 41.8, - "learning_rate": 2.049785665104502e-08, - "loss": 3.7487, - "step": 3764000 - }, - { - "epoch": 41.81, - "learning_rate": 2.0483974857295157e-08, - "loss": 3.7407, - "step": 3764500 - }, - { - "epoch": 41.81, - "learning_rate": 2.04700930635453e-08, - "loss": 3.758, - "step": 3765000 - }, - { - "epoch": 41.82, - "learning_rate": 2.0456211269795438e-08, - "loss": 3.7576, - "step": 3765500 - }, - { - "epoch": 41.82, - "learning_rate": 2.0442329476045574e-08, - "loss": 3.7412, - "step": 3766000 - }, - { - "epoch": 41.83, - "learning_rate": 2.0428447682295716e-08, - "loss": 3.7597, - "step": 3766500 - }, - { - "epoch": 41.83, - "learning_rate": 2.0414565888545855e-08, - "loss": 3.7439, - "step": 3767000 - }, - { - "epoch": 41.84, - "learning_rate": 2.040068409479599e-08, - "loss": 3.7394, - "step": 3767500 - }, - { - "epoch": 41.85, - "learning_rate": 2.038680230104613e-08, - "loss": 3.7137, - "step": 3768000 - }, - { - "epoch": 41.85, - "learning_rate": 2.037292050729627e-08, - "loss": 3.7631, - "step": 3768500 - }, - { - "epoch": 41.86, - "learning_rate": 2.0359038713546407e-08, - "loss": 3.7368, - "step": 3769000 - }, - { - "epoch": 41.86, - "learning_rate": 2.0345156919796546e-08, - "loss": 3.7518, - "step": 3769500 - }, - { - "epoch": 41.87, - "learning_rate": 2.033127512604669e-08, - "loss": 3.7503, - "step": 3770000 - }, - { - "epoch": 41.87, - "learning_rate": 2.0317393332296824e-08, - "loss": 3.7532, - "step": 3770500 - }, - { - "epoch": 41.88, - "learning_rate": 2.0303511538546963e-08, - "loss": 3.7434, - "step": 3771000 - }, - { - "epoch": 41.88, - "learning_rate": 2.0289629744797102e-08, - "loss": 3.7422, - "step": 3771500 - }, - { - "epoch": 41.89, - "learning_rate": 2.027574795104724e-08, - "loss": 3.7478, - "step": 3772000 - }, - { - "epoch": 41.9, - "learning_rate": 2.026186615729738e-08, - "loss": 3.7551, - "step": 3772500 - }, - { - "epoch": 41.9, - "learning_rate": 2.024798436354752e-08, - "loss": 3.7531, - "step": 3773000 - }, - { - "epoch": 41.91, - "learning_rate": 2.0234102569797658e-08, - "loss": 3.7409, - "step": 3773500 - }, - { - "epoch": 41.91, - "learning_rate": 2.0220220776047797e-08, - "loss": 3.757, - "step": 3774000 - }, - { - "epoch": 41.92, - "learning_rate": 2.0206338982297936e-08, - "loss": 3.7367, - "step": 3774500 - }, - { - "epoch": 41.92, - "learning_rate": 2.0192457188548075e-08, - "loss": 3.7459, - "step": 3775000 - }, - { - "epoch": 41.93, - "learning_rate": 2.0178575394798214e-08, - "loss": 3.7514, - "step": 3775500 - }, - { - "epoch": 41.93, - "learning_rate": 2.0164693601048353e-08, - "loss": 3.7525, - "step": 3776000 - }, - { - "epoch": 41.94, - "learning_rate": 2.015081180729849e-08, - "loss": 3.7296, - "step": 3776500 - }, - { - "epoch": 41.95, - "learning_rate": 2.013693001354863e-08, - "loss": 3.7335, - "step": 3777000 - }, - { - "epoch": 41.95, - "learning_rate": 2.012304821979877e-08, - "loss": 3.7467, - "step": 3777500 - }, - { - "epoch": 41.96, - "learning_rate": 2.010916642604891e-08, - "loss": 3.7427, - "step": 3778000 - }, - { - "epoch": 41.96, - "learning_rate": 2.0095284632299044e-08, - "loss": 3.7496, - "step": 3778500 - }, - { - "epoch": 41.97, - "learning_rate": 2.0081402838549186e-08, - "loss": 3.7426, - "step": 3779000 - }, - { - "epoch": 41.97, - "learning_rate": 2.0067521044799325e-08, - "loss": 3.7636, - "step": 3779500 - }, - { - "epoch": 41.98, - "learning_rate": 2.005363925104946e-08, - "loss": 3.7458, - "step": 3780000 - }, - { - "epoch": 41.98, - "learning_rate": 2.0039757457299603e-08, - "loss": 3.7414, - "step": 3780500 - }, - { - "epoch": 41.99, - "learning_rate": 2.002587566354974e-08, - "loss": 3.7337, - "step": 3781000 - }, - { - "epoch": 42.0, - "learning_rate": 2.0011993869799878e-08, - "loss": 3.7582, - "step": 3781500 - }, - { - "epoch": 42.0, - "eval_loss": 3.825657844543457, - "eval_runtime": 6.298, - "eval_samples_per_second": 246.746, - "step": 3781932 - }, - { - "epoch": 42.0, - "learning_rate": 1.9998112076050017e-08, - "loss": 3.7414, - "step": 3782000 - }, - { - "epoch": 42.01, - "learning_rate": 1.9984230282300156e-08, - "loss": 3.757, - "step": 3782500 - }, - { - "epoch": 42.01, - "learning_rate": 1.9970348488550295e-08, - "loss": 3.7462, - "step": 3783000 - }, - { - "epoch": 42.02, - "learning_rate": 1.9956466694800434e-08, - "loss": 3.7467, - "step": 3783500 - }, - { - "epoch": 42.02, - "learning_rate": 1.9942584901050573e-08, - "loss": 3.7842, - "step": 3784000 - }, - { - "epoch": 42.03, - "learning_rate": 1.992870310730071e-08, - "loss": 3.7375, - "step": 3784500 - }, - { - "epoch": 42.03, - "learning_rate": 1.991482131355085e-08, - "loss": 3.7367, - "step": 3785000 - }, - { - "epoch": 42.04, - "learning_rate": 1.990093951980099e-08, - "loss": 3.7641, - "step": 3785500 - }, - { - "epoch": 42.05, - "learning_rate": 1.988705772605113e-08, - "loss": 3.7621, - "step": 3786000 - }, - { - "epoch": 42.05, - "learning_rate": 1.9873175932301267e-08, - "loss": 3.7412, - "step": 3786500 - }, - { - "epoch": 42.06, - "learning_rate": 1.9859294138551406e-08, - "loss": 3.7382, - "step": 3787000 - }, - { - "epoch": 42.06, - "learning_rate": 1.9845412344801545e-08, - "loss": 3.7589, - "step": 3787500 - }, - { - "epoch": 42.07, - "learning_rate": 1.9831530551051684e-08, - "loss": 3.7544, - "step": 3788000 - }, - { - "epoch": 42.07, - "learning_rate": 1.9817648757301823e-08, - "loss": 3.7463, - "step": 3788500 - }, - { - "epoch": 42.08, - "learning_rate": 1.9803766963551962e-08, - "loss": 3.7774, - "step": 3789000 - }, - { - "epoch": 42.08, - "learning_rate": 1.97898851698021e-08, - "loss": 3.7423, - "step": 3789500 - }, - { - "epoch": 42.09, - "learning_rate": 1.977600337605224e-08, - "loss": 3.7436, - "step": 3790000 - }, - { - "epoch": 42.1, - "learning_rate": 1.9762121582302376e-08, - "loss": 3.7413, - "step": 3790500 - }, - { - "epoch": 42.1, - "learning_rate": 1.9748239788552518e-08, - "loss": 3.7564, - "step": 3791000 - }, - { - "epoch": 42.11, - "learning_rate": 1.9734357994802657e-08, - "loss": 3.7535, - "step": 3791500 - }, - { - "epoch": 42.11, - "learning_rate": 1.9720476201052793e-08, - "loss": 3.737, - "step": 3792000 - }, - { - "epoch": 42.12, - "learning_rate": 1.9706594407302935e-08, - "loss": 3.7388, - "step": 3792500 - }, - { - "epoch": 42.12, - "learning_rate": 1.9692712613553074e-08, - "loss": 3.7215, - "step": 3793000 - }, - { - "epoch": 42.13, - "learning_rate": 1.967883081980321e-08, - "loss": 3.7415, - "step": 3793500 - }, - { - "epoch": 42.13, - "learning_rate": 1.966494902605335e-08, - "loss": 3.7418, - "step": 3794000 - }, - { - "epoch": 42.14, - "learning_rate": 1.965106723230349e-08, - "loss": 3.7559, - "step": 3794500 - }, - { - "epoch": 42.15, - "learning_rate": 1.9637185438553626e-08, - "loss": 3.7462, - "step": 3795000 - }, - { - "epoch": 42.15, - "learning_rate": 1.9623303644803765e-08, - "loss": 3.754, - "step": 3795500 - }, - { - "epoch": 42.16, - "learning_rate": 1.9609421851053908e-08, - "loss": 3.7488, - "step": 3796000 - }, - { - "epoch": 42.16, - "learning_rate": 1.9595540057304043e-08, - "loss": 3.7534, - "step": 3796500 - }, - { - "epoch": 42.17, - "learning_rate": 1.9581658263554182e-08, - "loss": 3.7392, - "step": 3797000 - }, - { - "epoch": 42.17, - "learning_rate": 1.956777646980432e-08, - "loss": 3.744, - "step": 3797500 - }, - { - "epoch": 42.18, - "learning_rate": 1.955389467605446e-08, - "loss": 3.7621, - "step": 3798000 - }, - { - "epoch": 42.18, - "learning_rate": 1.95400128823046e-08, - "loss": 3.7517, - "step": 3798500 - }, - { - "epoch": 42.19, - "learning_rate": 1.9526131088554738e-08, - "loss": 3.7521, - "step": 3799000 - }, - { - "epoch": 42.2, - "learning_rate": 1.9512249294804877e-08, - "loss": 3.7508, - "step": 3799500 - }, - { - "epoch": 42.2, - "learning_rate": 1.9498367501055016e-08, - "loss": 3.7584, - "step": 3800000 - }, - { - "epoch": 42.21, - "learning_rate": 1.9484485707305155e-08, - "loss": 3.7598, - "step": 3800500 - }, - { - "epoch": 42.21, - "learning_rate": 1.947060391355529e-08, - "loss": 3.7573, - "step": 3801000 - }, - { - "epoch": 42.22, - "learning_rate": 1.9456722119805433e-08, - "loss": 3.7455, - "step": 3801500 - }, - { - "epoch": 42.22, - "learning_rate": 1.9442840326055572e-08, - "loss": 3.7331, - "step": 3802000 - }, - { - "epoch": 42.23, - "learning_rate": 1.9428958532305707e-08, - "loss": 3.7581, - "step": 3802500 - }, - { - "epoch": 42.23, - "learning_rate": 1.941507673855585e-08, - "loss": 3.7487, - "step": 3803000 - }, - { - "epoch": 42.24, - "learning_rate": 1.940119494480599e-08, - "loss": 3.7387, - "step": 3803500 - }, - { - "epoch": 42.25, - "learning_rate": 1.9387313151056124e-08, - "loss": 3.7291, - "step": 3804000 - }, - { - "epoch": 42.25, - "learning_rate": 1.9373431357306263e-08, - "loss": 3.7514, - "step": 3804500 - }, - { - "epoch": 42.26, - "learning_rate": 1.9359549563556405e-08, - "loss": 3.7562, - "step": 3805000 - }, - { - "epoch": 42.26, - "learning_rate": 1.934566776980654e-08, - "loss": 3.752, - "step": 3805500 - }, - { - "epoch": 42.27, - "learning_rate": 1.933178597605668e-08, - "loss": 3.7583, - "step": 3806000 - }, - { - "epoch": 42.27, - "learning_rate": 1.9317904182306822e-08, - "loss": 3.7646, - "step": 3806500 - }, - { - "epoch": 42.28, - "learning_rate": 1.9304022388556958e-08, - "loss": 3.7467, - "step": 3807000 - }, - { - "epoch": 42.28, - "learning_rate": 1.9290140594807097e-08, - "loss": 3.7651, - "step": 3807500 - }, - { - "epoch": 42.29, - "learning_rate": 1.9276258801057236e-08, - "loss": 3.7563, - "step": 3808000 - }, - { - "epoch": 42.3, - "learning_rate": 1.9262377007307375e-08, - "loss": 3.7503, - "step": 3808500 - }, - { - "epoch": 42.3, - "learning_rate": 1.9248495213557514e-08, - "loss": 3.7393, - "step": 3809000 - }, - { - "epoch": 42.31, - "learning_rate": 1.9234613419807653e-08, - "loss": 3.74, - "step": 3809500 - }, - { - "epoch": 42.31, - "learning_rate": 1.9220731626057792e-08, - "loss": 3.73, - "step": 3810000 - }, - { - "epoch": 42.32, - "learning_rate": 1.920684983230793e-08, - "loss": 3.7407, - "step": 3810500 - }, - { - "epoch": 42.32, - "learning_rate": 1.919296803855807e-08, - "loss": 3.719, - "step": 3811000 - }, - { - "epoch": 42.33, - "learning_rate": 1.9179086244808205e-08, - "loss": 3.751, - "step": 3811500 - }, - { - "epoch": 42.33, - "learning_rate": 1.9165204451058348e-08, - "loss": 3.7445, - "step": 3812000 - }, - { - "epoch": 42.34, - "learning_rate": 1.9151322657308486e-08, - "loss": 3.7381, - "step": 3812500 - }, - { - "epoch": 42.35, - "learning_rate": 1.9137440863558622e-08, - "loss": 3.7323, - "step": 3813000 - }, - { - "epoch": 42.35, - "learning_rate": 1.9123559069808764e-08, - "loss": 3.7512, - "step": 3813500 - }, - { - "epoch": 42.36, - "learning_rate": 1.9109677276058903e-08, - "loss": 3.7411, - "step": 3814000 - }, - { - "epoch": 42.36, - "learning_rate": 1.909579548230904e-08, - "loss": 3.7421, - "step": 3814500 - }, - { - "epoch": 42.37, - "learning_rate": 1.908191368855918e-08, - "loss": 3.7571, - "step": 3815000 - }, - { - "epoch": 42.37, - "learning_rate": 1.906803189480932e-08, - "loss": 3.7286, - "step": 3815500 - }, - { - "epoch": 42.38, - "learning_rate": 1.9054150101059456e-08, - "loss": 3.7598, - "step": 3816000 - }, - { - "epoch": 42.38, - "learning_rate": 1.9040268307309595e-08, - "loss": 3.7496, - "step": 3816500 - }, - { - "epoch": 42.39, - "learning_rate": 1.9026386513559737e-08, - "loss": 3.7444, - "step": 3817000 - }, - { - "epoch": 42.39, - "learning_rate": 1.9012504719809873e-08, - "loss": 3.7566, - "step": 3817500 - }, - { - "epoch": 42.4, - "learning_rate": 1.8998622926060012e-08, - "loss": 3.7505, - "step": 3818000 - }, - { - "epoch": 42.41, - "learning_rate": 1.8984741132310154e-08, - "loss": 3.7615, - "step": 3818500 - }, - { - "epoch": 42.41, - "learning_rate": 1.897085933856029e-08, - "loss": 3.7447, - "step": 3819000 - }, - { - "epoch": 42.42, - "learning_rate": 1.895697754481043e-08, - "loss": 3.7308, - "step": 3819500 - }, - { - "epoch": 42.42, - "learning_rate": 1.8943095751060568e-08, - "loss": 3.7757, - "step": 3820000 - }, - { - "epoch": 42.43, - "learning_rate": 1.8929213957310706e-08, - "loss": 3.757, - "step": 3820500 - }, - { - "epoch": 42.43, - "learning_rate": 1.8915332163560845e-08, - "loss": 3.7345, - "step": 3821000 - }, - { - "epoch": 42.44, - "learning_rate": 1.8901450369810984e-08, - "loss": 3.7321, - "step": 3821500 - }, - { - "epoch": 42.44, - "learning_rate": 1.8887568576061123e-08, - "loss": 3.7498, - "step": 3822000 - }, - { - "epoch": 42.45, - "learning_rate": 1.8873686782311262e-08, - "loss": 3.7938, - "step": 3822500 - }, - { - "epoch": 42.46, - "learning_rate": 1.88598049885614e-08, - "loss": 3.7506, - "step": 3823000 - }, - { - "epoch": 42.46, - "learning_rate": 1.884592319481154e-08, - "loss": 3.7895, - "step": 3823500 - }, - { - "epoch": 42.47, - "learning_rate": 1.883204140106168e-08, - "loss": 3.7406, - "step": 3824000 - }, - { - "epoch": 42.47, - "learning_rate": 1.8818159607311818e-08, - "loss": 3.7373, - "step": 3824500 - }, - { - "epoch": 42.48, - "learning_rate": 1.8804277813561957e-08, - "loss": 3.736, - "step": 3825000 - }, - { - "epoch": 42.48, - "learning_rate": 1.8790396019812096e-08, - "loss": 3.758, - "step": 3825500 - }, - { - "epoch": 42.49, - "learning_rate": 1.8776514226062235e-08, - "loss": 3.7689, - "step": 3826000 - }, - { - "epoch": 42.49, - "learning_rate": 1.8762632432312374e-08, - "loss": 3.7511, - "step": 3826500 - }, - { - "epoch": 42.5, - "learning_rate": 1.874875063856251e-08, - "loss": 3.7557, - "step": 3827000 - }, - { - "epoch": 42.51, - "learning_rate": 1.8734868844812652e-08, - "loss": 3.7505, - "step": 3827500 - }, - { - "epoch": 42.51, - "learning_rate": 1.872098705106279e-08, - "loss": 3.7355, - "step": 3828000 - }, - { - "epoch": 42.52, - "learning_rate": 1.8707105257312926e-08, - "loss": 3.7454, - "step": 3828500 - }, - { - "epoch": 42.52, - "learning_rate": 1.869322346356307e-08, - "loss": 3.7542, - "step": 3829000 - }, - { - "epoch": 42.53, - "learning_rate": 1.8679341669813208e-08, - "loss": 3.7695, - "step": 3829500 - }, - { - "epoch": 42.53, - "learning_rate": 1.8665459876063343e-08, - "loss": 3.7371, - "step": 3830000 - }, - { - "epoch": 42.54, - "learning_rate": 1.8651578082313482e-08, - "loss": 3.725, - "step": 3830500 - }, - { - "epoch": 42.54, - "learning_rate": 1.8637696288563625e-08, - "loss": 3.7504, - "step": 3831000 - }, - { - "epoch": 42.55, - "learning_rate": 1.862381449481376e-08, - "loss": 3.7388, - "step": 3831500 - }, - { - "epoch": 42.56, - "learning_rate": 1.86099327010639e-08, - "loss": 3.7387, - "step": 3832000 - }, - { - "epoch": 42.56, - "learning_rate": 1.859605090731404e-08, - "loss": 3.7515, - "step": 3832500 - }, - { - "epoch": 42.57, - "learning_rate": 1.8582169113564177e-08, - "loss": 3.7474, - "step": 3833000 - }, - { - "epoch": 42.57, - "learning_rate": 1.8568287319814316e-08, - "loss": 3.7727, - "step": 3833500 - }, - { - "epoch": 42.58, - "learning_rate": 1.8554405526064455e-08, - "loss": 3.7489, - "step": 3834000 - }, - { - "epoch": 42.58, - "learning_rate": 1.8540523732314594e-08, - "loss": 3.7557, - "step": 3834500 - }, - { - "epoch": 42.59, - "learning_rate": 1.8526641938564733e-08, - "loss": 3.7484, - "step": 3835000 - }, - { - "epoch": 42.59, - "learning_rate": 1.8512760144814872e-08, - "loss": 3.7547, - "step": 3835500 - }, - { - "epoch": 42.6, - "learning_rate": 1.849887835106501e-08, - "loss": 3.7546, - "step": 3836000 - }, - { - "epoch": 42.61, - "learning_rate": 1.848499655731515e-08, - "loss": 3.7361, - "step": 3836500 - }, - { - "epoch": 42.61, - "learning_rate": 1.847111476356529e-08, - "loss": 3.7453, - "step": 3837000 - }, - { - "epoch": 42.62, - "learning_rate": 1.8457232969815424e-08, - "loss": 3.7468, - "step": 3837500 - }, - { - "epoch": 42.62, - "learning_rate": 1.8443351176065567e-08, - "loss": 3.7656, - "step": 3838000 - }, - { - "epoch": 42.63, - "learning_rate": 1.8429469382315706e-08, - "loss": 3.7545, - "step": 3838500 - }, - { - "epoch": 42.63, - "learning_rate": 1.841558758856584e-08, - "loss": 3.7415, - "step": 3839000 - }, - { - "epoch": 42.64, - "learning_rate": 1.8401705794815983e-08, - "loss": 3.7599, - "step": 3839500 - }, - { - "epoch": 42.64, - "learning_rate": 1.8387824001066122e-08, - "loss": 3.754, - "step": 3840000 - }, - { - "epoch": 42.65, - "learning_rate": 1.8373942207316258e-08, - "loss": 3.7493, - "step": 3840500 - }, - { - "epoch": 42.66, - "learning_rate": 1.8360060413566397e-08, - "loss": 3.7377, - "step": 3841000 - }, - { - "epoch": 42.66, - "learning_rate": 1.834617861981654e-08, - "loss": 3.7589, - "step": 3841500 - }, - { - "epoch": 42.67, - "learning_rate": 1.8332296826066675e-08, - "loss": 3.7618, - "step": 3842000 - }, - { - "epoch": 42.67, - "learning_rate": 1.8318415032316814e-08, - "loss": 3.7473, - "step": 3842500 - }, - { - "epoch": 42.68, - "learning_rate": 1.8304533238566956e-08, - "loss": 3.7514, - "step": 3843000 - }, - { - "epoch": 42.68, - "learning_rate": 1.8290651444817092e-08, - "loss": 3.7404, - "step": 3843500 - }, - { - "epoch": 42.69, - "learning_rate": 1.827676965106723e-08, - "loss": 3.743, - "step": 3844000 - }, - { - "epoch": 42.69, - "learning_rate": 1.826288785731737e-08, - "loss": 3.7384, - "step": 3844500 - }, - { - "epoch": 42.7, - "learning_rate": 1.824900606356751e-08, - "loss": 3.7588, - "step": 3845000 - }, - { - "epoch": 42.71, - "learning_rate": 1.8235124269817648e-08, - "loss": 3.7412, - "step": 3845500 - }, - { - "epoch": 42.71, - "learning_rate": 1.8221242476067787e-08, - "loss": 3.7569, - "step": 3846000 - }, - { - "epoch": 42.72, - "learning_rate": 1.8207360682317926e-08, - "loss": 3.743, - "step": 3846500 - }, - { - "epoch": 42.72, - "learning_rate": 1.8193478888568064e-08, - "loss": 3.7522, - "step": 3847000 - }, - { - "epoch": 42.73, - "learning_rate": 1.8179597094818203e-08, - "loss": 3.7464, - "step": 3847500 - }, - { - "epoch": 42.73, - "learning_rate": 1.8165715301068342e-08, - "loss": 3.7456, - "step": 3848000 - }, - { - "epoch": 42.74, - "learning_rate": 1.815183350731848e-08, - "loss": 3.7551, - "step": 3848500 - }, - { - "epoch": 42.74, - "learning_rate": 1.813795171356862e-08, - "loss": 3.7489, - "step": 3849000 - }, - { - "epoch": 42.75, - "learning_rate": 1.8124069919818756e-08, - "loss": 3.7553, - "step": 3849500 - }, - { - "epoch": 42.76, - "learning_rate": 1.8110188126068898e-08, - "loss": 3.7448, - "step": 3850000 - }, - { - "epoch": 42.76, - "learning_rate": 1.8096306332319037e-08, - "loss": 3.7474, - "step": 3850500 - }, - { - "epoch": 42.77, - "learning_rate": 1.8082424538569173e-08, - "loss": 3.759, - "step": 3851000 - }, - { - "epoch": 42.77, - "learning_rate": 1.8068542744819315e-08, - "loss": 3.723, - "step": 3851500 - }, - { - "epoch": 42.78, - "learning_rate": 1.8054660951069454e-08, - "loss": 3.7386, - "step": 3852000 - }, - { - "epoch": 42.78, - "learning_rate": 1.804077915731959e-08, - "loss": 3.7521, - "step": 3852500 - }, - { - "epoch": 42.79, - "learning_rate": 1.802689736356973e-08, - "loss": 3.7472, - "step": 3853000 - }, - { - "epoch": 42.79, - "learning_rate": 1.801301556981987e-08, - "loss": 3.7389, - "step": 3853500 - }, - { - "epoch": 42.8, - "learning_rate": 1.7999133776070007e-08, - "loss": 3.736, - "step": 3854000 - }, - { - "epoch": 42.81, - "learning_rate": 1.7985251982320145e-08, - "loss": 3.7141, - "step": 3854500 - }, - { - "epoch": 42.81, - "learning_rate": 1.7971370188570288e-08, - "loss": 3.7617, - "step": 3855000 - }, - { - "epoch": 42.82, - "learning_rate": 1.7957488394820423e-08, - "loss": 3.7578, - "step": 3855500 - }, - { - "epoch": 42.82, - "learning_rate": 1.7943606601070562e-08, - "loss": 3.7573, - "step": 3856000 - }, - { - "epoch": 42.83, - "learning_rate": 1.79297248073207e-08, - "loss": 3.7686, - "step": 3856500 - }, - { - "epoch": 42.83, - "learning_rate": 1.791584301357084e-08, - "loss": 3.7626, - "step": 3857000 - }, - { - "epoch": 42.84, - "learning_rate": 1.790196121982098e-08, - "loss": 3.7464, - "step": 3857500 - }, - { - "epoch": 42.84, - "learning_rate": 1.7888079426071118e-08, - "loss": 3.7486, - "step": 3858000 - }, - { - "epoch": 42.85, - "learning_rate": 1.7874197632321257e-08, - "loss": 3.7533, - "step": 3858500 - }, - { - "epoch": 42.86, - "learning_rate": 1.7860315838571396e-08, - "loss": 3.7628, - "step": 3859000 - }, - { - "epoch": 42.86, - "learning_rate": 1.7846434044821535e-08, - "loss": 3.7502, - "step": 3859500 - }, - { - "epoch": 42.87, - "learning_rate": 1.7832552251071674e-08, - "loss": 3.7589, - "step": 3860000 - }, - { - "epoch": 42.87, - "learning_rate": 1.7818670457321813e-08, - "loss": 3.7314, - "step": 3860500 - }, - { - "epoch": 42.88, - "learning_rate": 1.7804788663571952e-08, - "loss": 3.7613, - "step": 3861000 - }, - { - "epoch": 42.88, - "learning_rate": 1.779090686982209e-08, - "loss": 3.7562, - "step": 3861500 - }, - { - "epoch": 42.89, - "learning_rate": 1.777702507607223e-08, - "loss": 3.7591, - "step": 3862000 - }, - { - "epoch": 42.89, - "learning_rate": 1.776314328232237e-08, - "loss": 3.7408, - "step": 3862500 - }, - { - "epoch": 42.9, - "learning_rate": 1.7749261488572508e-08, - "loss": 3.7382, - "step": 3863000 - }, - { - "epoch": 42.91, - "learning_rate": 1.7735379694822643e-08, - "loss": 3.7479, - "step": 3863500 - }, - { - "epoch": 42.91, - "learning_rate": 1.7721497901072786e-08, - "loss": 3.7333, - "step": 3864000 - }, - { - "epoch": 42.92, - "learning_rate": 1.7707616107322925e-08, - "loss": 3.7654, - "step": 3864500 - }, - { - "epoch": 42.92, - "learning_rate": 1.769373431357306e-08, - "loss": 3.75, - "step": 3865000 - }, - { - "epoch": 42.93, - "learning_rate": 1.7679852519823203e-08, - "loss": 3.7363, - "step": 3865500 - }, - { - "epoch": 42.93, - "learning_rate": 1.766597072607334e-08, - "loss": 3.7382, - "step": 3866000 - }, - { - "epoch": 42.94, - "learning_rate": 1.7652088932323477e-08, - "loss": 3.743, - "step": 3866500 - }, - { - "epoch": 42.94, - "learning_rate": 1.7638207138573616e-08, - "loss": 3.7381, - "step": 3867000 - }, - { - "epoch": 42.95, - "learning_rate": 1.7624325344823755e-08, - "loss": 3.7507, - "step": 3867500 - }, - { - "epoch": 42.96, - "learning_rate": 1.7610443551073894e-08, - "loss": 3.7458, - "step": 3868000 - }, - { - "epoch": 42.96, - "learning_rate": 1.7596561757324033e-08, - "loss": 3.7623, - "step": 3868500 - }, - { - "epoch": 42.97, - "learning_rate": 1.7582679963574172e-08, - "loss": 3.7407, - "step": 3869000 - }, - { - "epoch": 42.97, - "learning_rate": 1.756879816982431e-08, - "loss": 3.7582, - "step": 3869500 - }, - { - "epoch": 42.98, - "learning_rate": 1.755491637607445e-08, - "loss": 3.7495, - "step": 3870000 - }, - { - "epoch": 42.98, - "learning_rate": 1.754103458232459e-08, - "loss": 3.7433, - "step": 3870500 - }, - { - "epoch": 42.99, - "learning_rate": 1.7527152788574728e-08, - "loss": 3.751, - "step": 3871000 - }, - { - "epoch": 42.99, - "learning_rate": 1.7513270994824867e-08, - "loss": 3.7533, - "step": 3871500 - }, - { - "epoch": 43.0, - "eval_loss": 3.8255672454833984, - "eval_runtime": 6.3042, - "eval_samples_per_second": 246.502, - "step": 3871978 - }, - { - "epoch": 43.0, - "learning_rate": 1.7499389201075006e-08, - "loss": 3.7528, - "step": 3872000 - }, - { - "epoch": 43.01, - "learning_rate": 1.7485507407325145e-08, - "loss": 3.7406, - "step": 3872500 - }, - { - "epoch": 43.01, - "learning_rate": 1.7471625613575284e-08, - "loss": 3.7506, - "step": 3873000 - }, - { - "epoch": 43.02, - "learning_rate": 1.7457743819825422e-08, - "loss": 3.7445, - "step": 3873500 - }, - { - "epoch": 43.02, - "learning_rate": 1.7443862026075558e-08, - "loss": 3.7486, - "step": 3874000 - }, - { - "epoch": 43.03, - "learning_rate": 1.74299802323257e-08, - "loss": 3.7601, - "step": 3874500 - }, - { - "epoch": 43.03, - "learning_rate": 1.741609843857584e-08, - "loss": 3.7414, - "step": 3875000 - }, - { - "epoch": 43.04, - "learning_rate": 1.7402216644825975e-08, - "loss": 3.7485, - "step": 3875500 - }, - { - "epoch": 43.04, - "learning_rate": 1.7388334851076117e-08, - "loss": 3.7608, - "step": 3876000 - }, - { - "epoch": 43.05, - "learning_rate": 1.7374453057326256e-08, - "loss": 3.7693, - "step": 3876500 - }, - { - "epoch": 43.06, - "learning_rate": 1.7360571263576392e-08, - "loss": 3.7726, - "step": 3877000 - }, - { - "epoch": 43.06, - "learning_rate": 1.7346689469826534e-08, - "loss": 3.7592, - "step": 3877500 - }, - { - "epoch": 43.07, - "learning_rate": 1.7332807676076673e-08, - "loss": 3.7737, - "step": 3878000 - }, - { - "epoch": 43.07, - "learning_rate": 1.731892588232681e-08, - "loss": 3.7501, - "step": 3878500 - }, - { - "epoch": 43.08, - "learning_rate": 1.7305044088576948e-08, - "loss": 3.7322, - "step": 3879000 - }, - { - "epoch": 43.08, - "learning_rate": 1.729116229482709e-08, - "loss": 3.7318, - "step": 3879500 - }, - { - "epoch": 43.09, - "learning_rate": 1.7277280501077226e-08, - "loss": 3.7381, - "step": 3880000 - }, - { - "epoch": 43.09, - "learning_rate": 1.7263398707327365e-08, - "loss": 3.7397, - "step": 3880500 - }, - { - "epoch": 43.1, - "learning_rate": 1.7249516913577507e-08, - "loss": 3.7475, - "step": 3881000 - }, - { - "epoch": 43.11, - "learning_rate": 1.7235635119827642e-08, - "loss": 3.7414, - "step": 3881500 - }, - { - "epoch": 43.11, - "learning_rate": 1.722175332607778e-08, - "loss": 3.7565, - "step": 3882000 - }, - { - "epoch": 43.12, - "learning_rate": 1.720787153232792e-08, - "loss": 3.7487, - "step": 3882500 - }, - { - "epoch": 43.12, - "learning_rate": 1.719398973857806e-08, - "loss": 3.7308, - "step": 3883000 - }, - { - "epoch": 43.13, - "learning_rate": 1.7180107944828198e-08, - "loss": 3.7622, - "step": 3883500 - }, - { - "epoch": 43.13, - "learning_rate": 1.7166226151078337e-08, - "loss": 3.7475, - "step": 3884000 - }, - { - "epoch": 43.14, - "learning_rate": 1.7152344357328476e-08, - "loss": 3.7635, - "step": 3884500 - }, - { - "epoch": 43.14, - "learning_rate": 1.7138462563578615e-08, - "loss": 3.7495, - "step": 3885000 - }, - { - "epoch": 43.15, - "learning_rate": 1.7124580769828754e-08, - "loss": 3.7445, - "step": 3885500 - }, - { - "epoch": 43.16, - "learning_rate": 1.711069897607889e-08, - "loss": 3.7559, - "step": 3886000 - }, - { - "epoch": 43.16, - "learning_rate": 1.7096817182329032e-08, - "loss": 3.7689, - "step": 3886500 - }, - { - "epoch": 43.17, - "learning_rate": 1.708293538857917e-08, - "loss": 3.7366, - "step": 3887000 - }, - { - "epoch": 43.17, - "learning_rate": 1.7069053594829307e-08, - "loss": 3.7438, - "step": 3887500 - }, - { - "epoch": 43.18, - "learning_rate": 1.705517180107945e-08, - "loss": 3.7305, - "step": 3888000 - }, - { - "epoch": 43.18, - "learning_rate": 1.7041290007329588e-08, - "loss": 3.7471, - "step": 3888500 - }, - { - "epoch": 43.19, - "learning_rate": 1.7027408213579723e-08, - "loss": 3.7479, - "step": 3889000 - }, - { - "epoch": 43.19, - "learning_rate": 1.7013526419829862e-08, - "loss": 3.7439, - "step": 3889500 - }, - { - "epoch": 43.2, - "learning_rate": 1.6999644626080005e-08, - "loss": 3.7548, - "step": 3890000 - }, - { - "epoch": 43.21, - "learning_rate": 1.698576283233014e-08, - "loss": 3.7437, - "step": 3890500 - }, - { - "epoch": 43.21, - "learning_rate": 1.697188103858028e-08, - "loss": 3.7404, - "step": 3891000 - }, - { - "epoch": 43.22, - "learning_rate": 1.695799924483042e-08, - "loss": 3.7559, - "step": 3891500 - }, - { - "epoch": 43.22, - "learning_rate": 1.6944117451080557e-08, - "loss": 3.7526, - "step": 3892000 - }, - { - "epoch": 43.23, - "learning_rate": 1.6930235657330696e-08, - "loss": 3.7628, - "step": 3892500 - }, - { - "epoch": 43.23, - "learning_rate": 1.6916353863580835e-08, - "loss": 3.755, - "step": 3893000 - }, - { - "epoch": 43.24, - "learning_rate": 1.6902472069830974e-08, - "loss": 3.7483, - "step": 3893500 - }, - { - "epoch": 43.24, - "learning_rate": 1.6888590276081113e-08, - "loss": 3.732, - "step": 3894000 - }, - { - "epoch": 43.25, - "learning_rate": 1.6874708482331252e-08, - "loss": 3.7459, - "step": 3894500 - }, - { - "epoch": 43.26, - "learning_rate": 1.686082668858139e-08, - "loss": 3.7528, - "step": 3895000 - }, - { - "epoch": 43.26, - "learning_rate": 1.684694489483153e-08, - "loss": 3.7382, - "step": 3895500 - }, - { - "epoch": 43.27, - "learning_rate": 1.683306310108167e-08, - "loss": 3.7536, - "step": 3896000 - }, - { - "epoch": 43.27, - "learning_rate": 1.6819181307331808e-08, - "loss": 3.7462, - "step": 3896500 - }, - { - "epoch": 43.28, - "learning_rate": 1.6805299513581947e-08, - "loss": 3.7549, - "step": 3897000 - }, - { - "epoch": 43.28, - "learning_rate": 1.6791417719832086e-08, - "loss": 3.7586, - "step": 3897500 - }, - { - "epoch": 43.29, - "learning_rate": 1.677753592608222e-08, - "loss": 3.7454, - "step": 3898000 - }, - { - "epoch": 43.29, - "learning_rate": 1.6763654132332364e-08, - "loss": 3.7264, - "step": 3898500 - }, - { - "epoch": 43.3, - "learning_rate": 1.6749772338582503e-08, - "loss": 3.7493, - "step": 3899000 - }, - { - "epoch": 43.31, - "learning_rate": 1.6735890544832638e-08, - "loss": 3.7701, - "step": 3899500 - }, - { - "epoch": 43.31, - "learning_rate": 1.6722008751082777e-08, - "loss": 3.7509, - "step": 3900000 - }, - { - "epoch": 43.32, - "learning_rate": 1.670812695733292e-08, - "loss": 3.7465, - "step": 3900500 - }, - { - "epoch": 43.32, - "learning_rate": 1.6694245163583055e-08, - "loss": 3.7447, - "step": 3901000 - }, - { - "epoch": 43.33, - "learning_rate": 1.6680363369833194e-08, - "loss": 3.7437, - "step": 3901500 - }, - { - "epoch": 43.33, - "learning_rate": 1.6666481576083336e-08, - "loss": 3.7702, - "step": 3902000 - }, - { - "epoch": 43.34, - "learning_rate": 1.6652599782333472e-08, - "loss": 3.7513, - "step": 3902500 - }, - { - "epoch": 43.34, - "learning_rate": 1.663871798858361e-08, - "loss": 3.7491, - "step": 3903000 - }, - { - "epoch": 43.35, - "learning_rate": 1.662483619483375e-08, - "loss": 3.7572, - "step": 3903500 - }, - { - "epoch": 43.36, - "learning_rate": 1.661095440108389e-08, - "loss": 3.7428, - "step": 3904000 - }, - { - "epoch": 43.36, - "learning_rate": 1.6597072607334028e-08, - "loss": 3.735, - "step": 3904500 - }, - { - "epoch": 43.37, - "learning_rate": 1.6583190813584167e-08, - "loss": 3.7267, - "step": 3905000 - }, - { - "epoch": 43.37, - "learning_rate": 1.6569309019834306e-08, - "loss": 3.7736, - "step": 3905500 - }, - { - "epoch": 43.38, - "learning_rate": 1.6555427226084445e-08, - "loss": 3.749, - "step": 3906000 - }, - { - "epoch": 43.38, - "learning_rate": 1.6541545432334584e-08, - "loss": 3.7508, - "step": 3906500 - }, - { - "epoch": 43.39, - "learning_rate": 1.6527663638584723e-08, - "loss": 3.7404, - "step": 3907000 - }, - { - "epoch": 43.39, - "learning_rate": 1.651378184483486e-08, - "loss": 3.7419, - "step": 3907500 - }, - { - "epoch": 43.4, - "learning_rate": 1.6499900051085e-08, - "loss": 3.7409, - "step": 3908000 - }, - { - "epoch": 43.41, - "learning_rate": 1.648601825733514e-08, - "loss": 3.7618, - "step": 3908500 - }, - { - "epoch": 43.41, - "learning_rate": 1.647213646358528e-08, - "loss": 3.7564, - "step": 3909000 - }, - { - "epoch": 43.42, - "learning_rate": 1.6458254669835417e-08, - "loss": 3.755, - "step": 3909500 - }, - { - "epoch": 43.42, - "learning_rate": 1.6444372876085556e-08, - "loss": 3.744, - "step": 3910000 - }, - { - "epoch": 43.43, - "learning_rate": 1.6430491082335695e-08, - "loss": 3.7483, - "step": 3910500 - }, - { - "epoch": 43.43, - "learning_rate": 1.6416609288585834e-08, - "loss": 3.7437, - "step": 3911000 - }, - { - "epoch": 43.44, - "learning_rate": 1.6402727494835973e-08, - "loss": 3.7425, - "step": 3911500 - }, - { - "epoch": 43.44, - "learning_rate": 1.638884570108611e-08, - "loss": 3.7548, - "step": 3912000 - }, - { - "epoch": 43.45, - "learning_rate": 1.637496390733625e-08, - "loss": 3.7564, - "step": 3912500 - }, - { - "epoch": 43.46, - "learning_rate": 1.636108211358639e-08, - "loss": 3.7471, - "step": 3913000 - }, - { - "epoch": 43.46, - "learning_rate": 1.6347200319836526e-08, - "loss": 3.7405, - "step": 3913500 - }, - { - "epoch": 43.47, - "learning_rate": 1.6333318526086668e-08, - "loss": 3.7463, - "step": 3914000 - }, - { - "epoch": 43.47, - "learning_rate": 1.6319436732336807e-08, - "loss": 3.7577, - "step": 3914500 - }, - { - "epoch": 43.48, - "learning_rate": 1.6305554938586943e-08, - "loss": 3.7576, - "step": 3915000 - }, - { - "epoch": 43.48, - "learning_rate": 1.629167314483708e-08, - "loss": 3.7591, - "step": 3915500 - }, - { - "epoch": 43.49, - "learning_rate": 1.6277791351087224e-08, - "loss": 3.7517, - "step": 3916000 - }, - { - "epoch": 43.49, - "learning_rate": 1.626390955733736e-08, - "loss": 3.7541, - "step": 3916500 - }, - { - "epoch": 43.5, - "learning_rate": 1.62500277635875e-08, - "loss": 3.7497, - "step": 3917000 - }, - { - "epoch": 43.51, - "learning_rate": 1.623614596983764e-08, - "loss": 3.7563, - "step": 3917500 - }, - { - "epoch": 43.51, - "learning_rate": 1.6222264176087776e-08, - "loss": 3.7538, - "step": 3918000 - }, - { - "epoch": 43.52, - "learning_rate": 1.6208382382337915e-08, - "loss": 3.75, - "step": 3918500 - }, - { - "epoch": 43.52, - "learning_rate": 1.6194500588588054e-08, - "loss": 3.7583, - "step": 3919000 - }, - { - "epoch": 43.53, - "learning_rate": 1.6180618794838193e-08, - "loss": 3.7424, - "step": 3919500 - }, - { - "epoch": 43.53, - "learning_rate": 1.6166737001088332e-08, - "loss": 3.7493, - "step": 3920000 - }, - { - "epoch": 43.54, - "learning_rate": 1.615285520733847e-08, - "loss": 3.7348, - "step": 3920500 - }, - { - "epoch": 43.54, - "learning_rate": 1.613897341358861e-08, - "loss": 3.7469, - "step": 3921000 - }, - { - "epoch": 43.55, - "learning_rate": 1.612509161983875e-08, - "loss": 3.732, - "step": 3921500 - }, - { - "epoch": 43.56, - "learning_rate": 1.6111209826088888e-08, - "loss": 3.7543, - "step": 3922000 - }, - { - "epoch": 43.56, - "learning_rate": 1.6097328032339024e-08, - "loss": 3.7402, - "step": 3922500 - }, - { - "epoch": 43.57, - "learning_rate": 1.6083446238589166e-08, - "loss": 3.7561, - "step": 3923000 - }, - { - "epoch": 43.57, - "learning_rate": 1.6069564444839305e-08, - "loss": 3.7522, - "step": 3923500 - }, - { - "epoch": 43.58, - "learning_rate": 1.605568265108944e-08, - "loss": 3.7392, - "step": 3924000 - }, - { - "epoch": 43.58, - "learning_rate": 1.6041800857339583e-08, - "loss": 3.755, - "step": 3924500 - }, - { - "epoch": 43.59, - "learning_rate": 1.6027919063589722e-08, - "loss": 3.7657, - "step": 3925000 - }, - { - "epoch": 43.59, - "learning_rate": 1.6014037269839857e-08, - "loss": 3.7469, - "step": 3925500 - }, - { - "epoch": 43.6, - "learning_rate": 1.6000155476089996e-08, - "loss": 3.7536, - "step": 3926000 - }, - { - "epoch": 43.61, - "learning_rate": 1.598627368234014e-08, - "loss": 3.7645, - "step": 3926500 - }, - { - "epoch": 43.61, - "learning_rate": 1.5972391888590274e-08, - "loss": 3.761, - "step": 3927000 - }, - { - "epoch": 43.62, - "learning_rate": 1.5958510094840413e-08, - "loss": 3.7535, - "step": 3927500 - }, - { - "epoch": 43.62, - "learning_rate": 1.5944628301090555e-08, - "loss": 3.7536, - "step": 3928000 - }, - { - "epoch": 43.63, - "learning_rate": 1.593074650734069e-08, - "loss": 3.7379, - "step": 3928500 - }, - { - "epoch": 43.63, - "learning_rate": 1.591686471359083e-08, - "loss": 3.7404, - "step": 3929000 - }, - { - "epoch": 43.64, - "learning_rate": 1.590298291984097e-08, - "loss": 3.7342, - "step": 3929500 - }, - { - "epoch": 43.64, - "learning_rate": 1.5889101126091108e-08, - "loss": 3.7661, - "step": 3930000 - }, - { - "epoch": 43.65, - "learning_rate": 1.5875219332341247e-08, - "loss": 3.7399, - "step": 3930500 - }, - { - "epoch": 43.66, - "learning_rate": 1.5861337538591386e-08, - "loss": 3.7416, - "step": 3931000 - }, - { - "epoch": 43.66, - "learning_rate": 1.5847455744841525e-08, - "loss": 3.7442, - "step": 3931500 - }, - { - "epoch": 43.67, - "learning_rate": 1.5833573951091664e-08, - "loss": 3.7418, - "step": 3932000 - }, - { - "epoch": 43.67, - "learning_rate": 1.5819692157341803e-08, - "loss": 3.7529, - "step": 3932500 - }, - { - "epoch": 43.68, - "learning_rate": 1.5805810363591938e-08, - "loss": 3.7489, - "step": 3933000 - }, - { - "epoch": 43.68, - "learning_rate": 1.579192856984208e-08, - "loss": 3.7534, - "step": 3933500 - }, - { - "epoch": 43.69, - "learning_rate": 1.577804677609222e-08, - "loss": 3.7475, - "step": 3934000 - }, - { - "epoch": 43.69, - "learning_rate": 1.5764164982342355e-08, - "loss": 3.754, - "step": 3934500 - }, - { - "epoch": 43.7, - "learning_rate": 1.5750283188592497e-08, - "loss": 3.7412, - "step": 3935000 - }, - { - "epoch": 43.71, - "learning_rate": 1.5736401394842636e-08, - "loss": 3.7617, - "step": 3935500 - }, - { - "epoch": 43.71, - "learning_rate": 1.5722519601092772e-08, - "loss": 3.7397, - "step": 3936000 - }, - { - "epoch": 43.72, - "learning_rate": 1.570863780734291e-08, - "loss": 3.7407, - "step": 3936500 - }, - { - "epoch": 43.72, - "learning_rate": 1.5694756013593053e-08, - "loss": 3.7395, - "step": 3937000 - }, - { - "epoch": 43.73, - "learning_rate": 1.568087421984319e-08, - "loss": 3.7607, - "step": 3937500 - }, - { - "epoch": 43.73, - "learning_rate": 1.5666992426093328e-08, - "loss": 3.7512, - "step": 3938000 - }, - { - "epoch": 43.74, - "learning_rate": 1.565311063234347e-08, - "loss": 3.758, - "step": 3938500 - }, - { - "epoch": 43.74, - "learning_rate": 1.5639228838593606e-08, - "loss": 3.7398, - "step": 3939000 - }, - { - "epoch": 43.75, - "learning_rate": 1.5625347044843745e-08, - "loss": 3.7476, - "step": 3939500 - }, - { - "epoch": 43.76, - "learning_rate": 1.5611465251093887e-08, - "loss": 3.7398, - "step": 3940000 - }, - { - "epoch": 43.76, - "learning_rate": 1.5597583457344023e-08, - "loss": 3.7547, - "step": 3940500 - }, - { - "epoch": 43.77, - "learning_rate": 1.558370166359416e-08, - "loss": 3.7469, - "step": 3941000 - }, - { - "epoch": 43.77, - "learning_rate": 1.55698198698443e-08, - "loss": 3.746, - "step": 3941500 - }, - { - "epoch": 43.78, - "learning_rate": 1.555593807609444e-08, - "loss": 3.7542, - "step": 3942000 - }, - { - "epoch": 43.78, - "learning_rate": 1.554205628234458e-08, - "loss": 3.743, - "step": 3942500 - }, - { - "epoch": 43.79, - "learning_rate": 1.5528174488594717e-08, - "loss": 3.7359, - "step": 3943000 - }, - { - "epoch": 43.79, - "learning_rate": 1.5514292694844856e-08, - "loss": 3.7464, - "step": 3943500 - }, - { - "epoch": 43.8, - "learning_rate": 1.5500410901094995e-08, - "loss": 3.7674, - "step": 3944000 - }, - { - "epoch": 43.81, - "learning_rate": 1.5486529107345134e-08, - "loss": 3.7369, - "step": 3944500 - }, - { - "epoch": 43.81, - "learning_rate": 1.5472647313595273e-08, - "loss": 3.7428, - "step": 3945000 - }, - { - "epoch": 43.82, - "learning_rate": 1.5458765519845412e-08, - "loss": 3.7261, - "step": 3945500 - }, - { - "epoch": 43.82, - "learning_rate": 1.544488372609555e-08, - "loss": 3.7375, - "step": 3946000 - }, - { - "epoch": 43.83, - "learning_rate": 1.543100193234569e-08, - "loss": 3.7342, - "step": 3946500 - }, - { - "epoch": 43.83, - "learning_rate": 1.541712013859583e-08, - "loss": 3.7523, - "step": 3947000 - }, - { - "epoch": 43.84, - "learning_rate": 1.5403238344845968e-08, - "loss": 3.7373, - "step": 3947500 - }, - { - "epoch": 43.84, - "learning_rate": 1.5389356551096107e-08, - "loss": 3.7577, - "step": 3948000 - }, - { - "epoch": 43.85, - "learning_rate": 1.5375474757346243e-08, - "loss": 3.7551, - "step": 3948500 - }, - { - "epoch": 43.86, - "learning_rate": 1.5361592963596385e-08, - "loss": 3.7512, - "step": 3949000 - }, - { - "epoch": 43.86, - "learning_rate": 1.5347711169846524e-08, - "loss": 3.7569, - "step": 3949500 - }, - { - "epoch": 43.87, - "learning_rate": 1.533382937609666e-08, - "loss": 3.7532, - "step": 3950000 - }, - { - "epoch": 43.87, - "learning_rate": 1.5319947582346802e-08, - "loss": 3.7384, - "step": 3950500 - }, - { - "epoch": 43.88, - "learning_rate": 1.530606578859694e-08, - "loss": 3.7415, - "step": 3951000 - }, - { - "epoch": 43.88, - "learning_rate": 1.5292183994847076e-08, - "loss": 3.742, - "step": 3951500 - }, - { - "epoch": 43.89, - "learning_rate": 1.5278302201097215e-08, - "loss": 3.7574, - "step": 3952000 - }, - { - "epoch": 43.89, - "learning_rate": 1.5264420407347354e-08, - "loss": 3.7507, - "step": 3952500 - }, - { - "epoch": 43.9, - "learning_rate": 1.5250538613597493e-08, - "loss": 3.7542, - "step": 3953000 - }, - { - "epoch": 43.91, - "learning_rate": 1.5236656819847632e-08, - "loss": 3.7392, - "step": 3953500 - }, - { - "epoch": 43.91, - "learning_rate": 1.522277502609777e-08, - "loss": 3.7515, - "step": 3954000 - }, - { - "epoch": 43.92, - "learning_rate": 1.520889323234791e-08, - "loss": 3.7611, - "step": 3954500 - }, - { - "epoch": 43.92, - "learning_rate": 1.519501143859805e-08, - "loss": 3.77, - "step": 3955000 - }, - { - "epoch": 43.93, - "learning_rate": 1.5181129644848188e-08, - "loss": 3.743, - "step": 3955500 - }, - { - "epoch": 43.93, - "learning_rate": 1.5167247851098327e-08, - "loss": 3.7243, - "step": 3956000 - }, - { - "epoch": 43.94, - "learning_rate": 1.5153366057348466e-08, - "loss": 3.7485, - "step": 3956500 - }, - { - "epoch": 43.94, - "learning_rate": 1.5139484263598605e-08, - "loss": 3.7357, - "step": 3957000 - }, - { - "epoch": 43.95, - "learning_rate": 1.5125602469848744e-08, - "loss": 3.7501, - "step": 3957500 - }, - { - "epoch": 43.96, - "learning_rate": 1.5111720676098883e-08, - "loss": 3.7529, - "step": 3958000 - }, - { - "epoch": 43.96, - "learning_rate": 1.5097838882349022e-08, - "loss": 3.7492, - "step": 3958500 - }, - { - "epoch": 43.97, - "learning_rate": 1.5083957088599157e-08, - "loss": 3.7333, - "step": 3959000 - }, - { - "epoch": 43.97, - "learning_rate": 1.50700752948493e-08, - "loss": 3.7454, - "step": 3959500 - }, - { - "epoch": 43.98, - "learning_rate": 1.505619350109944e-08, - "loss": 3.7475, - "step": 3960000 - }, - { - "epoch": 43.98, - "learning_rate": 1.5042311707349574e-08, - "loss": 3.7323, - "step": 3960500 - }, - { - "epoch": 43.99, - "learning_rate": 1.5028429913599717e-08, - "loss": 3.7384, - "step": 3961000 - }, - { - "epoch": 43.99, - "learning_rate": 1.5014548119849855e-08, - "loss": 3.7494, - "step": 3961500 - }, - { - "epoch": 44.0, - "learning_rate": 1.500066632609999e-08, - "loss": 3.7449, - "step": 3962000 - }, - { - "epoch": 44.0, - "eval_loss": 3.8248701095581055, - "eval_runtime": 6.3081, - "eval_samples_per_second": 246.351, - "step": 3962024 - }, - { - "epoch": 44.01, - "learning_rate": 1.498678453235013e-08, - "loss": 3.7435, - "step": 3962500 - }, - { - "epoch": 44.01, - "learning_rate": 1.4972902738600272e-08, - "loss": 3.7607, - "step": 3963000 - }, - { - "epoch": 44.02, - "learning_rate": 1.4959020944850408e-08, - "loss": 3.7453, - "step": 3963500 - }, - { - "epoch": 44.02, - "learning_rate": 1.4945139151100547e-08, - "loss": 3.7395, - "step": 3964000 - }, - { - "epoch": 44.03, - "learning_rate": 1.493125735735069e-08, - "loss": 3.73, - "step": 3964500 - }, - { - "epoch": 44.03, - "learning_rate": 1.4917375563600825e-08, - "loss": 3.7628, - "step": 3965000 - }, - { - "epoch": 44.04, - "learning_rate": 1.4903493769850964e-08, - "loss": 3.7502, - "step": 3965500 - }, - { - "epoch": 44.04, - "learning_rate": 1.4889611976101101e-08, - "loss": 3.7414, - "step": 3966000 - }, - { - "epoch": 44.05, - "learning_rate": 1.4875730182351243e-08, - "loss": 3.7374, - "step": 3966500 - }, - { - "epoch": 44.06, - "learning_rate": 1.486184838860138e-08, - "loss": 3.7627, - "step": 3967000 - }, - { - "epoch": 44.06, - "learning_rate": 1.4847966594851518e-08, - "loss": 3.7364, - "step": 3967500 - }, - { - "epoch": 44.07, - "learning_rate": 1.483408480110166e-08, - "loss": 3.7583, - "step": 3968000 - }, - { - "epoch": 44.07, - "learning_rate": 1.4820203007351798e-08, - "loss": 3.7499, - "step": 3968500 - }, - { - "epoch": 44.08, - "learning_rate": 1.4806321213601935e-08, - "loss": 3.7442, - "step": 3969000 - }, - { - "epoch": 44.08, - "learning_rate": 1.4792439419852074e-08, - "loss": 3.7424, - "step": 3969500 - }, - { - "epoch": 44.09, - "learning_rate": 1.4778557626102214e-08, - "loss": 3.7549, - "step": 3970000 - }, - { - "epoch": 44.09, - "learning_rate": 1.4764675832352352e-08, - "loss": 3.7386, - "step": 3970500 - }, - { - "epoch": 44.1, - "learning_rate": 1.475079403860249e-08, - "loss": 3.7489, - "step": 3971000 - }, - { - "epoch": 44.11, - "learning_rate": 1.4736912244852631e-08, - "loss": 3.7499, - "step": 3971500 - }, - { - "epoch": 44.11, - "learning_rate": 1.4723030451102769e-08, - "loss": 3.7492, - "step": 3972000 - }, - { - "epoch": 44.12, - "learning_rate": 1.4709148657352908e-08, - "loss": 3.7406, - "step": 3972500 - }, - { - "epoch": 44.12, - "learning_rate": 1.4695266863603048e-08, - "loss": 3.7284, - "step": 3973000 - }, - { - "epoch": 44.13, - "learning_rate": 1.4681385069853185e-08, - "loss": 3.7371, - "step": 3973500 - }, - { - "epoch": 44.13, - "learning_rate": 1.4667503276103324e-08, - "loss": 3.7484, - "step": 3974000 - }, - { - "epoch": 44.14, - "learning_rate": 1.4653621482353462e-08, - "loss": 3.762, - "step": 3974500 - }, - { - "epoch": 44.14, - "learning_rate": 1.4639739688603602e-08, - "loss": 3.7443, - "step": 3975000 - }, - { - "epoch": 44.15, - "learning_rate": 1.4625857894853741e-08, - "loss": 3.7604, - "step": 3975500 - }, - { - "epoch": 44.16, - "learning_rate": 1.4611976101103879e-08, - "loss": 3.7367, - "step": 3976000 - }, - { - "epoch": 44.16, - "learning_rate": 1.4598094307354019e-08, - "loss": 3.7637, - "step": 3976500 - }, - { - "epoch": 44.17, - "learning_rate": 1.4584212513604158e-08, - "loss": 3.7452, - "step": 3977000 - }, - { - "epoch": 44.17, - "learning_rate": 1.4570330719854295e-08, - "loss": 3.7246, - "step": 3977500 - }, - { - "epoch": 44.18, - "learning_rate": 1.4556448926104434e-08, - "loss": 3.7642, - "step": 3978000 - }, - { - "epoch": 44.18, - "learning_rate": 1.4542567132354575e-08, - "loss": 3.7721, - "step": 3978500 - }, - { - "epoch": 44.19, - "learning_rate": 1.4528685338604712e-08, - "loss": 3.7495, - "step": 3979000 - }, - { - "epoch": 44.19, - "learning_rate": 1.4514803544854851e-08, - "loss": 3.7444, - "step": 3979500 - }, - { - "epoch": 44.2, - "learning_rate": 1.4500921751104992e-08, - "loss": 3.7519, - "step": 3980000 - }, - { - "epoch": 44.21, - "learning_rate": 1.4487039957355129e-08, - "loss": 3.7442, - "step": 3980500 - }, - { - "epoch": 44.21, - "learning_rate": 1.4473158163605268e-08, - "loss": 3.768, - "step": 3981000 - }, - { - "epoch": 44.22, - "learning_rate": 1.4459276369855405e-08, - "loss": 3.7554, - "step": 3981500 - }, - { - "epoch": 44.22, - "learning_rate": 1.4445394576105546e-08, - "loss": 3.7301, - "step": 3982000 - }, - { - "epoch": 44.23, - "learning_rate": 1.4431512782355685e-08, - "loss": 3.7521, - "step": 3982500 - }, - { - "epoch": 44.23, - "learning_rate": 1.4417630988605822e-08, - "loss": 3.7522, - "step": 3983000 - }, - { - "epoch": 44.24, - "learning_rate": 1.4403749194855963e-08, - "loss": 3.7554, - "step": 3983500 - }, - { - "epoch": 44.24, - "learning_rate": 1.4389867401106102e-08, - "loss": 3.7347, - "step": 3984000 - }, - { - "epoch": 44.25, - "learning_rate": 1.4375985607356239e-08, - "loss": 3.7266, - "step": 3984500 - }, - { - "epoch": 44.26, - "learning_rate": 1.4362103813606376e-08, - "loss": 3.741, - "step": 3985000 - }, - { - "epoch": 44.26, - "learning_rate": 1.4348222019856519e-08, - "loss": 3.7482, - "step": 3985500 - }, - { - "epoch": 44.27, - "learning_rate": 1.4334340226106656e-08, - "loss": 3.7459, - "step": 3986000 - }, - { - "epoch": 44.27, - "learning_rate": 1.4320458432356793e-08, - "loss": 3.7621, - "step": 3986500 - }, - { - "epoch": 44.28, - "learning_rate": 1.4306576638606936e-08, - "loss": 3.7382, - "step": 3987000 - }, - { - "epoch": 44.28, - "learning_rate": 1.4292694844857073e-08, - "loss": 3.748, - "step": 3987500 - }, - { - "epoch": 44.29, - "learning_rate": 1.427881305110721e-08, - "loss": 3.7637, - "step": 3988000 - }, - { - "epoch": 44.29, - "learning_rate": 1.4264931257357349e-08, - "loss": 3.7428, - "step": 3988500 - }, - { - "epoch": 44.3, - "learning_rate": 1.425104946360749e-08, - "loss": 3.7289, - "step": 3989000 - }, - { - "epoch": 44.31, - "learning_rate": 1.4237167669857627e-08, - "loss": 3.7704, - "step": 3989500 - }, - { - "epoch": 44.31, - "learning_rate": 1.4223285876107766e-08, - "loss": 3.7569, - "step": 3990000 - }, - { - "epoch": 44.32, - "learning_rate": 1.4209404082357907e-08, - "loss": 3.771, - "step": 3990500 - }, - { - "epoch": 44.32, - "learning_rate": 1.4195522288608044e-08, - "loss": 3.7529, - "step": 3991000 - }, - { - "epoch": 44.33, - "learning_rate": 1.4181640494858183e-08, - "loss": 3.7669, - "step": 3991500 - }, - { - "epoch": 44.33, - "learning_rate": 1.416775870110832e-08, - "loss": 3.7479, - "step": 3992000 - }, - { - "epoch": 44.34, - "learning_rate": 1.415387690735846e-08, - "loss": 3.7628, - "step": 3992500 - }, - { - "epoch": 44.34, - "learning_rate": 1.41399951136086e-08, - "loss": 3.7516, - "step": 3993000 - }, - { - "epoch": 44.35, - "learning_rate": 1.4126113319858737e-08, - "loss": 3.7432, - "step": 3993500 - }, - { - "epoch": 44.36, - "learning_rate": 1.4112231526108878e-08, - "loss": 3.7659, - "step": 3994000 - }, - { - "epoch": 44.36, - "learning_rate": 1.4098349732359017e-08, - "loss": 3.7552, - "step": 3994500 - }, - { - "epoch": 44.37, - "learning_rate": 1.4084467938609154e-08, - "loss": 3.7322, - "step": 3995000 - }, - { - "epoch": 44.37, - "learning_rate": 1.4070586144859293e-08, - "loss": 3.7449, - "step": 3995500 - }, - { - "epoch": 44.38, - "learning_rate": 1.4056704351109433e-08, - "loss": 3.7483, - "step": 3996000 - }, - { - "epoch": 44.38, - "learning_rate": 1.404282255735957e-08, - "loss": 3.7239, - "step": 3996500 - }, - { - "epoch": 44.39, - "learning_rate": 1.402894076360971e-08, - "loss": 3.7373, - "step": 3997000 - }, - { - "epoch": 44.39, - "learning_rate": 1.401505896985985e-08, - "loss": 3.7341, - "step": 3997500 - }, - { - "epoch": 44.4, - "learning_rate": 1.4001177176109988e-08, - "loss": 3.7541, - "step": 3998000 - }, - { - "epoch": 44.41, - "learning_rate": 1.3987295382360127e-08, - "loss": 3.7219, - "step": 3998500 - }, - { - "epoch": 44.41, - "learning_rate": 1.3973413588610264e-08, - "loss": 3.7479, - "step": 3999000 - }, - { - "epoch": 44.42, - "learning_rate": 1.3959531794860404e-08, - "loss": 3.7457, - "step": 3999500 - }, - { - "epoch": 44.42, - "learning_rate": 1.3945650001110543e-08, - "loss": 3.7491, - "step": 4000000 - }, - { - "epoch": 44.43, - "learning_rate": 1.393176820736068e-08, - "loss": 3.7566, - "step": 4000500 - }, - { - "epoch": 44.43, - "learning_rate": 1.3917886413610821e-08, - "loss": 3.7669, - "step": 4001000 - }, - { - "epoch": 44.44, - "learning_rate": 1.390400461986096e-08, - "loss": 3.7531, - "step": 4001500 - }, - { - "epoch": 44.44, - "learning_rate": 1.3890122826111098e-08, - "loss": 3.7343, - "step": 4002000 - }, - { - "epoch": 44.45, - "learning_rate": 1.3876241032361238e-08, - "loss": 3.7539, - "step": 4002500 - }, - { - "epoch": 44.46, - "learning_rate": 1.3862359238611376e-08, - "loss": 3.7508, - "step": 4003000 - }, - { - "epoch": 44.46, - "learning_rate": 1.3848477444861514e-08, - "loss": 3.7296, - "step": 4003500 - }, - { - "epoch": 44.47, - "learning_rate": 1.3834595651111652e-08, - "loss": 3.7681, - "step": 4004000 - }, - { - "epoch": 44.47, - "learning_rate": 1.3820713857361792e-08, - "loss": 3.7451, - "step": 4004500 - }, - { - "epoch": 44.48, - "learning_rate": 1.3806832063611931e-08, - "loss": 3.7555, - "step": 4005000 - }, - { - "epoch": 44.48, - "learning_rate": 1.3792950269862069e-08, - "loss": 3.745, - "step": 4005500 - }, - { - "epoch": 44.49, - "learning_rate": 1.377906847611221e-08, - "loss": 3.7486, - "step": 4006000 - }, - { - "epoch": 44.49, - "learning_rate": 1.3765186682362348e-08, - "loss": 3.7372, - "step": 4006500 - }, - { - "epoch": 44.5, - "learning_rate": 1.3751304888612486e-08, - "loss": 3.7507, - "step": 4007000 - }, - { - "epoch": 44.51, - "learning_rate": 1.3737423094862624e-08, - "loss": 3.7391, - "step": 4007500 - }, - { - "epoch": 44.51, - "learning_rate": 1.3723541301112765e-08, - "loss": 3.7495, - "step": 4008000 - }, - { - "epoch": 44.52, - "learning_rate": 1.3709659507362902e-08, - "loss": 3.7727, - "step": 4008500 - }, - { - "epoch": 44.52, - "learning_rate": 1.3695777713613041e-08, - "loss": 3.756, - "step": 4009000 - }, - { - "epoch": 44.53, - "learning_rate": 1.3681895919863182e-08, - "loss": 3.7414, - "step": 4009500 - }, - { - "epoch": 44.53, - "learning_rate": 1.366801412611332e-08, - "loss": 3.7522, - "step": 4010000 - }, - { - "epoch": 44.54, - "learning_rate": 1.3654132332363458e-08, - "loss": 3.7396, - "step": 4010500 - }, - { - "epoch": 44.54, - "learning_rate": 1.3640250538613596e-08, - "loss": 3.7509, - "step": 4011000 - }, - { - "epoch": 44.55, - "learning_rate": 1.3626368744863736e-08, - "loss": 3.7472, - "step": 4011500 - }, - { - "epoch": 44.56, - "learning_rate": 1.3612486951113875e-08, - "loss": 3.759, - "step": 4012000 - }, - { - "epoch": 44.56, - "learning_rate": 1.3598605157364012e-08, - "loss": 3.7635, - "step": 4012500 - }, - { - "epoch": 44.57, - "learning_rate": 1.3584723363614153e-08, - "loss": 3.7449, - "step": 4013000 - }, - { - "epoch": 44.57, - "learning_rate": 1.3570841569864292e-08, - "loss": 3.7508, - "step": 4013500 - }, - { - "epoch": 44.58, - "learning_rate": 1.355695977611443e-08, - "loss": 3.7459, - "step": 4014000 - }, - { - "epoch": 44.58, - "learning_rate": 1.3543077982364568e-08, - "loss": 3.7684, - "step": 4014500 - }, - { - "epoch": 44.59, - "learning_rate": 1.3529196188614709e-08, - "loss": 3.7569, - "step": 4015000 - }, - { - "epoch": 44.59, - "learning_rate": 1.3515314394864846e-08, - "loss": 3.7226, - "step": 4015500 - }, - { - "epoch": 44.6, - "learning_rate": 1.3501432601114985e-08, - "loss": 3.758, - "step": 4016000 - }, - { - "epoch": 44.6, - "learning_rate": 1.3487550807365126e-08, - "loss": 3.758, - "step": 4016500 - }, - { - "epoch": 44.61, - "learning_rate": 1.3473669013615263e-08, - "loss": 3.7382, - "step": 4017000 - }, - { - "epoch": 44.62, - "learning_rate": 1.3459787219865402e-08, - "loss": 3.7436, - "step": 4017500 - }, - { - "epoch": 44.62, - "learning_rate": 1.344590542611554e-08, - "loss": 3.7335, - "step": 4018000 - }, - { - "epoch": 44.63, - "learning_rate": 1.343202363236568e-08, - "loss": 3.7361, - "step": 4018500 - }, - { - "epoch": 44.63, - "learning_rate": 1.3418141838615817e-08, - "loss": 3.7572, - "step": 4019000 - }, - { - "epoch": 44.64, - "learning_rate": 1.3404260044865956e-08, - "loss": 3.7661, - "step": 4019500 - }, - { - "epoch": 44.64, - "learning_rate": 1.3390378251116097e-08, - "loss": 3.7308, - "step": 4020000 - }, - { - "epoch": 44.65, - "learning_rate": 1.3376496457366234e-08, - "loss": 3.7391, - "step": 4020500 - }, - { - "epoch": 44.65, - "learning_rate": 1.3362614663616373e-08, - "loss": 3.7574, - "step": 4021000 - }, - { - "epoch": 44.66, - "learning_rate": 1.334873286986651e-08, - "loss": 3.7558, - "step": 4021500 - }, - { - "epoch": 44.67, - "learning_rate": 1.3334851076116651e-08, - "loss": 3.7232, - "step": 4022000 - }, - { - "epoch": 44.67, - "learning_rate": 1.332096928236679e-08, - "loss": 3.7447, - "step": 4022500 - }, - { - "epoch": 44.68, - "learning_rate": 1.3307087488616927e-08, - "loss": 3.7503, - "step": 4023000 - }, - { - "epoch": 44.68, - "learning_rate": 1.3293205694867068e-08, - "loss": 3.756, - "step": 4023500 - }, - { - "epoch": 44.69, - "learning_rate": 1.3279323901117207e-08, - "loss": 3.7406, - "step": 4024000 - }, - { - "epoch": 44.69, - "learning_rate": 1.3265442107367344e-08, - "loss": 3.7481, - "step": 4024500 - }, - { - "epoch": 44.7, - "learning_rate": 1.3251560313617483e-08, - "loss": 3.7262, - "step": 4025000 - }, - { - "epoch": 44.7, - "learning_rate": 1.3237678519867624e-08, - "loss": 3.7111, - "step": 4025500 - }, - { - "epoch": 44.71, - "learning_rate": 1.3223796726117761e-08, - "loss": 3.7257, - "step": 4026000 - }, - { - "epoch": 44.72, - "learning_rate": 1.32099149323679e-08, - "loss": 3.7578, - "step": 4026500 - }, - { - "epoch": 44.72, - "learning_rate": 1.319603313861804e-08, - "loss": 3.7541, - "step": 4027000 - }, - { - "epoch": 44.73, - "learning_rate": 1.3182151344868178e-08, - "loss": 3.7401, - "step": 4027500 - }, - { - "epoch": 44.73, - "learning_rate": 1.3168269551118317e-08, - "loss": 3.7298, - "step": 4028000 - }, - { - "epoch": 44.74, - "learning_rate": 1.3154387757368454e-08, - "loss": 3.7379, - "step": 4028500 - }, - { - "epoch": 44.74, - "learning_rate": 1.3140505963618595e-08, - "loss": 3.7431, - "step": 4029000 - }, - { - "epoch": 44.75, - "learning_rate": 1.3126624169868734e-08, - "loss": 3.7572, - "step": 4029500 - }, - { - "epoch": 44.75, - "learning_rate": 1.3112742376118871e-08, - "loss": 3.744, - "step": 4030000 - }, - { - "epoch": 44.76, - "learning_rate": 1.3098860582369011e-08, - "loss": 3.7559, - "step": 4030500 - }, - { - "epoch": 44.77, - "learning_rate": 1.308497878861915e-08, - "loss": 3.7692, - "step": 4031000 - }, - { - "epoch": 44.77, - "learning_rate": 1.3071096994869288e-08, - "loss": 3.7629, - "step": 4031500 - }, - { - "epoch": 44.78, - "learning_rate": 1.3057215201119427e-08, - "loss": 3.7448, - "step": 4032000 - }, - { - "epoch": 44.78, - "learning_rate": 1.3043333407369567e-08, - "loss": 3.7355, - "step": 4032500 - }, - { - "epoch": 44.79, - "learning_rate": 1.3029451613619705e-08, - "loss": 3.754, - "step": 4033000 - }, - { - "epoch": 44.79, - "learning_rate": 1.3015569819869844e-08, - "loss": 3.7549, - "step": 4033500 - }, - { - "epoch": 44.8, - "learning_rate": 1.3001688026119984e-08, - "loss": 3.7674, - "step": 4034000 - }, - { - "epoch": 44.8, - "learning_rate": 1.2987806232370121e-08, - "loss": 3.7338, - "step": 4034500 - }, - { - "epoch": 44.81, - "learning_rate": 1.2973924438620259e-08, - "loss": 3.7445, - "step": 4035000 - }, - { - "epoch": 44.82, - "learning_rate": 1.2960042644870401e-08, - "loss": 3.7565, - "step": 4035500 - }, - { - "epoch": 44.82, - "learning_rate": 1.2946160851120538e-08, - "loss": 3.7377, - "step": 4036000 - }, - { - "epoch": 44.83, - "learning_rate": 1.2932279057370676e-08, - "loss": 3.7462, - "step": 4036500 - }, - { - "epoch": 44.83, - "learning_rate": 1.2918397263620815e-08, - "loss": 3.7426, - "step": 4037000 - }, - { - "epoch": 44.84, - "learning_rate": 1.2904515469870955e-08, - "loss": 3.7397, - "step": 4037500 - }, - { - "epoch": 44.84, - "learning_rate": 1.2890633676121092e-08, - "loss": 3.7572, - "step": 4038000 - }, - { - "epoch": 44.85, - "learning_rate": 1.2876751882371231e-08, - "loss": 3.7421, - "step": 4038500 - }, - { - "epoch": 44.85, - "learning_rate": 1.2862870088621372e-08, - "loss": 3.7649, - "step": 4039000 - }, - { - "epoch": 44.86, - "learning_rate": 1.284898829487151e-08, - "loss": 3.7551, - "step": 4039500 - }, - { - "epoch": 44.87, - "learning_rate": 1.2835106501121648e-08, - "loss": 3.7483, - "step": 4040000 - }, - { - "epoch": 44.87, - "learning_rate": 1.2821224707371786e-08, - "loss": 3.7292, - "step": 4040500 - }, - { - "epoch": 44.88, - "learning_rate": 1.2807342913621926e-08, - "loss": 3.7425, - "step": 4041000 - }, - { - "epoch": 44.88, - "learning_rate": 1.2793461119872065e-08, - "loss": 3.7643, - "step": 4041500 - }, - { - "epoch": 44.89, - "learning_rate": 1.2779579326122202e-08, - "loss": 3.751, - "step": 4042000 - }, - { - "epoch": 44.89, - "learning_rate": 1.2765697532372343e-08, - "loss": 3.7423, - "step": 4042500 - }, - { - "epoch": 44.9, - "learning_rate": 1.2751815738622482e-08, - "loss": 3.7656, - "step": 4043000 - }, - { - "epoch": 44.9, - "learning_rate": 1.273793394487262e-08, - "loss": 3.7598, - "step": 4043500 - }, - { - "epoch": 44.91, - "learning_rate": 1.2724052151122758e-08, - "loss": 3.7552, - "step": 4044000 - }, - { - "epoch": 44.92, - "learning_rate": 1.2710170357372899e-08, - "loss": 3.7349, - "step": 4044500 - }, - { - "epoch": 44.92, - "learning_rate": 1.2696288563623036e-08, - "loss": 3.743, - "step": 4045000 - }, - { - "epoch": 44.93, - "learning_rate": 1.2682406769873175e-08, - "loss": 3.7793, - "step": 4045500 - }, - { - "epoch": 44.93, - "learning_rate": 1.2668524976123316e-08, - "loss": 3.7613, - "step": 4046000 - }, - { - "epoch": 44.94, - "learning_rate": 1.2654643182373453e-08, - "loss": 3.7471, - "step": 4046500 - }, - { - "epoch": 44.94, - "learning_rate": 1.2640761388623592e-08, - "loss": 3.7377, - "step": 4047000 - }, - { - "epoch": 44.95, - "learning_rate": 1.262687959487373e-08, - "loss": 3.747, - "step": 4047500 - }, - { - "epoch": 44.95, - "learning_rate": 1.261299780112387e-08, - "loss": 3.7668, - "step": 4048000 - }, - { - "epoch": 44.96, - "learning_rate": 1.2599116007374009e-08, - "loss": 3.7604, - "step": 4048500 - }, - { - "epoch": 44.97, - "learning_rate": 1.2585234213624146e-08, - "loss": 3.7462, - "step": 4049000 - }, - { - "epoch": 44.97, - "learning_rate": 1.2571352419874287e-08, - "loss": 3.7346, - "step": 4049500 - }, - { - "epoch": 44.98, - "learning_rate": 1.2557470626124426e-08, - "loss": 3.7358, - "step": 4050000 - }, - { - "epoch": 44.98, - "learning_rate": 1.2543588832374563e-08, - "loss": 3.7457, - "step": 4050500 - }, - { - "epoch": 44.99, - "learning_rate": 1.25297070386247e-08, - "loss": 3.738, - "step": 4051000 - }, - { - "epoch": 44.99, - "learning_rate": 1.2515825244874843e-08, - "loss": 3.7374, - "step": 4051500 - }, - { - "epoch": 45.0, - "learning_rate": 1.250194345112498e-08, - "loss": 3.7573, - "step": 4052000 - }, - { - "epoch": 45.0, - "eval_loss": 3.8247101306915283, - "eval_runtime": 6.3052, - "eval_samples_per_second": 246.462, - "step": 4052070 - }, - { - "epoch": 45.0, - "learning_rate": 1.2488061657375119e-08, - "loss": 3.7472, - "step": 4052500 - }, - { - "epoch": 45.01, - "learning_rate": 1.2474179863625258e-08, - "loss": 3.7597, - "step": 4053000 - }, - { - "epoch": 45.02, - "learning_rate": 1.2460298069875397e-08, - "loss": 3.7478, - "step": 4053500 - }, - { - "epoch": 45.02, - "learning_rate": 1.2446416276125534e-08, - "loss": 3.7377, - "step": 4054000 - }, - { - "epoch": 45.03, - "learning_rate": 1.2432534482375675e-08, - "loss": 3.75, - "step": 4054500 - }, - { - "epoch": 45.03, - "learning_rate": 1.2418652688625812e-08, - "loss": 3.7509, - "step": 4055000 - }, - { - "epoch": 45.04, - "learning_rate": 1.2404770894875951e-08, - "loss": 3.7481, - "step": 4055500 - }, - { - "epoch": 45.04, - "learning_rate": 1.2390889101126092e-08, - "loss": 3.7611, - "step": 4056000 - }, - { - "epoch": 45.05, - "learning_rate": 1.2377007307376229e-08, - "loss": 3.7565, - "step": 4056500 - }, - { - "epoch": 45.05, - "learning_rate": 1.2363125513626368e-08, - "loss": 3.7633, - "step": 4057000 - }, - { - "epoch": 45.06, - "learning_rate": 1.2349243719876507e-08, - "loss": 3.7424, - "step": 4057500 - }, - { - "epoch": 45.07, - "learning_rate": 1.2335361926126646e-08, - "loss": 3.7399, - "step": 4058000 - }, - { - "epoch": 45.07, - "learning_rate": 1.2321480132376785e-08, - "loss": 3.7541, - "step": 4058500 - }, - { - "epoch": 45.08, - "learning_rate": 1.2307598338626924e-08, - "loss": 3.7407, - "step": 4059000 - }, - { - "epoch": 45.08, - "learning_rate": 1.2293716544877063e-08, - "loss": 3.7414, - "step": 4059500 - }, - { - "epoch": 45.09, - "learning_rate": 1.2279834751127202e-08, - "loss": 3.7469, - "step": 4060000 - }, - { - "epoch": 45.09, - "learning_rate": 1.226595295737734e-08, - "loss": 3.7551, - "step": 4060500 - }, - { - "epoch": 45.1, - "learning_rate": 1.2252071163627478e-08, - "loss": 3.739, - "step": 4061000 - }, - { - "epoch": 45.1, - "learning_rate": 1.2238189369877617e-08, - "loss": 3.7541, - "step": 4061500 - }, - { - "epoch": 45.11, - "learning_rate": 1.2224307576127757e-08, - "loss": 3.7308, - "step": 4062000 - }, - { - "epoch": 45.12, - "learning_rate": 1.2210425782377895e-08, - "loss": 3.7442, - "step": 4062500 - }, - { - "epoch": 45.12, - "learning_rate": 1.2196543988628034e-08, - "loss": 3.7178, - "step": 4063000 - }, - { - "epoch": 45.13, - "learning_rate": 1.2182662194878173e-08, - "loss": 3.752, - "step": 4063500 - }, - { - "epoch": 45.13, - "learning_rate": 1.2168780401128312e-08, - "loss": 3.7237, - "step": 4064000 - }, - { - "epoch": 45.14, - "learning_rate": 1.215489860737845e-08, - "loss": 3.7518, - "step": 4064500 - }, - { - "epoch": 45.14, - "learning_rate": 1.214101681362859e-08, - "loss": 3.7263, - "step": 4065000 - }, - { - "epoch": 45.15, - "learning_rate": 1.2127135019878728e-08, - "loss": 3.7328, - "step": 4065500 - }, - { - "epoch": 45.15, - "learning_rate": 1.2113253226128867e-08, - "loss": 3.7408, - "step": 4066000 - }, - { - "epoch": 45.16, - "learning_rate": 1.2099371432379006e-08, - "loss": 3.7489, - "step": 4066500 - }, - { - "epoch": 45.17, - "learning_rate": 1.2085489638629144e-08, - "loss": 3.7408, - "step": 4067000 - }, - { - "epoch": 45.17, - "learning_rate": 1.2071607844879284e-08, - "loss": 3.753, - "step": 4067500 - }, - { - "epoch": 45.18, - "learning_rate": 1.2057726051129422e-08, - "loss": 3.7521, - "step": 4068000 - }, - { - "epoch": 45.18, - "learning_rate": 1.204384425737956e-08, - "loss": 3.74, - "step": 4068500 - }, - { - "epoch": 45.19, - "learning_rate": 1.2029962463629701e-08, - "loss": 3.7246, - "step": 4069000 - }, - { - "epoch": 45.19, - "learning_rate": 1.2016080669879838e-08, - "loss": 3.7426, - "step": 4069500 - }, - { - "epoch": 45.2, - "learning_rate": 1.2002198876129977e-08, - "loss": 3.7464, - "step": 4070000 - }, - { - "epoch": 45.2, - "learning_rate": 1.1988317082380116e-08, - "loss": 3.7602, - "step": 4070500 - }, - { - "epoch": 45.21, - "learning_rate": 1.1974435288630255e-08, - "loss": 3.7413, - "step": 4071000 - }, - { - "epoch": 45.22, - "learning_rate": 1.1960553494880394e-08, - "loss": 3.7405, - "step": 4071500 - }, - { - "epoch": 45.22, - "learning_rate": 1.1946671701130533e-08, - "loss": 3.7508, - "step": 4072000 - }, - { - "epoch": 45.23, - "learning_rate": 1.1932789907380672e-08, - "loss": 3.7437, - "step": 4072500 - }, - { - "epoch": 45.23, - "learning_rate": 1.191890811363081e-08, - "loss": 3.7679, - "step": 4073000 - }, - { - "epoch": 45.24, - "learning_rate": 1.190502631988095e-08, - "loss": 3.7467, - "step": 4073500 - }, - { - "epoch": 45.24, - "learning_rate": 1.1891144526131087e-08, - "loss": 3.7599, - "step": 4074000 - }, - { - "epoch": 45.25, - "learning_rate": 1.1877262732381226e-08, - "loss": 3.7611, - "step": 4074500 - }, - { - "epoch": 45.25, - "learning_rate": 1.1863380938631367e-08, - "loss": 3.7522, - "step": 4075000 - }, - { - "epoch": 45.26, - "learning_rate": 1.1849499144881504e-08, - "loss": 3.756, - "step": 4075500 - }, - { - "epoch": 45.27, - "learning_rate": 1.1835617351131643e-08, - "loss": 3.7582, - "step": 4076000 - }, - { - "epoch": 45.27, - "learning_rate": 1.1821735557381782e-08, - "loss": 3.7371, - "step": 4076500 - }, - { - "epoch": 45.28, - "learning_rate": 1.1807853763631921e-08, - "loss": 3.7583, - "step": 4077000 - }, - { - "epoch": 45.28, - "learning_rate": 1.1793971969882058e-08, - "loss": 3.7402, - "step": 4077500 - }, - { - "epoch": 45.29, - "learning_rate": 1.1780090176132199e-08, - "loss": 3.7629, - "step": 4078000 - }, - { - "epoch": 45.29, - "learning_rate": 1.1766208382382338e-08, - "loss": 3.7404, - "step": 4078500 - }, - { - "epoch": 45.3, - "learning_rate": 1.1752326588632475e-08, - "loss": 3.7602, - "step": 4079000 - }, - { - "epoch": 45.3, - "learning_rate": 1.1738444794882616e-08, - "loss": 3.74, - "step": 4079500 - }, - { - "epoch": 45.31, - "learning_rate": 1.1724563001132753e-08, - "loss": 3.7437, - "step": 4080000 - }, - { - "epoch": 45.32, - "learning_rate": 1.1710681207382892e-08, - "loss": 3.7595, - "step": 4080500 - }, - { - "epoch": 45.32, - "learning_rate": 1.1696799413633031e-08, - "loss": 3.7593, - "step": 4081000 - }, - { - "epoch": 45.33, - "learning_rate": 1.168291761988317e-08, - "loss": 3.7379, - "step": 4081500 - }, - { - "epoch": 45.33, - "learning_rate": 1.1669035826133309e-08, - "loss": 3.7428, - "step": 4082000 - }, - { - "epoch": 45.34, - "learning_rate": 1.1655154032383448e-08, - "loss": 3.7346, - "step": 4082500 - }, - { - "epoch": 45.34, - "learning_rate": 1.1641272238633587e-08, - "loss": 3.7299, - "step": 4083000 - }, - { - "epoch": 45.35, - "learning_rate": 1.1627390444883726e-08, - "loss": 3.744, - "step": 4083500 - }, - { - "epoch": 45.35, - "learning_rate": 1.1613508651133865e-08, - "loss": 3.7515, - "step": 4084000 - }, - { - "epoch": 45.36, - "learning_rate": 1.1599626857384002e-08, - "loss": 3.7353, - "step": 4084500 - }, - { - "epoch": 45.37, - "learning_rate": 1.1585745063634143e-08, - "loss": 3.7435, - "step": 4085000 - }, - { - "epoch": 45.37, - "learning_rate": 1.1571863269884282e-08, - "loss": 3.7552, - "step": 4085500 - }, - { - "epoch": 45.38, - "learning_rate": 1.1557981476134419e-08, - "loss": 3.749, - "step": 4086000 - }, - { - "epoch": 45.38, - "learning_rate": 1.154409968238456e-08, - "loss": 3.7577, - "step": 4086500 - }, - { - "epoch": 45.39, - "learning_rate": 1.1530217888634697e-08, - "loss": 3.7472, - "step": 4087000 - }, - { - "epoch": 45.39, - "learning_rate": 1.1516336094884836e-08, - "loss": 3.7482, - "step": 4087500 - }, - { - "epoch": 45.4, - "learning_rate": 1.1502454301134976e-08, - "loss": 3.7297, - "step": 4088000 - }, - { - "epoch": 45.4, - "learning_rate": 1.1488572507385114e-08, - "loss": 3.7501, - "step": 4088500 - }, - { - "epoch": 45.41, - "learning_rate": 1.1474690713635253e-08, - "loss": 3.7697, - "step": 4089000 - }, - { - "epoch": 45.42, - "learning_rate": 1.1460808919885392e-08, - "loss": 3.7517, - "step": 4089500 - }, - { - "epoch": 45.42, - "learning_rate": 1.144692712613553e-08, - "loss": 3.744, - "step": 4090000 - }, - { - "epoch": 45.43, - "learning_rate": 1.1433045332385668e-08, - "loss": 3.7476, - "step": 4090500 - }, - { - "epoch": 45.43, - "learning_rate": 1.1419163538635809e-08, - "loss": 3.7611, - "step": 4091000 - }, - { - "epoch": 45.44, - "learning_rate": 1.1405281744885947e-08, - "loss": 3.7593, - "step": 4091500 - }, - { - "epoch": 45.44, - "learning_rate": 1.1391399951136085e-08, - "loss": 3.7525, - "step": 4092000 - }, - { - "epoch": 45.45, - "learning_rate": 1.1377518157386225e-08, - "loss": 3.7613, - "step": 4092500 - }, - { - "epoch": 45.45, - "learning_rate": 1.1363636363636363e-08, - "loss": 3.757, - "step": 4093000 - }, - { - "epoch": 45.46, - "learning_rate": 1.1349754569886502e-08, - "loss": 3.7462, - "step": 4093500 - }, - { - "epoch": 45.47, - "learning_rate": 1.133587277613664e-08, - "loss": 3.7684, - "step": 4094000 - }, - { - "epoch": 45.47, - "learning_rate": 1.132199098238678e-08, - "loss": 3.7631, - "step": 4094500 - }, - { - "epoch": 45.48, - "learning_rate": 1.1308109188636918e-08, - "loss": 3.7457, - "step": 4095000 - }, - { - "epoch": 45.48, - "learning_rate": 1.1294227394887057e-08, - "loss": 3.7521, - "step": 4095500 - }, - { - "epoch": 45.49, - "learning_rate": 1.1280345601137196e-08, - "loss": 3.7518, - "step": 4096000 - }, - { - "epoch": 45.49, - "learning_rate": 1.1266463807387334e-08, - "loss": 3.7475, - "step": 4096500 - }, - { - "epoch": 45.5, - "learning_rate": 1.1252582013637474e-08, - "loss": 3.7414, - "step": 4097000 - }, - { - "epoch": 45.5, - "learning_rate": 1.1238700219887612e-08, - "loss": 3.7509, - "step": 4097500 - }, - { - "epoch": 45.51, - "learning_rate": 1.122481842613775e-08, - "loss": 3.7135, - "step": 4098000 - }, - { - "epoch": 45.52, - "learning_rate": 1.1210936632387891e-08, - "loss": 3.7353, - "step": 4098500 - }, - { - "epoch": 45.52, - "learning_rate": 1.1197054838638028e-08, - "loss": 3.7351, - "step": 4099000 - }, - { - "epoch": 45.53, - "learning_rate": 1.1183173044888167e-08, - "loss": 3.7446, - "step": 4099500 - }, - { - "epoch": 45.53, - "learning_rate": 1.1169291251138306e-08, - "loss": 3.7458, - "step": 4100000 - }, - { - "epoch": 45.54, - "learning_rate": 1.1155409457388445e-08, - "loss": 3.7647, - "step": 4100500 - }, - { - "epoch": 45.54, - "learning_rate": 1.1141527663638584e-08, - "loss": 3.7459, - "step": 4101000 - }, - { - "epoch": 45.55, - "learning_rate": 1.1127645869888723e-08, - "loss": 3.7482, - "step": 4101500 - }, - { - "epoch": 45.55, - "learning_rate": 1.1113764076138862e-08, - "loss": 3.7484, - "step": 4102000 - }, - { - "epoch": 45.56, - "learning_rate": 1.1099882282389001e-08, - "loss": 3.7419, - "step": 4102500 - }, - { - "epoch": 45.57, - "learning_rate": 1.108600048863914e-08, - "loss": 3.7481, - "step": 4103000 - }, - { - "epoch": 45.57, - "learning_rate": 1.1072118694889277e-08, - "loss": 3.7291, - "step": 4103500 - }, - { - "epoch": 45.58, - "learning_rate": 1.1058236901139418e-08, - "loss": 3.7579, - "step": 4104000 - }, - { - "epoch": 45.58, - "learning_rate": 1.1044355107389557e-08, - "loss": 3.754, - "step": 4104500 - }, - { - "epoch": 45.59, - "learning_rate": 1.1030473313639694e-08, - "loss": 3.7544, - "step": 4105000 - }, - { - "epoch": 45.59, - "learning_rate": 1.1016591519889833e-08, - "loss": 3.7459, - "step": 4105500 - }, - { - "epoch": 45.6, - "learning_rate": 1.1002709726139972e-08, - "loss": 3.7469, - "step": 4106000 - }, - { - "epoch": 45.6, - "learning_rate": 1.0988827932390111e-08, - "loss": 3.7461, - "step": 4106500 - }, - { - "epoch": 45.61, - "learning_rate": 1.097494613864025e-08, - "loss": 3.7401, - "step": 4107000 - }, - { - "epoch": 45.62, - "learning_rate": 1.0961064344890389e-08, - "loss": 3.756, - "step": 4107500 - }, - { - "epoch": 45.62, - "learning_rate": 1.0947182551140528e-08, - "loss": 3.7323, - "step": 4108000 - }, - { - "epoch": 45.63, - "learning_rate": 1.0933300757390667e-08, - "loss": 3.7526, - "step": 4108500 - }, - { - "epoch": 45.63, - "learning_rate": 1.0919418963640806e-08, - "loss": 3.7568, - "step": 4109000 - }, - { - "epoch": 45.64, - "learning_rate": 1.0905537169890943e-08, - "loss": 3.7228, - "step": 4109500 - }, - { - "epoch": 45.64, - "learning_rate": 1.0891655376141084e-08, - "loss": 3.7769, - "step": 4110000 - }, - { - "epoch": 45.65, - "learning_rate": 1.0877773582391221e-08, - "loss": 3.7644, - "step": 4110500 - }, - { - "epoch": 45.65, - "learning_rate": 1.086389178864136e-08, - "loss": 3.7292, - "step": 4111000 - }, - { - "epoch": 45.66, - "learning_rate": 1.08500099948915e-08, - "loss": 3.7508, - "step": 4111500 - }, - { - "epoch": 45.67, - "learning_rate": 1.0836128201141638e-08, - "loss": 3.7341, - "step": 4112000 - }, - { - "epoch": 45.67, - "learning_rate": 1.0822246407391777e-08, - "loss": 3.7353, - "step": 4112500 - }, - { - "epoch": 45.68, - "learning_rate": 1.0808364613641916e-08, - "loss": 3.7593, - "step": 4113000 - }, - { - "epoch": 45.68, - "learning_rate": 1.0794482819892055e-08, - "loss": 3.7551, - "step": 4113500 - }, - { - "epoch": 45.69, - "learning_rate": 1.0780601026142192e-08, - "loss": 3.7555, - "step": 4114000 - }, - { - "epoch": 45.69, - "learning_rate": 1.0766719232392333e-08, - "loss": 3.7646, - "step": 4114500 - }, - { - "epoch": 45.7, - "learning_rate": 1.0752837438642472e-08, - "loss": 3.7526, - "step": 4115000 - }, - { - "epoch": 45.7, - "learning_rate": 1.0738955644892609e-08, - "loss": 3.7397, - "step": 4115500 - }, - { - "epoch": 45.71, - "learning_rate": 1.072507385114275e-08, - "loss": 3.7402, - "step": 4116000 - }, - { - "epoch": 45.72, - "learning_rate": 1.0711192057392887e-08, - "loss": 3.7589, - "step": 4116500 - }, - { - "epoch": 45.72, - "learning_rate": 1.0697310263643026e-08, - "loss": 3.7486, - "step": 4117000 - }, - { - "epoch": 45.73, - "learning_rate": 1.0683428469893165e-08, - "loss": 3.7497, - "step": 4117500 - }, - { - "epoch": 45.73, - "learning_rate": 1.0669546676143304e-08, - "loss": 3.7544, - "step": 4118000 - }, - { - "epoch": 45.74, - "learning_rate": 1.0655664882393443e-08, - "loss": 3.7395, - "step": 4118500 - }, - { - "epoch": 45.74, - "learning_rate": 1.0641783088643582e-08, - "loss": 3.7352, - "step": 4119000 - }, - { - "epoch": 45.75, - "learning_rate": 1.062790129489372e-08, - "loss": 3.7522, - "step": 4119500 - }, - { - "epoch": 45.75, - "learning_rate": 1.0614019501143858e-08, - "loss": 3.7602, - "step": 4120000 - }, - { - "epoch": 45.76, - "learning_rate": 1.0600137707393999e-08, - "loss": 3.7724, - "step": 4120500 - }, - { - "epoch": 45.77, - "learning_rate": 1.0586255913644138e-08, - "loss": 3.7349, - "step": 4121000 - }, - { - "epoch": 45.77, - "learning_rate": 1.0572374119894275e-08, - "loss": 3.7577, - "step": 4121500 - }, - { - "epoch": 45.78, - "learning_rate": 1.0558492326144415e-08, - "loss": 3.7606, - "step": 4122000 - }, - { - "epoch": 45.78, - "learning_rate": 1.0544610532394553e-08, - "loss": 3.7307, - "step": 4122500 - }, - { - "epoch": 45.79, - "learning_rate": 1.0530728738644692e-08, - "loss": 3.7421, - "step": 4123000 - }, - { - "epoch": 45.79, - "learning_rate": 1.051684694489483e-08, - "loss": 3.7552, - "step": 4123500 - }, - { - "epoch": 45.8, - "learning_rate": 1.050296515114497e-08, - "loss": 3.7399, - "step": 4124000 - }, - { - "epoch": 45.8, - "learning_rate": 1.0489083357395109e-08, - "loss": 3.7693, - "step": 4124500 - }, - { - "epoch": 45.81, - "learning_rate": 1.0475201563645248e-08, - "loss": 3.7475, - "step": 4125000 - }, - { - "epoch": 45.82, - "learning_rate": 1.0461319769895386e-08, - "loss": 3.7391, - "step": 4125500 - }, - { - "epoch": 45.82, - "learning_rate": 1.0447437976145525e-08, - "loss": 3.7574, - "step": 4126000 - }, - { - "epoch": 45.83, - "learning_rate": 1.0433556182395664e-08, - "loss": 3.7356, - "step": 4126500 - }, - { - "epoch": 45.83, - "learning_rate": 1.0419674388645802e-08, - "loss": 3.7367, - "step": 4127000 - }, - { - "epoch": 45.84, - "learning_rate": 1.0405792594895942e-08, - "loss": 3.7585, - "step": 4127500 - }, - { - "epoch": 45.84, - "learning_rate": 1.0391910801146081e-08, - "loss": 3.7518, - "step": 4128000 - }, - { - "epoch": 45.85, - "learning_rate": 1.0378029007396219e-08, - "loss": 3.7324, - "step": 4128500 - }, - { - "epoch": 45.85, - "learning_rate": 1.036414721364636e-08, - "loss": 3.7376, - "step": 4129000 - }, - { - "epoch": 45.86, - "learning_rate": 1.0350265419896496e-08, - "loss": 3.7528, - "step": 4129500 - }, - { - "epoch": 45.87, - "learning_rate": 1.0336383626146635e-08, - "loss": 3.7487, - "step": 4130000 - }, - { - "epoch": 45.87, - "learning_rate": 1.0322501832396774e-08, - "loss": 3.7465, - "step": 4130500 - }, - { - "epoch": 45.88, - "learning_rate": 1.0308620038646913e-08, - "loss": 3.7546, - "step": 4131000 - }, - { - "epoch": 45.88, - "learning_rate": 1.0294738244897052e-08, - "loss": 3.7473, - "step": 4131500 - }, - { - "epoch": 45.89, - "learning_rate": 1.0280856451147191e-08, - "loss": 3.7433, - "step": 4132000 - }, - { - "epoch": 45.89, - "learning_rate": 1.026697465739733e-08, - "loss": 3.7562, - "step": 4132500 - }, - { - "epoch": 45.9, - "learning_rate": 1.0253092863647468e-08, - "loss": 3.7568, - "step": 4133000 - }, - { - "epoch": 45.9, - "learning_rate": 1.0239211069897608e-08, - "loss": 3.7314, - "step": 4133500 - }, - { - "epoch": 45.91, - "learning_rate": 1.0225329276147747e-08, - "loss": 3.7356, - "step": 4134000 - }, - { - "epoch": 45.92, - "learning_rate": 1.0211447482397884e-08, - "loss": 3.7454, - "step": 4134500 - }, - { - "epoch": 45.92, - "learning_rate": 1.0197565688648025e-08, - "loss": 3.7406, - "step": 4135000 - }, - { - "epoch": 45.93, - "learning_rate": 1.0183683894898162e-08, - "loss": 3.7525, - "step": 4135500 - }, - { - "epoch": 45.93, - "learning_rate": 1.0169802101148301e-08, - "loss": 3.7258, - "step": 4136000 - }, - { - "epoch": 45.94, - "learning_rate": 1.015592030739844e-08, - "loss": 3.7477, - "step": 4136500 - }, - { - "epoch": 45.94, - "learning_rate": 1.0142038513648579e-08, - "loss": 3.7516, - "step": 4137000 - }, - { - "epoch": 45.95, - "learning_rate": 1.0128156719898718e-08, - "loss": 3.7278, - "step": 4137500 - }, - { - "epoch": 45.95, - "learning_rate": 1.0114274926148857e-08, - "loss": 3.7652, - "step": 4138000 - }, - { - "epoch": 45.96, - "learning_rate": 1.0100393132398996e-08, - "loss": 3.719, - "step": 4138500 - }, - { - "epoch": 45.97, - "learning_rate": 1.0086511338649133e-08, - "loss": 3.7351, - "step": 4139000 - }, - { - "epoch": 45.97, - "learning_rate": 1.0072629544899274e-08, - "loss": 3.7496, - "step": 4139500 - }, - { - "epoch": 45.98, - "learning_rate": 1.0058747751149411e-08, - "loss": 3.7471, - "step": 4140000 - }, - { - "epoch": 45.98, - "learning_rate": 1.004486595739955e-08, - "loss": 3.7638, - "step": 4140500 - }, - { - "epoch": 45.99, - "learning_rate": 1.003098416364969e-08, - "loss": 3.7404, - "step": 4141000 - }, - { - "epoch": 45.99, - "learning_rate": 1.0017102369899828e-08, - "loss": 3.7646, - "step": 4141500 - }, - { - "epoch": 46.0, - "learning_rate": 1.0003220576149967e-08, - "loss": 3.7462, - "step": 4142000 - }, - { - "epoch": 46.0, - "eval_loss": 3.824589490890503, - "eval_runtime": 6.307, - "eval_samples_per_second": 246.393, - "step": 4142116 - }, - { - "epoch": 46.0, - "learning_rate": 9.989338782400106e-09, - "loss": 3.7482, - "step": 4142500 - }, - { - "epoch": 46.01, - "learning_rate": 9.975456988650245e-09, - "loss": 3.7465, - "step": 4143000 - }, - { - "epoch": 46.02, - "learning_rate": 9.961575194900384e-09, - "loss": 3.7503, - "step": 4143500 - }, - { - "epoch": 46.02, - "learning_rate": 9.947693401150523e-09, - "loss": 3.7474, - "step": 4144000 - }, - { - "epoch": 46.03, - "learning_rate": 9.933811607400662e-09, - "loss": 3.7468, - "step": 4144500 - }, - { - "epoch": 46.03, - "learning_rate": 9.9199298136508e-09, - "loss": 3.7563, - "step": 4145000 - }, - { - "epoch": 46.04, - "learning_rate": 9.90604801990094e-09, - "loss": 3.7407, - "step": 4145500 - }, - { - "epoch": 46.04, - "learning_rate": 9.892166226151077e-09, - "loss": 3.7697, - "step": 4146000 - }, - { - "epoch": 46.05, - "learning_rate": 9.878284432401218e-09, - "loss": 3.7523, - "step": 4146500 - }, - { - "epoch": 46.05, - "learning_rate": 9.864402638651355e-09, - "loss": 3.7531, - "step": 4147000 - }, - { - "epoch": 46.06, - "learning_rate": 9.850520844901494e-09, - "loss": 3.7775, - "step": 4147500 - }, - { - "epoch": 46.07, - "learning_rate": 9.836639051151633e-09, - "loss": 3.7518, - "step": 4148000 - }, - { - "epoch": 46.07, - "learning_rate": 9.822757257401772e-09, - "loss": 3.7412, - "step": 4148500 - }, - { - "epoch": 46.08, - "learning_rate": 9.80887546365191e-09, - "loss": 3.7648, - "step": 4149000 - }, - { - "epoch": 46.08, - "learning_rate": 9.79499366990205e-09, - "loss": 3.7634, - "step": 4149500 - }, - { - "epoch": 46.09, - "learning_rate": 9.781111876152189e-09, - "loss": 3.755, - "step": 4150000 - }, - { - "epoch": 46.09, - "learning_rate": 9.767230082402328e-09, - "loss": 3.7599, - "step": 4150500 - }, - { - "epoch": 46.1, - "learning_rate": 9.753348288652467e-09, - "loss": 3.7446, - "step": 4151000 - }, - { - "epoch": 46.1, - "learning_rate": 9.739466494902606e-09, - "loss": 3.7538, - "step": 4151500 - }, - { - "epoch": 46.11, - "learning_rate": 9.725584701152743e-09, - "loss": 3.7337, - "step": 4152000 - }, - { - "epoch": 46.12, - "learning_rate": 9.711702907402883e-09, - "loss": 3.7634, - "step": 4152500 - }, - { - "epoch": 46.12, - "learning_rate": 9.69782111365302e-09, - "loss": 3.7337, - "step": 4153000 - }, - { - "epoch": 46.13, - "learning_rate": 9.68393931990316e-09, - "loss": 3.7546, - "step": 4153500 - }, - { - "epoch": 46.13, - "learning_rate": 9.6700575261533e-09, - "loss": 3.7528, - "step": 4154000 - }, - { - "epoch": 46.14, - "learning_rate": 9.656175732403438e-09, - "loss": 3.7457, - "step": 4154500 - }, - { - "epoch": 46.14, - "learning_rate": 9.642293938653577e-09, - "loss": 3.7325, - "step": 4155000 - }, - { - "epoch": 46.15, - "learning_rate": 9.628412144903716e-09, - "loss": 3.7416, - "step": 4155500 - }, - { - "epoch": 46.15, - "learning_rate": 9.614530351153854e-09, - "loss": 3.7404, - "step": 4156000 - }, - { - "epoch": 46.16, - "learning_rate": 9.600648557403992e-09, - "loss": 3.7493, - "step": 4156500 - }, - { - "epoch": 46.17, - "learning_rate": 9.586766763654132e-09, - "loss": 3.7475, - "step": 4157000 - }, - { - "epoch": 46.17, - "learning_rate": 9.572884969904271e-09, - "loss": 3.7655, - "step": 4157500 - }, - { - "epoch": 46.18, - "learning_rate": 9.559003176154409e-09, - "loss": 3.7527, - "step": 4158000 - }, - { - "epoch": 46.18, - "learning_rate": 9.54512138240455e-09, - "loss": 3.7587, - "step": 4158500 - }, - { - "epoch": 46.19, - "learning_rate": 9.531239588654687e-09, - "loss": 3.7539, - "step": 4159000 - }, - { - "epoch": 46.19, - "learning_rate": 9.517357794904826e-09, - "loss": 3.7548, - "step": 4159500 - }, - { - "epoch": 46.2, - "learning_rate": 9.503476001154964e-09, - "loss": 3.7661, - "step": 4160000 - }, - { - "epoch": 46.2, - "learning_rate": 9.489594207405103e-09, - "loss": 3.7509, - "step": 4160500 - }, - { - "epoch": 46.21, - "learning_rate": 9.475712413655242e-09, - "loss": 3.7366, - "step": 4161000 - }, - { - "epoch": 46.22, - "learning_rate": 9.461830619905381e-09, - "loss": 3.7553, - "step": 4161500 - }, - { - "epoch": 46.22, - "learning_rate": 9.44794882615552e-09, - "loss": 3.7445, - "step": 4162000 - }, - { - "epoch": 46.23, - "learning_rate": 9.434067032405658e-09, - "loss": 3.7339, - "step": 4162500 - }, - { - "epoch": 46.23, - "learning_rate": 9.420185238655798e-09, - "loss": 3.765, - "step": 4163000 - }, - { - "epoch": 46.24, - "learning_rate": 9.406303444905936e-09, - "loss": 3.7396, - "step": 4163500 - }, - { - "epoch": 46.24, - "learning_rate": 9.392421651156074e-09, - "loss": 3.7764, - "step": 4164000 - }, - { - "epoch": 46.25, - "learning_rate": 9.378539857406215e-09, - "loss": 3.7443, - "step": 4164500 - }, - { - "epoch": 46.25, - "learning_rate": 9.364658063656352e-09, - "loss": 3.7498, - "step": 4165000 - }, - { - "epoch": 46.26, - "learning_rate": 9.350776269906491e-09, - "loss": 3.7494, - "step": 4165500 - }, - { - "epoch": 46.27, - "learning_rate": 9.33689447615663e-09, - "loss": 3.749, - "step": 4166000 - }, - { - "epoch": 46.27, - "learning_rate": 9.32301268240677e-09, - "loss": 3.7586, - "step": 4166500 - }, - { - "epoch": 46.28, - "learning_rate": 9.309130888656908e-09, - "loss": 3.7361, - "step": 4167000 - }, - { - "epoch": 46.28, - "learning_rate": 9.295249094907047e-09, - "loss": 3.7269, - "step": 4167500 - }, - { - "epoch": 46.29, - "learning_rate": 9.281367301157186e-09, - "loss": 3.749, - "step": 4168000 - }, - { - "epoch": 46.29, - "learning_rate": 9.267485507407325e-09, - "loss": 3.7509, - "step": 4168500 - }, - { - "epoch": 46.3, - "learning_rate": 9.253603713657464e-09, - "loss": 3.7378, - "step": 4169000 - }, - { - "epoch": 46.3, - "learning_rate": 9.239721919907601e-09, - "loss": 3.7373, - "step": 4169500 - }, - { - "epoch": 46.31, - "learning_rate": 9.225840126157742e-09, - "loss": 3.748, - "step": 4170000 - }, - { - "epoch": 46.32, - "learning_rate": 9.211958332407881e-09, - "loss": 3.7466, - "step": 4170500 - }, - { - "epoch": 46.32, - "learning_rate": 9.198076538658018e-09, - "loss": 3.7457, - "step": 4171000 - }, - { - "epoch": 46.33, - "learning_rate": 9.184194744908159e-09, - "loss": 3.7494, - "step": 4171500 - }, - { - "epoch": 46.33, - "learning_rate": 9.170312951158296e-09, - "loss": 3.7453, - "step": 4172000 - }, - { - "epoch": 46.34, - "learning_rate": 9.156431157408435e-09, - "loss": 3.7418, - "step": 4172500 - }, - { - "epoch": 46.34, - "learning_rate": 9.142549363658574e-09, - "loss": 3.7366, - "step": 4173000 - }, - { - "epoch": 46.35, - "learning_rate": 9.128667569908713e-09, - "loss": 3.7323, - "step": 4173500 - }, - { - "epoch": 46.35, - "learning_rate": 9.114785776158852e-09, - "loss": 3.7559, - "step": 4174000 - }, - { - "epoch": 46.36, - "learning_rate": 9.100903982408991e-09, - "loss": 3.7543, - "step": 4174500 - }, - { - "epoch": 46.37, - "learning_rate": 9.08702218865913e-09, - "loss": 3.7335, - "step": 4175000 - }, - { - "epoch": 46.37, - "learning_rate": 9.073140394909267e-09, - "loss": 3.7483, - "step": 4175500 - }, - { - "epoch": 46.38, - "learning_rate": 9.059258601159408e-09, - "loss": 3.7488, - "step": 4176000 - }, - { - "epoch": 46.38, - "learning_rate": 9.045376807409545e-09, - "loss": 3.7475, - "step": 4176500 - }, - { - "epoch": 46.39, - "learning_rate": 9.031495013659684e-09, - "loss": 3.7447, - "step": 4177000 - }, - { - "epoch": 46.39, - "learning_rate": 9.017613219909825e-09, - "loss": 3.7255, - "step": 4177500 - }, - { - "epoch": 46.4, - "learning_rate": 9.003731426159962e-09, - "loss": 3.7488, - "step": 4178000 - }, - { - "epoch": 46.4, - "learning_rate": 8.989849632410101e-09, - "loss": 3.7595, - "step": 4178500 - }, - { - "epoch": 46.41, - "learning_rate": 8.97596783866024e-09, - "loss": 3.7672, - "step": 4179000 - }, - { - "epoch": 46.42, - "learning_rate": 8.962086044910379e-09, - "loss": 3.7499, - "step": 4179500 - }, - { - "epoch": 46.42, - "learning_rate": 8.948204251160516e-09, - "loss": 3.7323, - "step": 4180000 - }, - { - "epoch": 46.43, - "learning_rate": 8.934322457410657e-09, - "loss": 3.7302, - "step": 4180500 - }, - { - "epoch": 46.43, - "learning_rate": 8.920440663660796e-09, - "loss": 3.7605, - "step": 4181000 - }, - { - "epoch": 46.44, - "learning_rate": 8.906558869910933e-09, - "loss": 3.7395, - "step": 4181500 - }, - { - "epoch": 46.44, - "learning_rate": 8.892677076161074e-09, - "loss": 3.7641, - "step": 4182000 - }, - { - "epoch": 46.45, - "learning_rate": 8.878795282411211e-09, - "loss": 3.7529, - "step": 4182500 - }, - { - "epoch": 46.45, - "learning_rate": 8.86491348866135e-09, - "loss": 3.7306, - "step": 4183000 - }, - { - "epoch": 46.46, - "learning_rate": 8.85103169491149e-09, - "loss": 3.7585, - "step": 4183500 - }, - { - "epoch": 46.47, - "learning_rate": 8.837149901161628e-09, - "loss": 3.7317, - "step": 4184000 - }, - { - "epoch": 46.47, - "learning_rate": 8.823268107411767e-09, - "loss": 3.7636, - "step": 4184500 - }, - { - "epoch": 46.48, - "learning_rate": 8.809386313661906e-09, - "loss": 3.7619, - "step": 4185000 - }, - { - "epoch": 46.48, - "learning_rate": 8.795504519912045e-09, - "loss": 3.7452, - "step": 4185500 - }, - { - "epoch": 46.49, - "learning_rate": 8.781622726162184e-09, - "loss": 3.7616, - "step": 4186000 - }, - { - "epoch": 46.49, - "learning_rate": 8.767740932412323e-09, - "loss": 3.7291, - "step": 4186500 - }, - { - "epoch": 46.5, - "learning_rate": 8.753859138662461e-09, - "loss": 3.74, - "step": 4187000 - }, - { - "epoch": 46.5, - "learning_rate": 8.7399773449126e-09, - "loss": 3.7648, - "step": 4187500 - }, - { - "epoch": 46.51, - "learning_rate": 8.72609555116274e-09, - "loss": 3.7428, - "step": 4188000 - }, - { - "epoch": 46.52, - "learning_rate": 8.712213757412877e-09, - "loss": 3.7448, - "step": 4188500 - }, - { - "epoch": 46.52, - "learning_rate": 8.698331963663017e-09, - "loss": 3.7228, - "step": 4189000 - }, - { - "epoch": 46.53, - "learning_rate": 8.684450169913155e-09, - "loss": 3.7527, - "step": 4189500 - }, - { - "epoch": 46.53, - "learning_rate": 8.670568376163294e-09, - "loss": 3.7418, - "step": 4190000 - }, - { - "epoch": 46.54, - "learning_rate": 8.656686582413432e-09, - "loss": 3.745, - "step": 4190500 - }, - { - "epoch": 46.54, - "learning_rate": 8.642804788663571e-09, - "loss": 3.7309, - "step": 4191000 - }, - { - "epoch": 46.55, - "learning_rate": 8.62892299491371e-09, - "loss": 3.7464, - "step": 4191500 - }, - { - "epoch": 46.55, - "learning_rate": 8.61504120116385e-09, - "loss": 3.7476, - "step": 4192000 - }, - { - "epoch": 46.56, - "learning_rate": 8.601159407413988e-09, - "loss": 3.737, - "step": 4192500 - }, - { - "epoch": 46.57, - "learning_rate": 8.587277613664126e-09, - "loss": 3.7437, - "step": 4193000 - }, - { - "epoch": 46.57, - "learning_rate": 8.573395819914266e-09, - "loss": 3.7578, - "step": 4193500 - }, - { - "epoch": 46.58, - "learning_rate": 8.559514026164405e-09, - "loss": 3.7389, - "step": 4194000 - }, - { - "epoch": 46.58, - "learning_rate": 8.545632232414542e-09, - "loss": 3.7572, - "step": 4194500 - }, - { - "epoch": 46.59, - "learning_rate": 8.531750438664683e-09, - "loss": 3.7411, - "step": 4195000 - }, - { - "epoch": 46.59, - "learning_rate": 8.51786864491482e-09, - "loss": 3.754, - "step": 4195500 - }, - { - "epoch": 46.6, - "learning_rate": 8.50398685116496e-09, - "loss": 3.746, - "step": 4196000 - }, - { - "epoch": 46.6, - "learning_rate": 8.4901050574151e-09, - "loss": 3.7338, - "step": 4196500 - }, - { - "epoch": 46.61, - "learning_rate": 8.476223263665237e-09, - "loss": 3.776, - "step": 4197000 - }, - { - "epoch": 46.62, - "learning_rate": 8.462341469915376e-09, - "loss": 3.7661, - "step": 4197500 - }, - { - "epoch": 46.62, - "learning_rate": 8.448459676165515e-09, - "loss": 3.7635, - "step": 4198000 - }, - { - "epoch": 46.63, - "learning_rate": 8.434577882415654e-09, - "loss": 3.7478, - "step": 4198500 - }, - { - "epoch": 46.63, - "learning_rate": 8.420696088665791e-09, - "loss": 3.7508, - "step": 4199000 - }, - { - "epoch": 46.64, - "learning_rate": 8.406814294915932e-09, - "loss": 3.7457, - "step": 4199500 - }, - { - "epoch": 46.64, - "learning_rate": 8.392932501166071e-09, - "loss": 3.7412, - "step": 4200000 - }, - { - "epoch": 46.65, - "learning_rate": 8.379050707416208e-09, - "loss": 3.7442, - "step": 4200500 - }, - { - "epoch": 46.65, - "learning_rate": 8.365168913666349e-09, - "loss": 3.7372, - "step": 4201000 - }, - { - "epoch": 46.66, - "learning_rate": 8.351287119916486e-09, - "loss": 3.7292, - "step": 4201500 - }, - { - "epoch": 46.67, - "learning_rate": 8.337405326166625e-09, - "loss": 3.751, - "step": 4202000 - }, - { - "epoch": 46.67, - "learning_rate": 8.323523532416764e-09, - "loss": 3.7346, - "step": 4202500 - }, - { - "epoch": 46.68, - "learning_rate": 8.309641738666903e-09, - "loss": 3.7454, - "step": 4203000 - }, - { - "epoch": 46.68, - "learning_rate": 8.295759944917042e-09, - "loss": 3.745, - "step": 4203500 - }, - { - "epoch": 46.69, - "learning_rate": 8.281878151167181e-09, - "loss": 3.7355, - "step": 4204000 - }, - { - "epoch": 46.69, - "learning_rate": 8.26799635741732e-09, - "loss": 3.738, - "step": 4204500 - }, - { - "epoch": 46.7, - "learning_rate": 8.254114563667459e-09, - "loss": 3.7401, - "step": 4205000 - }, - { - "epoch": 46.7, - "learning_rate": 8.240232769917598e-09, - "loss": 3.7503, - "step": 4205500 - }, - { - "epoch": 46.71, - "learning_rate": 8.226350976167735e-09, - "loss": 3.7402, - "step": 4206000 - }, - { - "epoch": 46.72, - "learning_rate": 8.212469182417874e-09, - "loss": 3.7453, - "step": 4206500 - }, - { - "epoch": 46.72, - "learning_rate": 8.198587388668015e-09, - "loss": 3.7628, - "step": 4207000 - }, - { - "epoch": 46.73, - "learning_rate": 8.184705594918152e-09, - "loss": 3.7436, - "step": 4207500 - }, - { - "epoch": 46.73, - "learning_rate": 8.170823801168291e-09, - "loss": 3.7496, - "step": 4208000 - }, - { - "epoch": 46.74, - "learning_rate": 8.15694200741843e-09, - "loss": 3.7505, - "step": 4208500 - }, - { - "epoch": 46.74, - "learning_rate": 8.143060213668569e-09, - "loss": 3.7419, - "step": 4209000 - }, - { - "epoch": 46.75, - "learning_rate": 8.129178419918708e-09, - "loss": 3.7672, - "step": 4209500 - }, - { - "epoch": 46.75, - "learning_rate": 8.115296626168847e-09, - "loss": 3.7119, - "step": 4210000 - }, - { - "epoch": 46.76, - "learning_rate": 8.101414832418986e-09, - "loss": 3.7372, - "step": 4210500 - }, - { - "epoch": 46.76, - "learning_rate": 8.087533038669125e-09, - "loss": 3.7472, - "step": 4211000 - }, - { - "epoch": 46.77, - "learning_rate": 8.073651244919264e-09, - "loss": 3.7616, - "step": 4211500 - }, - { - "epoch": 46.78, - "learning_rate": 8.059769451169401e-09, - "loss": 3.7497, - "step": 4212000 - }, - { - "epoch": 46.78, - "learning_rate": 8.045887657419542e-09, - "loss": 3.7366, - "step": 4212500 - }, - { - "epoch": 46.79, - "learning_rate": 8.03200586366968e-09, - "loss": 3.766, - "step": 4213000 - }, - { - "epoch": 46.79, - "learning_rate": 8.018124069919818e-09, - "loss": 3.7244, - "step": 4213500 - }, - { - "epoch": 46.8, - "learning_rate": 8.004242276169958e-09, - "loss": 3.7508, - "step": 4214000 - }, - { - "epoch": 46.8, - "learning_rate": 7.990360482420096e-09, - "loss": 3.74, - "step": 4214500 - }, - { - "epoch": 46.81, - "learning_rate": 7.976478688670235e-09, - "loss": 3.7578, - "step": 4215000 - }, - { - "epoch": 46.81, - "learning_rate": 7.962596894920374e-09, - "loss": 3.7258, - "step": 4215500 - }, - { - "epoch": 46.82, - "learning_rate": 7.948715101170513e-09, - "loss": 3.7509, - "step": 4216000 - }, - { - "epoch": 46.83, - "learning_rate": 7.934833307420652e-09, - "loss": 3.7406, - "step": 4216500 - }, - { - "epoch": 46.83, - "learning_rate": 7.92095151367079e-09, - "loss": 3.7494, - "step": 4217000 - }, - { - "epoch": 46.84, - "learning_rate": 7.90706971992093e-09, - "loss": 3.7761, - "step": 4217500 - }, - { - "epoch": 46.84, - "learning_rate": 7.893187926171067e-09, - "loss": 3.7286, - "step": 4218000 - }, - { - "epoch": 46.85, - "learning_rate": 7.879306132421207e-09, - "loss": 3.7617, - "step": 4218500 - }, - { - "epoch": 46.85, - "learning_rate": 7.865424338671345e-09, - "loss": 3.754, - "step": 4219000 - }, - { - "epoch": 46.86, - "learning_rate": 7.851542544921484e-09, - "loss": 3.7426, - "step": 4219500 - }, - { - "epoch": 46.86, - "learning_rate": 7.837660751171624e-09, - "loss": 3.7342, - "step": 4220000 - }, - { - "epoch": 46.87, - "learning_rate": 7.823778957421762e-09, - "loss": 3.7505, - "step": 4220500 - }, - { - "epoch": 46.88, - "learning_rate": 7.8098971636719e-09, - "loss": 3.7531, - "step": 4221000 - }, - { - "epoch": 46.88, - "learning_rate": 7.79601536992204e-09, - "loss": 3.7449, - "step": 4221500 - }, - { - "epoch": 46.89, - "learning_rate": 7.782133576172178e-09, - "loss": 3.7328, - "step": 4222000 - }, - { - "epoch": 46.89, - "learning_rate": 7.768251782422316e-09, - "loss": 3.7622, - "step": 4222500 - }, - { - "epoch": 46.9, - "learning_rate": 7.754369988672456e-09, - "loss": 3.7446, - "step": 4223000 - }, - { - "epoch": 46.9, - "learning_rate": 7.740488194922595e-09, - "loss": 3.7417, - "step": 4223500 - }, - { - "epoch": 46.91, - "learning_rate": 7.726606401172733e-09, - "loss": 3.741, - "step": 4224000 - }, - { - "epoch": 46.91, - "learning_rate": 7.712724607422873e-09, - "loss": 3.7444, - "step": 4224500 - }, - { - "epoch": 46.92, - "learning_rate": 7.69884281367301e-09, - "loss": 3.7523, - "step": 4225000 - }, - { - "epoch": 46.93, - "learning_rate": 7.68496101992315e-09, - "loss": 3.7361, - "step": 4225500 - }, - { - "epoch": 46.93, - "learning_rate": 7.671079226173288e-09, - "loss": 3.7277, - "step": 4226000 - }, - { - "epoch": 46.94, - "learning_rate": 7.657197432423427e-09, - "loss": 3.7439, - "step": 4226500 - }, - { - "epoch": 46.94, - "learning_rate": 7.643315638673566e-09, - "loss": 3.7499, - "step": 4227000 - }, - { - "epoch": 46.95, - "learning_rate": 7.629433844923705e-09, - "loss": 3.7438, - "step": 4227500 - }, - { - "epoch": 46.95, - "learning_rate": 7.615552051173844e-09, - "loss": 3.7331, - "step": 4228000 - }, - { - "epoch": 46.96, - "learning_rate": 7.601670257423983e-09, - "loss": 3.7447, - "step": 4228500 - }, - { - "epoch": 46.96, - "learning_rate": 7.587788463674122e-09, - "loss": 3.7508, - "step": 4229000 - }, - { - "epoch": 46.97, - "learning_rate": 7.573906669924261e-09, - "loss": 3.7655, - "step": 4229500 - }, - { - "epoch": 46.98, - "learning_rate": 7.5600248761744e-09, - "loss": 3.7447, - "step": 4230000 - }, - { - "epoch": 46.98, - "learning_rate": 7.546143082424539e-09, - "loss": 3.7397, - "step": 4230500 - }, - { - "epoch": 46.99, - "learning_rate": 7.532261288674676e-09, - "loss": 3.7532, - "step": 4231000 - }, - { - "epoch": 46.99, - "learning_rate": 7.518379494924817e-09, - "loss": 3.7338, - "step": 4231500 - }, - { - "epoch": 47.0, - "learning_rate": 7.504497701174954e-09, - "loss": 3.7446, - "step": 4232000 - }, - { - "epoch": 47.0, - "eval_loss": 3.8242743015289307, - "eval_runtime": 6.3073, - "eval_samples_per_second": 246.381, - "step": 4232162 - }, - { - "epoch": 47.0, - "learning_rate": 7.490615907425093e-09, - "loss": 3.7579, - "step": 4232500 - }, - { - "epoch": 47.01, - "learning_rate": 7.476734113675232e-09, - "loss": 3.7574, - "step": 4233000 - }, - { - "epoch": 47.01, - "learning_rate": 7.462852319925371e-09, - "loss": 3.7213, - "step": 4233500 - }, - { - "epoch": 47.02, - "learning_rate": 7.44897052617551e-09, - "loss": 3.7384, - "step": 4234000 - }, - { - "epoch": 47.03, - "learning_rate": 7.435088732425648e-09, - "loss": 3.7482, - "step": 4234500 - }, - { - "epoch": 47.03, - "learning_rate": 7.421206938675788e-09, - "loss": 3.7463, - "step": 4235000 - }, - { - "epoch": 47.04, - "learning_rate": 7.407325144925926e-09, - "loss": 3.7536, - "step": 4235500 - }, - { - "epoch": 47.04, - "learning_rate": 7.393443351176065e-09, - "loss": 3.7523, - "step": 4236000 - }, - { - "epoch": 47.05, - "learning_rate": 7.379561557426205e-09, - "loss": 3.752, - "step": 4236500 - }, - { - "epoch": 47.05, - "learning_rate": 7.365679763676343e-09, - "loss": 3.7335, - "step": 4237000 - }, - { - "epoch": 47.06, - "learning_rate": 7.351797969926482e-09, - "loss": 3.7644, - "step": 4237500 - }, - { - "epoch": 47.06, - "learning_rate": 7.33791617617662e-09, - "loss": 3.7377, - "step": 4238000 - }, - { - "epoch": 47.07, - "learning_rate": 7.32403438242676e-09, - "loss": 3.7583, - "step": 4238500 - }, - { - "epoch": 47.08, - "learning_rate": 7.310152588676897e-09, - "loss": 3.748, - "step": 4239000 - }, - { - "epoch": 47.08, - "learning_rate": 7.296270794927037e-09, - "loss": 3.7442, - "step": 4239500 - }, - { - "epoch": 47.09, - "learning_rate": 7.282389001177177e-09, - "loss": 3.7548, - "step": 4240000 - }, - { - "epoch": 47.09, - "learning_rate": 7.268507207427314e-09, - "loss": 3.7585, - "step": 4240500 - }, - { - "epoch": 47.1, - "learning_rate": 7.254625413677454e-09, - "loss": 3.7421, - "step": 4241000 - }, - { - "epoch": 47.1, - "learning_rate": 7.240743619927592e-09, - "loss": 3.753, - "step": 4241500 - }, - { - "epoch": 47.11, - "learning_rate": 7.226861826177731e-09, - "loss": 3.7334, - "step": 4242000 - }, - { - "epoch": 47.11, - "learning_rate": 7.212980032427871e-09, - "loss": 3.7381, - "step": 4242500 - }, - { - "epoch": 47.12, - "learning_rate": 7.199098238678009e-09, - "loss": 3.7534, - "step": 4243000 - }, - { - "epoch": 47.13, - "learning_rate": 7.185216444928148e-09, - "loss": 3.7541, - "step": 4243500 - }, - { - "epoch": 47.13, - "learning_rate": 7.171334651178286e-09, - "loss": 3.7599, - "step": 4244000 - }, - { - "epoch": 47.14, - "learning_rate": 7.157452857428426e-09, - "loss": 3.757, - "step": 4244500 - }, - { - "epoch": 47.14, - "learning_rate": 7.143571063678564e-09, - "loss": 3.7498, - "step": 4245000 - }, - { - "epoch": 47.15, - "learning_rate": 7.129689269928703e-09, - "loss": 3.7329, - "step": 4245500 - }, - { - "epoch": 47.15, - "learning_rate": 7.1158074761788425e-09, - "loss": 3.7626, - "step": 4246000 - }, - { - "epoch": 47.16, - "learning_rate": 7.101925682428981e-09, - "loss": 3.7494, - "step": 4246500 - }, - { - "epoch": 47.16, - "learning_rate": 7.0880438886791196e-09, - "loss": 3.7384, - "step": 4247000 - }, - { - "epoch": 47.17, - "learning_rate": 7.074162094929258e-09, - "loss": 3.7444, - "step": 4247500 - }, - { - "epoch": 47.18, - "learning_rate": 7.0602803011793975e-09, - "loss": 3.7411, - "step": 4248000 - }, - { - "epoch": 47.18, - "learning_rate": 7.046398507429535e-09, - "loss": 3.754, - "step": 4248500 - }, - { - "epoch": 47.19, - "learning_rate": 7.0325167136796745e-09, - "loss": 3.7453, - "step": 4249000 - }, - { - "epoch": 47.19, - "learning_rate": 7.018634919929814e-09, - "loss": 3.7349, - "step": 4249500 - }, - { - "epoch": 47.2, - "learning_rate": 7.004753126179952e-09, - "loss": 3.759, - "step": 4250000 - }, - { - "epoch": 47.2, - "learning_rate": 6.9908713324300914e-09, - "loss": 3.7364, - "step": 4250500 - }, - { - "epoch": 47.21, - "learning_rate": 6.9769895386802295e-09, - "loss": 3.7578, - "step": 4251000 - }, - { - "epoch": 47.21, - "learning_rate": 6.9631077449303685e-09, - "loss": 3.7372, - "step": 4251500 - }, - { - "epoch": 47.22, - "learning_rate": 6.949225951180507e-09, - "loss": 3.7757, - "step": 4252000 - }, - { - "epoch": 47.23, - "learning_rate": 6.935344157430646e-09, - "loss": 3.7667, - "step": 4252500 - }, - { - "epoch": 47.23, - "learning_rate": 6.921462363680785e-09, - "loss": 3.7323, - "step": 4253000 - }, - { - "epoch": 47.24, - "learning_rate": 6.9075805699309235e-09, - "loss": 3.7586, - "step": 4253500 - }, - { - "epoch": 47.24, - "learning_rate": 6.893698776181063e-09, - "loss": 3.7294, - "step": 4254000 - }, - { - "epoch": 47.25, - "learning_rate": 6.879816982431201e-09, - "loss": 3.7665, - "step": 4254500 - }, - { - "epoch": 47.25, - "learning_rate": 6.86593518868134e-09, - "loss": 3.7453, - "step": 4255000 - }, - { - "epoch": 47.26, - "learning_rate": 6.8520533949314785e-09, - "loss": 3.7365, - "step": 4255500 - }, - { - "epoch": 47.26, - "learning_rate": 6.838171601181618e-09, - "loss": 3.7523, - "step": 4256000 - }, - { - "epoch": 47.27, - "learning_rate": 6.824289807431757e-09, - "loss": 3.74, - "step": 4256500 - }, - { - "epoch": 47.28, - "learning_rate": 6.810408013681895e-09, - "loss": 3.7408, - "step": 4257000 - }, - { - "epoch": 47.28, - "learning_rate": 6.796526219932035e-09, - "loss": 3.7457, - "step": 4257500 - }, - { - "epoch": 47.29, - "learning_rate": 6.7826444261821724e-09, - "loss": 3.7513, - "step": 4258000 - }, - { - "epoch": 47.29, - "learning_rate": 6.768762632432312e-09, - "loss": 3.7356, - "step": 4258500 - }, - { - "epoch": 47.3, - "learning_rate": 6.754880838682451e-09, - "loss": 3.7465, - "step": 4259000 - }, - { - "epoch": 47.3, - "learning_rate": 6.740999044932589e-09, - "loss": 3.7641, - "step": 4259500 - }, - { - "epoch": 47.31, - "learning_rate": 6.727117251182729e-09, - "loss": 3.7496, - "step": 4260000 - }, - { - "epoch": 47.31, - "learning_rate": 6.713235457432867e-09, - "loss": 3.7393, - "step": 4260500 - }, - { - "epoch": 47.32, - "learning_rate": 6.699353663683006e-09, - "loss": 3.7531, - "step": 4261000 - }, - { - "epoch": 47.33, - "learning_rate": 6.685471869933144e-09, - "loss": 3.7458, - "step": 4261500 - }, - { - "epoch": 47.33, - "learning_rate": 6.671590076183284e-09, - "loss": 3.7333, - "step": 4262000 - }, - { - "epoch": 47.34, - "learning_rate": 6.657708282433423e-09, - "loss": 3.7629, - "step": 4262500 - }, - { - "epoch": 47.34, - "learning_rate": 6.643826488683561e-09, - "loss": 3.7322, - "step": 4263000 - }, - { - "epoch": 47.35, - "learning_rate": 6.629944694933701e-09, - "loss": 3.7509, - "step": 4263500 - }, - { - "epoch": 47.35, - "learning_rate": 6.616062901183839e-09, - "loss": 3.7388, - "step": 4264000 - }, - { - "epoch": 47.36, - "learning_rate": 6.602181107433978e-09, - "loss": 3.7486, - "step": 4264500 - }, - { - "epoch": 47.36, - "learning_rate": 6.588299313684116e-09, - "loss": 3.7601, - "step": 4265000 - }, - { - "epoch": 47.37, - "learning_rate": 6.574417519934256e-09, - "loss": 3.7358, - "step": 4265500 - }, - { - "epoch": 47.38, - "learning_rate": 6.560535726184395e-09, - "loss": 3.7471, - "step": 4266000 - }, - { - "epoch": 47.38, - "learning_rate": 6.546653932434533e-09, - "loss": 3.762, - "step": 4266500 - }, - { - "epoch": 47.39, - "learning_rate": 6.532772138684672e-09, - "loss": 3.7427, - "step": 4267000 - }, - { - "epoch": 47.39, - "learning_rate": 6.51889034493481e-09, - "loss": 3.748, - "step": 4267500 - }, - { - "epoch": 47.4, - "learning_rate": 6.50500855118495e-09, - "loss": 3.7439, - "step": 4268000 - }, - { - "epoch": 47.4, - "learning_rate": 6.491126757435088e-09, - "loss": 3.7493, - "step": 4268500 - }, - { - "epoch": 47.41, - "learning_rate": 6.477244963685227e-09, - "loss": 3.7257, - "step": 4269000 - }, - { - "epoch": 47.41, - "learning_rate": 6.463363169935367e-09, - "loss": 3.7338, - "step": 4269500 - }, - { - "epoch": 47.42, - "learning_rate": 6.449481376185505e-09, - "loss": 3.7524, - "step": 4270000 - }, - { - "epoch": 47.43, - "learning_rate": 6.435599582435644e-09, - "loss": 3.7315, - "step": 4270500 - }, - { - "epoch": 47.43, - "learning_rate": 6.421717788685782e-09, - "loss": 3.7389, - "step": 4271000 - }, - { - "epoch": 47.44, - "learning_rate": 6.407835994935922e-09, - "loss": 3.7326, - "step": 4271500 - }, - { - "epoch": 47.44, - "learning_rate": 6.39395420118606e-09, - "loss": 3.7458, - "step": 4272000 - }, - { - "epoch": 47.45, - "learning_rate": 6.380072407436199e-09, - "loss": 3.7548, - "step": 4272500 - }, - { - "epoch": 47.45, - "learning_rate": 6.366190613686339e-09, - "loss": 3.7648, - "step": 4273000 - }, - { - "epoch": 47.46, - "learning_rate": 6.352308819936476e-09, - "loss": 3.7574, - "step": 4273500 - }, - { - "epoch": 47.46, - "learning_rate": 6.338427026186616e-09, - "loss": 3.7652, - "step": 4274000 - }, - { - "epoch": 47.47, - "learning_rate": 6.324545232436754e-09, - "loss": 3.7503, - "step": 4274500 - }, - { - "epoch": 47.48, - "learning_rate": 6.310663438686893e-09, - "loss": 3.748, - "step": 4275000 - }, - { - "epoch": 47.48, - "learning_rate": 6.2967816449370326e-09, - "loss": 3.7334, - "step": 4275500 - }, - { - "epoch": 47.49, - "learning_rate": 6.282899851187171e-09, - "loss": 3.7136, - "step": 4276000 - }, - { - "epoch": 47.49, - "learning_rate": 6.26901805743731e-09, - "loss": 3.7517, - "step": 4276500 - }, - { - "epoch": 47.5, - "learning_rate": 6.255136263687448e-09, - "loss": 3.7631, - "step": 4277000 - }, - { - "epoch": 47.5, - "learning_rate": 6.2412544699375876e-09, - "loss": 3.7392, - "step": 4277500 - }, - { - "epoch": 47.51, - "learning_rate": 6.2273726761877265e-09, - "loss": 3.7512, - "step": 4278000 - }, - { - "epoch": 47.51, - "learning_rate": 6.213490882437865e-09, - "loss": 3.7409, - "step": 4278500 - }, - { - "epoch": 47.52, - "learning_rate": 6.199609088688004e-09, - "loss": 3.7504, - "step": 4279000 - }, - { - "epoch": 47.53, - "learning_rate": 6.1857272949381426e-09, - "loss": 3.7566, - "step": 4279500 - }, - { - "epoch": 47.53, - "learning_rate": 6.171845501188281e-09, - "loss": 3.7324, - "step": 4280000 - }, - { - "epoch": 47.54, - "learning_rate": 6.1579637074384205e-09, - "loss": 3.7633, - "step": 4280500 - }, - { - "epoch": 47.54, - "learning_rate": 6.1440819136885594e-09, - "loss": 3.7647, - "step": 4281000 - }, - { - "epoch": 47.55, - "learning_rate": 6.1302001199386975e-09, - "loss": 3.7463, - "step": 4281500 - }, - { - "epoch": 47.55, - "learning_rate": 6.1163183261888365e-09, - "loss": 3.7659, - "step": 4282000 - }, - { - "epoch": 47.56, - "learning_rate": 6.1024365324389755e-09, - "loss": 3.7252, - "step": 4282500 - }, - { - "epoch": 47.56, - "learning_rate": 6.0885547386891136e-09, - "loss": 3.7308, - "step": 4283000 - }, - { - "epoch": 47.57, - "learning_rate": 6.0746729449392525e-09, - "loss": 3.7476, - "step": 4283500 - }, - { - "epoch": 47.58, - "learning_rate": 6.060791151189392e-09, - "loss": 3.7416, - "step": 4284000 - }, - { - "epoch": 47.58, - "learning_rate": 6.0469093574395304e-09, - "loss": 3.7505, - "step": 4284500 - }, - { - "epoch": 47.59, - "learning_rate": 6.033027563689669e-09, - "loss": 3.7479, - "step": 4285000 - }, - { - "epoch": 47.59, - "learning_rate": 6.019145769939808e-09, - "loss": 3.7348, - "step": 4285500 - }, - { - "epoch": 47.6, - "learning_rate": 6.005263976189947e-09, - "loss": 3.7442, - "step": 4286000 - }, - { - "epoch": 47.6, - "learning_rate": 5.9913821824400854e-09, - "loss": 3.749, - "step": 4286500 - }, - { - "epoch": 47.61, - "learning_rate": 5.977500388690225e-09, - "loss": 3.7436, - "step": 4287000 - }, - { - "epoch": 47.61, - "learning_rate": 5.963618594940363e-09, - "loss": 3.7438, - "step": 4287500 - }, - { - "epoch": 47.62, - "learning_rate": 5.949736801190502e-09, - "loss": 3.7576, - "step": 4288000 - }, - { - "epoch": 47.63, - "learning_rate": 5.935855007440641e-09, - "loss": 3.7379, - "step": 4288500 - }, - { - "epoch": 47.63, - "learning_rate": 5.92197321369078e-09, - "loss": 3.7452, - "step": 4289000 - }, - { - "epoch": 47.64, - "learning_rate": 5.908091419940918e-09, - "loss": 3.7453, - "step": 4289500 - }, - { - "epoch": 47.64, - "learning_rate": 5.894209626191057e-09, - "loss": 3.731, - "step": 4290000 - }, - { - "epoch": 47.65, - "learning_rate": 5.880327832441197e-09, - "loss": 3.7422, - "step": 4290500 - }, - { - "epoch": 47.65, - "learning_rate": 5.866446038691335e-09, - "loss": 3.7458, - "step": 4291000 - }, - { - "epoch": 47.66, - "learning_rate": 5.852564244941474e-09, - "loss": 3.7311, - "step": 4291500 - }, - { - "epoch": 47.66, - "learning_rate": 5.838682451191613e-09, - "loss": 3.7443, - "step": 4292000 - }, - { - "epoch": 47.67, - "learning_rate": 5.824800657441751e-09, - "loss": 3.751, - "step": 4292500 - }, - { - "epoch": 47.68, - "learning_rate": 5.81091886369189e-09, - "loss": 3.7512, - "step": 4293000 - }, - { - "epoch": 47.68, - "learning_rate": 5.79703706994203e-09, - "loss": 3.7437, - "step": 4293500 - }, - { - "epoch": 47.69, - "learning_rate": 5.783155276192168e-09, - "loss": 3.7528, - "step": 4294000 - }, - { - "epoch": 47.69, - "learning_rate": 5.769273482442307e-09, - "loss": 3.7476, - "step": 4294500 - }, - { - "epoch": 47.7, - "learning_rate": 5.755391688692446e-09, - "loss": 3.7665, - "step": 4295000 - }, - { - "epoch": 47.7, - "learning_rate": 5.741509894942584e-09, - "loss": 3.7433, - "step": 4295500 - }, - { - "epoch": 47.71, - "learning_rate": 5.727628101192723e-09, - "loss": 3.7335, - "step": 4296000 - }, - { - "epoch": 47.71, - "learning_rate": 5.713746307442862e-09, - "loss": 3.7512, - "step": 4296500 - }, - { - "epoch": 47.72, - "learning_rate": 5.699864513693001e-09, - "loss": 3.7378, - "step": 4297000 - }, - { - "epoch": 47.73, - "learning_rate": 5.68598271994314e-09, - "loss": 3.7372, - "step": 4297500 - }, - { - "epoch": 47.73, - "learning_rate": 5.672100926193279e-09, - "loss": 3.759, - "step": 4298000 - }, - { - "epoch": 47.74, - "learning_rate": 5.658219132443418e-09, - "loss": 3.7443, - "step": 4298500 - }, - { - "epoch": 47.74, - "learning_rate": 5.644337338693556e-09, - "loss": 3.7466, - "step": 4299000 - }, - { - "epoch": 47.75, - "learning_rate": 5.630455544943695e-09, - "loss": 3.7543, - "step": 4299500 - }, - { - "epoch": 47.75, - "learning_rate": 5.616573751193834e-09, - "loss": 3.7359, - "step": 4300000 - }, - { - "epoch": 47.76, - "learning_rate": 5.602691957443973e-09, - "loss": 3.7403, - "step": 4300500 - }, - { - "epoch": 47.76, - "learning_rate": 5.588810163694112e-09, - "loss": 3.7444, - "step": 4301000 - }, - { - "epoch": 47.77, - "learning_rate": 5.574928369944251e-09, - "loss": 3.7319, - "step": 4301500 - }, - { - "epoch": 47.78, - "learning_rate": 5.561046576194389e-09, - "loss": 3.7551, - "step": 4302000 - }, - { - "epoch": 47.78, - "learning_rate": 5.547164782444528e-09, - "loss": 3.7523, - "step": 4302500 - }, - { - "epoch": 47.79, - "learning_rate": 5.533282988694667e-09, - "loss": 3.7417, - "step": 4303000 - }, - { - "epoch": 47.79, - "learning_rate": 5.519401194944806e-09, - "loss": 3.7365, - "step": 4303500 - }, - { - "epoch": 47.8, - "learning_rate": 5.505519401194945e-09, - "loss": 3.7219, - "step": 4304000 - }, - { - "epoch": 47.8, - "learning_rate": 5.491637607445084e-09, - "loss": 3.7592, - "step": 4304500 - }, - { - "epoch": 47.81, - "learning_rate": 5.477755813695222e-09, - "loss": 3.7389, - "step": 4305000 - }, - { - "epoch": 47.81, - "learning_rate": 5.463874019945361e-09, - "loss": 3.7461, - "step": 4305500 - }, - { - "epoch": 47.82, - "learning_rate": 5.4499922261955e-09, - "loss": 3.758, - "step": 4306000 - }, - { - "epoch": 47.83, - "learning_rate": 5.436110432445639e-09, - "loss": 3.7574, - "step": 4306500 - }, - { - "epoch": 47.83, - "learning_rate": 5.422228638695778e-09, - "loss": 3.7653, - "step": 4307000 - }, - { - "epoch": 47.84, - "learning_rate": 5.408346844945917e-09, - "loss": 3.7657, - "step": 4307500 - }, - { - "epoch": 47.84, - "learning_rate": 5.3944650511960556e-09, - "loss": 3.7303, - "step": 4308000 - }, - { - "epoch": 47.85, - "learning_rate": 5.380583257446194e-09, - "loss": 3.751, - "step": 4308500 - }, - { - "epoch": 47.85, - "learning_rate": 5.366701463696333e-09, - "loss": 3.742, - "step": 4309000 - }, - { - "epoch": 47.86, - "learning_rate": 5.352819669946472e-09, - "loss": 3.748, - "step": 4309500 - }, - { - "epoch": 47.86, - "learning_rate": 5.3389378761966106e-09, - "loss": 3.7503, - "step": 4310000 - }, - { - "epoch": 47.87, - "learning_rate": 5.3250560824467495e-09, - "loss": 3.7446, - "step": 4310500 - }, - { - "epoch": 47.88, - "learning_rate": 5.3111742886968885e-09, - "loss": 3.7476, - "step": 4311000 - }, - { - "epoch": 47.88, - "learning_rate": 5.297292494947027e-09, - "loss": 3.7416, - "step": 4311500 - }, - { - "epoch": 47.89, - "learning_rate": 5.2834107011971655e-09, - "loss": 3.7469, - "step": 4312000 - }, - { - "epoch": 47.89, - "learning_rate": 5.2695289074473045e-09, - "loss": 3.7595, - "step": 4312500 - }, - { - "epoch": 47.9, - "learning_rate": 5.255647113697443e-09, - "loss": 3.741, - "step": 4313000 - }, - { - "epoch": 47.9, - "learning_rate": 5.2417653199475824e-09, - "loss": 3.7428, - "step": 4313500 - }, - { - "epoch": 47.91, - "learning_rate": 5.227883526197721e-09, - "loss": 3.7363, - "step": 4314000 - }, - { - "epoch": 47.91, - "learning_rate": 5.2140017324478595e-09, - "loss": 3.7287, - "step": 4314500 - }, - { - "epoch": 47.92, - "learning_rate": 5.2001199386979985e-09, - "loss": 3.7548, - "step": 4315000 - }, - { - "epoch": 47.93, - "learning_rate": 5.186238144948137e-09, - "loss": 3.7676, - "step": 4315500 - }, - { - "epoch": 47.93, - "learning_rate": 5.172356351198276e-09, - "loss": 3.7503, - "step": 4316000 - }, - { - "epoch": 47.94, - "learning_rate": 5.158474557448415e-09, - "loss": 3.7547, - "step": 4316500 - }, - { - "epoch": 47.94, - "learning_rate": 5.144592763698554e-09, - "loss": 3.7436, - "step": 4317000 - }, - { - "epoch": 47.95, - "learning_rate": 5.130710969948692e-09, - "loss": 3.7506, - "step": 4317500 - }, - { - "epoch": 47.95, - "learning_rate": 5.116829176198831e-09, - "loss": 3.7512, - "step": 4318000 - }, - { - "epoch": 47.96, - "learning_rate": 5.10294738244897e-09, - "loss": 3.7576, - "step": 4318500 - }, - { - "epoch": 47.96, - "learning_rate": 5.089065588699109e-09, - "loss": 3.752, - "step": 4319000 - }, - { - "epoch": 47.97, - "learning_rate": 5.075183794949247e-09, - "loss": 3.7454, - "step": 4319500 - }, - { - "epoch": 47.98, - "learning_rate": 5.061302001199387e-09, - "loss": 3.7301, - "step": 4320000 - }, - { - "epoch": 47.98, - "learning_rate": 5.047420207449526e-09, - "loss": 3.7606, - "step": 4320500 - }, - { - "epoch": 47.99, - "learning_rate": 5.033538413699664e-09, - "loss": 3.7354, - "step": 4321000 - }, - { - "epoch": 47.99, - "learning_rate": 5.019656619949803e-09, - "loss": 3.7535, - "step": 4321500 - }, - { - "epoch": 48.0, - "learning_rate": 5.005774826199942e-09, - "loss": 3.7427, - "step": 4322000 - }, - { - "epoch": 48.0, - "eval_loss": 3.824249267578125, - "eval_runtime": 6.2995, - "eval_samples_per_second": 246.684, - "step": 4322208 - }, - { - "epoch": 48.0, - "learning_rate": 4.99189303245008e-09, - "loss": 3.7413, - "step": 4322500 - }, - { - "epoch": 48.01, - "learning_rate": 4.978011238700219e-09, - "loss": 3.761, - "step": 4323000 - }, - { - "epoch": 48.01, - "learning_rate": 4.964129444950359e-09, - "loss": 3.7313, - "step": 4323500 - }, - { - "epoch": 48.02, - "learning_rate": 4.950247651200497e-09, - "loss": 3.7562, - "step": 4324000 - }, - { - "epoch": 48.03, - "learning_rate": 4.936365857450636e-09, - "loss": 3.7567, - "step": 4324500 - }, - { - "epoch": 48.03, - "learning_rate": 4.922484063700775e-09, - "loss": 3.7307, - "step": 4325000 - }, - { - "epoch": 48.04, - "learning_rate": 4.908602269950913e-09, - "loss": 3.7446, - "step": 4325500 - }, - { - "epoch": 48.04, - "learning_rate": 4.894720476201052e-09, - "loss": 3.7502, - "step": 4326000 - }, - { - "epoch": 48.05, - "learning_rate": 4.880838682451192e-09, - "loss": 3.7549, - "step": 4326500 - }, - { - "epoch": 48.05, - "learning_rate": 4.86695688870133e-09, - "loss": 3.7369, - "step": 4327000 - }, - { - "epoch": 48.06, - "learning_rate": 4.853075094951469e-09, - "loss": 3.7435, - "step": 4327500 - }, - { - "epoch": 48.06, - "learning_rate": 4.839193301201608e-09, - "loss": 3.7563, - "step": 4328000 - }, - { - "epoch": 48.07, - "learning_rate": 4.825311507451747e-09, - "loss": 3.7402, - "step": 4328500 - }, - { - "epoch": 48.08, - "learning_rate": 4.811429713701885e-09, - "loss": 3.736, - "step": 4329000 - }, - { - "epoch": 48.08, - "learning_rate": 4.797547919952024e-09, - "loss": 3.734, - "step": 4329500 - }, - { - "epoch": 48.09, - "learning_rate": 4.783666126202164e-09, - "loss": 3.7503, - "step": 4330000 - }, - { - "epoch": 48.09, - "learning_rate": 4.769784332452302e-09, - "loss": 3.762, - "step": 4330500 - }, - { - "epoch": 48.1, - "learning_rate": 4.755902538702441e-09, - "loss": 3.76, - "step": 4331000 - }, - { - "epoch": 48.1, - "learning_rate": 4.74202074495258e-09, - "loss": 3.7506, - "step": 4331500 - }, - { - "epoch": 48.11, - "learning_rate": 4.728138951202718e-09, - "loss": 3.7372, - "step": 4332000 - }, - { - "epoch": 48.11, - "learning_rate": 4.714257157452857e-09, - "loss": 3.7512, - "step": 4332500 - }, - { - "epoch": 48.12, - "learning_rate": 4.700375363702997e-09, - "loss": 3.7584, - "step": 4333000 - }, - { - "epoch": 48.13, - "learning_rate": 4.686493569953135e-09, - "loss": 3.7337, - "step": 4333500 - }, - { - "epoch": 48.13, - "learning_rate": 4.672611776203274e-09, - "loss": 3.7273, - "step": 4334000 - }, - { - "epoch": 48.14, - "learning_rate": 4.658729982453413e-09, - "loss": 3.7547, - "step": 4334500 - }, - { - "epoch": 48.14, - "learning_rate": 4.644848188703551e-09, - "loss": 3.7307, - "step": 4335000 - }, - { - "epoch": 48.15, - "learning_rate": 4.63096639495369e-09, - "loss": 3.7276, - "step": 4335500 - }, - { - "epoch": 48.15, - "learning_rate": 4.617084601203829e-09, - "loss": 3.7379, - "step": 4336000 - }, - { - "epoch": 48.16, - "learning_rate": 4.603202807453968e-09, - "loss": 3.7674, - "step": 4336500 - }, - { - "epoch": 48.16, - "learning_rate": 4.589321013704107e-09, - "loss": 3.76, - "step": 4337000 - }, - { - "epoch": 48.17, - "learning_rate": 4.575439219954246e-09, - "loss": 3.7587, - "step": 4337500 - }, - { - "epoch": 48.18, - "learning_rate": 4.561557426204384e-09, - "loss": 3.7482, - "step": 4338000 - }, - { - "epoch": 48.18, - "learning_rate": 4.547675632454523e-09, - "loss": 3.7387, - "step": 4338500 - }, - { - "epoch": 48.19, - "learning_rate": 4.533793838704662e-09, - "loss": 3.7455, - "step": 4339000 - }, - { - "epoch": 48.19, - "learning_rate": 4.519912044954801e-09, - "loss": 3.7585, - "step": 4339500 - }, - { - "epoch": 48.2, - "learning_rate": 4.50603025120494e-09, - "loss": 3.7477, - "step": 4340000 - }, - { - "epoch": 48.2, - "learning_rate": 4.4921484574550786e-09, - "loss": 3.7481, - "step": 4340500 - }, - { - "epoch": 48.21, - "learning_rate": 4.4782666637052175e-09, - "loss": 3.7569, - "step": 4341000 - }, - { - "epoch": 48.21, - "learning_rate": 4.464384869955356e-09, - "loss": 3.7658, - "step": 4341500 - }, - { - "epoch": 48.22, - "learning_rate": 4.450503076205495e-09, - "loss": 3.7601, - "step": 4342000 - }, - { - "epoch": 48.23, - "learning_rate": 4.4366212824556336e-09, - "loss": 3.7424, - "step": 4342500 - }, - { - "epoch": 48.23, - "learning_rate": 4.4227394887057725e-09, - "loss": 3.7402, - "step": 4343000 - }, - { - "epoch": 48.24, - "learning_rate": 4.4088576949559115e-09, - "loss": 3.7434, - "step": 4343500 - }, - { - "epoch": 48.24, - "learning_rate": 4.3949759012060504e-09, - "loss": 3.7456, - "step": 4344000 - }, - { - "epoch": 48.25, - "learning_rate": 4.3810941074561885e-09, - "loss": 3.7526, - "step": 4344500 - }, - { - "epoch": 48.25, - "learning_rate": 4.3672123137063275e-09, - "loss": 3.7507, - "step": 4345000 - }, - { - "epoch": 48.26, - "learning_rate": 4.3533305199564665e-09, - "loss": 3.7498, - "step": 4345500 - }, - { - "epoch": 48.26, - "learning_rate": 4.3394487262066046e-09, - "loss": 3.7383, - "step": 4346000 - }, - { - "epoch": 48.27, - "learning_rate": 4.325566932456744e-09, - "loss": 3.774, - "step": 4346500 - }, - { - "epoch": 48.28, - "learning_rate": 4.311685138706883e-09, - "loss": 3.7397, - "step": 4347000 - }, - { - "epoch": 48.28, - "learning_rate": 4.2978033449570215e-09, - "loss": 3.7565, - "step": 4347500 - }, - { - "epoch": 48.29, - "learning_rate": 4.28392155120716e-09, - "loss": 3.7412, - "step": 4348000 - }, - { - "epoch": 48.29, - "learning_rate": 4.270039757457299e-09, - "loss": 3.7558, - "step": 4348500 - }, - { - "epoch": 48.3, - "learning_rate": 4.256157963707438e-09, - "loss": 3.739, - "step": 4349000 - }, - { - "epoch": 48.3, - "learning_rate": 4.242276169957577e-09, - "loss": 3.7347, - "step": 4349500 - }, - { - "epoch": 48.31, - "learning_rate": 4.228394376207716e-09, - "loss": 3.7446, - "step": 4350000 - }, - { - "epoch": 48.31, - "learning_rate": 4.214512582457855e-09, - "loss": 3.7396, - "step": 4350500 - }, - { - "epoch": 48.32, - "learning_rate": 4.200630788707993e-09, - "loss": 3.7384, - "step": 4351000 - }, - { - "epoch": 48.33, - "learning_rate": 4.186748994958132e-09, - "loss": 3.7331, - "step": 4351500 - }, - { - "epoch": 48.33, - "learning_rate": 4.172867201208271e-09, - "loss": 3.7452, - "step": 4352000 - }, - { - "epoch": 48.34, - "learning_rate": 4.158985407458409e-09, - "loss": 3.7511, - "step": 4352500 - }, - { - "epoch": 48.34, - "learning_rate": 4.145103613708549e-09, - "loss": 3.7357, - "step": 4353000 - }, - { - "epoch": 48.35, - "learning_rate": 4.131221819958688e-09, - "loss": 3.7459, - "step": 4353500 - }, - { - "epoch": 48.35, - "learning_rate": 4.117340026208826e-09, - "loss": 3.7424, - "step": 4354000 - }, - { - "epoch": 48.36, - "learning_rate": 4.103458232458965e-09, - "loss": 3.7428, - "step": 4354500 - }, - { - "epoch": 48.36, - "learning_rate": 4.089576438709104e-09, - "loss": 3.7326, - "step": 4355000 - }, - { - "epoch": 48.37, - "learning_rate": 4.075694644959242e-09, - "loss": 3.7446, - "step": 4355500 - }, - { - "epoch": 48.38, - "learning_rate": 4.061812851209382e-09, - "loss": 3.7469, - "step": 4356000 - }, - { - "epoch": 48.38, - "learning_rate": 4.047931057459521e-09, - "loss": 3.7358, - "step": 4356500 - }, - { - "epoch": 48.39, - "learning_rate": 4.034049263709659e-09, - "loss": 3.724, - "step": 4357000 - }, - { - "epoch": 48.39, - "learning_rate": 4.020167469959798e-09, - "loss": 3.7405, - "step": 4357500 - }, - { - "epoch": 48.4, - "learning_rate": 4.006285676209937e-09, - "loss": 3.7326, - "step": 4358000 - }, - { - "epoch": 48.4, - "learning_rate": 3.992403882460076e-09, - "loss": 3.76, - "step": 4358500 - }, - { - "epoch": 48.41, - "learning_rate": 3.978522088710214e-09, - "loss": 3.7301, - "step": 4359000 - }, - { - "epoch": 48.41, - "learning_rate": 3.964640294960354e-09, - "loss": 3.774, - "step": 4359500 - }, - { - "epoch": 48.42, - "learning_rate": 3.950758501210492e-09, - "loss": 3.7366, - "step": 4360000 - }, - { - "epoch": 48.43, - "learning_rate": 3.936876707460631e-09, - "loss": 3.7594, - "step": 4360500 - }, - { - "epoch": 48.43, - "learning_rate": 3.92299491371077e-09, - "loss": 3.7262, - "step": 4361000 - }, - { - "epoch": 48.44, - "learning_rate": 3.909113119960909e-09, - "loss": 3.7613, - "step": 4361500 - }, - { - "epoch": 48.44, - "learning_rate": 3.895231326211047e-09, - "loss": 3.7586, - "step": 4362000 - }, - { - "epoch": 48.45, - "learning_rate": 3.881349532461186e-09, - "loss": 3.7425, - "step": 4362500 - }, - { - "epoch": 48.45, - "learning_rate": 3.867467738711326e-09, - "loss": 3.7375, - "step": 4363000 - }, - { - "epoch": 48.46, - "learning_rate": 3.853585944961464e-09, - "loss": 3.7531, - "step": 4363500 - }, - { - "epoch": 48.46, - "learning_rate": 3.839704151211603e-09, - "loss": 3.7472, - "step": 4364000 - }, - { - "epoch": 48.47, - "learning_rate": 3.825822357461742e-09, - "loss": 3.736, - "step": 4364500 - }, - { - "epoch": 48.48, - "learning_rate": 3.81194056371188e-09, - "loss": 3.7336, - "step": 4365000 - }, - { - "epoch": 48.48, - "learning_rate": 3.798058769962019e-09, - "loss": 3.7498, - "step": 4365500 - }, - { - "epoch": 48.49, - "learning_rate": 3.784176976212159e-09, - "loss": 3.7462, - "step": 4366000 - }, - { - "epoch": 48.49, - "learning_rate": 3.770295182462297e-09, - "loss": 3.7505, - "step": 4366500 - }, - { - "epoch": 48.5, - "learning_rate": 3.756413388712436e-09, - "loss": 3.7319, - "step": 4367000 - }, - { - "epoch": 48.5, - "learning_rate": 3.742531594962575e-09, - "loss": 3.7197, - "step": 4367500 - }, - { - "epoch": 48.51, - "learning_rate": 3.728649801212713e-09, - "loss": 3.7403, - "step": 4368000 - }, - { - "epoch": 48.51, - "learning_rate": 3.7147680074628518e-09, - "loss": 3.7683, - "step": 4368500 - }, - { - "epoch": 48.52, - "learning_rate": 3.7008862137129907e-09, - "loss": 3.7513, - "step": 4369000 - }, - { - "epoch": 48.53, - "learning_rate": 3.68700441996313e-09, - "loss": 3.7484, - "step": 4369500 - }, - { - "epoch": 48.53, - "learning_rate": 3.6731226262132687e-09, - "loss": 3.7541, - "step": 4370000 - }, - { - "epoch": 48.54, - "learning_rate": 3.6592408324634076e-09, - "loss": 3.7547, - "step": 4370500 - }, - { - "epoch": 48.54, - "learning_rate": 3.645359038713546e-09, - "loss": 3.7576, - "step": 4371000 - }, - { - "epoch": 48.55, - "learning_rate": 3.631477244963685e-09, - "loss": 3.7592, - "step": 4371500 - }, - { - "epoch": 48.55, - "learning_rate": 3.6175954512138236e-09, - "loss": 3.7575, - "step": 4372000 - }, - { - "epoch": 48.56, - "learning_rate": 3.603713657463963e-09, - "loss": 3.7601, - "step": 4372500 - }, - { - "epoch": 48.56, - "learning_rate": 3.5898318637141016e-09, - "loss": 3.7476, - "step": 4373000 - }, - { - "epoch": 48.57, - "learning_rate": 3.5759500699642405e-09, - "loss": 3.7387, - "step": 4373500 - }, - { - "epoch": 48.58, - "learning_rate": 3.562068276214379e-09, - "loss": 3.7594, - "step": 4374000 - }, - { - "epoch": 48.58, - "learning_rate": 3.548186482464518e-09, - "loss": 3.7584, - "step": 4374500 - }, - { - "epoch": 48.59, - "learning_rate": 3.5343046887146565e-09, - "loss": 3.7279, - "step": 4375000 - }, - { - "epoch": 48.59, - "learning_rate": 3.5204228949647955e-09, - "loss": 3.7369, - "step": 4375500 - }, - { - "epoch": 48.6, - "learning_rate": 3.506541101214935e-09, - "loss": 3.7355, - "step": 4376000 - }, - { - "epoch": 48.6, - "learning_rate": 3.4926593074650734e-09, - "loss": 3.7466, - "step": 4376500 - }, - { - "epoch": 48.61, - "learning_rate": 3.478777513715212e-09, - "loss": 3.7471, - "step": 4377000 - }, - { - "epoch": 48.61, - "learning_rate": 3.464895719965351e-09, - "loss": 3.7259, - "step": 4377500 - }, - { - "epoch": 48.62, - "learning_rate": 3.4510139262154895e-09, - "loss": 3.737, - "step": 4378000 - }, - { - "epoch": 48.63, - "learning_rate": 3.4371321324656284e-09, - "loss": 3.74, - "step": 4378500 - }, - { - "epoch": 48.63, - "learning_rate": 3.4232503387157678e-09, - "loss": 3.7592, - "step": 4379000 - }, - { - "epoch": 48.64, - "learning_rate": 3.4093685449659063e-09, - "loss": 3.7585, - "step": 4379500 - }, - { - "epoch": 48.64, - "learning_rate": 3.3954867512160453e-09, - "loss": 3.7596, - "step": 4380000 - }, - { - "epoch": 48.65, - "learning_rate": 3.381604957466184e-09, - "loss": 3.7442, - "step": 4380500 - }, - { - "epoch": 48.65, - "learning_rate": 3.3677231637163224e-09, - "loss": 3.7309, - "step": 4381000 - }, - { - "epoch": 48.66, - "learning_rate": 3.3538413699664613e-09, - "loss": 3.751, - "step": 4381500 - }, - { - "epoch": 48.66, - "learning_rate": 3.3399595762166e-09, - "loss": 3.7478, - "step": 4382000 - }, - { - "epoch": 48.67, - "learning_rate": 3.3260777824667392e-09, - "loss": 3.7532, - "step": 4382500 - }, - { - "epoch": 48.68, - "learning_rate": 3.312195988716878e-09, - "loss": 3.7336, - "step": 4383000 - }, - { - "epoch": 48.68, - "learning_rate": 3.2983141949670167e-09, - "loss": 3.7752, - "step": 4383500 - }, - { - "epoch": 48.69, - "learning_rate": 3.2844324012171557e-09, - "loss": 3.7333, - "step": 4384000 - }, - { - "epoch": 48.69, - "learning_rate": 3.2705506074672942e-09, - "loss": 3.7346, - "step": 4384500 - }, - { - "epoch": 48.7, - "learning_rate": 3.2566688137174328e-09, - "loss": 3.7482, - "step": 4385000 - }, - { - "epoch": 48.7, - "learning_rate": 3.2427870199675717e-09, - "loss": 3.7437, - "step": 4385500 - }, - { - "epoch": 48.71, - "learning_rate": 3.228905226217711e-09, - "loss": 3.7538, - "step": 4386000 - }, - { - "epoch": 48.71, - "learning_rate": 3.2150234324678496e-09, - "loss": 3.7316, - "step": 4386500 - }, - { - "epoch": 48.72, - "learning_rate": 3.2011416387179886e-09, - "loss": 3.7489, - "step": 4387000 - }, - { - "epoch": 48.73, - "learning_rate": 3.187259844968127e-09, - "loss": 3.7448, - "step": 4387500 - }, - { - "epoch": 48.73, - "learning_rate": 3.173378051218266e-09, - "loss": 3.7478, - "step": 4388000 - }, - { - "epoch": 48.74, - "learning_rate": 3.1594962574684046e-09, - "loss": 3.7706, - "step": 4388500 - }, - { - "epoch": 48.74, - "learning_rate": 3.145614463718544e-09, - "loss": 3.7455, - "step": 4389000 - }, - { - "epoch": 48.75, - "learning_rate": 3.131732669968683e-09, - "loss": 3.7359, - "step": 4389500 - }, - { - "epoch": 48.75, - "learning_rate": 3.1178508762188215e-09, - "loss": 3.7488, - "step": 4390000 - }, - { - "epoch": 48.76, - "learning_rate": 3.10396908246896e-09, - "loss": 3.7531, - "step": 4390500 - }, - { - "epoch": 48.76, - "learning_rate": 3.090087288719099e-09, - "loss": 3.7573, - "step": 4391000 - }, - { - "epoch": 48.77, - "learning_rate": 3.076205494969238e-09, - "loss": 3.7429, - "step": 4391500 - }, - { - "epoch": 48.78, - "learning_rate": 3.0623237012193765e-09, - "loss": 3.7476, - "step": 4392000 - }, - { - "epoch": 48.78, - "learning_rate": 3.0484419074695154e-09, - "loss": 3.7535, - "step": 4392500 - }, - { - "epoch": 48.79, - "learning_rate": 3.0345601137196544e-09, - "loss": 3.7491, - "step": 4393000 - }, - { - "epoch": 48.79, - "learning_rate": 3.020678319969793e-09, - "loss": 3.7546, - "step": 4393500 - }, - { - "epoch": 48.8, - "learning_rate": 3.006796526219932e-09, - "loss": 3.7614, - "step": 4394000 - }, - { - "epoch": 48.8, - "learning_rate": 2.9929147324700704e-09, - "loss": 3.743, - "step": 4394500 - }, - { - "epoch": 48.81, - "learning_rate": 2.97903293872021e-09, - "loss": 3.7431, - "step": 4395000 - }, - { - "epoch": 48.81, - "learning_rate": 2.9651511449703483e-09, - "loss": 3.7512, - "step": 4395500 - }, - { - "epoch": 48.82, - "learning_rate": 2.951269351220487e-09, - "loss": 3.7339, - "step": 4396000 - }, - { - "epoch": 48.83, - "learning_rate": 2.9373875574706263e-09, - "loss": 3.7485, - "step": 4396500 - }, - { - "epoch": 48.83, - "learning_rate": 2.923505763720765e-09, - "loss": 3.7315, - "step": 4397000 - }, - { - "epoch": 48.84, - "learning_rate": 2.9096239699709033e-09, - "loss": 3.7569, - "step": 4397500 - }, - { - "epoch": 48.84, - "learning_rate": 2.8957421762210427e-09, - "loss": 3.7529, - "step": 4398000 - }, - { - "epoch": 48.85, - "learning_rate": 2.8818603824711812e-09, - "loss": 3.7447, - "step": 4398500 - }, - { - "epoch": 48.85, - "learning_rate": 2.86797858872132e-09, - "loss": 3.7557, - "step": 4399000 - }, - { - "epoch": 48.86, - "learning_rate": 2.8540967949714587e-09, - "loss": 3.7354, - "step": 4399500 - }, - { - "epoch": 48.86, - "learning_rate": 2.8402150012215977e-09, - "loss": 3.7433, - "step": 4400000 - }, - { - "epoch": 48.87, - "learning_rate": 2.8263332074717367e-09, - "loss": 3.745, - "step": 4400500 - }, - { - "epoch": 48.88, - "learning_rate": 2.812451413721875e-09, - "loss": 3.7475, - "step": 4401000 - }, - { - "epoch": 48.88, - "learning_rate": 2.798569619972014e-09, - "loss": 3.7395, - "step": 4401500 - }, - { - "epoch": 48.89, - "learning_rate": 2.784687826222153e-09, - "loss": 3.746, - "step": 4402000 - }, - { - "epoch": 48.89, - "learning_rate": 2.7708060324722916e-09, - "loss": 3.7468, - "step": 4402500 - }, - { - "epoch": 48.9, - "learning_rate": 2.7569242387224306e-09, - "loss": 3.7423, - "step": 4403000 - }, - { - "epoch": 48.9, - "learning_rate": 2.7430424449725696e-09, - "loss": 3.7408, - "step": 4403500 - }, - { - "epoch": 48.91, - "learning_rate": 2.729160651222708e-09, - "loss": 3.7488, - "step": 4404000 - }, - { - "epoch": 48.91, - "learning_rate": 2.715278857472847e-09, - "loss": 3.7566, - "step": 4404500 - }, - { - "epoch": 48.92, - "learning_rate": 2.701397063722986e-09, - "loss": 3.7398, - "step": 4405000 - }, - { - "epoch": 48.92, - "learning_rate": 2.6875152699731246e-09, - "loss": 3.7627, - "step": 4405500 - }, - { - "epoch": 48.93, - "learning_rate": 2.6736334762232635e-09, - "loss": 3.7559, - "step": 4406000 - }, - { - "epoch": 48.94, - "learning_rate": 2.6597516824734025e-09, - "loss": 3.757, - "step": 4406500 - }, - { - "epoch": 48.94, - "learning_rate": 2.645869888723541e-09, - "loss": 3.7432, - "step": 4407000 - }, - { - "epoch": 48.95, - "learning_rate": 2.63198809497368e-09, - "loss": 3.747, - "step": 4407500 - }, - { - "epoch": 48.95, - "learning_rate": 2.618106301223819e-09, - "loss": 3.741, - "step": 4408000 - }, - { - "epoch": 48.96, - "learning_rate": 2.6042245074739575e-09, - "loss": 3.766, - "step": 4408500 - }, - { - "epoch": 48.96, - "learning_rate": 2.5903427137240964e-09, - "loss": 3.7385, - "step": 4409000 - }, - { - "epoch": 48.97, - "learning_rate": 2.5764609199742354e-09, - "loss": 3.7701, - "step": 4409500 - }, - { - "epoch": 48.97, - "learning_rate": 2.5625791262243743e-09, - "loss": 3.7594, - "step": 4410000 - }, - { - "epoch": 48.98, - "learning_rate": 2.548697332474513e-09, - "loss": 3.7521, - "step": 4410500 - }, - { - "epoch": 48.99, - "learning_rate": 2.5348155387246514e-09, - "loss": 3.7384, - "step": 4411000 - }, - { - "epoch": 48.99, - "learning_rate": 2.5209337449747908e-09, - "loss": 3.7661, - "step": 4411500 - }, - { - "epoch": 49.0, - "learning_rate": 2.5070519512249293e-09, - "loss": 3.7427, - "step": 4412000 - }, - { - "epoch": 49.0, - "eval_loss": 3.824198007583618, - "eval_runtime": 6.3065, - "eval_samples_per_second": 246.412, - "step": 4412254 - }, - { - "epoch": 49.0, - "learning_rate": 2.493170157475068e-09, - "loss": 3.7514, - "step": 4412500 - }, - { - "epoch": 49.01, - "learning_rate": 2.4792883637252072e-09, - "loss": 3.7573, - "step": 4413000 - }, - { - "epoch": 49.01, - "learning_rate": 2.4654065699753458e-09, - "loss": 3.7368, - "step": 4413500 - }, - { - "epoch": 49.02, - "learning_rate": 2.4515247762254847e-09, - "loss": 3.7344, - "step": 4414000 - }, - { - "epoch": 49.02, - "learning_rate": 2.4376429824756237e-09, - "loss": 3.7523, - "step": 4414500 - }, - { - "epoch": 49.03, - "learning_rate": 2.4237611887257622e-09, - "loss": 3.7547, - "step": 4415000 - }, - { - "epoch": 49.04, - "learning_rate": 2.409879394975901e-09, - "loss": 3.7464, - "step": 4415500 - }, - { - "epoch": 49.04, - "learning_rate": 2.39599760122604e-09, - "loss": 3.7586, - "step": 4416000 - }, - { - "epoch": 49.05, - "learning_rate": 2.3821158074761787e-09, - "loss": 3.7505, - "step": 4416500 - }, - { - "epoch": 49.05, - "learning_rate": 2.3682340137263176e-09, - "loss": 3.7578, - "step": 4417000 - }, - { - "epoch": 49.06, - "learning_rate": 2.354352219976456e-09, - "loss": 3.7678, - "step": 4417500 - }, - { - "epoch": 49.06, - "learning_rate": 2.340470426226595e-09, - "loss": 3.7323, - "step": 4418000 - }, - { - "epoch": 49.07, - "learning_rate": 2.326588632476734e-09, - "loss": 3.7412, - "step": 4418500 - }, - { - "epoch": 49.07, - "learning_rate": 2.3127068387268726e-09, - "loss": 3.7396, - "step": 4419000 - }, - { - "epoch": 49.08, - "learning_rate": 2.2988250449770116e-09, - "loss": 3.7359, - "step": 4419500 - }, - { - "epoch": 49.09, - "learning_rate": 2.2849432512271505e-09, - "loss": 3.7364, - "step": 4420000 - }, - { - "epoch": 49.09, - "learning_rate": 2.271061457477289e-09, - "loss": 3.7276, - "step": 4420500 - }, - { - "epoch": 49.1, - "learning_rate": 2.2571796637274284e-09, - "loss": 3.7448, - "step": 4421000 - }, - { - "epoch": 49.1, - "learning_rate": 2.243297869977567e-09, - "loss": 3.7429, - "step": 4421500 - }, - { - "epoch": 49.11, - "learning_rate": 2.2294160762277055e-09, - "loss": 3.7547, - "step": 4422000 - }, - { - "epoch": 49.11, - "learning_rate": 2.2155342824778445e-09, - "loss": 3.7279, - "step": 4422500 - }, - { - "epoch": 49.12, - "learning_rate": 2.2016524887279834e-09, - "loss": 3.735, - "step": 4423000 - }, - { - "epoch": 49.12, - "learning_rate": 2.187770694978122e-09, - "loss": 3.7509, - "step": 4423500 - }, - { - "epoch": 49.13, - "learning_rate": 2.173888901228261e-09, - "loss": 3.7432, - "step": 4424000 - }, - { - "epoch": 49.14, - "learning_rate": 2.1600071074784e-09, - "loss": 3.757, - "step": 4424500 - }, - { - "epoch": 49.14, - "learning_rate": 2.146125313728539e-09, - "loss": 3.7542, - "step": 4425000 - }, - { - "epoch": 49.15, - "learning_rate": 2.1322435199786774e-09, - "loss": 3.7613, - "step": 4425500 - }, - { - "epoch": 49.15, - "learning_rate": 2.1183617262288163e-09, - "loss": 3.7442, - "step": 4426000 - }, - { - "epoch": 49.16, - "learning_rate": 2.1044799324789553e-09, - "loss": 3.7572, - "step": 4426500 - }, - { - "epoch": 49.16, - "learning_rate": 2.090598138729094e-09, - "loss": 3.7369, - "step": 4427000 - }, - { - "epoch": 49.17, - "learning_rate": 2.076716344979233e-09, - "loss": 3.7495, - "step": 4427500 - }, - { - "epoch": 49.17, - "learning_rate": 2.0628345512293718e-09, - "loss": 3.7577, - "step": 4428000 - }, - { - "epoch": 49.18, - "learning_rate": 2.0489527574795103e-09, - "loss": 3.7458, - "step": 4428500 - }, - { - "epoch": 49.19, - "learning_rate": 2.0350709637296492e-09, - "loss": 3.7673, - "step": 4429000 - }, - { - "epoch": 49.19, - "learning_rate": 2.021189169979788e-09, - "loss": 3.7531, - "step": 4429500 - }, - { - "epoch": 49.2, - "learning_rate": 2.0073073762299267e-09, - "loss": 3.7386, - "step": 4430000 - }, - { - "epoch": 49.2, - "learning_rate": 1.9934255824800657e-09, - "loss": 3.7456, - "step": 4430500 - }, - { - "epoch": 49.21, - "learning_rate": 1.9795437887302047e-09, - "loss": 3.7621, - "step": 4431000 - }, - { - "epoch": 49.21, - "learning_rate": 1.965661994980343e-09, - "loss": 3.7583, - "step": 4431500 - }, - { - "epoch": 49.22, - "learning_rate": 1.951780201230482e-09, - "loss": 3.7574, - "step": 4432000 - }, - { - "epoch": 49.22, - "learning_rate": 1.937898407480621e-09, - "loss": 3.7487, - "step": 4432500 - }, - { - "epoch": 49.23, - "learning_rate": 1.9240166137307597e-09, - "loss": 3.7506, - "step": 4433000 - }, - { - "epoch": 49.24, - "learning_rate": 1.9101348199808986e-09, - "loss": 3.7421, - "step": 4433500 - }, - { - "epoch": 49.24, - "learning_rate": 1.896253026231037e-09, - "loss": 3.722, - "step": 4434000 - }, - { - "epoch": 49.25, - "learning_rate": 1.882371232481176e-09, - "loss": 3.757, - "step": 4434500 - }, - { - "epoch": 49.25, - "learning_rate": 1.868489438731315e-09, - "loss": 3.7406, - "step": 4435000 - }, - { - "epoch": 49.26, - "learning_rate": 1.8546076449814538e-09, - "loss": 3.7292, - "step": 4435500 - }, - { - "epoch": 49.26, - "learning_rate": 1.8407258512315928e-09, - "loss": 3.766, - "step": 4436000 - }, - { - "epoch": 49.27, - "learning_rate": 1.8268440574817315e-09, - "loss": 3.7409, - "step": 4436500 - }, - { - "epoch": 49.27, - "learning_rate": 1.8129622637318703e-09, - "loss": 3.7467, - "step": 4437000 - }, - { - "epoch": 49.28, - "learning_rate": 1.7990804699820092e-09, - "loss": 3.7699, - "step": 4437500 - }, - { - "epoch": 49.29, - "learning_rate": 1.785198676232148e-09, - "loss": 3.7499, - "step": 4438000 - }, - { - "epoch": 49.29, - "learning_rate": 1.7713168824822867e-09, - "loss": 3.7527, - "step": 4438500 - }, - { - "epoch": 49.3, - "learning_rate": 1.7574350887324257e-09, - "loss": 3.7476, - "step": 4439000 - }, - { - "epoch": 49.3, - "learning_rate": 1.7435532949825644e-09, - "loss": 3.731, - "step": 4439500 - }, - { - "epoch": 49.31, - "learning_rate": 1.7296715012327032e-09, - "loss": 3.7352, - "step": 4440000 - }, - { - "epoch": 49.31, - "learning_rate": 1.715789707482842e-09, - "loss": 3.7597, - "step": 4440500 - }, - { - "epoch": 49.32, - "learning_rate": 1.7019079137329809e-09, - "loss": 3.7482, - "step": 4441000 - }, - { - "epoch": 49.32, - "learning_rate": 1.6880261199831196e-09, - "loss": 3.7368, - "step": 4441500 - }, - { - "epoch": 49.33, - "learning_rate": 1.6741443262332584e-09, - "loss": 3.7453, - "step": 4442000 - }, - { - "epoch": 49.34, - "learning_rate": 1.6602625324833975e-09, - "loss": 3.7434, - "step": 4442500 - }, - { - "epoch": 49.34, - "learning_rate": 1.646380738733536e-09, - "loss": 3.7398, - "step": 4443000 - }, - { - "epoch": 49.35, - "learning_rate": 1.6324989449836748e-09, - "loss": 3.7241, - "step": 4443500 - }, - { - "epoch": 49.35, - "learning_rate": 1.618617151233814e-09, - "loss": 3.7415, - "step": 4444000 - }, - { - "epoch": 49.36, - "learning_rate": 1.6047353574839527e-09, - "loss": 3.7426, - "step": 4444500 - }, - { - "epoch": 49.36, - "learning_rate": 1.5908535637340913e-09, - "loss": 3.7419, - "step": 4445000 - }, - { - "epoch": 49.37, - "learning_rate": 1.57697176998423e-09, - "loss": 3.7611, - "step": 4445500 - }, - { - "epoch": 49.37, - "learning_rate": 1.5630899762343692e-09, - "loss": 3.7504, - "step": 4446000 - }, - { - "epoch": 49.38, - "learning_rate": 1.5492081824845077e-09, - "loss": 3.7541, - "step": 4446500 - }, - { - "epoch": 49.39, - "learning_rate": 1.5353263887346467e-09, - "loss": 3.7546, - "step": 4447000 - }, - { - "epoch": 49.39, - "learning_rate": 1.5214445949847854e-09, - "loss": 3.7475, - "step": 4447500 - }, - { - "epoch": 49.4, - "learning_rate": 1.5075628012349244e-09, - "loss": 3.7358, - "step": 4448000 - }, - { - "epoch": 49.4, - "learning_rate": 1.4936810074850631e-09, - "loss": 3.7506, - "step": 4448500 - }, - { - "epoch": 49.41, - "learning_rate": 1.4797992137352019e-09, - "loss": 3.7492, - "step": 4449000 - }, - { - "epoch": 49.41, - "learning_rate": 1.4659174199853408e-09, - "loss": 3.7515, - "step": 4449500 - }, - { - "epoch": 49.42, - "learning_rate": 1.4520356262354796e-09, - "loss": 3.736, - "step": 4450000 - }, - { - "epoch": 49.42, - "learning_rate": 1.4381538324856183e-09, - "loss": 3.7339, - "step": 4450500 - }, - { - "epoch": 49.43, - "learning_rate": 1.4242720387357573e-09, - "loss": 3.7638, - "step": 4451000 - }, - { - "epoch": 49.44, - "learning_rate": 1.410390244985896e-09, - "loss": 3.75, - "step": 4451500 - }, - { - "epoch": 49.44, - "learning_rate": 1.3965084512360348e-09, - "loss": 3.7474, - "step": 4452000 - }, - { - "epoch": 49.45, - "learning_rate": 1.3826266574861735e-09, - "loss": 3.7398, - "step": 4452500 - }, - { - "epoch": 49.45, - "learning_rate": 1.3687448637363125e-09, - "loss": 3.7462, - "step": 4453000 - }, - { - "epoch": 49.46, - "learning_rate": 1.3548630699864514e-09, - "loss": 3.7402, - "step": 4453500 - }, - { - "epoch": 49.46, - "learning_rate": 1.34098127623659e-09, - "loss": 3.7444, - "step": 4454000 - }, - { - "epoch": 49.47, - "learning_rate": 1.327099482486729e-09, - "loss": 3.743, - "step": 4454500 - }, - { - "epoch": 49.47, - "learning_rate": 1.3132176887368677e-09, - "loss": 3.7348, - "step": 4455000 - }, - { - "epoch": 49.48, - "learning_rate": 1.2993358949870066e-09, - "loss": 3.7489, - "step": 4455500 - }, - { - "epoch": 49.49, - "learning_rate": 1.2854541012371454e-09, - "loss": 3.7387, - "step": 4456000 - }, - { - "epoch": 49.49, - "learning_rate": 1.2715723074872841e-09, - "loss": 3.7473, - "step": 4456500 - }, - { - "epoch": 49.5, - "learning_rate": 1.257690513737423e-09, - "loss": 3.7576, - "step": 4457000 - }, - { - "epoch": 49.5, - "learning_rate": 1.2438087199875618e-09, - "loss": 3.7461, - "step": 4457500 - }, - { - "epoch": 49.51, - "learning_rate": 1.2299269262377006e-09, - "loss": 3.7505, - "step": 4458000 - }, - { - "epoch": 49.51, - "learning_rate": 1.2160451324878395e-09, - "loss": 3.7514, - "step": 4458500 - }, - { - "epoch": 49.52, - "learning_rate": 1.2021633387379783e-09, - "loss": 3.7436, - "step": 4459000 - }, - { - "epoch": 49.52, - "learning_rate": 1.188281544988117e-09, - "loss": 3.7425, - "step": 4459500 - }, - { - "epoch": 49.53, - "learning_rate": 1.174399751238256e-09, - "loss": 3.7549, - "step": 4460000 - }, - { - "epoch": 49.54, - "learning_rate": 1.1605179574883947e-09, - "loss": 3.7402, - "step": 4460500 - }, - { - "epoch": 49.54, - "learning_rate": 1.1466361637385337e-09, - "loss": 3.7342, - "step": 4461000 - }, - { - "epoch": 49.55, - "learning_rate": 1.1327543699886722e-09, - "loss": 3.7424, - "step": 4461500 - }, - { - "epoch": 49.55, - "learning_rate": 1.1188725762388112e-09, - "loss": 3.7396, - "step": 4462000 - }, - { - "epoch": 49.56, - "learning_rate": 1.1049907824889502e-09, - "loss": 3.7593, - "step": 4462500 - }, - { - "epoch": 49.56, - "learning_rate": 1.091108988739089e-09, - "loss": 3.7552, - "step": 4463000 - }, - { - "epoch": 49.57, - "learning_rate": 1.0772271949892277e-09, - "loss": 3.7461, - "step": 4463500 - }, - { - "epoch": 49.57, - "learning_rate": 1.0633454012393664e-09, - "loss": 3.7367, - "step": 4464000 - }, - { - "epoch": 49.58, - "learning_rate": 1.0494636074895054e-09, - "loss": 3.7456, - "step": 4464500 - }, - { - "epoch": 49.59, - "learning_rate": 1.0355818137396441e-09, - "loss": 3.7521, - "step": 4465000 - }, - { - "epoch": 49.59, - "learning_rate": 1.0217000199897829e-09, - "loss": 3.7379, - "step": 4465500 - }, - { - "epoch": 49.6, - "learning_rate": 1.0078182262399218e-09, - "loss": 3.7427, - "step": 4466000 - }, - { - "epoch": 49.6, - "learning_rate": 9.939364324900606e-10, - "loss": 3.756, - "step": 4466500 - }, - { - "epoch": 49.61, - "learning_rate": 9.800546387401993e-10, - "loss": 3.7639, - "step": 4467000 - }, - { - "epoch": 49.61, - "learning_rate": 9.661728449903383e-10, - "loss": 3.7395, - "step": 4467500 - }, - { - "epoch": 49.62, - "learning_rate": 9.52291051240477e-10, - "loss": 3.7458, - "step": 4468000 - }, - { - "epoch": 49.62, - "learning_rate": 9.38409257490616e-10, - "loss": 3.749, - "step": 4468500 - }, - { - "epoch": 49.63, - "learning_rate": 9.245274637407546e-10, - "loss": 3.7483, - "step": 4469000 - }, - { - "epoch": 49.64, - "learning_rate": 9.106456699908935e-10, - "loss": 3.7516, - "step": 4469500 - }, - { - "epoch": 49.64, - "learning_rate": 8.967638762410324e-10, - "loss": 3.7545, - "step": 4470000 - }, - { - "epoch": 49.65, - "learning_rate": 8.828820824911711e-10, - "loss": 3.7323, - "step": 4470500 - }, - { - "epoch": 49.65, - "learning_rate": 8.6900028874131e-10, - "loss": 3.7421, - "step": 4471000 - }, - { - "epoch": 49.66, - "learning_rate": 8.551184949914489e-10, - "loss": 3.727, - "step": 4471500 - }, - { - "epoch": 49.66, - "learning_rate": 8.412367012415876e-10, - "loss": 3.7473, - "step": 4472000 - }, - { - "epoch": 49.67, - "learning_rate": 8.273549074917265e-10, - "loss": 3.7502, - "step": 4472500 - }, - { - "epoch": 49.67, - "learning_rate": 8.134731137418652e-10, - "loss": 3.7533, - "step": 4473000 - }, - { - "epoch": 49.68, - "learning_rate": 7.995913199920041e-10, - "loss": 3.737, - "step": 4473500 - }, - { - "epoch": 49.69, - "learning_rate": 7.857095262421429e-10, - "loss": 3.7678, - "step": 4474000 - }, - { - "epoch": 49.69, - "learning_rate": 7.718277324922817e-10, - "loss": 3.7366, - "step": 4474500 - }, - { - "epoch": 49.7, - "learning_rate": 7.579459387424205e-10, - "loss": 3.7476, - "step": 4475000 - }, - { - "epoch": 49.7, - "learning_rate": 7.440641449925594e-10, - "loss": 3.7469, - "step": 4475500 - }, - { - "epoch": 49.71, - "learning_rate": 7.301823512426981e-10, - "loss": 3.7447, - "step": 4476000 - }, - { - "epoch": 49.71, - "learning_rate": 7.16300557492837e-10, - "loss": 3.7465, - "step": 4476500 - }, - { - "epoch": 49.72, - "learning_rate": 7.024187637429757e-10, - "loss": 3.7516, - "step": 4477000 - }, - { - "epoch": 49.72, - "learning_rate": 6.885369699931146e-10, - "loss": 3.765, - "step": 4477500 - }, - { - "epoch": 49.73, - "learning_rate": 6.746551762432534e-10, - "loss": 3.7353, - "step": 4478000 - }, - { - "epoch": 49.74, - "learning_rate": 6.607733824933922e-10, - "loss": 3.7609, - "step": 4478500 - }, - { - "epoch": 49.74, - "learning_rate": 6.46891588743531e-10, - "loss": 3.7521, - "step": 4479000 - }, - { - "epoch": 49.75, - "learning_rate": 6.330097949936699e-10, - "loss": 3.7518, - "step": 4479500 - }, - { - "epoch": 49.75, - "learning_rate": 6.191280012438087e-10, - "loss": 3.7477, - "step": 4480000 - }, - { - "epoch": 49.76, - "learning_rate": 6.052462074939475e-10, - "loss": 3.7675, - "step": 4480500 - }, - { - "epoch": 49.76, - "learning_rate": 5.913644137440863e-10, - "loss": 3.7429, - "step": 4481000 - }, - { - "epoch": 49.77, - "learning_rate": 5.774826199942251e-10, - "loss": 3.7585, - "step": 4481500 - }, - { - "epoch": 49.77, - "learning_rate": 5.63600826244364e-10, - "loss": 3.7566, - "step": 4482000 - }, - { - "epoch": 49.78, - "learning_rate": 5.497190324945028e-10, - "loss": 3.7519, - "step": 4482500 - }, - { - "epoch": 49.79, - "learning_rate": 5.358372387446416e-10, - "loss": 3.7361, - "step": 4483000 - }, - { - "epoch": 49.79, - "learning_rate": 5.219554449947804e-10, - "loss": 3.7402, - "step": 4483500 - }, - { - "epoch": 49.8, - "learning_rate": 5.080736512449192e-10, - "loss": 3.7263, - "step": 4484000 - }, - { - "epoch": 49.8, - "learning_rate": 4.941918574950581e-10, - "loss": 3.7414, - "step": 4484500 - }, - { - "epoch": 49.81, - "learning_rate": 4.803100637451968e-10, - "loss": 3.7291, - "step": 4485000 - }, - { - "epoch": 49.81, - "learning_rate": 4.664282699953357e-10, - "loss": 3.7426, - "step": 4485500 - }, - { - "epoch": 49.82, - "learning_rate": 4.525464762454745e-10, - "loss": 3.7538, - "step": 4486000 - }, - { - "epoch": 49.82, - "learning_rate": 4.386646824956134e-10, - "loss": 3.7286, - "step": 4486500 - }, - { - "epoch": 49.83, - "learning_rate": 4.247828887457522e-10, - "loss": 3.7406, - "step": 4487000 - }, - { - "epoch": 49.84, - "learning_rate": 4.10901094995891e-10, - "loss": 3.7444, - "step": 4487500 - }, - { - "epoch": 49.84, - "learning_rate": 3.9701930124602974e-10, - "loss": 3.7448, - "step": 4488000 - }, - { - "epoch": 49.85, - "learning_rate": 3.831375074961686e-10, - "loss": 3.7188, - "step": 4488500 - }, - { - "epoch": 49.85, - "learning_rate": 3.692557137463074e-10, - "loss": 3.7447, - "step": 4489000 - }, - { - "epoch": 49.86, - "learning_rate": 3.5537391999644625e-10, - "loss": 3.7544, - "step": 4489500 - }, - { - "epoch": 49.86, - "learning_rate": 3.4149212624658505e-10, - "loss": 3.7333, - "step": 4490000 - }, - { - "epoch": 49.87, - "learning_rate": 3.276103324967239e-10, - "loss": 3.7406, - "step": 4490500 - }, - { - "epoch": 49.87, - "learning_rate": 3.137285387468627e-10, - "loss": 3.7704, - "step": 4491000 - }, - { - "epoch": 49.88, - "learning_rate": 2.998467449970015e-10, - "loss": 3.7435, - "step": 4491500 - }, - { - "epoch": 49.89, - "learning_rate": 2.8596495124714035e-10, - "loss": 3.7519, - "step": 4492000 - }, - { - "epoch": 49.89, - "learning_rate": 2.7208315749727915e-10, - "loss": 3.7512, - "step": 4492500 - }, - { - "epoch": 49.9, - "learning_rate": 2.58201363747418e-10, - "loss": 3.7613, - "step": 4493000 - }, - { - "epoch": 49.9, - "learning_rate": 2.443195699975568e-10, - "loss": 3.7335, - "step": 4493500 - }, - { - "epoch": 49.91, - "learning_rate": 2.3043777624769563e-10, - "loss": 3.7446, - "step": 4494000 - }, - { - "epoch": 49.91, - "learning_rate": 2.1655598249783443e-10, - "loss": 3.7563, - "step": 4494500 - }, - { - "epoch": 49.92, - "learning_rate": 2.0267418874797323e-10, - "loss": 3.7504, - "step": 4495000 - }, - { - "epoch": 49.92, - "learning_rate": 1.8879239499811206e-10, - "loss": 3.7485, - "step": 4495500 - }, - { - "epoch": 49.93, - "learning_rate": 1.7491060124825088e-10, - "loss": 3.7562, - "step": 4496000 - }, - { - "epoch": 49.94, - "learning_rate": 1.610288074983897e-10, - "loss": 3.7513, - "step": 4496500 - }, - { - "epoch": 49.94, - "learning_rate": 1.471470137485285e-10, - "loss": 3.7387, - "step": 4497000 - }, - { - "epoch": 49.95, - "learning_rate": 1.3326521999866734e-10, - "loss": 3.7599, - "step": 4497500 - }, - { - "epoch": 49.95, - "learning_rate": 1.1938342624880616e-10, - "loss": 3.7616, - "step": 4498000 - }, - { - "epoch": 49.96, - "learning_rate": 1.0550163249894498e-10, - "loss": 3.7502, - "step": 4498500 - }, - { - "epoch": 49.96, - "learning_rate": 9.16198387490838e-11, - "loss": 3.7519, - "step": 4499000 - }, - { - "epoch": 49.97, - "learning_rate": 7.773804499922262e-11, - "loss": 3.74, - "step": 4499500 - }, - { - "epoch": 49.97, - "learning_rate": 6.385625124936143e-11, - "loss": 3.741, - "step": 4500000 - }, - { - "epoch": 49.98, - "learning_rate": 4.9974457499500255e-11, - "loss": 3.7423, - "step": 4500500 - }, - { - "epoch": 49.99, - "learning_rate": 3.609266374963907e-11, - "loss": 3.7529, - "step": 4501000 - }, - { - "epoch": 49.99, - "learning_rate": 2.221086999977789e-11, - "loss": 3.7412, - "step": 4501500 - }, - { - "epoch": 50.0, - "learning_rate": 8.329076249916709e-12, - "loss": 3.7532, - "step": 4502000 - }, - { - "epoch": 50.0, - "eval_loss": 3.824197769165039, - "eval_runtime": 6.3023, - "eval_samples_per_second": 246.578, - "step": 4502300 - } - ], - "max_steps": 4502300, - "num_train_epochs": 50, - "total_flos": 2.1542227070270976e+17, - "trial_name": null, - "trial_params": null -}