{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.538101577634107, "global_step": 1000000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4999999999999998e-06, "loss": 0.9852, "step": 500 }, { "epoch": 0.0, "learning_rate": 2.9999999999999997e-06, "loss": 0.7842, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.499999999999999e-06, "loss": 0.7423, "step": 1500 }, { "epoch": 0.0, "learning_rate": 5.999999999999999e-06, "loss": 0.7319, "step": 2000 }, { "epoch": 0.0, "learning_rate": 7.499999999999999e-06, "loss": 0.7291, "step": 2500 }, { "epoch": 0.01, "learning_rate": 8.999999999999999e-06, "loss": 0.7259, "step": 3000 }, { "epoch": 0.01, "learning_rate": 1.05e-05, "loss": 0.7247, "step": 3500 }, { "epoch": 0.01, "learning_rate": 1.1999999999999999e-05, "loss": 0.7241, "step": 4000 }, { "epoch": 0.01, "learning_rate": 1.3499999999999998e-05, "loss": 0.7233, "step": 4500 }, { "epoch": 0.01, "learning_rate": 1.4999999999999999e-05, "loss": 0.7226, "step": 5000 }, { "epoch": 0.01, "learning_rate": 1.6499999999999998e-05, "loss": 0.7212, "step": 5500 }, { "epoch": 0.01, "learning_rate": 1.7999999999999997e-05, "loss": 0.7198, "step": 6000 }, { "epoch": 0.01, "learning_rate": 1.95e-05, "loss": 0.7155, "step": 6500 }, { "epoch": 0.01, "learning_rate": 2.1e-05, "loss": 0.7006, "step": 7000 }, { "epoch": 0.01, "learning_rate": 2.2499999999999998e-05, "loss": 0.685, "step": 7500 }, { "epoch": 0.02, "learning_rate": 2.3999999999999997e-05, "loss": 0.6726, "step": 8000 }, { "epoch": 0.02, "learning_rate": 2.55e-05, "loss": 0.6646, "step": 8500 }, { "epoch": 0.02, "learning_rate": 2.6999999999999996e-05, "loss": 0.6584, "step": 9000 }, { "epoch": 0.02, "learning_rate": 2.8499999999999998e-05, "loss": 0.6529, "step": 9500 }, { "epoch": 0.02, "learning_rate": 2.9999999999999997e-05, "loss": 0.6478, "step": 10000 }, { "epoch": 0.02, "learning_rate": 3.149999999999999e-05, "loss": 0.6422, "step": 10500 }, { "epoch": 0.02, "learning_rate": 3.2999999999999996e-05, "loss": 0.6357, "step": 11000 }, { "epoch": 0.02, "learning_rate": 3.45e-05, "loss": 0.6294, "step": 11500 }, { "epoch": 0.02, "learning_rate": 3.5999999999999994e-05, "loss": 0.6241, "step": 12000 }, { "epoch": 0.02, "learning_rate": 3.75e-05, "loss": 0.6192, "step": 12500 }, { "epoch": 0.03, "learning_rate": 3.9e-05, "loss": 0.6154, "step": 13000 }, { "epoch": 0.03, "learning_rate": 4.05e-05, "loss": 0.6119, "step": 13500 }, { "epoch": 0.03, "learning_rate": 4.2e-05, "loss": 0.608, "step": 14000 }, { "epoch": 0.03, "learning_rate": 4.3499999999999993e-05, "loss": 0.6044, "step": 14500 }, { "epoch": 0.03, "learning_rate": 4.4999999999999996e-05, "loss": 0.6009, "step": 15000 }, { "epoch": 0.03, "learning_rate": 4.65e-05, "loss": 0.5974, "step": 15500 }, { "epoch": 0.03, "learning_rate": 4.7999999999999994e-05, "loss": 0.5943, "step": 16000 }, { "epoch": 0.03, "learning_rate": 4.95e-05, "loss": 0.592, "step": 16500 }, { "epoch": 0.03, "learning_rate": 5.1e-05, "loss": 0.5888, "step": 17000 }, { "epoch": 0.03, "learning_rate": 5.2499999999999995e-05, "loss": 0.5859, "step": 17500 }, { "epoch": 0.04, "learning_rate": 5.399999999999999e-05, "loss": 0.5834, "step": 18000 }, { "epoch": 0.04, "learning_rate": 5.5499999999999994e-05, "loss": 0.5808, "step": 18500 }, { "epoch": 0.04, "learning_rate": 5.6999999999999996e-05, "loss": 0.5777, "step": 19000 }, { "epoch": 0.04, "learning_rate": 5.85e-05, "loss": 0.5753, "step": 19500 }, { "epoch": 0.04, "learning_rate": 5.9999999999999995e-05, "loss": 0.5733, "step": 20000 }, { "epoch": 0.04, "learning_rate": 6.149999999999999e-05, "loss": 0.5711, "step": 20500 }, { "epoch": 0.04, "learning_rate": 6.299999999999999e-05, "loss": 0.5685, "step": 21000 }, { "epoch": 0.04, "learning_rate": 6.45e-05, "loss": 0.5654, "step": 21500 }, { "epoch": 0.04, "learning_rate": 6.599999999999999e-05, "loss": 0.5635, "step": 22000 }, { "epoch": 0.04, "learning_rate": 6.75e-05, "loss": 0.5605, "step": 22500 }, { "epoch": 0.05, "learning_rate": 6.9e-05, "loss": 0.5578, "step": 23000 }, { "epoch": 0.05, "learning_rate": 7.049999999999999e-05, "loss": 0.5553, "step": 23500 }, { "epoch": 0.05, "learning_rate": 7.199999999999999e-05, "loss": 0.552, "step": 24000 }, { "epoch": 0.05, "learning_rate": 7.35e-05, "loss": 0.5487, "step": 24500 }, { "epoch": 0.05, "learning_rate": 7.5e-05, "loss": 0.546, "step": 25000 }, { "epoch": 0.05, "learning_rate": 7.649999999999999e-05, "loss": 0.5432, "step": 25500 }, { "epoch": 0.05, "learning_rate": 7.8e-05, "loss": 0.5413, "step": 26000 }, { "epoch": 0.05, "learning_rate": 7.95e-05, "loss": 0.5383, "step": 26500 }, { "epoch": 0.05, "learning_rate": 8.1e-05, "loss": 0.5356, "step": 27000 }, { "epoch": 0.05, "learning_rate": 8.25e-05, "loss": 0.534, "step": 27500 }, { "epoch": 0.06, "learning_rate": 8.4e-05, "loss": 0.5314, "step": 28000 }, { "epoch": 0.06, "learning_rate": 8.549999999999999e-05, "loss": 0.5282, "step": 28500 }, { "epoch": 0.06, "learning_rate": 8.699999999999999e-05, "loss": 0.5256, "step": 29000 }, { "epoch": 0.06, "learning_rate": 8.849999999999998e-05, "loss": 0.5234, "step": 29500 }, { "epoch": 0.06, "learning_rate": 8.999999999999999e-05, "loss": 0.5217, "step": 30000 }, { "epoch": 0.06, "learning_rate": 9.149999999999999e-05, "loss": 0.5185, "step": 30500 }, { "epoch": 0.06, "learning_rate": 9.3e-05, "loss": 0.5168, "step": 31000 }, { "epoch": 0.06, "learning_rate": 9.449999999999999e-05, "loss": 0.5148, "step": 31500 }, { "epoch": 0.06, "learning_rate": 9.599999999999999e-05, "loss": 0.5117, "step": 32000 }, { "epoch": 0.06, "learning_rate": 9.75e-05, "loss": 0.5101, "step": 32500 }, { "epoch": 0.07, "learning_rate": 9.9e-05, "loss": 0.5085, "step": 33000 }, { "epoch": 0.07, "learning_rate": 0.0001005, "loss": 0.5054, "step": 33500 }, { "epoch": 0.07, "learning_rate": 0.000102, "loss": 0.5036, "step": 34000 }, { "epoch": 0.07, "learning_rate": 0.00010349999999999998, "loss": 0.5016, "step": 34500 }, { "epoch": 0.07, "learning_rate": 0.00010499999999999999, "loss": 0.4993, "step": 35000 }, { "epoch": 0.07, "learning_rate": 0.00010649999999999999, "loss": 0.4969, "step": 35500 }, { "epoch": 0.07, "learning_rate": 0.00010799999999999998, "loss": 0.4937, "step": 36000 }, { "epoch": 0.07, "learning_rate": 0.00010949999999999999, "loss": 0.4911, "step": 36500 }, { "epoch": 0.07, "learning_rate": 0.00011099999999999999, "loss": 0.4883, "step": 37000 }, { "epoch": 0.07, "learning_rate": 0.0001125, "loss": 0.4844, "step": 37500 }, { "epoch": 0.08, "learning_rate": 0.00011399999999999999, "loss": 0.4808, "step": 38000 }, { "epoch": 0.08, "learning_rate": 0.00011549999999999999, "loss": 0.4777, "step": 38500 }, { "epoch": 0.08, "learning_rate": 0.000117, "loss": 0.4742, "step": 39000 }, { "epoch": 0.08, "learning_rate": 0.0001185, "loss": 0.4717, "step": 39500 }, { "epoch": 0.08, "learning_rate": 0.00011999999999999999, "loss": 0.4685, "step": 40000 }, { "epoch": 0.08, "learning_rate": 0.0001215, "loss": 0.465, "step": 40500 }, { "epoch": 0.08, "learning_rate": 0.00012299999999999998, "loss": 0.4627, "step": 41000 }, { "epoch": 0.08, "learning_rate": 0.0001245, "loss": 0.4596, "step": 41500 }, { "epoch": 0.08, "learning_rate": 0.00012599999999999997, "loss": 0.4577, "step": 42000 }, { "epoch": 0.08, "learning_rate": 0.00012749999999999998, "loss": 0.4556, "step": 42500 }, { "epoch": 0.09, "learning_rate": 0.000129, "loss": 0.4533, "step": 43000 }, { "epoch": 0.09, "learning_rate": 0.0001305, "loss": 0.4519, "step": 43500 }, { "epoch": 0.09, "learning_rate": 0.00013199999999999998, "loss": 0.4489, "step": 44000 }, { "epoch": 0.09, "learning_rate": 0.0001335, "loss": 0.4474, "step": 44500 }, { "epoch": 0.09, "learning_rate": 0.000135, "loss": 0.4461, "step": 45000 }, { "epoch": 0.09, "learning_rate": 0.00013649999999999998, "loss": 0.4443, "step": 45500 }, { "epoch": 0.09, "learning_rate": 0.000138, "loss": 0.4437, "step": 46000 }, { "epoch": 0.09, "learning_rate": 0.0001395, "loss": 0.441, "step": 46500 }, { "epoch": 0.09, "learning_rate": 0.00014099999999999998, "loss": 0.4395, "step": 47000 }, { "epoch": 0.09, "learning_rate": 0.0001425, "loss": 0.438, "step": 47500 }, { "epoch": 0.1, "learning_rate": 0.00014399999999999998, "loss": 0.4372, "step": 48000 }, { "epoch": 0.1, "learning_rate": 0.00014549999999999999, "loss": 0.4359, "step": 48500 }, { "epoch": 0.1, "learning_rate": 0.000147, "loss": 0.4343, "step": 49000 }, { "epoch": 0.1, "learning_rate": 0.00014849999999999998, "loss": 0.4335, "step": 49500 }, { "epoch": 0.1, "learning_rate": 0.00015, "loss": 0.4321, "step": 50000 }, { "epoch": 0.1, "learning_rate": 0.00014999990431133645, "loss": 0.4307, "step": 50500 }, { "epoch": 0.1, "learning_rate": 0.0001499996172456075, "loss": 0.4303, "step": 51000 }, { "epoch": 0.1, "learning_rate": 0.00014999913880359787, "loss": 0.4279, "step": 51500 }, { "epoch": 0.1, "learning_rate": 0.00014999846898661572, "loss": 0.4265, "step": 52000 }, { "epoch": 0.1, "learning_rate": 0.00014999760779649222, "loss": 0.426, "step": 52500 }, { "epoch": 0.11, "learning_rate": 0.00014999655523558183, "loss": 0.4247, "step": 53000 }, { "epoch": 0.11, "learning_rate": 0.00014999531130676229, "loss": 0.4234, "step": 53500 }, { "epoch": 0.11, "learning_rate": 0.00014999387601343436, "loss": 0.4227, "step": 54000 }, { "epoch": 0.11, "learning_rate": 0.00014999224935952215, "loss": 0.4217, "step": 54500 }, { "epoch": 0.11, "learning_rate": 0.00014999043134947282, "loss": 0.4202, "step": 55000 }, { "epoch": 0.11, "learning_rate": 0.00014998842198825674, "loss": 0.4194, "step": 55500 }, { "epoch": 0.11, "learning_rate": 0.00014998622128136748, "loss": 0.4182, "step": 56000 }, { "epoch": 0.11, "learning_rate": 0.00014998382923482164, "loss": 0.4174, "step": 56500 }, { "epoch": 0.11, "learning_rate": 0.000149981245855159, "loss": 0.4168, "step": 57000 }, { "epoch": 0.11, "learning_rate": 0.00014997847114944242, "loss": 0.4157, "step": 57500 }, { "epoch": 0.12, "learning_rate": 0.00014997550512525784, "loss": 0.4152, "step": 58000 }, { "epoch": 0.12, "learning_rate": 0.00014997234779071426, "loss": 0.4139, "step": 58500 }, { "epoch": 0.12, "learning_rate": 0.0001499689991544437, "loss": 0.4135, "step": 59000 }, { "epoch": 0.12, "learning_rate": 0.0001499654592256012, "loss": 0.4127, "step": 59500 }, { "epoch": 0.12, "learning_rate": 0.00014996172801386482, "loss": 0.4117, "step": 60000 }, { "epoch": 0.12, "learning_rate": 0.00014995780552943551, "loss": 0.4106, "step": 60500 }, { "epoch": 0.12, "learning_rate": 0.00014995369178303722, "loss": 0.4098, "step": 61000 }, { "epoch": 0.12, "learning_rate": 0.0001499493867859168, "loss": 0.4095, "step": 61500 }, { "epoch": 0.12, "learning_rate": 0.0001499448905498439, "loss": 0.4081, "step": 62000 }, { "epoch": 0.12, "learning_rate": 0.00014994020308711106, "loss": 0.408, "step": 62500 }, { "epoch": 0.13, "learning_rate": 0.00014993532441053364, "loss": 0.4074, "step": 63000 }, { "epoch": 0.13, "learning_rate": 0.0001499302545334498, "loss": 0.4066, "step": 63500 }, { "epoch": 0.13, "learning_rate": 0.0001499249934697203, "loss": 0.4055, "step": 64000 }, { "epoch": 0.13, "learning_rate": 0.00014991954123372875, "loss": 0.4049, "step": 64500 }, { "epoch": 0.13, "learning_rate": 0.0001499138978403813, "loss": 0.4038, "step": 65000 }, { "epoch": 0.13, "learning_rate": 0.00014990806330510687, "loss": 0.4039, "step": 65500 }, { "epoch": 0.13, "learning_rate": 0.00014990203764385677, "loss": 0.4029, "step": 66000 }, { "epoch": 0.13, "learning_rate": 0.00014989582087310494, "loss": 0.4028, "step": 66500 }, { "epoch": 0.13, "learning_rate": 0.00014988941300984784, "loss": 0.4022, "step": 67000 }, { "epoch": 0.13, "learning_rate": 0.00014988281407160426, "loss": 0.4013, "step": 67500 }, { "epoch": 0.14, "learning_rate": 0.0001498760240764155, "loss": 0.4003, "step": 68000 }, { "epoch": 0.14, "learning_rate": 0.00014986904304284512, "loss": 0.3996, "step": 68500 }, { "epoch": 0.14, "learning_rate": 0.000149861870989979, "loss": 0.3994, "step": 69000 }, { "epoch": 0.14, "learning_rate": 0.00014985450793742527, "loss": 0.399, "step": 69500 }, { "epoch": 0.14, "learning_rate": 0.0001498469539053142, "loss": 0.3993, "step": 70000 }, { "epoch": 0.14, "learning_rate": 0.00014983920891429827, "loss": 0.3979, "step": 70500 }, { "epoch": 0.14, "learning_rate": 0.00014983127298555198, "loss": 0.3969, "step": 71000 }, { "epoch": 0.14, "learning_rate": 0.00014982314614077184, "loss": 0.3968, "step": 71500 }, { "epoch": 0.14, "learning_rate": 0.00014981482840217632, "loss": 0.3967, "step": 72000 }, { "epoch": 0.14, "learning_rate": 0.00014980631979250587, "loss": 0.3959, "step": 72500 }, { "epoch": 0.15, "learning_rate": 0.00014979762033502262, "loss": 0.3951, "step": 73000 }, { "epoch": 0.15, "learning_rate": 0.0001497887300535106, "loss": 0.3953, "step": 73500 }, { "epoch": 0.15, "learning_rate": 0.00014977964897227547, "loss": 0.3944, "step": 74000 }, { "epoch": 0.15, "learning_rate": 0.0001497703771161446, "loss": 0.3937, "step": 74500 }, { "epoch": 0.15, "learning_rate": 0.00014976091451046687, "loss": 0.3931, "step": 75000 }, { "epoch": 0.15, "learning_rate": 0.00014975126118111268, "loss": 0.3929, "step": 75500 }, { "epoch": 0.15, "learning_rate": 0.00014974141715447386, "loss": 0.3918, "step": 76000 }, { "epoch": 0.15, "learning_rate": 0.00014973138245746363, "loss": 0.3922, "step": 76500 }, { "epoch": 0.15, "learning_rate": 0.00014972115711751644, "loss": 0.3916, "step": 77000 }, { "epoch": 0.15, "learning_rate": 0.00014971074116258796, "loss": 0.3912, "step": 77500 }, { "epoch": 0.16, "learning_rate": 0.00014970013462115505, "loss": 0.3909, "step": 78000 }, { "epoch": 0.16, "learning_rate": 0.00014968933752221558, "loss": 0.3901, "step": 78500 }, { "epoch": 0.16, "learning_rate": 0.00014967834989528843, "loss": 0.3898, "step": 79000 }, { "epoch": 0.16, "learning_rate": 0.0001496671717704133, "loss": 0.3895, "step": 79500 }, { "epoch": 0.16, "learning_rate": 0.00014965580317815078, "loss": 0.3891, "step": 80000 }, { "epoch": 0.16, "learning_rate": 0.0001496442441495822, "loss": 0.389, "step": 80500 }, { "epoch": 0.16, "learning_rate": 0.00014963249471630944, "loss": 0.3882, "step": 81000 }, { "epoch": 0.16, "learning_rate": 0.00014962055491045506, "loss": 0.3883, "step": 81500 }, { "epoch": 0.16, "learning_rate": 0.000149608424764662, "loss": 0.3878, "step": 82000 }, { "epoch": 0.16, "learning_rate": 0.00014959610431209363, "loss": 0.3875, "step": 82500 }, { "epoch": 0.17, "learning_rate": 0.0001495835935864336, "loss": 0.3867, "step": 83000 }, { "epoch": 0.17, "learning_rate": 0.00014957089262188571, "loss": 0.3861, "step": 83500 }, { "epoch": 0.17, "learning_rate": 0.00014955800145317397, "loss": 0.3857, "step": 84000 }, { "epoch": 0.17, "learning_rate": 0.00014954492011554234, "loss": 0.3856, "step": 84500 }, { "epoch": 0.17, "learning_rate": 0.00014953164864475466, "loss": 0.3854, "step": 85000 }, { "epoch": 0.17, "learning_rate": 0.00014951818707709463, "loss": 0.3845, "step": 85500 }, { "epoch": 0.17, "learning_rate": 0.0001495045354493657, "loss": 0.3843, "step": 86000 }, { "epoch": 0.17, "learning_rate": 0.00014949069379889088, "loss": 0.3842, "step": 86500 }, { "epoch": 0.17, "learning_rate": 0.00014947666216351272, "loss": 0.3838, "step": 87000 }, { "epoch": 0.17, "learning_rate": 0.00014946244058159313, "loss": 0.3831, "step": 87500 }, { "epoch": 0.18, "learning_rate": 0.00014944802909201344, "loss": 0.383, "step": 88000 }, { "epoch": 0.18, "learning_rate": 0.00014943342773417407, "loss": 0.3826, "step": 88500 }, { "epoch": 0.18, "learning_rate": 0.00014941863654799456, "loss": 0.3829, "step": 89000 }, { "epoch": 0.18, "learning_rate": 0.0001494036555739135, "loss": 0.3825, "step": 89500 }, { "epoch": 0.18, "learning_rate": 0.00014938848485288825, "loss": 0.3819, "step": 90000 }, { "epoch": 0.18, "learning_rate": 0.000149373124426395, "loss": 0.3813, "step": 90500 }, { "epoch": 0.18, "learning_rate": 0.0001493575743364286, "loss": 0.3806, "step": 91000 }, { "epoch": 0.18, "learning_rate": 0.00014934183462550238, "loss": 0.3809, "step": 91500 }, { "epoch": 0.18, "learning_rate": 0.00014932590533664808, "loss": 0.3802, "step": 92000 }, { "epoch": 0.18, "learning_rate": 0.00014930978651341581, "loss": 0.38, "step": 92500 }, { "epoch": 0.19, "learning_rate": 0.0001492934781998738, "loss": 0.3799, "step": 93000 }, { "epoch": 0.19, "learning_rate": 0.00014927698044060834, "loss": 0.379, "step": 93500 }, { "epoch": 0.19, "learning_rate": 0.0001492602932807237, "loss": 0.3794, "step": 94000 }, { "epoch": 0.19, "learning_rate": 0.00014924341676584194, "loss": 0.379, "step": 94500 }, { "epoch": 0.19, "learning_rate": 0.00014922635094210277, "loss": 0.3786, "step": 95000 }, { "epoch": 0.19, "learning_rate": 0.00014920909585616356, "loss": 0.3781, "step": 95500 }, { "epoch": 0.19, "learning_rate": 0.000149191651555199, "loss": 0.3782, "step": 96000 }, { "epoch": 0.19, "learning_rate": 0.00014917401808690116, "loss": 0.3775, "step": 96500 }, { "epoch": 0.19, "learning_rate": 0.0001491561954994793, "loss": 0.378, "step": 97000 }, { "epoch": 0.19, "learning_rate": 0.00014913818384165964, "loss": 0.3776, "step": 97500 }, { "epoch": 0.2, "learning_rate": 0.00014911998316268537, "loss": 0.3768, "step": 98000 }, { "epoch": 0.2, "learning_rate": 0.00014910159351231653, "loss": 0.376, "step": 98500 }, { "epoch": 0.2, "learning_rate": 0.00014908301494082963, "loss": 0.3761, "step": 99000 }, { "epoch": 0.2, "learning_rate": 0.0001490642474990178, "loss": 0.3763, "step": 99500 }, { "epoch": 0.2, "learning_rate": 0.00014904529123819054, "loss": 0.3761, "step": 100000 }, { "epoch": 0.2, "learning_rate": 0.00014902614621017352, "loss": 0.3757, "step": 100500 }, { "epoch": 0.2, "learning_rate": 0.00014900681246730852, "loss": 0.3746, "step": 101000 }, { "epoch": 0.2, "learning_rate": 0.00014898729006245328, "loss": 0.3747, "step": 101500 }, { "epoch": 0.2, "learning_rate": 0.00014896757904898125, "loss": 0.3749, "step": 102000 }, { "epoch": 0.2, "learning_rate": 0.0001489476794807816, "loss": 0.3742, "step": 102500 }, { "epoch": 0.21, "learning_rate": 0.00014892759141225904, "loss": 0.3727, "step": 103000 }, { "epoch": 0.21, "learning_rate": 0.00014890731489833355, "loss": 0.3739, "step": 103500 }, { "epoch": 0.21, "learning_rate": 0.00014888684999444035, "loss": 0.3734, "step": 104000 }, { "epoch": 0.21, "learning_rate": 0.00014886619675652968, "loss": 0.3736, "step": 104500 }, { "epoch": 0.21, "learning_rate": 0.00014884535524106675, "loss": 0.3729, "step": 105000 }, { "epoch": 0.21, "learning_rate": 0.00014882432550503144, "loss": 0.3722, "step": 105500 }, { "epoch": 0.21, "learning_rate": 0.00014880310760591824, "loss": 0.3728, "step": 106000 }, { "epoch": 0.21, "learning_rate": 0.0001487817016017361, "loss": 0.3723, "step": 106500 }, { "epoch": 0.21, "learning_rate": 0.0001487601075510082, "loss": 0.3721, "step": 107000 }, { "epoch": 0.21, "learning_rate": 0.00014873832551277186, "loss": 0.3718, "step": 107500 }, { "epoch": 0.22, "learning_rate": 0.0001487163555465783, "loss": 0.3715, "step": 108000 }, { "epoch": 0.22, "learning_rate": 0.00014869419771249264, "loss": 0.3711, "step": 108500 }, { "epoch": 0.22, "learning_rate": 0.0001486718520710935, "loss": 0.3713, "step": 109000 }, { "epoch": 0.22, "learning_rate": 0.00014864931868347302, "loss": 0.3707, "step": 109500 }, { "epoch": 0.22, "learning_rate": 0.00014862659761123663, "loss": 0.3708, "step": 110000 }, { "epoch": 0.22, "learning_rate": 0.0001486036889165029, "loss": 0.3699, "step": 110500 }, { "epoch": 0.22, "learning_rate": 0.00014858059266190327, "loss": 0.3698, "step": 111000 }, { "epoch": 0.22, "learning_rate": 0.00014855730891058204, "loss": 0.37, "step": 111500 }, { "epoch": 0.22, "learning_rate": 0.00014853383772619612, "loss": 0.3701, "step": 112000 }, { "epoch": 0.22, "learning_rate": 0.0001485101791729148, "loss": 0.3695, "step": 112500 }, { "epoch": 0.23, "learning_rate": 0.00014848633331541967, "loss": 0.369, "step": 113000 }, { "epoch": 0.23, "learning_rate": 0.00014846230021890443, "loss": 0.3686, "step": 113500 }, { "epoch": 0.23, "learning_rate": 0.0001484380799490746, "loss": 0.3683, "step": 114000 }, { "epoch": 0.23, "learning_rate": 0.0001484136725721475, "loss": 0.3684, "step": 114500 }, { "epoch": 0.23, "learning_rate": 0.00014838907815485194, "loss": 0.3685, "step": 115000 }, { "epoch": 0.23, "learning_rate": 0.00014836429676442814, "loss": 0.3681, "step": 115500 }, { "epoch": 0.23, "learning_rate": 0.00014833932846862748, "loss": 0.3679, "step": 116000 }, { "epoch": 0.23, "learning_rate": 0.0001483141733357123, "loss": 0.3679, "step": 116500 }, { "epoch": 0.23, "learning_rate": 0.00014828883143445582, "loss": 0.3667, "step": 117000 }, { "epoch": 0.23, "learning_rate": 0.00014826330283414178, "loss": 0.367, "step": 117500 }, { "epoch": 0.24, "learning_rate": 0.0001482375876045644, "loss": 0.3667, "step": 118000 }, { "epoch": 0.24, "learning_rate": 0.0001482116858160282, "loss": 0.3665, "step": 118500 }, { "epoch": 0.24, "learning_rate": 0.0001481855975393476, "loss": 0.3665, "step": 119000 }, { "epoch": 0.24, "learning_rate": 0.000148159322845847, "loss": 0.3657, "step": 119500 }, { "epoch": 0.24, "learning_rate": 0.0001481328618073604, "loss": 0.366, "step": 120000 }, { "epoch": 0.24, "learning_rate": 0.00014810621449623125, "loss": 0.3656, "step": 120500 }, { "epoch": 0.24, "learning_rate": 0.0001480793809853123, "loss": 0.3654, "step": 121000 }, { "epoch": 0.24, "learning_rate": 0.00014805236134796536, "loss": 0.3652, "step": 121500 }, { "epoch": 0.24, "learning_rate": 0.00014802515565806107, "loss": 0.3654, "step": 122000 }, { "epoch": 0.24, "learning_rate": 0.00014799776398997873, "loss": 0.3645, "step": 122500 }, { "epoch": 0.25, "learning_rate": 0.00014797018641860612, "loss": 0.3642, "step": 123000 }, { "epoch": 0.25, "learning_rate": 0.00014794242301933928, "loss": 0.364, "step": 123500 }, { "epoch": 0.25, "learning_rate": 0.0001479144738680823, "loss": 0.3642, "step": 124000 }, { "epoch": 0.25, "learning_rate": 0.00014788633904124708, "loss": 0.3638, "step": 124500 }, { "epoch": 0.25, "learning_rate": 0.00014785801861575312, "loss": 0.3635, "step": 125000 }, { "epoch": 0.25, "learning_rate": 0.0001478295126690274, "loss": 0.3633, "step": 125500 }, { "epoch": 0.25, "learning_rate": 0.00014780082127900416, "loss": 0.3626, "step": 126000 }, { "epoch": 0.25, "learning_rate": 0.0001477719445241245, "loss": 0.3631, "step": 126500 }, { "epoch": 0.25, "learning_rate": 0.00014774288248333635, "loss": 0.3625, "step": 127000 }, { "epoch": 0.25, "learning_rate": 0.00014771363523609428, "loss": 0.3629, "step": 127500 }, { "epoch": 0.26, "learning_rate": 0.00014768420286235908, "loss": 0.3624, "step": 128000 }, { "epoch": 0.26, "learning_rate": 0.0001476545854425978, "loss": 0.3624, "step": 128500 }, { "epoch": 0.26, "learning_rate": 0.00014762478305778328, "loss": 0.3621, "step": 129000 }, { "epoch": 0.26, "learning_rate": 0.00014759479578939415, "loss": 0.3617, "step": 129500 }, { "epoch": 0.26, "learning_rate": 0.0001475646237194144, "loss": 0.3617, "step": 130000 }, { "epoch": 0.26, "learning_rate": 0.00014753426693033336, "loss": 0.3612, "step": 130500 }, { "epoch": 0.26, "learning_rate": 0.00014750372550514533, "loss": 0.3614, "step": 131000 }, { "epoch": 0.26, "learning_rate": 0.00014747299952734937, "loss": 0.3614, "step": 131500 }, { "epoch": 0.26, "learning_rate": 0.0001474420890809492, "loss": 0.3607, "step": 132000 }, { "epoch": 0.26, "learning_rate": 0.00014741099425045272, "loss": 0.3607, "step": 132500 }, { "epoch": 0.27, "learning_rate": 0.00014737971512087202, "loss": 0.3602, "step": 133000 }, { "epoch": 0.27, "learning_rate": 0.00014734825177772313, "loss": 0.3599, "step": 133500 }, { "epoch": 0.27, "learning_rate": 0.00014731660430702552, "loss": 0.3599, "step": 134000 }, { "epoch": 0.27, "learning_rate": 0.00014728477279530222, "loss": 0.3595, "step": 134500 }, { "epoch": 0.27, "learning_rate": 0.00014725275732957937, "loss": 0.3593, "step": 135000 }, { "epoch": 0.27, "learning_rate": 0.000147220557997386, "loss": 0.3595, "step": 135500 }, { "epoch": 0.27, "learning_rate": 0.00014718817488675387, "loss": 0.3593, "step": 136000 }, { "epoch": 0.27, "learning_rate": 0.0001471556080862172, "loss": 0.3589, "step": 136500 }, { "epoch": 0.27, "learning_rate": 0.00014712285768481235, "loss": 0.3586, "step": 137000 }, { "epoch": 0.27, "learning_rate": 0.00014708992377207767, "loss": 0.3588, "step": 137500 }, { "epoch": 0.28, "learning_rate": 0.00014705680643805323, "loss": 0.3587, "step": 138000 }, { "epoch": 0.28, "learning_rate": 0.00014702350577328052, "loss": 0.3584, "step": 138500 }, { "epoch": 0.28, "learning_rate": 0.00014699002186880232, "loss": 0.3583, "step": 139000 }, { "epoch": 0.28, "learning_rate": 0.00014695635481616235, "loss": 0.3583, "step": 139500 }, { "epoch": 0.28, "learning_rate": 0.00014692250470740503, "loss": 0.3583, "step": 140000 }, { "epoch": 0.28, "learning_rate": 0.00014688847163507525, "loss": 0.3576, "step": 140500 }, { "epoch": 0.28, "learning_rate": 0.00014685425569221819, "loss": 0.3575, "step": 141000 }, { "epoch": 0.28, "learning_rate": 0.00014681985697237885, "loss": 0.3574, "step": 141500 }, { "epoch": 0.28, "learning_rate": 0.00014678527556960207, "loss": 0.3581, "step": 142000 }, { "epoch": 0.28, "learning_rate": 0.00014675051157843208, "loss": 0.3572, "step": 142500 }, { "epoch": 0.29, "learning_rate": 0.0001467155650939123, "loss": 0.3571, "step": 143000 }, { "epoch": 0.29, "learning_rate": 0.00014668043621158508, "loss": 0.357, "step": 143500 }, { "epoch": 0.29, "learning_rate": 0.00014664512502749141, "loss": 0.3572, "step": 144000 }, { "epoch": 0.29, "learning_rate": 0.00014660963163817077, "loss": 0.3566, "step": 144500 }, { "epoch": 0.29, "learning_rate": 0.00014657395614066075, "loss": 0.3566, "step": 145000 }, { "epoch": 0.29, "learning_rate": 0.0001465380986324967, "loss": 0.3564, "step": 145500 }, { "epoch": 0.29, "learning_rate": 0.0001465020592117118, "loss": 0.3558, "step": 146000 }, { "epoch": 0.29, "learning_rate": 0.00014646583797683636, "loss": 0.3566, "step": 146500 }, { "epoch": 0.29, "learning_rate": 0.0001464294350268979, "loss": 0.3555, "step": 147000 }, { "epoch": 0.29, "learning_rate": 0.00014639285046142065, "loss": 0.3556, "step": 147500 }, { "epoch": 0.3, "learning_rate": 0.00014635608438042546, "loss": 0.355, "step": 148000 }, { "epoch": 0.3, "learning_rate": 0.00014631913688442936, "loss": 0.3552, "step": 148500 }, { "epoch": 0.3, "learning_rate": 0.00014628200807444543, "loss": 0.3549, "step": 149000 }, { "epoch": 0.3, "learning_rate": 0.0001462446980519824, "loss": 0.3549, "step": 149500 }, { "epoch": 0.3, "learning_rate": 0.0001462072069190444, "loss": 0.3551, "step": 150000 }, { "epoch": 0.3, "learning_rate": 0.00014616953477813085, "loss": 0.3547, "step": 150500 }, { "epoch": 0.3, "learning_rate": 0.00014613168173223585, "loss": 0.3547, "step": 151000 }, { "epoch": 0.3, "learning_rate": 0.00014609364788484825, "loss": 0.3544, "step": 151500 }, { "epoch": 0.3, "learning_rate": 0.00014605543333995113, "loss": 0.3538, "step": 152000 }, { "epoch": 0.3, "learning_rate": 0.00014601703820202154, "loss": 0.3547, "step": 152500 }, { "epoch": 0.31, "learning_rate": 0.00014597846257603038, "loss": 0.3537, "step": 153000 }, { "epoch": 0.31, "learning_rate": 0.00014593970656744194, "loss": 0.3538, "step": 153500 }, { "epoch": 0.31, "learning_rate": 0.0001459007702822136, "loss": 0.3534, "step": 154000 }, { "epoch": 0.31, "learning_rate": 0.00014586165382679577, "loss": 0.3532, "step": 154500 }, { "epoch": 0.31, "learning_rate": 0.00014582235730813128, "loss": 0.3537, "step": 155000 }, { "epoch": 0.31, "learning_rate": 0.00014578288083365532, "loss": 0.3533, "step": 155500 }, { "epoch": 0.31, "learning_rate": 0.00014574322451129507, "loss": 0.3527, "step": 156000 }, { "epoch": 0.31, "learning_rate": 0.00014570338844946943, "loss": 0.3529, "step": 156500 }, { "epoch": 0.31, "learning_rate": 0.00014566337275708863, "loss": 0.3524, "step": 157000 }, { "epoch": 0.31, "learning_rate": 0.00014562317754355405, "loss": 0.3529, "step": 157500 }, { "epoch": 0.32, "learning_rate": 0.0001455828029187579, "loss": 0.3517, "step": 158000 }, { "epoch": 0.32, "learning_rate": 0.00014554224899308285, "loss": 0.3528, "step": 158500 }, { "epoch": 0.32, "learning_rate": 0.00014550151587740178, "loss": 0.3525, "step": 159000 }, { "epoch": 0.32, "learning_rate": 0.00014546060368307744, "loss": 0.3523, "step": 159500 }, { "epoch": 0.32, "learning_rate": 0.00014541951252196225, "loss": 0.3515, "step": 160000 }, { "epoch": 0.32, "learning_rate": 0.00014537824250639785, "loss": 0.3517, "step": 160500 }, { "epoch": 0.32, "learning_rate": 0.00014533679374921493, "loss": 0.3521, "step": 161000 }, { "epoch": 0.32, "learning_rate": 0.00014529516636373275, "loss": 0.3522, "step": 161500 }, { "epoch": 0.32, "learning_rate": 0.00014525336046375905, "loss": 0.3515, "step": 162000 }, { "epoch": 0.32, "learning_rate": 0.00014521137616358952, "loss": 0.3511, "step": 162500 }, { "epoch": 0.33, "learning_rate": 0.00014516921357800766, "loss": 0.3513, "step": 163000 }, { "epoch": 0.33, "learning_rate": 0.00014512687282228432, "loss": 0.3509, "step": 163500 }, { "epoch": 0.33, "learning_rate": 0.00014508435401217759, "loss": 0.3515, "step": 164000 }, { "epoch": 0.33, "learning_rate": 0.0001450416572639322, "loss": 0.3506, "step": 164500 }, { "epoch": 0.33, "learning_rate": 0.00014499878269427948, "loss": 0.3504, "step": 165000 }, { "epoch": 0.33, "learning_rate": 0.00014495573042043683, "loss": 0.351, "step": 165500 }, { "epoch": 0.33, "learning_rate": 0.00014491250056010758, "loss": 0.3502, "step": 166000 }, { "epoch": 0.33, "learning_rate": 0.00014486909323148042, "loss": 0.3506, "step": 166500 }, { "epoch": 0.33, "learning_rate": 0.00014482550855322943, "loss": 0.3498, "step": 167000 }, { "epoch": 0.33, "learning_rate": 0.00014478174664451338, "loss": 0.3495, "step": 167500 }, { "epoch": 0.34, "learning_rate": 0.0001447378076249757, "loss": 0.3505, "step": 168000 }, { "epoch": 0.34, "learning_rate": 0.00014469369161474398, "loss": 0.3498, "step": 168500 }, { "epoch": 0.34, "learning_rate": 0.00014464939873442973, "loss": 0.3496, "step": 169000 }, { "epoch": 0.34, "learning_rate": 0.00014460492910512793, "loss": 0.3501, "step": 169500 }, { "epoch": 0.34, "learning_rate": 0.00014456028284841693, "loss": 0.3502, "step": 170000 }, { "epoch": 0.34, "learning_rate": 0.00014451546008635783, "loss": 0.3497, "step": 170500 }, { "epoch": 0.34, "learning_rate": 0.00014447046094149437, "loss": 0.3489, "step": 171000 }, { "epoch": 0.34, "learning_rate": 0.0001444252855368525, "loss": 0.3492, "step": 171500 }, { "epoch": 0.34, "learning_rate": 0.00014437993399594003, "loss": 0.3487, "step": 172000 }, { "epoch": 0.34, "learning_rate": 0.00014433440644274635, "loss": 0.3488, "step": 172500 }, { "epoch": 0.35, "learning_rate": 0.0001442887030017421, "loss": 0.349, "step": 173000 }, { "epoch": 0.35, "learning_rate": 0.00014424282379787865, "loss": 0.349, "step": 173500 }, { "epoch": 0.35, "learning_rate": 0.00014419676895658807, "loss": 0.3486, "step": 174000 }, { "epoch": 0.35, "learning_rate": 0.00014415053860378254, "loss": 0.3483, "step": 174500 }, { "epoch": 0.35, "learning_rate": 0.000144104132865854, "loss": 0.348, "step": 175000 }, { "epoch": 0.35, "learning_rate": 0.00014405755186967404, "loss": 0.3483, "step": 175500 }, { "epoch": 0.35, "learning_rate": 0.0001440107957425933, "loss": 0.3486, "step": 176000 }, { "epoch": 0.35, "learning_rate": 0.00014396386461244123, "loss": 0.3481, "step": 176500 }, { "epoch": 0.35, "learning_rate": 0.0001439167586075258, "loss": 0.3481, "step": 177000 }, { "epoch": 0.35, "learning_rate": 0.00014386947785663293, "loss": 0.3475, "step": 177500 }, { "epoch": 0.36, "learning_rate": 0.0001438220224890265, "loss": 0.3477, "step": 178000 }, { "epoch": 0.36, "learning_rate": 0.00014377439263444763, "loss": 0.3475, "step": 178500 }, { "epoch": 0.36, "learning_rate": 0.00014372658842311449, "loss": 0.3475, "step": 179000 }, { "epoch": 0.36, "learning_rate": 0.00014367860998572198, "loss": 0.3471, "step": 179500 }, { "epoch": 0.36, "learning_rate": 0.00014363045745344137, "loss": 0.3474, "step": 180000 }, { "epoch": 0.36, "learning_rate": 0.00014358213095791978, "loss": 0.3471, "step": 180500 }, { "epoch": 0.36, "learning_rate": 0.00014353363063128005, "loss": 0.3469, "step": 181000 }, { "epoch": 0.36, "learning_rate": 0.00014348495660612015, "loss": 0.3466, "step": 181500 }, { "epoch": 0.36, "learning_rate": 0.0001434361090155131, "loss": 0.3465, "step": 182000 }, { "epoch": 0.36, "learning_rate": 0.00014338708799300633, "loss": 0.3469, "step": 182500 }, { "epoch": 0.37, "learning_rate": 0.00014333789367262136, "loss": 0.3465, "step": 183000 }, { "epoch": 0.37, "learning_rate": 0.00014328852618885365, "loss": 0.3459, "step": 183500 }, { "epoch": 0.37, "learning_rate": 0.00014323898567667202, "loss": 0.3461, "step": 184000 }, { "epoch": 0.37, "learning_rate": 0.00014318927227151832, "loss": 0.3459, "step": 184500 }, { "epoch": 0.37, "learning_rate": 0.00014313938610930712, "loss": 0.3467, "step": 185000 }, { "epoch": 0.37, "learning_rate": 0.00014308932732642524, "loss": 0.3465, "step": 185500 }, { "epoch": 0.37, "learning_rate": 0.00014303909605973154, "loss": 0.3463, "step": 186000 }, { "epoch": 0.37, "learning_rate": 0.00014298869244655632, "loss": 0.3461, "step": 186500 }, { "epoch": 0.37, "learning_rate": 0.0001429381166247012, "loss": 0.3452, "step": 187000 }, { "epoch": 0.37, "learning_rate": 0.0001428873687324385, "loss": 0.3458, "step": 187500 }, { "epoch": 0.38, "learning_rate": 0.00014283644890851103, "loss": 0.3456, "step": 188000 }, { "epoch": 0.38, "learning_rate": 0.00014278535729213168, "loss": 0.3455, "step": 188500 }, { "epoch": 0.38, "learning_rate": 0.00014273409402298291, "loss": 0.3454, "step": 189000 }, { "epoch": 0.38, "learning_rate": 0.0001426826592412166, "loss": 0.3448, "step": 189500 }, { "epoch": 0.38, "learning_rate": 0.00014263105308745343, "loss": 0.345, "step": 190000 }, { "epoch": 0.38, "learning_rate": 0.0001425792757027827, "loss": 0.345, "step": 190500 }, { "epoch": 0.38, "learning_rate": 0.00014252732722876176, "loss": 0.3449, "step": 191000 }, { "epoch": 0.38, "learning_rate": 0.00014247520780741581, "loss": 0.3449, "step": 191500 }, { "epoch": 0.38, "learning_rate": 0.0001424229175812373, "loss": 0.3449, "step": 192000 }, { "epoch": 0.38, "learning_rate": 0.00014237045669318574, "loss": 0.3448, "step": 192500 }, { "epoch": 0.39, "learning_rate": 0.00014231782528668717, "loss": 0.3446, "step": 193000 }, { "epoch": 0.39, "learning_rate": 0.00014226502350563392, "loss": 0.345, "step": 193500 }, { "epoch": 0.39, "learning_rate": 0.00014221205149438394, "loss": 0.3443, "step": 194000 }, { "epoch": 0.39, "learning_rate": 0.00014215890939776074, "loss": 0.3442, "step": 194500 }, { "epoch": 0.39, "learning_rate": 0.0001421055973610528, "loss": 0.3443, "step": 195000 }, { "epoch": 0.39, "learning_rate": 0.00014205211553001317, "loss": 0.3435, "step": 195500 }, { "epoch": 0.39, "learning_rate": 0.00014199846405085913, "loss": 0.3442, "step": 196000 }, { "epoch": 0.39, "learning_rate": 0.00014194464307027178, "loss": 0.3442, "step": 196500 }, { "epoch": 0.39, "learning_rate": 0.00014189065273539564, "loss": 0.3438, "step": 197000 }, { "epoch": 0.39, "learning_rate": 0.0001418364931938382, "loss": 0.3439, "step": 197500 }, { "epoch": 0.4, "learning_rate": 0.00014178216459366958, "loss": 0.3439, "step": 198000 }, { "epoch": 0.4, "learning_rate": 0.0001417276670834221, "loss": 0.3436, "step": 198500 }, { "epoch": 0.4, "learning_rate": 0.00014167300081208988, "loss": 0.3431, "step": 199000 }, { "epoch": 0.4, "learning_rate": 0.00014161816592912844, "loss": 0.343, "step": 199500 }, { "epoch": 0.4, "learning_rate": 0.00014156316258445421, "loss": 0.3427, "step": 200000 }, { "epoch": 0.4, "learning_rate": 0.00014150799092844428, "loss": 0.3432, "step": 200500 }, { "epoch": 0.4, "learning_rate": 0.00014145265111193583, "loss": 0.3433, "step": 201000 }, { "epoch": 0.4, "learning_rate": 0.0001413971432862258, "loss": 0.3429, "step": 201500 }, { "epoch": 0.4, "learning_rate": 0.00014134146760307043, "loss": 0.343, "step": 202000 }, { "epoch": 0.4, "learning_rate": 0.000141285624214685, "loss": 0.3427, "step": 202500 }, { "epoch": 0.41, "learning_rate": 0.00014122961327374313, "loss": 0.3428, "step": 203000 }, { "epoch": 0.41, "learning_rate": 0.0001411734349333766, "loss": 0.3426, "step": 203500 }, { "epoch": 0.41, "learning_rate": 0.0001411170893471749, "loss": 0.3418, "step": 204000 }, { "epoch": 0.41, "learning_rate": 0.00014106057666918466, "loss": 0.3423, "step": 204500 }, { "epoch": 0.41, "learning_rate": 0.00014100389705390938, "loss": 0.3425, "step": 205000 }, { "epoch": 0.41, "learning_rate": 0.00014094705065630903, "loss": 0.3424, "step": 205500 }, { "epoch": 0.41, "learning_rate": 0.0001408900376317994, "loss": 0.3423, "step": 206000 }, { "epoch": 0.41, "learning_rate": 0.00014083285813625197, "loss": 0.3419, "step": 206500 }, { "epoch": 0.41, "learning_rate": 0.0001407755123259933, "loss": 0.3423, "step": 207000 }, { "epoch": 0.41, "learning_rate": 0.00014071800035780465, "loss": 0.3424, "step": 207500 }, { "epoch": 0.42, "learning_rate": 0.00014066032238892152, "loss": 0.3429, "step": 208000 }, { "epoch": 0.42, "learning_rate": 0.0001406024785770333, "loss": 0.3416, "step": 208500 }, { "epoch": 0.42, "learning_rate": 0.00014054446908028272, "loss": 0.3417, "step": 209000 }, { "epoch": 0.42, "learning_rate": 0.0001404862940572656, "loss": 0.3409, "step": 209500 }, { "epoch": 0.42, "learning_rate": 0.00014042795366703018, "loss": 0.3416, "step": 210000 }, { "epoch": 0.42, "learning_rate": 0.00014036944806907685, "loss": 0.342, "step": 210500 }, { "epoch": 0.42, "learning_rate": 0.0001403107774233577, "loss": 0.3417, "step": 211000 }, { "epoch": 0.42, "learning_rate": 0.00014025194189027604, "loss": 0.3411, "step": 211500 }, { "epoch": 0.42, "learning_rate": 0.00014019294163068597, "loss": 0.3415, "step": 212000 }, { "epoch": 0.42, "learning_rate": 0.00014013377680589196, "loss": 0.3409, "step": 212500 }, { "epoch": 0.43, "learning_rate": 0.00014007444757764835, "loss": 0.3411, "step": 213000 }, { "epoch": 0.43, "learning_rate": 0.000140014954108159, "loss": 0.3406, "step": 213500 }, { "epoch": 0.43, "learning_rate": 0.0001399552965600768, "loss": 0.3405, "step": 214000 }, { "epoch": 0.43, "learning_rate": 0.00013989547509650314, "loss": 0.3408, "step": 214500 }, { "epoch": 0.43, "learning_rate": 0.0001398354898809877, "loss": 0.3402, "step": 215000 }, { "epoch": 0.43, "learning_rate": 0.00013977534107752776, "loss": 0.3403, "step": 215500 }, { "epoch": 0.43, "learning_rate": 0.0001397150288505678, "loss": 0.3402, "step": 216000 }, { "epoch": 0.43, "learning_rate": 0.0001396545533649992, "loss": 0.3405, "step": 216500 }, { "epoch": 0.43, "learning_rate": 0.00013959391478615959, "loss": 0.3396, "step": 217000 }, { "epoch": 0.43, "learning_rate": 0.00013953311327983254, "loss": 0.3403, "step": 217500 }, { "epoch": 0.44, "learning_rate": 0.00013947214901224706, "loss": 0.3399, "step": 218000 }, { "epoch": 0.44, "learning_rate": 0.0001394110221500771, "loss": 0.3401, "step": 218500 }, { "epoch": 0.44, "learning_rate": 0.0001393497328604412, "loss": 0.3396, "step": 219000 }, { "epoch": 0.44, "learning_rate": 0.00013928828131090193, "loss": 0.34, "step": 219500 }, { "epoch": 0.44, "learning_rate": 0.00013922666766946545, "loss": 0.3401, "step": 220000 }, { "epoch": 0.44, "learning_rate": 0.00013916489210458118, "loss": 0.3398, "step": 220500 }, { "epoch": 0.44, "learning_rate": 0.00013910295478514106, "loss": 0.3397, "step": 221000 }, { "epoch": 0.44, "learning_rate": 0.0001390408558804794, "loss": 0.3392, "step": 221500 }, { "epoch": 0.44, "learning_rate": 0.0001389785955603722, "loss": 0.3392, "step": 222000 }, { "epoch": 0.44, "learning_rate": 0.00013891617399503688, "loss": 0.3394, "step": 222500 }, { "epoch": 0.45, "learning_rate": 0.00013885359135513154, "loss": 0.339, "step": 223000 }, { "epoch": 0.45, "learning_rate": 0.00013879084781175476, "loss": 0.3393, "step": 223500 }, { "epoch": 0.45, "learning_rate": 0.000138727943536445, "loss": 0.3392, "step": 224000 }, { "epoch": 0.45, "learning_rate": 0.0001386648787011801, "loss": 0.3391, "step": 224500 }, { "epoch": 0.45, "learning_rate": 0.00013860165347837698, "loss": 0.3386, "step": 225000 }, { "epoch": 0.45, "learning_rate": 0.00013853826804089095, "loss": 0.3385, "step": 225500 }, { "epoch": 0.45, "learning_rate": 0.00013847472256201535, "loss": 0.3383, "step": 226000 }, { "epoch": 0.45, "learning_rate": 0.00013841101721548112, "loss": 0.3388, "step": 226500 }, { "epoch": 0.45, "learning_rate": 0.00013834715217545625, "loss": 0.339, "step": 227000 }, { "epoch": 0.45, "learning_rate": 0.00013828312761654532, "loss": 0.3382, "step": 227500 }, { "epoch": 0.46, "learning_rate": 0.000138218943713789, "loss": 0.3389, "step": 228000 }, { "epoch": 0.46, "learning_rate": 0.00013815460064266368, "loss": 0.3391, "step": 228500 }, { "epoch": 0.46, "learning_rate": 0.0001380900985790808, "loss": 0.3388, "step": 229000 }, { "epoch": 0.46, "learning_rate": 0.0001380254376993866, "loss": 0.3382, "step": 229500 }, { "epoch": 0.46, "learning_rate": 0.00013796061818036138, "loss": 0.3381, "step": 230000 }, { "epoch": 0.46, "learning_rate": 0.00013789564019921931, "loss": 0.3382, "step": 230500 }, { "epoch": 0.46, "learning_rate": 0.00013783050393360768, "loss": 0.3376, "step": 231000 }, { "epoch": 0.46, "learning_rate": 0.00013776520956160655, "loss": 0.3374, "step": 231500 }, { "epoch": 0.46, "learning_rate": 0.0001376997572617282, "loss": 0.3382, "step": 232000 }, { "epoch": 0.46, "learning_rate": 0.0001376341472129168, "loss": 0.3377, "step": 232500 }, { "epoch": 0.47, "learning_rate": 0.00013756837959454766, "loss": 0.3375, "step": 233000 }, { "epoch": 0.47, "learning_rate": 0.00013750245458642692, "loss": 0.3374, "step": 233500 }, { "epoch": 0.47, "learning_rate": 0.0001374363723687911, "loss": 0.3377, "step": 234000 }, { "epoch": 0.47, "learning_rate": 0.0001373701331223064, "loss": 0.337, "step": 234500 }, { "epoch": 0.47, "learning_rate": 0.00013730373702806846, "loss": 0.3371, "step": 235000 }, { "epoch": 0.47, "learning_rate": 0.0001372371842676016, "loss": 0.3372, "step": 235500 }, { "epoch": 0.47, "learning_rate": 0.00013717047502285855, "loss": 0.3376, "step": 236000 }, { "epoch": 0.47, "learning_rate": 0.0001371036094762198, "loss": 0.338, "step": 236500 }, { "epoch": 0.47, "learning_rate": 0.0001370365878104933, "loss": 0.3372, "step": 237000 }, { "epoch": 0.47, "learning_rate": 0.00013696941020891363, "loss": 0.3372, "step": 237500 }, { "epoch": 0.48, "learning_rate": 0.00013690207685514185, "loss": 0.3373, "step": 238000 }, { "epoch": 0.48, "learning_rate": 0.0001368345879332647, "loss": 0.3362, "step": 238500 }, { "epoch": 0.48, "learning_rate": 0.0001367669436277944, "loss": 0.3367, "step": 239000 }, { "epoch": 0.48, "learning_rate": 0.00013669914412366783, "loss": 0.3369, "step": 239500 }, { "epoch": 0.48, "learning_rate": 0.0001366311896062463, "loss": 0.3363, "step": 240000 }, { "epoch": 0.48, "learning_rate": 0.00013656308026131485, "loss": 0.3366, "step": 240500 }, { "epoch": 0.48, "learning_rate": 0.00013649481627508181, "loss": 0.3369, "step": 241000 }, { "epoch": 0.48, "learning_rate": 0.00013642639783417832, "loss": 0.3367, "step": 241500 }, { "epoch": 0.48, "learning_rate": 0.0001363578251256578, "loss": 0.3363, "step": 242000 }, { "epoch": 0.48, "learning_rate": 0.0001362890983369954, "loss": 0.3358, "step": 242500 }, { "epoch": 0.49, "learning_rate": 0.00013622021765608754, "loss": 0.3359, "step": 243000 }, { "epoch": 0.49, "learning_rate": 0.00013615118327125136, "loss": 0.3359, "step": 243500 }, { "epoch": 0.49, "learning_rate": 0.00013608199537122425, "loss": 0.3358, "step": 244000 }, { "epoch": 0.49, "learning_rate": 0.0001360126541451633, "loss": 0.336, "step": 244500 }, { "epoch": 0.49, "learning_rate": 0.0001359431597826447, "loss": 0.3363, "step": 245000 }, { "epoch": 0.49, "learning_rate": 0.0001358735124736635, "loss": 0.3354, "step": 245500 }, { "epoch": 0.49, "learning_rate": 0.0001358037124086327, "loss": 0.3353, "step": 246000 }, { "epoch": 0.49, "learning_rate": 0.00013573375977838303, "loss": 0.3356, "step": 246500 }, { "epoch": 0.49, "learning_rate": 0.00013566365477416233, "loss": 0.3351, "step": 247000 }, { "epoch": 0.49, "learning_rate": 0.00013559339758763495, "loss": 0.3354, "step": 247500 }, { "epoch": 0.5, "learning_rate": 0.00013552298841088144, "loss": 0.3354, "step": 248000 }, { "epoch": 0.5, "learning_rate": 0.00013545242743639774, "loss": 0.335, "step": 248500 }, { "epoch": 0.5, "learning_rate": 0.00013538171485709486, "loss": 0.3351, "step": 249000 }, { "epoch": 0.5, "learning_rate": 0.00013531085086629832, "loss": 0.335, "step": 249500 }, { "epoch": 0.5, "learning_rate": 0.00013523983565774753, "loss": 0.3351, "step": 250000 }, { "epoch": 0.5, "learning_rate": 0.0001351686694255954, "loss": 0.335, "step": 250500 }, { "epoch": 0.5, "learning_rate": 0.00013509735236440766, "loss": 0.335, "step": 251000 }, { "epoch": 0.5, "learning_rate": 0.0001350258846691625, "loss": 0.3343, "step": 251500 }, { "epoch": 0.5, "learning_rate": 0.00013495426653524972, "loss": 0.3346, "step": 252000 }, { "epoch": 0.5, "learning_rate": 0.0001348824981584707, "loss": 0.3347, "step": 252500 }, { "epoch": 0.51, "learning_rate": 0.00013481057973503742, "loss": 0.3346, "step": 253000 }, { "epoch": 0.51, "learning_rate": 0.00013473851146157204, "loss": 0.3347, "step": 253500 }, { "epoch": 0.51, "learning_rate": 0.00013466629353510651, "loss": 0.3347, "step": 254000 }, { "epoch": 0.51, "learning_rate": 0.00013459392615308192, "loss": 0.3344, "step": 254500 }, { "epoch": 0.51, "learning_rate": 0.00013452140951334787, "loss": 0.3345, "step": 255000 }, { "epoch": 0.51, "learning_rate": 0.00013444874381416208, "loss": 0.3343, "step": 255500 }, { "epoch": 0.51, "learning_rate": 0.00013437592925418985, "loss": 0.334, "step": 256000 }, { "epoch": 0.51, "learning_rate": 0.00013430296603250338, "loss": 0.3344, "step": 256500 }, { "epoch": 0.51, "learning_rate": 0.00013422985434858133, "loss": 0.3338, "step": 257000 }, { "epoch": 0.51, "learning_rate": 0.00013415659440230824, "loss": 0.3348, "step": 257500 }, { "epoch": 0.52, "learning_rate": 0.00013408318639397405, "loss": 0.3347, "step": 258000 }, { "epoch": 0.52, "learning_rate": 0.00013400963052427337, "loss": 0.334, "step": 258500 }, { "epoch": 0.52, "learning_rate": 0.00013393592699430525, "loss": 0.3336, "step": 259000 }, { "epoch": 0.52, "learning_rate": 0.00013386207600557222, "loss": 0.334, "step": 259500 }, { "epoch": 0.52, "learning_rate": 0.00013378807775998012, "loss": 0.3336, "step": 260000 }, { "epoch": 0.52, "learning_rate": 0.0001337139324598373, "loss": 0.3335, "step": 260500 }, { "epoch": 0.52, "learning_rate": 0.00013363964030785422, "loss": 0.3334, "step": 261000 }, { "epoch": 0.52, "learning_rate": 0.00013356520150714277, "loss": 0.3336, "step": 261500 }, { "epoch": 0.52, "learning_rate": 0.00013349061626121578, "loss": 0.3337, "step": 262000 }, { "epoch": 0.52, "learning_rate": 0.00013341588477398645, "loss": 0.3339, "step": 262500 }, { "epoch": 0.53, "learning_rate": 0.00013334100724976783, "loss": 0.3332, "step": 263000 }, { "epoch": 0.53, "learning_rate": 0.00013326598389327223, "loss": 0.3337, "step": 263500 }, { "epoch": 0.53, "learning_rate": 0.0001331908149096106, "loss": 0.3332, "step": 264000 }, { "epoch": 0.53, "learning_rate": 0.0001331155005042921, "loss": 0.3332, "step": 264500 }, { "epoch": 0.53, "learning_rate": 0.00013304004088322342, "loss": 0.3334, "step": 265000 }, { "epoch": 0.53, "learning_rate": 0.00013296443625270828, "loss": 0.3327, "step": 265500 }, { "epoch": 0.53, "learning_rate": 0.00013288868681944692, "loss": 0.3328, "step": 266000 }, { "epoch": 0.53, "learning_rate": 0.00013281279279053532, "loss": 0.3336, "step": 266500 }, { "epoch": 0.53, "learning_rate": 0.00013273675437346487, "loss": 0.3325, "step": 267000 }, { "epoch": 0.53, "learning_rate": 0.00013266057177612172, "loss": 0.3328, "step": 267500 }, { "epoch": 0.54, "learning_rate": 0.00013258424520678618, "loss": 0.3325, "step": 268000 }, { "epoch": 0.54, "learning_rate": 0.00013250777487413217, "loss": 0.3324, "step": 268500 }, { "epoch": 0.54, "learning_rate": 0.00013243116098722663, "loss": 0.3327, "step": 269000 }, { "epoch": 0.54, "learning_rate": 0.00013235440375552906, "loss": 0.3326, "step": 269500 }, { "epoch": 0.54, "learning_rate": 0.00013227750338889077, "loss": 0.333, "step": 270000 }, { "epoch": 0.54, "learning_rate": 0.0001322004600975544, "loss": 0.3324, "step": 270500 }, { "epoch": 0.54, "learning_rate": 0.00013212327409215343, "loss": 0.3324, "step": 271000 }, { "epoch": 0.54, "learning_rate": 0.0001320459455837114, "loss": 0.332, "step": 271500 }, { "epoch": 0.54, "learning_rate": 0.0001319684747836415, "loss": 0.3325, "step": 272000 }, { "epoch": 0.54, "learning_rate": 0.00013189086190374595, "loss": 0.3319, "step": 272500 }, { "epoch": 0.55, "learning_rate": 0.0001318131071562154, "loss": 0.3319, "step": 273000 }, { "epoch": 0.55, "learning_rate": 0.0001317352107536284, "loss": 0.3322, "step": 273500 }, { "epoch": 0.55, "learning_rate": 0.00013165717290895067, "loss": 0.3323, "step": 274000 }, { "epoch": 0.55, "learning_rate": 0.00013157899383553474, "loss": 0.3317, "step": 274500 }, { "epoch": 0.55, "learning_rate": 0.0001315006737471192, "loss": 0.3319, "step": 275000 }, { "epoch": 0.55, "learning_rate": 0.0001314222128578282, "loss": 0.3317, "step": 275500 }, { "epoch": 0.55, "learning_rate": 0.0001313436113821708, "loss": 0.3324, "step": 276000 }, { "epoch": 0.55, "learning_rate": 0.0001312648695350404, "loss": 0.3316, "step": 276500 }, { "epoch": 0.55, "learning_rate": 0.00013118598753171425, "loss": 0.3314, "step": 277000 }, { "epoch": 0.55, "learning_rate": 0.00013110696558785273, "loss": 0.3311, "step": 277500 }, { "epoch": 0.56, "learning_rate": 0.0001310278039194988, "loss": 0.3316, "step": 278000 }, { "epoch": 0.56, "learning_rate": 0.00013094850274307745, "loss": 0.3308, "step": 278500 }, { "epoch": 0.56, "learning_rate": 0.00013086906227539506, "loss": 0.3307, "step": 279000 }, { "epoch": 0.56, "learning_rate": 0.00013078948273363884, "loss": 0.3316, "step": 279500 }, { "epoch": 0.56, "learning_rate": 0.00013070976433537623, "loss": 0.3315, "step": 280000 }, { "epoch": 0.56, "learning_rate": 0.00013062990729855427, "loss": 0.3317, "step": 280500 }, { "epoch": 0.56, "learning_rate": 0.00013054991184149905, "loss": 0.3309, "step": 281000 }, { "epoch": 0.56, "learning_rate": 0.00013046977818291508, "loss": 0.3313, "step": 281500 }, { "epoch": 0.56, "learning_rate": 0.00013038950654188476, "loss": 0.3311, "step": 282000 }, { "epoch": 0.56, "learning_rate": 0.00013030909713786768, "loss": 0.3308, "step": 282500 }, { "epoch": 0.57, "learning_rate": 0.00013022855019070005, "loss": 0.3308, "step": 283000 }, { "epoch": 0.57, "learning_rate": 0.00013014786592059418, "loss": 0.3309, "step": 283500 }, { "epoch": 0.57, "learning_rate": 0.0001300670445481378, "loss": 0.3305, "step": 284000 }, { "epoch": 0.57, "learning_rate": 0.0001299860862942934, "loss": 0.3307, "step": 284500 }, { "epoch": 0.57, "learning_rate": 0.0001299049913803978, "loss": 0.3317, "step": 285000 }, { "epoch": 0.57, "learning_rate": 0.00012982376002816138, "loss": 0.3299, "step": 285500 }, { "epoch": 0.57, "learning_rate": 0.00012974239245966754, "loss": 0.3309, "step": 286000 }, { "epoch": 0.57, "learning_rate": 0.00012966088889737216, "loss": 0.331, "step": 286500 }, { "epoch": 0.57, "learning_rate": 0.0001295792495641028, "loss": 0.3307, "step": 287000 }, { "epoch": 0.57, "learning_rate": 0.0001294974746830583, "loss": 0.3306, "step": 287500 }, { "epoch": 0.58, "learning_rate": 0.00012941556447780813, "loss": 0.3302, "step": 288000 }, { "epoch": 0.58, "learning_rate": 0.00012933351917229156, "loss": 0.3303, "step": 288500 }, { "epoch": 0.58, "learning_rate": 0.0001292513389908174, "loss": 0.3303, "step": 289000 }, { "epoch": 0.58, "learning_rate": 0.00012916902415806305, "loss": 0.3299, "step": 289500 }, { "epoch": 0.58, "learning_rate": 0.0001290865748990742, "loss": 0.33, "step": 290000 }, { "epoch": 0.58, "learning_rate": 0.00012900399143926395, "loss": 0.3309, "step": 290500 }, { "epoch": 0.58, "learning_rate": 0.00012892127400441228, "loss": 0.3304, "step": 291000 }, { "epoch": 0.58, "learning_rate": 0.00012883842282066557, "loss": 0.3299, "step": 291500 }, { "epoch": 0.58, "learning_rate": 0.00012875543811453576, "loss": 0.3297, "step": 292000 }, { "epoch": 0.58, "learning_rate": 0.00012867232011289984, "loss": 0.3306, "step": 292500 }, { "epoch": 0.59, "learning_rate": 0.0001285890690429993, "loss": 0.33, "step": 293000 }, { "epoch": 0.59, "learning_rate": 0.00012850568513243934, "loss": 0.3299, "step": 293500 }, { "epoch": 0.59, "learning_rate": 0.00012842216860918846, "loss": 0.3302, "step": 294000 }, { "epoch": 0.59, "learning_rate": 0.00012833851970157757, "loss": 0.3298, "step": 294500 }, { "epoch": 0.59, "learning_rate": 0.0001282547386382996, "loss": 0.3295, "step": 295000 }, { "epoch": 0.59, "learning_rate": 0.00012817082564840881, "loss": 0.329, "step": 295500 }, { "epoch": 0.59, "learning_rate": 0.0001280867809613201, "loss": 0.3294, "step": 296000 }, { "epoch": 0.59, "learning_rate": 0.00012800260480680845, "loss": 0.3288, "step": 296500 }, { "epoch": 0.59, "learning_rate": 0.0001279182974150082, "loss": 0.3293, "step": 297000 }, { "epoch": 0.59, "learning_rate": 0.00012783385901641258, "loss": 0.3291, "step": 297500 }, { "epoch": 0.6, "learning_rate": 0.00012774928984187297, "loss": 0.3293, "step": 298000 }, { "epoch": 0.6, "learning_rate": 0.00012766459012259818, "loss": 0.3288, "step": 298500 }, { "epoch": 0.6, "learning_rate": 0.00012757976009015413, "loss": 0.3292, "step": 299000 }, { "epoch": 0.6, "learning_rate": 0.00012749479997646275, "loss": 0.3294, "step": 299500 }, { "epoch": 0.6, "learning_rate": 0.0001274097100138019, "loss": 0.3285, "step": 300000 }, { "epoch": 0.6, "learning_rate": 0.00012732449043480413, "loss": 0.3286, "step": 300500 }, { "epoch": 0.6, "learning_rate": 0.00012723914147245663, "loss": 0.3288, "step": 301000 }, { "epoch": 0.6, "learning_rate": 0.00012715366336010016, "loss": 0.3286, "step": 301500 }, { "epoch": 0.6, "learning_rate": 0.00012706805633142863, "loss": 0.3286, "step": 302000 }, { "epoch": 0.6, "learning_rate": 0.00012698232062048837, "loss": 0.329, "step": 302500 }, { "epoch": 0.61, "learning_rate": 0.00012689645646167755, "loss": 0.3283, "step": 303000 }, { "epoch": 0.61, "learning_rate": 0.0001268104640897455, "loss": 0.3291, "step": 303500 }, { "epoch": 0.61, "learning_rate": 0.00012672434373979207, "loss": 0.329, "step": 304000 }, { "epoch": 0.61, "learning_rate": 0.00012663809564726706, "loss": 0.328, "step": 304500 }, { "epoch": 0.61, "learning_rate": 0.00012655172004796936, "loss": 0.3284, "step": 305000 }, { "epoch": 0.61, "learning_rate": 0.00012646521717804668, "loss": 0.3286, "step": 305500 }, { "epoch": 0.61, "learning_rate": 0.00012637858727399448, "loss": 0.3286, "step": 306000 }, { "epoch": 0.61, "learning_rate": 0.00012629183057265563, "loss": 0.3284, "step": 306500 }, { "epoch": 0.61, "learning_rate": 0.00012620494731121966, "loss": 0.3275, "step": 307000 }, { "epoch": 0.61, "learning_rate": 0.00012611793772722204, "loss": 0.3286, "step": 307500 }, { "epoch": 0.62, "learning_rate": 0.00012603080205854372, "loss": 0.3281, "step": 308000 }, { "epoch": 0.62, "learning_rate": 0.00012594354054341024, "loss": 0.328, "step": 308500 }, { "epoch": 0.62, "learning_rate": 0.00012585615342039126, "loss": 0.3287, "step": 309000 }, { "epoch": 0.62, "learning_rate": 0.00012576864092839985, "loss": 0.3284, "step": 309500 }, { "epoch": 0.62, "learning_rate": 0.0001256810033066918, "loss": 0.3284, "step": 310000 }, { "epoch": 0.62, "learning_rate": 0.00012559324079486505, "loss": 0.328, "step": 310500 }, { "epoch": 0.62, "learning_rate": 0.0001255053536328589, "loss": 0.3282, "step": 311000 }, { "epoch": 0.62, "learning_rate": 0.0001254173420609536, "loss": 0.3278, "step": 311500 }, { "epoch": 0.62, "learning_rate": 0.0001253292063197693, "loss": 0.3283, "step": 312000 }, { "epoch": 0.62, "learning_rate": 0.00012524094665026584, "loss": 0.3275, "step": 312500 }, { "epoch": 0.63, "learning_rate": 0.0001251525632937418, "loss": 0.3275, "step": 313000 }, { "epoch": 0.63, "learning_rate": 0.0001250640564918338, "loss": 0.3276, "step": 313500 }, { "epoch": 0.63, "learning_rate": 0.00012497542648651615, "loss": 0.3278, "step": 314000 }, { "epoch": 0.63, "learning_rate": 0.00012488667352009985, "loss": 0.3277, "step": 314500 }, { "epoch": 0.63, "learning_rate": 0.00012479779783523216, "loss": 0.3266, "step": 315000 }, { "epoch": 0.63, "learning_rate": 0.00012470879967489579, "loss": 0.3275, "step": 315500 }, { "epoch": 0.63, "learning_rate": 0.00012461967928240828, "loss": 0.3276, "step": 316000 }, { "epoch": 0.63, "learning_rate": 0.00012453043690142143, "loss": 0.3277, "step": 316500 }, { "epoch": 0.63, "learning_rate": 0.00012444107277592047, "loss": 0.3276, "step": 317000 }, { "epoch": 0.63, "learning_rate": 0.00012435158715022352, "loss": 0.3275, "step": 317500 }, { "epoch": 0.64, "learning_rate": 0.0001242619802689809, "loss": 0.3271, "step": 318000 }, { "epoch": 0.64, "learning_rate": 0.00012417225237717434, "loss": 0.3269, "step": 318500 }, { "epoch": 0.64, "learning_rate": 0.00012408240372011647, "loss": 0.3272, "step": 319000 }, { "epoch": 0.64, "learning_rate": 0.00012399243454345012, "loss": 0.3271, "step": 319500 }, { "epoch": 0.64, "learning_rate": 0.0001239023450931476, "loss": 0.3273, "step": 320000 }, { "epoch": 0.64, "learning_rate": 0.00012381213561550995, "loss": 0.3275, "step": 320500 }, { "epoch": 0.64, "learning_rate": 0.00012372180635716656, "loss": 0.3265, "step": 321000 }, { "epoch": 0.64, "learning_rate": 0.00012363135756507406, "loss": 0.327, "step": 321500 }, { "epoch": 0.64, "learning_rate": 0.00012354078948651604, "loss": 0.327, "step": 322000 }, { "epoch": 0.64, "learning_rate": 0.00012345010236910217, "loss": 0.3266, "step": 322500 }, { "epoch": 0.65, "learning_rate": 0.00012335929646076758, "loss": 0.3266, "step": 323000 }, { "epoch": 0.65, "learning_rate": 0.0001232683720097721, "loss": 0.3262, "step": 323500 }, { "epoch": 0.65, "learning_rate": 0.00012317732926469976, "loss": 0.3263, "step": 324000 }, { "epoch": 0.65, "learning_rate": 0.00012308616847445794, "loss": 0.3264, "step": 324500 }, { "epoch": 0.65, "learning_rate": 0.00012299488988827675, "loss": 0.3263, "step": 325000 }, { "epoch": 0.65, "learning_rate": 0.00012290349375570836, "loss": 0.3269, "step": 325500 }, { "epoch": 0.65, "learning_rate": 0.0001228119803266263, "loss": 0.3262, "step": 326000 }, { "epoch": 0.65, "learning_rate": 0.0001227203498512248, "loss": 0.3268, "step": 326500 }, { "epoch": 0.65, "learning_rate": 0.0001226286025800181, "loss": 0.3262, "step": 327000 }, { "epoch": 0.65, "learning_rate": 0.00012253673876383967, "loss": 0.326, "step": 327500 }, { "epoch": 0.66, "learning_rate": 0.00012244475865384177, "loss": 0.3261, "step": 328000 }, { "epoch": 0.66, "learning_rate": 0.00012235266250149444, "loss": 0.3256, "step": 328500 }, { "epoch": 0.66, "learning_rate": 0.00012226045055858505, "loss": 0.3257, "step": 329000 }, { "epoch": 0.66, "learning_rate": 0.00012216812307721755, "loss": 0.3263, "step": 329500 }, { "epoch": 0.66, "learning_rate": 0.00012207568030981174, "loss": 0.3263, "step": 330000 }, { "epoch": 0.66, "learning_rate": 0.00012198312250910265, "loss": 0.3255, "step": 330500 }, { "epoch": 0.66, "learning_rate": 0.00012189044992813972, "loss": 0.3256, "step": 331000 }, { "epoch": 0.66, "learning_rate": 0.00012179766282028625, "loss": 0.3254, "step": 331500 }, { "epoch": 0.66, "learning_rate": 0.0001217047614392187, "loss": 0.3255, "step": 332000 }, { "epoch": 0.66, "learning_rate": 0.00012161174603892584, "loss": 0.3263, "step": 332500 }, { "epoch": 0.67, "learning_rate": 0.00012151861687370828, "loss": 0.3258, "step": 333000 }, { "epoch": 0.67, "learning_rate": 0.00012142537419817753, "loss": 0.3254, "step": 333500 }, { "epoch": 0.67, "learning_rate": 0.00012133201826725558, "loss": 0.3255, "step": 334000 }, { "epoch": 0.67, "learning_rate": 0.00012123854933617394, "loss": 0.3249, "step": 334500 }, { "epoch": 0.67, "learning_rate": 0.0001211449676604731, "loss": 0.3258, "step": 335000 }, { "epoch": 0.67, "learning_rate": 0.0001210512734960018, "loss": 0.3251, "step": 335500 }, { "epoch": 0.67, "learning_rate": 0.00012095746709891632, "loss": 0.3255, "step": 336000 }, { "epoch": 0.67, "learning_rate": 0.00012086354872567969, "loss": 0.3257, "step": 336500 }, { "epoch": 0.67, "learning_rate": 0.00012076951863306127, "loss": 0.3253, "step": 337000 }, { "epoch": 0.67, "learning_rate": 0.00012067537707813568, "loss": 0.3253, "step": 337500 }, { "epoch": 0.68, "learning_rate": 0.0001205811243182823, "loss": 0.3256, "step": 338000 }, { "epoch": 0.68, "learning_rate": 0.00012048676061118467, "loss": 0.3258, "step": 338500 }, { "epoch": 0.68, "learning_rate": 0.00012039228621482949, "loss": 0.3249, "step": 339000 }, { "epoch": 0.68, "learning_rate": 0.0001202977013875062, "loss": 0.3251, "step": 339500 }, { "epoch": 0.68, "learning_rate": 0.00012020300638780604, "loss": 0.3253, "step": 340000 }, { "epoch": 0.68, "learning_rate": 0.0001201082014746216, "loss": 0.325, "step": 340500 }, { "epoch": 0.68, "learning_rate": 0.00012001328690714582, "loss": 0.3246, "step": 341000 }, { "epoch": 0.68, "learning_rate": 0.00011991826294487155, "loss": 0.3245, "step": 341500 }, { "epoch": 0.68, "learning_rate": 0.00011982312984759068, "loss": 0.325, "step": 342000 }, { "epoch": 0.68, "learning_rate": 0.00011972788787539345, "loss": 0.325, "step": 342500 }, { "epoch": 0.69, "learning_rate": 0.00011963253728866778, "loss": 0.3245, "step": 343000 }, { "epoch": 0.69, "learning_rate": 0.00011953707834809848, "loss": 0.3245, "step": 343500 }, { "epoch": 0.69, "learning_rate": 0.00011944151131466675, "loss": 0.3242, "step": 344000 }, { "epoch": 0.69, "learning_rate": 0.00011934583644964913, "loss": 0.3251, "step": 344500 }, { "epoch": 0.69, "learning_rate": 0.00011925005401461709, "loss": 0.3245, "step": 345000 }, { "epoch": 0.69, "learning_rate": 0.00011915416427143613, "loss": 0.3239, "step": 345500 }, { "epoch": 0.69, "learning_rate": 0.00011905816748226513, "loss": 0.3242, "step": 346000 }, { "epoch": 0.69, "learning_rate": 0.00011896206390955567, "loss": 0.3243, "step": 346500 }, { "epoch": 0.69, "learning_rate": 0.00011886585381605125, "loss": 0.324, "step": 347000 }, { "epoch": 0.69, "learning_rate": 0.0001187695374647866, "loss": 0.3241, "step": 347500 }, { "epoch": 0.7, "learning_rate": 0.00011867311511908693, "loss": 0.3241, "step": 348000 }, { "epoch": 0.7, "learning_rate": 0.00011857658704256721, "loss": 0.3241, "step": 348500 }, { "epoch": 0.7, "learning_rate": 0.00011847995349913162, "loss": 0.3247, "step": 349000 }, { "epoch": 0.7, "learning_rate": 0.00011838321475297247, "loss": 0.324, "step": 349500 }, { "epoch": 0.7, "learning_rate": 0.00011828637106856989, "loss": 0.3238, "step": 350000 }, { "epoch": 0.7, "learning_rate": 0.00011818942271069073, "loss": 0.3238, "step": 350500 }, { "epoch": 0.7, "learning_rate": 0.00011809236994438816, "loss": 0.3241, "step": 351000 }, { "epoch": 0.7, "learning_rate": 0.0001179952130350007, "loss": 0.3239, "step": 351500 }, { "epoch": 0.7, "learning_rate": 0.00011789795224815164, "loss": 0.3233, "step": 352000 }, { "epoch": 0.7, "learning_rate": 0.00011780058784974831, "loss": 0.3234, "step": 352500 }, { "epoch": 0.71, "learning_rate": 0.00011770312010598116, "loss": 0.3239, "step": 353000 }, { "epoch": 0.71, "learning_rate": 0.00011760554928332333, "loss": 0.3232, "step": 353500 }, { "epoch": 0.71, "learning_rate": 0.00011750787564852973, "loss": 0.3236, "step": 354000 }, { "epoch": 0.71, "learning_rate": 0.00011741009946863639, "loss": 0.3239, "step": 354500 }, { "epoch": 0.71, "learning_rate": 0.00011731222101095955, "loss": 0.3233, "step": 355000 }, { "epoch": 0.71, "learning_rate": 0.00011721424054309525, "loss": 0.3233, "step": 355500 }, { "epoch": 0.71, "learning_rate": 0.00011711615833291833, "loss": 0.3235, "step": 356000 }, { "epoch": 0.71, "learning_rate": 0.00011701797464858178, "loss": 0.3239, "step": 356500 }, { "epoch": 0.71, "learning_rate": 0.0001169196897585161, "loss": 0.3232, "step": 357000 }, { "epoch": 0.71, "learning_rate": 0.00011682130393142838, "loss": 0.3236, "step": 357500 }, { "epoch": 0.72, "learning_rate": 0.00011672281743630175, "loss": 0.3234, "step": 358000 }, { "epoch": 0.72, "learning_rate": 0.00011662423054239445, "loss": 0.3234, "step": 358500 }, { "epoch": 0.72, "learning_rate": 0.0001165255435192394, "loss": 0.323, "step": 359000 }, { "epoch": 0.72, "learning_rate": 0.00011642675663664308, "loss": 0.3231, "step": 359500 }, { "epoch": 0.72, "learning_rate": 0.00011632787016468506, "loss": 0.3227, "step": 360000 }, { "epoch": 0.72, "learning_rate": 0.00011622888437371719, "loss": 0.3232, "step": 360500 }, { "epoch": 0.72, "learning_rate": 0.0001161297995343628, "loss": 0.3227, "step": 361000 }, { "epoch": 0.72, "learning_rate": 0.00011603061591751615, "loss": 0.3232, "step": 361500 }, { "epoch": 0.72, "learning_rate": 0.00011593133379434138, "loss": 0.3229, "step": 362000 }, { "epoch": 0.72, "learning_rate": 0.00011583195343627207, "loss": 0.3225, "step": 362500 }, { "epoch": 0.73, "learning_rate": 0.00011573247511501028, "loss": 0.3226, "step": 363000 }, { "epoch": 0.73, "learning_rate": 0.00011563289910252599, "loss": 0.3227, "step": 363500 }, { "epoch": 0.73, "learning_rate": 0.00011553322567105619, "loss": 0.3226, "step": 364000 }, { "epoch": 0.73, "learning_rate": 0.00011543345509310421, "loss": 0.3228, "step": 364500 }, { "epoch": 0.73, "learning_rate": 0.00011533358764143905, "loss": 0.3233, "step": 365000 }, { "epoch": 0.73, "learning_rate": 0.00011523362358909449, "loss": 0.3226, "step": 365500 }, { "epoch": 0.73, "learning_rate": 0.00011513356320936841, "loss": 0.3222, "step": 366000 }, { "epoch": 0.73, "learning_rate": 0.00011503340677582213, "loss": 0.3222, "step": 366500 }, { "epoch": 0.73, "learning_rate": 0.00011493315456227943, "loss": 0.3224, "step": 367000 }, { "epoch": 0.73, "learning_rate": 0.00011483280684282611, "loss": 0.3228, "step": 367500 }, { "epoch": 0.74, "learning_rate": 0.00011473236389180894, "loss": 0.3228, "step": 368000 }, { "epoch": 0.74, "learning_rate": 0.00011463182598383516, "loss": 0.3221, "step": 368500 }, { "epoch": 0.74, "learning_rate": 0.00011453119339377154, "loss": 0.3224, "step": 369000 }, { "epoch": 0.74, "learning_rate": 0.00011443046639674375, "loss": 0.3221, "step": 369500 }, { "epoch": 0.74, "learning_rate": 0.00011432964526813558, "loss": 0.3221, "step": 370000 }, { "epoch": 0.74, "learning_rate": 0.00011422873028358807, "loss": 0.3225, "step": 370500 }, { "epoch": 0.74, "learning_rate": 0.00011412772171899904, "loss": 0.3219, "step": 371000 }, { "epoch": 0.74, "learning_rate": 0.00011402661985052197, "loss": 0.3221, "step": 371500 }, { "epoch": 0.74, "learning_rate": 0.00011392542495456556, "loss": 0.3218, "step": 372000 }, { "epoch": 0.74, "learning_rate": 0.00011382413730779273, "loss": 0.3219, "step": 372500 }, { "epoch": 0.75, "learning_rate": 0.00011372275718712006, "loss": 0.3225, "step": 373000 }, { "epoch": 0.75, "learning_rate": 0.00011362128486971696, "loss": 0.3221, "step": 373500 }, { "epoch": 0.75, "learning_rate": 0.00011351972063300484, "loss": 0.3213, "step": 374000 }, { "epoch": 0.75, "learning_rate": 0.0001134180647546565, "loss": 0.3215, "step": 374500 }, { "epoch": 0.75, "learning_rate": 0.00011331631751259515, "loss": 0.3215, "step": 375000 }, { "epoch": 0.75, "learning_rate": 0.00011321447918499391, "loss": 0.3214, "step": 375500 }, { "epoch": 0.75, "learning_rate": 0.00011311255005027487, "loss": 0.3214, "step": 376000 }, { "epoch": 0.75, "learning_rate": 0.00011301053038710837, "loss": 0.3211, "step": 376500 }, { "epoch": 0.75, "learning_rate": 0.00011290842047441232, "loss": 0.3218, "step": 377000 }, { "epoch": 0.75, "learning_rate": 0.0001128062205913513, "loss": 0.3224, "step": 377500 }, { "epoch": 0.76, "learning_rate": 0.00011270393101733585, "loss": 0.3214, "step": 378000 }, { "epoch": 0.76, "learning_rate": 0.00011260155203202183, "loss": 0.3221, "step": 378500 }, { "epoch": 0.76, "learning_rate": 0.00011249908391530946, "loss": 0.321, "step": 379000 }, { "epoch": 0.76, "learning_rate": 0.0001123965269473426, "loss": 0.321, "step": 379500 }, { "epoch": 0.76, "learning_rate": 0.00011229388140850814, "loss": 0.322, "step": 380000 }, { "epoch": 0.76, "learning_rate": 0.00011219114757943505, "loss": 0.3213, "step": 380500 }, { "epoch": 0.76, "learning_rate": 0.00011208832574099368, "loss": 0.3203, "step": 381000 }, { "epoch": 0.76, "learning_rate": 0.00011198541617429504, "loss": 0.3207, "step": 381500 }, { "epoch": 0.76, "learning_rate": 0.00011188241916068993, "loss": 0.3213, "step": 382000 }, { "epoch": 0.76, "learning_rate": 0.00011177933498176826, "loss": 0.3209, "step": 382500 }, { "epoch": 0.77, "learning_rate": 0.00011167616391935826, "loss": 0.3205, "step": 383000 }, { "epoch": 0.77, "learning_rate": 0.00011157290625552563, "loss": 0.3208, "step": 383500 }, { "epoch": 0.77, "learning_rate": 0.00011146956227257293, "loss": 0.3209, "step": 384000 }, { "epoch": 0.77, "learning_rate": 0.0001113661322530386, "loss": 0.3209, "step": 384500 }, { "epoch": 0.77, "learning_rate": 0.00011126261647969645, "loss": 0.3209, "step": 385000 }, { "epoch": 0.77, "learning_rate": 0.00011115901523555457, "loss": 0.3207, "step": 385500 }, { "epoch": 0.77, "learning_rate": 0.00011105532880385487, "loss": 0.3207, "step": 386000 }, { "epoch": 0.77, "learning_rate": 0.00011095155746807206, "loss": 0.3207, "step": 386500 }, { "epoch": 0.77, "learning_rate": 0.00011084770151191299, "loss": 0.3205, "step": 387000 }, { "epoch": 0.77, "learning_rate": 0.00011074376121931591, "loss": 0.3206, "step": 387500 }, { "epoch": 0.78, "learning_rate": 0.00011063973687444962, "loss": 0.3205, "step": 388000 }, { "epoch": 0.78, "learning_rate": 0.00011053562876171268, "loss": 0.3208, "step": 388500 }, { "epoch": 0.78, "learning_rate": 0.00011043143716573272, "loss": 0.3202, "step": 389000 }, { "epoch": 0.78, "learning_rate": 0.00011032716237136557, "loss": 0.32, "step": 389500 }, { "epoch": 0.78, "learning_rate": 0.00011022280466369448, "loss": 0.3205, "step": 390000 }, { "epoch": 0.78, "learning_rate": 0.00011011836432802956, "loss": 0.3201, "step": 390500 }, { "epoch": 0.78, "learning_rate": 0.00011001384164990662, "loss": 0.3202, "step": 391000 }, { "epoch": 0.78, "learning_rate": 0.00010990923691508666, "loss": 0.32, "step": 391500 }, { "epoch": 0.78, "learning_rate": 0.00010980455040955506, "loss": 0.3206, "step": 392000 }, { "epoch": 0.78, "learning_rate": 0.00010969978241952076, "loss": 0.3199, "step": 392500 }, { "epoch": 0.79, "learning_rate": 0.00010959493323141538, "loss": 0.3207, "step": 393000 }, { "epoch": 0.79, "learning_rate": 0.00010949000313189264, "loss": 0.3201, "step": 393500 }, { "epoch": 0.79, "learning_rate": 0.00010938499240782739, "loss": 0.3198, "step": 394000 }, { "epoch": 0.79, "learning_rate": 0.00010927990134631496, "loss": 0.3197, "step": 394500 }, { "epoch": 0.79, "learning_rate": 0.00010917473023467032, "loss": 0.3196, "step": 395000 }, { "epoch": 0.79, "learning_rate": 0.00010906947936042724, "loss": 0.3193, "step": 395500 }, { "epoch": 0.79, "learning_rate": 0.00010896414901133761, "loss": 0.3195, "step": 396000 }, { "epoch": 0.79, "learning_rate": 0.00010885873947537058, "loss": 0.3199, "step": 396500 }, { "epoch": 0.79, "learning_rate": 0.00010875325104071177, "loss": 0.3194, "step": 397000 }, { "epoch": 0.79, "learning_rate": 0.00010864768399576257, "loss": 0.3194, "step": 397500 }, { "epoch": 0.8, "learning_rate": 0.00010854203862913927, "loss": 0.3191, "step": 398000 }, { "epoch": 0.8, "learning_rate": 0.00010843631522967218, "loss": 0.3192, "step": 398500 }, { "epoch": 0.8, "learning_rate": 0.00010833051408640509, "loss": 0.3193, "step": 399000 }, { "epoch": 0.8, "learning_rate": 0.0001082246354885943, "loss": 0.32, "step": 399500 }, { "epoch": 0.8, "learning_rate": 0.00010811867972570786, "loss": 0.3194, "step": 400000 }, { "epoch": 0.8, "learning_rate": 0.00010801264708742474, "loss": 0.3195, "step": 400500 }, { "epoch": 0.8, "learning_rate": 0.00010790653786363416, "loss": 0.3195, "step": 401000 }, { "epoch": 0.8, "learning_rate": 0.00010780035234443463, "loss": 0.3191, "step": 401500 }, { "epoch": 0.8, "learning_rate": 0.00010769409082013337, "loss": 0.3192, "step": 402000 }, { "epoch": 0.8, "learning_rate": 0.00010758775358124532, "loss": 0.3192, "step": 402500 }, { "epoch": 0.81, "learning_rate": 0.00010748134091849238, "loss": 0.3193, "step": 403000 }, { "epoch": 0.81, "learning_rate": 0.00010737485312280277, "loss": 0.3192, "step": 403500 }, { "epoch": 0.81, "learning_rate": 0.00010726829048531, "loss": 0.3192, "step": 404000 }, { "epoch": 0.81, "learning_rate": 0.00010716165329735229, "loss": 0.3196, "step": 404500 }, { "epoch": 0.81, "learning_rate": 0.00010705494185047165, "loss": 0.3188, "step": 405000 }, { "epoch": 0.81, "learning_rate": 0.00010694815643641308, "loss": 0.3187, "step": 405500 }, { "epoch": 0.81, "learning_rate": 0.0001068412973471238, "loss": 0.3184, "step": 406000 }, { "epoch": 0.81, "learning_rate": 0.00010673436487475252, "loss": 0.3191, "step": 406500 }, { "epoch": 0.81, "learning_rate": 0.00010662735931164853, "loss": 0.3191, "step": 407000 }, { "epoch": 0.81, "learning_rate": 0.00010652028095036092, "loss": 0.3189, "step": 407500 }, { "epoch": 0.82, "learning_rate": 0.0001064131300836379, "loss": 0.3185, "step": 408000 }, { "epoch": 0.82, "learning_rate": 0.0001063059070044258, "loss": 0.319, "step": 408500 }, { "epoch": 0.82, "learning_rate": 0.0001061986120058684, "loss": 0.3189, "step": 409000 }, { "epoch": 0.82, "learning_rate": 0.00010609124538130623, "loss": 0.3192, "step": 409500 }, { "epoch": 0.82, "learning_rate": 0.00010598380742427543, "loss": 0.3184, "step": 410000 }, { "epoch": 0.82, "learning_rate": 0.00010587629842850737, "loss": 0.3188, "step": 410500 }, { "epoch": 0.82, "learning_rate": 0.00010576871868792746, "loss": 0.3184, "step": 411000 }, { "epoch": 0.82, "learning_rate": 0.00010566106849665463, "loss": 0.3189, "step": 411500 }, { "epoch": 0.82, "learning_rate": 0.0001055533481490004, "loss": 0.3174, "step": 412000 }, { "epoch": 0.82, "learning_rate": 0.00010544555793946805, "loss": 0.3179, "step": 412500 }, { "epoch": 0.83, "learning_rate": 0.000105337698162752, "loss": 0.3188, "step": 413000 }, { "epoch": 0.83, "learning_rate": 0.00010522976911373667, "loss": 0.3183, "step": 413500 }, { "epoch": 0.83, "learning_rate": 0.00010512177108749594, "loss": 0.3178, "step": 414000 }, { "epoch": 0.83, "learning_rate": 0.00010501370437929234, "loss": 0.3185, "step": 414500 }, { "epoch": 0.83, "learning_rate": 0.00010490556928457616, "loss": 0.318, "step": 415000 }, { "epoch": 0.83, "learning_rate": 0.00010479736609898454, "loss": 0.3187, "step": 415500 }, { "epoch": 0.83, "learning_rate": 0.00010468909511834088, "loss": 0.3187, "step": 416000 }, { "epoch": 0.83, "learning_rate": 0.00010458075663865392, "loss": 0.3178, "step": 416500 }, { "epoch": 0.83, "learning_rate": 0.00010447235095611692, "loss": 0.3181, "step": 417000 }, { "epoch": 0.83, "learning_rate": 0.0001043638783671069, "loss": 0.3178, "step": 417500 }, { "epoch": 0.84, "learning_rate": 0.00010425533916818376, "loss": 0.3184, "step": 418000 }, { "epoch": 0.84, "learning_rate": 0.0001041467336560895, "loss": 0.3178, "step": 418500 }, { "epoch": 0.84, "learning_rate": 0.00010403806212774747, "loss": 0.3177, "step": 419000 }, { "epoch": 0.84, "learning_rate": 0.00010392932488026147, "loss": 0.3175, "step": 419500 }, { "epoch": 0.84, "learning_rate": 0.000103820522210915, "loss": 0.3183, "step": 420000 }, { "epoch": 0.84, "learning_rate": 0.00010371165441717041, "loss": 0.318, "step": 420500 }, { "epoch": 0.84, "learning_rate": 0.00010360272179666802, "loss": 0.3175, "step": 421000 }, { "epoch": 0.84, "learning_rate": 0.00010349372464722555, "loss": 0.3171, "step": 421500 }, { "epoch": 0.84, "learning_rate": 0.00010338466326683697, "loss": 0.3176, "step": 422000 }, { "epoch": 0.84, "learning_rate": 0.00010327553795367197, "loss": 0.3177, "step": 422500 }, { "epoch": 0.85, "learning_rate": 0.00010316634900607497, "loss": 0.3177, "step": 423000 }, { "epoch": 0.85, "learning_rate": 0.0001030570967225644, "loss": 0.3176, "step": 423500 }, { "epoch": 0.85, "learning_rate": 0.00010294778140183182, "loss": 0.3176, "step": 424000 }, { "epoch": 0.85, "learning_rate": 0.00010283840334274117, "loss": 0.3175, "step": 424500 }, { "epoch": 0.85, "learning_rate": 0.00010272896284432785, "loss": 0.3176, "step": 425000 }, { "epoch": 0.85, "learning_rate": 0.00010261946020579799, "loss": 0.3168, "step": 425500 }, { "epoch": 0.85, "learning_rate": 0.00010250989572652766, "loss": 0.3171, "step": 426000 }, { "epoch": 0.85, "learning_rate": 0.00010240026970606198, "loss": 0.3176, "step": 426500 }, { "epoch": 0.85, "learning_rate": 0.00010229058244411427, "loss": 0.3174, "step": 427000 }, { "epoch": 0.85, "learning_rate": 0.0001021808342405653, "loss": 0.3174, "step": 427500 }, { "epoch": 0.86, "learning_rate": 0.00010207102539546251, "loss": 0.3167, "step": 428000 }, { "epoch": 0.86, "learning_rate": 0.00010196115620901904, "loss": 0.3169, "step": 428500 }, { "epoch": 0.86, "learning_rate": 0.00010185122698161311, "loss": 0.3172, "step": 429000 }, { "epoch": 0.86, "learning_rate": 0.00010174123801378698, "loss": 0.3165, "step": 429500 }, { "epoch": 0.86, "learning_rate": 0.00010163118960624632, "loss": 0.3167, "step": 430000 }, { "epoch": 0.86, "learning_rate": 0.00010152108205985925, "loss": 0.317, "step": 430500 }, { "epoch": 0.86, "learning_rate": 0.00010141091567565561, "loss": 0.3165, "step": 431000 }, { "epoch": 0.86, "learning_rate": 0.00010130069075482611, "loss": 0.3166, "step": 431500 }, { "epoch": 0.86, "learning_rate": 0.00010119040759872142, "loss": 0.3164, "step": 432000 }, { "epoch": 0.86, "learning_rate": 0.00010108006650885151, "loss": 0.3168, "step": 432500 }, { "epoch": 0.87, "learning_rate": 0.00010096966778688472, "loss": 0.3165, "step": 433000 }, { "epoch": 0.87, "learning_rate": 0.00010085921173464691, "loss": 0.3165, "step": 433500 }, { "epoch": 0.87, "learning_rate": 0.00010074869865412074, "loss": 0.3166, "step": 434000 }, { "epoch": 0.87, "learning_rate": 0.00010063812884744475, "loss": 0.3168, "step": 434500 }, { "epoch": 0.87, "learning_rate": 0.00010052750261691254, "loss": 0.3165, "step": 435000 }, { "epoch": 0.87, "learning_rate": 0.00010041682026497199, "loss": 0.3163, "step": 435500 }, { "epoch": 0.87, "learning_rate": 0.0001003060820942245, "loss": 0.3162, "step": 436000 }, { "epoch": 0.87, "learning_rate": 0.00010019528840742392, "loss": 0.3164, "step": 436500 }, { "epoch": 0.87, "learning_rate": 0.00010008443950747599, "loss": 0.3162, "step": 437000 }, { "epoch": 0.87, "learning_rate": 9.997353569743736e-05, "loss": 0.3155, "step": 437500 }, { "epoch": 0.88, "learning_rate": 9.986257728051483e-05, "loss": 0.3168, "step": 438000 }, { "epoch": 0.88, "learning_rate": 9.975156456006448e-05, "loss": 0.3162, "step": 438500 }, { "epoch": 0.88, "learning_rate": 9.964049783959082e-05, "loss": 0.3162, "step": 439000 }, { "epoch": 0.88, "learning_rate": 9.952937742274605e-05, "loss": 0.3159, "step": 439500 }, { "epoch": 0.88, "learning_rate": 9.94182036133291e-05, "loss": 0.3162, "step": 440000 }, { "epoch": 0.88, "learning_rate": 9.930697671528499e-05, "loss": 0.3155, "step": 440500 }, { "epoch": 0.88, "learning_rate": 9.919569703270376e-05, "loss": 0.316, "step": 441000 }, { "epoch": 0.88, "learning_rate": 9.908436486981984e-05, "loss": 0.3165, "step": 441500 }, { "epoch": 0.88, "learning_rate": 9.89729805310111e-05, "loss": 0.3157, "step": 442000 }, { "epoch": 0.88, "learning_rate": 9.886154432079803e-05, "loss": 0.3158, "step": 442500 }, { "epoch": 0.89, "learning_rate": 9.875005654384307e-05, "loss": 0.3158, "step": 443000 }, { "epoch": 0.89, "learning_rate": 9.863851750494944e-05, "loss": 0.3152, "step": 443500 }, { "epoch": 0.89, "learning_rate": 9.852692750906071e-05, "loss": 0.3158, "step": 444000 }, { "epoch": 0.89, "learning_rate": 9.841528686125961e-05, "loss": 0.3153, "step": 444500 }, { "epoch": 0.89, "learning_rate": 9.830359586676737e-05, "loss": 0.3151, "step": 445000 }, { "epoch": 0.89, "learning_rate": 9.819185483094299e-05, "loss": 0.3159, "step": 445500 }, { "epoch": 0.89, "learning_rate": 9.808006405928215e-05, "loss": 0.3159, "step": 446000 }, { "epoch": 0.89, "learning_rate": 9.796822385741657e-05, "loss": 0.3156, "step": 446500 }, { "epoch": 0.89, "learning_rate": 9.785633453111306e-05, "loss": 0.316, "step": 447000 }, { "epoch": 0.89, "learning_rate": 9.774439638627277e-05, "loss": 0.3152, "step": 447500 }, { "epoch": 0.9, "learning_rate": 9.763240972893037e-05, "loss": 0.3159, "step": 448000 }, { "epoch": 0.9, "learning_rate": 9.752037486525302e-05, "loss": 0.3149, "step": 448500 }, { "epoch": 0.9, "learning_rate": 9.740829210153984e-05, "loss": 0.3153, "step": 449000 }, { "epoch": 0.9, "learning_rate": 9.729616174422077e-05, "loss": 0.3154, "step": 449500 }, { "epoch": 0.9, "learning_rate": 9.718398409985593e-05, "loss": 0.315, "step": 450000 }, { "epoch": 0.9, "learning_rate": 9.707175947513475e-05, "loss": 0.3149, "step": 450500 }, { "epoch": 0.9, "learning_rate": 9.695948817687504e-05, "loss": 0.3152, "step": 451000 }, { "epoch": 0.9, "learning_rate": 9.684717051202227e-05, "loss": 0.3148, "step": 451500 }, { "epoch": 0.9, "learning_rate": 9.673480678764858e-05, "loss": 0.3158, "step": 452000 }, { "epoch": 0.9, "learning_rate": 9.662239731095222e-05, "loss": 0.3151, "step": 452500 }, { "epoch": 0.91, "learning_rate": 9.650994238925626e-05, "loss": 0.3145, "step": 453000 }, { "epoch": 0.91, "learning_rate": 9.63974423300083e-05, "loss": 0.315, "step": 453500 }, { "epoch": 0.91, "learning_rate": 9.628489744077911e-05, "loss": 0.3147, "step": 454000 }, { "epoch": 0.91, "learning_rate": 9.617230802926214e-05, "loss": 0.3148, "step": 454500 }, { "epoch": 0.91, "learning_rate": 9.60596744032726e-05, "loss": 0.3151, "step": 455000 }, { "epoch": 0.91, "learning_rate": 9.594699687074648e-05, "loss": 0.3143, "step": 455500 }, { "epoch": 0.91, "learning_rate": 9.583427573973982e-05, "loss": 0.3144, "step": 456000 }, { "epoch": 0.91, "learning_rate": 9.57215113184279e-05, "loss": 0.3148, "step": 456500 }, { "epoch": 0.91, "learning_rate": 9.560870391510441e-05, "loss": 0.3153, "step": 457000 }, { "epoch": 0.91, "learning_rate": 9.549585383818041e-05, "loss": 0.3148, "step": 457500 }, { "epoch": 0.92, "learning_rate": 9.538296139618371e-05, "loss": 0.3152, "step": 458000 }, { "epoch": 0.92, "learning_rate": 9.527002689775799e-05, "loss": 0.3146, "step": 458500 }, { "epoch": 0.92, "learning_rate": 9.515705065166178e-05, "loss": 0.3147, "step": 459000 }, { "epoch": 0.92, "learning_rate": 9.504403296676786e-05, "loss": 0.3144, "step": 459500 }, { "epoch": 0.92, "learning_rate": 9.493097415206228e-05, "loss": 0.3143, "step": 460000 }, { "epoch": 0.92, "learning_rate": 9.481787451664349e-05, "loss": 0.3143, "step": 460500 }, { "epoch": 0.92, "learning_rate": 9.47047343697216e-05, "loss": 0.3143, "step": 461000 }, { "epoch": 0.92, "learning_rate": 9.459155402061744e-05, "loss": 0.3148, "step": 461500 }, { "epoch": 0.92, "learning_rate": 9.447833377876176e-05, "loss": 0.3138, "step": 462000 }, { "epoch": 0.92, "learning_rate": 9.436507395369439e-05, "loss": 0.3143, "step": 462500 }, { "epoch": 0.93, "learning_rate": 9.425177485506336e-05, "loss": 0.3145, "step": 463000 }, { "epoch": 0.93, "learning_rate": 9.413843679262408e-05, "loss": 0.3146, "step": 463500 }, { "epoch": 0.93, "learning_rate": 9.402506007623848e-05, "loss": 0.3144, "step": 464000 }, { "epoch": 0.93, "learning_rate": 9.391164501587417e-05, "loss": 0.3139, "step": 464500 }, { "epoch": 0.93, "learning_rate": 9.379819192160362e-05, "loss": 0.3138, "step": 465000 }, { "epoch": 0.93, "learning_rate": 9.368470110360323e-05, "loss": 0.3137, "step": 465500 }, { "epoch": 0.93, "learning_rate": 9.357117287215258e-05, "loss": 0.3141, "step": 466000 }, { "epoch": 0.93, "learning_rate": 9.345760753763347e-05, "loss": 0.3135, "step": 466500 }, { "epoch": 0.93, "learning_rate": 9.334400541052928e-05, "loss": 0.3138, "step": 467000 }, { "epoch": 0.93, "learning_rate": 9.323036680142382e-05, "loss": 0.3138, "step": 467500 }, { "epoch": 0.94, "learning_rate": 9.311669202100073e-05, "loss": 0.3143, "step": 468000 }, { "epoch": 0.94, "learning_rate": 9.300298138004249e-05, "loss": 0.314, "step": 468500 }, { "epoch": 0.94, "learning_rate": 9.288923518942968e-05, "loss": 0.3137, "step": 469000 }, { "epoch": 0.94, "learning_rate": 9.277545376014005e-05, "loss": 0.3135, "step": 469500 }, { "epoch": 0.94, "learning_rate": 9.26616374032477e-05, "loss": 0.3133, "step": 470000 }, { "epoch": 0.94, "learning_rate": 9.254778642992213e-05, "loss": 0.3139, "step": 470500 }, { "epoch": 0.94, "learning_rate": 9.243390115142761e-05, "loss": 0.3134, "step": 471000 }, { "epoch": 0.94, "learning_rate": 9.231998187912211e-05, "loss": 0.3127, "step": 471500 }, { "epoch": 0.94, "learning_rate": 9.220602892445661e-05, "loss": 0.3132, "step": 472000 }, { "epoch": 0.94, "learning_rate": 9.209204259897412e-05, "loss": 0.3136, "step": 472500 }, { "epoch": 0.95, "learning_rate": 9.197802321430889e-05, "loss": 0.3137, "step": 473000 }, { "epoch": 0.95, "learning_rate": 9.186397108218558e-05, "loss": 0.3138, "step": 473500 }, { "epoch": 0.95, "learning_rate": 9.174988651441833e-05, "loss": 0.3129, "step": 474000 }, { "epoch": 0.95, "learning_rate": 9.163576982291006e-05, "loss": 0.3129, "step": 474500 }, { "epoch": 0.95, "learning_rate": 9.152162131965137e-05, "loss": 0.3128, "step": 475000 }, { "epoch": 0.95, "learning_rate": 9.140744131671994e-05, "loss": 0.3128, "step": 475500 }, { "epoch": 0.95, "learning_rate": 9.129323012627956e-05, "loss": 0.3135, "step": 476000 }, { "epoch": 0.95, "learning_rate": 9.117898806057925e-05, "loss": 0.3132, "step": 476500 }, { "epoch": 0.95, "learning_rate": 9.106471543195244e-05, "loss": 0.313, "step": 477000 }, { "epoch": 0.95, "learning_rate": 9.095041255281616e-05, "loss": 0.3129, "step": 477500 }, { "epoch": 0.96, "learning_rate": 9.08360797356701e-05, "loss": 0.3132, "step": 478000 }, { "epoch": 0.96, "learning_rate": 9.07217172930958e-05, "loss": 0.3128, "step": 478500 }, { "epoch": 0.96, "learning_rate": 9.060732553775582e-05, "loss": 0.3131, "step": 479000 }, { "epoch": 0.96, "learning_rate": 9.049290478239287e-05, "loss": 0.3127, "step": 479500 }, { "epoch": 0.96, "learning_rate": 9.037845533982892e-05, "loss": 0.3127, "step": 480000 }, { "epoch": 0.96, "learning_rate": 9.02639775229644e-05, "loss": 0.313, "step": 480500 }, { "epoch": 0.96, "learning_rate": 9.014947164477721e-05, "loss": 0.3128, "step": 481000 }, { "epoch": 0.96, "learning_rate": 9.003493801832213e-05, "loss": 0.3131, "step": 481500 }, { "epoch": 0.96, "learning_rate": 8.992037695672967e-05, "loss": 0.3127, "step": 482000 }, { "epoch": 0.96, "learning_rate": 8.980578877320544e-05, "loss": 0.313, "step": 482500 }, { "epoch": 0.97, "learning_rate": 8.969117378102912e-05, "loss": 0.3131, "step": 483000 }, { "epoch": 0.97, "learning_rate": 8.957653229355374e-05, "loss": 0.3127, "step": 483500 }, { "epoch": 0.97, "learning_rate": 8.946186462420478e-05, "loss": 0.3126, "step": 484000 }, { "epoch": 0.97, "learning_rate": 8.934717108647922e-05, "loss": 0.3132, "step": 484500 }, { "epoch": 0.97, "learning_rate": 8.923245199394482e-05, "loss": 0.3122, "step": 485000 }, { "epoch": 0.97, "learning_rate": 8.911770766023921e-05, "loss": 0.312, "step": 485500 }, { "epoch": 0.97, "learning_rate": 8.900293839906903e-05, "loss": 0.313, "step": 486000 }, { "epoch": 0.97, "learning_rate": 8.888814452420903e-05, "loss": 0.3121, "step": 486500 }, { "epoch": 0.97, "learning_rate": 8.87733263495013e-05, "loss": 0.3126, "step": 487000 }, { "epoch": 0.97, "learning_rate": 8.865848418885434e-05, "loss": 0.3127, "step": 487500 }, { "epoch": 0.98, "learning_rate": 8.85436183562422e-05, "loss": 0.3126, "step": 488000 }, { "epoch": 0.98, "learning_rate": 8.842872916570374e-05, "loss": 0.3129, "step": 488500 }, { "epoch": 0.98, "learning_rate": 8.83138169313416e-05, "loss": 0.3126, "step": 489000 }, { "epoch": 0.98, "learning_rate": 8.819888196732144e-05, "loss": 0.3119, "step": 489500 }, { "epoch": 0.98, "learning_rate": 8.808392458787103e-05, "loss": 0.3121, "step": 490000 }, { "epoch": 0.98, "learning_rate": 8.796894510727945e-05, "loss": 0.3127, "step": 490500 }, { "epoch": 0.98, "learning_rate": 8.78539438398963e-05, "loss": 0.3112, "step": 491000 }, { "epoch": 0.98, "learning_rate": 8.773892110013058e-05, "loss": 0.3121, "step": 491500 }, { "epoch": 0.98, "learning_rate": 8.762387720245008e-05, "loss": 0.3124, "step": 492000 }, { "epoch": 0.98, "learning_rate": 8.750881246138043e-05, "loss": 0.3124, "step": 492500 }, { "epoch": 0.99, "learning_rate": 8.73937271915042e-05, "loss": 0.3117, "step": 493000 }, { "epoch": 0.99, "learning_rate": 8.727862170746019e-05, "loss": 0.3121, "step": 493500 }, { "epoch": 0.99, "learning_rate": 8.716349632394235e-05, "loss": 0.3123, "step": 494000 }, { "epoch": 0.99, "learning_rate": 8.70483513556991e-05, "loss": 0.3123, "step": 494500 }, { "epoch": 0.99, "learning_rate": 8.69331871175324e-05, "loss": 0.3119, "step": 495000 }, { "epoch": 0.99, "learning_rate": 8.681800392429684e-05, "loss": 0.3111, "step": 495500 }, { "epoch": 0.99, "learning_rate": 8.67028020908989e-05, "loss": 0.3113, "step": 496000 }, { "epoch": 0.99, "learning_rate": 8.658758193229601e-05, "loss": 0.311, "step": 496500 }, { "epoch": 0.99, "learning_rate": 8.647234376349565e-05, "loss": 0.3118, "step": 497000 }, { "epoch": 0.99, "learning_rate": 8.635708789955458e-05, "loss": 0.3119, "step": 497500 }, { "epoch": 1.0, "learning_rate": 8.624181465557794e-05, "loss": 0.3111, "step": 498000 }, { "epoch": 1.0, "learning_rate": 8.612652434671837e-05, "loss": 0.3119, "step": 498500 }, { "epoch": 1.0, "learning_rate": 8.601121728817519e-05, "loss": 0.3116, "step": 499000 }, { "epoch": 1.0, "learning_rate": 8.589589379519346e-05, "loss": 0.3113, "step": 499500 }, { "epoch": 1.0, "learning_rate": 8.578055418306327e-05, "loss": 0.311, "step": 500000 }, { "epoch": 1.0, "learning_rate": 8.566519876711864e-05, "loss": 0.3114, "step": 500500 }, { "epoch": 1.0, "learning_rate": 8.55498278627369e-05, "loss": 0.3109, "step": 501000 }, { "epoch": 1.0, "learning_rate": 8.543444178533773e-05, "loss": 0.3123, "step": 501500 }, { "epoch": 1.0, "learning_rate": 8.531904085038221e-05, "loss": 0.3113, "step": 502000 }, { "epoch": 1.0, "learning_rate": 8.520362537337214e-05, "loss": 0.3109, "step": 502500 }, { "epoch": 1.01, "learning_rate": 8.508819566984897e-05, "loss": 0.3114, "step": 503000 }, { "epoch": 1.01, "learning_rate": 8.497275205539314e-05, "loss": 0.3107, "step": 503500 }, { "epoch": 1.01, "learning_rate": 8.485729484562307e-05, "loss": 0.311, "step": 504000 }, { "epoch": 1.01, "learning_rate": 8.474182435619437e-05, "loss": 0.3109, "step": 504500 }, { "epoch": 1.01, "learning_rate": 8.462634090279895e-05, "loss": 0.3107, "step": 505000 }, { "epoch": 1.01, "learning_rate": 8.451084480116415e-05, "loss": 0.3103, "step": 505500 }, { "epoch": 1.01, "learning_rate": 8.439533636705194e-05, "loss": 0.3107, "step": 506000 }, { "epoch": 1.01, "learning_rate": 8.427981591625791e-05, "loss": 0.3104, "step": 506500 }, { "epoch": 1.01, "learning_rate": 8.416428376461061e-05, "loss": 0.3106, "step": 507000 }, { "epoch": 1.01, "learning_rate": 8.404874022797049e-05, "loss": 0.3112, "step": 507500 }, { "epoch": 1.02, "learning_rate": 8.393318562222916e-05, "loss": 0.3104, "step": 508000 }, { "epoch": 1.02, "learning_rate": 8.381762026330858e-05, "loss": 0.3111, "step": 508500 }, { "epoch": 1.02, "learning_rate": 8.370204446715997e-05, "loss": 0.3105, "step": 509000 }, { "epoch": 1.02, "learning_rate": 8.358645854976311e-05, "loss": 0.3101, "step": 509500 }, { "epoch": 1.02, "learning_rate": 8.347086282712556e-05, "loss": 0.3102, "step": 510000 }, { "epoch": 1.02, "learning_rate": 8.335525761528157e-05, "loss": 0.3105, "step": 510500 }, { "epoch": 1.02, "learning_rate": 8.323964323029136e-05, "loss": 0.3104, "step": 511000 }, { "epoch": 1.02, "learning_rate": 8.312401998824027e-05, "loss": 0.3109, "step": 511500 }, { "epoch": 1.02, "learning_rate": 8.300838820523784e-05, "loss": 0.3102, "step": 512000 }, { "epoch": 1.02, "learning_rate": 8.289274819741691e-05, "loss": 0.3099, "step": 512500 }, { "epoch": 1.03, "learning_rate": 8.277710028093289e-05, "loss": 0.3104, "step": 513000 }, { "epoch": 1.03, "learning_rate": 8.266144477196274e-05, "loss": 0.3102, "step": 513500 }, { "epoch": 1.03, "learning_rate": 8.254578198670421e-05, "loss": 0.3108, "step": 514000 }, { "epoch": 1.03, "learning_rate": 8.243011224137492e-05, "loss": 0.3112, "step": 514500 }, { "epoch": 1.03, "learning_rate": 8.231443585221157e-05, "loss": 0.3103, "step": 515000 }, { "epoch": 1.03, "learning_rate": 8.219875313546898e-05, "loss": 0.3101, "step": 515500 }, { "epoch": 1.03, "learning_rate": 8.208306440741926e-05, "loss": 0.3095, "step": 516000 }, { "epoch": 1.03, "learning_rate": 8.196736998435101e-05, "loss": 0.3094, "step": 516500 }, { "epoch": 1.03, "learning_rate": 8.185167018256834e-05, "loss": 0.3098, "step": 517000 }, { "epoch": 1.03, "learning_rate": 8.173596531839011e-05, "loss": 0.31, "step": 517500 }, { "epoch": 1.04, "learning_rate": 8.162025570814896e-05, "loss": 0.3096, "step": 518000 }, { "epoch": 1.04, "learning_rate": 8.150454166819059e-05, "loss": 0.3102, "step": 518500 }, { "epoch": 1.04, "learning_rate": 8.138882351487275e-05, "loss": 0.3102, "step": 519000 }, { "epoch": 1.04, "learning_rate": 8.127310156456445e-05, "loss": 0.3093, "step": 519500 }, { "epoch": 1.04, "learning_rate": 8.115737613364511e-05, "loss": 0.3095, "step": 520000 }, { "epoch": 1.04, "learning_rate": 8.104164753850357e-05, "loss": 0.3097, "step": 520500 }, { "epoch": 1.04, "learning_rate": 8.092591609553747e-05, "loss": 0.3102, "step": 521000 }, { "epoch": 1.04, "learning_rate": 8.081018212115208e-05, "loss": 0.3093, "step": 521500 }, { "epoch": 1.04, "learning_rate": 8.069444593175975e-05, "loss": 0.3101, "step": 522000 }, { "epoch": 1.04, "learning_rate": 8.057870784377874e-05, "loss": 0.3097, "step": 522500 }, { "epoch": 1.05, "learning_rate": 8.046296817363259e-05, "loss": 0.3098, "step": 523000 }, { "epoch": 1.05, "learning_rate": 8.034722723774913e-05, "loss": 0.3091, "step": 523500 }, { "epoch": 1.05, "learning_rate": 8.023148535255965e-05, "loss": 0.3098, "step": 524000 }, { "epoch": 1.05, "learning_rate": 8.011574283449807e-05, "loss": 0.3095, "step": 524500 }, { "epoch": 1.05, "learning_rate": 7.999999999999999e-05, "loss": 0.3086, "step": 525000 }, { "epoch": 1.05, "learning_rate": 7.98842571655019e-05, "loss": 0.3092, "step": 525500 }, { "epoch": 1.05, "learning_rate": 7.976851464744033e-05, "loss": 0.3091, "step": 526000 }, { "epoch": 1.05, "learning_rate": 7.965277276225087e-05, "loss": 0.3095, "step": 526500 }, { "epoch": 1.05, "learning_rate": 7.953703182636741e-05, "loss": 0.3088, "step": 527000 }, { "epoch": 1.05, "learning_rate": 7.942129215622125e-05, "loss": 0.3086, "step": 527500 }, { "epoch": 1.06, "learning_rate": 7.930555406824026e-05, "loss": 0.3093, "step": 528000 }, { "epoch": 1.06, "learning_rate": 7.91898178788479e-05, "loss": 0.3086, "step": 528500 }, { "epoch": 1.06, "learning_rate": 7.907408390446254e-05, "loss": 0.3089, "step": 529000 }, { "epoch": 1.06, "learning_rate": 7.895835246149643e-05, "loss": 0.3088, "step": 529500 }, { "epoch": 1.06, "learning_rate": 7.884262386635489e-05, "loss": 0.3089, "step": 530000 }, { "epoch": 1.06, "learning_rate": 7.872689843543554e-05, "loss": 0.3093, "step": 530500 }, { "epoch": 1.06, "learning_rate": 7.861117648512725e-05, "loss": 0.3091, "step": 531000 }, { "epoch": 1.06, "learning_rate": 7.849545833180941e-05, "loss": 0.3092, "step": 531500 }, { "epoch": 1.06, "learning_rate": 7.837974429185103e-05, "loss": 0.3089, "step": 532000 }, { "epoch": 1.06, "learning_rate": 7.82640346816099e-05, "loss": 0.308, "step": 532500 }, { "epoch": 1.07, "learning_rate": 7.814832981743164e-05, "loss": 0.3088, "step": 533000 }, { "epoch": 1.07, "learning_rate": 7.803263001564899e-05, "loss": 0.3092, "step": 533500 }, { "epoch": 1.07, "learning_rate": 7.791693559258072e-05, "loss": 0.3089, "step": 534000 }, { "epoch": 1.07, "learning_rate": 7.780124686453101e-05, "loss": 0.3089, "step": 534500 }, { "epoch": 1.07, "learning_rate": 7.768556414778842e-05, "loss": 0.3089, "step": 535000 }, { "epoch": 1.07, "learning_rate": 7.756988775862508e-05, "loss": 0.3086, "step": 535500 }, { "epoch": 1.07, "learning_rate": 7.74542180132958e-05, "loss": 0.3084, "step": 536000 }, { "epoch": 1.07, "learning_rate": 7.733855522803725e-05, "loss": 0.3081, "step": 536500 }, { "epoch": 1.07, "learning_rate": 7.72228997190671e-05, "loss": 0.3084, "step": 537000 }, { "epoch": 1.07, "learning_rate": 7.710725180258306e-05, "loss": 0.3085, "step": 537500 }, { "epoch": 1.08, "learning_rate": 7.699161179476217e-05, "loss": 0.3082, "step": 538000 }, { "epoch": 1.08, "learning_rate": 7.687598001175972e-05, "loss": 0.3081, "step": 538500 }, { "epoch": 1.08, "learning_rate": 7.676035676970863e-05, "loss": 0.3075, "step": 539000 }, { "epoch": 1.08, "learning_rate": 7.664474238471844e-05, "loss": 0.3083, "step": 539500 }, { "epoch": 1.08, "learning_rate": 7.652913717287443e-05, "loss": 0.3079, "step": 540000 }, { "epoch": 1.08, "learning_rate": 7.641354145023687e-05, "loss": 0.3083, "step": 540500 }, { "epoch": 1.08, "learning_rate": 7.629795553284005e-05, "loss": 0.3085, "step": 541000 }, { "epoch": 1.08, "learning_rate": 7.61823797366914e-05, "loss": 0.308, "step": 541500 }, { "epoch": 1.08, "learning_rate": 7.606681437777081e-05, "loss": 0.3077, "step": 542000 }, { "epoch": 1.08, "learning_rate": 7.595125977202952e-05, "loss": 0.3078, "step": 542500 }, { "epoch": 1.09, "learning_rate": 7.583571623538939e-05, "loss": 0.3083, "step": 543000 }, { "epoch": 1.09, "learning_rate": 7.572018408374208e-05, "loss": 0.3074, "step": 543500 }, { "epoch": 1.09, "learning_rate": 7.560466363294806e-05, "loss": 0.3076, "step": 544000 }, { "epoch": 1.09, "learning_rate": 7.548915519883582e-05, "loss": 0.3077, "step": 544500 }, { "epoch": 1.09, "learning_rate": 7.537365909720104e-05, "loss": 0.3078, "step": 545000 }, { "epoch": 1.09, "learning_rate": 7.525817564380562e-05, "loss": 0.308, "step": 545500 }, { "epoch": 1.09, "learning_rate": 7.514270515437691e-05, "loss": 0.3084, "step": 546000 }, { "epoch": 1.09, "learning_rate": 7.502724794460685e-05, "loss": 0.3077, "step": 546500 }, { "epoch": 1.09, "learning_rate": 7.491180433015101e-05, "loss": 0.3075, "step": 547000 }, { "epoch": 1.09, "learning_rate": 7.479637462662786e-05, "loss": 0.3076, "step": 547500 }, { "epoch": 1.1, "learning_rate": 7.468095914961777e-05, "loss": 0.3075, "step": 548000 }, { "epoch": 1.1, "learning_rate": 7.456555821466225e-05, "loss": 0.3075, "step": 548500 }, { "epoch": 1.1, "learning_rate": 7.445017213726307e-05, "loss": 0.3076, "step": 549000 }, { "epoch": 1.1, "learning_rate": 7.433480123288138e-05, "loss": 0.3081, "step": 549500 }, { "epoch": 1.1, "learning_rate": 7.421944581693674e-05, "loss": 0.3074, "step": 550000 }, { "epoch": 1.1, "learning_rate": 7.410410620480651e-05, "loss": 0.3075, "step": 550500 }, { "epoch": 1.1, "learning_rate": 7.39887827118248e-05, "loss": 0.3073, "step": 551000 }, { "epoch": 1.1, "learning_rate": 7.38734756532816e-05, "loss": 0.3072, "step": 551500 }, { "epoch": 1.1, "learning_rate": 7.375818534442207e-05, "loss": 0.3074, "step": 552000 }, { "epoch": 1.1, "learning_rate": 7.364291210044542e-05, "loss": 0.3074, "step": 552500 }, { "epoch": 1.11, "learning_rate": 7.352765623650435e-05, "loss": 0.3074, "step": 553000 }, { "epoch": 1.11, "learning_rate": 7.341241806770399e-05, "loss": 0.3071, "step": 553500 }, { "epoch": 1.11, "learning_rate": 7.329719790910108e-05, "loss": 0.3076, "step": 554000 }, { "epoch": 1.11, "learning_rate": 7.318199607570318e-05, "loss": 0.3069, "step": 554500 }, { "epoch": 1.11, "learning_rate": 7.30668128824676e-05, "loss": 0.3071, "step": 555000 }, { "epoch": 1.11, "learning_rate": 7.295164864430088e-05, "loss": 0.3072, "step": 555500 }, { "epoch": 1.11, "learning_rate": 7.283650367605764e-05, "loss": 0.3073, "step": 556000 }, { "epoch": 1.11, "learning_rate": 7.272137829253983e-05, "loss": 0.3072, "step": 556500 }, { "epoch": 1.11, "learning_rate": 7.260627280849581e-05, "loss": 0.3072, "step": 557000 }, { "epoch": 1.11, "learning_rate": 7.249118753861958e-05, "loss": 0.3071, "step": 557500 }, { "epoch": 1.12, "learning_rate": 7.23761227975499e-05, "loss": 0.3073, "step": 558000 }, { "epoch": 1.12, "learning_rate": 7.22610788998694e-05, "loss": 0.3065, "step": 558500 }, { "epoch": 1.12, "learning_rate": 7.21460561601037e-05, "loss": 0.3067, "step": 559000 }, { "epoch": 1.12, "learning_rate": 7.203105489272053e-05, "loss": 0.3067, "step": 559500 }, { "epoch": 1.12, "learning_rate": 7.191607541212897e-05, "loss": 0.3067, "step": 560000 }, { "epoch": 1.12, "learning_rate": 7.180111803267856e-05, "loss": 0.3072, "step": 560500 }, { "epoch": 1.12, "learning_rate": 7.168618306865838e-05, "loss": 0.3067, "step": 561000 }, { "epoch": 1.12, "learning_rate": 7.157127083429626e-05, "loss": 0.3064, "step": 561500 }, { "epoch": 1.12, "learning_rate": 7.145638164375779e-05, "loss": 0.3067, "step": 562000 }, { "epoch": 1.12, "learning_rate": 7.134151581114565e-05, "loss": 0.3062, "step": 562500 }, { "epoch": 1.13, "learning_rate": 7.122667365049869e-05, "loss": 0.3064, "step": 563000 }, { "epoch": 1.13, "learning_rate": 7.111185547579099e-05, "loss": 0.307, "step": 563500 }, { "epoch": 1.13, "learning_rate": 7.099706160093098e-05, "loss": 0.3057, "step": 564000 }, { "epoch": 1.13, "learning_rate": 7.08822923397608e-05, "loss": 0.3061, "step": 564500 }, { "epoch": 1.13, "learning_rate": 7.076754800605516e-05, "loss": 0.3064, "step": 565000 }, { "epoch": 1.13, "learning_rate": 7.065282891352078e-05, "loss": 0.3061, "step": 565500 }, { "epoch": 1.13, "learning_rate": 7.053813537579523e-05, "loss": 0.3059, "step": 566000 }, { "epoch": 1.13, "learning_rate": 7.042346770644624e-05, "loss": 0.3065, "step": 566500 }, { "epoch": 1.13, "learning_rate": 7.030882621897088e-05, "loss": 0.3064, "step": 567000 }, { "epoch": 1.13, "learning_rate": 7.019421122679455e-05, "loss": 0.3065, "step": 567500 }, { "epoch": 1.14, "learning_rate": 7.00796230432703e-05, "loss": 0.3065, "step": 568000 }, { "epoch": 1.14, "learning_rate": 6.996506198167789e-05, "loss": 0.3059, "step": 568500 }, { "epoch": 1.14, "learning_rate": 6.985052835522279e-05, "loss": 0.306, "step": 569000 }, { "epoch": 1.14, "learning_rate": 6.973602247703561e-05, "loss": 0.306, "step": 569500 }, { "epoch": 1.14, "learning_rate": 6.962154466017105e-05, "loss": 0.306, "step": 570000 }, { "epoch": 1.14, "learning_rate": 6.950709521760712e-05, "loss": 0.3054, "step": 570500 }, { "epoch": 1.14, "learning_rate": 6.939267446224418e-05, "loss": 0.3056, "step": 571000 }, { "epoch": 1.14, "learning_rate": 6.927828270690422e-05, "loss": 0.3058, "step": 571500 }, { "epoch": 1.14, "learning_rate": 6.91639202643299e-05, "loss": 0.3057, "step": 572000 }, { "epoch": 1.14, "learning_rate": 6.904958744718383e-05, "loss": 0.3058, "step": 572500 }, { "epoch": 1.15, "learning_rate": 6.893528456804756e-05, "loss": 0.3055, "step": 573000 }, { "epoch": 1.15, "learning_rate": 6.882101193942075e-05, "loss": 0.3061, "step": 573500 }, { "epoch": 1.15, "learning_rate": 6.870676987372044e-05, "loss": 0.3058, "step": 574000 }, { "epoch": 1.15, "learning_rate": 6.859255868328003e-05, "loss": 0.3056, "step": 574500 }, { "epoch": 1.15, "learning_rate": 6.847837868034861e-05, "loss": 0.3057, "step": 575000 }, { "epoch": 1.15, "learning_rate": 6.836423017708996e-05, "loss": 0.3051, "step": 575500 }, { "epoch": 1.15, "learning_rate": 6.825011348558167e-05, "loss": 0.3054, "step": 576000 }, { "epoch": 1.15, "learning_rate": 6.813602891781443e-05, "loss": 0.3056, "step": 576500 }, { "epoch": 1.15, "learning_rate": 6.802197678569109e-05, "loss": 0.3051, "step": 577000 }, { "epoch": 1.15, "learning_rate": 6.790795740102589e-05, "loss": 0.3062, "step": 577500 }, { "epoch": 1.16, "learning_rate": 6.779397107554339e-05, "loss": 0.3049, "step": 578000 }, { "epoch": 1.16, "learning_rate": 6.768001812087789e-05, "loss": 0.3053, "step": 578500 }, { "epoch": 1.16, "learning_rate": 6.756609884857239e-05, "loss": 0.3056, "step": 579000 }, { "epoch": 1.16, "learning_rate": 6.745221357007786e-05, "loss": 0.3056, "step": 579500 }, { "epoch": 1.16, "learning_rate": 6.733836259675233e-05, "loss": 0.3055, "step": 580000 }, { "epoch": 1.16, "learning_rate": 6.722454623985994e-05, "loss": 0.3046, "step": 580500 }, { "epoch": 1.16, "learning_rate": 6.71107648105703e-05, "loss": 0.3051, "step": 581000 }, { "epoch": 1.16, "learning_rate": 6.69970186199575e-05, "loss": 0.305, "step": 581500 }, { "epoch": 1.16, "learning_rate": 6.688330797899925e-05, "loss": 0.3048, "step": 582000 }, { "epoch": 1.16, "learning_rate": 6.676963319857618e-05, "loss": 0.3049, "step": 582500 }, { "epoch": 1.17, "learning_rate": 6.665599458947072e-05, "loss": 0.3056, "step": 583000 }, { "epoch": 1.17, "learning_rate": 6.654239246236651e-05, "loss": 0.3043, "step": 583500 }, { "epoch": 1.17, "learning_rate": 6.642882712784742e-05, "loss": 0.3049, "step": 584000 }, { "epoch": 1.17, "learning_rate": 6.631529889639679e-05, "loss": 0.3052, "step": 584500 }, { "epoch": 1.17, "learning_rate": 6.620180807839639e-05, "loss": 0.3046, "step": 585000 }, { "epoch": 1.17, "learning_rate": 6.608835498412583e-05, "loss": 0.3042, "step": 585500 }, { "epoch": 1.17, "learning_rate": 6.597493992376152e-05, "loss": 0.3047, "step": 586000 }, { "epoch": 1.17, "learning_rate": 6.586156320737592e-05, "loss": 0.305, "step": 586500 }, { "epoch": 1.17, "learning_rate": 6.574822514493664e-05, "loss": 0.3049, "step": 587000 }, { "epoch": 1.17, "learning_rate": 6.56349260463056e-05, "loss": 0.3044, "step": 587500 }, { "epoch": 1.18, "learning_rate": 6.552166622123824e-05, "loss": 0.3045, "step": 588000 }, { "epoch": 1.18, "learning_rate": 6.540844597938256e-05, "loss": 0.3046, "step": 588500 }, { "epoch": 1.18, "learning_rate": 6.52952656302784e-05, "loss": 0.3045, "step": 589000 }, { "epoch": 1.18, "learning_rate": 6.518212548335651e-05, "loss": 0.3044, "step": 589500 }, { "epoch": 1.18, "learning_rate": 6.506902584793773e-05, "loss": 0.3043, "step": 590000 }, { "epoch": 1.18, "learning_rate": 6.495596703323214e-05, "loss": 0.3039, "step": 590500 }, { "epoch": 1.18, "learning_rate": 6.484294934833822e-05, "loss": 0.3042, "step": 591000 }, { "epoch": 1.18, "learning_rate": 6.472997310224204e-05, "loss": 0.304, "step": 591500 }, { "epoch": 1.18, "learning_rate": 6.461703860381628e-05, "loss": 0.3039, "step": 592000 }, { "epoch": 1.18, "learning_rate": 6.450414616181959e-05, "loss": 0.3041, "step": 592500 }, { "epoch": 1.19, "learning_rate": 6.439129608489559e-05, "loss": 0.3043, "step": 593000 }, { "epoch": 1.19, "learning_rate": 6.427848868157208e-05, "loss": 0.3041, "step": 593500 }, { "epoch": 1.19, "learning_rate": 6.41657242602602e-05, "loss": 0.304, "step": 594000 }, { "epoch": 1.19, "learning_rate": 6.405300312925353e-05, "loss": 0.3044, "step": 594500 }, { "epoch": 1.19, "learning_rate": 6.39403255967274e-05, "loss": 0.304, "step": 595000 }, { "epoch": 1.19, "learning_rate": 6.382769197073783e-05, "loss": 0.3042, "step": 595500 }, { "epoch": 1.19, "learning_rate": 6.371510255922088e-05, "loss": 0.3041, "step": 596000 }, { "epoch": 1.19, "learning_rate": 6.360255766999172e-05, "loss": 0.3036, "step": 596500 }, { "epoch": 1.19, "learning_rate": 6.349005761074372e-05, "loss": 0.3044, "step": 597000 }, { "epoch": 1.19, "learning_rate": 6.33776026890478e-05, "loss": 0.3038, "step": 597500 }, { "epoch": 1.2, "learning_rate": 6.326519321235139e-05, "loss": 0.3036, "step": 598000 }, { "epoch": 1.2, "learning_rate": 6.315282948797776e-05, "loss": 0.3033, "step": 598500 }, { "epoch": 1.2, "learning_rate": 6.304051182312496e-05, "loss": 0.3034, "step": 599000 }, { "epoch": 1.2, "learning_rate": 6.292824052486525e-05, "loss": 0.3036, "step": 599500 }, { "epoch": 1.2, "learning_rate": 6.281601590014407e-05, "loss": 0.3039, "step": 600000 }, { "epoch": 1.2, "learning_rate": 6.270383825577923e-05, "loss": 0.3039, "step": 600500 }, { "epoch": 1.2, "learning_rate": 6.259170789846017e-05, "loss": 0.3034, "step": 601000 }, { "epoch": 1.2, "learning_rate": 6.247962513474697e-05, "loss": 0.3039, "step": 601500 }, { "epoch": 1.2, "learning_rate": 6.236759027106965e-05, "loss": 0.3035, "step": 602000 }, { "epoch": 1.2, "learning_rate": 6.225560361372722e-05, "loss": 0.3034, "step": 602500 }, { "epoch": 1.21, "learning_rate": 6.214366546888694e-05, "loss": 0.3045, "step": 603000 }, { "epoch": 1.21, "learning_rate": 6.203177614258345e-05, "loss": 0.3031, "step": 603500 }, { "epoch": 1.21, "learning_rate": 6.191993594071785e-05, "loss": 0.3039, "step": 604000 }, { "epoch": 1.21, "learning_rate": 6.180814516905701e-05, "loss": 0.3068, "step": 604500 }, { "epoch": 1.21, "learning_rate": 6.169640413323262e-05, "loss": 0.3035, "step": 605000 }, { "epoch": 1.21, "learning_rate": 6.158471313874041e-05, "loss": 0.3027, "step": 605500 }, { "epoch": 1.21, "learning_rate": 6.147307249093929e-05, "loss": 0.3039, "step": 606000 }, { "epoch": 1.21, "learning_rate": 6.136148249505053e-05, "loss": 0.3037, "step": 606500 }, { "epoch": 1.21, "learning_rate": 6.124994345615693e-05, "loss": 0.3034, "step": 607000 }, { "epoch": 1.21, "learning_rate": 6.113845567920194e-05, "loss": 0.3033, "step": 607500 }, { "epoch": 1.22, "learning_rate": 6.102701946898891e-05, "loss": 0.3041, "step": 608000 }, { "epoch": 1.22, "learning_rate": 6.0915635130180154e-05, "loss": 0.3027, "step": 608500 }, { "epoch": 1.22, "learning_rate": 6.0804302967296225e-05, "loss": 0.3028, "step": 609000 }, { "epoch": 1.22, "learning_rate": 6.0693023284715e-05, "loss": 0.3031, "step": 609500 }, { "epoch": 1.22, "learning_rate": 6.058179638667089e-05, "loss": 0.3027, "step": 610000 }, { "epoch": 1.22, "learning_rate": 6.047062257725395e-05, "loss": 0.3035, "step": 610500 }, { "epoch": 1.22, "learning_rate": 6.035950216040917e-05, "loss": 0.303, "step": 611000 }, { "epoch": 1.22, "learning_rate": 6.0248435439935516e-05, "loss": 0.3031, "step": 611500 }, { "epoch": 1.22, "learning_rate": 6.0137422719485145e-05, "loss": 0.3032, "step": 612000 }, { "epoch": 1.22, "learning_rate": 6.0026464302562636e-05, "loss": 0.303, "step": 612500 }, { "epoch": 1.23, "learning_rate": 5.991556049252401e-05, "loss": 0.303, "step": 613000 }, { "epoch": 1.23, "learning_rate": 5.980471159257609e-05, "loss": 0.3031, "step": 613500 }, { "epoch": 1.23, "learning_rate": 5.969391790577551e-05, "loss": 0.3026, "step": 614000 }, { "epoch": 1.23, "learning_rate": 5.958317973502798e-05, "loss": 0.3026, "step": 614500 }, { "epoch": 1.23, "learning_rate": 5.947249738308747e-05, "loss": 0.3024, "step": 615000 }, { "epoch": 1.23, "learning_rate": 5.9361871152555254e-05, "loss": 0.3031, "step": 615500 }, { "epoch": 1.23, "learning_rate": 5.925130134587924e-05, "loss": 0.3021, "step": 616000 }, { "epoch": 1.23, "learning_rate": 5.914078826535307e-05, "loss": 0.3021, "step": 616500 }, { "epoch": 1.23, "learning_rate": 5.903033221311528e-05, "loss": 0.3023, "step": 617000 }, { "epoch": 1.23, "learning_rate": 5.891993349114847e-05, "loss": 0.3025, "step": 617500 }, { "epoch": 1.24, "learning_rate": 5.880959240127858e-05, "loss": 0.3021, "step": 618000 }, { "epoch": 1.24, "learning_rate": 5.86993092451739e-05, "loss": 0.3022, "step": 618500 }, { "epoch": 1.24, "learning_rate": 5.858908432434438e-05, "loss": 0.3021, "step": 619000 }, { "epoch": 1.24, "learning_rate": 5.847891794014074e-05, "loss": 0.3017, "step": 619500 }, { "epoch": 1.24, "learning_rate": 5.8368810393753684e-05, "loss": 0.3021, "step": 620000 }, { "epoch": 1.24, "learning_rate": 5.8258761986213015e-05, "loss": 0.3027, "step": 620500 }, { "epoch": 1.24, "learning_rate": 5.814877301838688e-05, "loss": 0.3021, "step": 621000 }, { "epoch": 1.24, "learning_rate": 5.803884379098094e-05, "loss": 0.3022, "step": 621500 }, { "epoch": 1.24, "learning_rate": 5.7928974604537494e-05, "loss": 0.3022, "step": 622000 }, { "epoch": 1.24, "learning_rate": 5.781916575943469e-05, "loss": 0.3022, "step": 622500 }, { "epoch": 1.25, "learning_rate": 5.770941755588573e-05, "loss": 0.3023, "step": 623000 }, { "epoch": 1.25, "learning_rate": 5.7599730293938e-05, "loss": 0.302, "step": 623500 }, { "epoch": 1.25, "learning_rate": 5.749010427347233e-05, "loss": 0.3021, "step": 624000 }, { "epoch": 1.25, "learning_rate": 5.738053979420199e-05, "loss": 0.3019, "step": 624500 }, { "epoch": 1.25, "learning_rate": 5.7271037155672156e-05, "loss": 0.3015, "step": 625000 }, { "epoch": 1.25, "learning_rate": 5.716159665725883e-05, "loss": 0.3016, "step": 625500 }, { "epoch": 1.25, "learning_rate": 5.7052218598168154e-05, "loss": 0.3017, "step": 626000 }, { "epoch": 1.25, "learning_rate": 5.69429032774356e-05, "loss": 0.3021, "step": 626500 }, { "epoch": 1.25, "learning_rate": 5.6833650993925016e-05, "loss": 0.3015, "step": 627000 }, { "epoch": 1.25, "learning_rate": 5.6724462046328025e-05, "loss": 0.3021, "step": 627500 }, { "epoch": 1.26, "learning_rate": 5.661533673316303e-05, "loss": 0.3026, "step": 628000 }, { "epoch": 1.26, "learning_rate": 5.6506275352774447e-05, "loss": 0.3009, "step": 628500 }, { "epoch": 1.26, "learning_rate": 5.639727820333198e-05, "loss": 0.3017, "step": 629000 }, { "epoch": 1.26, "learning_rate": 5.62883455828296e-05, "loss": 0.3016, "step": 629500 }, { "epoch": 1.26, "learning_rate": 5.617947778908498e-05, "loss": 0.3015, "step": 630000 }, { "epoch": 1.26, "learning_rate": 5.60706751197385e-05, "loss": 0.3014, "step": 630500 }, { "epoch": 1.26, "learning_rate": 5.596193787225254e-05, "loss": 0.3008, "step": 631000 }, { "epoch": 1.26, "learning_rate": 5.585326634391049e-05, "loss": 0.3008, "step": 631500 }, { "epoch": 1.26, "learning_rate": 5.574466083181624e-05, "loss": 0.3014, "step": 632000 }, { "epoch": 1.26, "learning_rate": 5.563612163289308e-05, "loss": 0.3008, "step": 632500 }, { "epoch": 1.27, "learning_rate": 5.552764904388305e-05, "loss": 0.3016, "step": 633000 }, { "epoch": 1.27, "learning_rate": 5.541924336134609e-05, "loss": 0.3014, "step": 633500 }, { "epoch": 1.27, "learning_rate": 5.5310904881659116e-05, "loss": 0.301, "step": 634000 }, { "epoch": 1.27, "learning_rate": 5.5202633901015464e-05, "loss": 0.3008, "step": 634500 }, { "epoch": 1.27, "learning_rate": 5.5094430715423835e-05, "loss": 0.3017, "step": 635000 }, { "epoch": 1.27, "learning_rate": 5.4986295620707626e-05, "loss": 0.3005, "step": 635500 }, { "epoch": 1.27, "learning_rate": 5.487822891250406e-05, "loss": 0.3004, "step": 636000 }, { "epoch": 1.27, "learning_rate": 5.477023088626334e-05, "loss": 0.3008, "step": 636500 }, { "epoch": 1.27, "learning_rate": 5.4662301837247985e-05, "loss": 0.301, "step": 637000 }, { "epoch": 1.27, "learning_rate": 5.45544420605319e-05, "loss": 0.3008, "step": 637500 }, { "epoch": 1.28, "learning_rate": 5.4446651850999604e-05, "loss": 0.3012, "step": 638000 }, { "epoch": 1.28, "learning_rate": 5.433893150334538e-05, "loss": 0.3009, "step": 638500 }, { "epoch": 1.28, "learning_rate": 5.4231281312072544e-05, "loss": 0.301, "step": 639000 }, { "epoch": 1.28, "learning_rate": 5.4123701571492636e-05, "loss": 0.3009, "step": 639500 }, { "epoch": 1.28, "learning_rate": 5.401619257572453e-05, "loss": 0.3007, "step": 640000 }, { "epoch": 1.28, "learning_rate": 5.390875461869379e-05, "loss": 0.3011, "step": 640500 }, { "epoch": 1.28, "learning_rate": 5.3801387994131576e-05, "loss": 0.3012, "step": 641000 }, { "epoch": 1.28, "learning_rate": 5.36940929955742e-05, "loss": 0.3011, "step": 641500 }, { "epoch": 1.28, "learning_rate": 5.358686991636209e-05, "loss": 0.3005, "step": 642000 }, { "epoch": 1.28, "learning_rate": 5.347971904963904e-05, "loss": 0.3004, "step": 642500 }, { "epoch": 1.29, "learning_rate": 5.3372640688351476e-05, "loss": 0.3002, "step": 643000 }, { "epoch": 1.29, "learning_rate": 5.326563512524748e-05, "loss": 0.3005, "step": 643500 }, { "epoch": 1.29, "learning_rate": 5.315870265287618e-05, "loss": 0.2999, "step": 644000 }, { "epoch": 1.29, "learning_rate": 5.3051843563586914e-05, "loss": 0.3004, "step": 644500 }, { "epoch": 1.29, "learning_rate": 5.294505814952835e-05, "loss": 0.3003, "step": 645000 }, { "epoch": 1.29, "learning_rate": 5.28383467026477e-05, "loss": 0.2999, "step": 645500 }, { "epoch": 1.29, "learning_rate": 5.2731709514689995e-05, "loss": 0.2997, "step": 646000 }, { "epoch": 1.29, "learning_rate": 5.262514687719722e-05, "loss": 0.2999, "step": 646500 }, { "epoch": 1.29, "learning_rate": 5.25186590815076e-05, "loss": 0.3007, "step": 647000 }, { "epoch": 1.29, "learning_rate": 5.24122464187547e-05, "loss": 0.3007, "step": 647500 }, { "epoch": 1.3, "learning_rate": 5.2305909179866635e-05, "loss": 0.3002, "step": 648000 }, { "epoch": 1.3, "learning_rate": 5.219964765556536e-05, "loss": 0.3003, "step": 648500 }, { "epoch": 1.3, "learning_rate": 5.209346213636584e-05, "loss": 0.2997, "step": 649000 }, { "epoch": 1.3, "learning_rate": 5.1987352912575244e-05, "loss": 0.2995, "step": 649500 }, { "epoch": 1.3, "learning_rate": 5.188132027429215e-05, "loss": 0.2991, "step": 650000 }, { "epoch": 1.3, "learning_rate": 5.177536451140569e-05, "loss": 0.3005, "step": 650500 }, { "epoch": 1.3, "learning_rate": 5.166948591359489e-05, "loss": 0.3002, "step": 651000 }, { "epoch": 1.3, "learning_rate": 5.1563684770327804e-05, "loss": 0.3003, "step": 651500 }, { "epoch": 1.3, "learning_rate": 5.145796137086076e-05, "loss": 0.3, "step": 652000 }, { "epoch": 1.3, "learning_rate": 5.135231600423742e-05, "loss": 0.2997, "step": 652500 }, { "epoch": 1.31, "learning_rate": 5.124674895928823e-05, "loss": 0.2998, "step": 653000 }, { "epoch": 1.31, "learning_rate": 5.114126052462943e-05, "loss": 0.2998, "step": 653500 }, { "epoch": 1.31, "learning_rate": 5.103585098866237e-05, "loss": 0.2995, "step": 654000 }, { "epoch": 1.31, "learning_rate": 5.093052063957276e-05, "loss": 0.2996, "step": 654500 }, { "epoch": 1.31, "learning_rate": 5.082526976532968e-05, "loss": 0.2994, "step": 655000 }, { "epoch": 1.31, "learning_rate": 5.072009865368501e-05, "loss": 0.2996, "step": 655500 }, { "epoch": 1.31, "learning_rate": 5.061500759217261e-05, "loss": 0.2999, "step": 656000 }, { "epoch": 1.31, "learning_rate": 5.050999686810735e-05, "loss": 0.2997, "step": 656500 }, { "epoch": 1.31, "learning_rate": 5.04050667685846e-05, "loss": 0.2995, "step": 657000 }, { "epoch": 1.31, "learning_rate": 5.0300217580479244e-05, "loss": 0.2997, "step": 657500 }, { "epoch": 1.32, "learning_rate": 5.01954495904449e-05, "loss": 0.2993, "step": 658000 }, { "epoch": 1.32, "learning_rate": 5.0090763084913336e-05, "loss": 0.2991, "step": 658500 }, { "epoch": 1.32, "learning_rate": 4.998615835009339e-05, "loss": 0.2995, "step": 659000 }, { "epoch": 1.32, "learning_rate": 4.988163567197043e-05, "loss": 0.2993, "step": 659500 }, { "epoch": 1.32, "learning_rate": 4.97771953363055e-05, "loss": 0.299, "step": 660000 }, { "epoch": 1.32, "learning_rate": 4.967283762863444e-05, "loss": 0.2994, "step": 660500 }, { "epoch": 1.32, "learning_rate": 4.956856283426728e-05, "loss": 0.2994, "step": 661000 }, { "epoch": 1.32, "learning_rate": 4.946437123828732e-05, "loss": 0.2982, "step": 661500 }, { "epoch": 1.32, "learning_rate": 4.936026312555037e-05, "loss": 0.2992, "step": 662000 }, { "epoch": 1.32, "learning_rate": 4.925623878068408e-05, "loss": 0.2984, "step": 662500 }, { "epoch": 1.33, "learning_rate": 4.915229848808698e-05, "loss": 0.2994, "step": 663000 }, { "epoch": 1.33, "learning_rate": 4.904844253192795e-05, "loss": 0.2989, "step": 663500 }, { "epoch": 1.33, "learning_rate": 4.8944671196145136e-05, "loss": 0.2991, "step": 664000 }, { "epoch": 1.33, "learning_rate": 4.884098476444539e-05, "loss": 0.2984, "step": 664500 }, { "epoch": 1.33, "learning_rate": 4.8737383520303546e-05, "loss": 0.2984, "step": 665000 }, { "epoch": 1.33, "learning_rate": 4.8633867746961356e-05, "loss": 0.2988, "step": 665500 }, { "epoch": 1.33, "learning_rate": 4.853043772742709e-05, "loss": 0.2986, "step": 666000 }, { "epoch": 1.33, "learning_rate": 4.8427093744474364e-05, "loss": 0.299, "step": 666500 }, { "epoch": 1.33, "learning_rate": 4.832383608064172e-05, "loss": 0.2992, "step": 667000 }, { "epoch": 1.33, "learning_rate": 4.822066501823172e-05, "loss": 0.299, "step": 667500 }, { "epoch": 1.34, "learning_rate": 4.811758083931005e-05, "loss": 0.2984, "step": 668000 }, { "epoch": 1.34, "learning_rate": 4.8014583825704976e-05, "loss": 0.2982, "step": 668500 }, { "epoch": 1.34, "learning_rate": 4.791167425900632e-05, "loss": 0.2988, "step": 669000 }, { "epoch": 1.34, "learning_rate": 4.780885242056493e-05, "loss": 0.2983, "step": 669500 }, { "epoch": 1.34, "learning_rate": 4.770611859149185e-05, "loss": 0.2987, "step": 670000 }, { "epoch": 1.34, "learning_rate": 4.7603473052657374e-05, "loss": 0.2986, "step": 670500 }, { "epoch": 1.34, "learning_rate": 4.7500916084690564e-05, "loss": 0.298, "step": 671000 }, { "epoch": 1.34, "learning_rate": 4.7398447967978165e-05, "loss": 0.2991, "step": 671500 }, { "epoch": 1.34, "learning_rate": 4.729606898266411e-05, "loss": 0.2981, "step": 672000 }, { "epoch": 1.34, "learning_rate": 4.71937794086487e-05, "loss": 0.2989, "step": 672500 }, { "epoch": 1.35, "learning_rate": 4.709157952558768e-05, "loss": 0.2984, "step": 673000 }, { "epoch": 1.35, "learning_rate": 4.698946961289163e-05, "loss": 0.2981, "step": 673500 }, { "epoch": 1.35, "learning_rate": 4.688744994972514e-05, "loss": 0.2986, "step": 674000 }, { "epoch": 1.35, "learning_rate": 4.6785520815006085e-05, "loss": 0.2979, "step": 674500 }, { "epoch": 1.35, "learning_rate": 4.668368248740485e-05, "loss": 0.2984, "step": 675000 }, { "epoch": 1.35, "learning_rate": 4.658193524534351e-05, "loss": 0.2985, "step": 675500 }, { "epoch": 1.35, "learning_rate": 4.6480279366995116e-05, "loss": 0.2986, "step": 676000 }, { "epoch": 1.35, "learning_rate": 4.637871513028303e-05, "loss": 0.2981, "step": 676500 }, { "epoch": 1.35, "learning_rate": 4.6277242812879914e-05, "loss": 0.2978, "step": 677000 }, { "epoch": 1.35, "learning_rate": 4.617586269220728e-05, "loss": 0.2978, "step": 677500 }, { "epoch": 1.36, "learning_rate": 4.607457504543447e-05, "loss": 0.2977, "step": 678000 }, { "epoch": 1.36, "learning_rate": 4.597338014947801e-05, "loss": 0.2975, "step": 678500 }, { "epoch": 1.36, "learning_rate": 4.5872278281000955e-05, "loss": 0.2978, "step": 679000 }, { "epoch": 1.36, "learning_rate": 4.577126971641189e-05, "loss": 0.2978, "step": 679500 }, { "epoch": 1.36, "learning_rate": 4.567035473186444e-05, "loss": 0.2978, "step": 680000 }, { "epoch": 1.36, "learning_rate": 4.556953360325625e-05, "loss": 0.2978, "step": 680500 }, { "epoch": 1.36, "learning_rate": 4.546880660622845e-05, "loss": 0.2975, "step": 681000 }, { "epoch": 1.36, "learning_rate": 4.5368174016164844e-05, "loss": 0.2976, "step": 681500 }, { "epoch": 1.36, "learning_rate": 4.5267636108191036e-05, "loss": 0.2983, "step": 682000 }, { "epoch": 1.36, "learning_rate": 4.5167193157173913e-05, "loss": 0.2978, "step": 682500 }, { "epoch": 1.37, "learning_rate": 4.5066845437720555e-05, "loss": 0.2977, "step": 683000 }, { "epoch": 1.37, "learning_rate": 4.4966593224177866e-05, "loss": 0.2976, "step": 683500 }, { "epoch": 1.37, "learning_rate": 4.4866436790631564e-05, "loss": 0.2971, "step": 684000 }, { "epoch": 1.37, "learning_rate": 4.476637641090551e-05, "loss": 0.2975, "step": 684500 }, { "epoch": 1.37, "learning_rate": 4.4666412358560955e-05, "loss": 0.2978, "step": 685000 }, { "epoch": 1.37, "learning_rate": 4.456654490689578e-05, "loss": 0.2967, "step": 685500 }, { "epoch": 1.37, "learning_rate": 4.4466774328943796e-05, "loss": 0.2979, "step": 686000 }, { "epoch": 1.37, "learning_rate": 4.4367100897474e-05, "loss": 0.2975, "step": 686500 }, { "epoch": 1.37, "learning_rate": 4.426752488498972e-05, "loss": 0.2972, "step": 687000 }, { "epoch": 1.37, "learning_rate": 4.4168046563727945e-05, "loss": 0.2972, "step": 687500 }, { "epoch": 1.38, "learning_rate": 4.406866620565862e-05, "loss": 0.2968, "step": 688000 }, { "epoch": 1.38, "learning_rate": 4.396938408248383e-05, "loss": 0.2973, "step": 688500 }, { "epoch": 1.38, "learning_rate": 4.3870200465637164e-05, "loss": 0.2972, "step": 689000 }, { "epoch": 1.38, "learning_rate": 4.377111562628282e-05, "loss": 0.2965, "step": 689500 }, { "epoch": 1.38, "learning_rate": 4.3672129835314955e-05, "loss": 0.2971, "step": 690000 }, { "epoch": 1.38, "learning_rate": 4.3573243363356916e-05, "loss": 0.297, "step": 690500 }, { "epoch": 1.38, "learning_rate": 4.347445648076057e-05, "loss": 0.2969, "step": 691000 }, { "epoch": 1.38, "learning_rate": 4.337576945760554e-05, "loss": 0.2978, "step": 691500 }, { "epoch": 1.38, "learning_rate": 4.327718256369826e-05, "loss": 0.2965, "step": 692000 }, { "epoch": 1.38, "learning_rate": 4.317869606857162e-05, "loss": 0.2973, "step": 692500 }, { "epoch": 1.39, "learning_rate": 4.3080310241483885e-05, "loss": 0.2967, "step": 693000 }, { "epoch": 1.39, "learning_rate": 4.298202535141818e-05, "loss": 0.2974, "step": 693500 }, { "epoch": 1.39, "learning_rate": 4.2883841667081675e-05, "loss": 0.2967, "step": 694000 }, { "epoch": 1.39, "learning_rate": 4.2785759456904745e-05, "loss": 0.2966, "step": 694500 }, { "epoch": 1.39, "learning_rate": 4.268777898904044e-05, "loss": 0.2969, "step": 695000 }, { "epoch": 1.39, "learning_rate": 4.2589900531363606e-05, "loss": 0.2967, "step": 695500 }, { "epoch": 1.39, "learning_rate": 4.2492124351470214e-05, "loss": 0.2962, "step": 696000 }, { "epoch": 1.39, "learning_rate": 4.239445071667666e-05, "loss": 0.297, "step": 696500 }, { "epoch": 1.39, "learning_rate": 4.2296879894018835e-05, "loss": 0.2966, "step": 697000 }, { "epoch": 1.39, "learning_rate": 4.219941215025171e-05, "loss": 0.2971, "step": 697500 }, { "epoch": 1.4, "learning_rate": 4.210204775184834e-05, "loss": 0.2973, "step": 698000 }, { "epoch": 1.4, "learning_rate": 4.2004786964999304e-05, "loss": 0.2962, "step": 698500 }, { "epoch": 1.4, "learning_rate": 4.190763005561186e-05, "loss": 0.2964, "step": 699000 }, { "epoch": 1.4, "learning_rate": 4.1810577289309266e-05, "loss": 0.2968, "step": 699500 }, { "epoch": 1.4, "learning_rate": 4.171362893143013e-05, "loss": 0.2965, "step": 700000 }, { "epoch": 1.4, "learning_rate": 4.1616785247027506e-05, "loss": 0.2963, "step": 700500 }, { "epoch": 1.4, "learning_rate": 4.1520046500868384e-05, "loss": 0.2966, "step": 701000 }, { "epoch": 1.4, "learning_rate": 4.1423412957432775e-05, "loss": 0.2963, "step": 701500 }, { "epoch": 1.4, "learning_rate": 4.1326884880913074e-05, "loss": 0.2966, "step": 702000 }, { "epoch": 1.4, "learning_rate": 4.123046253521341e-05, "loss": 0.2962, "step": 702500 }, { "epoch": 1.41, "learning_rate": 4.1134146183948724e-05, "loss": 0.2961, "step": 703000 }, { "epoch": 1.41, "learning_rate": 4.1037936090444315e-05, "loss": 0.296, "step": 703500 }, { "epoch": 1.41, "learning_rate": 4.0941832517734885e-05, "loss": 0.2959, "step": 704000 }, { "epoch": 1.41, "learning_rate": 4.084583572856388e-05, "loss": 0.2962, "step": 704500 }, { "epoch": 1.41, "learning_rate": 4.0749945985382915e-05, "loss": 0.2961, "step": 705000 }, { "epoch": 1.41, "learning_rate": 4.065416355035087e-05, "loss": 0.296, "step": 705500 }, { "epoch": 1.41, "learning_rate": 4.0558488685333235e-05, "loss": 0.2958, "step": 706000 }, { "epoch": 1.41, "learning_rate": 4.04629216519015e-05, "loss": 0.2961, "step": 706500 }, { "epoch": 1.41, "learning_rate": 4.036746271133223e-05, "loss": 0.2962, "step": 707000 }, { "epoch": 1.41, "learning_rate": 4.0272112124606546e-05, "loss": 0.2962, "step": 707500 }, { "epoch": 1.42, "learning_rate": 4.0176870152409324e-05, "loss": 0.296, "step": 708000 }, { "epoch": 1.42, "learning_rate": 4.008173705512842e-05, "loss": 0.295, "step": 708500 }, { "epoch": 1.42, "learning_rate": 3.998671309285417e-05, "loss": 0.2958, "step": 709000 }, { "epoch": 1.42, "learning_rate": 3.989179852537839e-05, "loss": 0.2964, "step": 709500 }, { "epoch": 1.42, "learning_rate": 3.979699361219395e-05, "loss": 0.2956, "step": 710000 }, { "epoch": 1.42, "learning_rate": 3.9702298612493816e-05, "loss": 0.2958, "step": 710500 }, { "epoch": 1.42, "learning_rate": 3.960771378517049e-05, "loss": 0.296, "step": 711000 }, { "epoch": 1.42, "learning_rate": 3.951323938881533e-05, "loss": 0.2953, "step": 711500 }, { "epoch": 1.42, "learning_rate": 3.941887568171766e-05, "loss": 0.2956, "step": 712000 }, { "epoch": 1.42, "learning_rate": 3.9324622921864323e-05, "loss": 0.2963, "step": 712500 }, { "epoch": 1.43, "learning_rate": 3.923048136693873e-05, "loss": 0.2951, "step": 713000 }, { "epoch": 1.43, "learning_rate": 3.913645127432028e-05, "loss": 0.2957, "step": 713500 }, { "epoch": 1.43, "learning_rate": 3.904253290108369e-05, "loss": 0.2953, "step": 714000 }, { "epoch": 1.43, "learning_rate": 3.8948726503998176e-05, "loss": 0.2954, "step": 714500 }, { "epoch": 1.43, "learning_rate": 3.885503233952689e-05, "loss": 0.2958, "step": 715000 }, { "epoch": 1.43, "learning_rate": 3.876145066382606e-05, "loss": 0.2948, "step": 715500 }, { "epoch": 1.43, "learning_rate": 3.86679817327444e-05, "loss": 0.2953, "step": 716000 }, { "epoch": 1.43, "learning_rate": 3.857462580182245e-05, "loss": 0.2952, "step": 716500 }, { "epoch": 1.43, "learning_rate": 3.848138312629171e-05, "loss": 0.2953, "step": 717000 }, { "epoch": 1.43, "learning_rate": 3.838825396107415e-05, "loss": 0.2962, "step": 717500 }, { "epoch": 1.44, "learning_rate": 3.8295238560781317e-05, "loss": 0.2957, "step": 718000 }, { "epoch": 1.44, "learning_rate": 3.820233717971374e-05, "loss": 0.2955, "step": 718500 }, { "epoch": 1.44, "learning_rate": 3.810955007186029e-05, "loss": 0.2953, "step": 719000 }, { "epoch": 1.44, "learning_rate": 3.801687749089737e-05, "loss": 0.295, "step": 719500 }, { "epoch": 1.44, "learning_rate": 3.792431969018824e-05, "loss": 0.2951, "step": 720000 }, { "epoch": 1.44, "learning_rate": 3.783187692278245e-05, "loss": 0.2952, "step": 720500 }, { "epoch": 1.44, "learning_rate": 3.7739549441414945e-05, "loss": 0.2951, "step": 721000 }, { "epoch": 1.44, "learning_rate": 3.764733749850558e-05, "loss": 0.2957, "step": 721500 }, { "epoch": 1.44, "learning_rate": 3.755524134615825e-05, "loss": 0.2955, "step": 722000 }, { "epoch": 1.44, "learning_rate": 3.746326123616032e-05, "loss": 0.2953, "step": 722500 }, { "epoch": 1.45, "learning_rate": 3.7371397419981925e-05, "loss": 0.2947, "step": 723000 }, { "epoch": 1.45, "learning_rate": 3.7279650148775196e-05, "loss": 0.2949, "step": 723500 }, { "epoch": 1.45, "learning_rate": 3.7188019673373706e-05, "loss": 0.2954, "step": 724000 }, { "epoch": 1.45, "learning_rate": 3.709650624429166e-05, "loss": 0.2949, "step": 724500 }, { "epoch": 1.45, "learning_rate": 3.700511011172325e-05, "loss": 0.2946, "step": 725000 }, { "epoch": 1.45, "learning_rate": 3.691383152554207e-05, "loss": 0.2953, "step": 725500 }, { "epoch": 1.45, "learning_rate": 3.682267073530023e-05, "loss": 0.295, "step": 726000 }, { "epoch": 1.45, "learning_rate": 3.67316279902279e-05, "loss": 0.2948, "step": 726500 }, { "epoch": 1.45, "learning_rate": 3.664070353923245e-05, "loss": 0.2949, "step": 727000 }, { "epoch": 1.45, "learning_rate": 3.654989763089782e-05, "loss": 0.295, "step": 727500 }, { "epoch": 1.46, "learning_rate": 3.645921051348396e-05, "loss": 0.2949, "step": 728000 }, { "epoch": 1.46, "learning_rate": 3.6368642434925924e-05, "loss": 0.2938, "step": 728500 }, { "epoch": 1.46, "learning_rate": 3.627819364283345e-05, "loss": 0.294, "step": 729000 }, { "epoch": 1.46, "learning_rate": 3.6187864384490035e-05, "loss": 0.2937, "step": 729500 }, { "epoch": 1.46, "learning_rate": 3.6097654906852405e-05, "loss": 0.2947, "step": 730000 }, { "epoch": 1.0, "learning_rate": 3.600756545654988e-05, "loss": 0.2941, "step": 730500 }, { "epoch": 1.0, "learning_rate": 3.591759627988353e-05, "loss": 0.2945, "step": 731000 }, { "epoch": 1.0, "learning_rate": 3.582774762282568e-05, "loss": 0.2945, "step": 731500 }, { "epoch": 1.0, "learning_rate": 3.573801973101913e-05, "loss": 0.2942, "step": 732000 }, { "epoch": 1.0, "learning_rate": 3.564841284977646e-05, "loss": 0.2943, "step": 732500 }, { "epoch": 1.01, "learning_rate": 3.5558927224079534e-05, "loss": 0.2949, "step": 733000 }, { "epoch": 1.01, "learning_rate": 3.546956309857859e-05, "loss": 0.2946, "step": 733500 }, { "epoch": 1.01, "learning_rate": 3.5380320717591716e-05, "loss": 0.2944, "step": 734000 }, { "epoch": 1.01, "learning_rate": 3.5291200325104234e-05, "loss": 0.2936, "step": 734500 }, { "epoch": 1.01, "learning_rate": 3.5202202164767836e-05, "loss": 0.2943, "step": 735000 }, { "epoch": 1.01, "learning_rate": 3.511332647990014e-05, "loss": 0.294, "step": 735500 }, { "epoch": 1.01, "learning_rate": 3.5024573513483864e-05, "loss": 0.2939, "step": 736000 }, { "epoch": 1.01, "learning_rate": 3.493594350816619e-05, "loss": 0.2944, "step": 736500 }, { "epoch": 1.01, "learning_rate": 3.484743670625822e-05, "loss": 0.2943, "step": 737000 }, { "epoch": 1.01, "learning_rate": 3.4759053349734126e-05, "loss": 0.2938, "step": 737500 }, { "epoch": 1.02, "learning_rate": 3.467079368023068e-05, "loss": 0.2937, "step": 738000 }, { "epoch": 1.02, "learning_rate": 3.458265793904642e-05, "loss": 0.2947, "step": 738500 }, { "epoch": 1.02, "learning_rate": 3.449464636714107e-05, "loss": 0.2941, "step": 739000 }, { "epoch": 1.02, "learning_rate": 3.4406759205134966e-05, "loss": 0.2939, "step": 739500 }, { "epoch": 1.02, "learning_rate": 3.431899669330819e-05, "loss": 0.2936, "step": 740000 }, { "epoch": 1.02, "learning_rate": 3.4231359071600156e-05, "loss": 0.2931, "step": 740500 }, { "epoch": 1.02, "learning_rate": 3.4143846579608744e-05, "loss": 0.2936, "step": 741000 }, { "epoch": 1.02, "learning_rate": 3.405645945658976e-05, "loss": 0.2935, "step": 741500 }, { "epoch": 1.02, "learning_rate": 3.396919794145629e-05, "loss": 0.294, "step": 742000 }, { "epoch": 1.02, "learning_rate": 3.3882062272777936e-05, "loss": 0.2937, "step": 742500 }, { "epoch": 1.03, "learning_rate": 3.3795052688780345e-05, "loss": 0.2932, "step": 743000 }, { "epoch": 1.03, "learning_rate": 3.370816942734438e-05, "loss": 0.2937, "step": 743500 }, { "epoch": 1.03, "learning_rate": 3.362141272600552e-05, "loss": 0.2937, "step": 744000 }, { "epoch": 1.03, "learning_rate": 3.3534782821953325e-05, "loss": 0.2934, "step": 744500 }, { "epoch": 1.03, "learning_rate": 3.3448279952030615e-05, "loss": 0.2939, "step": 745000 }, { "epoch": 1.03, "learning_rate": 3.336190435273295e-05, "loss": 0.2935, "step": 745500 }, { "epoch": 1.03, "learning_rate": 3.327565626020793e-05, "loss": 0.2935, "step": 746000 }, { "epoch": 1.03, "learning_rate": 3.31895359102545e-05, "loss": 0.2932, "step": 746500 }, { "epoch": 1.03, "learning_rate": 3.3103543538322455e-05, "loss": 0.2938, "step": 747000 }, { "epoch": 1.03, "learning_rate": 3.3017679379511645e-05, "loss": 0.2936, "step": 747500 }, { "epoch": 1.04, "learning_rate": 3.293194366857137e-05, "loss": 0.2927, "step": 748000 }, { "epoch": 1.04, "learning_rate": 3.2846336639899845e-05, "loss": 0.293, "step": 748500 }, { "epoch": 1.04, "learning_rate": 3.276085852754336e-05, "loss": 0.293, "step": 749000 }, { "epoch": 1.04, "learning_rate": 3.267550956519586e-05, "loss": 0.293, "step": 749500 }, { "epoch": 1.04, "learning_rate": 3.259028998619814e-05, "loss": 0.2933, "step": 750000 }, { "epoch": 1.04, "learning_rate": 3.2505200023537225e-05, "loss": 0.2932, "step": 750500 }, { "epoch": 1.04, "learning_rate": 3.2420239909845894e-05, "loss": 0.2928, "step": 751000 }, { "epoch": 1.04, "learning_rate": 3.233540987740179e-05, "loss": 0.2931, "step": 751500 }, { "epoch": 1.04, "learning_rate": 3.2250710158127045e-05, "loss": 0.2928, "step": 752000 }, { "epoch": 1.04, "learning_rate": 3.216614098358741e-05, "loss": 0.2936, "step": 752500 }, { "epoch": 1.05, "learning_rate": 3.2081702584991786e-05, "loss": 0.293, "step": 753000 }, { "epoch": 1.05, "learning_rate": 3.1997395193191565e-05, "loss": 0.2927, "step": 753500 }, { "epoch": 1.05, "learning_rate": 3.191321903867988e-05, "loss": 0.2924, "step": 754000 }, { "epoch": 1.05, "learning_rate": 3.18291743515912e-05, "loss": 0.2928, "step": 754500 }, { "epoch": 1.05, "learning_rate": 3.174526136170039e-05, "loss": 0.2931, "step": 755000 }, { "epoch": 1.05, "learning_rate": 3.1661480298422433e-05, "loss": 0.2929, "step": 755500 }, { "epoch": 1.05, "learning_rate": 3.157783139081155e-05, "loss": 0.2926, "step": 756000 }, { "epoch": 1.05, "learning_rate": 3.149431486756063e-05, "loss": 0.293, "step": 756500 }, { "epoch": 1.05, "learning_rate": 3.141093095700072e-05, "loss": 0.2929, "step": 757000 }, { "epoch": 1.05, "learning_rate": 3.132767988710016e-05, "loss": 0.2924, "step": 757500 }, { "epoch": 1.06, "learning_rate": 3.1244561885464244e-05, "loss": 0.2927, "step": 758000 }, { "epoch": 1.06, "learning_rate": 3.116157717933443e-05, "loss": 0.2925, "step": 758500 }, { "epoch": 1.06, "learning_rate": 3.107872599558769e-05, "loss": 0.2928, "step": 759000 }, { "epoch": 1.06, "learning_rate": 3.0996008560736083e-05, "loss": 0.2926, "step": 759500 }, { "epoch": 1.06, "learning_rate": 3.0913425100925795e-05, "loss": 0.2933, "step": 760000 }, { "epoch": 1.06, "learning_rate": 3.083097584193693e-05, "loss": 0.293, "step": 760500 }, { "epoch": 1.06, "learning_rate": 3.0748661009182616e-05, "loss": 0.2924, "step": 761000 }, { "epoch": 1.06, "learning_rate": 3.066648082770845e-05, "loss": 0.2924, "step": 761500 }, { "epoch": 1.06, "learning_rate": 3.0584435522191896e-05, "loss": 0.2926, "step": 762000 }, { "epoch": 1.06, "learning_rate": 3.0502525316941673e-05, "loss": 0.2928, "step": 762500 }, { "epoch": 1.07, "learning_rate": 3.0420750435897183e-05, "loss": 0.2922, "step": 763000 }, { "epoch": 1.07, "learning_rate": 3.0339111102627846e-05, "loss": 0.2921, "step": 763500 }, { "epoch": 1.07, "learning_rate": 3.025760754033246e-05, "loss": 0.2925, "step": 764000 }, { "epoch": 1.07, "learning_rate": 3.017623997183864e-05, "loss": 0.2926, "step": 764500 }, { "epoch": 1.07, "learning_rate": 3.0095008619602206e-05, "loss": 0.2919, "step": 765000 }, { "epoch": 1.07, "learning_rate": 3.0013913705706587e-05, "loss": 0.2923, "step": 765500 }, { "epoch": 1.07, "learning_rate": 2.993295545186223e-05, "loss": 0.2924, "step": 766000 }, { "epoch": 1.07, "learning_rate": 2.9852134079405817e-05, "loss": 0.2921, "step": 766500 }, { "epoch": 1.07, "learning_rate": 2.977144980929996e-05, "loss": 0.2918, "step": 767000 }, { "epoch": 1.07, "learning_rate": 2.969090286213233e-05, "loss": 0.2922, "step": 767500 }, { "epoch": 1.08, "learning_rate": 2.961049345811523e-05, "loss": 0.2919, "step": 768000 }, { "epoch": 1.08, "learning_rate": 2.9530221817084937e-05, "loss": 0.2915, "step": 768500 }, { "epoch": 1.08, "learning_rate": 2.945008815850097e-05, "loss": 0.2924, "step": 769000 }, { "epoch": 1.08, "learning_rate": 2.9370092701445748e-05, "loss": 0.2925, "step": 769500 }, { "epoch": 1.08, "learning_rate": 2.929023566462377e-05, "loss": 0.2923, "step": 770000 }, { "epoch": 1.08, "learning_rate": 2.921051726636114e-05, "loss": 0.292, "step": 770500 }, { "epoch": 1.08, "learning_rate": 2.9130937724604947e-05, "loss": 0.2918, "step": 771000 }, { "epoch": 1.08, "learning_rate": 2.9051497256922545e-05, "loss": 0.292, "step": 771500 }, { "epoch": 1.08, "learning_rate": 2.8972196080501208e-05, "loss": 0.2915, "step": 772000 }, { "epoch": 1.08, "learning_rate": 2.8893034412147268e-05, "loss": 0.291, "step": 772500 }, { "epoch": 1.09, "learning_rate": 2.8814012468285748e-05, "loss": 0.2919, "step": 773000 }, { "epoch": 1.09, "learning_rate": 2.8735130464959604e-05, "loss": 0.2916, "step": 773500 }, { "epoch": 1.09, "learning_rate": 2.865638861782922e-05, "loss": 0.292, "step": 774000 }, { "epoch": 1.09, "learning_rate": 2.8577787142171804e-05, "loss": 0.2916, "step": 774500 }, { "epoch": 1.09, "learning_rate": 2.849932625288079e-05, "loss": 0.2916, "step": 775000 }, { "epoch": 1.09, "learning_rate": 2.8421006164465254e-05, "loss": 0.291, "step": 775500 }, { "epoch": 1.09, "learning_rate": 2.8342827091049336e-05, "loss": 0.2912, "step": 776000 }, { "epoch": 1.09, "learning_rate": 2.8264789246371605e-05, "loss": 0.2916, "step": 776500 }, { "epoch": 1.09, "learning_rate": 2.8186892843784587e-05, "loss": 0.2912, "step": 777000 }, { "epoch": 1.09, "learning_rate": 2.810913809625404e-05, "loss": 0.2921, "step": 777500 }, { "epoch": 1.1, "learning_rate": 2.803152521635851e-05, "loss": 0.2916, "step": 778000 }, { "epoch": 1.1, "learning_rate": 2.795405441628862e-05, "loss": 0.2913, "step": 778500 }, { "epoch": 1.1, "learning_rate": 2.7876725907846578e-05, "loss": 0.2918, "step": 779000 }, { "epoch": 1.1, "learning_rate": 2.7799539902445596e-05, "loss": 0.2913, "step": 779500 }, { "epoch": 1.1, "learning_rate": 2.7722496611109243e-05, "loss": 0.2912, "step": 780000 }, { "epoch": 1.1, "learning_rate": 2.7645596244470935e-05, "loss": 0.2908, "step": 780500 }, { "epoch": 1.1, "learning_rate": 2.7568839012773365e-05, "loss": 0.2904, "step": 781000 }, { "epoch": 1.1, "learning_rate": 2.7492225125867825e-05, "loss": 0.2915, "step": 781500 }, { "epoch": 1.1, "learning_rate": 2.7415754793213826e-05, "loss": 0.2914, "step": 782000 }, { "epoch": 1.1, "learning_rate": 2.7339428223878283e-05, "loss": 0.2911, "step": 782500 }, { "epoch": 1.11, "learning_rate": 2.7263245626535116e-05, "loss": 0.291, "step": 783000 }, { "epoch": 1.11, "learning_rate": 2.7187207209464687e-05, "loss": 0.2905, "step": 783500 }, { "epoch": 1.11, "learning_rate": 2.7111313180553077e-05, "loss": 0.2917, "step": 784000 }, { "epoch": 1.11, "learning_rate": 2.703556374729169e-05, "loss": 0.2909, "step": 784500 }, { "epoch": 1.11, "learning_rate": 2.6959959116776587e-05, "loss": 0.2905, "step": 785000 }, { "epoch": 1.11, "learning_rate": 2.68844994957079e-05, "loss": 0.2901, "step": 785500 }, { "epoch": 1.11, "learning_rate": 2.6809185090389406e-05, "loss": 0.2908, "step": 786000 }, { "epoch": 1.11, "learning_rate": 2.6734016106727777e-05, "loss": 0.2911, "step": 786500 }, { "epoch": 1.11, "learning_rate": 2.6658992750232167e-05, "loss": 0.2907, "step": 787000 }, { "epoch": 1.11, "learning_rate": 2.6584115226013553e-05, "loss": 0.2912, "step": 787500 }, { "epoch": 1.12, "learning_rate": 2.6509383738784218e-05, "loss": 0.2907, "step": 788000 }, { "epoch": 1.12, "learning_rate": 2.6434798492857228e-05, "loss": 0.2912, "step": 788500 }, { "epoch": 1.12, "learning_rate": 2.6360359692145757e-05, "loss": 0.2907, "step": 789000 }, { "epoch": 1.12, "learning_rate": 2.6286067540162677e-05, "loss": 0.2904, "step": 789500 }, { "epoch": 1.12, "learning_rate": 2.6211922240019883e-05, "loss": 0.2908, "step": 790000 }, { "epoch": 1.12, "learning_rate": 2.6137923994427768e-05, "loss": 0.2911, "step": 790500 }, { "epoch": 1.12, "learning_rate": 2.6064073005694758e-05, "loss": 0.2909, "step": 791000 }, { "epoch": 1.12, "learning_rate": 2.5990369475726598e-05, "loss": 0.2898, "step": 791500 }, { "epoch": 1.12, "learning_rate": 2.591681360602595e-05, "loss": 0.2902, "step": 792000 }, { "epoch": 1.12, "learning_rate": 2.5843405597691748e-05, "loss": 0.2909, "step": 792500 }, { "epoch": 1.13, "learning_rate": 2.577014565141866e-05, "loss": 0.2899, "step": 793000 }, { "epoch": 1.13, "learning_rate": 2.569703396749661e-05, "loss": 0.2905, "step": 793500 }, { "epoch": 1.13, "learning_rate": 2.562407074581014e-05, "loss": 0.2908, "step": 794000 }, { "epoch": 1.13, "learning_rate": 2.5551256185837897e-05, "loss": 0.2904, "step": 794500 }, { "epoch": 1.13, "learning_rate": 2.5478590486652137e-05, "loss": 0.2905, "step": 795000 }, { "epoch": 1.13, "learning_rate": 2.5406073846918076e-05, "loss": 0.2901, "step": 795500 }, { "epoch": 1.13, "learning_rate": 2.533370646489347e-05, "loss": 0.2904, "step": 796000 }, { "epoch": 1.13, "learning_rate": 2.526148853842796e-05, "loss": 0.2903, "step": 796500 }, { "epoch": 1.13, "learning_rate": 2.5189420264962586e-05, "loss": 0.2898, "step": 797000 }, { "epoch": 1.13, "learning_rate": 2.5117501841529297e-05, "loss": 0.291, "step": 797500 }, { "epoch": 1.14, "learning_rate": 2.504573346475026e-05, "loss": 0.2897, "step": 798000 }, { "epoch": 1.14, "learning_rate": 2.497411533083753e-05, "loss": 0.2901, "step": 798500 }, { "epoch": 1.14, "learning_rate": 2.4902647635592324e-05, "loss": 0.2909, "step": 799000 }, { "epoch": 1.14, "learning_rate": 2.483133057440458e-05, "loss": 0.2899, "step": 799500 }, { "epoch": 1.14, "learning_rate": 2.476016434225246e-05, "loss": 0.2901, "step": 800000 }, { "epoch": 1.14, "learning_rate": 2.4689149133701672e-05, "loss": 0.2905, "step": 800500 }, { "epoch": 1.14, "learning_rate": 2.461828514290513e-05, "loss": 0.2898, "step": 801000 }, { "epoch": 1.14, "learning_rate": 2.4547572563602267e-05, "loss": 0.2903, "step": 801500 }, { "epoch": 1.14, "learning_rate": 2.447701158911855e-05, "loss": 0.2898, "step": 802000 }, { "epoch": 1.14, "learning_rate": 2.4406602412365027e-05, "loss": 0.2898, "step": 802500 }, { "epoch": 1.15, "learning_rate": 2.4336345225837658e-05, "loss": 0.2902, "step": 803000 }, { "epoch": 1.15, "learning_rate": 2.4266240221616956e-05, "loss": 0.2905, "step": 803500 }, { "epoch": 1.15, "learning_rate": 2.4196287591367296e-05, "loss": 0.2897, "step": 804000 }, { "epoch": 1.15, "learning_rate": 2.412648752633649e-05, "loss": 0.2898, "step": 804500 }, { "epoch": 1.15, "learning_rate": 2.405684021735527e-05, "loss": 0.2898, "step": 805000 }, { "epoch": 1.15, "learning_rate": 2.39873458548367e-05, "loss": 0.2895, "step": 805500 }, { "epoch": 1.15, "learning_rate": 2.3918004628775736e-05, "loss": 0.2897, "step": 806000 }, { "epoch": 1.15, "learning_rate": 2.3848816728748643e-05, "loss": 0.2897, "step": 806500 }, { "epoch": 1.15, "learning_rate": 2.3779782343912463e-05, "loss": 0.2888, "step": 807000 }, { "epoch": 1.15, "learning_rate": 2.3710901663004604e-05, "loss": 0.29, "step": 807500 }, { "epoch": 1.16, "learning_rate": 2.364217487434221e-05, "loss": 0.2895, "step": 808000 }, { "epoch": 1.16, "learning_rate": 2.3573602165821668e-05, "loss": 0.2899, "step": 808500 }, { "epoch": 1.16, "learning_rate": 2.3505183724918196e-05, "loss": 0.2897, "step": 809000 }, { "epoch": 1.16, "learning_rate": 2.3436919738685132e-05, "loss": 0.29, "step": 809500 }, { "epoch": 1.16, "learning_rate": 2.3368810393753687e-05, "loss": 0.2895, "step": 810000 }, { "epoch": 1.16, "learning_rate": 2.3300855876332162e-05, "loss": 0.2894, "step": 810500 }, { "epoch": 1.16, "learning_rate": 2.32330563722056e-05, "loss": 0.2896, "step": 811000 }, { "epoch": 1.16, "learning_rate": 2.316541206673529e-05, "loss": 0.2896, "step": 811500 }, { "epoch": 1.16, "learning_rate": 2.309792314485815e-05, "loss": 0.2895, "step": 812000 }, { "epoch": 1.16, "learning_rate": 2.3030589791086353e-05, "loss": 0.2893, "step": 812500 }, { "epoch": 1.17, "learning_rate": 2.2963412189506695e-05, "loss": 0.2893, "step": 813000 }, { "epoch": 1.17, "learning_rate": 2.2896390523780156e-05, "loss": 0.2894, "step": 813500 }, { "epoch": 1.17, "learning_rate": 2.282952497714145e-05, "loss": 0.2894, "step": 814000 }, { "epoch": 1.17, "learning_rate": 2.2762815732398387e-05, "loss": 0.2896, "step": 814500 }, { "epoch": 1.17, "learning_rate": 2.2696262971931538e-05, "loss": 0.2891, "step": 815000 }, { "epoch": 1.17, "learning_rate": 2.2629866877693577e-05, "loss": 0.2892, "step": 815500 }, { "epoch": 1.17, "learning_rate": 2.2563627631208887e-05, "loss": 0.2892, "step": 816000 }, { "epoch": 1.17, "learning_rate": 2.2497545413573065e-05, "loss": 0.2902, "step": 816500 }, { "epoch": 1.17, "learning_rate": 2.2431620405452336e-05, "loss": 0.2889, "step": 817000 }, { "epoch": 1.17, "learning_rate": 2.23658527870832e-05, "loss": 0.2892, "step": 817500 }, { "epoch": 1.18, "learning_rate": 2.230024273827179e-05, "loss": 0.2885, "step": 818000 }, { "epoch": 1.18, "learning_rate": 2.223479043839345e-05, "loss": 0.2888, "step": 818500 }, { "epoch": 1.18, "learning_rate": 2.216949606639231e-05, "loss": 0.2892, "step": 819000 }, { "epoch": 1.18, "learning_rate": 2.2104359800780665e-05, "loss": 0.2885, "step": 819500 }, { "epoch": 1.18, "learning_rate": 2.2039381819638596e-05, "loss": 0.2894, "step": 820000 }, { "epoch": 1.18, "learning_rate": 2.1974562300613417e-05, "loss": 0.2893, "step": 820500 }, { "epoch": 1.18, "learning_rate": 2.1909901420919184e-05, "loss": 0.2891, "step": 821000 }, { "epoch": 1.18, "learning_rate": 2.1845399357336326e-05, "loss": 0.2891, "step": 821500 }, { "epoch": 1.18, "learning_rate": 2.1781056286210997e-05, "loss": 0.2891, "step": 822000 }, { "epoch": 1.18, "learning_rate": 2.1716872383454674e-05, "loss": 0.2892, "step": 822500 }, { "epoch": 1.19, "learning_rate": 2.1652847824543744e-05, "loss": 0.2884, "step": 823000 }, { "epoch": 1.19, "learning_rate": 2.1588982784518853e-05, "loss": 0.288, "step": 823500 }, { "epoch": 1.19, "learning_rate": 2.1525277437984636e-05, "loss": 0.2889, "step": 824000 }, { "epoch": 1.19, "learning_rate": 2.1461731959109053e-05, "loss": 0.2893, "step": 824500 }, { "epoch": 1.19, "learning_rate": 2.1398346521623e-05, "loss": 0.2884, "step": 825000 }, { "epoch": 1.19, "learning_rate": 2.1335121298819867e-05, "loss": 0.2891, "step": 825500 }, { "epoch": 1.19, "learning_rate": 2.1272056463554978e-05, "loss": 0.2887, "step": 826000 }, { "epoch": 1.19, "learning_rate": 2.1209152188245214e-05, "loss": 0.2887, "step": 826500 }, { "epoch": 1.19, "learning_rate": 2.114640864486845e-05, "loss": 0.2892, "step": 827000 }, { "epoch": 1.19, "learning_rate": 2.1083826004963102e-05, "loss": 0.2888, "step": 827500 }, { "epoch": 1.2, "learning_rate": 2.1021404439627775e-05, "loss": 0.2889, "step": 828000 }, { "epoch": 1.2, "learning_rate": 2.09591441195206e-05, "loss": 0.2878, "step": 828500 }, { "epoch": 1.2, "learning_rate": 2.089704521485896e-05, "loss": 0.2888, "step": 829000 }, { "epoch": 1.2, "learning_rate": 2.083510789541883e-05, "loss": 0.2879, "step": 829500 }, { "epoch": 1.2, "learning_rate": 2.0773332330534513e-05, "loss": 0.2887, "step": 830000 }, { "epoch": 1.2, "learning_rate": 2.0711718689098057e-05, "loss": 0.2886, "step": 830500 }, { "epoch": 1.2, "learning_rate": 2.0650267139558772e-05, "loss": 0.2887, "step": 831000 }, { "epoch": 1.2, "learning_rate": 2.058897784992289e-05, "loss": 0.2885, "step": 831500 }, { "epoch": 1.2, "learning_rate": 2.052785098775293e-05, "loss": 0.2886, "step": 832000 }, { "epoch": 1.2, "learning_rate": 2.0466886720167436e-05, "loss": 0.2877, "step": 832500 }, { "epoch": 1.21, "learning_rate": 2.04060852138404e-05, "loss": 0.2878, "step": 833000 }, { "epoch": 1.21, "learning_rate": 2.0345446635000783e-05, "loss": 0.2887, "step": 833500 }, { "epoch": 1.21, "learning_rate": 2.028497114943219e-05, "loss": 0.2888, "step": 834000 }, { "epoch": 1.21, "learning_rate": 2.022465892247223e-05, "loss": 0.2881, "step": 834500 }, { "epoch": 1.21, "learning_rate": 2.0164510119012263e-05, "loss": 0.2878, "step": 835000 }, { "epoch": 1.21, "learning_rate": 2.0104524903496834e-05, "loss": 0.2879, "step": 835500 }, { "epoch": 1.21, "learning_rate": 2.0044703439923217e-05, "loss": 0.2876, "step": 836000 }, { "epoch": 1.21, "learning_rate": 1.998504589184101e-05, "loss": 0.2879, "step": 836500 }, { "epoch": 1.21, "learning_rate": 1.9925552422351654e-05, "loss": 0.2878, "step": 837000 }, { "epoch": 1.21, "learning_rate": 1.9866223194108028e-05, "loss": 0.2884, "step": 837500 }, { "epoch": 1.22, "learning_rate": 1.9807058369314016e-05, "loss": 0.288, "step": 838000 }, { "epoch": 1.22, "learning_rate": 1.9748058109723953e-05, "loss": 0.2879, "step": 838500 }, { "epoch": 1.22, "learning_rate": 1.968922257664231e-05, "loss": 0.2878, "step": 839000 }, { "epoch": 1.22, "learning_rate": 1.9630551930923155e-05, "loss": 0.288, "step": 839500 }, { "epoch": 1.22, "learning_rate": 1.9572046332969825e-05, "loss": 0.2881, "step": 840000 }, { "epoch": 1.22, "learning_rate": 1.95137059427344e-05, "loss": 0.2884, "step": 840500 }, { "epoch": 1.22, "learning_rate": 1.945553091971727e-05, "loss": 0.2883, "step": 841000 }, { "epoch": 1.22, "learning_rate": 1.93975214229667e-05, "loss": 0.2885, "step": 841500 }, { "epoch": 1.22, "learning_rate": 1.933967761107847e-05, "loss": 0.2877, "step": 842000 }, { "epoch": 1.22, "learning_rate": 1.928199964219533e-05, "loss": 0.2876, "step": 842500 }, { "epoch": 1.23, "learning_rate": 1.9224487674006694e-05, "loss": 0.2873, "step": 843000 }, { "epoch": 1.23, "learning_rate": 1.9167141863748015e-05, "loss": 0.288, "step": 843500 }, { "epoch": 1.23, "learning_rate": 1.9109962368200602e-05, "loss": 0.2874, "step": 844000 }, { "epoch": 1.23, "learning_rate": 1.9052949343690977e-05, "loss": 0.2884, "step": 844500 }, { "epoch": 1.23, "learning_rate": 1.8996102946090586e-05, "loss": 0.2874, "step": 845000 }, { "epoch": 1.23, "learning_rate": 1.8939423330815345e-05, "loss": 0.2879, "step": 845500 }, { "epoch": 1.23, "learning_rate": 1.888291065282509e-05, "loss": 0.2872, "step": 846000 }, { "epoch": 1.23, "learning_rate": 1.882656506662338e-05, "loss": 0.2882, "step": 846500 }, { "epoch": 1.23, "learning_rate": 1.8770386726256865e-05, "loss": 0.2875, "step": 847000 }, { "epoch": 1.23, "learning_rate": 1.8714375785315006e-05, "loss": 0.2871, "step": 847500 }, { "epoch": 1.24, "learning_rate": 1.8658532396929565e-05, "loss": 0.2872, "step": 848000 }, { "epoch": 1.24, "learning_rate": 1.8602856713774208e-05, "loss": 0.2874, "step": 848500 }, { "epoch": 1.24, "learning_rate": 1.8547348888064178e-05, "loss": 0.2869, "step": 849000 }, { "epoch": 1.24, "learning_rate": 1.8492009071555703e-05, "loss": 0.2873, "step": 849500 }, { "epoch": 1.24, "learning_rate": 1.8436837415545772e-05, "loss": 0.2874, "step": 850000 }, { "epoch": 1.24, "learning_rate": 1.838183407087156e-05, "loss": 0.2878, "step": 850500 }, { "epoch": 1.24, "learning_rate": 1.8326999187910095e-05, "loss": 0.2872, "step": 851000 }, { "epoch": 1.24, "learning_rate": 1.8272332916577875e-05, "loss": 0.2867, "step": 851500 }, { "epoch": 1.24, "learning_rate": 1.8217835406330415e-05, "loss": 0.2874, "step": 852000 }, { "epoch": 1.24, "learning_rate": 1.81635068061618e-05, "loss": 0.2873, "step": 852500 }, { "epoch": 1.25, "learning_rate": 1.810934726460436e-05, "loss": 0.2875, "step": 853000 }, { "epoch": 1.25, "learning_rate": 1.80553569297282e-05, "loss": 0.2873, "step": 853500 }, { "epoch": 1.25, "learning_rate": 1.800153594914084e-05, "loss": 0.2875, "step": 854000 }, { "epoch": 1.25, "learning_rate": 1.7947884469986816e-05, "loss": 0.2872, "step": 854500 }, { "epoch": 1.25, "learning_rate": 1.7894402638947176e-05, "loss": 0.287, "step": 855000 }, { "epoch": 1.25, "learning_rate": 1.7841090602239237e-05, "loss": 0.2872, "step": 855500 }, { "epoch": 1.25, "learning_rate": 1.778794850561604e-05, "loss": 0.2871, "step": 856000 }, { "epoch": 1.25, "learning_rate": 1.7734976494366073e-05, "loss": 0.2873, "step": 856500 }, { "epoch": 1.25, "learning_rate": 1.7682174713312805e-05, "loss": 0.2871, "step": 857000 }, { "epoch": 1.25, "learning_rate": 1.7629543306814255e-05, "loss": 0.2875, "step": 857500 }, { "epoch": 1.26, "learning_rate": 1.75770824187627e-05, "loss": 0.287, "step": 858000 }, { "epoch": 1.26, "learning_rate": 1.7524792192584186e-05, "loss": 0.2865, "step": 858500 }, { "epoch": 1.26, "learning_rate": 1.747267277123821e-05, "loss": 0.2867, "step": 859000 }, { "epoch": 1.26, "learning_rate": 1.74207242972173e-05, "loss": 0.2862, "step": 859500 }, { "epoch": 1.26, "learning_rate": 1.7368946912546556e-05, "loss": 0.2878, "step": 860000 }, { "epoch": 1.26, "learning_rate": 1.7317340758783407e-05, "loss": 0.2875, "step": 860500 }, { "epoch": 1.26, "learning_rate": 1.726590597701708e-05, "loss": 0.2874, "step": 861000 }, { "epoch": 1.26, "learning_rate": 1.7214642707868325e-05, "loss": 0.2863, "step": 861500 }, { "epoch": 1.26, "learning_rate": 1.7163551091488952e-05, "loss": 0.2869, "step": 862000 }, { "epoch": 1.26, "learning_rate": 1.711263126756148e-05, "loss": 0.2867, "step": 862500 }, { "epoch": 1.27, "learning_rate": 1.7061883375298788e-05, "loss": 0.2874, "step": 863000 }, { "epoch": 1.27, "learning_rate": 1.7011307553443647e-05, "loss": 0.286, "step": 863500 }, { "epoch": 1.27, "learning_rate": 1.6960903940268456e-05, "loss": 0.2873, "step": 864000 }, { "epoch": 1.27, "learning_rate": 1.6910672673574746e-05, "loss": 0.2867, "step": 864500 }, { "epoch": 1.27, "learning_rate": 1.6860613890692876e-05, "loss": 0.2872, "step": 865000 }, { "epoch": 1.27, "learning_rate": 1.6810727728481673e-05, "loss": 0.2864, "step": 865500 }, { "epoch": 1.27, "learning_rate": 1.6761014323327962e-05, "loss": 0.2866, "step": 866000 }, { "epoch": 1.27, "learning_rate": 1.6711473811146333e-05, "loss": 0.2867, "step": 866500 }, { "epoch": 1.27, "learning_rate": 1.6662106327378645e-05, "loss": 0.2869, "step": 867000 }, { "epoch": 1.27, "learning_rate": 1.6612912006993688e-05, "loss": 0.2867, "step": 867500 }, { "epoch": 1.28, "learning_rate": 1.6563890984486884e-05, "loss": 0.2866, "step": 868000 }, { "epoch": 1.28, "learning_rate": 1.6515043393879825e-05, "loss": 0.287, "step": 868500 }, { "epoch": 1.28, "learning_rate": 1.6466369368719955e-05, "loss": 0.2868, "step": 869000 }, { "epoch": 1.28, "learning_rate": 1.641786904208022e-05, "loss": 0.2865, "step": 869500 }, { "epoch": 1.28, "learning_rate": 1.6369542546558626e-05, "loss": 0.2864, "step": 870000 }, { "epoch": 1.28, "learning_rate": 1.6321390014277996e-05, "loss": 0.2868, "step": 870500 }, { "epoch": 1.28, "learning_rate": 1.6273411576885517e-05, "loss": 0.2869, "step": 871000 }, { "epoch": 1.28, "learning_rate": 1.6225607365552378e-05, "loss": 0.2863, "step": 871500 }, { "epoch": 1.28, "learning_rate": 1.617797751097349e-05, "loss": 0.2866, "step": 872000 }, { "epoch": 1.28, "learning_rate": 1.6130522143367032e-05, "loss": 0.2868, "step": 872500 }, { "epoch": 1.28, "learning_rate": 1.608324139247421e-05, "loss": 0.2864, "step": 873000 }, { "epoch": 1.29, "learning_rate": 1.6036135387558756e-05, "loss": 0.2862, "step": 873500 }, { "epoch": 1.29, "learning_rate": 1.5989204257406693e-05, "loss": 0.286, "step": 874000 }, { "epoch": 1.29, "learning_rate": 1.594244813032595e-05, "loss": 0.286, "step": 874500 }, { "epoch": 1.29, "learning_rate": 1.5895867134145974e-05, "loss": 0.2861, "step": 875000 }, { "epoch": 1.29, "learning_rate": 1.5849461396217467e-05, "loss": 0.2855, "step": 875500 }, { "epoch": 1.29, "learning_rate": 1.5803231043411912e-05, "loss": 0.286, "step": 876000 }, { "epoch": 1.29, "learning_rate": 1.575717620212132e-05, "loss": 0.2867, "step": 876500 }, { "epoch": 1.29, "learning_rate": 1.5711296998257902e-05, "loss": 0.2867, "step": 877000 }, { "epoch": 1.29, "learning_rate": 1.5665593557253623e-05, "loss": 0.2865, "step": 877500 }, { "epoch": 1.29, "learning_rate": 1.562006600405996e-05, "loss": 0.286, "step": 878000 }, { "epoch": 1.3, "learning_rate": 1.5574714463147512e-05, "loss": 0.2857, "step": 878500 }, { "epoch": 1.3, "learning_rate": 1.5529539058505624e-05, "loss": 0.286, "step": 879000 }, { "epoch": 1.3, "learning_rate": 1.5484539913642175e-05, "loss": 0.2862, "step": 879500 }, { "epoch": 1.3, "learning_rate": 1.543971715158307e-05, "loss": 0.2863, "step": 880000 }, { "epoch": 1.3, "learning_rate": 1.539507089487205e-05, "loss": 0.2865, "step": 880500 }, { "epoch": 1.3, "learning_rate": 1.535060126557028e-05, "loss": 0.2861, "step": 881000 }, { "epoch": 1.3, "learning_rate": 1.5306308385255997e-05, "loss": 0.2859, "step": 881500 }, { "epoch": 1.3, "learning_rate": 1.5262192375024284e-05, "loss": 0.2856, "step": 882000 }, { "epoch": 1.3, "learning_rate": 1.521825335548661e-05, "loss": 0.2861, "step": 882500 }, { "epoch": 1.3, "learning_rate": 1.5174491446770566e-05, "loss": 0.2866, "step": 883000 }, { "epoch": 1.31, "learning_rate": 1.5130906768519563e-05, "loss": 0.2862, "step": 883500 }, { "epoch": 1.31, "learning_rate": 1.508749943989242e-05, "loss": 0.2864, "step": 884000 }, { "epoch": 1.31, "learning_rate": 1.5044269579563144e-05, "loss": 0.286, "step": 884500 }, { "epoch": 1.31, "learning_rate": 1.500121730572051e-05, "loss": 0.2859, "step": 885000 }, { "epoch": 1.31, "learning_rate": 1.4958342736067783e-05, "loss": 0.2856, "step": 885500 }, { "epoch": 1.31, "learning_rate": 1.4915645987822406e-05, "loss": 0.2858, "step": 886000 }, { "epoch": 1.31, "learning_rate": 1.4873127177715653e-05, "loss": 0.2847, "step": 886500 }, { "epoch": 1.31, "learning_rate": 1.4830786421992347e-05, "loss": 0.2863, "step": 887000 }, { "epoch": 1.31, "learning_rate": 1.4788623836410479e-05, "loss": 0.2857, "step": 887500 }, { "epoch": 1.31, "learning_rate": 1.4746639536240942e-05, "loss": 0.2856, "step": 888000 }, { "epoch": 1.32, "learning_rate": 1.4704833636267232e-05, "loss": 0.2858, "step": 888500 }, { "epoch": 1.32, "learning_rate": 1.4663206250785055e-05, "loss": 0.2854, "step": 889000 }, { "epoch": 1.32, "learning_rate": 1.4621757493602125e-05, "loss": 0.2857, "step": 889500 }, { "epoch": 1.32, "learning_rate": 1.4580487478037748e-05, "loss": 0.2854, "step": 890000 }, { "epoch": 1.32, "learning_rate": 1.4539396316922552e-05, "loss": 0.2862, "step": 890500 }, { "epoch": 1.32, "learning_rate": 1.4498484122598232e-05, "loss": 0.2858, "step": 891000 }, { "epoch": 1.32, "learning_rate": 1.4457751006917137e-05, "loss": 0.2853, "step": 891500 }, { "epoch": 1.32, "learning_rate": 1.4417197081242083e-05, "loss": 0.2856, "step": 892000 }, { "epoch": 1.32, "learning_rate": 1.4376822456445926e-05, "loss": 0.2859, "step": 892500 }, { "epoch": 1.32, "learning_rate": 1.433662724291136e-05, "loss": 0.2859, "step": 893000 }, { "epoch": 1.33, "learning_rate": 1.4296611550530563e-05, "loss": 0.2859, "step": 893500 }, { "epoch": 1.33, "learning_rate": 1.4256775488704904e-05, "loss": 0.2859, "step": 894000 }, { "epoch": 1.33, "learning_rate": 1.4217119166344665e-05, "loss": 0.2851, "step": 894500 }, { "epoch": 1.33, "learning_rate": 1.4177642691868717e-05, "loss": 0.2856, "step": 895000 }, { "epoch": 1.33, "learning_rate": 1.4138346173204218e-05, "loss": 0.2855, "step": 895500 }, { "epoch": 1.33, "learning_rate": 1.4099229717786368e-05, "loss": 0.2857, "step": 896000 }, { "epoch": 1.33, "learning_rate": 1.406029343255806e-05, "loss": 0.2852, "step": 896500 }, { "epoch": 1.33, "learning_rate": 1.4021537423969588e-05, "loss": 0.2858, "step": 897000 }, { "epoch": 1.33, "learning_rate": 1.3982961797978431e-05, "loss": 0.2853, "step": 897500 }, { "epoch": 1.33, "learning_rate": 1.3944566660048863e-05, "loss": 0.2851, "step": 898000 }, { "epoch": 1.34, "learning_rate": 1.3906352115151725e-05, "loss": 0.285, "step": 898500 }, { "epoch": 1.34, "learning_rate": 1.3868318267764128e-05, "loss": 0.2854, "step": 899000 }, { "epoch": 1.34, "learning_rate": 1.3830465221869146e-05, "loss": 0.2855, "step": 899500 }, { "epoch": 1.34, "learning_rate": 1.3792793080955574e-05, "loss": 0.2852, "step": 900000 }, { "epoch": 1.34, "learning_rate": 1.3755301948017599e-05, "loss": 0.2853, "step": 900500 }, { "epoch": 1.34, "learning_rate": 1.3717991925554562e-05, "loss": 0.285, "step": 901000 }, { "epoch": 1.34, "learning_rate": 1.368086311557062e-05, "loss": 0.2854, "step": 901500 }, { "epoch": 1.34, "learning_rate": 1.3643915619574529e-05, "loss": 0.2849, "step": 902000 }, { "epoch": 1.34, "learning_rate": 1.3607149538579341e-05, "loss": 0.2853, "step": 902500 }, { "epoch": 1.34, "learning_rate": 1.35705649731021e-05, "loss": 0.2849, "step": 903000 }, { "epoch": 1.35, "learning_rate": 1.3534162023163642e-05, "loss": 0.2853, "step": 903500 }, { "epoch": 1.35, "learning_rate": 1.3497940788288195e-05, "loss": 0.2853, "step": 904000 }, { "epoch": 1.35, "learning_rate": 1.3461901367503262e-05, "loss": 0.2858, "step": 904500 }, { "epoch": 1.35, "learning_rate": 1.3426043859339253e-05, "loss": 0.2854, "step": 905000 }, { "epoch": 1.35, "learning_rate": 1.3390368361829197e-05, "loss": 0.2853, "step": 905500 }, { "epoch": 1.35, "learning_rate": 1.3354874972508582e-05, "loss": 0.2851, "step": 906000 }, { "epoch": 1.35, "learning_rate": 1.3319563788414934e-05, "loss": 0.2847, "step": 906500 }, { "epoch": 1.35, "learning_rate": 1.3284434906087695e-05, "loss": 0.2846, "step": 907000 }, { "epoch": 1.35, "learning_rate": 1.3249488421567911e-05, "loss": 0.2851, "step": 907500 }, { "epoch": 1.35, "learning_rate": 1.3214724430397915e-05, "loss": 0.285, "step": 908000 }, { "epoch": 1.36, "learning_rate": 1.3180143027621145e-05, "loss": 0.2852, "step": 908500 }, { "epoch": 1.36, "learning_rate": 1.314574430778182e-05, "loss": 0.2844, "step": 909000 }, { "epoch": 1.36, "learning_rate": 1.311152836492473e-05, "loss": 0.2848, "step": 909500 }, { "epoch": 1.36, "learning_rate": 1.3077495292594966e-05, "loss": 0.285, "step": 910000 }, { "epoch": 1.36, "learning_rate": 1.3043645183837645e-05, "loss": 0.2843, "step": 910500 }, { "epoch": 1.36, "learning_rate": 1.3009978131197669e-05, "loss": 0.2853, "step": 911000 }, { "epoch": 1.36, "learning_rate": 1.297649422671947e-05, "loss": 0.2848, "step": 911500 }, { "epoch": 1.36, "learning_rate": 1.2943193561946762e-05, "loss": 0.2854, "step": 912000 }, { "epoch": 1.36, "learning_rate": 1.291007622792231e-05, "loss": 0.2845, "step": 912500 }, { "epoch": 1.36, "learning_rate": 1.2877142315187628e-05, "loss": 0.2849, "step": 913000 }, { "epoch": 1.37, "learning_rate": 1.2844391913782773e-05, "loss": 0.2852, "step": 913500 }, { "epoch": 1.37, "learning_rate": 1.28118251132461e-05, "loss": 0.2845, "step": 914000 }, { "epoch": 1.37, "learning_rate": 1.2779442002613984e-05, "loss": 0.2844, "step": 914500 }, { "epoch": 1.37, "learning_rate": 1.274724267042063e-05, "loss": 0.285, "step": 915000 }, { "epoch": 1.37, "learning_rate": 1.2715227204697775e-05, "loss": 0.2848, "step": 915500 }, { "epoch": 1.37, "learning_rate": 1.2683395692974472e-05, "loss": 0.2843, "step": 916000 }, { "epoch": 1.37, "learning_rate": 1.2651748222276879e-05, "loss": 0.284, "step": 916500 }, { "epoch": 1.37, "learning_rate": 1.2620284879127947e-05, "loss": 0.2846, "step": 917000 }, { "epoch": 1.37, "learning_rate": 1.2589005749547281e-05, "loss": 0.2848, "step": 917500 }, { "epoch": 1.37, "learning_rate": 1.2557910919050803e-05, "loss": 0.2845, "step": 918000 }, { "epoch": 1.38, "learning_rate": 1.2527000472650597e-05, "loss": 0.2842, "step": 918500 }, { "epoch": 1.38, "learning_rate": 1.2496274494854666e-05, "loss": 0.285, "step": 919000 }, { "epoch": 1.38, "learning_rate": 1.2465733069666629e-05, "loss": 0.2848, "step": 919500 }, { "epoch": 1.38, "learning_rate": 1.24353762805856e-05, "loss": 0.2847, "step": 920000 }, { "epoch": 1.38, "learning_rate": 1.240520421060586e-05, "loss": 0.2843, "step": 920500 }, { "epoch": 1.38, "learning_rate": 1.2375216942216713e-05, "loss": 0.2848, "step": 921000 }, { "epoch": 1.38, "learning_rate": 1.2345414557402198e-05, "loss": 0.2848, "step": 921500 }, { "epoch": 1.38, "learning_rate": 1.2315797137640906e-05, "loss": 0.2843, "step": 922000 }, { "epoch": 1.38, "learning_rate": 1.2286364763905723e-05, "loss": 0.2847, "step": 922500 }, { "epoch": 1.38, "learning_rate": 1.225711751666363e-05, "loss": 0.2841, "step": 923000 }, { "epoch": 1.39, "learning_rate": 1.2228055475875488e-05, "loss": 0.285, "step": 923500 }, { "epoch": 1.39, "learning_rate": 1.2199178720995825e-05, "loss": 0.2845, "step": 924000 }, { "epoch": 1.39, "learning_rate": 1.217048733097256e-05, "loss": 0.2843, "step": 924500 }, { "epoch": 1.39, "learning_rate": 1.2141981384246874e-05, "loss": 0.2845, "step": 925000 }, { "epoch": 1.39, "learning_rate": 1.211366095875293e-05, "loss": 0.2849, "step": 925500 }, { "epoch": 1.39, "learning_rate": 1.2085526131917685e-05, "loss": 0.2847, "step": 926000 }, { "epoch": 1.39, "learning_rate": 1.2057576980660691e-05, "loss": 0.2841, "step": 926500 }, { "epoch": 1.39, "learning_rate": 1.2029813581393866e-05, "loss": 0.2847, "step": 927000 }, { "epoch": 1.39, "learning_rate": 1.2002236010021269e-05, "loss": 0.2849, "step": 927500 }, { "epoch": 1.39, "learning_rate": 1.197484434193893e-05, "loss": 0.2842, "step": 928000 }, { "epoch": 1.4, "learning_rate": 1.1947638652034617e-05, "loss": 0.2843, "step": 928500 }, { "epoch": 1.4, "learning_rate": 1.192061901468768e-05, "loss": 0.2844, "step": 929000 }, { "epoch": 1.4, "learning_rate": 1.1893785503768736e-05, "loss": 0.2838, "step": 929500 }, { "epoch": 1.4, "learning_rate": 1.1867138192639601e-05, "loss": 0.2842, "step": 930000 }, { "epoch": 1.4, "learning_rate": 1.1840677154152987e-05, "loss": 0.2848, "step": 930500 }, { "epoch": 1.4, "learning_rate": 1.1814402460652382e-05, "loss": 0.2842, "step": 931000 }, { "epoch": 1.4, "learning_rate": 1.178831418397181e-05, "loss": 0.2839, "step": 931500 }, { "epoch": 1.4, "learning_rate": 1.176241239543558e-05, "loss": 0.2842, "step": 932000 }, { "epoch": 1.4, "learning_rate": 1.173669716585822e-05, "loss": 0.2847, "step": 932500 }, { "epoch": 1.4, "learning_rate": 1.171116856554418e-05, "loss": 0.2836, "step": 933000 }, { "epoch": 1.41, "learning_rate": 1.168582666428768e-05, "loss": 0.284, "step": 933500 }, { "epoch": 1.41, "learning_rate": 1.1660671531372517e-05, "loss": 0.2837, "step": 934000 }, { "epoch": 1.41, "learning_rate": 1.1635703235571846e-05, "loss": 0.2848, "step": 934500 }, { "epoch": 1.41, "learning_rate": 1.1610921845148052e-05, "loss": 0.2845, "step": 935000 }, { "epoch": 1.41, "learning_rate": 1.1586327427852503e-05, "loss": 0.2847, "step": 935500 }, { "epoch": 1.41, "learning_rate": 1.156192005092539e-05, "loss": 0.2843, "step": 936000 }, { "epoch": 1.41, "learning_rate": 1.153769978109557e-05, "loss": 0.2836, "step": 936500 }, { "epoch": 1.41, "learning_rate": 1.1513666684580308e-05, "loss": 0.2847, "step": 937000 }, { "epoch": 1.41, "learning_rate": 1.1489820827085185e-05, "loss": 0.2839, "step": 937500 }, { "epoch": 1.41, "learning_rate": 1.1466162273803876e-05, "loss": 0.2844, "step": 938000 }, { "epoch": 1.42, "learning_rate": 1.144269108941795e-05, "loss": 0.284, "step": 938500 }, { "epoch": 1.42, "learning_rate": 1.1419407338096732e-05, "loss": 0.285, "step": 939000 }, { "epoch": 1.42, "learning_rate": 1.1396311083497103e-05, "loss": 0.2841, "step": 939500 }, { "epoch": 1.42, "learning_rate": 1.1373402388763346e-05, "loss": 0.2834, "step": 940000 }, { "epoch": 1.42, "learning_rate": 1.1350681316526965e-05, "loss": 0.2837, "step": 940500 }, { "epoch": 1.42, "learning_rate": 1.1328147928906494e-05, "loss": 0.2842, "step": 941000 }, { "epoch": 1.42, "learning_rate": 1.1305802287507358e-05, "loss": 0.2839, "step": 941500 }, { "epoch": 1.42, "learning_rate": 1.1283644453421678e-05, "loss": 0.284, "step": 942000 }, { "epoch": 1.42, "learning_rate": 1.1261674487228149e-05, "loss": 0.2839, "step": 942500 }, { "epoch": 1.42, "learning_rate": 1.1239892448991798e-05, "loss": 0.2834, "step": 943000 }, { "epoch": 1.43, "learning_rate": 1.1218298398263894e-05, "loss": 0.2833, "step": 943500 }, { "epoch": 1.43, "learning_rate": 1.1196892394081743e-05, "loss": 0.284, "step": 944000 }, { "epoch": 1.43, "learning_rate": 1.1175674494968552e-05, "loss": 0.2833, "step": 944500 }, { "epoch": 1.43, "learning_rate": 1.1154644758933235e-05, "loss": 0.2835, "step": 945000 }, { "epoch": 1.43, "learning_rate": 1.11338032434703e-05, "loss": 0.2832, "step": 945500 }, { "epoch": 1.43, "learning_rate": 1.1113150005559644e-05, "loss": 0.2836, "step": 946000 }, { "epoch": 1.43, "learning_rate": 1.1092685101666438e-05, "loss": 0.284, "step": 946500 }, { "epoch": 1.43, "learning_rate": 1.1072408587740942e-05, "loss": 0.2836, "step": 947000 }, { "epoch": 1.43, "learning_rate": 1.1052320519218383e-05, "loss": 0.2833, "step": 947500 }, { "epoch": 1.43, "learning_rate": 1.1032420951018755e-05, "loss": 0.2841, "step": 948000 }, { "epoch": 1.44, "learning_rate": 1.1012709937546722e-05, "loss": 0.2837, "step": 948500 }, { "epoch": 1.44, "learning_rate": 1.0993187532691458e-05, "loss": 0.2844, "step": 949000 }, { "epoch": 1.44, "learning_rate": 1.0973853789826454e-05, "loss": 0.2842, "step": 949500 }, { "epoch": 1.44, "learning_rate": 1.0954708761809438e-05, "loss": 0.2843, "step": 950000 }, { "epoch": 1.44, "learning_rate": 1.0935752500982175e-05, "loss": 0.2834, "step": 950500 }, { "epoch": 1.44, "learning_rate": 1.091698505917036e-05, "loss": 0.2853, "step": 951000 }, { "epoch": 1.44, "learning_rate": 1.0898406487683472e-05, "loss": 0.284, "step": 951500 }, { "epoch": 1.44, "learning_rate": 1.0880016837314599e-05, "loss": 0.2833, "step": 952000 }, { "epoch": 1.44, "learning_rate": 1.0861816158340365e-05, "loss": 0.2835, "step": 952500 }, { "epoch": 1.44, "learning_rate": 1.084380450052071e-05, "loss": 0.284, "step": 953000 }, { "epoch": 1.45, "learning_rate": 1.0825981913098828e-05, "loss": 0.2835, "step": 953500 }, { "epoch": 1.45, "learning_rate": 1.0808348444801e-05, "loss": 0.2836, "step": 954000 }, { "epoch": 1.45, "learning_rate": 1.0790904143836438e-05, "loss": 0.2834, "step": 954500 }, { "epoch": 1.45, "learning_rate": 1.0773649057897206e-05, "loss": 0.2833, "step": 955000 }, { "epoch": 1.45, "learning_rate": 1.0756583234158057e-05, "loss": 0.2839, "step": 955500 }, { "epoch": 1.45, "learning_rate": 1.073970671927628e-05, "loss": 0.2834, "step": 956000 }, { "epoch": 1.45, "learning_rate": 1.0723019559391643e-05, "loss": 0.2843, "step": 956500 }, { "epoch": 1.45, "learning_rate": 1.0706521800126198e-05, "loss": 0.2843, "step": 957000 }, { "epoch": 1.45, "learning_rate": 1.0690213486584175e-05, "loss": 0.284, "step": 957500 }, { "epoch": 1.45, "learning_rate": 1.0674094663351906e-05, "loss": 0.2833, "step": 958000 }, { "epoch": 1.46, "learning_rate": 1.0658165374497611e-05, "loss": 0.2836, "step": 958500 }, { "epoch": 1.46, "learning_rate": 1.0642425663571383e-05, "loss": 0.2839, "step": 959000 }, { "epoch": 1.46, "learning_rate": 1.062687557360497e-05, "loss": 0.2834, "step": 959500 }, { "epoch": 1.46, "learning_rate": 1.0611515147111736e-05, "loss": 0.2832, "step": 960000 }, { "epoch": 1.46, "learning_rate": 1.0596344426086501e-05, "loss": 0.2831, "step": 960500 }, { "epoch": 1.46, "learning_rate": 1.0581363452005424e-05, "loss": 0.2836, "step": 961000 }, { "epoch": 1.46, "learning_rate": 1.0566572265825932e-05, "loss": 0.2835, "step": 961500 }, { "epoch": 1.46, "learning_rate": 1.0551970907986557e-05, "loss": 0.2835, "step": 962000 }, { "epoch": 1.46, "learning_rate": 1.0537559418406849e-05, "loss": 0.2838, "step": 962500 }, { "epoch": 1.46, "learning_rate": 1.0523337836487271e-05, "loss": 0.2829, "step": 963000 }, { "epoch": 1.47, "learning_rate": 1.0509306201109092e-05, "loss": 0.2835, "step": 963500 }, { "epoch": 1.47, "learning_rate": 1.0495464550634267e-05, "loss": 0.284, "step": 964000 }, { "epoch": 1.47, "learning_rate": 1.0481812922905339e-05, "loss": 0.2837, "step": 964500 }, { "epoch": 1.47, "learning_rate": 1.046835135524533e-05, "loss": 0.2834, "step": 965000 }, { "epoch": 1.47, "learning_rate": 1.0455079884457653e-05, "loss": 0.2832, "step": 965500 }, { "epoch": 1.47, "learning_rate": 1.044199854682601e-05, "loss": 0.2837, "step": 966000 }, { "epoch": 1.47, "learning_rate": 1.0429107378114277e-05, "loss": 0.2834, "step": 966500 }, { "epoch": 1.47, "learning_rate": 1.0416406413566414e-05, "loss": 0.2833, "step": 967000 }, { "epoch": 1.47, "learning_rate": 1.0403895687906366e-05, "loss": 0.2839, "step": 967500 }, { "epoch": 1.47, "learning_rate": 1.0391575235337991e-05, "loss": 0.2828, "step": 968000 }, { "epoch": 1.48, "learning_rate": 1.0379445089544929e-05, "loss": 0.2837, "step": 968500 }, { "epoch": 1.48, "learning_rate": 1.0367505283690547e-05, "loss": 0.2832, "step": 969000 }, { "epoch": 1.48, "learning_rate": 1.0355755850417803e-05, "loss": 0.283, "step": 969500 }, { "epoch": 1.48, "learning_rate": 1.0344196821849202e-05, "loss": 0.2821, "step": 970000 }, { "epoch": 1.48, "learning_rate": 1.0332828229586692e-05, "loss": 0.2831, "step": 970500 }, { "epoch": 1.48, "learning_rate": 1.032165010471157e-05, "loss": 0.2835, "step": 971000 }, { "epoch": 1.48, "learning_rate": 1.0310662477784401e-05, "loss": 0.2824, "step": 971500 }, { "epoch": 1.48, "learning_rate": 1.0299865378844936e-05, "loss": 0.2828, "step": 972000 }, { "epoch": 1.48, "learning_rate": 1.028925883741203e-05, "loss": 0.2832, "step": 972500 }, { "epoch": 1.48, "learning_rate": 1.0278842882483569e-05, "loss": 0.2836, "step": 973000 }, { "epoch": 1.49, "learning_rate": 1.026861754253637e-05, "loss": 0.2837, "step": 973500 }, { "epoch": 1.49, "learning_rate": 1.025858284552612e-05, "loss": 0.2829, "step": 974000 }, { "epoch": 1.49, "learning_rate": 1.0248738818887307e-05, "loss": 0.2829, "step": 974500 }, { "epoch": 1.49, "learning_rate": 1.023908548953311e-05, "loss": 0.2832, "step": 975000 }, { "epoch": 1.49, "learning_rate": 1.0229622883855378e-05, "loss": 0.2837, "step": 975500 }, { "epoch": 1.49, "learning_rate": 1.02203510277245e-05, "loss": 0.2832, "step": 976000 }, { "epoch": 1.49, "learning_rate": 1.021126994648939e-05, "loss": 0.2828, "step": 976500 }, { "epoch": 1.49, "learning_rate": 1.0202379664977364e-05, "loss": 0.2838, "step": 977000 }, { "epoch": 1.49, "learning_rate": 1.019368020749412e-05, "loss": 0.2828, "step": 977500 }, { "epoch": 1.49, "learning_rate": 1.018517159782365e-05, "loss": 0.2826, "step": 978000 }, { "epoch": 1.5, "learning_rate": 1.0176853859228149e-05, "loss": 0.2829, "step": 978500 }, { "epoch": 1.5, "learning_rate": 1.0168727014448004e-05, "loss": 0.2836, "step": 979000 }, { "epoch": 1.5, "learning_rate": 1.0160791085701714e-05, "loss": 0.2834, "step": 979500 }, { "epoch": 1.5, "learning_rate": 1.0153046094685783e-05, "loss": 0.2831, "step": 980000 }, { "epoch": 1.5, "learning_rate": 1.0145492062574731e-05, "loss": 0.2826, "step": 980500 }, { "epoch": 1.5, "learning_rate": 1.0138129010020992e-05, "loss": 0.2831, "step": 981000 }, { "epoch": 1.5, "learning_rate": 1.0130956957154867e-05, "loss": 0.2831, "step": 981500 }, { "epoch": 1.5, "learning_rate": 1.0123975923584488e-05, "loss": 0.2824, "step": 982000 }, { "epoch": 1.5, "learning_rate": 1.0117185928395721e-05, "loss": 0.2828, "step": 982500 }, { "epoch": 1.5, "learning_rate": 1.0110586990152152e-05, "loss": 0.2831, "step": 983000 }, { "epoch": 1.51, "learning_rate": 1.0104179126895039e-05, "loss": 0.2829, "step": 983500 }, { "epoch": 1.51, "learning_rate": 1.0097962356143219e-05, "loss": 0.2831, "step": 984000 }, { "epoch": 1.51, "learning_rate": 1.009193669489312e-05, "loss": 0.2834, "step": 984500 }, { "epoch": 1.51, "learning_rate": 1.0086102159618668e-05, "loss": 0.2834, "step": 985000 }, { "epoch": 1.51, "learning_rate": 1.0080458766271252e-05, "loss": 0.2832, "step": 985500 }, { "epoch": 1.51, "learning_rate": 1.0075006530279694e-05, "loss": 0.2833, "step": 986000 }, { "epoch": 1.51, "learning_rate": 1.0069745466550205e-05, "loss": 0.2827, "step": 986500 }, { "epoch": 1.51, "learning_rate": 1.0064675589466339e-05, "loss": 0.2821, "step": 987000 }, { "epoch": 1.51, "learning_rate": 1.005979691288893e-05, "loss": 0.2834, "step": 987500 }, { "epoch": 1.51, "learning_rate": 1.0055109450156098e-05, "loss": 0.2831, "step": 988000 }, { "epoch": 1.52, "learning_rate": 1.0050613214083197e-05, "loss": 0.2822, "step": 988500 }, { "epoch": 1.52, "learning_rate": 1.0046308216962759e-05, "loss": 0.2826, "step": 989000 }, { "epoch": 1.52, "learning_rate": 1.0042194470564472e-05, "loss": 0.2824, "step": 989500 }, { "epoch": 1.52, "learning_rate": 1.0038271986135177e-05, "loss": 0.2827, "step": 990000 }, { "epoch": 1.52, "learning_rate": 1.003454077439879e-05, "loss": 0.2826, "step": 990500 }, { "epoch": 1.52, "learning_rate": 1.0031000845556304e-05, "loss": 0.2828, "step": 991000 }, { "epoch": 1.52, "learning_rate": 1.0027652209285743e-05, "loss": 0.2829, "step": 991500 }, { "epoch": 1.52, "learning_rate": 1.0024494874742152e-05, "loss": 0.2832, "step": 992000 }, { "epoch": 1.52, "learning_rate": 1.0021528850557572e-05, "loss": 0.283, "step": 992500 }, { "epoch": 1.52, "learning_rate": 1.0018754144840986e-05, "loss": 0.2821, "step": 993000 }, { "epoch": 1.53, "learning_rate": 1.0016170765178345e-05, "loss": 0.2822, "step": 993500 }, { "epoch": 1.53, "learning_rate": 1.0013778718632507e-05, "loss": 0.2826, "step": 994000 }, { "epoch": 1.53, "learning_rate": 1.0011578011743233e-05, "loss": 0.2828, "step": 994500 }, { "epoch": 1.53, "learning_rate": 1.000956865052717e-05, "loss": 0.2827, "step": 995000 }, { "epoch": 1.53, "learning_rate": 1.0007750640477843e-05, "loss": 0.2829, "step": 995500 }, { "epoch": 1.53, "learning_rate": 1.0006123986565623e-05, "loss": 0.2829, "step": 996000 }, { "epoch": 1.53, "learning_rate": 1.0004688693237708e-05, "loss": 0.2832, "step": 996500 }, { "epoch": 1.53, "learning_rate": 1.0003444764418138e-05, "loss": 0.2835, "step": 997000 }, { "epoch": 1.53, "learning_rate": 1.0002392203507781e-05, "loss": 0.2832, "step": 997500 }, { "epoch": 1.53, "learning_rate": 1.000153101338428e-05, "loss": 0.2826, "step": 998000 }, { "epoch": 1.54, "learning_rate": 1.00008611964021e-05, "loss": 0.283, "step": 998500 }, { "epoch": 1.54, "learning_rate": 1.00003827543925e-05, "loss": 0.2832, "step": 999000 }, { "epoch": 1.54, "learning_rate": 1.0000095688663532e-05, "loss": 0.2827, "step": 999500 }, { "epoch": 1.54, "learning_rate": 1e-05, "loss": 0.2828, "step": 1000000 }, { "epoch": 1.54, "eval_loss": 0.3599591553211212, "eval_runtime": 50.3004, "eval_samples_per_second": 2519.919, "eval_steps_per_second": 9.861, "step": 1000000 } ], "max_steps": 1000000, "num_train_epochs": 2, "total_flos": 6.7607019944302705e+22, "trial_name": null, "trial_params": null }