{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9664951741145456, "eval_steps": 500, "global_step": 185000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-06, "loss": 3.6198, "step": 100 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 3.5109, "step": 200 }, { "epoch": 0.0, "learning_rate": 1.5e-05, "loss": 3.4948, "step": 300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 3.5552, "step": 400 }, { "epoch": 0.01, "learning_rate": 2.5e-05, "loss": 3.5109, "step": 500 }, { "epoch": 0.01, "learning_rate": 3e-05, "loss": 3.513, "step": 600 }, { "epoch": 0.01, "learning_rate": 3.5000000000000004e-05, "loss": 3.5135, "step": 700 }, { "epoch": 0.01, "learning_rate": 4e-05, "loss": 3.5552, "step": 800 }, { "epoch": 0.01, "learning_rate": 4.4999999999999996e-05, "loss": 3.5392, "step": 900 }, { "epoch": 0.01, "learning_rate": 5e-05, "loss": 3.5638, "step": 1000 }, { "epoch": 0.01, "learning_rate": 5.5e-05, "loss": 3.5414, "step": 1100 }, { "epoch": 0.01, "learning_rate": 6e-05, "loss": 3.5545, "step": 1200 }, { "epoch": 0.01, "learning_rate": 6.500000000000001e-05, "loss": 3.4934, "step": 1300 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-05, "loss": 3.5182, "step": 1400 }, { "epoch": 0.02, "learning_rate": 7.5e-05, "loss": 3.5703, "step": 1500 }, { "epoch": 0.02, "learning_rate": 8e-05, "loss": 3.5242, "step": 1600 }, { "epoch": 0.02, "learning_rate": 8.5e-05, "loss": 3.4979, "step": 1700 }, { "epoch": 0.02, "learning_rate": 8.999999999999999e-05, "loss": 3.502, "step": 1800 }, { "epoch": 0.02, "learning_rate": 9.5e-05, "loss": 3.5335, "step": 1900 }, { "epoch": 0.02, "learning_rate": 0.0001, "loss": 3.5493, "step": 2000 }, { "epoch": 0.02, "learning_rate": 0.000105, "loss": 3.5539, "step": 2100 }, { "epoch": 0.02, "learning_rate": 0.00011, "loss": 3.5152, "step": 2200 }, { "epoch": 0.02, "learning_rate": 0.000115, "loss": 3.5788, "step": 2300 }, { "epoch": 0.03, "learning_rate": 0.00012, "loss": 3.5338, "step": 2400 }, { "epoch": 0.03, "learning_rate": 0.000125, "loss": 3.5295, "step": 2500 }, { "epoch": 0.03, "learning_rate": 0.00013000000000000002, "loss": 3.5262, "step": 2600 }, { "epoch": 0.03, "learning_rate": 0.000135, "loss": 3.5277, "step": 2700 }, { "epoch": 0.03, "learning_rate": 0.00014000000000000001, "loss": 3.5143, "step": 2800 }, { "epoch": 0.03, "learning_rate": 0.000145, "loss": 3.5899, "step": 2900 }, { "epoch": 0.03, "learning_rate": 0.00015, "loss": 3.5366, "step": 3000 }, { "epoch": 0.03, "learning_rate": 0.000155, "loss": 3.4522, "step": 3100 }, { "epoch": 0.03, "learning_rate": 0.00016, "loss": 3.531, "step": 3200 }, { "epoch": 0.04, "learning_rate": 0.000165, "loss": 3.5378, "step": 3300 }, { "epoch": 0.04, "learning_rate": 0.00017, "loss": 3.5002, "step": 3400 }, { "epoch": 0.04, "learning_rate": 0.000175, "loss": 3.4772, "step": 3500 }, { "epoch": 0.04, "learning_rate": 0.00017999999999999998, "loss": 3.5223, "step": 3600 }, { "epoch": 0.04, "learning_rate": 0.000185, "loss": 3.5212, "step": 3700 }, { "epoch": 0.04, "learning_rate": 0.00019, "loss": 3.5397, "step": 3800 }, { "epoch": 0.04, "learning_rate": 0.00019500000000000002, "loss": 3.5471, "step": 3900 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 3.5082, "step": 4000 }, { "epoch": 0.04, "learning_rate": 0.000205, "loss": 3.5092, "step": 4100 }, { "epoch": 0.04, "learning_rate": 0.00021, "loss": 3.4302, "step": 4200 }, { "epoch": 0.05, "learning_rate": 0.000215, "loss": 3.5378, "step": 4300 }, { "epoch": 0.05, "learning_rate": 0.00022, "loss": 3.4796, "step": 4400 }, { "epoch": 0.05, "learning_rate": 0.00022500000000000002, "loss": 3.5122, "step": 4500 }, { "epoch": 0.05, "learning_rate": 0.00023, "loss": 3.5079, "step": 4600 }, { "epoch": 0.05, "learning_rate": 0.000235, "loss": 3.484, "step": 4700 }, { "epoch": 0.05, "learning_rate": 0.00024, "loss": 3.5695, "step": 4800 }, { "epoch": 0.05, "learning_rate": 0.000245, "loss": 3.5049, "step": 4900 }, { "epoch": 0.05, "learning_rate": 0.00025, "loss": 3.4858, "step": 5000 }, { "epoch": 0.05, "learning_rate": 0.000255, "loss": 3.4795, "step": 5100 }, { "epoch": 0.06, "learning_rate": 0.00026000000000000003, "loss": 3.5687, "step": 5200 }, { "epoch": 0.06, "learning_rate": 0.00026500000000000004, "loss": 3.4746, "step": 5300 }, { "epoch": 0.06, "learning_rate": 0.00027, "loss": 3.5224, "step": 5400 }, { "epoch": 0.06, "learning_rate": 0.000275, "loss": 3.4772, "step": 5500 }, { "epoch": 0.06, "learning_rate": 0.00028000000000000003, "loss": 3.5545, "step": 5600 }, { "epoch": 0.06, "learning_rate": 0.000285, "loss": 3.5292, "step": 5700 }, { "epoch": 0.06, "learning_rate": 0.00029, "loss": 3.5156, "step": 5800 }, { "epoch": 0.06, "learning_rate": 0.000295, "loss": 3.5265, "step": 5900 }, { "epoch": 0.06, "learning_rate": 0.0003, "loss": 3.5554, "step": 6000 }, { "epoch": 0.06, "learning_rate": 0.000305, "loss": 3.5095, "step": 6100 }, { "epoch": 0.07, "learning_rate": 0.00031, "loss": 3.5129, "step": 6200 }, { "epoch": 0.07, "learning_rate": 0.000315, "loss": 3.546, "step": 6300 }, { "epoch": 0.07, "learning_rate": 0.00032, "loss": 3.5059, "step": 6400 }, { "epoch": 0.07, "learning_rate": 0.00032500000000000004, "loss": 3.4791, "step": 6500 }, { "epoch": 0.07, "learning_rate": 0.00033, "loss": 3.4911, "step": 6600 }, { "epoch": 0.07, "learning_rate": 0.000335, "loss": 3.5105, "step": 6700 }, { "epoch": 0.07, "learning_rate": 0.00034, "loss": 3.4258, "step": 6800 }, { "epoch": 0.07, "learning_rate": 0.000345, "loss": 3.5187, "step": 6900 }, { "epoch": 0.07, "learning_rate": 0.00035, "loss": 3.5052, "step": 7000 }, { "epoch": 0.08, "learning_rate": 0.000355, "loss": 3.4961, "step": 7100 }, { "epoch": 0.08, "learning_rate": 0.00035999999999999997, "loss": 3.5155, "step": 7200 }, { "epoch": 0.08, "learning_rate": 0.000365, "loss": 3.537, "step": 7300 }, { "epoch": 0.08, "learning_rate": 0.00037, "loss": 3.4744, "step": 7400 }, { "epoch": 0.08, "learning_rate": 0.000375, "loss": 3.4898, "step": 7500 }, { "epoch": 0.08, "learning_rate": 0.00038, "loss": 3.4827, "step": 7600 }, { "epoch": 0.08, "learning_rate": 0.00038500000000000003, "loss": 3.5292, "step": 7700 }, { "epoch": 0.08, "learning_rate": 0.00039000000000000005, "loss": 3.5189, "step": 7800 }, { "epoch": 0.08, "learning_rate": 0.000395, "loss": 3.4855, "step": 7900 }, { "epoch": 0.09, "learning_rate": 0.0004, "loss": 3.5686, "step": 8000 }, { "epoch": 0.09, "learning_rate": 0.00040500000000000003, "loss": 3.5008, "step": 8100 }, { "epoch": 0.09, "learning_rate": 0.00041, "loss": 3.5276, "step": 8200 }, { "epoch": 0.09, "learning_rate": 0.000415, "loss": 3.491, "step": 8300 }, { "epoch": 0.09, "learning_rate": 0.00042, "loss": 3.5629, "step": 8400 }, { "epoch": 0.09, "learning_rate": 0.000425, "loss": 3.5554, "step": 8500 }, { "epoch": 0.09, "learning_rate": 0.00043, "loss": 3.5069, "step": 8600 }, { "epoch": 0.09, "learning_rate": 0.000435, "loss": 3.5329, "step": 8700 }, { "epoch": 0.09, "learning_rate": 0.00044, "loss": 3.5153, "step": 8800 }, { "epoch": 0.09, "learning_rate": 0.00044500000000000003, "loss": 3.5705, "step": 8900 }, { "epoch": 0.1, "learning_rate": 0.00045000000000000004, "loss": 3.4461, "step": 9000 }, { "epoch": 0.1, "learning_rate": 0.000455, "loss": 3.5421, "step": 9100 }, { "epoch": 0.1, "learning_rate": 0.00046, "loss": 3.5423, "step": 9200 }, { "epoch": 0.1, "learning_rate": 0.000465, "loss": 3.5614, "step": 9300 }, { "epoch": 0.1, "learning_rate": 0.00047, "loss": 3.4959, "step": 9400 }, { "epoch": 0.1, "learning_rate": 0.000475, "loss": 3.5324, "step": 9500 }, { "epoch": 0.1, "learning_rate": 0.00048, "loss": 3.4574, "step": 9600 }, { "epoch": 0.1, "learning_rate": 0.00048499999999999997, "loss": 3.5164, "step": 9700 }, { "epoch": 0.1, "learning_rate": 0.00049, "loss": 3.5273, "step": 9800 }, { "epoch": 0.11, "learning_rate": 0.000495, "loss": 3.5234, "step": 9900 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 3.5284, "step": 10000 }, { "epoch": 0.11, "learning_rate": 0.0004999996112877375, "loss": 3.5233, "step": 10100 }, { "epoch": 0.11, "learning_rate": 0.0004999984451521587, "loss": 3.4993, "step": 10200 }, { "epoch": 0.11, "learning_rate": 0.0004999965015968901, "loss": 3.5276, "step": 10300 }, { "epoch": 0.11, "learning_rate": 0.0004999937806279752, "loss": 3.5053, "step": 10400 }, { "epoch": 0.11, "learning_rate": 0.0004999902822538758, "loss": 3.486, "step": 10500 }, { "epoch": 0.11, "learning_rate": 0.0004999860064854707, "loss": 3.5093, "step": 10600 }, { "epoch": 0.11, "learning_rate": 0.0004999809533360561, "loss": 3.531, "step": 10700 }, { "epoch": 0.11, "learning_rate": 0.0004999751228213458, "loss": 3.5188, "step": 10800 }, { "epoch": 0.12, "learning_rate": 0.000499968514959471, "loss": 3.4891, "step": 10900 }, { "epoch": 0.12, "learning_rate": 0.00049996112977098, "loss": 3.5318, "step": 11000 }, { "epoch": 0.12, "learning_rate": 0.0004999529672788389, "loss": 3.538, "step": 11100 }, { "epoch": 0.12, "learning_rate": 0.0004999440275084302, "loss": 3.5255, "step": 11200 }, { "epoch": 0.12, "learning_rate": 0.000499934310487554, "loss": 3.563, "step": 11300 }, { "epoch": 0.12, "learning_rate": 0.0004999238162464273, "loss": 3.5304, "step": 11400 }, { "epoch": 0.12, "learning_rate": 0.0004999125448176843, "loss": 3.5348, "step": 11500 }, { "epoch": 0.12, "learning_rate": 0.0004999004962363751, "loss": 3.4948, "step": 11600 }, { "epoch": 0.12, "learning_rate": 0.0004998876705399677, "loss": 3.5647, "step": 11700 }, { "epoch": 0.13, "learning_rate": 0.000499874067768346, "loss": 3.5225, "step": 11800 }, { "epoch": 0.13, "learning_rate": 0.0004998596879638106, "loss": 3.4625, "step": 11900 }, { "epoch": 0.13, "learning_rate": 0.000499844531171078, "loss": 3.466, "step": 12000 }, { "epoch": 0.13, "learning_rate": 0.0004998285974372816, "loss": 3.5515, "step": 12100 }, { "epoch": 0.13, "learning_rate": 0.0004998118868119704, "loss": 3.6037, "step": 12200 }, { "epoch": 0.13, "learning_rate": 0.0004997943993471093, "loss": 3.5103, "step": 12300 }, { "epoch": 0.13, "learning_rate": 0.0004997761350970793, "loss": 3.4917, "step": 12400 }, { "epoch": 0.13, "learning_rate": 0.0004997570941186764, "loss": 3.5306, "step": 12500 }, { "epoch": 0.13, "learning_rate": 0.0004997372764711125, "loss": 3.4867, "step": 12600 }, { "epoch": 0.13, "learning_rate": 0.0004997166822160145, "loss": 3.449, "step": 12700 }, { "epoch": 0.14, "learning_rate": 0.0004996953114174239, "loss": 3.5224, "step": 12800 }, { "epoch": 0.14, "learning_rate": 0.0004996731641417981, "loss": 3.5221, "step": 12900 }, { "epoch": 0.14, "learning_rate": 0.000499650240458008, "loss": 3.5179, "step": 13000 }, { "epoch": 0.14, "learning_rate": 0.0004996265404373395, "loss": 3.485, "step": 13100 }, { "epoch": 0.14, "learning_rate": 0.0004996020641534924, "loss": 3.5237, "step": 13200 }, { "epoch": 0.14, "learning_rate": 0.0004995768116825806, "loss": 3.51, "step": 13300 }, { "epoch": 0.14, "learning_rate": 0.0004995507831031317, "loss": 3.5003, "step": 13400 }, { "epoch": 0.14, "learning_rate": 0.0004995239784960868, "loss": 3.5773, "step": 13500 }, { "epoch": 0.14, "learning_rate": 0.0004994963979447999, "loss": 3.5475, "step": 13600 }, { "epoch": 0.15, "learning_rate": 0.0004994680415350384, "loss": 3.5233, "step": 13700 }, { "epoch": 0.15, "learning_rate": 0.000499438909354982, "loss": 3.5136, "step": 13800 }, { "epoch": 0.15, "learning_rate": 0.0004994090014952231, "loss": 3.5589, "step": 13900 }, { "epoch": 0.15, "learning_rate": 0.0004993783180487662, "loss": 3.4992, "step": 14000 }, { "epoch": 0.15, "learning_rate": 0.0004993468591110274, "loss": 3.5307, "step": 14100 }, { "epoch": 0.15, "learning_rate": 0.0004993146247798345, "loss": 3.5067, "step": 14200 }, { "epoch": 0.15, "learning_rate": 0.0004992816151554267, "loss": 3.5534, "step": 14300 }, { "epoch": 0.15, "learning_rate": 0.0004992478303404537, "loss": 3.4614, "step": 14400 }, { "epoch": 0.15, "learning_rate": 0.0004992132704399764, "loss": 3.4507, "step": 14500 }, { "epoch": 0.16, "learning_rate": 0.0004991779355614653, "loss": 3.5012, "step": 14600 }, { "epoch": 0.16, "learning_rate": 0.0004991418258148015, "loss": 3.5332, "step": 14700 }, { "epoch": 0.16, "learning_rate": 0.0004991049413122752, "loss": 3.5121, "step": 14800 }, { "epoch": 0.16, "learning_rate": 0.0004990672821685863, "loss": 3.4945, "step": 14900 }, { "epoch": 0.16, "learning_rate": 0.0004990288485008431, "loss": 3.5502, "step": 15000 }, { "epoch": 0.16, "learning_rate": 0.000498989640428563, "loss": 3.5384, "step": 15100 }, { "epoch": 0.16, "learning_rate": 0.000498949658073671, "loss": 3.5698, "step": 15200 }, { "epoch": 0.16, "learning_rate": 0.0004989089015605002, "loss": 3.4467, "step": 15300 }, { "epoch": 0.16, "learning_rate": 0.0004988673710157913, "loss": 3.5147, "step": 15400 }, { "epoch": 0.16, "learning_rate": 0.0004988250665686915, "loss": 3.4889, "step": 15500 }, { "epoch": 0.17, "learning_rate": 0.0004987819883507549, "loss": 3.4569, "step": 15600 }, { "epoch": 0.17, "learning_rate": 0.0004987381364959417, "loss": 3.4654, "step": 15700 }, { "epoch": 0.17, "learning_rate": 0.000498693511140618, "loss": 3.5525, "step": 15800 }, { "epoch": 0.17, "learning_rate": 0.0004986481124235554, "loss": 3.4769, "step": 15900 }, { "epoch": 0.17, "learning_rate": 0.0004986019404859298, "loss": 3.4723, "step": 16000 }, { "epoch": 0.17, "learning_rate": 0.0004985549954713222, "loss": 3.5086, "step": 16100 }, { "epoch": 0.17, "learning_rate": 0.0004985072775257175, "loss": 3.537, "step": 16200 }, { "epoch": 0.17, "learning_rate": 0.0004984587867975039, "loss": 3.4993, "step": 16300 }, { "epoch": 0.17, "learning_rate": 0.0004984095234374732, "loss": 3.4976, "step": 16400 }, { "epoch": 0.18, "learning_rate": 0.0004983594875988193, "loss": 3.5206, "step": 16500 }, { "epoch": 0.18, "learning_rate": 0.0004983086794371385, "loss": 3.4844, "step": 16600 }, { "epoch": 0.18, "learning_rate": 0.0004982570991104293, "loss": 3.4813, "step": 16700 }, { "epoch": 0.18, "learning_rate": 0.0004982047467790904, "loss": 3.5309, "step": 16800 }, { "epoch": 0.18, "learning_rate": 0.0004981516226059222, "loss": 3.5115, "step": 16900 }, { "epoch": 0.18, "learning_rate": 0.0004980977267561245, "loss": 3.4775, "step": 17000 }, { "epoch": 0.18, "learning_rate": 0.0004980430593972974, "loss": 3.4671, "step": 17100 }, { "epoch": 0.18, "learning_rate": 0.0004979876206994396, "loss": 3.4842, "step": 17200 }, { "epoch": 0.18, "learning_rate": 0.0004979314108349489, "loss": 3.462, "step": 17300 }, { "epoch": 0.18, "learning_rate": 0.000497874429978621, "loss": 3.4734, "step": 17400 }, { "epoch": 0.19, "learning_rate": 0.0004978166783076492, "loss": 3.4945, "step": 17500 }, { "epoch": 0.19, "learning_rate": 0.0004977581560016236, "loss": 3.49, "step": 17600 }, { "epoch": 0.19, "learning_rate": 0.0004976988632425309, "loss": 3.5377, "step": 17700 }, { "epoch": 0.19, "learning_rate": 0.0004976388002147538, "loss": 3.5332, "step": 17800 }, { "epoch": 0.19, "learning_rate": 0.0004975779671050702, "loss": 3.4532, "step": 17900 }, { "epoch": 0.19, "learning_rate": 0.0004975163641026527, "loss": 3.5116, "step": 18000 }, { "epoch": 0.19, "learning_rate": 0.000497453991399068, "loss": 3.4816, "step": 18100 }, { "epoch": 0.19, "learning_rate": 0.0004973908491882763, "loss": 3.4983, "step": 18200 }, { "epoch": 0.19, "learning_rate": 0.000497326937666631, "loss": 3.4687, "step": 18300 }, { "epoch": 0.2, "learning_rate": 0.0004972622570328775, "loss": 3.5534, "step": 18400 }, { "epoch": 0.2, "learning_rate": 0.0004971968074881528, "loss": 3.4703, "step": 18500 }, { "epoch": 0.2, "learning_rate": 0.0004971305892359858, "loss": 3.5117, "step": 18600 }, { "epoch": 0.2, "learning_rate": 0.0004970636024822949, "loss": 3.4832, "step": 18700 }, { "epoch": 0.2, "learning_rate": 0.0004969958474353888, "loss": 3.554, "step": 18800 }, { "epoch": 0.2, "learning_rate": 0.0004969273243059651, "loss": 3.4567, "step": 18900 }, { "epoch": 0.2, "learning_rate": 0.0004968580333071101, "loss": 3.5089, "step": 19000 }, { "epoch": 0.2, "learning_rate": 0.0004967879746542981, "loss": 3.5264, "step": 19100 }, { "epoch": 0.2, "learning_rate": 0.00049671714856539, "loss": 3.546, "step": 19200 }, { "epoch": 0.21, "learning_rate": 0.0004966455552606338, "loss": 3.5111, "step": 19300 }, { "epoch": 0.21, "learning_rate": 0.0004965731949626629, "loss": 3.4773, "step": 19400 }, { "epoch": 0.21, "learning_rate": 0.0004965000678964962, "loss": 3.4718, "step": 19500 }, { "epoch": 0.21, "learning_rate": 0.0004964261742895367, "loss": 3.4829, "step": 19600 }, { "epoch": 0.21, "learning_rate": 0.0004963515143715711, "loss": 3.5101, "step": 19700 }, { "epoch": 0.21, "learning_rate": 0.0004962760883747694, "loss": 3.4787, "step": 19800 }, { "epoch": 0.21, "learning_rate": 0.0004961998965336835, "loss": 3.5048, "step": 19900 }, { "epoch": 0.21, "learning_rate": 0.0004961229390852471, "loss": 3.5439, "step": 20000 }, { "epoch": 0.21, "learning_rate": 0.0004960452162687747, "loss": 3.4837, "step": 20100 }, { "epoch": 0.21, "learning_rate": 0.0004959667283259607, "loss": 3.4976, "step": 20200 }, { "epoch": 0.22, "learning_rate": 0.0004958874755008788, "loss": 3.4594, "step": 20300 }, { "epoch": 0.22, "learning_rate": 0.0004958074580399816, "loss": 3.4317, "step": 20400 }, { "epoch": 0.22, "learning_rate": 0.0004957266761920991, "loss": 3.4615, "step": 20500 }, { "epoch": 0.22, "learning_rate": 0.0004956451302084385, "loss": 3.5461, "step": 20600 }, { "epoch": 0.22, "learning_rate": 0.0004955628203425832, "loss": 3.5074, "step": 20700 }, { "epoch": 0.22, "learning_rate": 0.000495479746850492, "loss": 3.5118, "step": 20800 }, { "epoch": 0.22, "learning_rate": 0.0004953959099904985, "loss": 3.4543, "step": 20900 }, { "epoch": 0.22, "learning_rate": 0.0004953113100233098, "loss": 3.5353, "step": 21000 }, { "epoch": 0.22, "learning_rate": 0.0004952259472120064, "loss": 3.541, "step": 21100 }, { "epoch": 0.23, "learning_rate": 0.0004951398218220408, "loss": 3.5633, "step": 21200 }, { "epoch": 0.23, "learning_rate": 0.0004950529341212371, "loss": 3.4821, "step": 21300 }, { "epoch": 0.23, "learning_rate": 0.0004949652843797897, "loss": 3.5209, "step": 21400 }, { "epoch": 0.23, "learning_rate": 0.0004948768728702628, "loss": 3.5295, "step": 21500 }, { "epoch": 0.23, "learning_rate": 0.0004947876998675897, "loss": 3.4903, "step": 21600 }, { "epoch": 0.23, "learning_rate": 0.0004946977656490713, "loss": 3.5398, "step": 21700 }, { "epoch": 0.23, "learning_rate": 0.0004946070704943761, "loss": 3.5016, "step": 21800 }, { "epoch": 0.23, "learning_rate": 0.0004945156146855383, "loss": 3.4882, "step": 21900 }, { "epoch": 0.23, "learning_rate": 0.0004944233985069581, "loss": 3.4632, "step": 22000 }, { "epoch": 0.23, "learning_rate": 0.0004943304222454001, "loss": 3.4802, "step": 22100 }, { "epoch": 0.24, "learning_rate": 0.0004942366861899921, "loss": 3.4686, "step": 22200 }, { "epoch": 0.24, "learning_rate": 0.000494142190632225, "loss": 3.4896, "step": 22300 }, { "epoch": 0.24, "learning_rate": 0.0004940469358659516, "loss": 3.4763, "step": 22400 }, { "epoch": 0.24, "learning_rate": 0.0004939509221873854, "loss": 3.5467, "step": 22500 }, { "epoch": 0.24, "learning_rate": 0.0004938541498951, "loss": 3.5006, "step": 22600 }, { "epoch": 0.24, "learning_rate": 0.0004937566192900279, "loss": 3.4922, "step": 22700 }, { "epoch": 0.24, "learning_rate": 0.00049365833067546, "loss": 3.4747, "step": 22800 }, { "epoch": 0.24, "learning_rate": 0.000493559284357044, "loss": 3.4843, "step": 22900 }, { "epoch": 0.24, "learning_rate": 0.0004934594806427843, "loss": 3.5593, "step": 23000 }, { "epoch": 0.25, "learning_rate": 0.00049335891984304, "loss": 3.5082, "step": 23100 }, { "epoch": 0.25, "learning_rate": 0.0004932576022705252, "loss": 3.4836, "step": 23200 }, { "epoch": 0.25, "learning_rate": 0.0004931555282403066, "loss": 3.472, "step": 23300 }, { "epoch": 0.25, "learning_rate": 0.0004930526980698039, "loss": 3.4998, "step": 23400 }, { "epoch": 0.25, "learning_rate": 0.0004929491120787878, "loss": 3.4831, "step": 23500 }, { "epoch": 0.25, "learning_rate": 0.0004928447705893794, "loss": 3.4745, "step": 23600 }, { "epoch": 0.25, "learning_rate": 0.0004927396739260493, "loss": 3.469, "step": 23700 }, { "epoch": 0.25, "learning_rate": 0.0004926338224156163, "loss": 3.5138, "step": 23800 }, { "epoch": 0.25, "learning_rate": 0.0004925272163872468, "loss": 3.4742, "step": 23900 }, { "epoch": 0.26, "learning_rate": 0.0004924198561724532, "loss": 3.4287, "step": 24000 }, { "epoch": 0.26, "learning_rate": 0.0004923117421050934, "loss": 3.4663, "step": 24100 }, { "epoch": 0.26, "learning_rate": 0.0004922028745213696, "loss": 3.4808, "step": 24200 }, { "epoch": 0.26, "learning_rate": 0.0004920932537598269, "loss": 3.5508, "step": 24300 }, { "epoch": 0.26, "learning_rate": 0.0004919828801613532, "loss": 3.5053, "step": 24400 }, { "epoch": 0.26, "learning_rate": 0.0004918717540691766, "loss": 3.5131, "step": 24500 }, { "epoch": 0.26, "learning_rate": 0.000491759875828866, "loss": 3.4786, "step": 24600 }, { "epoch": 0.26, "learning_rate": 0.0004916472457883287, "loss": 3.5234, "step": 24700 }, { "epoch": 0.26, "learning_rate": 0.0004915338642978103, "loss": 3.5251, "step": 24800 }, { "epoch": 0.26, "learning_rate": 0.0004914197317098931, "loss": 3.4751, "step": 24900 }, { "epoch": 0.27, "learning_rate": 0.0004913048483794948, "loss": 3.5188, "step": 25000 }, { "epoch": 0.27, "learning_rate": 0.000491189214663868, "loss": 3.4953, "step": 25100 }, { "epoch": 0.27, "learning_rate": 0.0004910728309225985, "loss": 3.4985, "step": 25200 }, { "epoch": 0.27, "learning_rate": 0.0004909556975176047, "loss": 3.4845, "step": 25300 }, { "epoch": 0.27, "learning_rate": 0.0004908378148131362, "loss": 3.6007, "step": 25400 }, { "epoch": 0.27, "learning_rate": 0.0004907191831757724, "loss": 3.481, "step": 25500 }, { "epoch": 0.27, "learning_rate": 0.0004905998029744222, "loss": 3.5303, "step": 25600 }, { "epoch": 0.27, "learning_rate": 0.0004904796745803217, "loss": 3.5077, "step": 25700 }, { "epoch": 0.27, "learning_rate": 0.0004903587983670339, "loss": 3.481, "step": 25800 }, { "epoch": 0.28, "learning_rate": 0.0004902371747104476, "loss": 3.451, "step": 25900 }, { "epoch": 0.28, "learning_rate": 0.0004901148039887756, "loss": 3.4953, "step": 26000 }, { "epoch": 0.28, "learning_rate": 0.0004899916865825537, "loss": 3.458, "step": 26100 }, { "epoch": 0.28, "learning_rate": 0.00048986782287464, "loss": 3.4587, "step": 26200 }, { "epoch": 0.28, "learning_rate": 0.0004897432132502132, "loss": 3.491, "step": 26300 }, { "epoch": 0.28, "learning_rate": 0.0004896178580967717, "loss": 3.4245, "step": 26400 }, { "epoch": 0.28, "learning_rate": 0.0004894917578041322, "loss": 3.5217, "step": 26500 }, { "epoch": 0.28, "learning_rate": 0.0004893649127644283, "loss": 3.5185, "step": 26600 }, { "epoch": 0.28, "learning_rate": 0.00048923732337211, "loss": 3.505, "step": 26700 }, { "epoch": 0.28, "learning_rate": 0.0004891089900239418, "loss": 3.4743, "step": 26800 }, { "epoch": 0.29, "learning_rate": 0.0004889799131190015, "loss": 3.4964, "step": 26900 }, { "epoch": 0.29, "learning_rate": 0.0004888500930586793, "loss": 3.5558, "step": 27000 }, { "epoch": 0.29, "learning_rate": 0.0004887195302466767, "loss": 3.442, "step": 27100 }, { "epoch": 0.29, "learning_rate": 0.0004885882250890044, "loss": 3.5129, "step": 27200 }, { "epoch": 0.29, "learning_rate": 0.0004884561779939817, "loss": 3.5072, "step": 27300 }, { "epoch": 0.29, "learning_rate": 0.0004883233893722354, "loss": 3.4971, "step": 27400 }, { "epoch": 0.29, "learning_rate": 0.000488189859636698, "loss": 3.47, "step": 27500 }, { "epoch": 0.29, "learning_rate": 0.0004880555892026066, "loss": 3.5349, "step": 27600 }, { "epoch": 0.29, "learning_rate": 0.0004879205784875017, "loss": 3.4856, "step": 27700 }, { "epoch": 0.3, "learning_rate": 0.0004877848279112259, "loss": 3.5072, "step": 27800 }, { "epoch": 0.3, "learning_rate": 0.00048764833789592254, "loss": 3.4752, "step": 27900 }, { "epoch": 0.3, "learning_rate": 0.0004875111088660343, "loss": 3.4654, "step": 28000 }, { "epoch": 0.3, "learning_rate": 0.000487373141248302, "loss": 3.5321, "step": 28100 }, { "epoch": 0.3, "learning_rate": 0.0004872344354717634, "loss": 3.4574, "step": 28200 }, { "epoch": 0.3, "learning_rate": 0.0004870949919677515, "loss": 3.4621, "step": 28300 }, { "epoch": 0.3, "learning_rate": 0.00048695481116989357, "loss": 3.532, "step": 28400 }, { "epoch": 0.3, "learning_rate": 0.00048681389351410955, "loss": 3.5071, "step": 28500 }, { "epoch": 0.3, "learning_rate": 0.0004866722394386107, "loss": 3.5263, "step": 28600 }, { "epoch": 0.31, "learning_rate": 0.00048652984938389853, "loss": 3.5198, "step": 28700 }, { "epoch": 0.31, "learning_rate": 0.00048638672379276314, "loss": 3.4399, "step": 28800 }, { "epoch": 0.31, "learning_rate": 0.0004862428631102819, "loss": 3.4343, "step": 28900 }, { "epoch": 0.31, "learning_rate": 0.000486098267783818, "loss": 3.4477, "step": 29000 }, { "epoch": 0.31, "learning_rate": 0.00048595293826301936, "loss": 3.4616, "step": 29100 }, { "epoch": 0.31, "learning_rate": 0.0004858068749998169, "loss": 3.498, "step": 29200 }, { "epoch": 0.31, "learning_rate": 0.0004856600784484232, "loss": 3.5423, "step": 29300 }, { "epoch": 0.31, "learning_rate": 0.00048551254906533135, "loss": 3.5488, "step": 29400 }, { "epoch": 0.31, "learning_rate": 0.00048536428730931307, "loss": 3.4823, "step": 29500 }, { "epoch": 0.31, "learning_rate": 0.00048521529364141776, "loss": 3.4666, "step": 29600 }, { "epoch": 0.32, "learning_rate": 0.0004850655685249706, "loss": 3.4553, "step": 29700 }, { "epoch": 0.32, "learning_rate": 0.0004849151124255716, "loss": 3.4764, "step": 29800 }, { "epoch": 0.32, "learning_rate": 0.0004847639258110939, "loss": 3.504, "step": 29900 }, { "epoch": 0.32, "learning_rate": 0.000484612009151682, "loss": 3.5053, "step": 30000 }, { "epoch": 0.32, "learning_rate": 0.0004844593629197511, "loss": 3.4995, "step": 30100 }, { "epoch": 0.32, "learning_rate": 0.00048430598758998465, "loss": 3.5613, "step": 30200 }, { "epoch": 0.32, "learning_rate": 0.00048415188363933384, "loss": 3.4437, "step": 30300 }, { "epoch": 0.32, "learning_rate": 0.0004839970515470153, "loss": 3.5437, "step": 30400 }, { "epoch": 0.32, "learning_rate": 0.0004838414917945101, "loss": 3.4199, "step": 30500 }, { "epoch": 0.33, "learning_rate": 0.00048368520486556215, "loss": 3.5321, "step": 30600 }, { "epoch": 0.33, "learning_rate": 0.00048352819124617666, "loss": 3.5139, "step": 30700 }, { "epoch": 0.33, "learning_rate": 0.00048337045142461845, "loss": 3.5193, "step": 30800 }, { "epoch": 0.33, "learning_rate": 0.0004832119858914108, "loss": 3.4716, "step": 30900 }, { "epoch": 0.33, "learning_rate": 0.00048305279513933375, "loss": 3.4594, "step": 31000 }, { "epoch": 0.33, "learning_rate": 0.0004828928796634224, "loss": 3.5184, "step": 31100 }, { "epoch": 0.33, "learning_rate": 0.0004827322399609656, "loss": 3.4635, "step": 31200 }, { "epoch": 0.33, "learning_rate": 0.0004825708765315044, "loss": 3.5565, "step": 31300 }, { "epoch": 0.33, "learning_rate": 0.00048240878987683037, "loss": 3.4901, "step": 31400 }, { "epoch": 0.33, "learning_rate": 0.0004822459805009839, "loss": 3.5068, "step": 31500 }, { "epoch": 0.34, "learning_rate": 0.0004820824489102531, "loss": 3.4434, "step": 31600 }, { "epoch": 0.34, "learning_rate": 0.00048191819561317184, "loss": 3.5135, "step": 31700 }, { "epoch": 0.34, "learning_rate": 0.0004817532211205184, "loss": 3.5205, "step": 31800 }, { "epoch": 0.34, "learning_rate": 0.00048158752594531346, "loss": 3.5106, "step": 31900 }, { "epoch": 0.34, "learning_rate": 0.0004814211106028191, "loss": 3.5037, "step": 32000 }, { "epoch": 0.34, "learning_rate": 0.00048125397561053676, "loss": 3.5225, "step": 32100 }, { "epoch": 0.34, "learning_rate": 0.0004810861214882058, "loss": 3.5481, "step": 32200 }, { "epoch": 0.34, "learning_rate": 0.0004809175487578019, "loss": 3.5227, "step": 32300 }, { "epoch": 0.34, "learning_rate": 0.0004807482579435353, "loss": 3.5191, "step": 32400 }, { "epoch": 0.35, "learning_rate": 0.0004805782495718494, "loss": 3.4667, "step": 32500 }, { "epoch": 0.35, "learning_rate": 0.0004804075241714189, "loss": 3.5535, "step": 32600 }, { "epoch": 0.35, "learning_rate": 0.0004802360822731482, "loss": 3.5245, "step": 32700 }, { "epoch": 0.35, "learning_rate": 0.00048006392441016986, "loss": 3.4818, "step": 32800 }, { "epoch": 0.35, "learning_rate": 0.0004798910511178429, "loss": 3.5417, "step": 32900 }, { "epoch": 0.35, "learning_rate": 0.00047971746293375107, "loss": 3.4636, "step": 33000 }, { "epoch": 0.35, "learning_rate": 0.0004795431603977011, "loss": 3.4918, "step": 33100 }, { "epoch": 0.35, "learning_rate": 0.00047936814405172143, "loss": 3.4898, "step": 33200 }, { "epoch": 0.35, "learning_rate": 0.0004791924144400599, "loss": 3.5102, "step": 33300 }, { "epoch": 0.36, "learning_rate": 0.0004790159721091827, "loss": 3.5459, "step": 33400 }, { "epoch": 0.36, "learning_rate": 0.00047883881760777205, "loss": 3.4848, "step": 33500 }, { "epoch": 0.36, "learning_rate": 0.0004786609514867251, "loss": 3.534, "step": 33600 }, { "epoch": 0.36, "learning_rate": 0.00047848237429915175, "loss": 3.5235, "step": 33700 }, { "epoch": 0.36, "learning_rate": 0.00047830308660037305, "loss": 3.5164, "step": 33800 }, { "epoch": 0.36, "learning_rate": 0.0004781230889479198, "loss": 3.5117, "step": 33900 }, { "epoch": 0.36, "learning_rate": 0.0004779423819015302, "loss": 3.5032, "step": 34000 }, { "epoch": 0.36, "learning_rate": 0.0004777609660231486, "loss": 3.4958, "step": 34100 }, { "epoch": 0.36, "learning_rate": 0.00047757884187692374, "loss": 3.5169, "step": 34200 }, { "epoch": 0.36, "learning_rate": 0.0004773960100292066, "loss": 3.5191, "step": 34300 }, { "epoch": 0.37, "learning_rate": 0.0004772124710485492, "loss": 3.4945, "step": 34400 }, { "epoch": 0.37, "learning_rate": 0.0004770282255057022, "loss": 3.5127, "step": 34500 }, { "epoch": 0.37, "learning_rate": 0.0004768432739736137, "loss": 3.4561, "step": 34600 }, { "epoch": 0.37, "learning_rate": 0.00047665761702742705, "loss": 3.558, "step": 34700 }, { "epoch": 0.37, "learning_rate": 0.0004764712552444794, "loss": 3.5893, "step": 34800 }, { "epoch": 0.37, "learning_rate": 0.0004762841892042995, "loss": 3.5003, "step": 34900 }, { "epoch": 0.37, "learning_rate": 0.00047609641948860636, "loss": 3.4897, "step": 35000 }, { "epoch": 0.37, "learning_rate": 0.0004759079466813072, "loss": 3.4982, "step": 35100 }, { "epoch": 0.37, "learning_rate": 0.00047571877136849537, "loss": 3.524, "step": 35200 }, { "epoch": 0.38, "learning_rate": 0.000475528894138449, "loss": 3.4562, "step": 35300 }, { "epoch": 0.38, "learning_rate": 0.0004753383155816291, "loss": 3.5271, "step": 35400 }, { "epoch": 0.38, "learning_rate": 0.00047514703629067726, "loss": 3.4422, "step": 35500 }, { "epoch": 0.38, "learning_rate": 0.0004749550568604145, "loss": 3.4279, "step": 35600 }, { "epoch": 0.38, "learning_rate": 0.0004747623778878387, "loss": 3.4677, "step": 35700 }, { "epoch": 0.38, "learning_rate": 0.0004745689999721234, "loss": 3.4405, "step": 35800 }, { "epoch": 0.38, "learning_rate": 0.00047437492371461566, "loss": 3.4902, "step": 35900 }, { "epoch": 0.38, "learning_rate": 0.0004741801497188339, "loss": 3.4773, "step": 36000 }, { "epoch": 0.38, "learning_rate": 0.0004739846785904664, "loss": 3.5532, "step": 36100 }, { "epoch": 0.38, "learning_rate": 0.00047378851093736945, "loss": 3.4676, "step": 36200 }, { "epoch": 0.39, "learning_rate": 0.0004735916473695653, "loss": 3.4511, "step": 36300 }, { "epoch": 0.39, "learning_rate": 0.00047339408849924, "loss": 3.5473, "step": 36400 }, { "epoch": 0.39, "learning_rate": 0.0004731958349407421, "loss": 3.5044, "step": 36500 }, { "epoch": 0.39, "learning_rate": 0.0004729968873105804, "loss": 3.5104, "step": 36600 }, { "epoch": 0.39, "learning_rate": 0.0004727972462274219, "loss": 3.4658, "step": 36700 }, { "epoch": 0.39, "learning_rate": 0.00047259691231209006, "loss": 3.4728, "step": 36800 }, { "epoch": 0.39, "learning_rate": 0.0004723958861875629, "loss": 3.4425, "step": 36900 }, { "epoch": 0.39, "learning_rate": 0.000472194168478971, "loss": 3.4615, "step": 37000 }, { "epoch": 0.39, "learning_rate": 0.00047199175981359556, "loss": 3.5654, "step": 37100 }, { "epoch": 0.4, "learning_rate": 0.00047178866082086635, "loss": 3.5196, "step": 37200 }, { "epoch": 0.4, "learning_rate": 0.0004715848721323599, "loss": 3.4618, "step": 37300 }, { "epoch": 0.4, "learning_rate": 0.00047138039438179765, "loss": 3.4837, "step": 37400 }, { "epoch": 0.4, "learning_rate": 0.00047117522820504357, "loss": 3.5105, "step": 37500 }, { "epoch": 0.4, "learning_rate": 0.00047096937424010246, "loss": 3.5053, "step": 37600 }, { "epoch": 0.4, "learning_rate": 0.0004707628331271182, "loss": 3.5327, "step": 37700 }, { "epoch": 0.4, "learning_rate": 0.0004705556055083711, "loss": 3.4411, "step": 37800 }, { "epoch": 0.4, "learning_rate": 0.0004703476920282766, "loss": 3.5237, "step": 37900 }, { "epoch": 0.4, "learning_rate": 0.0004701390933333829, "loss": 3.4413, "step": 38000 }, { "epoch": 0.4, "learning_rate": 0.0004699298100723688, "loss": 3.5936, "step": 38100 }, { "epoch": 0.41, "learning_rate": 0.0004697198428960422, "loss": 3.5068, "step": 38200 }, { "epoch": 0.41, "learning_rate": 0.00046950919245733756, "loss": 3.5562, "step": 38300 }, { "epoch": 0.41, "learning_rate": 0.0004692978594113142, "loss": 3.471, "step": 38400 }, { "epoch": 0.41, "learning_rate": 0.000469085844415154, "loss": 3.4787, "step": 38500 }, { "epoch": 0.41, "learning_rate": 0.0004688731481281597, "loss": 3.4779, "step": 38600 }, { "epoch": 0.41, "learning_rate": 0.00046865977121175257, "loss": 3.4712, "step": 38700 }, { "epoch": 0.41, "learning_rate": 0.00046844571432947025, "loss": 3.4615, "step": 38800 }, { "epoch": 0.41, "learning_rate": 0.00046823097814696515, "loss": 3.4683, "step": 38900 }, { "epoch": 0.41, "learning_rate": 0.0004680155633320019, "loss": 3.482, "step": 39000 }, { "epoch": 0.42, "learning_rate": 0.0004677994705544555, "loss": 3.4946, "step": 39100 }, { "epoch": 0.42, "learning_rate": 0.00046758270048630933, "loss": 3.4712, "step": 39200 }, { "epoch": 0.42, "learning_rate": 0.00046736525380165284, "loss": 3.4971, "step": 39300 }, { "epoch": 0.42, "learning_rate": 0.0004671471311766796, "loss": 3.4925, "step": 39400 }, { "epoch": 0.42, "learning_rate": 0.0004669283332896851, "loss": 3.4788, "step": 39500 }, { "epoch": 0.42, "learning_rate": 0.0004667088608210647, "loss": 3.4891, "step": 39600 }, { "epoch": 0.42, "learning_rate": 0.00046648871445331144, "loss": 3.5367, "step": 39700 }, { "epoch": 0.42, "learning_rate": 0.0004662678948710142, "loss": 3.4932, "step": 39800 }, { "epoch": 0.42, "learning_rate": 0.0004660464027608552, "loss": 3.5584, "step": 39900 }, { "epoch": 0.43, "learning_rate": 0.00046582423881160796, "loss": 3.4568, "step": 40000 }, { "epoch": 0.43, "learning_rate": 0.00046560140371413526, "loss": 3.4778, "step": 40100 }, { "epoch": 0.43, "learning_rate": 0.0004653778981613871, "loss": 3.4778, "step": 40200 }, { "epoch": 0.43, "learning_rate": 0.0004651537228483983, "loss": 3.4521, "step": 40300 }, { "epoch": 0.43, "learning_rate": 0.0004649288784722862, "loss": 3.4848, "step": 40400 }, { "epoch": 0.43, "learning_rate": 0.00046470336573224913, "loss": 3.4623, "step": 40500 }, { "epoch": 0.43, "learning_rate": 0.0004644771853295635, "loss": 3.5117, "step": 40600 }, { "epoch": 0.43, "learning_rate": 0.00046425033796758207, "loss": 3.4535, "step": 40700 }, { "epoch": 0.43, "learning_rate": 0.0004640228243517318, "loss": 3.4737, "step": 40800 }, { "epoch": 0.43, "learning_rate": 0.0004637946451895113, "loss": 3.4991, "step": 40900 }, { "epoch": 0.44, "learning_rate": 0.0004635658011904887, "loss": 3.517, "step": 41000 }, { "epoch": 0.44, "learning_rate": 0.00046333629306629997, "loss": 3.4767, "step": 41100 }, { "epoch": 0.44, "learning_rate": 0.00046310612153064603, "loss": 3.4917, "step": 41200 }, { "epoch": 0.44, "learning_rate": 0.0004628752872992909, "loss": 3.4907, "step": 41300 }, { "epoch": 0.44, "learning_rate": 0.0004626437910900591, "loss": 3.5002, "step": 41400 }, { "epoch": 0.44, "learning_rate": 0.00046241163362283424, "loss": 3.4782, "step": 41500 }, { "epoch": 0.44, "learning_rate": 0.0004621788156195559, "loss": 3.48, "step": 41600 }, { "epoch": 0.44, "learning_rate": 0.00046194533780421766, "loss": 3.5048, "step": 41700 }, { "epoch": 0.44, "learning_rate": 0.00046171120090286516, "loss": 3.4651, "step": 41800 }, { "epoch": 0.45, "learning_rate": 0.0004614764056435934, "loss": 3.5113, "step": 41900 }, { "epoch": 0.45, "learning_rate": 0.00046124095275654485, "loss": 3.4631, "step": 42000 }, { "epoch": 0.45, "learning_rate": 0.00046100484297390676, "loss": 3.4961, "step": 42100 }, { "epoch": 0.45, "learning_rate": 0.00046076807702990943, "loss": 3.4688, "step": 42200 }, { "epoch": 0.45, "learning_rate": 0.00046053065566082344, "loss": 3.4649, "step": 42300 }, { "epoch": 0.45, "learning_rate": 0.0004602925796049574, "loss": 3.5527, "step": 42400 }, { "epoch": 0.45, "learning_rate": 0.00046005384960265617, "loss": 3.5142, "step": 42500 }, { "epoch": 0.45, "learning_rate": 0.0004598144663962979, "loss": 3.4609, "step": 42600 }, { "epoch": 0.45, "learning_rate": 0.000459574430730292, "loss": 3.5237, "step": 42700 }, { "epoch": 0.45, "learning_rate": 0.0004593337433510771, "loss": 3.4829, "step": 42800 }, { "epoch": 0.46, "learning_rate": 0.0004590924050071182, "loss": 3.5192, "step": 42900 }, { "epoch": 0.46, "learning_rate": 0.00045885041644890467, "loss": 3.4881, "step": 43000 }, { "epoch": 0.46, "learning_rate": 0.00045860777842894796, "loss": 3.5034, "step": 43100 }, { "epoch": 0.46, "learning_rate": 0.00045836449170177896, "loss": 3.486, "step": 43200 }, { "epoch": 0.46, "learning_rate": 0.00045812055702394597, "loss": 3.4628, "step": 43300 }, { "epoch": 0.46, "learning_rate": 0.00045787597515401223, "loss": 3.4878, "step": 43400 }, { "epoch": 0.46, "learning_rate": 0.0004576307468525535, "loss": 3.4622, "step": 43500 }, { "epoch": 0.46, "learning_rate": 0.0004573848728821557, "loss": 3.5293, "step": 43600 }, { "epoch": 0.46, "learning_rate": 0.00045713835400741274, "loss": 3.4451, "step": 43700 }, { "epoch": 0.47, "learning_rate": 0.00045689119099492383, "loss": 3.5038, "step": 43800 }, { "epoch": 0.47, "learning_rate": 0.00045664338461329137, "loss": 3.4633, "step": 43900 }, { "epoch": 0.47, "learning_rate": 0.0004563949356331184, "loss": 3.4536, "step": 44000 }, { "epoch": 0.47, "learning_rate": 0.0004561458448270062, "loss": 3.5222, "step": 44100 }, { "epoch": 0.47, "learning_rate": 0.0004558961129695519, "loss": 3.4848, "step": 44200 }, { "epoch": 0.47, "learning_rate": 0.0004556457408373464, "loss": 3.5302, "step": 44300 }, { "epoch": 0.47, "learning_rate": 0.0004553947292089713, "loss": 3.5057, "step": 44400 }, { "epoch": 0.47, "learning_rate": 0.000455143078864997, "loss": 3.537, "step": 44500 }, { "epoch": 0.47, "learning_rate": 0.00045489079058798, "loss": 3.4947, "step": 44600 }, { "epoch": 0.48, "learning_rate": 0.00045463786516246086, "loss": 3.546, "step": 44700 }, { "epoch": 0.48, "learning_rate": 0.00045438430337496117, "loss": 3.5723, "step": 44800 }, { "epoch": 0.48, "learning_rate": 0.00045413010601398163, "loss": 3.4514, "step": 44900 }, { "epoch": 0.48, "learning_rate": 0.0004538752738699992, "loss": 3.4672, "step": 45000 }, { "epoch": 0.48, "learning_rate": 0.000453619807735465, "loss": 3.4934, "step": 45100 }, { "epoch": 0.48, "learning_rate": 0.00045336370840480143, "loss": 3.4974, "step": 45200 }, { "epoch": 0.48, "learning_rate": 0.00045310697667440026, "loss": 3.4252, "step": 45300 }, { "epoch": 0.48, "learning_rate": 0.00045284961334261965, "loss": 3.5194, "step": 45400 }, { "epoch": 0.48, "learning_rate": 0.0004525916192097818, "loss": 3.4608, "step": 45500 }, { "epoch": 0.48, "learning_rate": 0.0004523329950781705, "loss": 3.5033, "step": 45600 }, { "epoch": 0.49, "learning_rate": 0.0004520737417520289, "loss": 3.4757, "step": 45700 }, { "epoch": 0.49, "learning_rate": 0.0004518138600375565, "loss": 3.4889, "step": 45800 }, { "epoch": 0.49, "learning_rate": 0.0004515533507429069, "loss": 3.4402, "step": 45900 }, { "epoch": 0.49, "learning_rate": 0.00045129221467818544, "loss": 3.5092, "step": 46000 }, { "epoch": 0.49, "learning_rate": 0.0004510304526554464, "loss": 3.5367, "step": 46100 }, { "epoch": 0.49, "learning_rate": 0.0004507680654886907, "loss": 3.4865, "step": 46200 }, { "epoch": 0.49, "learning_rate": 0.0004505050539938632, "loss": 3.5106, "step": 46300 }, { "epoch": 0.49, "learning_rate": 0.00045024141898885017, "loss": 3.4592, "step": 46400 }, { "epoch": 0.49, "learning_rate": 0.000449977161293477, "loss": 3.4813, "step": 46500 }, { "epoch": 0.5, "learning_rate": 0.0004497122817295053, "loss": 3.4957, "step": 46600 }, { "epoch": 0.5, "learning_rate": 0.00044944678112063046, "loss": 3.4612, "step": 46700 }, { "epoch": 0.5, "learning_rate": 0.00044918066029247936, "loss": 3.5378, "step": 46800 }, { "epoch": 0.5, "learning_rate": 0.00044891392007260735, "loss": 3.5104, "step": 46900 }, { "epoch": 0.5, "learning_rate": 0.000448646561290496, "loss": 3.4465, "step": 47000 }, { "epoch": 0.5, "learning_rate": 0.0004483785847775503, "loss": 3.4633, "step": 47100 }, { "epoch": 0.5, "learning_rate": 0.0004481099913670965, "loss": 3.4771, "step": 47200 }, { "epoch": 0.5, "learning_rate": 0.0004478407818943789, "loss": 3.5111, "step": 47300 }, { "epoch": 0.5, "learning_rate": 0.0004475709571965578, "loss": 3.4932, "step": 47400 }, { "epoch": 0.5, "learning_rate": 0.00044730051811270647, "loss": 3.4843, "step": 47500 }, { "epoch": 0.51, "learning_rate": 0.0004470294654838087, "loss": 3.4771, "step": 47600 }, { "epoch": 0.51, "learning_rate": 0.0004467578001527565, "loss": 3.5115, "step": 47700 }, { "epoch": 0.51, "learning_rate": 0.00044648552296434695, "loss": 3.5195, "step": 47800 }, { "epoch": 0.51, "learning_rate": 0.00044621263476528003, "loss": 3.4532, "step": 47900 }, { "epoch": 0.51, "learning_rate": 0.00044593913640415545, "loss": 3.4574, "step": 48000 }, { "epoch": 0.51, "learning_rate": 0.0004456650287314707, "loss": 3.5016, "step": 48100 }, { "epoch": 0.51, "learning_rate": 0.00044539031259961784, "loss": 3.4765, "step": 48200 }, { "epoch": 0.51, "learning_rate": 0.00044511498886288105, "loss": 3.465, "step": 48300 }, { "epoch": 0.51, "learning_rate": 0.00044483905837743417, "loss": 3.5028, "step": 48400 }, { "epoch": 0.52, "learning_rate": 0.00044456252200133757, "loss": 3.5421, "step": 48500 }, { "epoch": 0.52, "learning_rate": 0.0004442853805945359, "loss": 3.4714, "step": 48600 }, { "epoch": 0.52, "learning_rate": 0.00044400763501885543, "loss": 3.4677, "step": 48700 }, { "epoch": 0.52, "learning_rate": 0.0004437292861380009, "loss": 3.535, "step": 48800 }, { "epoch": 0.52, "learning_rate": 0.00044345033481755326, "loss": 3.4449, "step": 48900 }, { "epoch": 0.52, "learning_rate": 0.000443170781924967, "loss": 3.4953, "step": 49000 }, { "epoch": 0.52, "learning_rate": 0.0004428906283295672, "loss": 3.4682, "step": 49100 }, { "epoch": 0.52, "learning_rate": 0.00044260987490254695, "loss": 3.4276, "step": 49200 }, { "epoch": 0.52, "learning_rate": 0.00044232852251696467, "loss": 3.5311, "step": 49300 }, { "epoch": 0.53, "learning_rate": 0.00044204657204774124, "loss": 3.4406, "step": 49400 }, { "epoch": 0.53, "learning_rate": 0.0004417640243716576, "loss": 3.5214, "step": 49500 }, { "epoch": 0.53, "learning_rate": 0.0004414808803673518, "loss": 3.4624, "step": 49600 }, { "epoch": 0.53, "learning_rate": 0.000441197140915316, "loss": 3.4879, "step": 49700 }, { "epoch": 0.53, "learning_rate": 0.0004409128068978944, "loss": 3.52, "step": 49800 }, { "epoch": 0.53, "learning_rate": 0.0004406278791992798, "loss": 3.5174, "step": 49900 }, { "epoch": 0.53, "learning_rate": 0.00044034235870551156, "loss": 3.4889, "step": 50000 }, { "epoch": 0.53, "learning_rate": 0.000440056246304472, "loss": 3.4301, "step": 50100 }, { "epoch": 0.53, "learning_rate": 0.0004397695428858844, "loss": 3.4761, "step": 50200 }, { "epoch": 0.53, "learning_rate": 0.00043948224934130985, "loss": 3.4547, "step": 50300 }, { "epoch": 0.54, "learning_rate": 0.00043919436656414445, "loss": 3.4262, "step": 50400 }, { "epoch": 0.54, "learning_rate": 0.0004389058954496169, "loss": 3.4494, "step": 50500 }, { "epoch": 0.54, "learning_rate": 0.0004386168368947851, "loss": 3.5187, "step": 50600 }, { "epoch": 0.54, "learning_rate": 0.000438327191798534, "loss": 3.4925, "step": 50700 }, { "epoch": 0.54, "learning_rate": 0.0004380369610615722, "loss": 3.4945, "step": 50800 }, { "epoch": 0.54, "learning_rate": 0.00043774614558643, "loss": 3.4728, "step": 50900 }, { "epoch": 0.54, "learning_rate": 0.0004374547462774555, "loss": 3.5043, "step": 51000 }, { "epoch": 0.54, "learning_rate": 0.00043716276404081266, "loss": 3.465, "step": 51100 }, { "epoch": 0.54, "learning_rate": 0.0004368701997844781, "loss": 3.455, "step": 51200 }, { "epoch": 0.55, "learning_rate": 0.00043657705441823826, "loss": 3.4398, "step": 51300 }, { "epoch": 0.55, "learning_rate": 0.0004362833288536867, "loss": 3.4834, "step": 51400 }, { "epoch": 0.55, "learning_rate": 0.0004359890240042214, "loss": 3.5072, "step": 51500 }, { "epoch": 0.55, "learning_rate": 0.00043569414078504154, "loss": 3.4757, "step": 51600 }, { "epoch": 0.55, "learning_rate": 0.0004353986801131448, "loss": 3.4697, "step": 51700 }, { "epoch": 0.55, "learning_rate": 0.00043510264290732474, "loss": 3.5054, "step": 51800 }, { "epoch": 0.55, "learning_rate": 0.0004348060300881678, "loss": 3.507, "step": 51900 }, { "epoch": 0.55, "learning_rate": 0.00043450884257805014, "loss": 3.504, "step": 52000 }, { "epoch": 0.55, "learning_rate": 0.0004342110813011352, "loss": 3.5152, "step": 52100 }, { "epoch": 0.55, "learning_rate": 0.00043391274718337084, "loss": 3.4792, "step": 52200 }, { "epoch": 0.56, "learning_rate": 0.00043361384115248584, "loss": 3.521, "step": 52300 }, { "epoch": 0.56, "learning_rate": 0.0004333143641379879, "loss": 3.4768, "step": 52400 }, { "epoch": 0.56, "learning_rate": 0.00043301431707116014, "loss": 3.4492, "step": 52500 }, { "epoch": 0.56, "learning_rate": 0.0004327137008850582, "loss": 3.4987, "step": 52600 }, { "epoch": 0.56, "learning_rate": 0.0004324125165145077, "loss": 3.4817, "step": 52700 }, { "epoch": 0.56, "learning_rate": 0.00043211076489610135, "loss": 3.4574, "step": 52800 }, { "epoch": 0.56, "learning_rate": 0.0004318084469681952, "loss": 3.4127, "step": 52900 }, { "epoch": 0.56, "learning_rate": 0.00043150556367090704, "loss": 3.463, "step": 53000 }, { "epoch": 0.56, "learning_rate": 0.00043120211594611235, "loss": 3.5262, "step": 53100 }, { "epoch": 0.57, "learning_rate": 0.00043089810473744195, "loss": 3.5513, "step": 53200 }, { "epoch": 0.57, "learning_rate": 0.0004305935309902789, "loss": 3.4956, "step": 53300 }, { "epoch": 0.57, "learning_rate": 0.00043028839565175563, "loss": 3.5124, "step": 53400 }, { "epoch": 0.57, "learning_rate": 0.000429982699670751, "loss": 3.4991, "step": 53500 }, { "epoch": 0.57, "learning_rate": 0.0004296764439978871, "loss": 3.5046, "step": 53600 }, { "epoch": 0.57, "learning_rate": 0.0004293696295855266, "loss": 3.5114, "step": 53700 }, { "epoch": 0.57, "learning_rate": 0.0004290622573877698, "loss": 3.5114, "step": 53800 }, { "epoch": 0.57, "learning_rate": 0.00042875432836045145, "loss": 3.5104, "step": 53900 }, { "epoch": 0.57, "learning_rate": 0.0004284458434611378, "loss": 3.4757, "step": 54000 }, { "epoch": 0.58, "learning_rate": 0.0004281368036491237, "loss": 3.4637, "step": 54100 }, { "epoch": 0.58, "learning_rate": 0.00042782720988542976, "loss": 3.4404, "step": 54200 }, { "epoch": 0.58, "learning_rate": 0.0004275170631327991, "loss": 3.5348, "step": 54300 }, { "epoch": 0.58, "learning_rate": 0.0004272063643556945, "loss": 3.5154, "step": 54400 }, { "epoch": 0.58, "learning_rate": 0.00042689511452029526, "loss": 3.528, "step": 54500 }, { "epoch": 0.58, "learning_rate": 0.0004265833145944945, "loss": 3.5509, "step": 54600 }, { "epoch": 0.58, "learning_rate": 0.00042627096554789584, "loss": 3.5283, "step": 54700 }, { "epoch": 0.58, "learning_rate": 0.0004259580683518105, "loss": 3.4573, "step": 54800 }, { "epoch": 0.58, "learning_rate": 0.0004256446239792543, "loss": 3.5586, "step": 54900 }, { "epoch": 0.58, "learning_rate": 0.0004253306334049446, "loss": 3.5568, "step": 55000 }, { "epoch": 0.59, "learning_rate": 0.00042501609760529734, "loss": 3.5057, "step": 55100 }, { "epoch": 0.59, "learning_rate": 0.0004247010175584239, "loss": 3.5694, "step": 55200 }, { "epoch": 0.59, "learning_rate": 0.000424385394244128, "loss": 3.4765, "step": 55300 }, { "epoch": 0.59, "learning_rate": 0.000424069228643903, "loss": 3.5685, "step": 55400 }, { "epoch": 0.59, "learning_rate": 0.00042375252174092824, "loss": 3.5035, "step": 55500 }, { "epoch": 0.59, "learning_rate": 0.0004234352745200669, "loss": 3.4847, "step": 55600 }, { "epoch": 0.59, "learning_rate": 0.00042311748796786174, "loss": 3.4716, "step": 55700 }, { "epoch": 0.59, "learning_rate": 0.0004227991630725333, "loss": 3.4406, "step": 55800 }, { "epoch": 0.59, "learning_rate": 0.0004224803008239757, "loss": 3.4917, "step": 55900 }, { "epoch": 0.6, "learning_rate": 0.00042216090221375426, "loss": 3.5315, "step": 56000 }, { "epoch": 0.6, "learning_rate": 0.0004218409682351023, "loss": 3.4636, "step": 56100 }, { "epoch": 0.6, "learning_rate": 0.000421520499882918, "loss": 3.4861, "step": 56200 }, { "epoch": 0.6, "learning_rate": 0.0004211994981537609, "loss": 3.5376, "step": 56300 }, { "epoch": 0.6, "learning_rate": 0.00042087796404584977, "loss": 3.5678, "step": 56400 }, { "epoch": 0.6, "learning_rate": 0.00042055589855905846, "loss": 3.5243, "step": 56500 }, { "epoch": 0.6, "learning_rate": 0.00042023330269491346, "loss": 3.5343, "step": 56600 }, { "epoch": 0.6, "learning_rate": 0.0004199101774565905, "loss": 3.541, "step": 56700 }, { "epoch": 0.6, "learning_rate": 0.00041958652384891146, "loss": 3.4849, "step": 56800 }, { "epoch": 0.6, "learning_rate": 0.00041926234287834144, "loss": 3.525, "step": 56900 }, { "epoch": 0.61, "learning_rate": 0.00041893763555298527, "loss": 3.5095, "step": 57000 }, { "epoch": 0.61, "learning_rate": 0.00041861240288258483, "loss": 3.4635, "step": 57100 }, { "epoch": 0.61, "learning_rate": 0.0004182866458785155, "loss": 3.5302, "step": 57200 }, { "epoch": 0.61, "learning_rate": 0.00041796036555378325, "loss": 3.4834, "step": 57300 }, { "epoch": 0.61, "learning_rate": 0.0004176335629230213, "loss": 3.4757, "step": 57400 }, { "epoch": 0.61, "learning_rate": 0.00041730623900248717, "loss": 3.5522, "step": 57500 }, { "epoch": 0.61, "learning_rate": 0.0004169783948100595, "loss": 3.4441, "step": 57600 }, { "epoch": 0.61, "learning_rate": 0.0004166500313652347, "loss": 3.5152, "step": 57700 }, { "epoch": 0.61, "learning_rate": 0.00041632114968912404, "loss": 3.4957, "step": 57800 }, { "epoch": 0.62, "learning_rate": 0.0004159917508044502, "loss": 3.4784, "step": 57900 }, { "epoch": 0.62, "learning_rate": 0.0004156618357355442, "loss": 3.4779, "step": 58000 }, { "epoch": 0.62, "learning_rate": 0.00041533140550834225, "loss": 3.4575, "step": 58100 }, { "epoch": 0.62, "learning_rate": 0.0004150004611503828, "loss": 3.5332, "step": 58200 }, { "epoch": 0.62, "learning_rate": 0.0004146690036908028, "loss": 3.5279, "step": 58300 }, { "epoch": 0.62, "learning_rate": 0.00041433703416033485, "loss": 3.4968, "step": 58400 }, { "epoch": 0.62, "learning_rate": 0.00041400455359130397, "loss": 3.5489, "step": 58500 }, { "epoch": 0.62, "learning_rate": 0.00041367156301762444, "loss": 3.5178, "step": 58600 }, { "epoch": 0.62, "learning_rate": 0.0004133380634747963, "loss": 3.4902, "step": 58700 }, { "epoch": 0.63, "learning_rate": 0.0004130040559999025, "loss": 3.4997, "step": 58800 }, { "epoch": 0.63, "learning_rate": 0.0004126695416316054, "loss": 3.4763, "step": 58900 }, { "epoch": 0.63, "learning_rate": 0.0004123345214101438, "loss": 3.4574, "step": 59000 }, { "epoch": 0.63, "learning_rate": 0.00041199899637732934, "loss": 3.4456, "step": 59100 }, { "epoch": 0.63, "learning_rate": 0.00041166296757654366, "loss": 3.4863, "step": 59200 }, { "epoch": 0.63, "learning_rate": 0.0004113264360527348, "loss": 3.4747, "step": 59300 }, { "epoch": 0.63, "learning_rate": 0.0004109894028524143, "loss": 3.4851, "step": 59400 }, { "epoch": 0.63, "learning_rate": 0.0004106518690236536, "loss": 3.4395, "step": 59500 }, { "epoch": 0.63, "learning_rate": 0.000410313835616081, "loss": 3.4848, "step": 59600 }, { "epoch": 0.63, "learning_rate": 0.0004099753036808783, "loss": 3.4535, "step": 59700 }, { "epoch": 0.64, "learning_rate": 0.00040963627427077775, "loss": 3.4992, "step": 59800 }, { "epoch": 0.64, "learning_rate": 0.00040929674844005843, "loss": 3.4552, "step": 59900 }, { "epoch": 0.64, "learning_rate": 0.00040895672724454305, "loss": 3.5318, "step": 60000 }, { "epoch": 0.64, "learning_rate": 0.00040861621174159495, "loss": 3.5415, "step": 60100 }, { "epoch": 0.64, "learning_rate": 0.0004082752029901146, "loss": 3.5494, "step": 60200 }, { "epoch": 0.64, "learning_rate": 0.0004079337020505362, "loss": 3.4648, "step": 60300 }, { "epoch": 0.64, "learning_rate": 0.0004075917099848245, "loss": 3.5292, "step": 60400 }, { "epoch": 0.64, "learning_rate": 0.0004072492278564718, "loss": 3.5001, "step": 60500 }, { "epoch": 0.64, "learning_rate": 0.0004069062567304939, "loss": 3.4884, "step": 60600 }, { "epoch": 0.65, "learning_rate": 0.00040656279767342765, "loss": 3.4814, "step": 60700 }, { "epoch": 0.65, "learning_rate": 0.0004062188517533268, "loss": 3.4705, "step": 60800 }, { "epoch": 0.65, "learning_rate": 0.0004058744200397595, "loss": 3.4994, "step": 60900 }, { "epoch": 0.65, "learning_rate": 0.00040552950360380434, "loss": 3.4953, "step": 61000 }, { "epoch": 0.65, "learning_rate": 0.0004051841035180472, "loss": 3.521, "step": 61100 }, { "epoch": 0.65, "learning_rate": 0.0004048382208565784, "loss": 3.5164, "step": 61200 }, { "epoch": 0.65, "learning_rate": 0.0004044918566949882, "loss": 3.507, "step": 61300 }, { "epoch": 0.65, "learning_rate": 0.00040414501211036486, "loss": 3.4945, "step": 61400 }, { "epoch": 0.65, "learning_rate": 0.0004037976881812901, "loss": 3.4484, "step": 61500 }, { "epoch": 0.65, "learning_rate": 0.0004034498859878367, "loss": 3.4555, "step": 61600 }, { "epoch": 0.66, "learning_rate": 0.00040310160661156437, "loss": 3.4847, "step": 61700 }, { "epoch": 0.66, "learning_rate": 0.00040275285113551676, "loss": 3.4943, "step": 61800 }, { "epoch": 0.66, "learning_rate": 0.0004024036206442182, "loss": 3.5139, "step": 61900 }, { "epoch": 0.66, "learning_rate": 0.00040205391622367016, "loss": 3.4618, "step": 62000 }, { "epoch": 0.66, "learning_rate": 0.0004017037389613476, "loss": 3.4801, "step": 62100 }, { "epoch": 0.66, "learning_rate": 0.0004013530899461963, "loss": 3.5376, "step": 62200 }, { "epoch": 0.66, "learning_rate": 0.00040100197026862874, "loss": 3.5269, "step": 62300 }, { "epoch": 0.66, "learning_rate": 0.0004006503810205211, "loss": 3.4552, "step": 62400 }, { "epoch": 0.66, "learning_rate": 0.00040029832329520977, "loss": 3.5446, "step": 62500 }, { "epoch": 0.67, "learning_rate": 0.0003999457981874881, "loss": 3.5022, "step": 62600 }, { "epoch": 0.67, "learning_rate": 0.0003995928067936027, "loss": 3.5138, "step": 62700 }, { "epoch": 0.67, "learning_rate": 0.00039923935021125015, "loss": 3.4631, "step": 62800 }, { "epoch": 0.67, "learning_rate": 0.00039888542953957375, "loss": 3.444, "step": 62900 }, { "epoch": 0.67, "learning_rate": 0.00039853104587916005, "loss": 3.5034, "step": 63000 }, { "epoch": 0.67, "learning_rate": 0.0003981762003320351, "loss": 3.4181, "step": 63100 }, { "epoch": 0.67, "learning_rate": 0.00039782089400166155, "loss": 3.515, "step": 63200 }, { "epoch": 0.67, "learning_rate": 0.0003974651279929348, "loss": 3.5136, "step": 63300 }, { "epoch": 0.67, "learning_rate": 0.00039710890341217967, "loss": 3.489, "step": 63400 }, { "epoch": 0.67, "learning_rate": 0.00039675222136714705, "loss": 3.4723, "step": 63500 }, { "epoch": 0.68, "learning_rate": 0.00039639508296701045, "loss": 3.4656, "step": 63600 }, { "epoch": 0.68, "learning_rate": 0.0003960374893223625, "loss": 3.5267, "step": 63700 }, { "epoch": 0.68, "learning_rate": 0.0003956794415452115, "loss": 3.4808, "step": 63800 }, { "epoch": 0.68, "learning_rate": 0.00039532094074897785, "loss": 3.5218, "step": 63900 }, { "epoch": 0.68, "learning_rate": 0.00039496198804849083, "loss": 3.4797, "step": 64000 }, { "epoch": 0.68, "learning_rate": 0.00039460258455998497, "loss": 3.4921, "step": 64100 }, { "epoch": 0.68, "learning_rate": 0.0003942427314010967, "loss": 3.4503, "step": 64200 }, { "epoch": 0.68, "learning_rate": 0.00039388242969086066, "loss": 3.4776, "step": 64300 }, { "epoch": 0.68, "learning_rate": 0.0003935216805497063, "loss": 3.4785, "step": 64400 }, { "epoch": 0.69, "learning_rate": 0.00039316048509945457, "loss": 3.4773, "step": 64500 }, { "epoch": 0.69, "learning_rate": 0.00039279884446331436, "loss": 3.4693, "step": 64600 }, { "epoch": 0.69, "learning_rate": 0.00039243675976587876, "loss": 3.4932, "step": 64700 }, { "epoch": 0.69, "learning_rate": 0.00039207423213312204, "loss": 3.4971, "step": 64800 }, { "epoch": 0.69, "learning_rate": 0.00039171126269239555, "loss": 3.4892, "step": 64900 }, { "epoch": 0.69, "learning_rate": 0.00039134785257242467, "loss": 3.527, "step": 65000 }, { "epoch": 0.69, "learning_rate": 0.0003909840029033052, "loss": 3.4535, "step": 65100 }, { "epoch": 0.69, "learning_rate": 0.0003906197148164997, "loss": 3.5352, "step": 65200 }, { "epoch": 0.69, "learning_rate": 0.0003902549894448342, "loss": 3.4764, "step": 65300 }, { "epoch": 0.7, "learning_rate": 0.00038988982792249454, "loss": 3.4837, "step": 65400 }, { "epoch": 0.7, "learning_rate": 0.0003895242313850228, "loss": 3.4606, "step": 65500 }, { "epoch": 0.7, "learning_rate": 0.00038915820096931364, "loss": 3.4713, "step": 65600 }, { "epoch": 0.7, "learning_rate": 0.00038879173781361146, "loss": 3.4764, "step": 65700 }, { "epoch": 0.7, "learning_rate": 0.00038842484305750587, "loss": 3.4697, "step": 65800 }, { "epoch": 0.7, "learning_rate": 0.00038805751784192876, "loss": 3.4771, "step": 65900 }, { "epoch": 0.7, "learning_rate": 0.00038768976330915073, "loss": 3.542, "step": 66000 }, { "epoch": 0.7, "learning_rate": 0.0003873215806027773, "loss": 3.4594, "step": 66100 }, { "epoch": 0.7, "learning_rate": 0.0003869529708677456, "loss": 3.5125, "step": 66200 }, { "epoch": 0.7, "learning_rate": 0.0003865839352503206, "loss": 3.4936, "step": 66300 }, { "epoch": 0.71, "learning_rate": 0.0003862144748980917, "loss": 3.4315, "step": 66400 }, { "epoch": 0.71, "learning_rate": 0.00038584459095996883, "loss": 3.5041, "step": 66500 }, { "epoch": 0.71, "learning_rate": 0.0003854742845861796, "loss": 3.4987, "step": 66600 }, { "epoch": 0.71, "learning_rate": 0.00038510355692826504, "loss": 3.4537, "step": 66700 }, { "epoch": 0.71, "learning_rate": 0.0003847324091390761, "loss": 3.4199, "step": 66800 }, { "epoch": 0.71, "learning_rate": 0.0003843608423727706, "loss": 3.545, "step": 66900 }, { "epoch": 0.71, "learning_rate": 0.0003839888577848086, "loss": 3.4893, "step": 67000 }, { "epoch": 0.71, "learning_rate": 0.00038361645653195025, "loss": 3.5127, "step": 67100 }, { "epoch": 0.71, "learning_rate": 0.0003832436397722509, "loss": 3.4984, "step": 67200 }, { "epoch": 0.72, "learning_rate": 0.00038287040866505806, "loss": 3.4773, "step": 67300 }, { "epoch": 0.72, "learning_rate": 0.00038249676437100775, "loss": 3.5147, "step": 67400 }, { "epoch": 0.72, "learning_rate": 0.00038212270805202113, "loss": 3.4999, "step": 67500 }, { "epoch": 0.72, "learning_rate": 0.00038174824087130023, "loss": 3.4605, "step": 67600 }, { "epoch": 0.72, "learning_rate": 0.000381373363993325, "loss": 3.5478, "step": 67700 }, { "epoch": 0.72, "learning_rate": 0.00038099807858384935, "loss": 3.5431, "step": 67800 }, { "epoch": 0.72, "learning_rate": 0.0003806223858098976, "loss": 3.5196, "step": 67900 }, { "epoch": 0.72, "learning_rate": 0.0003802462868397609, "loss": 3.4565, "step": 68000 }, { "epoch": 0.72, "learning_rate": 0.00037986978284299346, "loss": 3.479, "step": 68100 }, { "epoch": 0.72, "learning_rate": 0.00037949287499040895, "loss": 3.4619, "step": 68200 }, { "epoch": 0.73, "learning_rate": 0.00037911556445407725, "loss": 3.5283, "step": 68300 }, { "epoch": 0.73, "learning_rate": 0.00037873785240731994, "loss": 3.4817, "step": 68400 }, { "epoch": 0.73, "learning_rate": 0.0003783597400247077, "loss": 3.4796, "step": 68500 }, { "epoch": 0.73, "learning_rate": 0.00037798122848205576, "loss": 3.4814, "step": 68600 }, { "epoch": 0.73, "learning_rate": 0.0003776023189564206, "loss": 3.5202, "step": 68700 }, { "epoch": 0.73, "learning_rate": 0.0003772230126260968, "loss": 3.4558, "step": 68800 }, { "epoch": 0.73, "learning_rate": 0.00037684331067061225, "loss": 3.5382, "step": 68900 }, { "epoch": 0.73, "learning_rate": 0.0003764632142707255, "loss": 3.4725, "step": 69000 }, { "epoch": 0.73, "learning_rate": 0.0003760827246084216, "loss": 3.5022, "step": 69100 }, { "epoch": 0.74, "learning_rate": 0.0003757018428669086, "loss": 3.4569, "step": 69200 }, { "epoch": 0.74, "learning_rate": 0.0003753205702306135, "loss": 3.4843, "step": 69300 }, { "epoch": 0.74, "learning_rate": 0.00037493890788517937, "loss": 3.5093, "step": 69400 }, { "epoch": 0.74, "learning_rate": 0.0003745568570174607, "loss": 3.439, "step": 69500 }, { "epoch": 0.74, "learning_rate": 0.00037417441881552036, "loss": 3.5336, "step": 69600 }, { "epoch": 0.74, "learning_rate": 0.0003737915944686258, "loss": 3.4631, "step": 69700 }, { "epoch": 0.74, "learning_rate": 0.00037340838516724514, "loss": 3.4551, "step": 69800 }, { "epoch": 0.74, "learning_rate": 0.0003730247921030436, "loss": 3.4739, "step": 69900 }, { "epoch": 0.74, "learning_rate": 0.0003726408164688797, "loss": 3.448, "step": 70000 }, { "epoch": 0.75, "learning_rate": 0.00037225645945880196, "loss": 3.4937, "step": 70100 }, { "epoch": 0.75, "learning_rate": 0.00037187172226804433, "loss": 3.473, "step": 70200 }, { "epoch": 0.75, "learning_rate": 0.00037148660609302367, "loss": 3.5094, "step": 70300 }, { "epoch": 0.75, "learning_rate": 0.00037110111213133475, "loss": 3.5025, "step": 70400 }, { "epoch": 0.75, "learning_rate": 0.0003707152415817476, "loss": 3.51, "step": 70500 }, { "epoch": 0.75, "learning_rate": 0.000370328995644203, "loss": 3.5018, "step": 70600 }, { "epoch": 0.75, "learning_rate": 0.0003699423755198092, "loss": 3.5345, "step": 70700 }, { "epoch": 0.75, "learning_rate": 0.0003695553824108381, "loss": 3.5203, "step": 70800 }, { "epoch": 0.75, "learning_rate": 0.00036916801752072154, "loss": 3.4331, "step": 70900 }, { "epoch": 0.75, "learning_rate": 0.0003687802820540473, "loss": 3.4551, "step": 71000 }, { "epoch": 0.76, "learning_rate": 0.0003683921772165556, "loss": 3.4541, "step": 71100 }, { "epoch": 0.76, "learning_rate": 0.0003680037042151353, "loss": 3.4983, "step": 71200 }, { "epoch": 0.76, "learning_rate": 0.00036761486425782025, "loss": 3.4602, "step": 71300 }, { "epoch": 0.76, "learning_rate": 0.00036722565855378534, "loss": 3.5578, "step": 71400 }, { "epoch": 0.76, "learning_rate": 0.0003668360883133426, "loss": 3.4924, "step": 71500 }, { "epoch": 0.76, "learning_rate": 0.0003664461547479381, "loss": 3.5228, "step": 71600 }, { "epoch": 0.76, "learning_rate": 0.00036605585907014727, "loss": 3.4527, "step": 71700 }, { "epoch": 0.76, "learning_rate": 0.00036566520249367216, "loss": 3.5135, "step": 71800 }, { "epoch": 0.76, "learning_rate": 0.00036527418623333655, "loss": 3.4777, "step": 71900 }, { "epoch": 0.77, "learning_rate": 0.00036488281150508293, "loss": 3.4908, "step": 72000 }, { "epoch": 0.77, "learning_rate": 0.0003644910795259687, "loss": 3.4532, "step": 72100 }, { "epoch": 0.77, "learning_rate": 0.00036409899151416194, "loss": 3.4572, "step": 72200 }, { "epoch": 0.77, "learning_rate": 0.00036370654868893813, "loss": 3.5133, "step": 72300 }, { "epoch": 0.77, "learning_rate": 0.0003633137522706758, "loss": 3.528, "step": 72400 }, { "epoch": 0.77, "learning_rate": 0.0003629206034808534, "loss": 3.5001, "step": 72500 }, { "epoch": 0.77, "learning_rate": 0.00036252710354204486, "loss": 3.5106, "step": 72600 }, { "epoch": 0.77, "learning_rate": 0.0003621332536779162, "loss": 3.5027, "step": 72700 }, { "epoch": 0.77, "learning_rate": 0.0003617390551132216, "loss": 3.4608, "step": 72800 }, { "epoch": 0.77, "learning_rate": 0.00036134450907379965, "loss": 3.4917, "step": 72900 }, { "epoch": 0.78, "learning_rate": 0.00036094961678656936, "loss": 3.4839, "step": 73000 }, { "epoch": 0.78, "learning_rate": 0.00036055437947952654, "loss": 3.5027, "step": 73100 }, { "epoch": 0.78, "learning_rate": 0.00036015879838173986, "loss": 3.4551, "step": 73200 }, { "epoch": 0.78, "learning_rate": 0.00035976287472334716, "loss": 3.4427, "step": 73300 }, { "epoch": 0.78, "learning_rate": 0.00035936660973555145, "loss": 3.4986, "step": 73400 }, { "epoch": 0.78, "learning_rate": 0.00035897000465061725, "loss": 3.449, "step": 73500 }, { "epoch": 0.78, "learning_rate": 0.0003585730607018667, "loss": 3.4778, "step": 73600 }, { "epoch": 0.78, "learning_rate": 0.00035817577912367537, "loss": 3.5685, "step": 73700 }, { "epoch": 0.78, "learning_rate": 0.0003577781611514694, "loss": 3.5013, "step": 73800 }, { "epoch": 0.79, "learning_rate": 0.0003573802080217203, "loss": 3.4755, "step": 73900 }, { "epoch": 0.79, "learning_rate": 0.0003569819209719425, "loss": 3.4971, "step": 74000 }, { "epoch": 0.79, "learning_rate": 0.00035658330124068815, "loss": 3.5367, "step": 74100 }, { "epoch": 0.79, "learning_rate": 0.0003561843500675445, "loss": 3.4988, "step": 74200 }, { "epoch": 0.79, "learning_rate": 0.0003557850686931292, "loss": 3.4971, "step": 74300 }, { "epoch": 0.79, "learning_rate": 0.00035538545835908674, "loss": 3.471, "step": 74400 }, { "epoch": 0.79, "learning_rate": 0.00035498552030808464, "loss": 3.4602, "step": 74500 }, { "epoch": 0.79, "learning_rate": 0.0003545852557838095, "loss": 3.4899, "step": 74600 }, { "epoch": 0.79, "learning_rate": 0.0003541846660309631, "loss": 3.4727, "step": 74700 }, { "epoch": 0.8, "learning_rate": 0.0003537837522952587, "loss": 3.4743, "step": 74800 }, { "epoch": 0.8, "learning_rate": 0.00035338251582341703, "loss": 3.4798, "step": 74900 }, { "epoch": 0.8, "learning_rate": 0.0003529809578631622, "loss": 3.4574, "step": 75000 }, { "epoch": 0.8, "learning_rate": 0.00035257907966321846, "loss": 3.4465, "step": 75100 }, { "epoch": 0.8, "learning_rate": 0.00035217688247330553, "loss": 3.4908, "step": 75200 }, { "epoch": 0.8, "learning_rate": 0.0003517743675441353, "loss": 3.4411, "step": 75300 }, { "epoch": 0.8, "learning_rate": 0.00035137153612740767, "loss": 3.5287, "step": 75400 }, { "epoch": 0.8, "learning_rate": 0.0003509683894758068, "loss": 3.4939, "step": 75500 }, { "epoch": 0.8, "learning_rate": 0.0003505649288429969, "loss": 3.4693, "step": 75600 }, { "epoch": 0.8, "learning_rate": 0.00035016115548361886, "loss": 3.5589, "step": 75700 }, { "epoch": 0.81, "learning_rate": 0.0003497570706532859, "loss": 3.5431, "step": 75800 }, { "epoch": 0.81, "learning_rate": 0.0003493526756085799, "loss": 3.5008, "step": 75900 }, { "epoch": 0.81, "learning_rate": 0.00034894797160704737, "loss": 3.4913, "step": 76000 }, { "epoch": 0.81, "learning_rate": 0.0003485429599071954, "loss": 3.5414, "step": 76100 }, { "epoch": 0.81, "learning_rate": 0.00034813764176848833, "loss": 3.4656, "step": 76200 }, { "epoch": 0.81, "learning_rate": 0.000347732018451343, "loss": 3.5164, "step": 76300 }, { "epoch": 0.81, "learning_rate": 0.00034732609121712566, "loss": 3.5187, "step": 76400 }, { "epoch": 0.81, "learning_rate": 0.00034691986132814737, "loss": 3.4221, "step": 76500 }, { "epoch": 0.81, "learning_rate": 0.0003465133300476604, "loss": 3.4573, "step": 76600 }, { "epoch": 0.82, "learning_rate": 0.00034610649863985434, "loss": 3.4997, "step": 76700 }, { "epoch": 0.82, "learning_rate": 0.0003456993683698521, "loss": 3.4785, "step": 76800 }, { "epoch": 0.82, "learning_rate": 0.0003452919405037057, "loss": 3.461, "step": 76900 }, { "epoch": 0.82, "learning_rate": 0.00034488421630839307, "loss": 3.4799, "step": 77000 }, { "epoch": 0.82, "learning_rate": 0.0003444761970518133, "loss": 3.4866, "step": 77100 }, { "epoch": 0.82, "learning_rate": 0.000344067884002783, "loss": 3.4616, "step": 77200 }, { "epoch": 0.82, "learning_rate": 0.0003436592784310325, "loss": 3.5271, "step": 77300 }, { "epoch": 0.82, "learning_rate": 0.00034325038160720186, "loss": 3.5399, "step": 77400 }, { "epoch": 0.82, "learning_rate": 0.0003428411948028367, "loss": 3.4931, "step": 77500 }, { "epoch": 0.82, "learning_rate": 0.0003424317192903844, "loss": 3.481, "step": 77600 }, { "epoch": 0.83, "learning_rate": 0.00034202195634319026, "loss": 3.4759, "step": 77700 }, { "epoch": 0.83, "learning_rate": 0.0003416119072354933, "loss": 3.4805, "step": 77800 }, { "epoch": 0.83, "learning_rate": 0.0003412015732424225, "loss": 3.4803, "step": 77900 }, { "epoch": 0.83, "learning_rate": 0.00034079095563999264, "loss": 3.4571, "step": 78000 }, { "epoch": 0.83, "learning_rate": 0.00034038005570510046, "loss": 3.5024, "step": 78100 }, { "epoch": 0.83, "learning_rate": 0.00033996887471552084, "loss": 3.512, "step": 78200 }, { "epoch": 0.83, "learning_rate": 0.00033955741394990234, "loss": 3.4331, "step": 78300 }, { "epoch": 0.83, "learning_rate": 0.00033914567468776394, "loss": 3.4274, "step": 78400 }, { "epoch": 0.83, "learning_rate": 0.00033873365820949025, "loss": 3.5295, "step": 78500 }, { "epoch": 0.84, "learning_rate": 0.00033832136579632833, "loss": 3.4613, "step": 78600 }, { "epoch": 0.84, "learning_rate": 0.0003379087987303829, "loss": 3.459, "step": 78700 }, { "epoch": 0.84, "learning_rate": 0.00033749595829461304, "loss": 3.4423, "step": 78800 }, { "epoch": 0.84, "learning_rate": 0.00033708284577282796, "loss": 3.5483, "step": 78900 }, { "epoch": 0.84, "learning_rate": 0.0003366694624496828, "loss": 3.4994, "step": 79000 }, { "epoch": 0.84, "learning_rate": 0.0003362558096106749, "loss": 3.4706, "step": 79100 }, { "epoch": 0.84, "learning_rate": 0.00033584188854213974, "loss": 3.5044, "step": 79200 }, { "epoch": 0.84, "learning_rate": 0.00033542770053124696, "loss": 3.455, "step": 79300 }, { "epoch": 0.84, "learning_rate": 0.000335013246865996, "loss": 3.5131, "step": 79400 }, { "epoch": 0.85, "learning_rate": 0.0003345985288352129, "loss": 3.5119, "step": 79500 }, { "epoch": 0.85, "learning_rate": 0.0003341835477285453, "loss": 3.5121, "step": 79600 }, { "epoch": 0.85, "learning_rate": 0.00033376830483645937, "loss": 3.4693, "step": 79700 }, { "epoch": 0.85, "learning_rate": 0.00033335280145023493, "loss": 3.4531, "step": 79800 }, { "epoch": 0.85, "learning_rate": 0.00033293703886196226, "loss": 3.4548, "step": 79900 }, { "epoch": 0.85, "learning_rate": 0.00033252101836453733, "loss": 3.5033, "step": 80000 }, { "epoch": 0.85, "learning_rate": 0.00033210474125165853, "loss": 3.4889, "step": 80100 }, { "epoch": 0.85, "learning_rate": 0.0003316882088178217, "loss": 3.4725, "step": 80200 }, { "epoch": 0.85, "learning_rate": 0.00033127142235831716, "loss": 3.4618, "step": 80300 }, { "epoch": 0.85, "learning_rate": 0.0003308543831692249, "loss": 3.4913, "step": 80400 }, { "epoch": 0.86, "learning_rate": 0.0003304370925474109, "loss": 3.4637, "step": 80500 }, { "epoch": 0.86, "learning_rate": 0.0003300195517905231, "loss": 3.4736, "step": 80600 }, { "epoch": 0.86, "learning_rate": 0.000329601762196987, "loss": 3.4883, "step": 80700 }, { "epoch": 0.86, "learning_rate": 0.0003291837250660023, "loss": 3.4966, "step": 80800 }, { "epoch": 0.86, "learning_rate": 0.0003287654416975382, "loss": 3.4885, "step": 80900 }, { "epoch": 0.86, "learning_rate": 0.0003283469133923297, "loss": 3.4405, "step": 81000 }, { "epoch": 0.86, "learning_rate": 0.00032792814145187344, "loss": 3.5012, "step": 81100 }, { "epoch": 0.86, "learning_rate": 0.00032750912717842385, "loss": 3.4704, "step": 81200 }, { "epoch": 0.86, "learning_rate": 0.0003270898718749886, "loss": 3.5264, "step": 81300 }, { "epoch": 0.87, "learning_rate": 0.0003266703768453253, "loss": 3.4999, "step": 81400 }, { "epoch": 0.87, "learning_rate": 0.00032625064339393686, "loss": 3.5004, "step": 81500 }, { "epoch": 0.87, "learning_rate": 0.0003258306728260674, "loss": 3.4246, "step": 81600 }, { "epoch": 0.87, "learning_rate": 0.00032541046644769876, "loss": 3.4751, "step": 81700 }, { "epoch": 0.87, "learning_rate": 0.0003249900255655459, "loss": 3.5038, "step": 81800 }, { "epoch": 0.87, "learning_rate": 0.00032456935148705303, "loss": 3.5146, "step": 81900 }, { "epoch": 0.87, "learning_rate": 0.0003241484455203895, "loss": 3.4961, "step": 82000 }, { "epoch": 0.87, "learning_rate": 0.0003237273089744458, "loss": 3.4722, "step": 82100 }, { "epoch": 0.87, "learning_rate": 0.00032330594315882943, "loss": 3.4567, "step": 82200 }, { "epoch": 0.87, "learning_rate": 0.000322884349383861, "loss": 3.5115, "step": 82300 }, { "epoch": 0.88, "learning_rate": 0.0003224625289605696, "loss": 3.4695, "step": 82400 }, { "epoch": 0.88, "learning_rate": 0.00032204048320068964, "loss": 3.4923, "step": 82500 }, { "epoch": 0.88, "learning_rate": 0.0003216182134166559, "loss": 3.5214, "step": 82600 }, { "epoch": 0.88, "learning_rate": 0.00032119572092160006, "loss": 3.4668, "step": 82700 }, { "epoch": 0.88, "learning_rate": 0.00032077300702934607, "loss": 3.4876, "step": 82800 }, { "epoch": 0.88, "learning_rate": 0.00032035007305440655, "loss": 3.5109, "step": 82900 }, { "epoch": 0.88, "learning_rate": 0.00031992692031197853, "loss": 3.4911, "step": 83000 }, { "epoch": 0.88, "learning_rate": 0.0003195035501179392, "loss": 3.4561, "step": 83100 }, { "epoch": 0.88, "learning_rate": 0.0003190799637888423, "loss": 3.4836, "step": 83200 }, { "epoch": 0.89, "learning_rate": 0.00031865616264191313, "loss": 3.51, "step": 83300 }, { "epoch": 0.89, "learning_rate": 0.0003182321479950454, "loss": 3.5249, "step": 83400 }, { "epoch": 0.89, "learning_rate": 0.0003178079211667967, "loss": 3.4827, "step": 83500 }, { "epoch": 0.89, "learning_rate": 0.00031738348347638444, "loss": 3.4877, "step": 83600 }, { "epoch": 0.89, "learning_rate": 0.0003169588362436816, "loss": 3.438, "step": 83700 }, { "epoch": 0.89, "learning_rate": 0.0003165339807892129, "loss": 3.412, "step": 83800 }, { "epoch": 0.89, "learning_rate": 0.00031610891843415046, "loss": 3.4433, "step": 83900 }, { "epoch": 0.89, "learning_rate": 0.0003156836505003101, "loss": 3.4302, "step": 84000 }, { "epoch": 0.89, "learning_rate": 0.0003152581783101465, "loss": 3.44, "step": 84100 }, { "epoch": 0.9, "learning_rate": 0.0003148325031867498, "loss": 3.4679, "step": 84200 }, { "epoch": 0.9, "learning_rate": 0.00031440662645384115, "loss": 3.5349, "step": 84300 }, { "epoch": 0.9, "learning_rate": 0.0003139805494357685, "loss": 3.4902, "step": 84400 }, { "epoch": 0.9, "learning_rate": 0.00031355427345750286, "loss": 3.4661, "step": 84500 }, { "epoch": 0.9, "learning_rate": 0.0003131277998446338, "loss": 3.5095, "step": 84600 }, { "epoch": 0.9, "learning_rate": 0.0003127011299233656, "loss": 3.4287, "step": 84700 }, { "epoch": 0.9, "learning_rate": 0.00031227426502051267, "loss": 3.5032, "step": 84800 }, { "epoch": 0.9, "learning_rate": 0.0003118472064634961, "loss": 3.4561, "step": 84900 }, { "epoch": 0.9, "learning_rate": 0.00031141995558033915, "loss": 3.4984, "step": 85000 }, { "epoch": 0.9, "learning_rate": 0.0003109925136996631, "loss": 3.4991, "step": 85100 }, { "epoch": 0.91, "learning_rate": 0.00031056488215068295, "loss": 3.4838, "step": 85200 }, { "epoch": 0.91, "learning_rate": 0.00031013706226320386, "loss": 3.5089, "step": 85300 }, { "epoch": 0.91, "learning_rate": 0.0003097090553676165, "loss": 3.5005, "step": 85400 }, { "epoch": 0.91, "learning_rate": 0.0003092808627948931, "loss": 3.4652, "step": 85500 }, { "epoch": 0.91, "learning_rate": 0.00030885248587658336, "loss": 3.4901, "step": 85600 }, { "epoch": 0.91, "learning_rate": 0.0003084239259448101, "loss": 3.5131, "step": 85700 }, { "epoch": 0.91, "learning_rate": 0.0003079951843322653, "loss": 3.4778, "step": 85800 }, { "epoch": 0.91, "learning_rate": 0.0003075662623722059, "loss": 3.4706, "step": 85900 }, { "epoch": 0.91, "learning_rate": 0.0003071371613984498, "loss": 3.4887, "step": 86000 }, { "epoch": 0.92, "learning_rate": 0.0003067078827453715, "loss": 3.4765, "step": 86100 }, { "epoch": 0.92, "learning_rate": 0.00030627842774789797, "loss": 3.5069, "step": 86200 }, { "epoch": 0.92, "learning_rate": 0.0003058487977415046, "loss": 3.4825, "step": 86300 }, { "epoch": 0.92, "learning_rate": 0.0003054189940622109, "loss": 3.45, "step": 86400 }, { "epoch": 0.92, "learning_rate": 0.00030498901804657674, "loss": 3.4677, "step": 86500 }, { "epoch": 0.92, "learning_rate": 0.0003045588710316976, "loss": 3.4663, "step": 86600 }, { "epoch": 0.92, "learning_rate": 0.00030412855435520093, "loss": 3.507, "step": 86700 }, { "epoch": 0.92, "learning_rate": 0.0003036980693552415, "loss": 3.461, "step": 86800 }, { "epoch": 0.92, "learning_rate": 0.0003032674173704979, "loss": 3.4856, "step": 86900 }, { "epoch": 0.92, "learning_rate": 0.00030283659974016764, "loss": 3.554, "step": 87000 }, { "epoch": 0.93, "learning_rate": 0.00030240561780396364, "loss": 3.5279, "step": 87100 }, { "epoch": 0.93, "learning_rate": 0.00030197447290210945, "loss": 3.4643, "step": 87200 }, { "epoch": 0.93, "learning_rate": 0.0003015431663753357, "loss": 3.5135, "step": 87300 }, { "epoch": 0.93, "learning_rate": 0.0003011116995648753, "loss": 3.5021, "step": 87400 }, { "epoch": 0.93, "learning_rate": 0.00030068007381245994, "loss": 3.5225, "step": 87500 }, { "epoch": 0.93, "learning_rate": 0.0003002482904603153, "loss": 3.4466, "step": 87600 }, { "epoch": 0.93, "learning_rate": 0.00029981635085115727, "loss": 3.4411, "step": 87700 }, { "epoch": 0.93, "learning_rate": 0.00029938425632818766, "loss": 3.4759, "step": 87800 }, { "epoch": 0.93, "learning_rate": 0.00029895200823508997, "loss": 3.4777, "step": 87900 }, { "epoch": 0.94, "learning_rate": 0.0002985196079160252, "loss": 3.4094, "step": 88000 }, { "epoch": 0.94, "learning_rate": 0.00029808705671562796, "loss": 3.4614, "step": 88100 }, { "epoch": 0.94, "learning_rate": 0.00029765435597900187, "loss": 3.4643, "step": 88200 }, { "epoch": 0.94, "learning_rate": 0.0002972215070517154, "loss": 3.4988, "step": 88300 }, { "epoch": 0.94, "learning_rate": 0.00029678851127979826, "loss": 3.4302, "step": 88400 }, { "epoch": 0.94, "learning_rate": 0.0002963553700097364, "loss": 3.505, "step": 88500 }, { "epoch": 0.94, "learning_rate": 0.0002959220845884686, "loss": 3.4842, "step": 88600 }, { "epoch": 0.94, "learning_rate": 0.0002954886563633815, "loss": 3.4964, "step": 88700 }, { "epoch": 0.94, "learning_rate": 0.0002950550866823062, "loss": 3.4869, "step": 88800 }, { "epoch": 0.94, "learning_rate": 0.00029462137689351337, "loss": 3.4682, "step": 88900 }, { "epoch": 0.95, "learning_rate": 0.0002941875283457096, "loss": 3.4562, "step": 89000 }, { "epoch": 0.95, "learning_rate": 0.00029375354238803293, "loss": 3.4402, "step": 89100 }, { "epoch": 0.95, "learning_rate": 0.00029331942037004856, "loss": 3.4809, "step": 89200 }, { "epoch": 0.95, "learning_rate": 0.00029288516364174506, "loss": 3.4746, "step": 89300 }, { "epoch": 0.95, "learning_rate": 0.0002924507735535296, "loss": 3.4674, "step": 89400 }, { "epoch": 0.95, "learning_rate": 0.0002920162514562243, "loss": 3.5435, "step": 89500 }, { "epoch": 0.95, "learning_rate": 0.0002915815987010616, "loss": 3.5372, "step": 89600 }, { "epoch": 0.95, "learning_rate": 0.0002911468166396805, "loss": 3.4674, "step": 89700 }, { "epoch": 0.95, "learning_rate": 0.00029071190662412183, "loss": 3.4573, "step": 89800 }, { "epoch": 0.96, "learning_rate": 0.0002902768700068245, "loss": 3.4869, "step": 89900 }, { "epoch": 0.96, "learning_rate": 0.000289841708140621, "loss": 3.521, "step": 90000 }, { "epoch": 0.96, "learning_rate": 0.0002894064223787334, "loss": 3.4807, "step": 90100 }, { "epoch": 0.96, "learning_rate": 0.000288971014074769, "loss": 3.4656, "step": 90200 }, { "epoch": 0.96, "learning_rate": 0.00028853548458271616, "loss": 3.4716, "step": 90300 }, { "epoch": 0.96, "learning_rate": 0.00028809983525694016, "loss": 3.5101, "step": 90400 }, { "epoch": 0.96, "learning_rate": 0.00028766406745217875, "loss": 3.4879, "step": 90500 }, { "epoch": 0.96, "learning_rate": 0.0002872281825235385, "loss": 3.5152, "step": 90600 }, { "epoch": 0.96, "learning_rate": 0.0002867921818264897, "loss": 3.4802, "step": 90700 }, { "epoch": 0.97, "learning_rate": 0.00028635606671686297, "loss": 3.4585, "step": 90800 }, { "epoch": 0.97, "learning_rate": 0.0002859198385508447, "loss": 3.4754, "step": 90900 }, { "epoch": 0.97, "learning_rate": 0.00028548349868497266, "loss": 3.4861, "step": 91000 }, { "epoch": 0.97, "learning_rate": 0.00028504704847613215, "loss": 3.4643, "step": 91100 }, { "epoch": 0.97, "learning_rate": 0.00028461048928155166, "loss": 3.4961, "step": 91200 }, { "epoch": 0.97, "learning_rate": 0.00028417382245879836, "loss": 3.5433, "step": 91300 }, { "epoch": 0.97, "learning_rate": 0.00028373704936577427, "loss": 3.5039, "step": 91400 }, { "epoch": 0.97, "learning_rate": 0.0002833001713607119, "loss": 3.5384, "step": 91500 }, { "epoch": 0.97, "learning_rate": 0.00028286318980216986, "loss": 3.4986, "step": 91600 }, { "epoch": 0.97, "learning_rate": 0.000282426106049029, "loss": 3.4655, "step": 91700 }, { "epoch": 0.98, "learning_rate": 0.0002819889214604877, "loss": 3.4605, "step": 91800 }, { "epoch": 0.98, "learning_rate": 0.0002815516373960582, "loss": 3.5204, "step": 91900 }, { "epoch": 0.98, "learning_rate": 0.00028111425521556174, "loss": 3.4644, "step": 92000 }, { "epoch": 0.98, "learning_rate": 0.000280676776279125, "loss": 3.4554, "step": 92100 }, { "epoch": 0.98, "learning_rate": 0.00028023920194717534, "loss": 3.4838, "step": 92200 }, { "epoch": 0.98, "learning_rate": 0.0002798015335804369, "loss": 3.4925, "step": 92300 }, { "epoch": 0.98, "learning_rate": 0.00027936377253992594, "loss": 3.5146, "step": 92400 }, { "epoch": 0.98, "learning_rate": 0.0002789259201869474, "loss": 3.4192, "step": 92500 }, { "epoch": 0.98, "learning_rate": 0.00027848797788308983, "loss": 3.4699, "step": 92600 }, { "epoch": 0.99, "learning_rate": 0.00027804994699022153, "loss": 3.5011, "step": 92700 }, { "epoch": 0.99, "learning_rate": 0.00027761182887048633, "loss": 3.4562, "step": 92800 }, { "epoch": 0.99, "learning_rate": 0.0002771736248862994, "loss": 3.5495, "step": 92900 }, { "epoch": 0.99, "learning_rate": 0.00027673533640034276, "loss": 3.4774, "step": 93000 }, { "epoch": 0.99, "learning_rate": 0.00027629696477556135, "loss": 3.4738, "step": 93100 }, { "epoch": 0.99, "learning_rate": 0.00027585851137515855, "loss": 3.4803, "step": 93200 }, { "epoch": 0.99, "learning_rate": 0.00027541997756259196, "loss": 3.4923, "step": 93300 }, { "epoch": 0.99, "learning_rate": 0.00027498136470156955, "loss": 3.4801, "step": 93400 }, { "epoch": 0.99, "learning_rate": 0.00027454267415604464, "loss": 3.456, "step": 93500 }, { "epoch": 0.99, "learning_rate": 0.00027410390729021273, "loss": 3.5121, "step": 93600 }, { "epoch": 1.0, "learning_rate": 0.000273665065468506, "loss": 3.4838, "step": 93700 }, { "epoch": 1.0, "learning_rate": 0.0002732261500555901, "loss": 3.4894, "step": 93800 }, { "epoch": 1.0, "learning_rate": 0.0002727871624163596, "loss": 3.4759, "step": 93900 }, { "epoch": 1.0, "learning_rate": 0.0002723481039159334, "loss": 3.541, "step": 94000 }, { "epoch": 1.0, "learning_rate": 0.0002719089759196509, "loss": 3.467, "step": 94100 }, { "epoch": 1.0, "learning_rate": 0.0002714697797930675, "loss": 3.4861, "step": 94200 }, { "epoch": 1.0, "learning_rate": 0.00027103051690195083, "loss": 3.4222, "step": 94300 }, { "epoch": 1.0, "learning_rate": 0.0002705911886122757, "loss": 3.4357, "step": 94400 }, { "epoch": 1.0, "learning_rate": 0.00027015179629022034, "loss": 3.4971, "step": 94500 }, { "epoch": 1.01, "learning_rate": 0.0002697123413021624, "loss": 3.4512, "step": 94600 }, { "epoch": 1.01, "learning_rate": 0.00026927282501467423, "loss": 3.4607, "step": 94700 }, { "epoch": 1.01, "learning_rate": 0.00026883324879451863, "loss": 3.4858, "step": 94800 }, { "epoch": 1.01, "learning_rate": 0.00026839361400864505, "loss": 3.495, "step": 94900 }, { "epoch": 1.01, "learning_rate": 0.000267953922024185, "loss": 3.4637, "step": 95000 }, { "epoch": 1.01, "learning_rate": 0.0002675141742084477, "loss": 3.4934, "step": 95100 }, { "epoch": 1.01, "learning_rate": 0.0002670743719289161, "loss": 3.4604, "step": 95200 }, { "epoch": 1.01, "learning_rate": 0.00026663451655324253, "loss": 3.5176, "step": 95300 }, { "epoch": 1.01, "learning_rate": 0.0002661946094492446, "loss": 3.5227, "step": 95400 }, { "epoch": 1.02, "learning_rate": 0.0002657546519849003, "loss": 3.5155, "step": 95500 }, { "epoch": 1.02, "learning_rate": 0.00026531464552834465, "loss": 3.5119, "step": 95600 }, { "epoch": 1.02, "learning_rate": 0.0002648745914478649, "loss": 3.5198, "step": 95700 }, { "epoch": 1.02, "learning_rate": 0.0002644344911118965, "loss": 3.4797, "step": 95800 }, { "epoch": 1.02, "learning_rate": 0.00026399434588901836, "loss": 3.4992, "step": 95900 }, { "epoch": 1.02, "learning_rate": 0.00026355415714794954, "loss": 3.544, "step": 96000 }, { "epoch": 1.02, "learning_rate": 0.00026311392625754385, "loss": 3.5146, "step": 96100 }, { "epoch": 1.02, "learning_rate": 0.0002626736545867867, "loss": 3.4587, "step": 96200 }, { "epoch": 1.02, "learning_rate": 0.0002622333435047899, "loss": 3.4642, "step": 96300 }, { "epoch": 1.02, "learning_rate": 0.000261792994380788, "loss": 3.5103, "step": 96400 }, { "epoch": 1.03, "learning_rate": 0.000261352608584134, "loss": 3.4636, "step": 96500 }, { "epoch": 1.03, "learning_rate": 0.0002609121874842945, "loss": 3.4958, "step": 96600 }, { "epoch": 1.03, "learning_rate": 0.0002604717324508464, "loss": 3.4457, "step": 96700 }, { "epoch": 1.03, "learning_rate": 0.00026003124485347184, "loss": 3.48, "step": 96800 }, { "epoch": 1.03, "learning_rate": 0.00025959072606195424, "loss": 3.5188, "step": 96900 }, { "epoch": 1.03, "learning_rate": 0.0002591501774461739, "loss": 3.4497, "step": 97000 }, { "epoch": 1.03, "learning_rate": 0.00025870960037610417, "loss": 3.5086, "step": 97100 }, { "epoch": 1.03, "learning_rate": 0.00025826899622180674, "loss": 3.5139, "step": 97200 }, { "epoch": 1.03, "learning_rate": 0.0002578283663534275, "loss": 3.4254, "step": 97300 }, { "epoch": 1.04, "learning_rate": 0.00025738771214119224, "loss": 3.5688, "step": 97400 }, { "epoch": 1.04, "learning_rate": 0.00025694703495540255, "loss": 3.4497, "step": 97500 }, { "epoch": 1.04, "learning_rate": 0.00025650633616643143, "loss": 3.4947, "step": 97600 }, { "epoch": 1.04, "learning_rate": 0.00025606561714471915, "loss": 3.4521, "step": 97700 }, { "epoch": 1.04, "learning_rate": 0.00025562487926076877, "loss": 3.4697, "step": 97800 }, { "epoch": 1.04, "learning_rate": 0.000255184123885142, "loss": 3.4379, "step": 97900 }, { "epoch": 1.04, "learning_rate": 0.0002547433523884551, "loss": 3.528, "step": 98000 }, { "epoch": 1.04, "learning_rate": 0.0002543025661413742, "loss": 3.4807, "step": 98100 }, { "epoch": 1.04, "learning_rate": 0.00025386176651461163, "loss": 3.4575, "step": 98200 }, { "epoch": 1.04, "learning_rate": 0.00025342095487892097, "loss": 3.4028, "step": 98300 }, { "epoch": 1.05, "learning_rate": 0.0002529801326050935, "loss": 3.4966, "step": 98400 }, { "epoch": 1.05, "learning_rate": 0.00025253930106395337, "loss": 3.4547, "step": 98500 }, { "epoch": 1.05, "learning_rate": 0.00025209846162635343, "loss": 3.4785, "step": 98600 }, { "epoch": 1.05, "learning_rate": 0.00025165761566317134, "loss": 3.4614, "step": 98700 }, { "epoch": 1.05, "learning_rate": 0.00025121676454530506, "loss": 3.4976, "step": 98800 }, { "epoch": 1.05, "learning_rate": 0.0002507759096436684, "loss": 3.49, "step": 98900 }, { "epoch": 1.05, "learning_rate": 0.00025033505232918696, "loss": 3.4812, "step": 99000 }, { "epoch": 1.05, "learning_rate": 0.0002498941939727939, "loss": 3.4858, "step": 99100 }, { "epoch": 1.05, "learning_rate": 0.0002494533359454257, "loss": 3.4524, "step": 99200 }, { "epoch": 1.06, "learning_rate": 0.00024901247961801767, "loss": 3.4197, "step": 99300 }, { "epoch": 1.06, "learning_rate": 0.00024857162636149983, "loss": 3.4782, "step": 99400 }, { "epoch": 1.06, "learning_rate": 0.00024813077754679285, "loss": 3.5059, "step": 99500 }, { "epoch": 1.06, "learning_rate": 0.00024768993454480335, "loss": 3.5099, "step": 99600 }, { "epoch": 1.06, "learning_rate": 0.00024724909872642, "loss": 3.4788, "step": 99700 }, { "epoch": 1.06, "learning_rate": 0.00024680827146250915, "loss": 3.4652, "step": 99800 }, { "epoch": 1.06, "learning_rate": 0.0002463674541239104, "loss": 3.4577, "step": 99900 }, { "epoch": 1.06, "learning_rate": 0.00024592664808143264, "loss": 3.4207, "step": 100000 }, { "epoch": 1.06, "learning_rate": 0.0002454858547058497, "loss": 3.4953, "step": 100100 }, { "epoch": 1.07, "learning_rate": 0.00024504507536789573, "loss": 3.5048, "step": 100200 }, { "epoch": 1.07, "learning_rate": 0.0002446043114382615, "loss": 3.4709, "step": 100300 }, { "epoch": 1.07, "learning_rate": 0.00024416356428758984, "loss": 3.4563, "step": 100400 }, { "epoch": 1.07, "learning_rate": 0.0002437228352864711, "loss": 3.4406, "step": 100500 }, { "epoch": 1.07, "learning_rate": 0.00024328212580543963, "loss": 3.4617, "step": 100600 }, { "epoch": 1.07, "learning_rate": 0.0002428414372149687, "loss": 3.5065, "step": 100700 }, { "epoch": 1.07, "learning_rate": 0.00024240077088546688, "loss": 3.445, "step": 100800 }, { "epoch": 1.07, "learning_rate": 0.00024196012818727334, "loss": 3.469, "step": 100900 }, { "epoch": 1.07, "learning_rate": 0.00024151951049065402, "loss": 3.5099, "step": 101000 }, { "epoch": 1.07, "learning_rate": 0.00024107891916579674, "loss": 3.5347, "step": 101100 }, { "epoch": 1.08, "learning_rate": 0.00024063835558280766, "loss": 3.4266, "step": 101200 }, { "epoch": 1.08, "learning_rate": 0.00024019782111170637, "loss": 3.51, "step": 101300 }, { "epoch": 1.08, "learning_rate": 0.00023975731712242216, "loss": 3.5066, "step": 101400 }, { "epoch": 1.08, "learning_rate": 0.00023931684498478947, "loss": 3.484, "step": 101500 }, { "epoch": 1.08, "learning_rate": 0.0002388764060685436, "loss": 3.4547, "step": 101600 }, { "epoch": 1.08, "learning_rate": 0.0002384360017433167, "loss": 3.4793, "step": 101700 }, { "epoch": 1.08, "learning_rate": 0.00023799563337863314, "loss": 3.4553, "step": 101800 }, { "epoch": 1.08, "learning_rate": 0.0002375553023439056, "loss": 3.498, "step": 101900 }, { "epoch": 1.08, "learning_rate": 0.00023711501000843078, "loss": 3.4997, "step": 102000 }, { "epoch": 1.09, "learning_rate": 0.00023667475774138491, "loss": 3.4864, "step": 102100 }, { "epoch": 1.09, "learning_rate": 0.0002362345469118195, "loss": 3.4955, "step": 102200 }, { "epoch": 1.09, "learning_rate": 0.00023579437888865748, "loss": 3.4623, "step": 102300 }, { "epoch": 1.09, "learning_rate": 0.0002353542550406884, "loss": 3.491, "step": 102400 }, { "epoch": 1.09, "learning_rate": 0.00023491417673656456, "loss": 3.4934, "step": 102500 }, { "epoch": 1.09, "learning_rate": 0.00023447414534479675, "loss": 3.4448, "step": 102600 }, { "epoch": 1.09, "learning_rate": 0.0002340341622337496, "loss": 3.5072, "step": 102700 }, { "epoch": 1.09, "learning_rate": 0.0002335942287716379, "loss": 3.4392, "step": 102800 }, { "epoch": 1.09, "learning_rate": 0.00023315434632652162, "loss": 3.5145, "step": 102900 }, { "epoch": 1.09, "learning_rate": 0.0002327145162663027, "loss": 3.5082, "step": 103000 }, { "epoch": 1.1, "learning_rate": 0.0002322747399587197, "loss": 3.4582, "step": 103100 }, { "epoch": 1.1, "learning_rate": 0.0002318350187713442, "loss": 3.4514, "step": 103200 }, { "epoch": 1.1, "learning_rate": 0.0002313953540715763, "loss": 3.4805, "step": 103300 }, { "epoch": 1.1, "learning_rate": 0.00023095574722664053, "loss": 3.464, "step": 103400 }, { "epoch": 1.1, "learning_rate": 0.00023051619960358136, "loss": 3.5032, "step": 103500 }, { "epoch": 1.1, "learning_rate": 0.00023007671256925928, "loss": 3.4814, "step": 103600 }, { "epoch": 1.1, "learning_rate": 0.00022963728749034632, "loss": 3.4803, "step": 103700 }, { "epoch": 1.1, "learning_rate": 0.0002291979257333217, "loss": 3.5372, "step": 103800 }, { "epoch": 1.1, "learning_rate": 0.0002287586286644679, "loss": 3.4892, "step": 103900 }, { "epoch": 1.11, "learning_rate": 0.0002283193976498662, "loss": 3.4769, "step": 104000 }, { "epoch": 1.11, "learning_rate": 0.0002278802340553925, "loss": 3.4673, "step": 104100 }, { "epoch": 1.11, "learning_rate": 0.00022744113924671287, "loss": 3.4254, "step": 104200 }, { "epoch": 1.11, "learning_rate": 0.0002270021145892797, "loss": 3.4444, "step": 104300 }, { "epoch": 1.11, "learning_rate": 0.00022656316144832708, "loss": 3.4923, "step": 104400 }, { "epoch": 1.11, "learning_rate": 0.00022612428118886683, "loss": 3.5175, "step": 104500 }, { "epoch": 1.11, "learning_rate": 0.00022568547517568395, "loss": 3.4738, "step": 104600 }, { "epoch": 1.11, "learning_rate": 0.0002252467447733327, "loss": 3.4461, "step": 104700 }, { "epoch": 1.11, "learning_rate": 0.00022480809134613227, "loss": 3.4521, "step": 104800 }, { "epoch": 1.12, "learning_rate": 0.00022436951625816228, "loss": 3.4423, "step": 104900 }, { "epoch": 1.12, "learning_rate": 0.00022393102087325884, "loss": 3.5038, "step": 105000 }, { "epoch": 1.12, "learning_rate": 0.0002234926065550103, "loss": 3.5472, "step": 105100 }, { "epoch": 1.12, "learning_rate": 0.0002230542746667528, "loss": 3.4334, "step": 105200 }, { "epoch": 1.12, "learning_rate": 0.0002226160265715662, "loss": 3.4684, "step": 105300 }, { "epoch": 1.12, "learning_rate": 0.00022217786363226978, "loss": 3.4708, "step": 105400 }, { "epoch": 1.12, "learning_rate": 0.0002217397872114179, "loss": 3.4876, "step": 105500 }, { "epoch": 1.12, "learning_rate": 0.00022130179867129606, "loss": 3.4399, "step": 105600 }, { "epoch": 1.12, "learning_rate": 0.00022086389937391634, "loss": 3.4705, "step": 105700 }, { "epoch": 1.12, "learning_rate": 0.00022042609068101342, "loss": 3.4645, "step": 105800 }, { "epoch": 1.13, "learning_rate": 0.00021998837395404013, "loss": 3.4623, "step": 105900 }, { "epoch": 1.13, "learning_rate": 0.00021955075055416322, "loss": 3.4495, "step": 106000 }, { "epoch": 1.13, "learning_rate": 0.00021911322184225957, "loss": 3.4787, "step": 106100 }, { "epoch": 1.13, "learning_rate": 0.00021867578917891128, "loss": 3.4702, "step": 106200 }, { "epoch": 1.13, "learning_rate": 0.00021823845392440183, "loss": 3.4817, "step": 106300 }, { "epoch": 1.13, "learning_rate": 0.0002178012174387119, "loss": 3.5035, "step": 106400 }, { "epoch": 1.13, "learning_rate": 0.00021736408108151496, "loss": 3.4766, "step": 106500 }, { "epoch": 1.13, "learning_rate": 0.00021692704621217298, "loss": 3.5066, "step": 106600 }, { "epoch": 1.13, "learning_rate": 0.00021649011418973266, "loss": 3.5211, "step": 106700 }, { "epoch": 1.14, "learning_rate": 0.0002160532863729205, "loss": 3.4887, "step": 106800 }, { "epoch": 1.14, "learning_rate": 0.00021561656412013924, "loss": 3.516, "step": 106900 }, { "epoch": 1.14, "learning_rate": 0.00021517994878946314, "loss": 3.4538, "step": 107000 }, { "epoch": 1.14, "learning_rate": 0.00021474344173863424, "loss": 3.4798, "step": 107100 }, { "epoch": 1.14, "learning_rate": 0.00021430704432505755, "loss": 3.497, "step": 107200 }, { "epoch": 1.14, "learning_rate": 0.00021387075790579735, "loss": 3.5249, "step": 107300 }, { "epoch": 1.14, "learning_rate": 0.0002134345838375726, "loss": 3.5129, "step": 107400 }, { "epoch": 1.14, "learning_rate": 0.00021299852347675302, "loss": 3.5284, "step": 107500 }, { "epoch": 1.14, "learning_rate": 0.00021256257817935475, "loss": 3.423, "step": 107600 }, { "epoch": 1.14, "learning_rate": 0.0002121267493010359, "loss": 3.4436, "step": 107700 }, { "epoch": 1.15, "learning_rate": 0.0002116910381970929, "loss": 3.4876, "step": 107800 }, { "epoch": 1.15, "learning_rate": 0.00021125544622245553, "loss": 3.4873, "step": 107900 }, { "epoch": 1.15, "learning_rate": 0.00021081997473168344, "loss": 3.4589, "step": 108000 }, { "epoch": 1.15, "learning_rate": 0.0002103846250789615, "loss": 3.4876, "step": 108100 }, { "epoch": 1.15, "learning_rate": 0.00020994939861809574, "loss": 3.5278, "step": 108200 }, { "epoch": 1.15, "learning_rate": 0.00020951429670250897, "loss": 3.4383, "step": 108300 }, { "epoch": 1.15, "learning_rate": 0.0002090793206852369, "loss": 3.4604, "step": 108400 }, { "epoch": 1.15, "learning_rate": 0.00020864447191892344, "loss": 3.4604, "step": 108500 }, { "epoch": 1.15, "learning_rate": 0.0002082097517558171, "loss": 3.4143, "step": 108600 }, { "epoch": 1.16, "learning_rate": 0.0002077751615477664, "loss": 3.4799, "step": 108700 }, { "epoch": 1.16, "learning_rate": 0.00020734070264621557, "loss": 3.5034, "step": 108800 }, { "epoch": 1.16, "learning_rate": 0.00020690637640220072, "loss": 3.4856, "step": 108900 }, { "epoch": 1.16, "learning_rate": 0.00020647218416634512, "loss": 3.4974, "step": 109000 }, { "epoch": 1.16, "learning_rate": 0.0002060381272888559, "loss": 3.4974, "step": 109100 }, { "epoch": 1.16, "learning_rate": 0.00020560420711951866, "loss": 3.4761, "step": 109200 }, { "epoch": 1.16, "learning_rate": 0.00020517042500769426, "loss": 3.4425, "step": 109300 }, { "epoch": 1.16, "learning_rate": 0.00020473678230231398, "loss": 3.4528, "step": 109400 }, { "epoch": 1.16, "learning_rate": 0.00020430328035187585, "loss": 3.443, "step": 109500 }, { "epoch": 1.17, "learning_rate": 0.00020386992050443992, "loss": 3.4533, "step": 109600 }, { "epoch": 1.17, "learning_rate": 0.00020343670410762456, "loss": 3.4981, "step": 109700 }, { "epoch": 1.17, "learning_rate": 0.00020300363250860205, "loss": 3.4316, "step": 109800 }, { "epoch": 1.17, "learning_rate": 0.00020257070705409415, "loss": 3.4337, "step": 109900 }, { "epoch": 1.17, "learning_rate": 0.0002021379290903684, "loss": 3.49, "step": 110000 }, { "epoch": 1.17, "learning_rate": 0.00020170529996323366, "loss": 3.4601, "step": 110100 }, { "epoch": 1.17, "learning_rate": 0.00020127282101803593, "loss": 3.4827, "step": 110200 }, { "epoch": 1.17, "learning_rate": 0.00020084049359965402, "loss": 3.4761, "step": 110300 }, { "epoch": 1.17, "learning_rate": 0.00020040831905249586, "loss": 3.4625, "step": 110400 }, { "epoch": 1.17, "learning_rate": 0.00019997629872049366, "loss": 3.4822, "step": 110500 }, { "epoch": 1.18, "learning_rate": 0.00019954443394710035, "loss": 3.5124, "step": 110600 }, { "epoch": 1.18, "learning_rate": 0.00019911272607528484, "loss": 3.4623, "step": 110700 }, { "epoch": 1.18, "learning_rate": 0.0001986811764475284, "loss": 3.4704, "step": 110800 }, { "epoch": 1.18, "learning_rate": 0.00019824978640582013, "loss": 3.5546, "step": 110900 }, { "epoch": 1.18, "learning_rate": 0.00019781855729165265, "loss": 3.4981, "step": 111000 }, { "epoch": 1.18, "learning_rate": 0.00019738749044601847, "loss": 3.463, "step": 111100 }, { "epoch": 1.18, "learning_rate": 0.0001969565872094053, "loss": 3.4255, "step": 111200 }, { "epoch": 1.18, "learning_rate": 0.00019652584892179215, "loss": 3.4782, "step": 111300 }, { "epoch": 1.18, "learning_rate": 0.00019609527692264497, "loss": 3.4629, "step": 111400 }, { "epoch": 1.19, "learning_rate": 0.00019566487255091278, "loss": 3.4329, "step": 111500 }, { "epoch": 1.19, "learning_rate": 0.00019523463714502315, "loss": 3.4519, "step": 111600 }, { "epoch": 1.19, "learning_rate": 0.00019480457204287837, "loss": 3.4992, "step": 111700 }, { "epoch": 1.19, "learning_rate": 0.000194374678581851, "loss": 3.5083, "step": 111800 }, { "epoch": 1.19, "learning_rate": 0.00019394495809877996, "loss": 3.5005, "step": 111900 }, { "epoch": 1.19, "learning_rate": 0.0001935154119299663, "loss": 3.5134, "step": 112000 }, { "epoch": 1.19, "learning_rate": 0.0001930860414111687, "loss": 3.4555, "step": 112100 }, { "epoch": 1.19, "learning_rate": 0.00019265684787760006, "loss": 3.4719, "step": 112200 }, { "epoch": 1.19, "learning_rate": 0.00019222783266392265, "loss": 3.5005, "step": 112300 }, { "epoch": 1.19, "learning_rate": 0.0001917989971042443, "loss": 3.4918, "step": 112400 }, { "epoch": 1.2, "learning_rate": 0.00019137034253211403, "loss": 3.4612, "step": 112500 }, { "epoch": 1.2, "learning_rate": 0.00019094187028051825, "loss": 3.4352, "step": 112600 }, { "epoch": 1.2, "learning_rate": 0.00019051358168187618, "loss": 3.4624, "step": 112700 }, { "epoch": 1.2, "learning_rate": 0.00019008547806803622, "loss": 3.4301, "step": 112800 }, { "epoch": 1.2, "learning_rate": 0.00018965756077027118, "loss": 3.4142, "step": 112900 }, { "epoch": 1.2, "learning_rate": 0.00018922983111927484, "loss": 3.4817, "step": 113000 }, { "epoch": 1.2, "learning_rate": 0.00018880229044515711, "loss": 3.4376, "step": 113100 }, { "epoch": 1.2, "learning_rate": 0.00018837494007744064, "loss": 3.4768, "step": 113200 }, { "epoch": 1.2, "learning_rate": 0.00018794778134505587, "loss": 3.4391, "step": 113300 }, { "epoch": 1.21, "learning_rate": 0.00018752081557633755, "loss": 3.4482, "step": 113400 }, { "epoch": 1.21, "learning_rate": 0.00018709404409902042, "loss": 3.4682, "step": 113500 }, { "epoch": 1.21, "learning_rate": 0.00018666746824023476, "loss": 3.4296, "step": 113600 }, { "epoch": 1.21, "learning_rate": 0.00018624108932650287, "loss": 3.4653, "step": 113700 }, { "epoch": 1.21, "learning_rate": 0.00018581490868373426, "loss": 3.4661, "step": 113800 }, { "epoch": 1.21, "learning_rate": 0.00018538892763722225, "loss": 3.497, "step": 113900 }, { "epoch": 1.21, "learning_rate": 0.00018496314751163917, "loss": 3.4991, "step": 114000 }, { "epoch": 1.21, "learning_rate": 0.0001845375696310327, "loss": 3.4538, "step": 114100 }, { "epoch": 1.21, "learning_rate": 0.00018411219531882156, "loss": 3.5194, "step": 114200 }, { "epoch": 1.21, "learning_rate": 0.00018368702589779154, "loss": 3.472, "step": 114300 }, { "epoch": 1.22, "learning_rate": 0.00018326206269009106, "loss": 3.4382, "step": 114400 }, { "epoch": 1.22, "learning_rate": 0.0001828373070172275, "loss": 3.4555, "step": 114500 }, { "epoch": 1.22, "learning_rate": 0.0001824127602000626, "loss": 3.4512, "step": 114600 }, { "epoch": 1.22, "learning_rate": 0.00018198842355880896, "loss": 3.4754, "step": 114700 }, { "epoch": 1.22, "learning_rate": 0.0001815642984130254, "loss": 3.424, "step": 114800 }, { "epoch": 1.22, "learning_rate": 0.00018114038608161299, "loss": 3.4234, "step": 114900 }, { "epoch": 1.22, "learning_rate": 0.00018071668788281121, "loss": 3.4654, "step": 115000 }, { "epoch": 1.22, "learning_rate": 0.00018029320513419334, "loss": 3.4795, "step": 115100 }, { "epoch": 1.22, "learning_rate": 0.00017986993915266314, "loss": 3.4652, "step": 115200 }, { "epoch": 1.23, "learning_rate": 0.00017944689125444992, "loss": 3.4731, "step": 115300 }, { "epoch": 1.23, "learning_rate": 0.00017902406275510497, "loss": 3.4734, "step": 115400 }, { "epoch": 1.23, "learning_rate": 0.00017860145496949718, "loss": 3.5131, "step": 115500 }, { "epoch": 1.23, "learning_rate": 0.00017817906921180926, "loss": 3.4612, "step": 115600 }, { "epoch": 1.23, "learning_rate": 0.0001777569067955333, "loss": 3.466, "step": 115700 }, { "epoch": 1.23, "learning_rate": 0.00017733496903346704, "loss": 3.4542, "step": 115800 }, { "epoch": 1.23, "learning_rate": 0.00017691325723770957, "loss": 3.4701, "step": 115900 }, { "epoch": 1.23, "learning_rate": 0.00017649177271965717, "loss": 3.488, "step": 116000 }, { "epoch": 1.23, "learning_rate": 0.00017607051678999945, "loss": 3.5502, "step": 116100 }, { "epoch": 1.24, "learning_rate": 0.0001756494907587152, "loss": 3.4421, "step": 116200 }, { "epoch": 1.24, "learning_rate": 0.0001752286959350684, "loss": 3.4264, "step": 116300 }, { "epoch": 1.24, "learning_rate": 0.00017480813362760373, "loss": 3.4817, "step": 116400 }, { "epoch": 1.24, "learning_rate": 0.00017438780514414308, "loss": 3.4756, "step": 116500 }, { "epoch": 1.24, "learning_rate": 0.0001739677117917811, "loss": 3.4992, "step": 116600 }, { "epoch": 1.24, "learning_rate": 0.00017354785487688139, "loss": 3.4952, "step": 116700 }, { "epoch": 1.24, "learning_rate": 0.000173128235705072, "loss": 3.4887, "step": 116800 }, { "epoch": 1.24, "learning_rate": 0.00017270885558124204, "loss": 3.4912, "step": 116900 }, { "epoch": 1.24, "learning_rate": 0.00017228971580953708, "loss": 3.4863, "step": 117000 }, { "epoch": 1.24, "learning_rate": 0.0001718708176933551, "loss": 3.4861, "step": 117100 }, { "epoch": 1.25, "learning_rate": 0.000171452162535343, "loss": 3.5107, "step": 117200 }, { "epoch": 1.25, "learning_rate": 0.00017103375163739183, "loss": 3.4879, "step": 117300 }, { "epoch": 1.25, "learning_rate": 0.00017061558630063317, "loss": 3.4812, "step": 117400 }, { "epoch": 1.25, "learning_rate": 0.00017019766782543495, "loss": 3.4672, "step": 117500 }, { "epoch": 1.25, "learning_rate": 0.00016977999751139754, "loss": 3.4528, "step": 117600 }, { "epoch": 1.25, "learning_rate": 0.0001693625766573494, "loss": 3.5106, "step": 117700 }, { "epoch": 1.25, "learning_rate": 0.00016894540656134345, "loss": 3.4909, "step": 117800 }, { "epoch": 1.25, "learning_rate": 0.00016852848852065273, "loss": 3.5071, "step": 117900 }, { "epoch": 1.25, "learning_rate": 0.00016811182383176643, "loss": 3.5112, "step": 118000 }, { "epoch": 1.26, "learning_rate": 0.00016769541379038595, "loss": 3.4903, "step": 118100 }, { "epoch": 1.26, "learning_rate": 0.0001672792596914209, "loss": 3.4839, "step": 118200 }, { "epoch": 1.26, "learning_rate": 0.00016686336282898485, "loss": 3.4802, "step": 118300 }, { "epoch": 1.26, "learning_rate": 0.0001664477244963914, "loss": 3.4494, "step": 118400 }, { "epoch": 1.26, "learning_rate": 0.0001660323459861504, "loss": 3.4921, "step": 118500 }, { "epoch": 1.26, "learning_rate": 0.00016561722858996354, "loss": 3.5153, "step": 118600 }, { "epoch": 1.26, "learning_rate": 0.00016520237359872068, "loss": 3.4712, "step": 118700 }, { "epoch": 1.26, "learning_rate": 0.00016478778230249544, "loss": 3.4254, "step": 118800 }, { "epoch": 1.26, "learning_rate": 0.00016437345599054176, "loss": 3.4655, "step": 118900 }, { "epoch": 1.26, "learning_rate": 0.00016395939595128926, "loss": 3.4751, "step": 119000 }, { "epoch": 1.27, "learning_rate": 0.00016354560347233972, "loss": 3.4887, "step": 119100 }, { "epoch": 1.27, "learning_rate": 0.0001631320798404627, "loss": 3.4719, "step": 119200 }, { "epoch": 1.27, "learning_rate": 0.0001627188263415921, "loss": 3.4599, "step": 119300 }, { "epoch": 1.27, "learning_rate": 0.00016230584426082134, "loss": 3.4529, "step": 119400 }, { "epoch": 1.27, "learning_rate": 0.0001618931348824001, "loss": 3.4712, "step": 119500 }, { "epoch": 1.27, "learning_rate": 0.00016148069948972995, "loss": 3.4147, "step": 119600 }, { "epoch": 1.27, "learning_rate": 0.0001610685393653604, "loss": 3.4697, "step": 119700 }, { "epoch": 1.27, "learning_rate": 0.00016065665579098503, "loss": 3.522, "step": 119800 }, { "epoch": 1.27, "learning_rate": 0.0001602450500474374, "loss": 3.5019, "step": 119900 }, { "epoch": 1.28, "learning_rate": 0.00015983372341468716, "loss": 3.5104, "step": 120000 }, { "epoch": 1.28, "learning_rate": 0.00015942267717183588, "loss": 3.48, "step": 120100 }, { "epoch": 1.28, "learning_rate": 0.00015901191259711322, "loss": 3.4844, "step": 120200 }, { "epoch": 1.28, "learning_rate": 0.00015860143096787317, "loss": 3.4789, "step": 120300 }, { "epoch": 1.28, "learning_rate": 0.00015819123356058961, "loss": 3.4535, "step": 120400 }, { "epoch": 1.28, "learning_rate": 0.00015778132165085264, "loss": 3.4639, "step": 120500 }, { "epoch": 1.28, "learning_rate": 0.00015737169651336446, "loss": 3.5013, "step": 120600 }, { "epoch": 1.28, "learning_rate": 0.0001569623594219357, "loss": 3.4551, "step": 120700 }, { "epoch": 1.28, "learning_rate": 0.00015655331164948107, "loss": 3.4434, "step": 120800 }, { "epoch": 1.29, "learning_rate": 0.00015614455446801573, "loss": 3.4543, "step": 120900 }, { "epoch": 1.29, "learning_rate": 0.000155736089148651, "loss": 3.5137, "step": 121000 }, { "epoch": 1.29, "learning_rate": 0.00015532791696159078, "loss": 3.5108, "step": 121100 }, { "epoch": 1.29, "learning_rate": 0.00015492003917612715, "loss": 3.5088, "step": 121200 }, { "epoch": 1.29, "learning_rate": 0.0001545124570606372, "loss": 3.4808, "step": 121300 }, { "epoch": 1.29, "learning_rate": 0.0001541051718825781, "loss": 3.5011, "step": 121400 }, { "epoch": 1.29, "learning_rate": 0.00015369818490848386, "loss": 3.4427, "step": 121500 }, { "epoch": 1.29, "learning_rate": 0.00015329149740396102, "loss": 3.4482, "step": 121600 }, { "epoch": 1.29, "learning_rate": 0.00015288511063368498, "loss": 3.4824, "step": 121700 }, { "epoch": 1.29, "learning_rate": 0.00015247902586139583, "loss": 3.4745, "step": 121800 }, { "epoch": 1.3, "learning_rate": 0.00015207324434989472, "loss": 3.5294, "step": 121900 }, { "epoch": 1.3, "learning_rate": 0.00015166776736103964, "loss": 3.4532, "step": 122000 }, { "epoch": 1.3, "learning_rate": 0.00015126259615574157, "loss": 3.4671, "step": 122100 }, { "epoch": 1.3, "learning_rate": 0.00015085773199396054, "loss": 3.4737, "step": 122200 }, { "epoch": 1.3, "learning_rate": 0.00015045317613470206, "loss": 3.4988, "step": 122300 }, { "epoch": 1.3, "learning_rate": 0.00015004892983601264, "loss": 3.4739, "step": 122400 }, { "epoch": 1.3, "learning_rate": 0.0001496449943549762, "loss": 3.4934, "step": 122500 }, { "epoch": 1.3, "learning_rate": 0.00014924137094771017, "loss": 3.5086, "step": 122600 }, { "epoch": 1.3, "learning_rate": 0.00014883806086936146, "loss": 3.4656, "step": 122700 }, { "epoch": 1.31, "learning_rate": 0.00014843506537410274, "loss": 3.4651, "step": 122800 }, { "epoch": 1.31, "learning_rate": 0.00014803238571512817, "loss": 3.4964, "step": 122900 }, { "epoch": 1.31, "learning_rate": 0.00014763002314465, "loss": 3.4865, "step": 123000 }, { "epoch": 1.31, "learning_rate": 0.00014722797891389444, "loss": 3.5156, "step": 123100 }, { "epoch": 1.31, "learning_rate": 0.00014682625427309753, "loss": 3.4795, "step": 123200 }, { "epoch": 1.31, "learning_rate": 0.00014642485047150171, "loss": 3.5341, "step": 123300 }, { "epoch": 1.31, "learning_rate": 0.00014602376875735162, "loss": 3.5358, "step": 123400 }, { "epoch": 1.31, "learning_rate": 0.00014562301037789028, "loss": 3.4616, "step": 123500 }, { "epoch": 1.31, "learning_rate": 0.00014522257657935533, "loss": 3.4804, "step": 123600 }, { "epoch": 1.31, "learning_rate": 0.00014482246860697486, "loss": 3.4295, "step": 123700 }, { "epoch": 1.32, "learning_rate": 0.00014442268770496392, "loss": 3.5029, "step": 123800 }, { "epoch": 1.32, "learning_rate": 0.00014402323511652045, "loss": 3.4647, "step": 123900 }, { "epoch": 1.32, "learning_rate": 0.0001436241120838214, "loss": 3.4547, "step": 124000 }, { "epoch": 1.32, "learning_rate": 0.000143225319848019, "loss": 3.4493, "step": 124100 }, { "epoch": 1.32, "learning_rate": 0.00014282685964923643, "loss": 3.469, "step": 124200 }, { "epoch": 1.32, "learning_rate": 0.00014242873272656486, "loss": 3.4242, "step": 124300 }, { "epoch": 1.32, "learning_rate": 0.0001420309403180589, "loss": 3.4473, "step": 124400 }, { "epoch": 1.32, "learning_rate": 0.0001416334836607326, "loss": 3.5084, "step": 124500 }, { "epoch": 1.32, "learning_rate": 0.00014123636399055622, "loss": 3.4462, "step": 124600 }, { "epoch": 1.33, "learning_rate": 0.00014083958254245215, "loss": 3.5331, "step": 124700 }, { "epoch": 1.33, "learning_rate": 0.00014044314055029083, "loss": 3.5069, "step": 124800 }, { "epoch": 1.33, "learning_rate": 0.00014004703924688734, "loss": 3.4798, "step": 124900 }, { "epoch": 1.33, "learning_rate": 0.00013965127986399688, "loss": 3.468, "step": 125000 }, { "epoch": 1.33, "learning_rate": 0.0001392558636323118, "loss": 3.4743, "step": 125100 }, { "epoch": 1.33, "learning_rate": 0.00013886079178145717, "loss": 3.5451, "step": 125200 }, { "epoch": 1.33, "learning_rate": 0.00013846606553998716, "loss": 3.4554, "step": 125300 }, { "epoch": 1.33, "learning_rate": 0.00013807168613538129, "loss": 3.4819, "step": 125400 }, { "epoch": 1.33, "learning_rate": 0.00013767765479404036, "loss": 3.4847, "step": 125500 }, { "epoch": 1.34, "learning_rate": 0.00013728397274128293, "loss": 3.4506, "step": 125600 }, { "epoch": 1.34, "learning_rate": 0.00013689064120134132, "loss": 3.4751, "step": 125700 }, { "epoch": 1.34, "learning_rate": 0.000136497661397358, "loss": 3.5131, "step": 125800 }, { "epoch": 1.34, "learning_rate": 0.00013610503455138123, "loss": 3.4578, "step": 125900 }, { "epoch": 1.34, "learning_rate": 0.00013571276188436215, "loss": 3.4376, "step": 126000 }, { "epoch": 1.34, "learning_rate": 0.00013532084461615035, "loss": 3.4903, "step": 126100 }, { "epoch": 1.34, "learning_rate": 0.00013492928396549014, "loss": 3.4624, "step": 126200 }, { "epoch": 1.34, "learning_rate": 0.00013453808115001698, "loss": 3.468, "step": 126300 }, { "epoch": 1.34, "learning_rate": 0.00013414723738625352, "loss": 3.4748, "step": 126400 }, { "epoch": 1.34, "learning_rate": 0.0001337567538896058, "loss": 3.4878, "step": 126500 }, { "epoch": 1.35, "learning_rate": 0.0001333666318743598, "loss": 3.4782, "step": 126600 }, { "epoch": 1.35, "learning_rate": 0.00013297687255367697, "loss": 3.5124, "step": 126700 }, { "epoch": 1.35, "learning_rate": 0.00013258747713959125, "loss": 3.4443, "step": 126800 }, { "epoch": 1.35, "learning_rate": 0.00013219844684300475, "loss": 3.4409, "step": 126900 }, { "epoch": 1.35, "learning_rate": 0.00013180978287368435, "loss": 3.4815, "step": 127000 }, { "epoch": 1.35, "learning_rate": 0.00013142148644025765, "loss": 3.4525, "step": 127100 }, { "epoch": 1.35, "learning_rate": 0.00013103355875020923, "loss": 3.4578, "step": 127200 }, { "epoch": 1.35, "learning_rate": 0.000130646001009877, "loss": 3.4926, "step": 127300 }, { "epoch": 1.35, "learning_rate": 0.00013025881442444882, "loss": 3.4594, "step": 127400 }, { "epoch": 1.36, "learning_rate": 0.00012987200019795798, "loss": 3.441, "step": 127500 }, { "epoch": 1.36, "learning_rate": 0.00012948555953327983, "loss": 3.45, "step": 127600 }, { "epoch": 1.36, "learning_rate": 0.00012909949363212823, "loss": 3.4696, "step": 127700 }, { "epoch": 1.36, "learning_rate": 0.0001287138036950516, "loss": 3.5126, "step": 127800 }, { "epoch": 1.36, "learning_rate": 0.00012832849092142917, "loss": 3.4852, "step": 127900 }, { "epoch": 1.36, "learning_rate": 0.0001279435565094675, "loss": 3.4656, "step": 128000 }, { "epoch": 1.36, "learning_rate": 0.0001275590016561961, "loss": 3.4703, "step": 128100 }, { "epoch": 1.36, "learning_rate": 0.00012717482755746467, "loss": 3.469, "step": 128200 }, { "epoch": 1.36, "learning_rate": 0.00012679103540793864, "loss": 3.4687, "step": 128300 }, { "epoch": 1.36, "learning_rate": 0.0001264076264010957, "loss": 3.5109, "step": 128400 }, { "epoch": 1.37, "learning_rate": 0.00012602460172922214, "loss": 3.4951, "step": 128500 }, { "epoch": 1.37, "learning_rate": 0.00012564196258340904, "loss": 3.5326, "step": 128600 }, { "epoch": 1.37, "learning_rate": 0.00012525971015354864, "loss": 3.4683, "step": 128700 }, { "epoch": 1.37, "learning_rate": 0.00012487784562833067, "loss": 3.4843, "step": 128800 }, { "epoch": 1.37, "learning_rate": 0.00012449637019523832, "loss": 3.5223, "step": 128900 }, { "epoch": 1.37, "learning_rate": 0.00012411528504054518, "loss": 3.4825, "step": 129000 }, { "epoch": 1.37, "learning_rate": 0.00012373459134931095, "loss": 3.4544, "step": 129100 }, { "epoch": 1.37, "learning_rate": 0.00012335429030537812, "loss": 3.5195, "step": 129200 }, { "epoch": 1.37, "learning_rate": 0.00012297438309136812, "loss": 3.4476, "step": 129300 }, { "epoch": 1.38, "learning_rate": 0.0001225948708886777, "loss": 3.5221, "step": 129400 }, { "epoch": 1.38, "learning_rate": 0.0001222157548774752, "loss": 3.4944, "step": 129500 }, { "epoch": 1.38, "learning_rate": 0.00012183703623669704, "loss": 3.554, "step": 129600 }, { "epoch": 1.38, "learning_rate": 0.00012145871614404383, "loss": 3.4857, "step": 129700 }, { "epoch": 1.38, "learning_rate": 0.00012108079577597674, "loss": 3.4504, "step": 129800 }, { "epoch": 1.38, "learning_rate": 0.00012070327630771414, "loss": 3.4739, "step": 129900 }, { "epoch": 1.38, "learning_rate": 0.0001203261589132275, "loss": 3.4466, "step": 130000 }, { "epoch": 1.38, "learning_rate": 0.00011994944476523817, "loss": 3.4815, "step": 130100 }, { "epoch": 1.38, "learning_rate": 0.00011957313503521344, "loss": 3.5155, "step": 130200 }, { "epoch": 1.39, "learning_rate": 0.0001191972308933627, "loss": 3.4678, "step": 130300 }, { "epoch": 1.39, "learning_rate": 0.00011882173350863468, "loss": 3.468, "step": 130400 }, { "epoch": 1.39, "learning_rate": 0.00011844664404871281, "loss": 3.4707, "step": 130500 }, { "epoch": 1.39, "learning_rate": 0.00011807196368001192, "loss": 3.52, "step": 130600 }, { "epoch": 1.39, "learning_rate": 0.0001176976935676749, "loss": 3.5055, "step": 130700 }, { "epoch": 1.39, "learning_rate": 0.00011732383487556888, "loss": 3.4498, "step": 130800 }, { "epoch": 1.39, "learning_rate": 0.00011695038876628145, "loss": 3.4206, "step": 130900 }, { "epoch": 1.39, "learning_rate": 0.00011657735640111742, "loss": 3.4776, "step": 131000 }, { "epoch": 1.39, "learning_rate": 0.0001162047389400946, "loss": 3.4434, "step": 131100 }, { "epoch": 1.39, "learning_rate": 0.00011583253754194088, "loss": 3.484, "step": 131200 }, { "epoch": 1.4, "learning_rate": 0.00011546075336409018, "loss": 3.4894, "step": 131300 }, { "epoch": 1.4, "learning_rate": 0.00011508938756267933, "loss": 3.483, "step": 131400 }, { "epoch": 1.4, "learning_rate": 0.00011471844129254359, "loss": 3.4863, "step": 131500 }, { "epoch": 1.4, "learning_rate": 0.00011434791570721392, "loss": 3.4921, "step": 131600 }, { "epoch": 1.4, "learning_rate": 0.00011397781195891308, "loss": 3.4906, "step": 131700 }, { "epoch": 1.4, "learning_rate": 0.00011360813119855193, "loss": 3.5254, "step": 131800 }, { "epoch": 1.4, "learning_rate": 0.00011323887457572619, "loss": 3.4589, "step": 131900 }, { "epoch": 1.4, "learning_rate": 0.00011287004323871225, "loss": 3.4672, "step": 132000 }, { "epoch": 1.4, "learning_rate": 0.00011250163833446433, "loss": 3.4552, "step": 132100 }, { "epoch": 1.41, "learning_rate": 0.00011213366100861044, "loss": 3.4868, "step": 132200 }, { "epoch": 1.41, "learning_rate": 0.00011176611240544899, "loss": 3.471, "step": 132300 }, { "epoch": 1.41, "learning_rate": 0.00011139899366794517, "loss": 3.4567, "step": 132400 }, { "epoch": 1.41, "learning_rate": 0.0001110323059377274, "loss": 3.4666, "step": 132500 }, { "epoch": 1.41, "learning_rate": 0.00011066605035508385, "loss": 3.4836, "step": 132600 }, { "epoch": 1.41, "learning_rate": 0.0001103002280589589, "loss": 3.4704, "step": 132700 }, { "epoch": 1.41, "learning_rate": 0.00010993484018694921, "loss": 3.4604, "step": 132800 }, { "epoch": 1.41, "learning_rate": 0.00010956988787530092, "loss": 3.4447, "step": 132900 }, { "epoch": 1.41, "learning_rate": 0.0001092053722589055, "loss": 3.4769, "step": 133000 }, { "epoch": 1.41, "learning_rate": 0.00010884129447129648, "loss": 3.4205, "step": 133100 }, { "epoch": 1.42, "learning_rate": 0.00010847765564464593, "loss": 3.4452, "step": 133200 }, { "epoch": 1.42, "learning_rate": 0.00010811445690976068, "loss": 3.4749, "step": 133300 }, { "epoch": 1.42, "learning_rate": 0.00010775169939607913, "loss": 3.4909, "step": 133400 }, { "epoch": 1.42, "learning_rate": 0.00010738938423166778, "loss": 3.4493, "step": 133500 }, { "epoch": 1.42, "learning_rate": 0.00010702751254321744, "loss": 3.4452, "step": 133600 }, { "epoch": 1.42, "learning_rate": 0.00010666608545603962, "loss": 3.5166, "step": 133700 }, { "epoch": 1.42, "learning_rate": 0.00010630510409406355, "loss": 3.513, "step": 133800 }, { "epoch": 1.42, "learning_rate": 0.00010594456957983229, "loss": 3.4369, "step": 133900 }, { "epoch": 1.42, "learning_rate": 0.0001055844830344993, "loss": 3.4093, "step": 134000 }, { "epoch": 1.43, "learning_rate": 0.00010522484557782513, "loss": 3.4877, "step": 134100 }, { "epoch": 1.43, "learning_rate": 0.00010486565832817354, "loss": 3.5383, "step": 134200 }, { "epoch": 1.43, "learning_rate": 0.00010450692240250853, "loss": 3.4348, "step": 134300 }, { "epoch": 1.43, "learning_rate": 0.0001041486389163904, "loss": 3.5446, "step": 134400 }, { "epoch": 1.43, "learning_rate": 0.00010379080898397289, "loss": 3.4961, "step": 134500 }, { "epoch": 1.43, "learning_rate": 0.00010343343371799885, "loss": 3.4675, "step": 134600 }, { "epoch": 1.43, "learning_rate": 0.0001030765142297975, "loss": 3.4036, "step": 134700 }, { "epoch": 1.43, "learning_rate": 0.00010272005162928072, "loss": 3.4214, "step": 134800 }, { "epoch": 1.43, "learning_rate": 0.00010236404702493967, "loss": 3.5284, "step": 134900 }, { "epoch": 1.44, "learning_rate": 0.000102008501523841, "loss": 3.4838, "step": 135000 }, { "epoch": 1.44, "learning_rate": 0.000101653416231624, "loss": 3.4514, "step": 135100 }, { "epoch": 1.44, "learning_rate": 0.00010129879225249666, "loss": 3.45, "step": 135200 }, { "epoch": 1.44, "learning_rate": 0.00010094463068923257, "loss": 3.4556, "step": 135300 }, { "epoch": 1.44, "learning_rate": 0.00010059093264316724, "loss": 3.4745, "step": 135400 }, { "epoch": 1.44, "learning_rate": 0.00010023769921419481, "loss": 3.4483, "step": 135500 }, { "epoch": 1.44, "learning_rate": 9.98849315007646e-05, "loss": 3.4857, "step": 135600 }, { "epoch": 1.44, "learning_rate": 9.953263059987772e-05, "loss": 3.429, "step": 135700 }, { "epoch": 1.44, "learning_rate": 9.918079760708365e-05, "loss": 3.4677, "step": 135800 }, { "epoch": 1.44, "learning_rate": 9.882943361647667e-05, "loss": 3.516, "step": 135900 }, { "epoch": 1.45, "learning_rate": 9.847853972069277e-05, "loss": 3.4223, "step": 136000 }, { "epoch": 1.45, "learning_rate": 9.812811701090599e-05, "loss": 3.4708, "step": 136100 }, { "epoch": 1.45, "learning_rate": 9.777816657682523e-05, "loss": 3.4884, "step": 136200 }, { "epoch": 1.45, "learning_rate": 9.742868950669076e-05, "loss": 3.4627, "step": 136300 }, { "epoch": 1.45, "learning_rate": 9.707968688727047e-05, "loss": 3.4592, "step": 136400 }, { "epoch": 1.45, "learning_rate": 9.673115980385744e-05, "loss": 3.5064, "step": 136500 }, { "epoch": 1.45, "learning_rate": 9.638310934026567e-05, "loss": 3.5205, "step": 136600 }, { "epoch": 1.45, "learning_rate": 9.603553657882686e-05, "loss": 3.4942, "step": 136700 }, { "epoch": 1.45, "learning_rate": 9.56884426003874e-05, "loss": 3.4722, "step": 136800 }, { "epoch": 1.46, "learning_rate": 9.534182848430484e-05, "loss": 3.4912, "step": 136900 }, { "epoch": 1.46, "learning_rate": 9.49956953084444e-05, "loss": 3.4864, "step": 137000 }, { "epoch": 1.46, "learning_rate": 9.465004414917586e-05, "loss": 3.4623, "step": 137100 }, { "epoch": 1.46, "learning_rate": 9.430487608136981e-05, "loss": 3.4253, "step": 137200 }, { "epoch": 1.46, "learning_rate": 9.39601921783948e-05, "loss": 3.4765, "step": 137300 }, { "epoch": 1.46, "learning_rate": 9.36159935121136e-05, "loss": 3.5379, "step": 137400 }, { "epoch": 1.46, "learning_rate": 9.32722811528805e-05, "loss": 3.5132, "step": 137500 }, { "epoch": 1.46, "learning_rate": 9.292905616953681e-05, "loss": 3.4339, "step": 137600 }, { "epoch": 1.46, "learning_rate": 9.258631962940875e-05, "loss": 3.481, "step": 137700 }, { "epoch": 1.46, "learning_rate": 9.224407259830347e-05, "loss": 3.425, "step": 137800 }, { "epoch": 1.47, "learning_rate": 9.190231614050592e-05, "loss": 3.4525, "step": 137900 }, { "epoch": 1.47, "learning_rate": 9.156105131877559e-05, "loss": 3.4889, "step": 138000 }, { "epoch": 1.47, "learning_rate": 9.122027919434287e-05, "loss": 3.4895, "step": 138100 }, { "epoch": 1.47, "learning_rate": 9.088000082690629e-05, "loss": 3.4183, "step": 138200 }, { "epoch": 1.47, "learning_rate": 9.05402172746288e-05, "loss": 3.4894, "step": 138300 }, { "epoch": 1.47, "learning_rate": 9.020092959413473e-05, "loss": 3.4553, "step": 138400 }, { "epoch": 1.47, "learning_rate": 8.986213884050629e-05, "loss": 3.4826, "step": 138500 }, { "epoch": 1.47, "learning_rate": 8.952384606728045e-05, "loss": 3.4792, "step": 138600 }, { "epoch": 1.47, "learning_rate": 8.918605232644564e-05, "loss": 3.4568, "step": 138700 }, { "epoch": 1.48, "learning_rate": 8.884875866843844e-05, "loss": 3.4889, "step": 138800 }, { "epoch": 1.48, "learning_rate": 8.851196614214016e-05, "loss": 3.4883, "step": 138900 }, { "epoch": 1.48, "learning_rate": 8.817567579487399e-05, "loss": 3.499, "step": 139000 }, { "epoch": 1.48, "learning_rate": 8.783988867240133e-05, "loss": 3.4862, "step": 139100 }, { "epoch": 1.48, "learning_rate": 8.750460581891877e-05, "loss": 3.4511, "step": 139200 }, { "epoch": 1.48, "learning_rate": 8.716982827705489e-05, "loss": 3.4998, "step": 139300 }, { "epoch": 1.48, "learning_rate": 8.683555708786658e-05, "loss": 3.4064, "step": 139400 }, { "epoch": 1.48, "learning_rate": 8.650179329083629e-05, "loss": 3.4684, "step": 139500 }, { "epoch": 1.48, "learning_rate": 8.616853792386889e-05, "loss": 3.4596, "step": 139600 }, { "epoch": 1.48, "learning_rate": 8.583579202328792e-05, "loss": 3.4669, "step": 139700 }, { "epoch": 1.49, "learning_rate": 8.550355662383253e-05, "loss": 3.5372, "step": 139800 }, { "epoch": 1.49, "learning_rate": 8.517183275865456e-05, "loss": 3.5077, "step": 139900 }, { "epoch": 1.49, "learning_rate": 8.484062145931514e-05, "loss": 3.4921, "step": 140000 }, { "epoch": 1.49, "learning_rate": 8.450992375578137e-05, "loss": 3.4327, "step": 140100 }, { "epoch": 1.49, "learning_rate": 8.417974067642336e-05, "loss": 3.5026, "step": 140200 }, { "epoch": 1.49, "learning_rate": 8.385007324801066e-05, "loss": 3.4435, "step": 140300 }, { "epoch": 1.49, "learning_rate": 8.352092249570953e-05, "loss": 3.4548, "step": 140400 }, { "epoch": 1.49, "learning_rate": 8.319228944307933e-05, "loss": 3.4522, "step": 140500 }, { "epoch": 1.49, "learning_rate": 8.286417511206992e-05, "loss": 3.4525, "step": 140600 }, { "epoch": 1.5, "learning_rate": 8.253658052301751e-05, "loss": 3.4803, "step": 140700 }, { "epoch": 1.5, "learning_rate": 8.220950669464253e-05, "loss": 3.4681, "step": 140800 }, { "epoch": 1.5, "learning_rate": 8.188295464404577e-05, "loss": 3.5165, "step": 140900 }, { "epoch": 1.5, "learning_rate": 8.155692538670568e-05, "loss": 3.4869, "step": 141000 }, { "epoch": 1.5, "learning_rate": 8.123141993647456e-05, "loss": 3.4181, "step": 141100 }, { "epoch": 1.5, "learning_rate": 8.090643930557625e-05, "loss": 3.4428, "step": 141200 }, { "epoch": 1.5, "learning_rate": 8.058198450460239e-05, "loss": 3.4768, "step": 141300 }, { "epoch": 1.5, "learning_rate": 8.025805654250942e-05, "loss": 3.4992, "step": 141400 }, { "epoch": 1.5, "learning_rate": 7.993465642661555e-05, "loss": 3.4324, "step": 141500 }, { "epoch": 1.51, "learning_rate": 7.961178516259745e-05, "loss": 3.4736, "step": 141600 }, { "epoch": 1.51, "learning_rate": 7.928944375448734e-05, "loss": 3.4966, "step": 141700 }, { "epoch": 1.51, "learning_rate": 7.896763320466968e-05, "loss": 3.4269, "step": 141800 }, { "epoch": 1.51, "learning_rate": 7.864635451387817e-05, "loss": 3.5562, "step": 141900 }, { "epoch": 1.51, "learning_rate": 7.83256086811924e-05, "loss": 3.4452, "step": 142000 }, { "epoch": 1.51, "learning_rate": 7.800539670403514e-05, "loss": 3.465, "step": 142100 }, { "epoch": 1.51, "learning_rate": 7.7685719578169e-05, "loss": 3.4184, "step": 142200 }, { "epoch": 1.51, "learning_rate": 7.73665782976933e-05, "loss": 3.4772, "step": 142300 }, { "epoch": 1.51, "learning_rate": 7.704797385504117e-05, "loss": 3.467, "step": 142400 }, { "epoch": 1.51, "learning_rate": 7.6729907240976e-05, "loss": 3.4636, "step": 142500 }, { "epoch": 1.52, "learning_rate": 7.641237944458918e-05, "loss": 3.4184, "step": 142600 }, { "epoch": 1.52, "learning_rate": 7.609539145329622e-05, "loss": 3.4818, "step": 142700 }, { "epoch": 1.52, "learning_rate": 7.57789442528341e-05, "loss": 3.4073, "step": 142800 }, { "epoch": 1.52, "learning_rate": 7.5463038827258e-05, "loss": 3.4903, "step": 142900 }, { "epoch": 1.52, "learning_rate": 7.514767615893844e-05, "loss": 3.482, "step": 143000 }, { "epoch": 1.52, "learning_rate": 7.483285722855815e-05, "loss": 3.4642, "step": 143100 }, { "epoch": 1.52, "learning_rate": 7.4518583015109e-05, "loss": 3.4445, "step": 143200 }, { "epoch": 1.52, "learning_rate": 7.420485449588878e-05, "loss": 3.4651, "step": 143300 }, { "epoch": 1.52, "learning_rate": 7.389167264649855e-05, "loss": 3.4787, "step": 143400 }, { "epoch": 1.53, "learning_rate": 7.357903844083924e-05, "loss": 3.4666, "step": 143500 }, { "epoch": 1.53, "learning_rate": 7.326695285110906e-05, "loss": 3.5029, "step": 143600 }, { "epoch": 1.53, "learning_rate": 7.295541684779975e-05, "loss": 3.4557, "step": 143700 }, { "epoch": 1.53, "learning_rate": 7.264443139969432e-05, "loss": 3.4807, "step": 143800 }, { "epoch": 1.53, "learning_rate": 7.23339974738636e-05, "loss": 3.4719, "step": 143900 }, { "epoch": 1.53, "learning_rate": 7.202411603566339e-05, "loss": 3.4985, "step": 144000 }, { "epoch": 1.53, "learning_rate": 7.171478804873152e-05, "loss": 3.4518, "step": 144100 }, { "epoch": 1.53, "learning_rate": 7.140601447498443e-05, "loss": 3.5169, "step": 144200 }, { "epoch": 1.53, "learning_rate": 7.109779627461482e-05, "loss": 3.4492, "step": 144300 }, { "epoch": 1.53, "learning_rate": 7.079013440608827e-05, "loss": 3.4762, "step": 144400 }, { "epoch": 1.54, "learning_rate": 7.048302982614026e-05, "loss": 3.4617, "step": 144500 }, { "epoch": 1.54, "learning_rate": 7.017648348977335e-05, "loss": 3.4682, "step": 144600 }, { "epoch": 1.54, "learning_rate": 6.98704963502541e-05, "loss": 3.4709, "step": 144700 }, { "epoch": 1.54, "learning_rate": 6.95650693591101e-05, "loss": 3.4152, "step": 144800 }, { "epoch": 1.54, "learning_rate": 6.926020346612722e-05, "loss": 3.496, "step": 144900 }, { "epoch": 1.54, "learning_rate": 6.895589961934615e-05, "loss": 3.4792, "step": 145000 }, { "epoch": 1.54, "learning_rate": 6.865215876506006e-05, "loss": 3.4939, "step": 145100 }, { "epoch": 1.54, "learning_rate": 6.834898184781135e-05, "loss": 3.4709, "step": 145200 }, { "epoch": 1.54, "learning_rate": 6.804636981038867e-05, "loss": 3.4498, "step": 145300 }, { "epoch": 1.55, "learning_rate": 6.774432359382415e-05, "loss": 3.4552, "step": 145400 }, { "epoch": 1.55, "learning_rate": 6.744284413739025e-05, "loss": 3.3939, "step": 145500 }, { "epoch": 1.55, "learning_rate": 6.7141932378597e-05, "loss": 3.449, "step": 145600 }, { "epoch": 1.55, "learning_rate": 6.684158925318931e-05, "loss": 3.4837, "step": 145700 }, { "epoch": 1.55, "learning_rate": 6.654181569514362e-05, "loss": 3.4736, "step": 145800 }, { "epoch": 1.55, "learning_rate": 6.624261263666504e-05, "loss": 3.4677, "step": 145900 }, { "epoch": 1.55, "learning_rate": 6.594398100818483e-05, "loss": 3.4708, "step": 146000 }, { "epoch": 1.55, "learning_rate": 6.564592173835718e-05, "loss": 3.4526, "step": 146100 }, { "epoch": 1.55, "learning_rate": 6.534843575405647e-05, "loss": 3.5345, "step": 146200 }, { "epoch": 1.56, "learning_rate": 6.505152398037433e-05, "loss": 3.4945, "step": 146300 }, { "epoch": 1.56, "learning_rate": 6.475518734061667e-05, "loss": 3.4661, "step": 146400 }, { "epoch": 1.56, "learning_rate": 6.445942675630095e-05, "loss": 3.4289, "step": 146500 }, { "epoch": 1.56, "learning_rate": 6.416424314715327e-05, "loss": 3.4531, "step": 146600 }, { "epoch": 1.56, "learning_rate": 6.38696374311058e-05, "loss": 3.4917, "step": 146700 }, { "epoch": 1.56, "learning_rate": 6.357561052429305e-05, "loss": 3.3964, "step": 146800 }, { "epoch": 1.56, "learning_rate": 6.328216334105014e-05, "loss": 3.4736, "step": 146900 }, { "epoch": 1.56, "learning_rate": 6.29892967939091e-05, "loss": 3.5101, "step": 147000 }, { "epoch": 1.56, "learning_rate": 6.269701179359663e-05, "loss": 3.4323, "step": 147100 }, { "epoch": 1.56, "learning_rate": 6.240530924903065e-05, "loss": 3.4316, "step": 147200 }, { "epoch": 1.57, "learning_rate": 6.211419006731808e-05, "loss": 3.4749, "step": 147300 }, { "epoch": 1.57, "learning_rate": 6.182365515375172e-05, "loss": 3.4766, "step": 147400 }, { "epoch": 1.57, "learning_rate": 6.153370541180739e-05, "loss": 3.4461, "step": 147500 }, { "epoch": 1.57, "learning_rate": 6.124434174314131e-05, "loss": 3.5406, "step": 147600 }, { "epoch": 1.57, "learning_rate": 6.0955565047587064e-05, "loss": 3.5152, "step": 147700 }, { "epoch": 1.57, "learning_rate": 6.0667376223153075e-05, "loss": 3.504, "step": 147800 }, { "epoch": 1.57, "learning_rate": 6.03797761660195e-05, "loss": 3.466, "step": 147900 }, { "epoch": 1.57, "learning_rate": 6.009276577053582e-05, "loss": 3.4596, "step": 148000 }, { "epoch": 1.57, "learning_rate": 5.9806345929217546e-05, "loss": 3.4806, "step": 148100 }, { "epoch": 1.58, "learning_rate": 5.9520517532744015e-05, "loss": 3.4958, "step": 148200 }, { "epoch": 1.58, "learning_rate": 5.923528146995519e-05, "loss": 3.4599, "step": 148300 }, { "epoch": 1.58, "learning_rate": 5.895063862784916e-05, "loss": 3.4378, "step": 148400 }, { "epoch": 1.58, "learning_rate": 5.8666589891579306e-05, "loss": 3.4499, "step": 148500 }, { "epoch": 1.58, "learning_rate": 5.83831361444512e-05, "loss": 3.4667, "step": 148600 }, { "epoch": 1.58, "learning_rate": 5.8100278267920665e-05, "loss": 3.4887, "step": 148700 }, { "epoch": 1.58, "learning_rate": 5.781801714159021e-05, "loss": 3.4164, "step": 148800 }, { "epoch": 1.58, "learning_rate": 5.7536353643206806e-05, "loss": 3.4847, "step": 148900 }, { "epoch": 1.58, "learning_rate": 5.7255288648658754e-05, "loss": 3.43, "step": 149000 }, { "epoch": 1.58, "learning_rate": 5.6974823031973405e-05, "loss": 3.5193, "step": 149100 }, { "epoch": 1.59, "learning_rate": 5.669495766531413e-05, "loss": 3.4507, "step": 149200 }, { "epoch": 1.59, "learning_rate": 5.6415693418977844e-05, "loss": 3.494, "step": 149300 }, { "epoch": 1.59, "learning_rate": 5.613703116139185e-05, "loss": 3.5268, "step": 149400 }, { "epoch": 1.59, "learning_rate": 5.5858971759111756e-05, "loss": 3.4471, "step": 149500 }, { "epoch": 1.59, "learning_rate": 5.558151607681827e-05, "loss": 3.4171, "step": 149600 }, { "epoch": 1.59, "learning_rate": 5.530466497731501e-05, "loss": 3.4843, "step": 149700 }, { "epoch": 1.59, "learning_rate": 5.502841932152511e-05, "loss": 3.4613, "step": 149800 }, { "epoch": 1.59, "learning_rate": 5.475277996848921e-05, "loss": 3.5356, "step": 149900 }, { "epoch": 1.59, "learning_rate": 5.447774777536249e-05, "loss": 3.4039, "step": 150000 }, { "epoch": 1.6, "learning_rate": 5.4203323597412066e-05, "loss": 3.4498, "step": 150100 }, { "epoch": 1.6, "learning_rate": 5.392950828801435e-05, "loss": 3.4847, "step": 150200 }, { "epoch": 1.6, "learning_rate": 5.3656302698652096e-05, "loss": 3.4883, "step": 150300 }, { "epoch": 1.6, "learning_rate": 5.3383707678912345e-05, "loss": 3.4473, "step": 150400 }, { "epoch": 1.6, "learning_rate": 5.311172407648327e-05, "loss": 3.4113, "step": 150500 }, { "epoch": 1.6, "learning_rate": 5.2840352737151766e-05, "loss": 3.4673, "step": 150600 }, { "epoch": 1.6, "learning_rate": 5.256959450480078e-05, "loss": 3.4446, "step": 150700 }, { "epoch": 1.6, "learning_rate": 5.229945022140667e-05, "loss": 3.4601, "step": 150800 }, { "epoch": 1.6, "learning_rate": 5.2029920727036605e-05, "loss": 3.4191, "step": 150900 }, { "epoch": 1.61, "learning_rate": 5.1761006859846e-05, "loss": 3.47, "step": 151000 }, { "epoch": 1.61, "learning_rate": 5.1492709456075675e-05, "loss": 3.5108, "step": 151100 }, { "epoch": 1.61, "learning_rate": 5.1225029350049604e-05, "loss": 3.4592, "step": 151200 }, { "epoch": 1.61, "learning_rate": 5.0957967374172134e-05, "loss": 3.4277, "step": 151300 }, { "epoch": 1.61, "learning_rate": 5.069152435892535e-05, "loss": 3.4706, "step": 151400 }, { "epoch": 1.61, "learning_rate": 5.042570113286668e-05, "loss": 3.4567, "step": 151500 }, { "epoch": 1.61, "learning_rate": 5.016049852262591e-05, "loss": 3.4528, "step": 151600 }, { "epoch": 1.61, "learning_rate": 4.989591735290328e-05, "loss": 3.4114, "step": 151700 }, { "epoch": 1.61, "learning_rate": 4.9631958446466256e-05, "loss": 3.5006, "step": 151800 }, { "epoch": 1.61, "learning_rate": 4.936862262414748e-05, "loss": 3.4768, "step": 151900 }, { "epoch": 1.62, "learning_rate": 4.910591070484169e-05, "loss": 3.4693, "step": 152000 }, { "epoch": 1.62, "learning_rate": 4.884382350550368e-05, "loss": 3.447, "step": 152100 }, { "epoch": 1.62, "learning_rate": 4.8582361841145564e-05, "loss": 3.4081, "step": 152200 }, { "epoch": 1.62, "learning_rate": 4.8321526524834156e-05, "loss": 3.4792, "step": 152300 }, { "epoch": 1.62, "learning_rate": 4.806131836768862e-05, "loss": 3.4483, "step": 152400 }, { "epoch": 1.62, "learning_rate": 4.780173817887765e-05, "loss": 3.5346, "step": 152500 }, { "epoch": 1.62, "learning_rate": 4.7542786765617296e-05, "loss": 3.4657, "step": 152600 }, { "epoch": 1.62, "learning_rate": 4.7284464933168235e-05, "loss": 3.4891, "step": 152700 }, { "epoch": 1.62, "learning_rate": 4.702677348483358e-05, "loss": 3.4926, "step": 152800 }, { "epoch": 1.63, "learning_rate": 4.6769713221955723e-05, "loss": 3.4253, "step": 152900 }, { "epoch": 1.63, "learning_rate": 4.6513284943914547e-05, "loss": 3.4591, "step": 153000 }, { "epoch": 1.63, "learning_rate": 4.625748944812458e-05, "loss": 3.5055, "step": 153100 }, { "epoch": 1.63, "learning_rate": 4.600232753003267e-05, "loss": 3.4436, "step": 153200 }, { "epoch": 1.63, "learning_rate": 4.574779998311518e-05, "loss": 3.4526, "step": 153300 }, { "epoch": 1.63, "learning_rate": 4.549390759887606e-05, "loss": 3.4926, "step": 153400 }, { "epoch": 1.63, "learning_rate": 4.524065116684392e-05, "loss": 3.4372, "step": 153500 }, { "epoch": 1.63, "learning_rate": 4.498803147456987e-05, "loss": 3.4741, "step": 153600 }, { "epoch": 1.63, "learning_rate": 4.4736049307624817e-05, "loss": 3.4904, "step": 153700 }, { "epoch": 1.63, "learning_rate": 4.448470544959726e-05, "loss": 3.4679, "step": 153800 }, { "epoch": 1.64, "learning_rate": 4.423400068209071e-05, "loss": 3.4663, "step": 153900 }, { "epoch": 1.64, "learning_rate": 4.398393578472132e-05, "loss": 3.5239, "step": 154000 }, { "epoch": 1.64, "learning_rate": 4.3734511535115486e-05, "loss": 3.4588, "step": 154100 }, { "epoch": 1.64, "learning_rate": 4.348572870890718e-05, "loss": 3.5164, "step": 154200 }, { "epoch": 1.64, "learning_rate": 4.323758807973596e-05, "loss": 3.4214, "step": 154300 }, { "epoch": 1.64, "learning_rate": 4.299009041924426e-05, "loss": 3.5172, "step": 154400 }, { "epoch": 1.64, "learning_rate": 4.274323649707509e-05, "loss": 3.5102, "step": 154500 }, { "epoch": 1.64, "learning_rate": 4.249702708086972e-05, "loss": 3.4565, "step": 154600 }, { "epoch": 1.64, "learning_rate": 4.225146293626486e-05, "loss": 3.5431, "step": 154700 }, { "epoch": 1.65, "learning_rate": 4.2006544826891065e-05, "loss": 3.4823, "step": 154800 }, { "epoch": 1.65, "learning_rate": 4.176227351436967e-05, "loss": 3.4112, "step": 154900 }, { "epoch": 1.65, "learning_rate": 4.1518649758310766e-05, "loss": 3.468, "step": 155000 }, { "epoch": 1.65, "learning_rate": 4.1275674316310574e-05, "loss": 3.4689, "step": 155100 }, { "epoch": 1.65, "learning_rate": 4.103334794394939e-05, "loss": 3.4525, "step": 155200 }, { "epoch": 1.65, "learning_rate": 4.079167139478909e-05, "loss": 3.4909, "step": 155300 }, { "epoch": 1.65, "learning_rate": 4.055064542037087e-05, "loss": 3.5342, "step": 155400 }, { "epoch": 1.65, "learning_rate": 4.03102707702126e-05, "loss": 3.4856, "step": 155500 }, { "epoch": 1.65, "learning_rate": 4.007054819180692e-05, "loss": 3.4777, "step": 155600 }, { "epoch": 1.66, "learning_rate": 3.983147843061863e-05, "loss": 3.5254, "step": 155700 }, { "epoch": 1.66, "learning_rate": 3.9593062230082685e-05, "loss": 3.4694, "step": 155800 }, { "epoch": 1.66, "learning_rate": 3.935530033160134e-05, "loss": 3.4937, "step": 155900 }, { "epoch": 1.66, "learning_rate": 3.911819347454234e-05, "loss": 3.4633, "step": 156000 }, { "epoch": 1.66, "learning_rate": 3.8881742396236455e-05, "loss": 3.4595, "step": 156100 }, { "epoch": 1.66, "learning_rate": 3.8645947831975145e-05, "loss": 3.4702, "step": 156200 }, { "epoch": 1.66, "learning_rate": 3.841081051500836e-05, "loss": 3.4524, "step": 156300 }, { "epoch": 1.66, "learning_rate": 3.817633117654207e-05, "loss": 3.4967, "step": 156400 }, { "epoch": 1.66, "learning_rate": 3.79425105457363e-05, "loss": 3.4595, "step": 156500 }, { "epoch": 1.66, "learning_rate": 3.77093493497026e-05, "loss": 3.4135, "step": 156600 }, { "epoch": 1.67, "learning_rate": 3.74768483135019e-05, "loss": 3.4355, "step": 156700 }, { "epoch": 1.67, "learning_rate": 3.724500816014223e-05, "loss": 3.4815, "step": 156800 }, { "epoch": 1.67, "learning_rate": 3.701382961057648e-05, "loss": 3.4318, "step": 156900 }, { "epoch": 1.67, "learning_rate": 3.678331338370014e-05, "loss": 3.5005, "step": 157000 }, { "epoch": 1.67, "learning_rate": 3.655346019634909e-05, "loss": 3.4748, "step": 157100 }, { "epoch": 1.67, "learning_rate": 3.632427076329739e-05, "loss": 3.4054, "step": 157200 }, { "epoch": 1.67, "learning_rate": 3.609574579725491e-05, "loss": 3.4565, "step": 157300 }, { "epoch": 1.67, "learning_rate": 3.5867886008865315e-05, "loss": 3.4485, "step": 157400 }, { "epoch": 1.67, "learning_rate": 3.564069210670379e-05, "loss": 3.4623, "step": 157500 }, { "epoch": 1.68, "learning_rate": 3.541416479727483e-05, "loss": 3.4946, "step": 157600 }, { "epoch": 1.68, "learning_rate": 3.518830478500978e-05, "loss": 3.4598, "step": 157700 }, { "epoch": 1.68, "learning_rate": 3.49631127722653e-05, "loss": 3.4863, "step": 157800 }, { "epoch": 1.68, "learning_rate": 3.473858945932046e-05, "loss": 3.475, "step": 157900 }, { "epoch": 1.68, "learning_rate": 3.451473554437509e-05, "loss": 3.4652, "step": 158000 }, { "epoch": 1.68, "learning_rate": 3.4291551723547146e-05, "loss": 3.4687, "step": 158100 }, { "epoch": 1.68, "learning_rate": 3.4069038690871e-05, "loss": 3.4325, "step": 158200 }, { "epoch": 1.68, "learning_rate": 3.384719713829498e-05, "loss": 3.4336, "step": 158300 }, { "epoch": 1.68, "learning_rate": 3.362602775567935e-05, "loss": 3.4393, "step": 158400 }, { "epoch": 1.68, "learning_rate": 3.340553123079421e-05, "loss": 3.5204, "step": 158500 }, { "epoch": 1.69, "learning_rate": 3.3185708249317045e-05, "loss": 3.4819, "step": 158600 }, { "epoch": 1.69, "learning_rate": 3.2966559494830934e-05, "loss": 3.4414, "step": 158700 }, { "epoch": 1.69, "learning_rate": 3.2748085648822506e-05, "loss": 3.4852, "step": 158800 }, { "epoch": 1.69, "learning_rate": 3.2530287390679426e-05, "loss": 3.4863, "step": 158900 }, { "epoch": 1.69, "learning_rate": 3.2313165397688445e-05, "loss": 3.4531, "step": 159000 }, { "epoch": 1.69, "learning_rate": 3.2096720345033445e-05, "loss": 3.4858, "step": 159100 }, { "epoch": 1.69, "learning_rate": 3.188095290579318e-05, "loss": 3.4303, "step": 159200 }, { "epoch": 1.69, "learning_rate": 3.166586375093935e-05, "loss": 3.4155, "step": 159300 }, { "epoch": 1.69, "learning_rate": 3.145145354933415e-05, "loss": 3.5033, "step": 159400 }, { "epoch": 1.7, "learning_rate": 3.123772296772862e-05, "loss": 3.4965, "step": 159500 }, { "epoch": 1.7, "learning_rate": 3.102467267076037e-05, "loss": 3.499, "step": 159600 }, { "epoch": 1.7, "learning_rate": 3.0812303320951475e-05, "loss": 3.4457, "step": 159700 }, { "epoch": 1.7, "learning_rate": 3.0600615578706524e-05, "loss": 3.4652, "step": 159800 }, { "epoch": 1.7, "learning_rate": 3.038961010231048e-05, "loss": 3.4304, "step": 159900 }, { "epoch": 1.7, "learning_rate": 3.0179287547926676e-05, "loss": 3.4779, "step": 160000 }, { "epoch": 1.7, "learning_rate": 2.996964856959475e-05, "loss": 3.4483, "step": 160100 }, { "epoch": 1.7, "learning_rate": 2.976069381922869e-05, "loss": 3.4724, "step": 160200 }, { "epoch": 1.7, "learning_rate": 2.955242394661456e-05, "loss": 3.4281, "step": 160300 }, { "epoch": 1.71, "learning_rate": 2.9344839599408897e-05, "loss": 3.5002, "step": 160400 }, { "epoch": 1.71, "learning_rate": 2.9137941423136305e-05, "loss": 3.4879, "step": 160500 }, { "epoch": 1.71, "learning_rate": 2.8931730061187656e-05, "loss": 3.4045, "step": 160600 }, { "epoch": 1.71, "learning_rate": 2.872620615481808e-05, "loss": 3.4483, "step": 160700 }, { "epoch": 1.71, "learning_rate": 2.8521370343144752e-05, "loss": 3.4465, "step": 160800 }, { "epoch": 1.71, "learning_rate": 2.8317223263145313e-05, "loss": 3.4971, "step": 160900 }, { "epoch": 1.71, "learning_rate": 2.811376554965553e-05, "loss": 3.4879, "step": 161000 }, { "epoch": 1.71, "learning_rate": 2.7910997835367548e-05, "loss": 3.4802, "step": 161100 }, { "epoch": 1.71, "learning_rate": 2.7708920750827565e-05, "loss": 3.4915, "step": 161200 }, { "epoch": 1.71, "learning_rate": 2.750753492443442e-05, "loss": 3.4362, "step": 161300 }, { "epoch": 1.72, "learning_rate": 2.7306840982437215e-05, "loss": 3.4877, "step": 161400 }, { "epoch": 1.72, "learning_rate": 2.710683954893356e-05, "loss": 3.5149, "step": 161500 }, { "epoch": 1.72, "learning_rate": 2.690753124586745e-05, "loss": 3.4533, "step": 161600 }, { "epoch": 1.72, "learning_rate": 2.6708916693027553e-05, "loss": 3.4888, "step": 161700 }, { "epoch": 1.72, "learning_rate": 2.651099650804517e-05, "loss": 3.4699, "step": 161800 }, { "epoch": 1.72, "learning_rate": 2.6313771306392453e-05, "loss": 3.4967, "step": 161900 }, { "epoch": 1.72, "learning_rate": 2.6117241701380052e-05, "loss": 3.4676, "step": 162000 }, { "epoch": 1.72, "learning_rate": 2.592140830415579e-05, "loss": 3.484, "step": 162100 }, { "epoch": 1.72, "learning_rate": 2.5726271723702428e-05, "loss": 3.4728, "step": 162200 }, { "epoch": 1.73, "learning_rate": 2.553183256683578e-05, "loss": 3.4194, "step": 162300 }, { "epoch": 1.73, "learning_rate": 2.533809143820298e-05, "loss": 3.482, "step": 162400 }, { "epoch": 1.73, "learning_rate": 2.5145048940280384e-05, "loss": 3.4789, "step": 162500 }, { "epoch": 1.73, "learning_rate": 2.4952705673371877e-05, "loss": 3.5145, "step": 162600 }, { "epoch": 1.73, "learning_rate": 2.4761062235607007e-05, "loss": 3.4865, "step": 162700 }, { "epoch": 1.73, "learning_rate": 2.4570119222938945e-05, "loss": 3.4171, "step": 162800 }, { "epoch": 1.73, "learning_rate": 2.4379877229142867e-05, "loss": 3.4954, "step": 162900 }, { "epoch": 1.73, "learning_rate": 2.4190336845813928e-05, "loss": 3.4762, "step": 163000 }, { "epoch": 1.73, "learning_rate": 2.400149866236548e-05, "loss": 3.457, "step": 163100 }, { "epoch": 1.73, "learning_rate": 2.3813363266027262e-05, "loss": 3.4829, "step": 163200 }, { "epoch": 1.74, "learning_rate": 2.3625931241843646e-05, "loss": 3.443, "step": 163300 }, { "epoch": 1.74, "learning_rate": 2.3439203172671507e-05, "loss": 3.4499, "step": 163400 }, { "epoch": 1.74, "learning_rate": 2.3253179639178806e-05, "loss": 3.4199, "step": 163500 }, { "epoch": 1.74, "learning_rate": 2.306786121984261e-05, "loss": 3.4471, "step": 163600 }, { "epoch": 1.74, "learning_rate": 2.2883248490947306e-05, "loss": 3.434, "step": 163700 }, { "epoch": 1.74, "learning_rate": 2.2699342026582553e-05, "loss": 3.4878, "step": 163800 }, { "epoch": 1.74, "learning_rate": 2.251614239864211e-05, "loss": 3.5202, "step": 163900 }, { "epoch": 1.74, "learning_rate": 2.2333650176821485e-05, "loss": 3.4719, "step": 164000 }, { "epoch": 1.74, "learning_rate": 2.21518659286164e-05, "loss": 3.4925, "step": 164100 }, { "epoch": 1.75, "learning_rate": 2.1970790219320885e-05, "loss": 3.4056, "step": 164200 }, { "epoch": 1.75, "learning_rate": 2.179042361202582e-05, "loss": 3.48, "step": 164300 }, { "epoch": 1.75, "learning_rate": 2.161076666761688e-05, "loss": 3.4169, "step": 164400 }, { "epoch": 1.75, "learning_rate": 2.1431819944772945e-05, "loss": 3.4857, "step": 164500 }, { "epoch": 1.75, "learning_rate": 2.1253583999964298e-05, "loss": 3.4628, "step": 164600 }, { "epoch": 1.75, "learning_rate": 2.107605938745086e-05, "loss": 3.5136, "step": 164700 }, { "epoch": 1.75, "learning_rate": 2.0899246659280584e-05, "loss": 3.3885, "step": 164800 }, { "epoch": 1.75, "learning_rate": 2.0723146365287743e-05, "loss": 3.4974, "step": 164900 }, { "epoch": 1.75, "learning_rate": 2.0547759053091088e-05, "loss": 3.4751, "step": 165000 }, { "epoch": 1.75, "learning_rate": 2.0373085268092144e-05, "loss": 3.4691, "step": 165100 }, { "epoch": 1.76, "learning_rate": 2.0199125553473696e-05, "loss": 3.4679, "step": 165200 }, { "epoch": 1.76, "learning_rate": 2.0025880450197902e-05, "loss": 3.4301, "step": 165300 }, { "epoch": 1.76, "learning_rate": 1.9853350497004763e-05, "loss": 3.4768, "step": 165400 }, { "epoch": 1.76, "learning_rate": 1.9681536230410386e-05, "loss": 3.4745, "step": 165500 }, { "epoch": 1.76, "learning_rate": 1.951043818470516e-05, "loss": 3.4939, "step": 165600 }, { "epoch": 1.76, "learning_rate": 1.9340056891952396e-05, "loss": 3.4786, "step": 165700 }, { "epoch": 1.76, "learning_rate": 1.917039288198652e-05, "loss": 3.4544, "step": 165800 }, { "epoch": 1.76, "learning_rate": 1.9001446682411355e-05, "loss": 3.4916, "step": 165900 }, { "epoch": 1.76, "learning_rate": 1.8833218818598563e-05, "loss": 3.4662, "step": 166000 }, { "epoch": 1.77, "learning_rate": 1.866570981368601e-05, "loss": 3.4395, "step": 166100 }, { "epoch": 1.77, "learning_rate": 1.8498920188576187e-05, "loss": 3.4923, "step": 166200 }, { "epoch": 1.77, "learning_rate": 1.833285046193442e-05, "loss": 3.4653, "step": 166300 }, { "epoch": 1.77, "learning_rate": 1.816750115018742e-05, "loss": 3.4962, "step": 166400 }, { "epoch": 1.77, "learning_rate": 1.800287276752166e-05, "loss": 3.4816, "step": 166500 }, { "epoch": 1.77, "learning_rate": 1.783896582588168e-05, "loss": 3.3854, "step": 166600 }, { "epoch": 1.77, "learning_rate": 1.767578083496857e-05, "loss": 3.465, "step": 166700 }, { "epoch": 1.77, "learning_rate": 1.7513318302238486e-05, "loss": 3.4746, "step": 166800 }, { "epoch": 1.77, "learning_rate": 1.735157873290069e-05, "loss": 3.4732, "step": 166900 }, { "epoch": 1.78, "learning_rate": 1.7190562629916557e-05, "loss": 3.4785, "step": 167000 }, { "epoch": 1.78, "learning_rate": 1.7030270493997495e-05, "loss": 3.4685, "step": 167100 }, { "epoch": 1.78, "learning_rate": 1.68707028236037e-05, "loss": 3.4655, "step": 167200 }, { "epoch": 1.78, "learning_rate": 1.6711860114942383e-05, "loss": 3.4566, "step": 167300 }, { "epoch": 1.78, "learning_rate": 1.6553742861966452e-05, "loss": 3.4388, "step": 167400 }, { "epoch": 1.78, "learning_rate": 1.639635155637284e-05, "loss": 3.502, "step": 167500 }, { "epoch": 1.78, "learning_rate": 1.623968668760101e-05, "loss": 3.4313, "step": 167600 }, { "epoch": 1.78, "learning_rate": 1.6083748742831304e-05, "loss": 3.4205, "step": 167700 }, { "epoch": 1.78, "learning_rate": 1.5928538206983755e-05, "loss": 3.4836, "step": 167800 }, { "epoch": 1.78, "learning_rate": 1.577405556271619e-05, "loss": 3.4634, "step": 167900 }, { "epoch": 1.79, "learning_rate": 1.5620301290423135e-05, "loss": 3.4655, "step": 168000 }, { "epoch": 1.79, "learning_rate": 1.546727586823382e-05, "loss": 3.4441, "step": 168100 }, { "epoch": 1.79, "learning_rate": 1.5314979772011155e-05, "loss": 3.4556, "step": 168200 }, { "epoch": 1.79, "learning_rate": 1.5163413475350025e-05, "loss": 3.5384, "step": 168300 }, { "epoch": 1.79, "learning_rate": 1.5012577449575848e-05, "loss": 3.4572, "step": 168400 }, { "epoch": 1.79, "learning_rate": 1.4862472163743146e-05, "loss": 3.4505, "step": 168500 }, { "epoch": 1.79, "learning_rate": 1.471309808463403e-05, "loss": 3.4776, "step": 168600 }, { "epoch": 1.79, "learning_rate": 1.4564455676756766e-05, "loss": 3.5107, "step": 168700 }, { "epoch": 1.79, "learning_rate": 1.4416545402344383e-05, "loss": 3.5006, "step": 168800 }, { "epoch": 1.8, "learning_rate": 1.4269367721353205e-05, "loss": 3.519, "step": 168900 }, { "epoch": 1.8, "learning_rate": 1.4122923091461348e-05, "loss": 3.4775, "step": 169000 }, { "epoch": 1.8, "learning_rate": 1.3977211968067422e-05, "loss": 3.4108, "step": 169100 }, { "epoch": 1.8, "learning_rate": 1.3832234804289023e-05, "loss": 3.498, "step": 169200 }, { "epoch": 1.8, "learning_rate": 1.3687992050961356e-05, "loss": 3.4988, "step": 169300 }, { "epoch": 1.8, "learning_rate": 1.3544484156635923e-05, "loss": 3.4937, "step": 169400 }, { "epoch": 1.8, "learning_rate": 1.34017115675788e-05, "loss": 3.47, "step": 169500 }, { "epoch": 1.8, "learning_rate": 1.3259674727769732e-05, "loss": 3.4696, "step": 169600 }, { "epoch": 1.8, "learning_rate": 1.3118374078900376e-05, "loss": 3.4722, "step": 169700 }, { "epoch": 1.8, "learning_rate": 1.297781006037313e-05, "loss": 3.4794, "step": 169800 }, { "epoch": 1.81, "learning_rate": 1.2837983109299566e-05, "loss": 3.5015, "step": 169900 }, { "epoch": 1.81, "learning_rate": 1.2698893660499394e-05, "loss": 3.4986, "step": 170000 }, { "epoch": 1.81, "learning_rate": 1.2560542146498766e-05, "loss": 3.4245, "step": 170100 }, { "epoch": 1.81, "learning_rate": 1.2422928997529142e-05, "loss": 3.4625, "step": 170200 }, { "epoch": 1.81, "learning_rate": 1.2286054641525824e-05, "loss": 3.4951, "step": 170300 }, { "epoch": 1.81, "learning_rate": 1.214991950412675e-05, "loss": 3.4903, "step": 170400 }, { "epoch": 1.81, "learning_rate": 1.2014524008671118e-05, "loss": 3.4828, "step": 170500 }, { "epoch": 1.81, "learning_rate": 1.1879868576198049e-05, "loss": 3.4202, "step": 170600 }, { "epoch": 1.81, "learning_rate": 1.1745953625445283e-05, "loss": 3.457, "step": 170700 }, { "epoch": 1.82, "learning_rate": 1.161277957284787e-05, "loss": 3.4711, "step": 170800 }, { "epoch": 1.82, "learning_rate": 1.1480346832536847e-05, "loss": 3.4668, "step": 170900 }, { "epoch": 1.82, "learning_rate": 1.1348655816338176e-05, "loss": 3.4941, "step": 171000 }, { "epoch": 1.82, "learning_rate": 1.1217706933771165e-05, "loss": 3.4354, "step": 171100 }, { "epoch": 1.82, "learning_rate": 1.1087500592047189e-05, "loss": 3.4694, "step": 171200 }, { "epoch": 1.82, "learning_rate": 1.0958037196068693e-05, "loss": 3.4025, "step": 171300 }, { "epoch": 1.82, "learning_rate": 1.0829317148427831e-05, "loss": 3.5235, "step": 171400 }, { "epoch": 1.82, "learning_rate": 1.0701340849404995e-05, "loss": 3.4876, "step": 171500 }, { "epoch": 1.82, "learning_rate": 1.0574108696967955e-05, "loss": 3.5077, "step": 171600 }, { "epoch": 1.83, "learning_rate": 1.0447621086770164e-05, "loss": 3.4487, "step": 171700 }, { "epoch": 1.83, "learning_rate": 1.0321878412149959e-05, "loss": 3.5235, "step": 171800 }, { "epoch": 1.83, "learning_rate": 1.019688106412911e-05, "loss": 3.5167, "step": 171900 }, { "epoch": 1.83, "learning_rate": 1.0072629431411629e-05, "loss": 3.5023, "step": 172000 }, { "epoch": 1.83, "learning_rate": 9.949123900382578e-06, "loss": 3.4944, "step": 172100 }, { "epoch": 1.83, "learning_rate": 9.826364855106851e-06, "loss": 3.4764, "step": 172200 }, { "epoch": 1.83, "learning_rate": 9.704352677328055e-06, "loss": 3.4143, "step": 172300 }, { "epoch": 1.83, "learning_rate": 9.583087746467212e-06, "loss": 3.478, "step": 172400 }, { "epoch": 1.83, "learning_rate": 9.46257043962162e-06, "loss": 3.4686, "step": 172500 }, { "epoch": 1.83, "learning_rate": 9.342801131563772e-06, "loss": 3.4824, "step": 172600 }, { "epoch": 1.84, "learning_rate": 9.22378019473999e-06, "loss": 3.456, "step": 172700 }, { "epoch": 1.84, "learning_rate": 9.105507999269513e-06, "loss": 3.4789, "step": 172800 }, { "epoch": 1.84, "learning_rate": 8.98798491294317e-06, "loss": 3.4921, "step": 172900 }, { "epoch": 1.84, "learning_rate": 8.871211301222205e-06, "loss": 3.4355, "step": 173000 }, { "epoch": 1.84, "learning_rate": 8.755187527237362e-06, "loss": 3.4613, "step": 173100 }, { "epoch": 1.84, "learning_rate": 8.639913951787537e-06, "loss": 3.4982, "step": 173200 }, { "epoch": 1.84, "learning_rate": 8.525390933338761e-06, "loss": 3.498, "step": 173300 }, { "epoch": 1.84, "learning_rate": 8.411618828022994e-06, "loss": 3.4502, "step": 173400 }, { "epoch": 1.84, "learning_rate": 8.298597989637197e-06, "loss": 3.4695, "step": 173500 }, { "epoch": 1.85, "learning_rate": 8.18632876964201e-06, "loss": 3.4537, "step": 173600 }, { "epoch": 1.85, "learning_rate": 8.074811517160885e-06, "loss": 3.476, "step": 173700 }, { "epoch": 1.85, "learning_rate": 7.964046578978701e-06, "loss": 3.418, "step": 173800 }, { "epoch": 1.85, "learning_rate": 7.854034299541068e-06, "loss": 3.4262, "step": 173900 }, { "epoch": 1.85, "learning_rate": 7.744775020952887e-06, "loss": 3.4676, "step": 174000 }, { "epoch": 1.85, "learning_rate": 7.636269082977626e-06, "loss": 3.44, "step": 174100 }, { "epoch": 1.85, "learning_rate": 7.528516823035902e-06, "loss": 3.4154, "step": 174200 }, { "epoch": 1.85, "learning_rate": 7.4215185762047385e-06, "loss": 3.4926, "step": 174300 }, { "epoch": 1.85, "learning_rate": 7.315274675216421e-06, "loss": 3.4514, "step": 174400 }, { "epoch": 1.85, "learning_rate": 7.2097854504573626e-06, "loss": 3.5559, "step": 174500 }, { "epoch": 1.86, "learning_rate": 7.105051229967241e-06, "loss": 3.4448, "step": 174600 }, { "epoch": 1.86, "learning_rate": 7.001072339437836e-06, "loss": 3.4527, "step": 174700 }, { "epoch": 1.86, "learning_rate": 6.897849102212083e-06, "loss": 3.4917, "step": 174800 }, { "epoch": 1.86, "learning_rate": 6.795381839283133e-06, "loss": 3.4435, "step": 174900 }, { "epoch": 1.86, "learning_rate": 6.693670869293206e-06, "loss": 3.5062, "step": 175000 }, { "epoch": 1.86, "learning_rate": 6.592716508532742e-06, "loss": 3.4505, "step": 175100 }, { "epoch": 1.86, "learning_rate": 6.492519070939312e-06, "loss": 3.4699, "step": 175200 }, { "epoch": 1.86, "learning_rate": 6.393078868096674e-06, "loss": 3.5317, "step": 175300 }, { "epoch": 1.86, "learning_rate": 6.294396209233888e-06, "loss": 3.5148, "step": 175400 }, { "epoch": 1.87, "learning_rate": 6.196471401224202e-06, "loss": 3.4562, "step": 175500 }, { "epoch": 1.87, "learning_rate": 6.0993047485841944e-06, "loss": 3.4535, "step": 175600 }, { "epoch": 1.87, "learning_rate": 6.002896553472831e-06, "loss": 3.4372, "step": 175700 }, { "epoch": 1.87, "learning_rate": 5.907247115690489e-06, "loss": 3.4692, "step": 175800 }, { "epoch": 1.87, "learning_rate": 5.812356732678076e-06, "loss": 3.4198, "step": 175900 }, { "epoch": 1.87, "learning_rate": 5.718225699515939e-06, "loss": 3.4826, "step": 176000 }, { "epoch": 1.87, "learning_rate": 5.624854308923289e-06, "loss": 3.4803, "step": 176100 }, { "epoch": 1.87, "learning_rate": 5.532242851256891e-06, "loss": 3.4432, "step": 176200 }, { "epoch": 1.87, "learning_rate": 5.440391614510487e-06, "loss": 3.4248, "step": 176300 }, { "epoch": 1.88, "learning_rate": 5.349300884313624e-06, "loss": 3.4578, "step": 176400 }, { "epoch": 1.88, "learning_rate": 5.258970943930991e-06, "loss": 3.4432, "step": 176500 }, { "epoch": 1.88, "learning_rate": 5.1694020742614474e-06, "loss": 3.4696, "step": 176600 }, { "epoch": 1.88, "learning_rate": 5.080594553837109e-06, "loss": 3.4922, "step": 176700 }, { "epoch": 1.88, "learning_rate": 4.992548658822593e-06, "loss": 3.4488, "step": 176800 }, { "epoch": 1.88, "learning_rate": 4.905264663014025e-06, "loss": 3.4413, "step": 176900 }, { "epoch": 1.88, "learning_rate": 4.818742837838258e-06, "loss": 3.4904, "step": 177000 }, { "epoch": 1.88, "learning_rate": 4.732983452352096e-06, "loss": 3.4597, "step": 177100 }, { "epoch": 1.88, "learning_rate": 4.647986773241353e-06, "loss": 3.3803, "step": 177200 }, { "epoch": 1.88, "learning_rate": 4.563753064819959e-06, "loss": 3.4865, "step": 177300 }, { "epoch": 1.89, "learning_rate": 4.480282589029383e-06, "loss": 3.4853, "step": 177400 }, { "epoch": 1.89, "learning_rate": 4.397575605437576e-06, "loss": 3.4592, "step": 177500 }, { "epoch": 1.89, "learning_rate": 4.315632371238304e-06, "loss": 3.4212, "step": 177600 }, { "epoch": 1.89, "learning_rate": 4.234453141250288e-06, "loss": 3.4781, "step": 177700 }, { "epoch": 1.89, "learning_rate": 4.154038167916402e-06, "loss": 3.5055, "step": 177800 }, { "epoch": 1.89, "learning_rate": 4.074387701302973e-06, "loss": 3.4421, "step": 177900 }, { "epoch": 1.89, "learning_rate": 3.995501989098843e-06, "loss": 3.4785, "step": 178000 }, { "epoch": 1.89, "learning_rate": 3.9173812766148394e-06, "loss": 3.4931, "step": 178100 }, { "epoch": 1.89, "learning_rate": 3.840025806782721e-06, "loss": 3.4689, "step": 178200 }, { "epoch": 1.9, "learning_rate": 3.7634358201547035e-06, "loss": 3.4653, "step": 178300 }, { "epoch": 1.9, "learning_rate": 3.6876115549024923e-06, "loss": 3.4776, "step": 178400 }, { "epoch": 1.9, "learning_rate": 3.612553246816669e-06, "loss": 3.4824, "step": 178500 }, { "epoch": 1.9, "learning_rate": 3.538261129305914e-06, "loss": 3.4846, "step": 178600 }, { "epoch": 1.9, "learning_rate": 3.464735433396288e-06, "loss": 3.4451, "step": 178700 }, { "epoch": 1.9, "learning_rate": 3.3919763877304777e-06, "loss": 3.4596, "step": 178800 }, { "epoch": 1.9, "learning_rate": 3.3199842185671903e-06, "loss": 3.4577, "step": 178900 }, { "epoch": 1.9, "learning_rate": 3.248759149780317e-06, "loss": 3.457, "step": 179000 }, { "epoch": 1.9, "learning_rate": 3.1783014028582967e-06, "loss": 3.4635, "step": 179100 }, { "epoch": 1.9, "learning_rate": 3.1086111969035048e-06, "loss": 3.5187, "step": 179200 }, { "epoch": 1.91, "learning_rate": 3.0396887486313916e-06, "loss": 3.4766, "step": 179300 }, { "epoch": 1.91, "learning_rate": 2.9715342723700133e-06, "loss": 3.4628, "step": 179400 }, { "epoch": 1.91, "learning_rate": 2.9041479800591685e-06, "loss": 3.4998, "step": 179500 }, { "epoch": 1.91, "learning_rate": 2.8375300812499007e-06, "loss": 3.4767, "step": 179600 }, { "epoch": 1.91, "learning_rate": 2.771680783103747e-06, "loss": 3.4474, "step": 179700 }, { "epoch": 1.91, "learning_rate": 2.706600290392186e-06, "loss": 3.4676, "step": 179800 }, { "epoch": 1.91, "learning_rate": 2.642288805495885e-06, "loss": 3.4556, "step": 179900 }, { "epoch": 1.91, "learning_rate": 2.5787465284041188e-06, "loss": 3.4032, "step": 180000 }, { "epoch": 1.91, "learning_rate": 2.5159736567141876e-06, "loss": 3.4813, "step": 180100 }, { "epoch": 1.92, "learning_rate": 2.4539703856308326e-06, "loss": 3.4864, "step": 180200 }, { "epoch": 1.92, "learning_rate": 2.3927369079654313e-06, "loss": 3.4783, "step": 180300 }, { "epoch": 1.92, "learning_rate": 2.332273414135666e-06, "loss": 3.4934, "step": 180400 }, { "epoch": 1.92, "learning_rate": 2.2725800921647164e-06, "loss": 3.4395, "step": 180500 }, { "epoch": 1.92, "learning_rate": 2.213657127680818e-06, "loss": 3.4599, "step": 180600 }, { "epoch": 1.92, "learning_rate": 2.1555047039165944e-06, "loss": 3.482, "step": 180700 }, { "epoch": 1.92, "learning_rate": 2.0981230017085017e-06, "loss": 3.4425, "step": 180800 }, { "epoch": 1.92, "learning_rate": 2.0415121994963314e-06, "loss": 3.4422, "step": 180900 }, { "epoch": 1.92, "learning_rate": 1.9856724733225695e-06, "loss": 3.4536, "step": 181000 }, { "epoch": 1.93, "learning_rate": 1.9306039968319535e-06, "loss": 3.5386, "step": 181100 }, { "epoch": 1.93, "learning_rate": 1.8763069412707778e-06, "loss": 3.5005, "step": 181200 }, { "epoch": 1.93, "learning_rate": 1.822781475486507e-06, "loss": 3.4209, "step": 181300 }, { "epoch": 1.93, "learning_rate": 1.7700277659271625e-06, "loss": 3.4426, "step": 181400 }, { "epoch": 1.93, "learning_rate": 1.7180459766408806e-06, "loss": 3.4718, "step": 181500 }, { "epoch": 1.93, "learning_rate": 1.6668362692753569e-06, "loss": 3.4696, "step": 181600 }, { "epoch": 1.93, "learning_rate": 1.6163988030772347e-06, "loss": 3.4379, "step": 181700 }, { "epoch": 1.93, "learning_rate": 1.5667337348918841e-06, "loss": 3.4895, "step": 181800 }, { "epoch": 1.93, "learning_rate": 1.5178412191626524e-06, "loss": 3.4887, "step": 181900 }, { "epoch": 1.93, "learning_rate": 1.469721407930502e-06, "loss": 3.4375, "step": 182000 }, { "epoch": 1.94, "learning_rate": 1.4223744508334857e-06, "loss": 3.4431, "step": 182100 }, { "epoch": 1.94, "learning_rate": 1.375800495106383e-06, "loss": 3.4643, "step": 182200 }, { "epoch": 1.94, "learning_rate": 1.3299996855801189e-06, "loss": 3.4656, "step": 182300 }, { "epoch": 1.94, "learning_rate": 1.2849721646814306e-06, "loss": 3.4747, "step": 182400 }, { "epoch": 1.94, "learning_rate": 1.2407180724322565e-06, "loss": 3.3942, "step": 182500 }, { "epoch": 1.94, "learning_rate": 1.1972375464494867e-06, "loss": 3.475, "step": 182600 }, { "epoch": 1.94, "learning_rate": 1.154530721944408e-06, "loss": 3.513, "step": 182700 }, { "epoch": 1.94, "learning_rate": 1.11259773172237e-06, "loss": 3.3924, "step": 182800 }, { "epoch": 1.94, "learning_rate": 1.0714387061823427e-06, "loss": 3.4743, "step": 182900 }, { "epoch": 1.95, "learning_rate": 1.031053773316415e-06, "loss": 3.5169, "step": 183000 }, { "epoch": 1.95, "learning_rate": 9.914430587095735e-07, "loss": 3.4972, "step": 183100 }, { "epoch": 1.95, "learning_rate": 9.52606685539148e-07, "loss": 3.456, "step": 183200 }, { "epoch": 1.95, "learning_rate": 9.145447745745883e-07, "loss": 3.4078, "step": 183300 }, { "epoch": 1.95, "learning_rate": 8.772574441768821e-07, "loss": 3.4546, "step": 183400 }, { "epoch": 1.95, "learning_rate": 8.407448102984161e-07, "loss": 3.4795, "step": 183500 }, { "epoch": 1.95, "learning_rate": 8.050069864824483e-07, "loss": 3.4461, "step": 183600 }, { "epoch": 1.95, "learning_rate": 7.700440838628031e-07, "loss": 3.4308, "step": 183700 }, { "epoch": 1.95, "learning_rate": 7.358562111635936e-07, "loss": 3.4372, "step": 183800 }, { "epoch": 1.95, "learning_rate": 7.024434746987218e-07, "loss": 3.4851, "step": 183900 }, { "epoch": 1.96, "learning_rate": 6.698059783717681e-07, "loss": 3.4375, "step": 184000 }, { "epoch": 1.96, "learning_rate": 6.379438236754354e-07, "loss": 3.4974, "step": 184100 }, { "epoch": 1.96, "learning_rate": 6.068571096914666e-07, "loss": 3.4536, "step": 184200 }, { "epoch": 1.96, "learning_rate": 5.765459330901446e-07, "loss": 3.4817, "step": 184300 }, { "epoch": 1.96, "learning_rate": 5.470103881300425e-07, "loss": 3.4776, "step": 184400 }, { "epoch": 1.96, "learning_rate": 5.182505666578019e-07, "loss": 3.44, "step": 184500 }, { "epoch": 1.96, "learning_rate": 4.902665581077991e-07, "loss": 3.4449, "step": 184600 }, { "epoch": 1.96, "learning_rate": 4.630584495018408e-07, "loss": 3.5173, "step": 184700 }, { "epoch": 1.96, "learning_rate": 4.366263254489411e-07, "loss": 3.4473, "step": 184800 }, { "epoch": 1.97, "learning_rate": 4.109702681450167e-07, "loss": 3.442, "step": 184900 }, { "epoch": 1.97, "learning_rate": 3.8609035737266486e-07, "loss": 3.436, "step": 185000 } ], "logging_steps": 100, "max_steps": 188152, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5000, "total_flos": 3.0232783618595517e+24, "train_batch_size": 6, "trial_name": null, "trial_params": null }